Skip to content

Commit 28162e6

Browse files
committed
cvp-2129 PR analysis
1 parent f588eba commit 28162e6

File tree

6 files changed

+237
-0
lines changed

6 files changed

+237
-0
lines changed

pr-analysis/AvgMergeTimes.png

128 KB
Loading

pr-analysis/PR_analysis.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
from pandas.core.frame import DataFrame
2+
import pandas as pd
3+
from pandas import json_normalize
4+
import numpy as np
5+
import matplotlib.pyplot as plt
6+
import time
7+
from datetime import datetime, timedelta
8+
import matplotlib.dates as mdates
9+
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange
10+
from graphql_query import get_PR_data
11+
12+
13+
def createDateColumn(dataframe):
    """Add a 'Date Merged' column of datetime.date values parsed from 'node.mergedAt'.

    Rows whose 'node.mergedAt' is missing get the string "None" so the column
    stays the same length as the frame; downstream code filters those rows out
    by comparing against that "None" marker.

    :param dataframe: pandas DataFrame with a 'node.mergedAt' column of
        ISO-8601 strings (e.g. "2021-05-01T12:30:00Z") or None.
    :return: the same DataFrame with the extra 'Date Merged' column.
    """
    format_str = r"%Y-%m-%dT%H:%M:%SZ"
    new_date_col = []  # contains the parsed dates; appended to the dataframe below
    for raw in dataframe['node.mergedAt']:
        if raw is not None:
            # parse the timestamp string and keep only the calendar date
            parsed = datetime.strptime(raw, format_str)
            new_date_col.append(parsed.date())
        else:
            # keep a "None" marker so downstream filtering (!= "None") still works
            new_date_col.append("None")
    dataframe['Date Merged'] = new_date_col

    return dataframe
28+
29+
30+
def numPRMerged_graph(df):
    """Plot a bar chart of the number of PRs merged per day.

    Saves the figure to 'PRmergeRates.png' (400 dpi) and shows it.

    :param df: DataFrame with a 'dates' column (datetime.date values) and a
        'counts' column (number of PRs merged on each date).
    """
    datelist = df['dates']
    counts = df['counts']

    # Set up the axes and figure
    fig, ax = plt.subplots()

    # Bar width in matplotlib date units (days): use the smallest gap between
    # two observed dates so adjacent bars cannot overlap. (The previous code
    # derived the width from a synthetic 12-week grid, which produced 84-day
    # wide, heavily overlapping bars, and mdates.drange raised when the data
    # contained only a single distinct date.)
    date_nums = np.sort(mdates.date2num(datelist))
    width = np.diff(date_nums).min() if len(date_nums) > 1 else 1.0

    # Plot "counts" against the actual dates rather than x-values [0,1,2...]
    ax.bar(datelist, counts.tolist(), align='center', width=width, ec='blue')

    # Tell matplotlib to interpret the x-axis values as dates
    ax.xaxis_date()

    # Make space for and rotate the x-axis tick labels
    fig.autofmt_xdate()

    plt.ylabel('Counts')
    plt.xlabel('Dates')
    plt.title('Number of PRs merged over time')
    plt.savefig('PRmergeRates.png', dpi=400)
    plt.show()
66+
67+
def computeMergetime(created_at, merged_at):
    """Return the whole number of minutes between PR creation and merge.

    :param created_at: ISO-8601 timestamp string, e.g. "2021-05-01T12:00:00Z".
    :param merged_at: ISO-8601 timestamp string of when the PR was merged.
    :return: elapsed time in minutes, truncated to an int.
    """
    fmt = r"%Y-%m-%dT%H:%M:%SZ"
    delta = datetime.strptime(merged_at, fmt) - datetime.strptime(created_at, fmt)
    return int(delta.total_seconds() / 60)  # difference in minutes
75+
76+
def avgMergetime_graph(df):
    """Bar-chart the average merge time (in minutes) for each month.

    Saves the figure to 'AvgMergeTimes.png' (400 dpi) and shows it.

    :param df: DataFrame with 'Merged_YM' (year-month periods) and
        'mergetime' (average minutes, int) columns.
    """
    months = df['Merged_YM']
    averages = df['mergetime']

    fig, ax = plt.subplots()
    positions = np.arange(len(months))  # one bar slot per month
    plt.bar(positions, averages)
    plt.xticks(positions, months)  # label each slot with its month

    # Make space for and rotate the x-axis tick labels
    fig.autofmt_xdate()
    ax.xaxis_date()

    plt.xlabel("Dates")
    plt.ylabel("Merge Time in Minutes")
    plt.title("Avg Merge Times")
    plt.savefig('AvgMergeTimes.png', dpi=400)
    plt.show()
93+
94+
def avgMergetime(df):
    """Compute each PR's merge time, average it per month, and plot the result.

    Adds two columns to *df* in place: 'mergetime' (minutes from creation to
    merge, or the string "None" for unmerged PRs, to keep the column aligned
    with the frame) and 'Merged_YM' (year-month period of the merge).
    Delegates drawing to avgMergetime_graph().

    :param df: DataFrame with 'node.createdAt' and 'node.mergedAt' columns of
        ISO-8601 strings ('node.mergedAt' may be None for open PRs).
    """
    # 1. calculate the merge time for each merged PR
    mergetimes = []
    for _, row in df.iterrows():
        merged_at = row.loc['node.mergedAt']
        if merged_at is not None:
            mergetimes.append(computeMergetime(row.loc['node.createdAt'], merged_at))
        else:
            mergetimes.append("None")
    df['mergetime'] = mergetimes

    # 2. average merge time per month; unmerged rows become NaT in
    #    'Merged_YM' and are therefore dropped by groupby automatically
    df['Merged_YM'] = pd.to_datetime(df['node.mergedAt']).dt.to_period('M')
    monthly = df.filter(['Merged_YM', 'mergetime'], axis=1)
    mean_df = monthly.groupby('Merged_YM')['mergetime'].mean().reset_index()
    mean_df['mergetime'] = mean_df.mergetime.astype(int)  # change from float to int

    # 3. create a bar graph
    avgMergetime_graph(mean_df)
122+
123+
def process_data(dataframe):
    """Process the queried PR data and produce the analysis graphs.

    Called from main() with the DataFrame built by get_PR_data().

    :param dataframe: DataFrame of PR edges with 'node.createdAt' and
        'node.mergedAt' columns.
    """
    # add a new column holding just the merge date in date format
    dataframe = createDateColumn(dataframe)

    # count how many PRs were merged on each date and name the columns
    merge_counts = pd.DataFrame(dataframe['Date Merged'].value_counts())
    merge_counts = merge_counts.reset_index()
    merge_counts.columns = ['dates', 'counts']

    # drop the bucket that counted PRs which were never merged
    merged_only = merge_counts.loc[merge_counts["dates"] != "None"]

    # 1. graph: number of PRs merged over time
    numPRMerged_graph(merged_only)
    # 2. graph: average PR merge time per month
    avgMergetime(dataframe)
    # TODO: pie chart of author-is-merger vs author-is-not-the-merger
143+
144+
145+
def main():
    """Fetch PR data via the GraphQL query and run the analysis on it."""
    pr_cursor = None  # None means: start from the first page of results
    res_data = get_PR_data(pr_cursor)
    process_data(res_data)


if __name__ == "__main__":  # guard so importing this module doesn't trigger the analysis
    main()

pr-analysis/PRmergeRates.png

92.9 KB
Loading

pr-analysis/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# PR Analysis on operator-test-playbooks
2+
3+
To run the code, use this command: **python PR_analysis.py**
4+
5+
**NOTE:** Before you run the code, you must add a GitHub token in the **graphql_query.py** file by replacing **Add_Your_Token_Here** in the headers.
6+
The token can be generated by following this [guide](https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token)
7+
8+
Once the code is finished running, you will have two png files saved in your folder. These are the graphs from the analysis of the queried data.

pr-analysis/graphql_query.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
from string import Template
2+
from python_graphql_client import GraphqlClient
3+
import pandas as pd
4+
import requests
5+
import json
6+
7+
headers = {"Authorization": "token Add_Your_Token_Here"}  # replace with a GitHub personal access token (see README)
8+
9+
10+
def run_query(query):
    """POST *query* to the GitHub GraphQL API and return the decoded JSON.

    :param query: GraphQL query string.
    :return: parsed JSON response body (dict) on HTTP 200.
    :raises Exception: when the API responds with a non-200 status code.
    """
    request = requests.post(
        'https://api.github.com/graphql',
        json={'query': query},
        headers=headers,
        timeout=30,  # without a timeout, requests can hang forever on a stalled connection
    )
    if request.status_code == 200:  # 200 means request fulfilled
        return request.json()
    else:
        raise Exception("Query failed to run by returning code of {}. {}".format(request.status_code, query))
17+
18+
def build_query(pr_cursor):
    """Return the paginated pull-request GraphQL query with *pr_cursor* spliced in.

    :param pr_cursor: literal for the 'after:' argument — either the string
        "null" (first page) or a double-quoted cursor token (see format_cursor()).
    :return: the complete GraphQL query string.
    """
    query_template = Template("""{
  repository(owner: "redhat-openshift-ecosystem", name: "operator-test-playbooks") {
    pullRequests(first: 15, after: $cursor) {
      pageInfo{
        hasNextPage
        endCursor
      }
      edges {
        node {
          author {
            login
          }
          mergedBy {
            login
          }
          createdAt
          mergedAt

        }
      }
    }
  }
}
""")
    return query_template.substitute(cursor=pr_cursor)
43+
def format_cursor(cursor):
    """Wrap *cursor* in double quotation marks, as required by the API."""
    return f'"{cursor}"'
46+
47+
def get_PR_data(cursor):
    """Page through every pull request of the repository and return one DataFrame.

    Repeatedly runs the GraphQL query, following the 'endCursor' pagination
    token until 'hasNextPage' is false, then concatenates all pages.

    :param cursor: starting cursor token, or None to begin at the first page.
    :return: DataFrame of PR edges (author, createdAt, mergedAt); the
        'node.mergedBy' column is removed because the analysis does not use it.
    """
    all_data = []
    has_next_page = True
    while has_next_page:
        cursor = "null" if cursor is None else format_cursor(cursor)
        result = run_query(build_query(cursor))
        pull_requests = result['data']['repository']['pullRequests']
        all_data.append(pd.json_normalize(pull_requests['edges']))
        # pageInfo is a plain dict — read it directly instead of normalizing
        # it into a one-row DataFrame first
        page_info = pull_requests['pageInfo']
        cursor = page_info['endCursor']          # update cursor for the next page
        has_next_page = page_info['hasNextPage'] # stop once the last page is reached
    res_data = pd.concat(all_data)  # one DataFrame with all PRs
    res_data.pop('node.mergedBy')   # unused downstream
    return res_data

pr-analysis/stats.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Operator Test Playbooks
2+
3+
## History
4+
5+
### Number of PRs over time
6+
![Number of PRs over time](PRmergeRates.png)
7+
Download : [png](PRmergeRates.png) [pdf](PRmergeRates.pdf)
8+
9+
### Average PR merging time
10+
![Average PR merging time](AvgMergeTimes.png)
11+
Download : [png](AvgMergeTimes.png) [pdf](AvgMergeTimes.pdf)
12+

0 commit comments

Comments
 (0)