Skip to content

Commit f6d1efe

Browse files
committed
cvp-2129 PR analysis
1 parent f588eba commit f6d1efe

File tree

6 files changed

+243
-0
lines changed

6 files changed

+243
-0
lines changed

pr-analysis/AvgMergeTimes.png

125 KB
Loading

pr-analysis/PR_analysis.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
from pandas.core.frame import DataFrame
2+
import pandas as pd
3+
from pandas import json_normalize
4+
import numpy as np
5+
import matplotlib.pyplot as plt
6+
import time
7+
from datetime import datetime, timedelta
8+
import matplotlib.dates as mdates
9+
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange
10+
from graphql_query import get_PR_data
11+
12+
13+
def createDateColumn(dataframe):
    """Add a 'Date Merged' column to *dataframe*.

    Parses the ISO-8601 timestamp strings in 'node.mergedAt' into
    datetime.date objects. Unmerged PRs (missing mergedAt) get the
    sentinel string "None" so downstream code can filter them out.

    Returns the same DataFrame with the new column attached.
    """
    new_date_col = []  # parsed dates (or "None") in row order
    format_str = r"%Y-%m-%dT%H:%M:%SZ"
    for merged_at in dataframe['node.mergedAt']:
        # NOTE(fix): the original used two separate `if` tests
        # (`!= None` then `== None`); any value that was neither —
        # e.g. NaN, which pandas uses for missing data — appended
        # nothing and made the column shorter than the frame.
        if merged_at is not None:
            # Convert the timestamp string to a plain date.
            newdate = datetime.strptime(merged_at, format_str)
            new_date_col.append(newdate.date())
        else:
            # Keep the string sentinel — process_data() filters on it.
            new_date_col.append("None")
    dataframe['Date Merged'] = new_date_col

    return dataframe
28+
29+
30+
def numPRMerged_graph(df):
    """Plot a bar chart of how many PRs were merged on each date.

    Expects *df* to carry a 'dates' column (datetime.date values) and a
    'counts' column. Saves the figure to PRmergeRates.png and shows it.
    """
    # Earliest and latest merge dates bound the reference date range.
    merge_dates = df['dates']
    merge_counts = df['counts']
    earliest = min(merge_dates)
    latest = max(merge_dates)

    # Build an evenly spaced range between the two extremes; its
    # smallest gap (in matplotlib float-date units) becomes the bar
    # width. 12-week spacing, as in the original layout.
    spaced = mdates.drange(earliest, latest, timedelta(weeks=12))
    spaced = mdates.date2num(spaced)
    bar_width = np.diff(spaced).min()

    fig, ax = plt.subplots()

    # Plot the dates directly on the x-axis rather than 0..n positions.
    ax.bar(merge_dates, merge_counts.tolist(), align='center',
           width=bar_width, ec='blue')

    # Interpret x values as dates and rotate the labels to fit.
    ax.xaxis_date()
    fig.autofmt_xdate()

    plt.ylabel('Counts')
    plt.xlabel('Dates')
    plt.title('Number of PRs merged over time')
    plt.savefig('PRmergeRates.png', dpi=400)
    plt.show()
66+
67+
def computeMergetime(created_at, merged_at):
    """Return the whole number of days between creation and merge.

    Both arguments are ISO-8601 timestamp strings of the form
    "%Y-%m-%dT%H:%M:%SZ"; the fractional part of a day is truncated.
    """
    fmt = r"%Y-%m-%dT%H:%M:%SZ"
    created = datetime.strptime(created_at, fmt)
    merged = datetime.strptime(merged_at, fmt)
    # 86400 seconds in a day; int() truncates toward zero.
    days = (merged - created).total_seconds() / 86400
    return int(days)
75+
76+
def addlabels(x, y):
    """Write each bar's value centered at its x position on the plot."""
    for pos, _ in enumerate(x):
        # y is indexed by label (matches position for a RangeIndex frame).
        plt.text(pos, y[pos], y[pos], ha='center')
79+
80+
def avgMergetime_graph(df):
    """Draw a labelled bar chart of the average merge time per month.

    Expects *df* to carry 'Merged_YM' (year-month periods) and
    'mergetime' (average days, int). Saves the figure to
    AvgMergeTimes.png and shows it.
    """
    months = df['Merged_YM']
    avg_days = df['mergetime']

    fig, ax = plt.subplots()
    positions = np.arange(len(months))  # one slot per month on the x-axis
    plt.bar(positions, avg_days)
    plt.xticks(positions, months)  # label each slot with its month

    # Rotate the x-axis tick labels so they don't overlap.
    fig.autofmt_xdate()
    ax.xaxis_date()
    addlabels(months, avg_days)  # annotate each bar with its value

    plt.xlabel("Dates")
    plt.ylabel("Merge Time in Days")
    plt.title("Avg Merge Times")
    plt.savefig('AvgMergeTimes.png', dpi=400)
    plt.show()
98+
99+
def avgMergetime(df):
    """Compute each PR's merge time, average it per month, and plot it.

    Adds a 'mergetime' column (days from creation to merge, or the
    sentinel string "None" for unmerged PRs) and a 'Merged_YM' period
    column to *df*, then draws a bar chart of the monthly averages.
    """
    # 1. Per-PR merge time. NOTE(fix): the original tested
    #    `row['node.mergedAt'] != None`, which is True for NaN — the
    #    value pandas substitutes for missing data — and would crash
    #    strptime. pd.notna() covers both None and NaN.
    mergetime_ = []
    for created, merged in zip(df['node.createdAt'], df['node.mergedAt']):
        if pd.notna(merged):
            mergetime_.append(computeMergetime(created, merged))
        else:
            mergetime_.append("None")  # sentinel for unmerged PRs
    df['mergetime'] = mergetime_

    # 2. Average merge time per month. Rows with a missing mergedAt
    #    become NaT periods and are dropped by groupby automatically,
    #    so the "None" sentinels never reach the mean.
    df['Merged_YM'] = pd.to_datetime(df['node.mergedAt']).dt.to_period('M')
    new_df = df.filter(['Merged_YM', 'mergetime'], axis=1)

    group_mean = new_df.groupby('Merged_YM')['mergetime'].mean()
    mean_df = group_mean.reset_index()

    mean_df['mergetime'] = mean_df.mergetime.astype(int)  # truncate to whole days

    # 3. Create the bar graph.
    avgMergetime_graph(mean_df)
127+
128+
def process_data(dataframe):
    """Process the queried PR data and produce both analysis graphs.

    Called from main() with the DataFrame built by get_PR_data().
    """
    # Parse the merge timestamps into a real date column.
    dataframe = createDateColumn(dataframe)

    # Count how many PRs were merged on each date, then rename the
    # resulting columns to something readable.
    frequency = dataframe['Date Merged'].value_counts()
    df_value_counts = pd.DataFrame(frequency).reset_index()
    df_value_counts.columns = ['dates', 'counts']

    # Drop the sentinel row counting PRs that were never merged.
    dateFreq = df_value_counts.loc[df_value_counts["dates"] != "None"]

    # 1. Number of PRs merged over time.
    numPRMerged_graph(dateFreq)
    # 2. Average PR merge time per month.
    avgMergetime(dataframe)
    # TODO: pie chart of author-is-merger vs author-is-not-the-merger.
148+
149+
150+
def main():
    """Fetch PR data from the GitHub GraphQL API and run the analysis."""
    pr_cursor = None  # None means: start from the first page of results
    res_data = get_PR_data(pr_cursor)
    process_data(res_data)


if __name__ == "__main__":
    # NOTE(fix): the original called main() unconditionally, so merely
    # importing this module triggered a full GitHub query and analysis
    # run. Guard the entry point instead.
    main()

pr-analysis/PRmergeRates.png

92.9 KB
Loading

pr-analysis/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# PR Analysis on operator-test-playbooks
2+
3+
To run the code, use this command: **python PR_analysis.py**
4+
5+
**NOTE**: Before you run the code, you must add a GitHub token in the **graphql_query.py** file by replacing **Add_Your_Token_Here** in the headers.
6+
The token can be generated by following this [guide](https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token)
7+
8+
Once the code is finished running, you will have two png files saved in your folder. These are the graphs from the analysis of the queried data.

pr-analysis/graphql_query.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import json
import os
from string import Template

import pandas as pd
import requests
from python_graphql_client import GraphqlClient
6+
7+
# SECURITY(fix): the original committed a real personal access token here.
# Never commit secrets — the README says to replace the placeholder; better
# still, export GITHUB_TOKEN in your environment and it is picked up here.
headers = {"Authorization": "token " + os.environ.get("GITHUB_TOKEN", "Add_Your_Token_Here")}
8+
9+
10+
def run_query(query):
    """POST *query* to the GitHub GraphQL endpoint and return the JSON reply.

    Raises an Exception carrying the HTTP status code and the query text
    when the request is not fulfilled (status != 200).
    """
    response = requests.post(
        'https://api.github.com/graphql',
        json={'query': query},
        headers=headers,
    )
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))
    # 200 means the request was fulfilled.
    return response.json()
17+
18+
def build_query(pr_cursor):
    """Return the GraphQL query string for one page of pull requests.

    *pr_cursor* must already be formatted for the API: either the
    literal string "null" (first page) or a double-quoted cursor
    (see format_cursor).
    """
    query_template = Template("""{
    repository(owner: "redhat-openshift-ecosystem", name: "operator-test-playbooks") {
        pullRequests(first: 15, after: $cursor) {
            pageInfo{
                hasNextPage
                endCursor
            }
            edges {
                node {
                    author {
                        login
                    }
                    mergedBy {
                        login
                    }
                    createdAt
                    mergedAt
                }
            }
        }
    }
}
""")
    return query_template.substitute(cursor=pr_cursor)
43+
44+
def format_cursor(cursor):
    """Wrap *cursor* in double quotation marks, as the API requires."""
    return f'"{cursor}"'
47+
48+
#This function will create and return a data frame with the data returned from the query
49+
def get_PR_data(cursor):
50+
all_data = []
51+
hasNextPage = True
52+
while (hasNextPage == True):
53+
cursor = "null" if cursor is None else format_cursor(cursor)
54+
getPRinfo = build_query(cursor)
55+
result = run_query(getPRinfo)
56+
#print(result)
57+
data_frame = pd.json_normalize(result['data']['repository']['pullRequests']['edges'])
58+
page_info = pd.json_normalize(result['data']['repository']['pullRequests']['pageInfo'])
59+
#print(data_frame)
60+
#print(page_info)
61+
all_data.append(data_frame)
62+
cursor = page_info.loc[0,'endCursor'] #update cursor
63+
hasNextPage = page_info.loc[0,'hasNextPage'] #update hasNextPage
64+
res_data = pd.concat(all_data) #creating a df with all PRs
65+
res_data.pop('node.mergedBy')
66+
#print(res_data)
67+
return res_data

pr-analysis/stats.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Operator Test Playbooks
2+
3+
## History
4+
5+
### Number of PRs over time
6+
![Number of PR over time](PRmergeRates.png)
7+
Download : [png](PRmergeRates.png) [pdf](PRmergeRates.pdf)
8+
9+
### Average PR merging time
10+
![Average PR merging time](AvgMergeTimes.png)
11+
Download : [png](AvgMergeTimes.png) [pdf](AvgMergeTimes.pdf)
12+

0 commit comments

Comments
 (0)