-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathmerge_contributors.py
More file actions
103 lines (87 loc) · 3.56 KB
/
merge_contributors.py
File metadata and controls
103 lines (87 loc) · 3.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Code to combine all-contributors files from multiple repositories into a single file
# With help from GitHub Copilot
import requests
import os
import pandas as pd
import logging
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# List all physiopy repos (manually inputted for now)
repo_list = {'name': [
'prep4phys',
'physioqc',
'physiopy.github.io',
'phys2denoise',
'peakdet',
'peakdet2',
'physutils',
'physiopy-repository-template',
'physiopy-community-guidelines',
'phys2bids',
'physiopy',
'physiopy-test-workflows',
'brainhack-physiopy-2023',
'physiopy_tutorial',
'.github',
'physiopy-codesprint-spring2023',
'brainhack-physiopy-2022',
'outreach'
]}
# Initialize DataFrame with additional columns
repo_list_df = pd.DataFrame(data=repo_list)
repo_list_df['cont_json'] = False
repo_list_df['cont_txt'] = False
# Create a directory to save the JSON files
os.makedirs('contributors_files', exist_ok=True)
# Loop through repos and save contributor file if it exists
for repo in repo_list_df['name']:
url = f'https://github.com/physiopy/{repo}/raw/master/.all-contributorsrc'
try:
r = requests.get(url, allow_redirects=True)
r.raise_for_status() # Raise an HTTPError for bad responses (4xx and 5xx)
if r.headers.get('Content-Type') == 'application/json':
content = r.json()
with open(f'contributors_files/{repo}_all-contributorsrc.json', 'w') as f:
json.dump(content, f, indent=2)
repo_list_df.loc[repo_list_df['name'] == repo, ['cont_json', 'cont_txt']] = [True, False]
logging.info(f'Successfully saved JSON for {repo}')
else:
with open(f'contributors_files/{repo}_all-contributorsrc.txt', 'w') as f:
f.write(r.text)
repo_list_df.loc[repo_list_df['name'] == repo, ['cont_json', 'cont_txt']] = [False, True]
logging.warning(f'Saved as text for {repo} due to non-JSON content')
except requests.exceptions.RequestException as e:
repo_list_df.loc[repo_list_df['name'] == repo, ['cont_json', 'cont_txt']] = [False, False]
logging.error(f'Failed to fetch {repo}: {e}')
# Display the updated DataFrame
display(repo_list_df)
# Function to merge contributors
def merge_contributors(contributors_list):
merged_contributors = {}
for contributor in contributors_list:
login = contributor['login']
if login in merged_contributors:
merged_contributors[login]['contributions'] = list(set(
merged_contributors[login]['contributions'] + contributor['contributions']
))
else:
merged_contributors[login] = contributor
return list(merged_contributors.values())
# Initialize an empty list to hold all contributors
all_contributors = []
# Read all JSON files from the contributors_files directory
json_files = glob.glob('contributors_files/*.json')
for json_file in json_files:
with open(json_file, 'r') as f:
content = json.load(f)
all_contributors.extend(content['contributors'])
# Merge all contributors
merged_contributors = merge_contributors(all_contributors)
# Create the final .all-contributorsrc content
merged_file = {
"contributors": merged_contributors
}
# Write the merged contributors to a new .all-contributorsrc file
with open('merged_all-contributorsrc.json', 'w') as f:
json.dump(merged_file, f, indent=2)
logging.info('Successfully merged all contributors into merged_all-contributorsrc.json')