summaryrefslogtreecommitdiff
path: root/hack/generate-devstats-repo-sql.py
diff options
context:
space:
mode:
authorDavid Hovey <david@hoveytech.com>2019-09-25 16:48:08 -0700
committerDavid Hovey <david@hoveytech.com>2019-09-25 16:48:08 -0700
commit72c7370548a65efcea9a9ba57c59cd10fa6e7530 (patch)
treef549d6eb1ea92ea3eb11c062c4aa5c485206c567 /hack/generate-devstats-repo-sql.py
parent1250c9771e9a5f0cb6aab40e746612d5c5a670bb (diff)
parent0b070cdc882e6b8f38aae95fcf4c18a983a61f36 (diff)
Merge branch 'master' of github.com:kubernetes/community
Diffstat (limited to 'hack/generate-devstats-repo-sql.py')
-rwxr-xr-xhack/generate-devstats-repo-sql.py194
1 files changed, 194 insertions, 0 deletions
diff --git a/hack/generate-devstats-repo-sql.py b/hack/generate-devstats-repo-sql.py
new file mode 100755
index 00000000..71d77497
--- /dev/null
+++ b/hack/generate-devstats-repo-sql.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+
+# Copyright 2019 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Output devstats repo_groups.sql based on subproject definitions in sigs.yaml
+
+This is likely missing a few repos because:
+ - some repos lack an owner (eg: kubernetes/kubernetes)
+ - it doesn't enumerate all repos from all kubernetes-owned orgs
+ - it ignores the fact that committees can own repos, only grouping by sig
+
+The sql generated is NOT intended to overwrite/replace the file that lives at
+github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql, but instead aid a
+human in doing some manual updates to the file. Future improvements to this
+script could eliminate that part of the process, but it's where we are today.
+"""
+
+import argparse
+import ruamel.yaml as yaml
+import json
+import re
+import sys
+
+# sql statement that assigns one repo group; the first placeholder takes the
+# group name and the second the comma-separated, quoted repo names (both are
+# filled in by write_repo_groups_template via str.format)
+repo_group_sql_template = """
+update gha_repos set repo_group = '{}' where name in (
+{}
+);
+"""
+
+# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
+# if this differs, consider cncf the authoritative source and update this
+#
+# written once by write_repo_groups_sql, before any repo group statement
+repo_groups_sql_header = """-- generated by github.com/kubernetes/community/hack/generate-devstats-repo-sql.py
+-- Add repository groups
+"""
+
+# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
+# if this differs, consider cncf the authoritative source and update this
+#
+# written once by write_repo_groups_sql, after every repo group statement;
+# it sets repo aliases and then reports the number of repos per group (the
+# 'Other' catch-all update is present only as a sql comment)
+repo_groups_sql_footer = """
+-- All other unknown repositories should have 'Other' repository group
+-- update gha_repos set repo_group = 'Other' where repo_group is null;
+
+-- By default alias is the newest repo name for given repo ID
+update
+ gha_repos r
+set
+ alias = coalesce((
+ select e.dup_repo_name
+ from
+ gha_events e
+ where
+ e.repo_id = r.id
+ order by
+ e.created_at desc
+ limit 1
+ ), name)
+;
+
+update gha_repos set alias = 'kubernetes/kubernetes' where name like '%kubernetes' or name = 'kubernetes/';
+
+select
+ repo_group,
+ count(*) as number_of_repos
+from
+ gha_repos
+where
+ repo_group is not null
+group by
+ repo_group
+order by
+ number_of_repos desc,
+ repo_group asc;
+
+"""
+
+# hand-maintained repo groups; each entry gives the group 'name' and its
+# 'repos', and write_repo_groups_sql emits these before the groups it
+# derives from the 'sigs' and 'committees' sections
+special_case_groups = [{
+ # the main repo has no single owner and has gone by many names
+ 'name': 'Kubernetes',
+ 'repos': [
+ 'kubernetes/kubernetes',
+ 'GoogleCloudPlatform/kubernetes',
+ 'kubernetes',
+ 'kubernetes/'
+ ]
+}]
+
+# devstats isn't aware of repo renames or migrations; we need to keep
+# old repo names in its sql groups present for historical purposes;
+#
+# when reconciling deletions from repo_groups.sql by this script, use
+# github.com/kubernetes/org issues to determine why; renamed, migrated,
+# or used-and-retired repos belong here; unused/deleted repos do not
+#
+# keys are matched against each group's 'dir' value; the listed repos are
+# unioned with the repos derived from that group's subprojects
+renamed_repos = {
+ 'sig-architecture': [
+ 'kubernetes/contrib',
+ ],
+ 'sig-api-machinery': [
+ 'kubernetes-incubator/apiserver-builder',
+ ],
+ 'sig-cluster-lifecycle': [
+ 'kubernetes-incubator/kubespray',
+ ],
+ 'sig-multicluster': [
+ 'kubernetes-sigs/federation-v2',
+ ],
+ 'sig-node': [
+ 'kubernetes-incubator/node-feature-discovery',
+ ],
+ 'sig-pm': [
+ 'kubernetes/features',
+ ],
+ 'sig-service-catalog': [
+ 'kubernetes-incubator/service-catalog',
+ ]
+}
+
+def repos_from_k8s_group(k8s_group):
+    """Return the sorted org/repo names owned by a kubernetes community group.
+
+    org/repo is owned by k8s_group if org/repo/OWNERS is in one of their
+    subprojects, i.e. a subproject lists a uri of the form
+    https://raw.githubusercontent.com/<org>/<repo>/<branch>/OWNERS.
+    OWNERS files deeper inside a repo, and uris of any other shape, are
+    ignored rather than raising.
+
+    Args:
+        k8s_group: mapping describing one group; its optional 'subprojects'
+            entry is a list of mappings whose optional 'owners' entry is a
+            list of OWNERS file uris.
+
+    Returns:
+        Sorted list of unique 'org/repo' strings; empty when the group has
+        no subprojects or none of them lists a top-level OWNERS file.
+    """
+    # any single-segment branch name is accepted so that repos whose default
+    # branch is not 'master' are not silently dropped
+    root_owners = re.compile(
+        r"https://raw\.githubusercontent\.com/([^/]+)/([^/]+)/[^/]+/OWNERS")
+    repos = set()
+    # 'subprojects' and 'owners' may be absent or explicitly null in the yaml
+    for sp in k8s_group.get('subprojects') or []:
+        for uri in sp.get('owners') or []:
+            match = root_owners.fullmatch(uri)
+            if match:
+                repos.add('/'.join(match.groups()))
+    return sorted(repos)
+
+def k8s_group_name(k8s_group):
+    """Return the repo group label for a community group.
+
+    Groups whose 'dir' starts with 'sig-' become 'SIG <name>', those whose
+    'dir' starts with 'committee-' become '<name> Committee', and anything
+    else (including a missing 'dir') is labelled 'UNKNOWN <dir>'.
+    """
+    directory = k8s_group.get('dir', '')
+    is_sig = directory.startswith('sig-')
+    if not is_sig and not directory.startswith('committee-'):
+        return "UNKNOWN " + directory
+    # 'name' is only looked up for the two recognised kinds of group
+    label = k8s_group['name']
+    if is_sig:
+        return "SIG " + label
+    return label + " Committee"
+
+def write_repo_groups_template(name, repos, fp):
+    """Write the sql statement assigning repos to the repo group name.
+
+    Nothing is written when repos is empty. Single quotes inside the group
+    name or a repo name are doubled so each value stays one sql string
+    literal instead of terminating it early.
+
+    Args:
+        name: repo group label placed in the statement.
+        repos: sized collection of repo names, written in iteration order.
+        fp: object with a write(str) method that receives the statement.
+    """
+    if not repos:
+        return
+
+    def sql_quote(value):
+        # standard sql escapes a quote inside a literal by doubling it
+        return str(value).replace("'", "''")
+
+    fp.write(
+        repo_group_sql_template.format(
+            sql_quote(name),
+            ',\n'.join(" '{}'".format(sql_quote(r)) for r in repos)))
+
+def write_repo_groups_sql(k8s_groups, fp):
+    """Write the complete repo groups sql to fp.
+
+    The output is the header, one statement per special case group, one
+    statement per group in the 'sigs' and then 'committees' sections of
+    k8s_groups (repos derived from its subprojects plus any renamed_repos
+    entry for its 'dir', sorted), and finally the footer.
+    """
+    fp.write(repo_groups_sql_header)
+    for special in special_case_groups:
+        write_repo_groups_template(special['name'], special['repos'], fp)
+    for section in ('sigs', 'committees'):
+        for group in k8s_groups[section]:
+            owned = set(repos_from_k8s_group(group))
+            # keep historical names of renamed or migrated repos in the group
+            owned.update(renamed_repos.get(group['dir'], []))
+            write_repo_groups_template(k8s_group_name(group), sorted(owned), fp)
+    fp.write(repo_groups_sql_footer)
+
+def main(sigs_yaml, repo_groups_sql):
+    """Load the group definitions and emit the generated repo groups sql.
+
+    Args:
+        sigs_yaml: path of the yaml file holding the group definitions.
+        repo_groups_sql: path of the sql file to (over)write, or None to
+            write the sql to stdout instead.
+    """
+    with open(sigs_yaml, encoding='utf-8') as fp:
+        # YAML(typ='rt').load is the supported round-trip loader; the
+        # module-level round_trip_load helper is deprecated and no longer
+        # available in newer ruamel.yaml releases
+        k8s_groups = yaml.YAML(typ='rt').load(fp)
+
+    if repo_groups_sql is None:
+        write_repo_groups_sql(k8s_groups, sys.stdout)
+        return
+    with open(repo_groups_sql, 'w', encoding='utf-8') as fp:
+        write_repo_groups_sql(k8s_groups, fp)
+
+if __name__ == '__main__':
+    # command line entry point; both options are optional
+    PARSER = argparse.ArgumentParser(description=(
+        'Generate a repo_groups.sql intended for '
+        'github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql'))
+    # where to read the community group definitions from
+    PARSER.add_argument('--sigs-yaml', default='./sigs.yaml', help='Path to sigs.yaml')
+    # when this option is omitted the sql is written to stdout
+    PARSER.add_argument('--repo-groups-sql', help='Path to output repo_groups.sql if provided')
+    ARGS = PARSER.parse_args()
+
+    main(sigs_yaml=ARGS.sigs_yaml, repo_groups_sql=ARGS.repo_groups_sql)
+