#!/usr/bin/env python3
# JustAnotherArchivist / little-things
import collections
import html
import logging
import re
import requests
import shlex
import sys
import time


# Mode switches; one of them may be passed as the first command-line argument.
# --git-urls: print the .git and .wiki.git clone URLs of every repository.
GIT_URLS_OPTION = '--git-urls'
# --gitgud-complete-items: print one 'web:complete:OWNER/REPO' line per repository.
GITGUD_COMPLETE_ITEMS_OPTION = '--gitgud-complete-items'
# --name: print the account's username and full name instead of its repositories.
NAME_OPTION = '--name'
# Every switch the script recognises, in the order shown in the usage message.
MODES = (
	GIT_URLS_OPTION,
	GITGUD_COMPLETE_ITEMS_OPTION,
	NAME_OPTION,
)


# Parse the command line: an optional leading mode switch, then one or more user/organisation names.
mode = None
users = sys.argv[1:]
if users and users[0] in MODES:
	mode = users[0]
	users = users[1:]
# Validate with an explicit check rather than assert: assert statements are removed when Python
# runs with -O, which would let an empty or malformed command line through silently.
if not users or users[0].startswith('--'):
	print(f'Usage: github-list-repos [{" | ".join(MODES)}] USER [USER...]', file = sys.stderr)
	sys.exit(1)


def get(url):
	"""Fetch url and return the requests response object.

	The request is sent with a fixed browser-like User-Agent and an HTML Accept
	header. While the server answers with status 429, the function sleeps for
	5 seconds and tries again; any other status ends the loop and the response
	is returned as is (the status is not checked further here).
	"""
	request_headers = {
		'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0',
		'Accept': 'text/html',
	}
	while True:
		logging.info(f'Fetching {url}')
		response = requests.get(url, headers = request_headers)
		if response.status_code != 429:
			return response
		# Rate limited: back off for a fixed interval, then repeat the same request.
		logging.warning('Got 429, sleeping and retrying')
		time.sleep(5)


def p(repoName):
	"""Print the output for one repository in the format chosen by the global mode.

	repoName is the 'OWNER/REPO' path. Without a mode the repository page URL is
	printed; with --git-urls the repository and wiki clone URLs; with
	--gitgud-complete-items a 'web:complete:' item line. Any other mode prints
	nothing.
	"""
	if mode == GITGUD_COMPLETE_ITEMS_OPTION:
		print(f'web:complete:{repoName}')
		return
	if mode == GIT_URLS_OPTION:
		# Two lines per repository: the code repository first, then its wiki.
		print(f'https://github.com/{repoName}.git')
		print(f'https://github.com/{repoName}.wiki.git')
		return
	if mode is None:
		print(f'https://github.com/{repoName}')


def _single_line(text):
	"""Return scraped HTML text as a single-line string.

	Surrounding whitespace is stripped and character references are decoded
	first; only afterwards are CR/LF replaced by spaces, so that line breaks
	written as references (e.g. '&#10;') cannot end up in the printed value.
	"""
	return html.unescape(text.strip()).replace('\n', ' ').replace('\r', ' ')


# For each name given on the command line, fetch its github.com profile page and either print
# its repositories through p() or, in --name mode, print its username and full name.
for user in users:
	r = get(f'https://github.com/{user}')
	if '<div id="org-repositories"' in r.text:
		# Organisation, archived repositories don't appear on /orgs/ + pagination, so need to also iterate over all the 'type' parameters
		if mode == NAME_OPTION:
			musername = re.search(r'<meta property="profile:username" content="([^"]*)" />', r.text)
			if not musername:
				print('Error: could not find profile:username meta tag', file = sys.stderr)
				sys.exit(1)
			mfullname = re.search(r'<h1\s(?:[^>]*\s)?class="(?:[^"]*\s)?h2(?:\s[^"]*)?"(?:\s[^>]*)?>(.*?)</h1>', r.text, flags = re.DOTALL)
			if not mfullname:
				print('Error: could not find name h1', file = sys.stderr)
				sys.exit(1)
			# Unescape before flattening line breaks, exactly as the user branch below does.
			print(_single_line(musername.group(1)))
			print(_single_line(mfullname.group(1)))
			# This ends the whole script, so in --name mode only the first name argument is processed.
			sys.exit(0)
		# Queue of query strings to walk: the unfiltered listing first, then every '?type=...' link found on it.
		types = collections.deque()
		types.append('')
		# Repositories already printed for this organisation; the listings overlap.
		seen = set()
		def maybe_p(repoName):
			if repoName not in seen:
				p(repoName)
				seen.add(repoName)
		while types:
			type_ = types.popleft()
			# Separator for appending the page parameter: '&' when a '?type=...' query is already present.
			j = '&' if type_ else '?'
			r = get(f'https://github.com/orgs/{user}/repositories{type_}')
			if not type_:
				# Only the unfiltered listing is used to discover the type filters.
				types.extend(x.split('"')[1] for x in re.findall(r'href="\?type=[^"]*', r.text))
			page = 1
			while True:
				# Two different link markups are matched; maybe_p() keeps a repository found by both from being printed twice.
				for m in re.finditer(r'<a itemprop="name codeRepository"\s(?:[^>]*\s)?data-hovercard-url="/([^/>"]+/[^/>"]+)/hovercard"', r.text):
					maybe_p(m.group(1))
				for m in re.finditer(r'<a data-testid="listitem-title-link"\s(?:[^>]*\s)?href="/([^/>"]+/[^/>"]+)"', r.text):
					maybe_p(m.group(1))
				if '<a class="next_page"' not in r.text and '<a rel="next"' not in r.text:
					# End of pagination
					break
				page += 1
				r = get(f'https://github.com/orgs/{user}/repositories{type_}{j}page={page}')
	else:
		# User, ?tab=repositories + cursor pagination
		if mode == NAME_OPTION:
			musername = re.search(r'<span\s(?:[^>]*\s)?class="(?:[^"]*\s)?vcard-username(?:\s[^"]*)?"(?:\s[^>]*)?>(.*?)</span>', r.text, flags = re.DOTALL)
			if not musername:
				print('Error: could not find vcard-username span', file = sys.stderr)
				sys.exit(1)
			# The full name is optional; an empty line is printed when the span is missing.
			if (m := re.search(r'<span\s(?:[^>]*\s)?class="(?:[^"]*\s)?vcard-fullname(?:\s[^"]*)?"(?:\s[^>]*)?>(.*?)</span>', r.text, flags = re.DOTALL)):
				fullname = _single_line(m.group(1))
			else:
				fullname = ''
			print(_single_line(musername.group(1)))
			print(fullname)
			# This ends the whole script, so in --name mode only the first name argument is processed.
			sys.exit(0)
		r = get(f'https://github.com/{user}?tab=repositories')
		while True:
			for m in re.finditer(r'<a href="/([^/>"]+/[^/>"]+)" itemprop="name codeRepository"(\s[^>]*)?>', r.text):
				p(m.group(1))
			# The next page value is read from the href of the link carrying class="next_page".
			if not (m := re.search(r'<a\s(?=(?:[^>]*\s)?class="next_page"(?:\s[^>]*)?>)(?:[^>]*\s)?href="/[^/?"]+\?page=([^&]+)&amp;tab=repositories"(?:\s[^>]*)?>', r.text)):
				# End of pagination
				break
			r = get(f'https://github.com/{user}?page={m.group(1)}&tab=repositories')