|
|
@@ -25,7 +25,7 @@ assert users and (mode is None or mode in MODES) and not users[0].startswith('-- |
|
|
|
def get(url): |
|
|
|
while True: |
|
|
|
logging.info(f'Fetching {url}') |
|
|
|
r = requests.get(url, headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0'}) |
|
|
|
r = requests.get(url, headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0', 'Accept': 'text/html'}) |
|
|
|
if r.status_code == 429: |
|
|
|
logging.warning(f'Got 429, sleeping and retrying') |
|
|
|
time.sleep(5) |
|
|
@@ -65,7 +65,9 @@ for user in users: |
|
|
|
while True: |
|
|
|
for m in re.finditer(r'<a itemprop="name codeRepository"\s(?:[^>]*\s)?data-hovercard-url="/([^/>"]+/[^/>"]+)/hovercard"', r.text): |
|
|
|
p(m.group(1)) |
|
|
|
if '<a class="next_page"' not in r.text: |
|
|
|
for m in re.finditer(r'<a data-testid="listitem-title-link"\s(?:[^>]*\s)?href="/([^/>"]+/[^/>"]+)"', r.text): |
|
|
|
p(m.group(1)) |
|
|
|
if '<a class="next_page"' not in r.text and '<a rel="next"' not in r.text: |
|
|
|
# End of pagination |
|
|
|
break |
|
|
|
page += 1 |
|
|
|