From 92c619cd0190a1f218c1c8492535bf548d31c36a Mon Sep 17 00:00:00 2001 From: Chris Adams Date: Mon, 27 Mar 2017 14:38:18 -0400 Subject: [PATCH 001/455] Avoid remote branch name churn This avoids the backup output having lots of "[new branch]" messages because removing the old remote name removed all of the existing branch references. --- bin/github-backup | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 020c664..253dc1e 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -741,11 +741,21 @@ def fetch_repository(name, if clone_exists: log_info('Updating {0} in {1}'.format(name, local_dir)) - git_command = ['git', 'remote', 'rm', 'origin'] - logging_subprocess(git_command, None, cwd=local_dir) - git_command = ['git', 'remote', 'add', 'origin', remote_url] - logging_subprocess(git_command, None, cwd=local_dir) - git_command = ['git', 'fetch', '--all', '--tags', '--prune'] + + remotes = subprocess.check_output(['git', 'remote', 'show'], + cwd=local_dir) + remotes = [i.strip() for i in remotes.decode('utf-8')] + + if 'origin' not in remotes: + git_command = ['git', 'remote', 'rm', 'origin'] + logging_subprocess(git_command, None, cwd=local_dir) + git_command = ['git', 'remote', 'add', 'origin', remote_url] + logging_subprocess(git_command, None, cwd=local_dir) + else: + git_command = ['git', 'remote', 'set-url', 'origin', remote_url] + logging_subprocess(git_command, None, cwd=local_dir) + + git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] logging_subprocess(git_command, None, cwd=local_dir) else: log_info('Cloning {0} repository from {1} to {2}'.format( From dd2b96b1727fe50d5343f44936d100c8176ba6a3 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 27 Mar 2017 14:55:11 -0600 Subject: [PATCH 002/455] Release version 0.12.1 --- CHANGES.rst | 12 ++++++++++++ github_backup/__init__.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git 
a/CHANGES.rst b/CHANGES.rst index 8c78d80..aac372a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,18 @@ Changelog ========= +0.12.1 (2017-03-27) +------------------- + +- Avoid remote branch name churn. [Chris Adams] + + This avoids the backup output having lots of "[new branch]" messages + because removing the old remote name removed all of the existing branch + references. + + +- Fix detection of bare git directories. [Andrzej Maczuga] + 0.12.0 (2016-11-22) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 2c7bffb..f8d9095 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.12.0' +__version__ = '0.12.1' From 1d6e1abab15fedcb918275896d7c9169d5afb074 Mon Sep 17 00:00:00 2001 From: Martin O'Reilly Date: Wed, 5 Apr 2017 15:17:52 +0100 Subject: [PATCH 003/455] Add support for storing PAT in OSX keychain Added additional optional arguments and README guidance for storing and accessing a Github personal access token (PAT) in the OSX keychain --- README.rst | 23 +++++++++++++++++++++++ bin/github-backup | 24 +++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index af7009d..ac672c1 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,8 @@ CLI Usage is as follows:: [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] + [--keychain-name OSX_KEYCHAIN_ITEM_NAME] + [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] USER Backup a github account @@ -83,6 +85,12 @@ CLI Usage is as follows:: -F, --fork include forked repositories --prefer-ssh Clone repositories using SSH instead of HTTPS -v, --version show program's version number and exit + --keychain-name OSX_KEYCHAIN_ITEM_NAME + OSX ONLY: name field of password item in OSX keychain + that holds the personal access or OAuth token + --keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT + OSX ONLY: account field of password 
item in OSX + keychain that holds the personal access or OAuth token The package can be used to backup an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, json files for issues). @@ -91,3 +99,18 @@ Authentication ============== Note: Password-based authentication will fail if you have two-factor authentication enabled. + +Using the Keychain on Mac OSX +============================= +Note: On Mac OSX the token can be stored securely in the user's keychain. To do this: + +1. Open Keychain from "Applications -> Utilities -> Keychain Access" +2. Add a new password item using "File -> New Password Item" +3. Enter a name in the "Keychain Item Name" box. You must provide this name to github-backup using the --keychain-name argument. +4. Enter an account name in the "Account Name" box, enter your Github username as set above. You must provide this name to github-backup using the --keychain-account argument. +5. Enter your Github personal access token in the "Password" box + +Note: When you run github-backup, you will be asked whether you want to allow "security" to use your confidential information stored in your keychain. You have two options: + +1. **Allow:** In this case you will need to click "Allow" each time you run `github-backup` +2. **Always Allow:** In this case, you will not be asked for permission when you run `github-backup` in future. 
This is less secure, but is required if you want to schedule `github-backup` to run automatically diff --git a/bin/github-backup b/bin/github-backup index 253dc1e..c3c1208 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -251,13 +251,35 @@ def parse_args(): parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__) + parser.add_argument('--keychain-name', + dest='osx_keychain_item_name', + help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token') + parser.add_argument('--keychain-account', + dest='osx_keychain_item_account', + help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') return parser.parse_args() def get_auth(args, encode=True): auth = None - if args.token: + if args.osx_keychain_item_name: + if not args.osx_keychain_item_account: + log_error('You must specify both name and account fields for osx keychain password items') + else: + try: + with open(os.devnull,'w') as devnull: + token = (subprocess.check_output([ + 'security','find-generic-password', + '-s',args.osx_keychain_item_name, + '-a',args.osx_keychain_item_account, + '-w' ], stderr=devnull).strip()) + auth = token + ':' + 'x-oauth-basic' + except: + log_error('No password item matching the provided name and account could be found in the osx keychain.') + elif args.osx_keychain_item_account: + log_error('You must specify both name and account fields for osx keychain password items') + elif args.token: _path_specifier = 'file://' if args.token.startswith(_path_specifier): args.token = open(args.token[len(_path_specifier):], From 0dfe5c342a7b44ace450c5c7c8d905a5ccdb9522 Mon Sep 17 00:00:00 2001 From: Martin O'Reilly Date: Wed, 5 Apr 2017 16:36:52 +0100 Subject: [PATCH 004/455] Add OS check for OSX specific keychain args Keychain arguments are only supported on Mac OSX. 
Added check for operating system so we give a "Keychain arguments are only supported on Mac OSX" error message rather than a "No password item matching the provided name and account could be found in the osx keychain" error message --- bin/github-backup | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index c3c1208..5a2a635 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -16,6 +16,7 @@ import select import subprocess import sys import time +import platform try: # python 3 from urllib.parse import urlparse @@ -267,6 +268,8 @@ def get_auth(args, encode=True): if not args.osx_keychain_item_account: log_error('You must specify both name and account fields for osx keychain password items') else: + if platform.system() != 'Darwin': + log_error("Keychain arguments are only supported on Mac OSX") try: with open(os.devnull,'w') as devnull: token = (subprocess.check_output([ From 2efeaa75800bceed6fa888c17341584811356177 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 5 Apr 2017 11:49:49 -0400 Subject: [PATCH 005/455] Release version 0.13.0 --- CHANGES.rst | 20 ++++++++++++++++++++ github_backup/__init__.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index aac372a..284440b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,26 @@ Changelog ========= +0.13.0 (2017-04-05) +------------------- + +- Add OS check for OSX specific keychain args. [Martin O'Reilly] + + Keychain arguments are only supported on Mac OSX. + Added check for operating system so we give a + "Keychain arguments are only supported on Mac OSX" + error message rather than a "No password item matching the + provided name and account could be found in the osx keychain" + error message + + +- Add support for storing PAT in OSX keychain. 
[Martin O'Reilly] + + Added additional optional arguments and README guidance for storing + and accessing a Github personal access token (PAT) in the OSX + keychain + + 0.12.1 (2017-03-27) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index f8d9095..2d7893e 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.12.1' +__version__ = '0.13.0' From de93824498f8ad4a7d79ec52929ebff92672e76b Mon Sep 17 00:00:00 2001 From: Nicolai Ehemann Date: Tue, 11 Apr 2017 11:10:03 +0200 Subject: [PATCH 006/455] Fix error when repository has no updated_at value --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 5a2a635..469987e 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -520,7 +520,7 @@ def backup_repositories(args, output_directory, repositories): repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) if args.incremental: - last_update = max(repository['updated_at'] for repository in repositories) # noqa + last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa last_update_path = os.path.join(output_directory, 'last_update') if os.path.exists(last_update_path): args.since = open(last_update_path).read().strip() From d4055eb99cb622c7e0b8b35a2cf566fe63f0b872 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Tue, 11 Apr 2017 09:40:13 -0600 Subject: [PATCH 007/455] Release version 0.13.1 --- CHANGES.rst | 5 +++++ github_backup/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 284440b..54d2289 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,11 @@ Changelog ========= +0.13.1 (2017-04-11) +------------------- + +- Fix error when repository has no updated_at value. 
[Nicolai Ehemann] + 0.13.0 (2017-04-05) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 2d7893e..deea98b 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.13.0' +__version__ = '0.13.1' From 922a3c5a6e9ed77a5ed31d539577dab72476dea0 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 6 May 2017 12:58:42 +0100 Subject: [PATCH 008/455] Fix remotes while updating repository --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 469987e..30390a6 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -769,7 +769,7 @@ def fetch_repository(name, remotes = subprocess.check_output(['git', 'remote', 'show'], cwd=local_dir) - remotes = [i.strip() for i in remotes.decode('utf-8')] + remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] if 'origin' not in remotes: git_command = ['git', 'remote', 'rm', 'origin'] From cb33b9bab757c5678f340278dd3dd22e91287d98 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 6 May 2017 14:14:08 -0600 Subject: [PATCH 009/455] Release version 0.13.2 --- CHANGES.rst | 5 +++++ github_backup/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 54d2289..05b182f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,11 @@ Changelog ========= +0.13.2 (2017-05-06) +------------------- + +- Fix remotes while updating repository. 
[Dima Gerasimov] + 0.13.1 (2017-04-11) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index deea98b..1139861 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.13.1' +__version__ = '0.13.2' From 0f21d7b8a4f6ec7a32f5dc6f58d58153262226dc Mon Sep 17 00:00:00 2001 From: Yusuf Tran Date: Tue, 30 May 2017 18:52:11 +0100 Subject: [PATCH 010/455] add couple examples to help new users --- README.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.rst b/README.rst index ac672c1..fedcc0c 100644 --- a/README.rst +++ b/README.rst @@ -114,3 +114,20 @@ Note: When you run github-backup, you will be asked whether you want to allow " 1. **Allow:** In this case you will need to click "Allow" each time you run `github-backup` 2. **Always Allow:** In this case, you will not be asked for permission when you run `github-backup` in future. This is less secure, but is required if you want to schedule `github-backup` to run automatically + +Examples +======== + +Backup all repositories:: + + export ACCESS_TOKEN=SOME-GITHUB-TOKEN + github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories + +Backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: + + export ACCESS_TOKEN=SOME-GITHUB-TOKEN + ORGANIZATION=docker + REPO=cli + # e.g. git@github.com:docker/cli.git + github-backup $ORGANIZATION -P -t $ACCESS_TOKEN -o . 
--all -O -R $REPO + From cefb226545115cdebe9e8b0255b2f86346007cf6 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 30 Sep 2017 17:13:47 -0700 Subject: [PATCH 011/455] Explicitly support python 3 in package description --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 8b841f2..fd0abbb 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,8 @@ def open_file(fname): 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ], description='backup a github user or organization', long_description=open_file('README.rst').read(), From 990249b80b76654c03cbc52bda01a8026affa880 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 30 Sep 2017 17:16:38 -0700 Subject: [PATCH 012/455] Add pypi info to readme --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index fedcc0c..7126cee 100644 --- a/README.rst +++ b/README.rst @@ -2,6 +2,8 @@ github-backup ============= +|PyPI| |Python Versions| + backup a github user or organization Requirements @@ -131,3 +133,7 @@ Backup a single organization repository with everything else (wiki, pull request # e.g. git@github.com:docker/cli.git github-backup $ORGANIZATION -P -t $ACCESS_TOKEN -o . --all -O -R $REPO +.. |PyPI| image:: https://img.shields.io/pypi/v/github-backup.svg + :target: https://pypi.python.org/pypi/github-backup/ +.. 
|Python Versions| image:: https://img.shields.io/pypi/pyversions/github-backup.svg + :target: https://github.com/albertyw/github-backup From e6b6eb8bef2117972c90ae29a9182d55f094c2a4 Mon Sep 17 00:00:00 2001 From: pieterclaerhout Date: Tue, 10 Oct 2017 19:52:07 +0200 Subject: [PATCH 013/455] Added support for LFS clones --- README.rst | 3 ++- bin/github-backup | 28 ++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 7126cee..7b914c3 100644 --- a/README.rst +++ b/README.rst @@ -31,7 +31,7 @@ CLI Usage is as follows:: [-o OUTPUT_DIRECTORY] [-i] [--starred] [--watched] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--labels] - [--hooks] [--milestones] [--repositories] [--bare] + [--hooks] [--milestones] [--repositories] [--bare] [--lfs] [--wikis] [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] @@ -72,6 +72,7 @@ CLI Usage is as follows:: --milestones include milestones in backup --repositories include repository clone in backup --bare clone bare repositories + --lfs clone LFS repositories --wikis include wiki clone in backup --skip-existing skip project if a backup directory exists -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] diff --git a/bin/github-backup b/bin/github-backup index 30390a6..e1d8d80 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -208,6 +208,10 @@ def parse_args(): action='store_true', dest='bare_clone', help='clone bare repositories') + parser.add_argument('--lfs', + action='store_true', + dest='lfs_clone', + help='clone lfs repositories') parser.add_argument('--wikis', action='store_true', dest='include_wiki', @@ -540,7 +544,8 @@ def backup_repositories(args, output_directory, repositories): repo_url, repo_dir, skip_existing=args.skip_existing, - bare_clone=args.bare_clone) + bare_clone=args.bare_clone, + 
lfs_clone=arg.lfs_clone) download_wiki = (args.include_wiki or args.include_everything) if repository['has_wiki'] and download_wiki: @@ -548,7 +553,8 @@ def backup_repositories(args, output_directory, repositories): repo_url.replace('.git', '.wiki.git'), os.path.join(repo_cwd, 'wiki'), skip_existing=args.skip_existing, - bare_clone=args.bare_clone) + bare_clone=args.bare_clone, + lfs_clone=arg.lfs_clone) if args.include_issues or args.include_everything: backup_issues(args, repo_cwd, repository, repos_template) @@ -738,7 +744,8 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False, - bare_clone=False): + bare_clone=False, + lfs_clone=False): if bare_clone: if os.path.exists(local_dir): clone_exists = subprocess.check_output(['git', @@ -780,7 +787,10 @@ def fetch_repository(name, git_command = ['git', 'remote', 'set-url', 'origin', remote_url] logging_subprocess(git_command, None, cwd=local_dir) - git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + if lfs_clone: + git_command = ['git', 'lfs', 'fetch', '--all', '--force', '--tags', '--prune'] + else: + git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] logging_subprocess(git_command, None, cwd=local_dir) else: log_info('Cloning {0} repository from {1} to {2}'.format( @@ -788,9 +798,15 @@ def fetch_repository(name, masked_remote_url, local_dir)) if bare_clone: - git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + if lfs_clone: + git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir] + else: + git_command = ['git', 'clone', '--mirror', remote_url, local_dir] else: - git_command = ['git', 'clone', remote_url, local_dir] + if lfs_clone: + git_command = ['git', 'lfs', 'clone', remote_url, local_dir] + else: + git_command = ['git', 'clone', remote_url, local_dir] logging_subprocess(git_command, None) From 9b1b4a9ebc6d974aeaf98f4eeba55816dd0c2ca2 Mon Sep 17 00:00:00 2001 From: pieterclaerhout Date: Wed, 11 Oct 2017 15:11:14 +0200 
Subject: [PATCH 014/455] Added a check to see if git-lfs is installed when doing an LFS clone --- bin/github-backup | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index e1d8d80..afa3231 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -471,6 +471,13 @@ def _request_url_error(template, retry_timeout): return False +def check_git_lfs_install(): + exit_code = subprocess.call(['git', 'lfs', 'version']) + if exit_code != 0: + log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + sys.exit(1) + + def retrieve_repositories(args): log_info('Retrieving repositories') single_request = False @@ -863,6 +870,9 @@ def main(): log_info('Create output directory {0}'.format(output_directory)) mkdir_p(output_directory) + if args.lfs_clone: + check_git_lfs_install() + log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) repositories = retrieve_repositories(args) From bb551a83f4e58a0cce230a865ac00fa1b0c23ce0 Mon Sep 17 00:00:00 2001 From: pieterclaerhout Date: Wed, 11 Oct 2017 15:14:13 +0200 Subject: [PATCH 015/455] Updated the readme --- README.rst | 9 ++++++++- bin/github-backup | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 7b914c3..4ae7d9c 100644 --- a/README.rst +++ b/README.rst @@ -72,7 +72,7 @@ CLI Usage is as follows:: --milestones include milestones in backup --repositories include repository clone in backup --bare clone bare repositories - --lfs clone LFS repositories + --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) --wikis include wiki clone in backup --skip-existing skip project if a backup directory exists -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] @@ -118,6 +118,13 @@ Note: When you run github-backup, you will be asked whether you want to allow " 1. 
**Allow:** In this case you will need to click "Allow" each time you run `github-backup` 2. **Always Allow:** In this case, you will not be asked for permission when you run `github-backup` in future. This is less secure, but is required if you want to schedule `github-backup` to run automatically +About Git LFS +============= + +When you use the "--lfs" option, you will need to make sure you have Git LFS installed. + +Instructions on how to do this can be found on https://git-lfs.github.com. + Examples ======== diff --git a/bin/github-backup b/bin/github-backup index afa3231..23d0603 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -211,7 +211,7 @@ def parse_args(): parser.add_argument('--lfs', action='store_true', dest='lfs_clone', - help='clone lfs repositories') + help='clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com)') parser.add_argument('--wikis', action='store_true', dest='include_wiki', From f157ea107f7c18ab9fc3161706162db4f58b6450 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 11 Oct 2017 11:52:16 -0400 Subject: [PATCH 016/455] Release version 0.14.0 --- CHANGES.rst | 14 ++++++++++++++ github_backup/__init__.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 05b182f..e04acfd 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,20 @@ Changelog ========= +0.14.0 (2017-10-11) +------------------- + +- Added a check to see if git-lfs is installed when doing an LFS clone. + [pieterclaerhout] + +- Added support for LFS clones. [pieterclaerhout] + +- Add pypi info to readme. [Albert Wang] + +- Explicitly support python 3 in package description. [Albert Wang] + +- Add couple examples to help new users. 
[Yusuf Tran] + 0.13.2 (2017-05-06) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 1139861..ef91994 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.13.2' +__version__ = '0.14.0' From 27a1ba2d047308db85d5bc5dda35e843b7a9919a Mon Sep 17 00:00:00 2001 From: Edward Pfremmer Date: Wed, 11 Oct 2017 15:12:34 -0500 Subject: [PATCH 017/455] Fix arg not defined error Ref: https://github.com/josegonzalez/python-github-backup/issues/69 --- bin/github-backup | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 23d0603..96ed3f8 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -552,7 +552,7 @@ def backup_repositories(args, output_directory, repositories): repo_dir, skip_existing=args.skip_existing, bare_clone=args.bare_clone, - lfs_clone=arg.lfs_clone) + lfs_clone=args.lfs_clone) download_wiki = (args.include_wiki or args.include_everything) if repository['has_wiki'] and download_wiki: @@ -561,7 +561,7 @@ def backup_repositories(args, output_directory, repositories): os.path.join(repo_cwd, 'wiki'), skip_existing=args.skip_existing, bare_clone=args.bare_clone, - lfs_clone=arg.lfs_clone) + lfs_clone=args.lfs_clone) if args.include_issues or args.include_everything: backup_issues(args, repo_cwd, repository, repos_template) From 1d50a4038ba41a2db149130eb8eb90e6cd3f47fa Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 11 Oct 2017 16:18:21 -0400 Subject: [PATCH 018/455] Release version 0.14.1 --- CHANGES.rst | 7 +++++++ github_backup/__init__.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index e04acfd..de3af31 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,13 @@ Changelog ========= +0.14.1 (2017-10-11) +------------------- + +- Fix arg not defined error. 
[Edward Pfremmer] + + Ref: https://github.com/josegonzalez/python-github-backup/issues/69 + 0.14.0 (2017-10-11) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index ef91994..092052c 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.14.0' +__version__ = '0.14.1' From 76895dcf69107cf2e5a5fec7d1f0bfbfc8c279a4 Mon Sep 17 00:00:00 2001 From: Sam Libby Date: Sun, 10 Dec 2017 21:44:26 -0700 Subject: [PATCH 019/455] update check_io() to allow scripts to run on Windows --- bin/github-backup | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/github-backup b/bin/github-backup index 96ed3f8..9aa920d 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -74,6 +74,7 @@ def logging_subprocess(popenargs, child.stderr: stderr_log_level} def check_io(): + if sys.platform == 'win32': return ready_to_read = select.select([child.stdout, child.stderr], [], [], From 4f4785085de05bf86fd26b2cbedd3123ec541288 Mon Sep 17 00:00:00 2001 From: Sam Libby Date: Mon, 11 Dec 2017 09:25:49 -0700 Subject: [PATCH 020/455] update logging_subprocess function 1. added newline for return 2. 
added one-time warning (once per subprocess) --- bin/github-backup | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 9aa920d..1f4c73e 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -69,12 +69,15 @@ def logging_subprocess(popenargs, """ child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs) + if sys.platform == 'win32': + log_info("Windows operating system detected - no subprocess logging will be returned") log_level = {child.stdout: stdout_log_level, child.stderr: stderr_log_level} def check_io(): - if sys.platform == 'win32': return + if sys.platform == 'win32': + return ready_to_read = select.select([child.stdout, child.stderr], [], [], From 2b9549ffdef2dd050fe8c677c56da38e2479538f Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 11 Dec 2017 11:46:16 -0500 Subject: [PATCH 021/455] Release version 0.15.0 --- CHANGES.rst | 2 +- github_backup/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index de3af31..efe824e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changelog ========= -0.14.1 (2017-10-11) +0.15.0 (2017-12-11) ------------------- - Fix arg not defined error. 
[Edward Pfremmer] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 092052c..a842d05 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.14.1' +__version__ = '0.15.0' From 0a4decfb3bbc471953191010c90639ba0852fda5 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Fri, 29 Dec 2017 21:39:59 +0100 Subject: [PATCH 022/455] Add option to backup additional PR details Some payload is only included when requesting a single pull request --- bin/github-backup | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 1f4c73e..e4627c5 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -192,6 +192,10 @@ def parse_args(): action='store_true', dest='include_pull_commits', help='include pull request commits in backup') + parser.add_argument('--pull-details', + action='store_true', + dest='include_pull_details', + help='include more pull request details in backup') parser.add_argument('--labels', action='store_true', dest='include_labels', @@ -656,23 +660,35 @@ def backup_pulls(args, repo_cwd, repository, repos_template): pulls = {} _pulls_template = '{0}/{1}/pulls'.format(repos_template, repository['full_name']) + query_args = { + 'filter': 'all', + 'state': 'all', + 'sort': 'updated', + 'direction': 'desc', + } - pull_states = ['open', 'closed'] - for pull_state in pull_states: - query_args = { - 'filter': 'all', - 'state': pull_state, - 'sort': 'updated', - 'direction': 'desc', - } - - # It'd be nice to be able to apply the args.since filter here... + if not args.include_pull_details: + pull_states = ['open', 'closed'] + for pull_state in pull_states: + query_args['state'] = pull_state + # It'd be nice to be able to apply the args.since filter here... 
+ _pulls = retrieve_data(args, + _pulls_template, + query_args=query_args) + for pull in _pulls: + if not args.since or pull['updated_at'] >= args.since: + pulls[pull['number']] = pull + else: _pulls = retrieve_data(args, _pulls_template, query_args=query_args) for pull in _pulls: if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = pull + pulls[pull['number']] = retrieve_data( + args, + _pulls_template + '/{}'.format(pull['number']), + single_request=True + ) log_info('Saving {0} pull requests to disk'.format( len(list(pulls.keys())))) From ef88248c41278878266a1575e06cf24210cfee35 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Fri, 29 Dec 2017 21:40:51 +0100 Subject: [PATCH 023/455] Add additional output for the current request This is useful to have some progress indication for huge repositories. --- bin/github-backup | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/github-backup b/bin/github-backup index e4627c5..4d7a7e7 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -431,6 +431,7 @@ def _construct_request(per_page, page, query_args, template, auth): request = Request(template + '?' + querystring) if auth is not None: request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + log_info('Requesting {}?{}'.format(template, querystring)) return request From 8b95f187ad0b07179d27219156ffdc6ae2251af9 Mon Sep 17 00:00:00 2001 From: "W. 
Harrison Wright" Date: Sat, 13 Jan 2018 14:08:36 -0600 Subject: [PATCH 024/455] Add ability to clone starred repos --- bin/github-backup | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 1f4c73e..d921fbe 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -343,7 +343,7 @@ def get_github_repo_url(args, repository): repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( auth, get_github_host(args), - args.user, + repository['owner']['login'], repository['name']) else: repo_url = repository['clone_url'] @@ -499,7 +499,17 @@ def retrieve_repositories(args): args.user, args.repository) - return retrieve_data(args, template, single_request=single_request) + repos = retrieve_data(args, template, single_request=single_request) + + if args.include_starred or args.include_everything: + starred_template = 'https://{0}/user/starred'.format( + get_github_api_host(args)) + starred_repos = retrieve_data(args, starred_template, single_request=False) + for item in starred_repos: + item.update({'is_starred': True}) + repos.extend(starred_repos) + + return repos def filter_repositories(args, unfiltered_repositories): @@ -507,7 +517,7 @@ def filter_repositories(args, unfiltered_repositories): repositories = [] for r in unfiltered_repositories: - if r['owner']['login'] == args.user: + if r['owner']['login'] == args.user or is_starred_repo(args, r): repositories.append(r) name_regex = None @@ -547,6 +557,14 @@ def backup_repositories(args, output_directory, repositories): for repository in repositories: backup_cwd = os.path.join(output_directory, 'repositories') repo_cwd = os.path.join(backup_cwd, repository['name']) + + # put starred repos in -o/starred/${owner}/${repo} to prevent collision of + # any repositories with the same name + if is_starred_repo(args, repository): + backup_cwd = os.path.join(output_directory, 'starred') + repo_cwd = os.path.join(backup_cwd, repository['owner']['login'], + 
repository['name']) + repo_dir = os.path.join(repo_cwd, 'repository') repo_url = get_github_repo_url(args, repository) @@ -866,6 +884,10 @@ def json_dump(data, output_file): separators=(',', ': ')) +def is_starred_repo(args, repo): + return (args.include_starred or args.include_everything) and repo.get('is_starred') + + def main(): args = parse_args() From 6e3cbe841ae50e9927a9d4d203379794da8146a3 Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Sat, 13 Jan 2018 14:12:26 -0600 Subject: [PATCH 025/455] Add comment --- bin/github-backup | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index d921fbe..b382e10 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -505,6 +505,8 @@ def retrieve_repositories(args): starred_template = 'https://{0}/user/starred'.format( get_github_api_host(args)) starred_repos = retrieve_data(args, starred_template, single_request=False) + # we need to be able to determine this repo was retrieved as a starred repo + # later, so add a flag to each item for item in starred_repos: item.update({'is_starred': True}) repos.extend(starred_repos) From bd346de8987bfbdccc36f4269185720f0edcf299 Mon Sep 17 00:00:00 2001 From: "W. 
Harrison Wright" Date: Sat, 13 Jan 2018 17:43:00 -0600 Subject: [PATCH 026/455] Put starred clone repoistories under a new option --- bin/github-backup | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index b382e10..2d39875 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -160,6 +160,10 @@ def parse_args(): action='store_true', dest='include_starred', help='include starred repositories in backup') + parser.add_argument('--clone-starred', + action='store_true', + dest='clone_starred', + help='clone starred repositories in backup') parser.add_argument('--watched', action='store_true', dest='include_watched', @@ -501,7 +505,7 @@ def retrieve_repositories(args): repos = retrieve_data(args, template, single_request=single_request) - if args.include_starred or args.include_everything: + if args.clone_starred: starred_template = 'https://{0}/user/starred'.format( get_github_api_host(args)) starred_repos = retrieve_data(args, starred_template, single_request=False) @@ -519,7 +523,7 @@ def filter_repositories(args, unfiltered_repositories): repositories = [] for r in unfiltered_repositories: - if r['owner']['login'] == args.user or is_starred_repo(args, r): + if r['owner']['login'] == args.user or r.get('is_starred'): repositories.append(r) name_regex = None @@ -562,7 +566,7 @@ def backup_repositories(args, output_directory, repositories): # put starred repos in -o/starred/${owner}/${repo} to prevent collision of # any repositories with the same name - if is_starred_repo(args, repository): + if repository.get('is_starred'): backup_cwd = os.path.join(output_directory, 'starred') repo_cwd = os.path.join(backup_cwd, repository['owner']['login'], repository['name']) @@ -886,10 +890,6 @@ def json_dump(data, output_file): separators=(',', ': ')) -def is_starred_repo(args, repo): - return (args.include_starred or args.include_everything) and repo.get('is_starred') - - def main(): args = parse_args() 
From cd2372183e728a666263307b1139bca208796c25 Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Sat, 13 Jan 2018 17:44:09 -0600 Subject: [PATCH 027/455] Update documentation --- README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 4ae7d9c..af382f4 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,7 @@ CLI Usage is as follows:: [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] [--milestones] [--repositories] [--bare] [--lfs] - [--wikis] [--skip-existing] + [--wikis] [--skip-existing] [--clone-starred] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] @@ -75,6 +75,7 @@ CLI Usage is as follows:: --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) --wikis include wiki clone in backup --skip-existing skip project if a backup directory exists + --clone-starred clone starred repositories -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] only allow these languages -N NAME_REGEX, --name-regex NAME_REGEX From 9a539b1d6b4bca2d6e3dd91dd960272b1e45ea11 Mon Sep 17 00:00:00 2001 From: "W. 
Harrison Wright" Date: Sun, 14 Jan 2018 10:18:51 -0600 Subject: [PATCH 028/455] JK don't update documentation --- README.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/README.rst b/README.rst index af382f4..1e61958 100644 --- a/README.rst +++ b/README.rst @@ -75,7 +75,6 @@ CLI Usage is as follows:: --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) --wikis include wiki clone in backup --skip-existing skip project if a backup directory exists - --clone-starred clone starred repositories -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] only allow these languages -N NAME_REGEX, --name-regex NAME_REGEX From 7cccd42ec9ea4a4d2bcf7d3da85397371a1194ce Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Sun, 14 Jan 2018 10:21:15 -0600 Subject: [PATCH 029/455] Change option to --all-starred --- README.rst | 2 +- bin/github-backup | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 1e61958..4ae7d9c 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,7 @@ CLI Usage is as follows:: [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] [--milestones] [--repositories] [--bare] [--lfs] - [--wikis] [--skip-existing] [--clone-starred] + [--wikis] [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] diff --git a/bin/github-backup b/bin/github-backup index 2d39875..4d4b485 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -159,11 +159,11 @@ def parse_args(): parser.add_argument('--starred', action='store_true', dest='include_starred', - help='include starred repositories in backup') - parser.add_argument('--clone-starred', + help='include JSON output of starred repositories in backup') + parser.add_argument('--all-starred', action='store_true', - dest='clone_starred', - help='clone starred 
repositories in backup') + dest='all_starred', + help='include starred repositories in backup') parser.add_argument('--watched', action='store_true', dest='include_watched', @@ -505,7 +505,7 @@ def retrieve_repositories(args): repos = retrieve_data(args, template, single_request=single_request) - if args.clone_starred: + if args.all_starred: starred_template = 'https://{0}/user/starred'.format( get_github_api_host(args)) starred_repos = retrieve_data(args, starred_template, single_request=False) From c142707a903ed6722a3b2c200b6aacfc70cd0b49 Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Mon, 22 Jan 2018 11:34:27 -0600 Subject: [PATCH 030/455] Update documentation --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 4ae7d9c..a45d4d7 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,7 @@ CLI Usage is as follows:: [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] [--milestones] [--repositories] [--bare] [--lfs] - [--wikis] [--skip-existing] + [--wikis] [--skip-existing] [--all-starred] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] @@ -57,7 +57,7 @@ CLI Usage is as follows:: -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY directory at which to backup the repositories -i, --incremental incremental backup - --starred include starred repositories in backup + --starred include list of starred repositories in backup --watched include watched repositories in backup --all include everything in backup --issues include issues in backup @@ -75,6 +75,7 @@ CLI Usage is as follows:: --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) --wikis include wiki clone in backup --skip-existing skip project if a backup directory exists + --all-starred include everything from starred repositories in backup -L [LANGUAGES 
[LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] only allow these languages -N NAME_REGEX, --name-regex NAME_REGEX From cb054c26316857eba81341efe3d388a246d3ea6d Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 22 Jan 2018 12:36:32 -0500 Subject: [PATCH 031/455] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index a45d4d7..29ac8aa 100644 --- a/README.rst +++ b/README.rst @@ -57,7 +57,7 @@ CLI Usage is as follows:: -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY directory at which to backup the repositories -i, --incremental incremental backup - --starred include list of starred repositories in backup + --starred include JSON output of starred repositories in backup --watched include watched repositories in backup --all include everything in backup --issues include issues in backup @@ -75,7 +75,7 @@ CLI Usage is as follows:: --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) --wikis include wiki clone in backup --skip-existing skip project if a backup directory exists - --all-starred include everything from starred repositories in backup + --all-starred include starred repositories in backup -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] only allow these languages -N NAME_REGEX, --name-regex NAME_REGEX From e59d1e3a682b67a3fc6cf2420f04eaec7c95a0f3 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 22 Jan 2018 12:49:31 -0500 Subject: [PATCH 032/455] Release version 0.16.0 --- CHANGES.rst | 16 +++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index efe824e..35671a1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,21 @@ Changelog ========= -0.15.0 (2017-12-11) +0.16.0 (2018-01-22) +------------------- + +- Change option to --all-starred. [W. Harrison Wright] + +- JK don't update documentation. 
[W. Harrison Wright] + +- Put starred clone repoistories under a new option. [W. Harrison + Wright] + +- Add comment. [W. Harrison Wright] + +- Add ability to clone starred repos. [W. Harrison Wright] + +0.14.1 (2017-10-11) ------------------- - Fix arg not defined error. [Edward Pfremmer] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index a842d05..8911e95 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.15.0' +__version__ = '0.16.0' From 17e4f9a12545d5033c1847ddbe00c4cf927a7680 Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Wed, 7 Feb 2018 21:29:49 -0600 Subject: [PATCH 033/455] Add ability to backup gists --- README.rst | 3 ++ bin/github-backup | 74 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 29ac8aa..69dd9a6 100644 --- a/README.rst +++ b/README.rst @@ -33,6 +33,7 @@ CLI Usage is as follows:: [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] [--milestones] [--repositories] [--bare] [--lfs] [--wikis] [--skip-existing] [--all-starred] + [--gists] [--starred-gists] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] @@ -76,6 +77,8 @@ CLI Usage is as follows:: --wikis include wiki clone in backup --skip-existing skip project if a backup directory exists --all-starred include starred repositories in backup + --gists include gists in backup + --starred-gists include starred gists in backup -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] only allow these languages -N NAME_REGEX, --name-regex NAME_REGEX diff --git a/bin/github-backup b/bin/github-backup index 4d4b485..5711b14 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -224,6 +224,14 @@ def parse_args(): action='store_true', dest='include_wiki', help='include wiki clone in backup') + parser.add_argument('--gists', + 
action='store_true', + dest='include_gists', + help='include gists in backup') + parser.add_argument('--starred-gists', + action='store_true', + dest='include_starred_gists', + help='include starred gists in backup') parser.add_argument('--skip-existing', action='store_true', dest='skip_existing', @@ -342,6 +350,9 @@ def get_github_repo_url(args, repository): if args.prefer_ssh: return repository['ssh_url'] + if repository.get('is_gist'): + return repository['git_pull_url'] + auth = get_auth(args, False) if auth: repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( @@ -509,12 +520,30 @@ def retrieve_repositories(args): starred_template = 'https://{0}/user/starred'.format( get_github_api_host(args)) starred_repos = retrieve_data(args, starred_template, single_request=False) - # we need to be able to determine this repo was retrieved as a starred repo - # later, so add a flag to each item + # flag each repo as starred for downstream processing for item in starred_repos: item.update({'is_starred': True}) repos.extend(starred_repos) + if args.include_gists: + gists_template = 'https://{0}/gists'.format( + get_github_api_host(args)) + gists = retrieve_data(args, gists_template, single_request=False) + # flag each repo as a gist for downstream processing + for item in gists: + item.update({'is_gist': True}) + repos.extend(gists) + + if args.include_starred_gists: + starred_gists_template = 'https://{0}/gists/starred'.format( + get_github_api_host(args)) + starred_gists = retrieve_data(args, starred_gists_template, single_request=False) + # flag each repo as a starred gist for downstream processing + for item in starred_gists: + item.update({'is_gist': True, + 'is_starred': True}) + repos.extend(starred_gists) + return repos @@ -523,7 +552,8 @@ def filter_repositories(args, unfiltered_repositories): repositories = [] for r in unfiltered_repositories: - if r['owner']['login'] == args.user or r.get('is_starred'): + # gists can be anonymous, so need to safely check owner + if 
r.get('owner', {}).get('login') == args.user or r.get('is_starred'): repositories.append(r) name_regex = None @@ -535,11 +565,11 @@ def filter_repositories(args, unfiltered_repositories): languages = [x.lower() for x in args.languages] if not args.fork: - repositories = [r for r in repositories if not r['fork']] + repositories = [r for r in repositories if not r.get('fork')] if not args.private: - repositories = [r for r in repositories if not r['private']] + repositories = [r for r in repositories if not r.get('private') or r.get('public')] if languages: - repositories = [r for r in repositories if r['language'] and r['language'].lower() in languages] # noqa + repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa if name_regex: repositories = [r for r in repositories if name_regex.match(r['name'])] @@ -561,29 +591,37 @@ def backup_repositories(args, output_directory, repositories): args.since = None for repository in repositories: - backup_cwd = os.path.join(output_directory, 'repositories') - repo_cwd = os.path.join(backup_cwd, repository['name']) - - # put starred repos in -o/starred/${owner}/${repo} to prevent collision of - # any repositories with the same name - if repository.get('is_starred'): - backup_cwd = os.path.join(output_directory, 'starred') - repo_cwd = os.path.join(backup_cwd, repository['owner']['login'], - repository['name']) + if repository.get('is_gist'): + repo_cwd = os.path.join(output_directory, 'gists', repository['id']) + elif repository.get('is_starred'): + # put starred repos in -o/starred/${owner}/${repo} to prevent collision of + # any repositories with the same name + repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + else: + repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) repo_dir = os.path.join(repo_cwd, 'repository') repo_url = get_github_repo_url(args, repository) - if 
args.include_repository or args.include_everything: - fetch_repository(repository['name'], + include_gists = (args.include_gists or args.include_starred_gists) + if (args.include_repository or args.include_everything) \ + or (include_gists and repository.get('is_gist')): + repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') + fetch_repository(repo_name, repo_url, repo_dir, skip_existing=args.skip_existing, bare_clone=args.bare_clone, lfs_clone=args.lfs_clone) + # dump gist information to a file as well + if repository.get('is_gist'): + output_file = '{0}/gist.json'.format(repo_cwd) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(repository, f) + download_wiki = (args.include_wiki or args.include_everything) - if repository['has_wiki'] and download_wiki: + if repository.get('has_wiki') and download_wiki: fetch_repository(repository['name'], repo_url.replace('.git', '.wiki.git'), os.path.join(repo_cwd, 'wiki'), From 83128e986a43eb1bcab34b74f9f245197592a88c Mon Sep 17 00:00:00 2001 From: "W. 
Harrison Wright" Date: Wed, 7 Feb 2018 21:30:55 -0600 Subject: [PATCH 034/455] Formatting --- bin/github-backup | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 5711b14..80195c6 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -517,8 +517,7 @@ def retrieve_repositories(args): repos = retrieve_data(args, template, single_request=single_request) if args.all_starred: - starred_template = 'https://{0}/user/starred'.format( - get_github_api_host(args)) + starred_template = 'https://{0}/user/starred'.format(get_github_api_host(args)) starred_repos = retrieve_data(args, starred_template, single_request=False) # flag each repo as starred for downstream processing for item in starred_repos: @@ -526,8 +525,7 @@ def retrieve_repositories(args): repos.extend(starred_repos) if args.include_gists: - gists_template = 'https://{0}/gists'.format( - get_github_api_host(args)) + gists_template = 'https://{0}/gists'.format(get_github_api_host(args)) gists = retrieve_data(args, gists_template, single_request=False) # flag each repo as a gist for downstream processing for item in gists: @@ -535,8 +533,7 @@ def retrieve_repositories(args): repos.extend(gists) if args.include_starred_gists: - starred_gists_template = 'https://{0}/gists/starred'.format( - get_github_api_host(args)) + starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) starred_gists = retrieve_data(args, starred_gists_template, single_request=False) # flag each repo as a starred gist for downstream processing for item in starred_gists: From f37825418867fb76e0aeb42d0fdf3a9aeb65b88e Mon Sep 17 00:00:00 2001 From: "W. 
Harrison Wright" Date: Wed, 7 Feb 2018 21:46:59 -0600 Subject: [PATCH 035/455] Short circuit gists backup process --- bin/github-backup | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 80195c6..d3ca674 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -611,14 +611,16 @@ def backup_repositories(args, output_directory, repositories): bare_clone=args.bare_clone, lfs_clone=args.lfs_clone) - # dump gist information to a file as well if repository.get('is_gist'): + # dump gist information to a file as well output_file = '{0}/gist.json'.format(repo_cwd) with codecs.open(output_file, 'w', encoding='utf-8') as f: json_dump(repository, f) + continue # don't try to back anything else for a gist; it doesn't exist + download_wiki = (args.include_wiki or args.include_everything) - if repository.get('has_wiki') and download_wiki: + if repository['has_wiki'] and download_wiki: fetch_repository(repository['name'], repo_url.replace('.git', '.wiki.git'), os.path.join(repo_cwd, 'wiki'), From afa2a6d5873f792ad10b49eb03f560d725a8ab0c Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Tue, 20 Feb 2018 13:06:48 -0500 Subject: [PATCH 036/455] Release version 0.17.0 --- CHANGES.rst | 9 +++++++++ github_backup/__init__.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 35671a1..41fef76 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,15 @@ Changelog ========= +0.17.0 (2018-02-20) +------------------- + +- Short circuit gists backup process. [W. Harrison Wright] + +- Formatting. [W. Harrison Wright] + +- Add ability to backup gists. [W. 
Harrison Wright] + 0.16.0 (2018-01-22) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 8911e95..435d64b 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.16.0' +__version__ = '0.17.0' From eca9f0f7dfadcf20056ea83379dd4d29d7194eec Mon Sep 17 00:00:00 2001 From: Stephen Greene Date: Mon, 19 Feb 2018 14:19:23 -0800 Subject: [PATCH 037/455] Add option to fetch followers/following JSON data --- README.rst | 24 ++++++++++++++---------- bin/github-backup | 40 ++++++++++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 69dd9a6..861e616 100644 --- a/README.rst +++ b/README.rst @@ -28,12 +28,12 @@ Usage CLI Usage is as follows:: github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] - [-o OUTPUT_DIRECTORY] [-i] [--starred] [--watched] - [--all] [--issues] [--issue-comments] [--issue-events] - [--pulls] [--pull-comments] [--pull-commits] [--labels] - [--hooks] [--milestones] [--repositories] [--bare] [--lfs] - [--wikis] [--skip-existing] [--all-starred] - [--gists] [--starred-gists] + [-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred] + [--watched] [--followers] [--following] [--all] + [--issues] [--issue-comments] [--issue-events] [--pulls] + [--pull-comments] [--pull-commits] [--labels] [--hooks] + [--milestones] [--repositories] [--bare] [--lfs] + [--wikis] [--gists] [--starred-gists] [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] @@ -54,12 +54,16 @@ CLI Usage is as follows:: password for basic auth. If a username is given but not a password, the password will be prompted for. -t TOKEN, --token TOKEN - personal access or OAuth token + personal access or OAuth token, or path to token + (file://...) 
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY directory at which to backup the repositories -i, --incremental incremental backup --starred include JSON output of starred repositories in backup + --all-starred include starred repositories in backup --watched include watched repositories in backup + --followers include JSON output of followers in backup + --following include JSON output of following users in backup --all include everything in backup --issues include issues in backup --issue-comments include issue comments in backup @@ -73,12 +77,12 @@ CLI Usage is as follows:: --milestones include milestones in backup --repositories include repository clone in backup --bare clone bare repositories - --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) + --lfs clone LFS repositories (requires Git LFS to be + installed, https://git-lfs.github.com) --wikis include wiki clone in backup - --skip-existing skip project if a backup directory exists - --all-starred include starred repositories in backup --gists include gists in backup --starred-gists include starred gists in backup + --skip-existing skip project if a backup directory exists -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] only allow these languages -N NAME_REGEX, --name-regex NAME_REGEX diff --git a/bin/github-backup b/bin/github-backup index d3ca674..59c1968 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -168,6 +168,14 @@ def parse_args(): action='store_true', dest='include_watched', help='include watched repositories in backup') + parser.add_argument('--followers', + action='store_true', + dest='include_followers', + help='include JSON output of followers in backup') + parser.add_argument('--following', + action='store_true', + dest='include_following', + help='include JSON output of following users in backup') parser.add_argument('--all', action='store_true', dest='include_everything', @@ -886,25 +894,41 @@ def 
backup_account(args, output_directory): account_cwd = os.path.join(output_directory, 'account') if args.include_starred or args.include_everything: - output_file = '{0}/starred.json'.format(account_cwd) - template = "https://{0}/users/{1}/starred" - template = template.format(get_github_api_host(args), args.user) + output_file = "{0}/starred.json".format(account_cwd) + template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) _backup_data(args, - 'starred repositories', + "starred repositories", template, output_file, account_cwd) if args.include_watched or args.include_everything: - output_file = '{0}/watched.json'.format(account_cwd) - template = "https://{0}/users/{1}/subscriptions" - template = template.format(get_github_api_host(args), args.user) + output_file = "{0}/watched.json".format(account_cwd) + template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) _backup_data(args, - 'watched repositories', + "watched repositories", template, output_file, account_cwd) + if args.include_followers or args.include_everything: + output_file = "{0}/followers.json".format(account_cwd) + template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) + _backup_data(args, + "followers", + template, + output_file, + account_cwd) + + if args.include_following or args.include_everything: + output_file = "{0}/following.json".format(account_cwd) + template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) + _backup_data(args, + "following", + template, + output_file, + account_cwd) + def _backup_data(args, name, template, output_file, output_directory): skip_existing = args.skip_existing From 87dab293ed8d9015ec73f47069432f46a520cf89 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 22 Feb 2018 12:13:06 -0500 Subject: [PATCH 038/455] Release version 0.18.0 --- CHANGES.rst | 5 +++++ github_backup/__init__.py | 2 +- 2 files changed, 6 
insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 41fef76..10328a2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,11 @@ Changelog ========= +0.18.0 (2018-02-22) +------------------- + +- Add option to fetch followers/following JSON data. [Stephen Greene] + 0.17.0 (2018-02-20) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 435d64b..5ec52a9 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.17.0' +__version__ = '0.18.0' From fd33037b1ce40909320e7ce7c76481770f273780 Mon Sep 17 00:00:00 2001 From: Johannes Bornhold Date: Mon, 26 Feb 2018 11:21:25 +0100 Subject: [PATCH 039/455] Mark string as binary in comparison for skip_existing Found out that the flag "--skip-existing" did not work out as expected on Python 3.6. Tracked it down to the comparison which has to be against a string of bytes in Python3. --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 59c1968..a9e3e1c 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -831,7 +831,7 @@ def fetch_repository(name, clone_exists = subprocess.check_output(['git', 'rev-parse', '--is-bare-repository'], - cwd=local_dir) == "true\n" + cwd=local_dir) == b"true\n" else: clone_exists = False else: From 8d2ef2f5282910e00e87422352c5e03556db1004 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 24 Mar 2018 00:54:34 -0400 Subject: [PATCH 040/455] Release version 0.19.0 --- CHANGES.rst | 21 +++++++++++++++++++++ github_backup/__init__.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 10328a2..9b71bb8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,27 @@ Changelog ========= +0.19.0 (2018-03-24) +------------------- + +- Add additional output for the current request. [Robin Gloster] + + This is useful to have some progress indication for huge repositories. 
+ + +- Add option to backup additional PR details. [Robin Gloster] + + Some payload is only included when requesting a single pull request + + +- Mark string as binary in comparison for skip_existing. [Johannes + Bornhold] + + Found out that the flag "--skip-existing" did not work out as expected on Python + 3.6. Tracked it down to the comparison which has to be against a string of bytes + in Python3. + + 0.18.0 (2018-02-22) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 5ec52a9..482e4a1 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.18.0' +__version__ = '0.19.0' From 0110ea40ed9aa8b7eb3651782b5cab229f2c9648 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 24 Mar 2018 01:04:35 -0400 Subject: [PATCH 041/455] Release version 0.19.1 --- CHANGES.rst | 2 +- github_backup/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9b71bb8..e10be28 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changelog ========= -0.19.0 (2018-03-24) +0.19.1 (2018-03-24) ------------------- - Add additional output for the current request. 
[Robin Gloster] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 482e4a1..db7a416 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.19.0' +__version__ = '0.19.1' From 413d4381cc2bc554060a2e8764105098a10a86bd Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 24 Mar 2018 01:16:28 -0400 Subject: [PATCH 042/455] fix: cleanup pep8 violations --- bin/github-backup | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index d6a17e6..2603ad1 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -303,12 +303,12 @@ def get_auth(args, encode=True): if platform.system() != 'Darwin': log_error("Keychain arguments are only supported on Mac OSX") try: - with open(os.devnull,'w') as devnull: + with open(os.devnull, 'w') as devnull: token = (subprocess.check_output([ - 'security','find-generic-password', - '-s',args.osx_keychain_item_name, - '-a',args.osx_keychain_item_account, - '-w' ], stderr=devnull).strip()) + 'security', 'find-generic-password', + '-s', args.osx_keychain_item_name, + '-a', args.osx_keychain_item_account, + '-w'], stderr=devnull).strip()) auth = token + ':' + 'x-oauth-basic' except: log_error('No password item matching the provided name and account could be found in the osx keychain.') @@ -932,19 +932,19 @@ def backup_account(args, output_directory): output_file = "{0}/followers.json".format(account_cwd) template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) _backup_data(args, - "followers", - template, - output_file, - account_cwd) + "followers", + template, + output_file, + account_cwd) if args.include_following or args.include_everything: output_file = "{0}/following.json".format(account_cwd) template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) _backup_data(args, - "following", - template, - output_file, - account_cwd) + 
"following", + template, + output_file, + account_cwd) def _backup_data(args, name, template, output_file, output_directory): From 321414d352f75d08417d92bd892413c21ba4e8a5 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 24 Mar 2018 01:16:34 -0400 Subject: [PATCH 043/455] Release version 0.19.2 --- CHANGES.rst | 10 +++++++++- github_backup/__init__.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e10be28..777d8ee 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,15 @@ Changelog ========= -0.19.1 (2018-03-24) +0.19.2 (2018-03-24) +------------------- + +Fix +~~~ + +- Cleanup pep8 violations. [Jose Diaz-Gonzalez] + +0.19.0 (2018-03-24) ------------------- - Add additional output for the current request. [Robin Gloster] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index db7a416..5daae67 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.19.1' +__version__ = '0.19.2' From b49f399037811ab7119ddb391f77b335801c4d74 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 24 Mar 2018 14:59:56 -0400 Subject: [PATCH 044/455] feat: simplify release script --- release | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/release b/release index 0ecbe7e..a36a2c7 100755 --- a/release +++ b/release @@ -1,8 +1,13 @@ #!/usr/bin/env bash set -eo pipefail; [[ $RELEASE_TRACE ]] && set -x -PACKAGE_NAME='github-backup' -INIT_PACKAGE_NAME='github_backup' +if [[ ! 
-f setup.py ]]; then + echo -e "${RED}WARNING: Missing setup.py${COLOR_OFF}\n" + exit 1 +fi + +PACKAGE_NAME="$(cat setup.py | grep "name='" | head | cut -d "'" -f2)" +INIT_PACKAGE_NAME="$(echo "${PACKAGE_NAME//-/_}")" PUBLIC="true" # Colors From b0130fdf942731c155157ce40256e757bbbadcdf Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 24 Mar 2018 15:00:05 -0400 Subject: [PATCH 045/455] chore: drop Python 2.6 --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index fd0abbb..85e2d5f 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ def open_file(fname): 'Development Status :: 5 - Production/Stable', 'Topic :: System :: Archiving :: Backup', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', From 0de341eab44cb72934c35945708fc027abad4cbc Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 24 Mar 2018 15:00:26 -0400 Subject: [PATCH 046/455] Release version 0.20.0 --- CHANGES.rst | 7 +++++++ github_backup/__init__.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 777d8ee..beca679 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,13 @@ Changelog ========= +0.20.0 (2018-03-24) +------------------- + +- Chore: drop Python 2.6. [Jose Diaz-Gonzalez] + +- Feat: simplify release script. [Jose Diaz-Gonzalez] + 0.19.2 (2018-03-24) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 5daae67..2f15b8c 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.19.2' +__version__ = '0.20.0' From 5a8e1ac2754db27c4a12bc920739c90210776208 Mon Sep 17 00:00:00 2001 From: "W. 
Harrison Wright" Date: Fri, 28 Sep 2018 21:46:28 -0500 Subject: [PATCH 047/455] Clone the specified user's starred repos, not the authenticated user --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 2603ad1..7d9e333 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -530,7 +530,7 @@ def retrieve_repositories(args): repos = retrieve_data(args, template, single_request=single_request) if args.all_starred: - starred_template = 'https://{0}/user/starred'.format(get_github_api_host(args)) + starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(args), args.user) starred_repos = retrieve_data(args, starred_template, single_request=False) # flag each repo as starred for downstream processing for item in starred_repos: From 4a4a3173313c2b718ca0af950fd3863153b507da Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Fri, 28 Sep 2018 21:59:50 -0500 Subject: [PATCH 048/455] Clone the specified user's gists, not the authenticated user --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 7d9e333..973456e 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -538,7 +538,7 @@ def retrieve_repositories(args): repos.extend(starred_repos) if args.include_gists: - gists_template = 'https://{0}/gists'.format(get_github_api_host(args)) + gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(args), args.user) gists = retrieve_data(args, gists_template, single_request=False) # flag each repo as a gist for downstream processing for item in gists: From d860f369e9de6118ec5e1168ffef7d931c8919f3 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 29 Sep 2018 00:16:48 -0400 Subject: [PATCH 049/455] Release version 0.20.1 --- CHANGES.rst | 9 +++++++++ github_backup/__init__.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst 
b/CHANGES.rst index beca679..b53ecbd 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,15 @@ Changelog ========= +0.20.1 (2018-09-29) +------------------- + +- Clone the specified user's gists, not the authenticated user. [W. + Harrison Wright] + +- Clone the specified user's starred repos, not the authenticated user. + [W. Harrison Wright] + 0.20.0 (2018-03-24) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 2f15b8c..abadaef 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.20.0' +__version__ = '0.20.1' From b5972aaaf041ce2b93c9b69a90725ed95957d5f7 Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Sun, 11 Nov 2018 19:40:46 -0600 Subject: [PATCH 050/455] Correctly download repos when user arg != authenticated user --- bin/github-backup | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 2603ad1..7ab5231 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -503,6 +503,12 @@ def _request_url_error(template, retry_timeout): return False +def get_authenticated_user(args): + template = 'https://{0}/user'.format(get_github_api_host(args)) + data = retrieve_data(args, template, single_request=True) + return data[0] + + def check_git_lfs_install(): exit_code = subprocess.call(['git', 'lfs', 'version']) if exit_code != 0: @@ -510,11 +516,20 @@ def check_git_lfs_install(): sys.exit(1) -def retrieve_repositories(args): +def retrieve_repositories(args, authenticated_user): log_info('Retrieving repositories') single_request = False - template = 'https://{0}/user/repos'.format( - get_github_api_host(args)) + if args.user == authenticated_user['login']: + # we must use the /user/repos API to be able to access private repos + template = 'https://{0}/user/repos'.format( + get_github_api_host(args)) + else: + if args.private: + log_error('Authenticated user is different from user being 
backed up, thus private repositories cannot be accessed') + template = 'https://{0}/users/{1}/repos'.format( + get_github_api_host(args), + args.user) + if args.organization: template = 'https://{0}/orgs/{1}/repos'.format( get_github_api_host(args), @@ -981,7 +996,8 @@ def main(): log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) - repositories = retrieve_repositories(args) + authenticated_user = get_authenticated_user(args) + repositories = retrieve_repositories(args, authenticated_user) repositories = filter_repositories(args, repositories) backup_repositories(args, output_directory, repositories) backup_account(args, output_directory) From 38bf438d2f30c394f5c35ff146814694531f8908 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 28 Nov 2018 01:59:03 -0500 Subject: [PATCH 051/455] Release version 0.21.0 --- CHANGES.rst | 6 ++++++ github_backup/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index b53ecbd..941ffb3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changelog ========= +0.21.0 (2018-11-28) +------------------- + +- Correctly download repos when user arg != authenticated user. [W. 
+ Harrison Wright] + 0.20.1 (2018-09-29) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index abadaef..e453371 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.20.1' +__version__ = '0.21.0' From 628f2cbf73bed49afbbfe450c3ce79e1c75a1140 Mon Sep 17 00:00:00 2001 From: Bernd Date: Mon, 24 Dec 2018 04:19:29 +0100 Subject: [PATCH 052/455] Mark options which are not included in --all As discussed in Issue #100 --- bin/github-backup | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 741123b..0996092 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -163,11 +163,11 @@ def parse_args(): parser.add_argument('--all-starred', action='store_true', dest='all_starred', - help='include starred repositories in backup') + help='include starred repositories in backup [*]') parser.add_argument('--watched', action='store_true', dest='include_watched', - help='include watched repositories in backup') + help='include JSON output of watched repositories in backup') parser.add_argument('--followers', action='store_true', dest='include_followers', @@ -179,7 +179,7 @@ def parse_args(): parser.add_argument('--all', action='store_true', dest='include_everything', - help='include everything in backup') + help='include everything in backup (not including [*])') parser.add_argument('--issues', action='store_true', dest='include_issues', @@ -207,7 +207,7 @@ def parse_args(): parser.add_argument('--pull-details', action='store_true', dest='include_pull_details', - help='include more pull request details in backup') + help='include more pull request details in backup [*]') parser.add_argument('--labels', action='store_true', dest='include_labels', @@ -231,7 +231,7 @@ def parse_args(): parser.add_argument('--lfs', action='store_true', dest='lfs_clone', - help='clone LFS repositories (requires Git LFS to be installed, 
https://git-lfs.github.com)') + help='clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]') parser.add_argument('--wikis', action='store_true', dest='include_wiki', @@ -239,11 +239,11 @@ def parse_args(): parser.add_argument('--gists', action='store_true', dest='include_gists', - help='include gists in backup') + help='include gists in backup [*]') parser.add_argument('--starred-gists', action='store_true', dest='include_starred_gists', - help='include starred gists in backup') + help='include starred gists in backup [*]') parser.add_argument('--skip-existing', action='store_true', dest='skip_existing', @@ -273,11 +273,11 @@ def parse_args(): parser.add_argument('-P', '--private', action='store_true', dest='private', - help='include private repositories') + help='include private repositories [*]') parser.add_argument('-F', '--fork', action='store_true', dest='fork', - help='include forked repositories') + help='include forked repositories [*]') parser.add_argument('--prefer-ssh', action='store_true', help='Clone repositories using SSH instead of HTTPS') From f8be34562b51a16d97c64ba69a4791c7897fbc3a Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Tue, 25 Dec 2018 06:28:28 -0500 Subject: [PATCH 053/455] Release version 0.21.1 --- CHANGES.rst | 7 +++++++ github_backup/__init__.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 941ffb3..ce5f178 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,13 @@ Changelog ========= +0.21.1 (2018-12-25) +------------------- + +- Mark options which are not included in --all. 
[Bernd] + + As discussed in Issue #100 + 0.21.0 (2018-11-28) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index e453371..8c306aa 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.21.0' +__version__ = '0.21.1' From b70ea87db7a1e823e261a1d57c30ae42143f0457 Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Thu, 27 Dec 2018 12:53:21 -0600 Subject: [PATCH 054/455] Fix accidental system exit with better logging strategy --- bin/github-backup | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 0996092..097ba1c 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -40,21 +40,33 @@ FNULL = open(os.devnull, 'w') def log_error(message): + """ + Log message (str) or messages (List[str]) to stderr and exit with status 1 + """ + log_warning(message) + sys.exit(1) + + +def log_info(message): + """ + Log message (str) or messages (List[str]) to stdout + """ if type(message) == str: message = [message] for msg in message: - sys.stderr.write("{0}\n".format(msg)) - - sys.exit(1) + sys.stdout.write("{0}\n".format(msg)) -def log_info(message): +def log_warning(message): + """ + Log message (str) or messages (List[str]) to stderr + """ if type(message) == str: message = [message] for msg in message: - sys.stdout.write("{0}\n".format(msg)) + sys.stderr.write("{0}\n".format(msg)) def logging_subprocess(popenargs, @@ -525,7 +537,7 @@ def retrieve_repositories(args, authenticated_user): get_github_api_host(args)) else: if args.private: - log_error('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') + log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') template = 'https://{0}/users/{1}/repos'.format( get_github_api_host(args), args.user) From 4b459f9af80909fbefcc34b33f6bc63c41a780c4 Mon 
Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Thu, 27 Dec 2018 12:58:57 -0600 Subject: [PATCH 055/455] Add org check to avoid incorrect log output --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 097ba1c..55b77ae 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -536,7 +536,7 @@ def retrieve_repositories(args, authenticated_user): template = 'https://{0}/user/repos'.format( get_github_api_host(args)) else: - if args.private: + if args.private and not args.organization: log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') template = 'https://{0}/users/{1}/repos'.format( get_github_api_host(args), From 9e472b74e6e8f551d98dc43fb693334153240195 Mon Sep 17 00:00:00 2001 From: "W. Harrison Wright" Date: Thu, 27 Dec 2018 13:07:13 -0600 Subject: [PATCH 056/455] Remove unnecessary sys.exit call --- bin/github-backup | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 55b77ae..0245ba1 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -525,7 +525,6 @@ def check_git_lfs_install(): exit_code = subprocess.call(['git', 'lfs', 'version']) if exit_code != 0: log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') - sys.exit(1) def retrieve_repositories(args, authenticated_user): From d148f9b900f01d73be27d72d7fe55018c48d6722 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 1 Feb 2019 09:50:42 -0500 Subject: [PATCH 057/455] Release version 0.22.0 --- CHANGES.rst | 10 ++++++++++ github_backup/__init__.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index ce5f178..f465f57 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,16 @@ Changelog ========= +0.22.0 (2019-02-01) +------------------- + +- Remove unnecessary sys.exit call. [W. 
Harrison Wright] + +- Add org check to avoid incorrect log output. [W. Harrison Wright] + +- Fix accidental system exit with better logging strategy. [W. Harrison + Wright] + 0.21.1 (2018-12-25) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 8c306aa..81edede 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.21.1' +__version__ = '0.22.0' From 57ab5ce1a21d5cd16bf9dd0485b5df89a3a7436e Mon Sep 17 00:00:00 2001 From: JOHN STETIC Date: Wed, 20 Feb 2019 20:43:00 -0500 Subject: [PATCH 058/455] Log URL error https://github.com/josegonzalez/python-github-backup/issues/105 --- bin/github-backup | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 0245ba1..ec8596b 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -445,7 +445,8 @@ def _get_response(request, auth, template): except HTTPError as exc: errors, should_continue = _request_http_error(exc, auth, errors) # noqa r = exc - except URLError: + except URLError as e: + log_error(e.reason) should_continue = _request_url_error(template, retry_timeout) if not should_continue: raise From eff6e36974d3e63a4cbe8bec8de51ccdfa05ad23 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 21 Feb 2019 15:13:31 -0500 Subject: [PATCH 059/455] Release version 0.22.1 --- CHANGES.rst | 6 ++++++ github_backup/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f465f57..c0140a9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changelog ========= +0.22.1 (2019-02-21) +------------------- + +- Log URL error https://github.com/josegonzalez/python-github- + backup/issues/105. 
[JOHN STETIC] + 0.22.0 (2019-02-01) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 81edede..66d9d1e 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.22.0' +__version__ = '0.22.1' From c86163bfe6294cfeda21b284d46e1f27afb2598f Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 21 Feb 2019 15:40:39 -0500 Subject: [PATCH 060/455] fix: warn instead of error Refs #106 --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index ec8596b..88409de 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -446,7 +446,7 @@ def _get_response(request, auth, template): errors, should_continue = _request_http_error(exc, auth, errors) # noqa r = exc except URLError as e: - log_error(e.reason) + log_warning(e.reason) should_continue = _request_url_error(template, retry_timeout) if not should_continue: raise From da4b29a2d6126950492d55d0b6b969f31c2e2314 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 21 Feb 2019 15:41:11 -0500 Subject: [PATCH 061/455] Release version 0.22.2 --- CHANGES.rst | 10 ++++++++++ github_backup/__init__.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index c0140a9..bb53cbf 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,16 @@ Changelog ========= +0.22.2 (2019-02-21) +------------------- + +Fix +~~~ + +- Warn instead of error. 
[Jose Diaz-Gonzalez] + + Refs #106 + 0.22.1 (2019-02-21) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 66d9d1e..cc37364 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.22.1' +__version__ = '0.22.2' From 3193d120e5a2e11906042482faa12a05a2d079bc Mon Sep 17 00:00:00 2001 From: Gael de Chalendar Date: Tue, 4 Jun 2019 18:53:58 +0200 Subject: [PATCH 062/455] Avoid to crash in case of HTTP 502 error Survive also on socket.error connections like on HTTPError or URLError. This should solve issue #110. --- bin/github-backup | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index 88409de..d862641 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -1,6 +1,7 @@ #!/usr/bin/env python from __future__ import print_function +import socket import argparse import base64 @@ -404,6 +405,16 @@ def retrieve_data(args, template, query_args=None, single_request=False): status_code = int(r.getcode()) + retries = 0 + while retries < 3 and status_code == 502: + print('API request returned HTTP 502: Bad Gateway. 
Retrying in 5 seconds') + retries += 1 + time.sleep(5) + request = _construct_request(per_page, page, query_args, template, auth) # noqa + r, errors = _get_response(request, auth, template) + + status_code = int(r.getcode()) + if status_code != 200: template = 'API request returned HTTP {0}: {1}' errors.append(template.format(status_code, r.reason)) @@ -450,6 +461,11 @@ def _get_response(request, auth, template): should_continue = _request_url_error(template, retry_timeout) if not should_continue: raise + except socket.error as e: + log_warning(e.strerror) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise if should_continue: continue From 2340a02fc67d417c50ae0ddb314799948bfe27ea Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Tue, 4 Jun 2019 14:43:32 -0400 Subject: [PATCH 063/455] Release version 0.23.0 --- CHANGES.rst | 138 +++++++++----------------------------- github_backup/__init__.py | 2 +- 2 files changed, 31 insertions(+), 109 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index bb53cbf..15a3b76 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,82 +1,83 @@ Changelog ========= +0.23.0 (2019-06-04) +------------------- +------------------------ +- Avoid to crash in case of HTTP 502 error. [Gael de Chalendar] + + Survive also on socket.error connections like on HTTPError or URLError. + + This should solve issue #110. + + 0.22.2 (2019-02-21) ------------------- Fix ~~~ - - Warn instead of error. [Jose Diaz-Gonzalez] Refs #106 + 0.22.1 (2019-02-21) ------------------- - - Log URL error https://github.com/josegonzalez/python-github- backup/issues/105. [JOHN STETIC] + 0.22.0 (2019-02-01) ------------------- - - Remove unnecessary sys.exit call. [W. Harrison Wright] - - Add org check to avoid incorrect log output. [W. Harrison Wright] - - Fix accidental system exit with better logging strategy. [W. Harrison Wright] + 0.21.1 (2018-12-25) ------------------- - - Mark options which are not included in --all. 
[Bernd] As discussed in Issue #100 + 0.21.0 (2018-11-28) ------------------- - - Correctly download repos when user arg != authenticated user. [W. Harrison Wright] + 0.20.1 (2018-09-29) ------------------- - - Clone the specified user's gists, not the authenticated user. [W. Harrison Wright] - - Clone the specified user's starred repos, not the authenticated user. [W. Harrison Wright] + 0.20.0 (2018-03-24) ------------------- - - Chore: drop Python 2.6. [Jose Diaz-Gonzalez] - - Feat: simplify release script. [Jose Diaz-Gonzalez] + 0.19.2 (2018-03-24) ------------------- Fix ~~~ - - Cleanup pep8 violations. [Jose Diaz-Gonzalez] + 0.19.0 (2018-03-24) ------------------- - - Add additional output for the current request. [Robin Gloster] This is useful to have some progress indication for huge repositories. - - - Add option to backup additional PR details. [Robin Gloster] Some payload is only included when requesting a single pull request - - - Mark string as binary in comparison for skip_existing. [Johannes Bornhold] @@ -87,66 +88,53 @@ Fix 0.18.0 (2018-02-22) ------------------- - - Add option to fetch followers/following JSON data. [Stephen Greene] + 0.17.0 (2018-02-20) ------------------- - - Short circuit gists backup process. [W. Harrison Wright] - - Formatting. [W. Harrison Wright] - - Add ability to backup gists. [W. Harrison Wright] + 0.16.0 (2018-01-22) ------------------- - - Change option to --all-starred. [W. Harrison Wright] - - JK don't update documentation. [W. Harrison Wright] - - Put starred clone repoistories under a new option. [W. Harrison Wright] - - Add comment. [W. Harrison Wright] - - Add ability to clone starred repos. [W. Harrison Wright] + 0.14.1 (2017-10-11) ------------------- - - Fix arg not defined error. [Edward Pfremmer] - Ref: https://github.com/josegonzalez/python-github-backup/issues/69 0.14.0 (2017-10-11) ------------------- - - Added a check to see if git-lfs is installed when doing an LFS clone. 
[pieterclaerhout] - - Added support for LFS clones. [pieterclaerhout] - - Add pypi info to readme. [Albert Wang] - - Explicitly support python 3 in package description. [Albert Wang] - - Add couple examples to help new users. [Yusuf Tran] + 0.13.2 (2017-05-06) ------------------- - - Fix remotes while updating repository. [Dima Gerasimov] + 0.13.1 (2017-04-11) ------------------- - - Fix error when repository has no updated_at value. [Nicolai Ehemann] + 0.13.0 (2017-04-05) ------------------- - - Add OS check for OSX specific keychain args. [Martin O'Reilly] Keychain arguments are only supported on Mac OSX. @@ -155,8 +143,6 @@ Fix error message rather than a "No password item matching the provided name and account could be found in the osx keychain" error message - - - Add support for storing PAT in OSX keychain. [Martin O'Reilly] Added additional optional arguments and README guidance for storing @@ -166,62 +152,48 @@ Fix 0.12.1 (2017-03-27) ------------------- - - Avoid remote branch name churn. [Chris Adams] This avoids the backup output having lots of "[new branch]" messages because removing the old remote name removed all of the existing branch references. - - - Fix detection of bare git directories. [Andrzej Maczuga] + 0.12.0 (2016-11-22) ------------------- Fix ~~~ - - Properly import version from github_backup package. [Jose Diaz- Gonzalez] - - Support alternate git status output. [Jose Diaz-Gonzalez] Other ~~~~~ - - Pep8: E501 line too long (83 > 79 characters) [Jose Diaz-Gonzalez] - - Pep8: E128 continuation line under-indented for visual indent. [Jose Diaz-Gonzalez] - - Support archivization using bare git clones. [Andrzej Maczuga] - - Fix typo, 3x. [Terrell Russell] + 0.11.0 (2016-10-26) ------------------- - - Support --token file:///home/user/token.txt (fixes gh-51) [Björn Dahlgren] - - Fix some linting. [Albert Wang] - - Fix byte/string conversion for python 3. [Albert Wang] - - Support python 3. 
[Albert Wang] - - Encode special characters in password. [Remi Rampin] - - Don't pretend program name is "Github Backup" [Remi Rampin] - - Don't install over insecure connection. [Remi Rampin] The git:// protocol is unauthenticated and unencrypted, and no longer advertised by GitHub. Using HTTPS shouldn't impact performance. + 0.10.3 (2016-08-21) ------------------- - - Fixes #29. [Jonas Michel] Reporting an error when the user's rate limit is exceeded causes @@ -229,8 +201,6 @@ Other sleep. Instead of generating an explicit error we just want to inform the user that the script is going to sleep until their rate limit count resets. - - - Fixes #29. [Jonas Michel] The errors list was not being cleared out after resuming a backup @@ -241,14 +211,13 @@ Other 0.10.2 (2016-08-21) ------------------- - - Add a note regarding git version requirement. [Jose Diaz-Gonzalez] Closes #37 + 0.10.0 (2016-08-18) ------------------- - - Implement incremental updates. [Robert Bradshaw] Guarded with an --incremental flag. @@ -261,12 +230,11 @@ Other 0.9.0 (2016-03-29) ------------------ - - Fix cloning private repos with basic auth or token. [Kazuki Suda] + 0.8.0 (2016-02-14) ------------------ - - Don't store issues which are actually pull requests. [Enrico Tröger] This prevents storing pull requests twice since the Github API returns @@ -277,43 +245,31 @@ Other 0.7.0 (2016-02-02) ------------------ - - Softly fail if not able to read hooks. [Albert Wang] - - Add note about 2-factor auth. [Albert Wang] - - Make user repository search go through endpoint capable of reading private repositories. [Albert Wang] - - Prompt for password if only username given. [Alex Hall] + 0.6.0 (2015-11-10) ------------------ - - Force proper remote url. [Jose Diaz-Gonzalez] - - Improve error handling in case of HTTP errors. [Enrico Tröger] In case of a HTTP status code 404, the returned 'r' was never assigned. In case of URL errors which are not timeouts, we probably should bail out. 
- - - Add --hooks to also include web hooks into the backup. [Enrico Tröger] - - Create the user specified output directory if it does not exist. [Enrico Tröger] Fixes #17. - - - Add missing auth argument to _get_response() [Enrico Tröger] When running unauthenticated and Github starts rate-limiting the client, github-backup crashes because the used auth variable in _get_response() was not available. This change should fix it. - - - Add repository URL to error message for non-existing repositories. [Enrico Tröger] @@ -324,40 +280,28 @@ Other 0.5.0 (2015-10-10) ------------------ - - Add release script. [Jose Diaz-Gonzalez] - - Refactor to both simplify codepath as well as follow PEP8 standards. [Jose Diaz-Gonzalez] - - Retry 3 times when the connection times out. [Mathijs Jonker] - - Made unicode output defalut. [Kirill Grushetsky] - - Import alphabetised. [Kirill Grushetsky] - - Preserve Unicode characters in the output file. [Kirill Grushetsky] Added option to preserve Unicode characters in the output file - - Josegonzales/python-github-backup#12 Added backup of labels and milestones. [aensley] - - Fixed indent. [Mathijs Jonker] - - Skip unitialized repo's. [mjonker-embed] These gave me errors which caused mails from crontab. - - Added prefer-ssh. [mjonker-embed] Was needed for my back-up setup, code includes this but readme wasn't updated - - Retry API requests which failed due to rate-limiting. [Chris Adams] This allows operation to continue, albeit at a slower pace, if you have enough data to trigger the API rate limits - - Logging_subprocess: always log when a command fails. [Chris Adams] Previously git clones could fail without any indication @@ -367,21 +311,15 @@ Other Now a non-zero return code will always output a message to stderr and will display the executed command so it can be rerun for troubleshooting. - - - Switch to using ssh_url. [Chris Adams] The previous commit used the wrong URL for a private repo. 
This was masked by the lack of error loging in logging_subprocess (which will be in a separate branch) - - - Add an option to prefer checkouts over SSH. [Chris Adams] This is really useful with private repos to avoid being nagged for credentials for every repository - - - Add pull request support. [Kevin Laude] Back up reporitory pull requests by passing the --include-pulls @@ -393,8 +331,6 @@ Other Pull requests are automatically backed up when the --all argument is uesd. - - - Add GitHub Enterprise support. [Kevin Laude] Pass the -H or --github-host argument with a GitHub Enterprise hostname @@ -404,35 +340,21 @@ Other 0.2.0 (2014-09-22) ------------------ - - Add support for retrieving repositories. Closes #1. [Jose Diaz- Gonzalez] - - Fix PEP8 violations. [Jose Diaz-Gonzalez] - - Add authorization to header only if specified by user. [Ioannis Filippidis] - - Fill out readme more. [Jose Diaz-Gonzalez] - - Fix import. [Jose Diaz-Gonzalez] - - Properly name readme. [Jose Diaz-Gonzalez] - - Create MANIFEST.in. [Jose Diaz-Gonzalez] - - Create .gitignore. [Jose Diaz-Gonzalez] - - Create setup.py. [Jose Diaz-Gonzalez] - - Create requirements.txt. [Jose Diaz-Gonzalez] - - Create __init__.py. [Jose Diaz-Gonzalez] - - Create LICENSE.txt. [Jose Diaz-Gonzalez] - - Create README.md. [Jose Diaz-Gonzalez] - - Create github-backup. 
[Jose Diaz-Gonzalez] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index cc37364..08a9dbf 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.22.2' +__version__ = '0.23.0' From 41130fc8b0b694e68eb6754d005bb2f3295578fb Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Jun 2019 11:20:26 -0500 Subject: [PATCH 064/455] QKT-42: support saving release information --- bin/github-backup | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index d862641..427f14c 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -303,6 +303,11 @@ def parse_args(): parser.add_argument('--keychain-account', dest='osx_keychain_item_account', help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') + parser.add_argument('--include-releases', + action='store_true', + dest='include_releases', + help='include release information, not including assets or binaries' + ) return parser.parse_args() @@ -699,6 +704,9 @@ def backup_repositories(args, output_directory, repositories): if args.include_hooks or args.include_everything: backup_hooks(args, repo_cwd, repository, repos_template) + if args.include_releases or args.include_everything: + backup_releases(args, repo_cwd, repository, repos_template) + if args.incremental: open(last_update_path, 'w').write(last_update) @@ -880,6 +888,28 @@ def backup_hooks(args, repo_cwd, repository, repos_template): log_info("Unable to read hooks, skipping") +def backup_releases(args, repo_cwd, repository, repos_template): + repository_fullname = repository['full_name'] + + # give release files somewhere to live & log intent + release_cwd = os.path.join(repo_cwd, 'releases') + log_info('Retrieving {0} releases'.format(repository_fullname)) + mkdir_p(repo_cwd, release_cwd) + + query_args = {} + + _release_template = '{0}/{1}/releases'.format(repos_template, 
repository_fullname) + _releases = retrieve_data(args, _release_template, query_args=query_args) + + # for each release, store it + log_info('Saving {0} releases to disk'.format(len(_releases))) + for release in _releases: + release_name = release['tag_name'] + output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) + with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + json_dump(release, f) + + def fetch_repository(name, remote_url, local_dir, From 4b40ae94d7ffdc75eaac187a2273f3a3d856d0ab Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Jun 2019 16:48:25 -0500 Subject: [PATCH 065/455] QKT-42 update: shorter command flag --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 427f14c..6c88dd2 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -303,7 +303,7 @@ def parse_args(): parser.add_argument('--keychain-account', dest='osx_keychain_item_account', help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--include-releases', + parser.add_argument('--releases', action='store_true', dest='include_releases', help='include release information, not including assets or binaries' From 3d3f5120743a2e141c853712fea78c2a3351fc11 Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Jun 2019 16:53:40 -0500 Subject: [PATCH 066/455] QKT-42: releases - add readme info --- README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 861e616..b8d8c5e 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,7 @@ CLI Usage is as follows:: [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] - [--milestones] [--repositories] [--bare] [--lfs] + [--milestones] [--repositories] [--releases] [--bare] [--lfs] [--wikis] [--gists] [--starred-gists] 
[--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] @@ -76,6 +76,7 @@ CLI Usage is as follows:: authenticated) --milestones include milestones in backup --repositories include repository clone in backup + --releases include repository releases' information without assets or binaries --bare clone bare repositories --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) From de0c3f46c616fe8e1f2d3a80b747a69d4bf7da14 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Fri, 21 Jun 2019 20:03:14 -0500 Subject: [PATCH 067/455] WIP: download assets --- bin/github-backup | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 6c88dd2..e349eaa 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -537,6 +537,24 @@ def _request_url_error(template, retry_timeout): return False +def download_file(url, path, auth): + request = Request(url) + request.add_header('Accept', 'application/octet-stream') + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + data = urlopen(request) + with open(path, 'wb') as f: + f.write(data.read()) + + # import requests + # r = requests.get(url, stream=True, headers={ + # 'Accept': 'application/octet-stream', + # 'Authorization': 'Basic '.encode('ascii') + auth + # }) + # with open(path, 'wb') as f: + # for chunk in r.iter_content(1024): + # f.write(chunk) + + def get_authenticated_user(args): template = 'https://{0}/user'.format(get_github_api_host(args)) data = retrieve_data(args, template, single_request=True) @@ -898,17 +916,21 @@ def backup_releases(args, repo_cwd, repository, repos_template): query_args = {} - _release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) - _releases = retrieve_data(args, _release_template, query_args=query_args) + release_template = '{0}/{1}/releases'.format(repos_template, 
repository_fullname) + releases = retrieve_data(args, release_template, query_args=query_args) # for each release, store it - log_info('Saving {0} releases to disk'.format(len(_releases))) - for release in _releases: + log_info('Saving {0} releases to disk'.format(len(releases))) + for release in releases: release_name = release['tag_name'] output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: json_dump(release, f) + assets = retrieve_data(args, release['assets_url']) + for asset in assets: + download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) + def fetch_repository(name, remote_url, From 9b6400932d9ba7ebefcaa180ffe2efcd2be36c68 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Sat, 22 Jun 2019 13:00:42 -0500 Subject: [PATCH 068/455] Fix redirect to s3 --- bin/github-backup | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index e349eaa..583d3ee 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -26,6 +26,8 @@ try: from urllib.error import HTTPError, URLError from urllib.request import urlopen from urllib.request import Request + from urllib.request import HTTPRedirectHandler + from urllib.request import build_opener except ImportError: # python 2 from urlparse import urlparse @@ -34,6 +36,8 @@ except ImportError: from urllib2 import HTTPError, URLError from urllib2 import urlopen from urllib2 import Request + from urllib2 import HTTPRedirectHandler + from urllib2 import build_opener from github_backup import __version__ @@ -537,22 +541,33 @@ def _request_url_error(template, retry_timeout): return False +class S3HTTPRedirectHandler(HTTPRedirectHandler): + """ + A subclassed redirect handler for downloading Github assets from S3. 
+ + urllib will add the Authorization header to the redirected request to S3, which will result in a 400, + so we should remove said header on redirect. + """ + def redirect_request(self, req, fp, code, msg, headers, newurl): + request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) + del request.headers['Authorization'] + return request + + def download_file(url, path, auth): request = Request(url) request.add_header('Accept', 'application/octet-stream') request.add_header('Authorization', 'Basic '.encode('ascii') + auth) - data = urlopen(request) + opener = build_opener(S3HTTPRedirectHandler) + response = opener.open(request) + + chunk_size = 16 * 1024 with open(path, 'wb') as f: - f.write(data.read()) - - # import requests - # r = requests.get(url, stream=True, headers={ - # 'Accept': 'application/octet-stream', - # 'Authorization': 'Basic '.encode('ascii') + auth - # }) - # with open(path, 'wb') as f: - # for chunk in r.iter_content(1024): - # f.write(chunk) + while True: + chunk = response.read(chunk_size) + if not chunk: + break + f.write(chunk) def get_authenticated_user(args): From ea4c3d0f6f79aec742f4497b502a757e185d6e4e Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Sat, 22 Jun 2019 13:05:54 -0500 Subject: [PATCH 069/455] Fix super call for python2 --- bin/github-backup | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 583d3ee..fe3f8e9 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -18,6 +18,7 @@ import subprocess import sys import time import platform +PY2 = False try: # python 3 from urllib.parse import urlparse @@ -30,6 +31,7 @@ try: from urllib.request import build_opener except ImportError: # python 2 + PY2 = True from urlparse import urlparse from urllib import quote as urlquote from urllib import urlencode @@ -549,7 +551,11 @@ class S3HTTPRedirectHandler(HTTPRedirectHandler): so we should remove said header on redirect. 
""" def redirect_request(self, req, fp, code, msg, headers, newurl): - request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) + if PY2: + # HTTPRedirectHandler is an old style class + request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) + else: + request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) del request.headers['Authorization'] return request From 921aab372956cc7c0437ee1829d05ae1f834fa34 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Sat, 22 Jun 2019 13:19:45 -0500 Subject: [PATCH 070/455] Fix pull details --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index d862641..4e5eb51 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -801,7 +801,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): args, _pulls_template + '/{}'.format(pull['number']), single_request=True - ) + )[0] log_info('Saving {0} pull requests to disk'.format( len(list(pulls.keys())))) From 89f59cc7a2e10f6e2878821e7ee8d7f8d1f64d76 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Mon, 24 Jun 2019 15:49:19 -0500 Subject: [PATCH 071/455] Make assets it's own flag --- bin/github-backup | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index fe3f8e9..5a09483 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -314,6 +314,10 @@ def parse_args(): dest='include_releases', help='include release information, not including assets or binaries' ) + parser.add_argument('--assets', + action='store_true', + dest='include_assets', + help='include assets alongside release information; only applies if including releases') return parser.parse_args() @@ -744,7 +748,8 @@ def backup_repositories(args, output_directory, repositories): backup_hooks(args, repo_cwd, repository, repos_template) if 
args.include_releases or args.include_everything: - backup_releases(args, repo_cwd, repository, repos_template) + backup_releases(args, repo_cwd, repository, repos_template, + include_assets=args.include_assets or args.include_everything) if args.incremental: open(last_update_path, 'w').write(last_update) @@ -927,7 +932,7 @@ def backup_hooks(args, repo_cwd, repository, repos_template): log_info("Unable to read hooks, skipping") -def backup_releases(args, repo_cwd, repository, repos_template): +def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): repository_fullname = repository['full_name'] # give release files somewhere to live & log intent @@ -948,9 +953,10 @@ def backup_releases(args, repo_cwd, repository, repos_template): with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: json_dump(release, f) - assets = retrieve_data(args, release['assets_url']) - for asset in assets: - download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) + if include_assets: + assets = retrieve_data(args, release['assets_url']) + for asset in assets: + download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) def fetch_repository(name, From 121fa6829415fd25d7a79f588b4b79a806669db6 Mon Sep 17 00:00:00 2001 From: Ethan Timm Date: Tue, 25 Jun 2019 15:41:02 -0500 Subject: [PATCH 072/455] QKT-45: include assets - update readme update readme with flag information for including assets alongside their respective releases --- README.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index b8d8c5e..8b5f6f1 100644 --- a/README.rst +++ b/README.rst @@ -32,8 +32,9 @@ CLI Usage is as follows:: [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--labels] [--hooks] - [--milestones] [--repositories] [--releases] [--bare] [--lfs] - [--wikis] [--gists] 
[--starred-gists] [--skip-existing] + [--milestones] [--repositories] [--releases] [--assets] + [--bare] [--lfs] [--wikis] [--gists] [--starred-gists] + [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] @@ -77,6 +78,7 @@ CLI Usage is as follows:: --milestones include milestones in backup --repositories include repository clone in backup --releases include repository releases' information without assets or binaries + --assets include assets alongside release information; only applies if including releases --bare clone bare repositories --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) From 6db5bd731b1bf4bfe4c5f44f2f561b4e5052f8d9 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 27 Jun 2019 11:24:43 -0400 Subject: [PATCH 073/455] Release version 0.24.0 --- CHANGES.rst | 17 ++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 15a3b76..3232fd8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,24 @@ Changelog ========= -0.23.0 (2019-06-04) +0.24.0 (2019-06-27) ------------------- ------------------------ +- QKT-45: include assets - update readme. [Ethan Timm] + + update readme with flag information for including assets alongside their respective releases +- Make assets it's own flag. [Harrison Wright] +- Fix super call for python2. [Harrison Wright] +- Fix redirect to s3. [Harrison Wright] +- WIP: download assets. [Harrison Wright] +- QKT-42: releases - add readme info. [ethan] +- QKT-42 update: shorter command flag. [ethan] +- QKT-42: support saving release information. [ethan] +- Fix pull details. [Harrison Wright] + + +0.23.0 (2019-06-04) +------------------- - Avoid to crash in case of HTTP 502 error. [Gael de Chalendar] Survive also on socket.error connections like on HTTPError or URLError. 
diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 08a9dbf..f8ab8c2 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.23.0' +__version__ = '0.24.0' From e74765ba7f9f9a117113b4bd3e08cc06fadb3330 Mon Sep 17 00:00:00 2001 From: 2a <8h2a@users.noreply.github.com> Date: Wed, 3 Jul 2019 23:01:00 +0200 Subject: [PATCH 074/455] Issue 119: Change retrieve_data to be a generator See issue #119. --- bin/github-backup | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 2dd27c8..87fa565 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -406,12 +406,11 @@ def get_github_repo_url(args, repository): return repo_url -def retrieve_data(args, template, query_args=None, single_request=False): +def retrieve_data_gen(args, template, query_args=None, single_request=False): auth = get_auth(args) query_args = get_query_args(query_args) per_page = 100 page = 0 - data = [] while True: page = page + 1 @@ -438,11 +437,12 @@ def retrieve_data(args, template, query_args=None, single_request=False): response = json.loads(r.read().decode('utf-8')) if len(errors) == 0: if type(response) == list: - data.extend(response) + for resp in response: + yield resp if len(response) < per_page: break elif type(response) == dict and single_request: - data.append(response) + yield response if len(errors) > 0: log_error(errors) @@ -450,8 +450,8 @@ def retrieve_data(args, template, query_args=None, single_request=False): if single_request: break - return data - +def retrieve_data(args, template, query_args=None, single_request=False): + return list(retrieve_data_gen(args, template, query_args, single_request)) def get_query_args(query_args=None): if not query_args: @@ -836,18 +836,21 @@ def backup_pulls(args, repo_cwd, repository, repos_template): pull_states = ['open', 'closed'] for pull_state in pull_states: query_args['state'] = pull_state - # It'd be 
nice to be able to apply the args.since filter here... - _pulls = retrieve_data(args, + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: + if args.since and pull['updated_at'] < args.since: + break if not args.since or pull['updated_at'] >= args.since: pulls[pull['number']] = pull else: - _pulls = retrieve_data(args, + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: + if args.since and pull['updated_at'] < args.since: + break if not args.since or pull['updated_at'] >= args.since: pulls[pull['number']] = retrieve_data( args, From b73079daf29107eea0fbeef83308cc607dc59f9c Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 3 Jul 2019 17:46:12 -0400 Subject: [PATCH 075/455] Release version 0.25.0 --- CHANGES.rst | 9 ++++++++- github_backup/__init__.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3232fd8..7cdcf5a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,16 @@ Changelog ========= -0.24.0 (2019-06-27) +0.25.0 (2019-07-03) ------------------- ------------------------ +- Issue 119: Change retrieve_data to be a generator. [2a] + + See issue #119. + + +0.24.0 (2019-06-27) +------------------- - QKT-45: include assets - update readme. 
[Ethan Timm] update readme with flag information for including assets alongside their respective releases diff --git a/github_backup/__init__.py b/github_backup/__init__.py index f8ab8c2..8c308d7 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.24.0' +__version__ = '0.25.0' From bacd77030b0dba144f50a365ae00d0dcec2a4f91 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Jul 2019 13:39:41 -0700 Subject: [PATCH 076/455] Update README.rst --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index 8b5f6f1..f39a9b0 100644 --- a/README.rst +++ b/README.rst @@ -4,6 +4,8 @@ github-backup |PyPI| |Python Versions| +> This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and cannot sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. + backup a github user or organization Requirements From fd684a71fb3b71f6dc1ba6a5f4d1807f9bd8ad85 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Jul 2019 13:40:25 -0700 Subject: [PATCH 077/455] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index f39a9b0..12c50f9 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ github-backup |PyPI| |Python Versions| -> This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and cannot sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. + This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and cannot sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. 
backup a github user or organization From 2658b039a1c7fca4f3ada5eb2b63f9339aad7036 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 14 Aug 2019 17:47:47 -0400 Subject: [PATCH 078/455] Create ISSUE_TEMPLATE.md --- ISSUE_TEMPLATE.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 ISSUE_TEMPLATE.md diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..eb45cf8 --- /dev/null +++ b/ISSUE_TEMPLATE.md @@ -0,0 +1,13 @@ +# Important notice regarding filed issues + +This project already fills my needs, and as such I have no real reason to continue it's development. This project is otherwise provided as is, and no support is given. + +If pull requests implementing bug fixes or enhancements are pushed, I am happy to review and merge them (time permitting). + +If you wish to have a bug fixed, you have a few options: + +- Fix it yourself. +- File a bug and hope someone else fixes it for you. +- Pay me to fix it (my rate is $200 an hour, minimum 1 hour, contact me via my [github email address](https://github.com/josegonzalez) if you want to go this route). + +In all cases, feel free to file an issue, they may be of help to others in the future. From e57873b6ddf8cebc733a83b55bdbe208f2ebfc0a Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 14 Aug 2019 17:51:19 -0400 Subject: [PATCH 079/455] Create PULL_REQUEST.md --- PULL_REQUEST.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 PULL_REQUEST.md diff --git a/PULL_REQUEST.md b/PULL_REQUEST.md new file mode 100644 index 0000000..1624cfa --- /dev/null +++ b/PULL_REQUEST.md @@ -0,0 +1,7 @@ +# Important notice regarding filed pull requests + +This project already fills my needs, and as such I have no real reason to continue it's development. This project is otherwise provided as is, and no support is given. + +I will attempt to review pull requests at _my_ earliest convenience. 
If I am unable to get to your pull request in a timely fashion, it is what it is. This repository does not pay any bills, and I am not required to merge any pull request from any individual. + +If you wish to jump my personal priority queue, you may pay me for my time to review. My rate is $200 an hour - minimum 1 hour - feel free contact me via my github email address if you want to go this route. From a56d27dd8b8ad1f9ef94594d2ad8f4bab3983ff4 Mon Sep 17 00:00:00 2001 From: Vladislav Yarmak Date: Sat, 21 Sep 2019 19:22:27 +0300 Subject: [PATCH 080/455] workaround gist clone in `--prefer-ssh` mode --- bin/github-backup | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 87fa565..efa44a6 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -387,12 +387,12 @@ def get_github_host(args): def get_github_repo_url(args, repository): - if args.prefer_ssh: - return repository['ssh_url'] - if repository.get('is_gist'): return repository['git_pull_url'] + if args.prefer_ssh: + return repository['ssh_url'] + auth = get_auth(args, False) if auth: repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( From fac8e4274fdcee90ce52e42e70aa5639486a5013 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 23 Sep 2019 11:45:01 -0400 Subject: [PATCH 081/455] Release version 0.26.0 --- CHANGES.rst | 9 ++++++++- github_backup/__init__.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7cdcf5a..e3f6c8f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,16 @@ Changelog ========= -0.25.0 (2019-07-03) +0.26.0 (2019-09-23) ------------------- ------------------------ +- Workaround gist clone in `--prefer-ssh` mode. [Vladislav Yarmak] +- Create PULL_REQUEST.md. [Jose Diaz-Gonzalez] +- Create ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] + + +0.25.0 (2019-07-03) +------------------- - Issue 119: Change retrieve_data to be a generator. [2a] See issue #119. 
diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 8c308d7..826d20e 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.25.0' +__version__ = '0.26.0' From cfeaee73090bb71202a09cfb928021c43c5795ee Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 6 Jan 2020 10:20:07 -0500 Subject: [PATCH 082/455] Update ISSUE_TEMPLATE.md --- ISSUE_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md index eb45cf8..734420b 100644 --- a/ISSUE_TEMPLATE.md +++ b/ISSUE_TEMPLATE.md @@ -6,7 +6,7 @@ If pull requests implementing bug fixes or enhancements are pushed, I am happy t If you wish to have a bug fixed, you have a few options: -- Fix it yourself. +- Fix it yourself and file a pull request. - File a bug and hope someone else fixes it for you. - Pay me to fix it (my rate is $200 an hour, minimum 1 hour, contact me via my [github email address](https://github.com/josegonzalez) if you want to go this route). From 27441b71b6644a5de0e27bee12fc9e0c81431445 Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 11:13:25 -0500 Subject: [PATCH 083/455] Crash when an release asset doesn't exist Currently, the script crashes whenever a release asset is unable to download (for example a 404 response). This change instead logs the failure and allows the script to continue. No retry logic is enabled, but at least it prevents the crash and allows the backup to complete. Retry logic can be implemented later if wanted. 
closes https://github.com/josegonzalez/python-github-backup/issues/129 --- bin/github-backup | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index efa44a6..e451e6a 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -569,15 +569,27 @@ def download_file(url, path, auth): request.add_header('Accept', 'application/octet-stream') request.add_header('Authorization', 'Basic '.encode('ascii') + auth) opener = build_opener(S3HTTPRedirectHandler) - response = opener.open(request) - chunk_size = 16 * 1024 - with open(path, 'wb') as f: - while True: - chunk = response.read(chunk_size) - if not chunk: - break - f.write(chunk) + try: + response = opener.open(request) + + chunk_size = 16 * 1024 + with open(path, 'wb') as f: + while True: + chunk = response.read(chunk_size) + if not chunk: + break + f.write(chunk) + except HTTPError as exc: + # Gracefully handle 404 responses (and others) when downloading from S3 + log_info('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) + except URLError as e: + # Gracefully hadnle other URL errors + log_info('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) + except socket.error as e: + # Gracefully handle socket errors + # TODO: Implement retry logic + log_info('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) def get_authenticated_user(args): From 195e700128e2738e37e458635e87dde43a16fe93 Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 11:26:06 -0500 Subject: [PATCH 084/455] Improved gitignore, macOS files and IDE configs Ignores the annoying hidden macOS files .DS_Store and ._* as well as the IDE configuration folders for contributors using the popular Visual Studio Code and Atom IDEs (more can be added later as needed). 
--- .gitignore | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index fa073f2..b87f456 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,11 @@ doc/_build # Generated man page doc/aws_hostname.1 + +# Annoying macOS files +.DS_Store +._* + +# IDE configuration files +.vscode +.atom \ No newline at end of file From 869f761c906852d41a81856138e4181369dfffa1 Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 12:40:47 -0500 Subject: [PATCH 085/455] Separate release assets and skip re-downloading Currently the script puts all release assets into the same folder called `releases`. So any time 2 release files have the same name, only the last one downloaded is actually saved. A particularly bad example of this is MacDownApp/macdown where all of their releases are named `MacDown.app.zip`. So even though they have 36 releases and all 36 are downloaded, only the last one is actually saved. With this change, each release's assets are now stored in a subfolder inside `releases` named after the release name. There could still be edge cases if two releases have the same name, but this is still much safer than the previous behavior. This change also now checks if the asset file already exists on disk and skips downloading it. This drastically speeds up additional syncs as it no longer downloads every single release every single time. It will now only download new releases which I believe is the expected behavior.
closes https://github.com/josegonzalez/python-github-backup/issues/126 --- bin/github-backup | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index efa44a6..dac6e2a 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -565,6 +565,10 @@ class S3HTTPRedirectHandler(HTTPRedirectHandler): def download_file(url, path, auth): + # Skip downloading release assets if they already exist on disk so we don't redownload on every sync + if os.path.exists(path): + return + request = Request(url) request.add_header('Accept', 'application/octet-stream') request.add_header('Authorization', 'Basic '.encode('ascii') + auth) @@ -958,8 +962,14 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F if include_assets: assets = retrieve_data(args, release['assets_url']) + if len(assets) > 0: + # give release asset files somewhere to live + release_assets_cwd = os.path.join(release_cwd, release_name) + mkdir_p(release_assets_cwd) + + # download any release asset files (not including source archives) for asset in assets: - download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) + download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) def fetch_repository(name, From ba4fa9fa2db13287207d2fb4cb447596651f636a Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 12:50:33 -0500 Subject: [PATCH 086/455] Moved asset downloading loop inside the if block --- bin/github-backup | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index dac6e2a..06f3a42 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -963,13 +963,11 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F if include_assets: assets = retrieve_data(args, release['assets_url']) if len(assets) > 0: - # give release asset files somewhere to live + # give release asset 
files somewhere to live & download them (not including source archives) release_assets_cwd = os.path.join(release_cwd, release_name) mkdir_p(release_assets_cwd) - - # download any release asset files (not including source archives) - for asset in assets: - download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) + for asset in assets: + download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) def fetch_repository(name, From 71b4288e6b7f53b9a7a60873229da34ba5581131 Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 13:04:40 -0500 Subject: [PATCH 087/455] Added newline to end of file --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b87f456..102f70c 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,5 @@ doc/aws_hostname.1 # IDE configuration files .vscode -.atom \ No newline at end of file +.atom + From 38010d7c3976a9f5571c5a4253f8bcd11eb5455d Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 13:06:22 -0500 Subject: [PATCH 088/455] Switched log_info to log_warning in download_file --- bin/github-backup | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index e451e6a..5ca799c 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -582,14 +582,14 @@ def download_file(url, path, auth): f.write(chunk) except HTTPError as exc: # Gracefully handle 404 responses (and others) when downloading from S3 - log_info('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) + log_warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) except URLError as e: # Gracefully hadnle other URL errors - log_info('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) + log_warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) except socket.error as e: # Gracefully 
handle socket errors # TODO: Implement retry logic - log_info('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) + log_warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) def get_authenticated_user(args): From cb0293cbe57938cb0286a3feb7c5b121b4c47959 Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 14:15:41 -0500 Subject: [PATCH 089/455] Fixed comment typo --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 5ca799c..c10694c 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -584,7 +584,7 @@ def download_file(url, path, auth): # Gracefully handle 404 responses (and others) when downloading from S3 log_warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) except URLError as e: - # Gracefully hadnle other URL errors + # Gracefully handle other URL errors log_warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) except socket.error as e: # Gracefully handle socket errors From 81a2f762da5a0ee4c54792ba2174e6775cd0aa3e Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 21:10:50 -0500 Subject: [PATCH 090/455] Fixed macOS keychain access when using Python 3 Python 3 is returning bytes rather than a string, so the string concatenation to create the auth variable was throwing an exception which the script was interpreting to mean it couldn't find the password. Adding a conversion to string first fixed the issue. 
--- bin/github-backup | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index 06f3a42..9b01735 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -337,6 +337,8 @@ def get_auth(args, encode=True): '-s', args.osx_keychain_item_name, '-a', args.osx_keychain_item_account, '-w'], stderr=devnull).strip()) + if not PY2: + token = token.decode('utf-8') auth = token + ':' + 'x-oauth-basic' except: log_error('No password item matching the provided name and account could be found in the osx keychain.') From e8a255b450baa2e915e3f346cb400587e43b0bf7 Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Mon, 6 Jan 2020 21:25:54 -0500 Subject: [PATCH 091/455] Public repos no longer include the auth token When backing up repositories using an auth token and https, the GitHub personal auth token is leaked in each backed up repository. It is included in the URL of each repository's git remote url. This is not needed as they are public and can be accessed without the token and can cause issues in the future if the token is ever changed, so I think it makes more sense not to have the token stored in each repo backup. I think the token should only be "leaked" like this out of necessity, e.g. it's a private repository and the --prefer-ssh option was not chosen so https with auth token was required to perform the clone. 
--- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 06f3a42..e3fd1f8 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -394,7 +394,7 @@ def get_github_repo_url(args, repository): return repository['ssh_url'] auth = get_auth(args, False) - if auth: + if auth and repository['private'] == True: repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( auth, get_github_host(args), From b864218b4403b595792b82664368648f0f393d17 Mon Sep 17 00:00:00 2001 From: smiley Date: Mon, 20 Jan 2020 15:40:52 +0200 Subject: [PATCH 092/455] Remove deprecated (and removed) git lfs flags "--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script. --- bin/github-backup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index 1d84af3..374d814 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -1032,7 +1032,7 @@ def fetch_repository(name, logging_subprocess(git_command, None, cwd=local_dir) if lfs_clone: - git_command = ['git', 'lfs', 'fetch', '--all', '--force', '--tags', '--prune'] + git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] else: git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] logging_subprocess(git_command, None, cwd=local_dir) From 4d5126f303c4d1bb86349ba73f11bdd4b5f88e8e Mon Sep 17 00:00:00 2001 From: Ben Baron Date: Tue, 21 Jan 2020 21:15:57 -0500 Subject: [PATCH 093/455] Fixed script fails if not installed from pip At the top of the script, the line from github_backup import __version__ gets the script's version number to use if the script is called with the -v or --version flags. The problem is that if the script hasn't been installed via pip (for example I cloned the repo directly to my backup server), the script will fail due to an import exception. 
Also presumably it will always use the version number from pip even if running a modified version from git or a fork or something, though this does not fix that as I have no idea how to check if it's running the pip installed version or not. But at least the script will now work fine if cloned from git or just copied to another machine. closes https://github.com/josegonzalez/python-github-backup/issues/141 --- bin/github-backup | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 1d84af3..d091f85 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -41,7 +41,11 @@ except ImportError: from urllib2 import HTTPRedirectHandler from urllib2 import build_opener -from github_backup import __version__ +try: + from github_backup import __version__ + VERSION = __version__ +except ImportError: + VERSION = 'unknown' FNULL = open(os.devnull, 'w') @@ -302,7 +306,7 @@ def parse_args(): help='Clone repositories using SSH instead of HTTPS') parser.add_argument('-v', '--version', action='version', - version='%(prog)s ' + __version__) + version='%(prog)s ' + VERSION) parser.add_argument('--keychain-name', dest='osx_keychain_item_name', help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token') From 498d9eba32aced3f0cefe450b90f1de50afa1c02 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Tue, 21 Jan 2020 21:29:44 -0500 Subject: [PATCH 094/455] Release version 0.27.0 --- CHANGES.rst | 42 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e3f6c8f..d2c249e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,49 @@ Changelog ========= -0.26.0 (2019-09-23) +0.27.0 (2020-01-21) ------------------- ------------------------ +- Fixed script fails if not installed from pip. 
[Ben Baron] + + At the top of the script, the line from github_backup import __version__ gets the script's version number to use if the script is called with the -v or --version flags. The problem is that if the script hasn't been installed via pip (for example I cloned the repo directly to my backup server), the script will fail due to an import exception. + + Also presumably it will always use the version number from pip even if running a modified version from git or a fork or something, though this does not fix that as I have no idea how to check if it's running the pip installed version or not. But at least the script will now work fine if cloned from git or just copied to another machine. + + closes https://github.com/josegonzalez/python-github-backup/issues/141 +- Fixed macOS keychain access when using Python 3. [Ben Baron] + + Python 3 is returning bytes rather than a string, so the string concatenation to create the auth variable was throwing an exception which the script was interpreting to mean it couldn't find the password. Adding a conversion to string first fixed the issue. +- Public repos no longer include the auth token. [Ben Baron] + + When backing up repositories using an auth token and https, the GitHub personal auth token is leaked in each backed up repository. It is included in the URL of each repository's git remote url. + + This is not needed as they are public and can be accessed without the token and can cause issues in the future if the token is ever changed, so I think it makes more sense not to have the token stored in each repo backup. I think the token should only be "leaked" like this out of necessity, e.g. it's a private repository and the --prefer-ssh option was not chosen so https with auth token was required to perform the clone. +- Fixed comment typo. [Ben Baron] +- Switched log_info to log_warning in download_file. [Ben Baron] +- Crash when an release asset doesn't exist. 
[Ben Baron] + + Currently, the script crashes whenever a release asset is unable to download (for example a 404 response). This change instead logs the failure and allows the script to continue. No retry logic is enabled, but at least it prevents the crash and allows the backup to complete. Retry logic can be implemented later if wanted. + + closes https://github.com/josegonzalez/python-github-backup/issues/129 +- Moved asset downloading loop inside the if block. [Ben Baron] +- Separate release assets and skip re-downloading. [Ben Baron] + + Currently the script puts all release assets into the same folder called `releases`. So any time 2 release files have the same name, only the last one downloaded is actually saved. A particularly bad example of this is MacDownApp/macdown where all of their releases are named `MacDown.app.zip`. So even though they have 36 releases and all 36 are downloaded, only the last one is actually saved. + + With this change, each release's assets are now stored in a subfolder inside `releases` named after the release name. There could still be edge cases if two releases have the same name, but this is still much safer than the previous behavior. + + This change also now checks if the asset file already exists on disk and skips downloading it. This drastically speeds up additional syncs as it no longer downloads every single release every single time. It will now only download new releases which I believe is the expected behavior. + + closes https://github.com/josegonzalez/python-github-backup/issues/126 +- Added newline to end of file. [Ben Baron] +- Improved gitignore, macOS files and IDE configs. [Ben Baron] + + Ignores the annoying hidden macOS files .DS_Store and ._* as well as the IDE configuration folders for contributors using the popular Visual Studio Code and Atom IDEs (more can be added later as needed). + + +0.26.0 (2019-09-23) +------------------- - Workaround gist clone in `--prefer-ssh` mode.
[Vladislav Yarmak] - Create PULL_REQUEST.md. [Jose Diaz-Gonzalez] - Create ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 826d20e..cf7b6d6 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.26.0' +__version__ = '0.27.0' From 196acd0aca18bbc332be773099ee07a00c896610 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 3 Feb 2020 11:41:34 -0500 Subject: [PATCH 095/455] Release version 0.28.0 --- CHANGES.rst | 9 ++++++++- github_backup/__init__.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d2c249e..a8655d6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,16 @@ Changelog ========= -0.27.0 (2020-01-21) +0.28.0 (2020-02-03) ------------------- ------------------------ +- Remove deprecated (and removed) git lfs flags. [smiley] + + "--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script. + + +0.27.0 (2020-01-22) +------------------- - Fixed script fails if not installed from pip. [Ben Baron] At the top of the script, the line from github_backup import __version__ gets the script's version number to use if the script is called with the -v or --version flags. The problem is that if the script hasn't been installed via pip (for example I cloned the repo directly to my backup server), the script will fail due to an import exception. 
diff --git a/github_backup/__init__.py b/github_backup/__init__.py index cf7b6d6..1bf3675 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.27.0' +__version__ = '0.28.0' From 03c68561a52b3ff66c84f9d9225a5d9537babb30 Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 10 Feb 2020 17:22:21 -0600 Subject: [PATCH 096/455] #50 - refactor for friendlier import --- bin/github-backup | 1139 +------------------------------ github_backup/github_backup.py | 1143 ++++++++++++++++++++++++++++++++ 2 files changed, 1144 insertions(+), 1138 deletions(-) create mode 100644 github_backup/github_backup.py diff --git a/bin/github-backup b/bin/github-backup index cc1217b..029752e 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -1,1143 +1,6 @@ #!/usr/bin/env python -from __future__ import print_function -import socket - -import argparse -import base64 -import calendar -import codecs -import errno -import getpass -import json -import logging -import os -import re -import select -import subprocess -import sys -import time -import platform -PY2 = False -try: - # python 3 - from urllib.parse import urlparse - from urllib.parse import quote as urlquote - from urllib.parse import urlencode - from urllib.error import HTTPError, URLError - from urllib.request import urlopen - from urllib.request import Request - from urllib.request import HTTPRedirectHandler - from urllib.request import build_opener -except ImportError: - # python 2 - PY2 = True - from urlparse import urlparse - from urllib import quote as urlquote - from urllib import urlencode - from urllib2 import HTTPError, URLError - from urllib2 import urlopen - from urllib2 import Request - from urllib2 import HTTPRedirectHandler - from urllib2 import build_opener - -try: - from github_backup import __version__ - VERSION = __version__ -except ImportError: - VERSION = 'unknown' - -FNULL = open(os.devnull, 'w') - - -def log_error(message): - """ - Log message (str) or messages 
(List[str]) to stderr and exit with status 1 - """ - log_warning(message) - sys.exit(1) - - -def log_info(message): - """ - Log message (str) or messages (List[str]) to stdout - """ - if type(message) == str: - message = [message] - - for msg in message: - sys.stdout.write("{0}\n".format(msg)) - - -def log_warning(message): - """ - Log message (str) or messages (List[str]) to stderr - """ - if type(message) == str: - message = [message] - - for msg in message: - sys.stderr.write("{0}\n".format(msg)) - - -def logging_subprocess(popenargs, - logger, - stdout_log_level=logging.DEBUG, - stderr_log_level=logging.ERROR, - **kwargs): - """ - Variant of subprocess.call that accepts a logger instead of stdout/stderr, - and logs stdout messages via logger.debug and stderr messages via - logger.error. - """ - child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, **kwargs) - if sys.platform == 'win32': - log_info("Windows operating system detected - no subprocess logging will be returned") - - log_level = {child.stdout: stdout_log_level, - child.stderr: stderr_log_level} - - def check_io(): - if sys.platform == 'win32': - return - ready_to_read = select.select([child.stdout, child.stderr], - [], - [], - 1000)[0] - for io in ready_to_read: - line = io.readline() - if not logger: - continue - if not (io == child.stderr and not line): - logger.log(log_level[io], line[:-1]) - - # keep checking stdout/stderr until the child exits - while child.poll() is None: - check_io() - - check_io() # check again to catch anything after the process exits - - rc = child.wait() - - if rc != 0: - print('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr) - print('\t', ' '.join(popenargs), file=sys.stderr) - - return rc - - -def mkdir_p(*args): - for path in args: - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - -def mask_password(url, secret='*****'): - parsed 
= urlparse(url) - - if not parsed.password: - return url - elif parsed.password == 'x-oauth-basic': - return url.replace(parsed.username, secret) - - return url.replace(parsed.password, secret) - - -def parse_args(): - parser = argparse.ArgumentParser(description='Backup a github account') - parser.add_argument('user', - metavar='USER', - type=str, - help='github username') - parser.add_argument('-u', - '--username', - dest='username', - help='username for basic auth') - parser.add_argument('-p', - '--password', - dest='password', - help='password for basic auth. ' - 'If a username is given but not a password, the ' - 'password will be prompted for.') - parser.add_argument('-t', - '--token', - dest='token', - help='personal access or OAuth token, or path to token (file://...)') # noqa - parser.add_argument('-o', - '--output-directory', - default='.', - dest='output_directory', - help='directory at which to backup the repositories') - parser.add_argument('-i', - '--incremental', - action='store_true', - dest='incremental', - help='incremental backup') - parser.add_argument('--starred', - action='store_true', - dest='include_starred', - help='include JSON output of starred repositories in backup') - parser.add_argument('--all-starred', - action='store_true', - dest='all_starred', - help='include starred repositories in backup [*]') - parser.add_argument('--watched', - action='store_true', - dest='include_watched', - help='include JSON output of watched repositories in backup') - parser.add_argument('--followers', - action='store_true', - dest='include_followers', - help='include JSON output of followers in backup') - parser.add_argument('--following', - action='store_true', - dest='include_following', - help='include JSON output of following users in backup') - parser.add_argument('--all', - action='store_true', - dest='include_everything', - help='include everything in backup (not including [*])') - parser.add_argument('--issues', - action='store_true', - 
dest='include_issues', - help='include issues in backup') - parser.add_argument('--issue-comments', - action='store_true', - dest='include_issue_comments', - help='include issue comments in backup') - parser.add_argument('--issue-events', - action='store_true', - dest='include_issue_events', - help='include issue events in backup') - parser.add_argument('--pulls', - action='store_true', - dest='include_pulls', - help='include pull requests in backup') - parser.add_argument('--pull-comments', - action='store_true', - dest='include_pull_comments', - help='include pull request review comments in backup') - parser.add_argument('--pull-commits', - action='store_true', - dest='include_pull_commits', - help='include pull request commits in backup') - parser.add_argument('--pull-details', - action='store_true', - dest='include_pull_details', - help='include more pull request details in backup [*]') - parser.add_argument('--labels', - action='store_true', - dest='include_labels', - help='include labels in backup') - parser.add_argument('--hooks', - action='store_true', - dest='include_hooks', - help='include hooks in backup (works only when authenticated)') # noqa - parser.add_argument('--milestones', - action='store_true', - dest='include_milestones', - help='include milestones in backup') - parser.add_argument('--repositories', - action='store_true', - dest='include_repository', - help='include repository clone in backup') - parser.add_argument('--bare', - action='store_true', - dest='bare_clone', - help='clone bare repositories') - parser.add_argument('--lfs', - action='store_true', - dest='lfs_clone', - help='clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]') - parser.add_argument('--wikis', - action='store_true', - dest='include_wiki', - help='include wiki clone in backup') - parser.add_argument('--gists', - action='store_true', - dest='include_gists', - help='include gists in backup [*]') - 
parser.add_argument('--starred-gists', - action='store_true', - dest='include_starred_gists', - help='include starred gists in backup [*]') - parser.add_argument('--skip-existing', - action='store_true', - dest='skip_existing', - help='skip project if a backup directory exists') - parser.add_argument('-L', - '--languages', - dest='languages', - help='only allow these languages', - nargs='*') - parser.add_argument('-N', - '--name-regex', - dest='name_regex', - help='python regex to match names against') - parser.add_argument('-H', - '--github-host', - dest='github_host', - help='GitHub Enterprise hostname') - parser.add_argument('-O', - '--organization', - action='store_true', - dest='organization', - help='whether or not this is an organization user') - parser.add_argument('-R', - '--repository', - dest='repository', - help='name of repository to limit backup to') - parser.add_argument('-P', '--private', - action='store_true', - dest='private', - help='include private repositories [*]') - parser.add_argument('-F', '--fork', - action='store_true', - dest='fork', - help='include forked repositories [*]') - parser.add_argument('--prefer-ssh', - action='store_true', - help='Clone repositories using SSH instead of HTTPS') - parser.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + VERSION) - parser.add_argument('--keychain-name', - dest='osx_keychain_item_name', - help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--keychain-account', - dest='osx_keychain_item_account', - help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--releases', - action='store_true', - dest='include_releases', - help='include release information, not including assets or binaries' - ) - parser.add_argument('--assets', - action='store_true', - dest='include_assets', - help='include assets alongside release 
information; only applies if including releases') - return parser.parse_args() - - -def get_auth(args, encode=True): - auth = None - - if args.osx_keychain_item_name: - if not args.osx_keychain_item_account: - log_error('You must specify both name and account fields for osx keychain password items') - else: - if platform.system() != 'Darwin': - log_error("Keychain arguments are only supported on Mac OSX") - try: - with open(os.devnull, 'w') as devnull: - token = (subprocess.check_output([ - 'security', 'find-generic-password', - '-s', args.osx_keychain_item_name, - '-a', args.osx_keychain_item_account, - '-w'], stderr=devnull).strip()) - if not PY2: - token = token.decode('utf-8') - auth = token + ':' + 'x-oauth-basic' - except: - log_error('No password item matching the provided name and account could be found in the osx keychain.') - elif args.osx_keychain_item_account: - log_error('You must specify both name and account fields for osx keychain password items') - elif args.token: - _path_specifier = 'file://' - if args.token.startswith(_path_specifier): - args.token = open(args.token[len(_path_specifier):], - 'rt').readline().strip() - auth = args.token + ':' + 'x-oauth-basic' - elif args.username: - if not args.password: - args.password = getpass.getpass() - if encode: - password = args.password - else: - password = urlquote(args.password) - auth = args.username + ':' + password - elif args.password: - log_error('You must specify a username for basic auth') - - if not auth: - return None - - if not encode: - return auth - - return base64.b64encode(auth.encode('ascii')) - - -def get_github_api_host(args): - if args.github_host: - host = args.github_host + '/api/v3' - else: - host = 'api.github.com' - - return host - - -def get_github_host(args): - if args.github_host: - host = args.github_host - else: - host = 'github.com' - - return host - - -def get_github_repo_url(args, repository): - if repository.get('is_gist'): - return repository['git_pull_url'] - - if 
args.prefer_ssh: - return repository['ssh_url'] - - auth = get_auth(args, False) - if auth and repository['private'] == True: - repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( - auth, - get_github_host(args), - repository['owner']['login'], - repository['name']) - else: - repo_url = repository['clone_url'] - - return repo_url - - -def retrieve_data_gen(args, template, query_args=None, single_request=False): - auth = get_auth(args) - query_args = get_query_args(query_args) - per_page = 100 - page = 0 - - while True: - page = page + 1 - request = _construct_request(per_page, page, query_args, template, auth) # noqa - r, errors = _get_response(request, auth, template) - - status_code = int(r.getcode()) - - retries = 0 - while retries < 3 and status_code == 502: - print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') - retries += 1 - time.sleep(5) - request = _construct_request(per_page, page, query_args, template, auth) # noqa - r, errors = _get_response(request, auth, template) - - status_code = int(r.getcode()) - - if status_code != 200: - template = 'API request returned HTTP {0}: {1}' - errors.append(template.format(status_code, r.reason)) - log_error(errors) - - response = json.loads(r.read().decode('utf-8')) - if len(errors) == 0: - if type(response) == list: - for resp in response: - yield resp - if len(response) < per_page: - break - elif type(response) == dict and single_request: - yield response - - if len(errors) > 0: - log_error(errors) - - if single_request: - break - -def retrieve_data(args, template, query_args=None, single_request=False): - return list(retrieve_data_gen(args, template, query_args, single_request)) - -def get_query_args(query_args=None): - if not query_args: - query_args = {} - return query_args - - -def _get_response(request, auth, template): - retry_timeout = 3 - errors = [] - # We'll make requests in a loop so we can - # delay and retry in the case of rate-limiting - while True: - should_continue = False - try: 
- r = urlopen(request) - except HTTPError as exc: - errors, should_continue = _request_http_error(exc, auth, errors) # noqa - r = exc - except URLError as e: - log_warning(e.reason) - should_continue = _request_url_error(template, retry_timeout) - if not should_continue: - raise - except socket.error as e: - log_warning(e.strerror) - should_continue = _request_url_error(template, retry_timeout) - if not should_continue: - raise - - if should_continue: - continue - - break - return r, errors - - -def _construct_request(per_page, page, query_args, template, auth): - querystring = urlencode(dict(list({ - 'per_page': per_page, - 'page': page - }.items()) + list(query_args.items()))) - - request = Request(template + '?' + querystring) - if auth is not None: - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) - log_info('Requesting {}?{}'.format(template, querystring)) - return request - - -def _request_http_error(exc, auth, errors): - # HTTPError behaves like a Response so we can - # check the status code and headers to see exactly - # what failed. 
- - should_continue = False - headers = exc.headers - limit_remaining = int(headers.get('x-ratelimit-remaining', 0)) - - if exc.code == 403 and limit_remaining < 1: - # The X-RateLimit-Reset header includes a - # timestamp telling us when the limit will reset - # so we can calculate how long to wait rather - # than inefficiently polling: - gm_now = calendar.timegm(time.gmtime()) - reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now - # We'll never sleep for less than 10 seconds: - delta = max(10, reset - gm_now) - - limit = headers.get('x-ratelimit-limit') - print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa - file=sys.stderr) - - if auth is None: - print('Hint: Authenticate to raise your GitHub rate limit', - file=sys.stderr) - - time.sleep(delta) - should_continue = True - return errors, should_continue - - -def _request_url_error(template, retry_timeout): - # Incase of a connection timing out, we can retry a few time - # But we won't crash and not back-up the rest now - log_info('{} timed out'.format(template)) - retry_timeout -= 1 - - if retry_timeout >= 0: - return True - - log_error('{} timed out to much, skipping!') - return False - - -class S3HTTPRedirectHandler(HTTPRedirectHandler): - """ - A subclassed redirect handler for downloading Github assets from S3. - - urllib will add the Authorization header to the redirected request to S3, which will result in a 400, - so we should remove said header on redirect. 
- """ - def redirect_request(self, req, fp, code, msg, headers, newurl): - if PY2: - # HTTPRedirectHandler is an old style class - request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) - else: - request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) - del request.headers['Authorization'] - return request - - -def download_file(url, path, auth): - # Skip downloading release assets if they already exist on disk so we don't redownload on every sync - if os.path.exists(path): - return - - request = Request(url) - request.add_header('Accept', 'application/octet-stream') - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) - opener = build_opener(S3HTTPRedirectHandler) - - try: - response = opener.open(request) - - chunk_size = 16 * 1024 - with open(path, 'wb') as f: - while True: - chunk = response.read(chunk_size) - if not chunk: - break - f.write(chunk) - except HTTPError as exc: - # Gracefully handle 404 responses (and others) when downloading from S3 - log_warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) - except URLError as e: - # Gracefully handle other URL errors - log_warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) - except socket.error as e: - # Gracefully handle socket errors - # TODO: Implement retry logic - log_warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) - - -def get_authenticated_user(args): - template = 'https://{0}/user'.format(get_github_api_host(args)) - data = retrieve_data(args, template, single_request=True) - return data[0] - - -def check_git_lfs_install(): - exit_code = subprocess.call(['git', 'lfs', 'version']) - if exit_code != 0: - log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') - - -def retrieve_repositories(args, authenticated_user): - 
log_info('Retrieving repositories') - single_request = False - if args.user == authenticated_user['login']: - # we must use the /user/repos API to be able to access private repos - template = 'https://{0}/user/repos'.format( - get_github_api_host(args)) - else: - if args.private and not args.organization: - log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') - template = 'https://{0}/users/{1}/repos'.format( - get_github_api_host(args), - args.user) - - if args.organization: - template = 'https://{0}/orgs/{1}/repos'.format( - get_github_api_host(args), - args.user) - - if args.repository: - single_request = True - template = 'https://{0}/repos/{1}/{2}'.format( - get_github_api_host(args), - args.user, - args.repository) - - repos = retrieve_data(args, template, single_request=single_request) - - if args.all_starred: - starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(args), args.user) - starred_repos = retrieve_data(args, starred_template, single_request=False) - # flag each repo as starred for downstream processing - for item in starred_repos: - item.update({'is_starred': True}) - repos.extend(starred_repos) - - if args.include_gists: - gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(args), args.user) - gists = retrieve_data(args, gists_template, single_request=False) - # flag each repo as a gist for downstream processing - for item in gists: - item.update({'is_gist': True}) - repos.extend(gists) - - if args.include_starred_gists: - starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) - starred_gists = retrieve_data(args, starred_gists_template, single_request=False) - # flag each repo as a starred gist for downstream processing - for item in starred_gists: - item.update({'is_gist': True, - 'is_starred': True}) - repos.extend(starred_gists) - - return repos - - -def filter_repositories(args, 
unfiltered_repositories): - log_info('Filtering repositories') - - repositories = [] - for r in unfiltered_repositories: - # gists can be anonymous, so need to safely check owner - if r.get('owner', {}).get('login') == args.user or r.get('is_starred'): - repositories.append(r) - - name_regex = None - if args.name_regex: - name_regex = re.compile(args.name_regex) - - languages = None - if args.languages: - languages = [x.lower() for x in args.languages] - - if not args.fork: - repositories = [r for r in repositories if not r.get('fork')] - if not args.private: - repositories = [r for r in repositories if not r.get('private') or r.get('public')] - if languages: - repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa - if name_regex: - repositories = [r for r in repositories if name_regex.match(r['name'])] - - return repositories - - -def backup_repositories(args, output_directory, repositories): - log_info('Backing up repositories') - repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) - - if args.incremental: - last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa - last_update_path = os.path.join(output_directory, 'last_update') - if os.path.exists(last_update_path): - args.since = open(last_update_path).read().strip() - else: - args.since = None - else: - args.since = None - - for repository in repositories: - if repository.get('is_gist'): - repo_cwd = os.path.join(output_directory, 'gists', repository['id']) - elif repository.get('is_starred'): - # put starred repos in -o/starred/${owner}/${repo} to prevent collision of - # any repositories with the same name - repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) - else: - repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) - - repo_dir = os.path.join(repo_cwd, 
'repository') - repo_url = get_github_repo_url(args, repository) - - include_gists = (args.include_gists or args.include_starred_gists) - if (args.include_repository or args.include_everything) \ - or (include_gists and repository.get('is_gist')): - repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') - fetch_repository(repo_name, - repo_url, - repo_dir, - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) - - if repository.get('is_gist'): - # dump gist information to a file as well - output_file = '{0}/gist.json'.format(repo_cwd) - with codecs.open(output_file, 'w', encoding='utf-8') as f: - json_dump(repository, f) - - continue # don't try to back anything else for a gist; it doesn't exist - - download_wiki = (args.include_wiki or args.include_everything) - if repository['has_wiki'] and download_wiki: - fetch_repository(repository['name'], - repo_url.replace('.git', '.wiki.git'), - os.path.join(repo_cwd, 'wiki'), - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) - - if args.include_issues or args.include_everything: - backup_issues(args, repo_cwd, repository, repos_template) - - if args.include_pulls or args.include_everything: - backup_pulls(args, repo_cwd, repository, repos_template) - - if args.include_milestones or args.include_everything: - backup_milestones(args, repo_cwd, repository, repos_template) - - if args.include_labels or args.include_everything: - backup_labels(args, repo_cwd, repository, repos_template) - - if args.include_hooks or args.include_everything: - backup_hooks(args, repo_cwd, repository, repos_template) - - if args.include_releases or args.include_everything: - backup_releases(args, repo_cwd, repository, repos_template, - include_assets=args.include_assets or args.include_everything) - - if args.incremental: - open(last_update_path, 'w').write(last_update) - - -def backup_issues(args, repo_cwd, repository, 
repos_template): - has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) - if args.skip_existing and has_issues_dir: - return - - log_info('Retrieving {0} issues'.format(repository['full_name'])) - issue_cwd = os.path.join(repo_cwd, 'issues') - mkdir_p(repo_cwd, issue_cwd) - - issues = {} - issues_skipped = 0 - issues_skipped_message = '' - _issue_template = '{0}/{1}/issues'.format(repos_template, - repository['full_name']) - - should_include_pulls = args.include_pulls or args.include_everything - issue_states = ['open', 'closed'] - for issue_state in issue_states: - query_args = { - 'filter': 'all', - 'state': issue_state - } - if args.since: - query_args['since'] = args.since - - _issues = retrieve_data(args, - _issue_template, - query_args=query_args) - for issue in _issues: - # skip pull requests which are also returned as issues - # if retrieving pull requests is requested as well - if 'pull_request' in issue and should_include_pulls: - issues_skipped += 1 - continue - - issues[issue['number']] = issue - - if issues_skipped: - issues_skipped_message = ' (skipped {0} pull requests)'.format( - issues_skipped) - - log_info('Saving {0} issues to disk{1}'.format( - len(list(issues.keys())), issues_skipped_message)) - comments_template = _issue_template + '/{0}/comments' - events_template = _issue_template + '/{0}/events' - for number, issue in list(issues.items()): - if args.include_issue_comments or args.include_everything: - template = comments_template.format(number) - issues[number]['comment_data'] = retrieve_data(args, template) - if args.include_issue_events or args.include_everything: - template = events_template.format(number) - issues[number]['event_data'] = retrieve_data(args, template) - - issue_file = '{0}/{1}.json'.format(issue_cwd, number) - with codecs.open(issue_file, 'w', encoding='utf-8') as f: - json_dump(issue, f) - - -def backup_pulls(args, repo_cwd, repository, repos_template): - has_pulls_dir = 
os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) - if args.skip_existing and has_pulls_dir: - return - - log_info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa - pulls_cwd = os.path.join(repo_cwd, 'pulls') - mkdir_p(repo_cwd, pulls_cwd) - - pulls = {} - _pulls_template = '{0}/{1}/pulls'.format(repos_template, - repository['full_name']) - query_args = { - 'filter': 'all', - 'state': 'all', - 'sort': 'updated', - 'direction': 'desc', - } - - if not args.include_pull_details: - pull_states = ['open', 'closed'] - for pull_state in pull_states: - query_args['state'] = pull_state - _pulls = retrieve_data_gen(args, - _pulls_template, - query_args=query_args) - for pull in _pulls: - if args.since and pull['updated_at'] < args.since: - break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = pull - else: - _pulls = retrieve_data_gen(args, - _pulls_template, - query_args=query_args) - for pull in _pulls: - if args.since and pull['updated_at'] < args.since: - break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = retrieve_data( - args, - _pulls_template + '/{}'.format(pull['number']), - single_request=True - )[0] - - log_info('Saving {0} pull requests to disk'.format( - len(list(pulls.keys())))) - comments_template = _pulls_template + '/{0}/comments' - commits_template = _pulls_template + '/{0}/commits' - for number, pull in list(pulls.items()): - if args.include_pull_comments or args.include_everything: - template = comments_template.format(number) - pulls[number]['comment_data'] = retrieve_data(args, template) - if args.include_pull_commits or args.include_everything: - template = commits_template.format(number) - pulls[number]['commit_data'] = retrieve_data(args, template) - - pull_file = '{0}/{1}.json'.format(pulls_cwd, number) - with codecs.open(pull_file, 'w', encoding='utf-8') as f: - json_dump(pull, f) - - -def backup_milestones(args, repo_cwd, repository, repos_template): - 
milestone_cwd = os.path.join(repo_cwd, 'milestones') - if args.skip_existing and os.path.isdir(milestone_cwd): - return - - log_info('Retrieving {0} milestones'.format(repository['full_name'])) - mkdir_p(repo_cwd, milestone_cwd) - - template = '{0}/{1}/milestones'.format(repos_template, - repository['full_name']) - - query_args = { - 'state': 'all' - } - - _milestones = retrieve_data(args, template, query_args=query_args) - - milestones = {} - for milestone in _milestones: - milestones[milestone['number']] = milestone - - log_info('Saving {0} milestones to disk'.format( - len(list(milestones.keys())))) - for number, milestone in list(milestones.items()): - milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) - with codecs.open(milestone_file, 'w', encoding='utf-8') as f: - json_dump(milestone, f) - - -def backup_labels(args, repo_cwd, repository, repos_template): - label_cwd = os.path.join(repo_cwd, 'labels') - output_file = '{0}/labels.json'.format(label_cwd) - template = '{0}/{1}/labels'.format(repos_template, - repository['full_name']) - _backup_data(args, - 'labels', - template, - output_file, - label_cwd) - - -def backup_hooks(args, repo_cwd, repository, repos_template): - auth = get_auth(args) - if not auth: - log_info("Skipping hooks since no authentication provided") - return - hook_cwd = os.path.join(repo_cwd, 'hooks') - output_file = '{0}/hooks.json'.format(hook_cwd) - template = '{0}/{1}/hooks'.format(repos_template, - repository['full_name']) - try: - _backup_data(args, - 'hooks', - template, - output_file, - hook_cwd) - except SystemExit: - log_info("Unable to read hooks, skipping") - - -def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): - repository_fullname = repository['full_name'] - - # give release files somewhere to live & log intent - release_cwd = os.path.join(repo_cwd, 'releases') - log_info('Retrieving {0} releases'.format(repository_fullname)) - mkdir_p(repo_cwd, release_cwd) - - query_args = {} - 
- release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) - releases = retrieve_data(args, release_template, query_args=query_args) - - # for each release, store it - log_info('Saving {0} releases to disk'.format(len(releases))) - for release in releases: - release_name = release['tag_name'] - output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) - with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: - json_dump(release, f) - - if include_assets: - assets = retrieve_data(args, release['assets_url']) - if len(assets) > 0: - # give release asset files somewhere to live & download them (not including source archives) - release_assets_cwd = os.path.join(release_cwd, release_name) - mkdir_p(release_assets_cwd) - for asset in assets: - download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) - - -def fetch_repository(name, - remote_url, - local_dir, - skip_existing=False, - bare_clone=False, - lfs_clone=False): - if bare_clone: - if os.path.exists(local_dir): - clone_exists = subprocess.check_output(['git', - 'rev-parse', - '--is-bare-repository'], - cwd=local_dir) == b"true\n" - else: - clone_exists = False - else: - clone_exists = os.path.exists(os.path.join(local_dir, '.git')) - - if clone_exists and skip_existing: - return - - masked_remote_url = mask_password(remote_url) - - initialized = subprocess.call('git ls-remote ' + remote_url, - stdout=FNULL, - stderr=FNULL, - shell=True) - if initialized == 128: - log_info("Skipping {0} ({1}) since it's not initialized".format( - name, masked_remote_url)) - return - - if clone_exists: - log_info('Updating {0} in {1}'.format(name, local_dir)) - - remotes = subprocess.check_output(['git', 'remote', 'show'], - cwd=local_dir) - remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] - - if 'origin' not in remotes: - git_command = ['git', 'remote', 'rm', 'origin'] - logging_subprocess(git_command, None, cwd=local_dir) - 
git_command = ['git', 'remote', 'add', 'origin', remote_url] - logging_subprocess(git_command, None, cwd=local_dir) - else: - git_command = ['git', 'remote', 'set-url', 'origin', remote_url] - logging_subprocess(git_command, None, cwd=local_dir) - - if lfs_clone: - git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] - else: - git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] - logging_subprocess(git_command, None, cwd=local_dir) - else: - log_info('Cloning {0} repository from {1} to {2}'.format( - name, - masked_remote_url, - local_dir)) - if bare_clone: - if lfs_clone: - git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir] - else: - git_command = ['git', 'clone', '--mirror', remote_url, local_dir] - else: - if lfs_clone: - git_command = ['git', 'lfs', 'clone', remote_url, local_dir] - else: - git_command = ['git', 'clone', remote_url, local_dir] - logging_subprocess(git_command, None) - - -def backup_account(args, output_directory): - account_cwd = os.path.join(output_directory, 'account') - - if args.include_starred or args.include_everything: - output_file = "{0}/starred.json".format(account_cwd) - template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) - _backup_data(args, - "starred repositories", - template, - output_file, - account_cwd) - - if args.include_watched or args.include_everything: - output_file = "{0}/watched.json".format(account_cwd) - template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) - _backup_data(args, - "watched repositories", - template, - output_file, - account_cwd) - - if args.include_followers or args.include_everything: - output_file = "{0}/followers.json".format(account_cwd) - template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) - _backup_data(args, - "followers", - template, - output_file, - account_cwd) - - if args.include_following or args.include_everything: - output_file = 
"{0}/following.json".format(account_cwd) - template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) - _backup_data(args, - "following", - template, - output_file, - account_cwd) - - -def _backup_data(args, name, template, output_file, output_directory): - skip_existing = args.skip_existing - if not skip_existing or not os.path.exists(output_file): - log_info('Retrieving {0} {1}'.format(args.user, name)) - mkdir_p(output_directory) - data = retrieve_data(args, template) - - log_info('Writing {0} {1} to disk'.format(len(data), name)) - with codecs.open(output_file, 'w', encoding='utf-8') as f: - json_dump(data, f) - - -def json_dump(data, output_file): - json.dump(data, - output_file, - ensure_ascii=False, - sort_keys=True, - indent=4, - separators=(',', ': ')) - - -def main(): - args = parse_args() - - output_directory = os.path.realpath(args.output_directory) - if not os.path.isdir(output_directory): - log_info('Create output directory {0}'.format(output_directory)) - mkdir_p(output_directory) - - if args.lfs_clone: - check_git_lfs_install() - - log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) - - authenticated_user = get_authenticated_user(args) - repositories = retrieve_repositories(args, authenticated_user) - repositories = filter_repositories(args, repositories) - backup_repositories(args, output_directory, repositories) - backup_account(args, output_directory) - +from github_backup.github_backup import main if __name__ == '__main__': main() diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py new file mode 100644 index 0000000..7a96177 --- /dev/null +++ b/github_backup/github_backup.py @@ -0,0 +1,1143 @@ +#!/usr/bin/env python + +from __future__ import print_function +import socket + +import argparse +import base64 +import calendar +import codecs +import errno +import getpass +import json +import logging +import os +import re +import select +import subprocess +import sys +import 
time +import platform +PY2 = False +try: + # python 3 + from urllib.parse import urlparse + from urllib.parse import quote as urlquote + from urllib.parse import urlencode + from urllib.error import HTTPError, URLError + from urllib.request import urlopen + from urllib.request import Request + from urllib.request import HTTPRedirectHandler + from urllib.request import build_opener +except ImportError: + # python 2 + PY2 = True + from urlparse import urlparse + from urllib import quote as urlquote + from urllib import urlencode + from urllib2 import HTTPError, URLError + from urllib2 import urlopen + from urllib2 import Request + from urllib2 import HTTPRedirectHandler + from urllib2 import build_opener + +try: + from . import __version__ + VERSION = __version__ +except ImportError: + VERSION = 'unknown' + +FNULL = open(os.devnull, 'w') + + +def log_error(message): + """ + Log message (str) or messages (List[str]) to stderr and exit with status 1 + """ + log_warning(message) + sys.exit(1) + + +def log_info(message): + """ + Log message (str) or messages (List[str]) to stdout + """ + if type(message) == str: + message = [message] + + for msg in message: + sys.stdout.write("{0}\n".format(msg)) + + +def log_warning(message): + """ + Log message (str) or messages (List[str]) to stderr + """ + if type(message) == str: + message = [message] + + for msg in message: + sys.stderr.write("{0}\n".format(msg)) + + +def logging_subprocess(popenargs, + logger, + stdout_log_level=logging.DEBUG, + stderr_log_level=logging.ERROR, + **kwargs): + """ + Variant of subprocess.call that accepts a logger instead of stdout/stderr, + and logs stdout messages via logger.debug and stderr messages via + logger.error. 
+ """ + child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, **kwargs) + if sys.platform == 'win32': + log_info("Windows operating system detected - no subprocess logging will be returned") + + log_level = {child.stdout: stdout_log_level, + child.stderr: stderr_log_level} + + def check_io(): + if sys.platform == 'win32': + return + ready_to_read = select.select([child.stdout, child.stderr], + [], + [], + 1000)[0] + for io in ready_to_read: + line = io.readline() + if not logger: + continue + if not (io == child.stderr and not line): + logger.log(log_level[io], line[:-1]) + + # keep checking stdout/stderr until the child exits + while child.poll() is None: + check_io() + + check_io() # check again to catch anything after the process exits + + rc = child.wait() + + if rc != 0: + print('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr) + print('\t', ' '.join(popenargs), file=sys.stderr) + + return rc + + +def mkdir_p(*args): + for path in args: + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +def mask_password(url, secret='*****'): + parsed = urlparse(url) + + if not parsed.password: + return url + elif parsed.password == 'x-oauth-basic': + return url.replace(parsed.username, secret) + + return url.replace(parsed.password, secret) + + +def parse_args(): + parser = argparse.ArgumentParser(description='Backup a github account') + parser.add_argument('user', + metavar='USER', + type=str, + help='github username') + parser.add_argument('-u', + '--username', + dest='username', + help='username for basic auth') + parser.add_argument('-p', + '--password', + dest='password', + help='password for basic auth. 
' + 'If a username is given but not a password, the ' + 'password will be prompted for.') + parser.add_argument('-t', + '--token', + dest='token', + help='personal access or OAuth token, or path to token (file://...)') # noqa + parser.add_argument('-o', + '--output-directory', + default='.', + dest='output_directory', + help='directory at which to backup the repositories') + parser.add_argument('-i', + '--incremental', + action='store_true', + dest='incremental', + help='incremental backup') + parser.add_argument('--starred', + action='store_true', + dest='include_starred', + help='include JSON output of starred repositories in backup') + parser.add_argument('--all-starred', + action='store_true', + dest='all_starred', + help='include starred repositories in backup [*]') + parser.add_argument('--watched', + action='store_true', + dest='include_watched', + help='include JSON output of watched repositories in backup') + parser.add_argument('--followers', + action='store_true', + dest='include_followers', + help='include JSON output of followers in backup') + parser.add_argument('--following', + action='store_true', + dest='include_following', + help='include JSON output of following users in backup') + parser.add_argument('--all', + action='store_true', + dest='include_everything', + help='include everything in backup (not including [*])') + parser.add_argument('--issues', + action='store_true', + dest='include_issues', + help='include issues in backup') + parser.add_argument('--issue-comments', + action='store_true', + dest='include_issue_comments', + help='include issue comments in backup') + parser.add_argument('--issue-events', + action='store_true', + dest='include_issue_events', + help='include issue events in backup') + parser.add_argument('--pulls', + action='store_true', + dest='include_pulls', + help='include pull requests in backup') + parser.add_argument('--pull-comments', + action='store_true', + dest='include_pull_comments', + help='include pull request 
review comments in backup') + parser.add_argument('--pull-commits', + action='store_true', + dest='include_pull_commits', + help='include pull request commits in backup') + parser.add_argument('--pull-details', + action='store_true', + dest='include_pull_details', + help='include more pull request details in backup [*]') + parser.add_argument('--labels', + action='store_true', + dest='include_labels', + help='include labels in backup') + parser.add_argument('--hooks', + action='store_true', + dest='include_hooks', + help='include hooks in backup (works only when authenticated)') # noqa + parser.add_argument('--milestones', + action='store_true', + dest='include_milestones', + help='include milestones in backup') + parser.add_argument('--repositories', + action='store_true', + dest='include_repository', + help='include repository clone in backup') + parser.add_argument('--bare', + action='store_true', + dest='bare_clone', + help='clone bare repositories') + parser.add_argument('--lfs', + action='store_true', + dest='lfs_clone', + help='clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]') + parser.add_argument('--wikis', + action='store_true', + dest='include_wiki', + help='include wiki clone in backup') + parser.add_argument('--gists', + action='store_true', + dest='include_gists', + help='include gists in backup [*]') + parser.add_argument('--starred-gists', + action='store_true', + dest='include_starred_gists', + help='include starred gists in backup [*]') + parser.add_argument('--skip-existing', + action='store_true', + dest='skip_existing', + help='skip project if a backup directory exists') + parser.add_argument('-L', + '--languages', + dest='languages', + help='only allow these languages', + nargs='*') + parser.add_argument('-N', + '--name-regex', + dest='name_regex', + help='python regex to match names against') + parser.add_argument('-H', + '--github-host', + dest='github_host', + help='GitHub Enterprise hostname') + 
parser.add_argument('-O', + '--organization', + action='store_true', + dest='organization', + help='whether or not this is an organization user') + parser.add_argument('-R', + '--repository', + dest='repository', + help='name of repository to limit backup to') + parser.add_argument('-P', '--private', + action='store_true', + dest='private', + help='include private repositories [*]') + parser.add_argument('-F', '--fork', + action='store_true', + dest='fork', + help='include forked repositories [*]') + parser.add_argument('--prefer-ssh', + action='store_true', + help='Clone repositories using SSH instead of HTTPS') + parser.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + VERSION) + parser.add_argument('--keychain-name', + dest='osx_keychain_item_name', + help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token') + parser.add_argument('--keychain-account', + dest='osx_keychain_item_account', + help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') + parser.add_argument('--releases', + action='store_true', + dest='include_releases', + help='include release information, not including assets or binaries' + ) + parser.add_argument('--assets', + action='store_true', + dest='include_assets', + help='include assets alongside release information; only applies if including releases') + return parser.parse_args() + + +def get_auth(args, encode=True): + auth = None + + if args.osx_keychain_item_name: + if not args.osx_keychain_item_account: + log_error('You must specify both name and account fields for osx keychain password items') + else: + if platform.system() != 'Darwin': + log_error("Keychain arguments are only supported on Mac OSX") + try: + with open(os.devnull, 'w') as devnull: + token = (subprocess.check_output([ + 'security', 'find-generic-password', + '-s', args.osx_keychain_item_name, + '-a', args.osx_keychain_item_account, + '-w'], 
stderr=devnull).strip()) + if not PY2: + token = token.decode('utf-8') + auth = token + ':' + 'x-oauth-basic' + except: + log_error('No password item matching the provided name and account could be found in the osx keychain.') + elif args.osx_keychain_item_account: + log_error('You must specify both name and account fields for osx keychain password items') + elif args.token: + _path_specifier = 'file://' + if args.token.startswith(_path_specifier): + args.token = open(args.token[len(_path_specifier):], + 'rt').readline().strip() + auth = args.token + ':' + 'x-oauth-basic' + elif args.username: + if not args.password: + args.password = getpass.getpass() + if encode: + password = args.password + else: + password = urlquote(args.password) + auth = args.username + ':' + password + elif args.password: + log_error('You must specify a username for basic auth') + + if not auth: + return None + + if not encode: + return auth + + return base64.b64encode(auth.encode('ascii')) + + +def get_github_api_host(args): + if args.github_host: + host = args.github_host + '/api/v3' + else: + host = 'api.github.com' + + return host + + +def get_github_host(args): + if args.github_host: + host = args.github_host + else: + host = 'github.com' + + return host + + +def get_github_repo_url(args, repository): + if repository.get('is_gist'): + return repository['git_pull_url'] + + if args.prefer_ssh: + return repository['ssh_url'] + + auth = get_auth(args, False) + if auth and repository['private'] == True: + repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( + auth, + get_github_host(args), + repository['owner']['login'], + repository['name']) + else: + repo_url = repository['clone_url'] + + return repo_url + + +def retrieve_data_gen(args, template, query_args=None, single_request=False): + auth = get_auth(args) + query_args = get_query_args(query_args) + per_page = 100 + page = 0 + + while True: + page = page + 1 + request = _construct_request(per_page, page, query_args, template, auth) # noqa 
+ r, errors = _get_response(request, auth, template) + + status_code = int(r.getcode()) + + retries = 0 + while retries < 3 and status_code == 502: + print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') + retries += 1 + time.sleep(5) + request = _construct_request(per_page, page, query_args, template, auth) # noqa + r, errors = _get_response(request, auth, template) + + status_code = int(r.getcode()) + + if status_code != 200: + template = 'API request returned HTTP {0}: {1}' + errors.append(template.format(status_code, r.reason)) + log_error(errors) + + response = json.loads(r.read().decode('utf-8')) + if len(errors) == 0: + if type(response) == list: + for resp in response: + yield resp + if len(response) < per_page: + break + elif type(response) == dict and single_request: + yield response + + if len(errors) > 0: + log_error(errors) + + if single_request: + break + +def retrieve_data(args, template, query_args=None, single_request=False): + return list(retrieve_data_gen(args, template, query_args, single_request)) + +def get_query_args(query_args=None): + if not query_args: + query_args = {} + return query_args + + +def _get_response(request, auth, template): + retry_timeout = 3 + errors = [] + # We'll make requests in a loop so we can + # delay and retry in the case of rate-limiting + while True: + should_continue = False + try: + r = urlopen(request) + except HTTPError as exc: + errors, should_continue = _request_http_error(exc, auth, errors) # noqa + r = exc + except URLError as e: + log_warning(e.reason) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise + except socket.error as e: + log_warning(e.strerror) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise + + if should_continue: + continue + + break + return r, errors + + +def _construct_request(per_page, page, query_args, template, auth): + querystring = urlencode(dict(list({ + 'per_page': 
per_page, + 'page': page + }.items()) + list(query_args.items()))) + + request = Request(template + '?' + querystring) + if auth is not None: + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + log_info('Requesting {}?{}'.format(template, querystring)) + return request + + +def _request_http_error(exc, auth, errors): + # HTTPError behaves like a Response so we can + # check the status code and headers to see exactly + # what failed. + + should_continue = False + headers = exc.headers + limit_remaining = int(headers.get('x-ratelimit-remaining', 0)) + + if exc.code == 403 and limit_remaining < 1: + # The X-RateLimit-Reset header includes a + # timestamp telling us when the limit will reset + # so we can calculate how long to wait rather + # than inefficiently polling: + gm_now = calendar.timegm(time.gmtime()) + reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now + # We'll never sleep for less than 10 seconds: + delta = max(10, reset - gm_now) + + limit = headers.get('x-ratelimit-limit') + print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa + file=sys.stderr) + + if auth is None: + print('Hint: Authenticate to raise your GitHub rate limit', + file=sys.stderr) + + time.sleep(delta) + should_continue = True + return errors, should_continue + + +def _request_url_error(template, retry_timeout): + # Incase of a connection timing out, we can retry a few time + # But we won't crash and not back-up the rest now + log_info('{} timed out'.format(template)) + retry_timeout -= 1 + + if retry_timeout >= 0: + return True + + log_error('{} timed out to much, skipping!') + return False + + +class S3HTTPRedirectHandler(HTTPRedirectHandler): + """ + A subclassed redirect handler for downloading Github assets from S3. + + urllib will add the Authorization header to the redirected request to S3, which will result in a 400, + so we should remove said header on redirect. 
+ """ + def redirect_request(self, req, fp, code, msg, headers, newurl): + if PY2: + # HTTPRedirectHandler is an old style class + request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) + else: + request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) + del request.headers['Authorization'] + return request + + +def download_file(url, path, auth): + # Skip downloading release assets if they already exist on disk so we don't redownload on every sync + if os.path.exists(path): + return + + request = Request(url) + request.add_header('Accept', 'application/octet-stream') + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + opener = build_opener(S3HTTPRedirectHandler) + + try: + response = opener.open(request) + + chunk_size = 16 * 1024 + with open(path, 'wb') as f: + while True: + chunk = response.read(chunk_size) + if not chunk: + break + f.write(chunk) + except HTTPError as exc: + # Gracefully handle 404 responses (and others) when downloading from S3 + log_warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) + except URLError as e: + # Gracefully handle other URL errors + log_warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) + except socket.error as e: + # Gracefully handle socket errors + # TODO: Implement retry logic + log_warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) + + +def get_authenticated_user(args): + template = 'https://{0}/user'.format(get_github_api_host(args)) + data = retrieve_data(args, template, single_request=True) + return data[0] + + +def check_git_lfs_install(): + exit_code = subprocess.call(['git', 'lfs', 'version']) + if exit_code != 0: + log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + + +def retrieve_repositories(args, authenticated_user): + 
log_info('Retrieving repositories') + single_request = False + if args.user == authenticated_user['login']: + # we must use the /user/repos API to be able to access private repos + template = 'https://{0}/user/repos'.format( + get_github_api_host(args)) + else: + if args.private and not args.organization: + log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') + template = 'https://{0}/users/{1}/repos'.format( + get_github_api_host(args), + args.user) + + if args.organization: + template = 'https://{0}/orgs/{1}/repos'.format( + get_github_api_host(args), + args.user) + + if args.repository: + single_request = True + template = 'https://{0}/repos/{1}/{2}'.format( + get_github_api_host(args), + args.user, + args.repository) + + repos = retrieve_data(args, template, single_request=single_request) + + if args.all_starred: + starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(args), args.user) + starred_repos = retrieve_data(args, starred_template, single_request=False) + # flag each repo as starred for downstream processing + for item in starred_repos: + item.update({'is_starred': True}) + repos.extend(starred_repos) + + if args.include_gists: + gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(args), args.user) + gists = retrieve_data(args, gists_template, single_request=False) + # flag each repo as a gist for downstream processing + for item in gists: + item.update({'is_gist': True}) + repos.extend(gists) + + if args.include_starred_gists: + starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) + starred_gists = retrieve_data(args, starred_gists_template, single_request=False) + # flag each repo as a starred gist for downstream processing + for item in starred_gists: + item.update({'is_gist': True, + 'is_starred': True}) + repos.extend(starred_gists) + + return repos + + +def filter_repositories(args, 
unfiltered_repositories): + log_info('Filtering repositories') + + repositories = [] + for r in unfiltered_repositories: + # gists can be anonymous, so need to safely check owner + if r.get('owner', {}).get('login') == args.user or r.get('is_starred'): + repositories.append(r) + + name_regex = None + if args.name_regex: + name_regex = re.compile(args.name_regex) + + languages = None + if args.languages: + languages = [x.lower() for x in args.languages] + + if not args.fork: + repositories = [r for r in repositories if not r.get('fork')] + if not args.private: + repositories = [r for r in repositories if not r.get('private') or r.get('public')] + if languages: + repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa + if name_regex: + repositories = [r for r in repositories if name_regex.match(r['name'])] + + return repositories + + +def backup_repositories(args, output_directory, repositories): + log_info('Backing up repositories') + repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) + + if args.incremental: + last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa + last_update_path = os.path.join(output_directory, 'last_update') + if os.path.exists(last_update_path): + args.since = open(last_update_path).read().strip() + else: + args.since = None + else: + args.since = None + + for repository in repositories: + if repository.get('is_gist'): + repo_cwd = os.path.join(output_directory, 'gists', repository['id']) + elif repository.get('is_starred'): + # put starred repos in -o/starred/${owner}/${repo} to prevent collision of + # any repositories with the same name + repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + else: + repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) + + repo_dir = os.path.join(repo_cwd, 
'repository') + repo_url = get_github_repo_url(args, repository) + + include_gists = (args.include_gists or args.include_starred_gists) + if (args.include_repository or args.include_everything) \ + or (include_gists and repository.get('is_gist')): + repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') + fetch_repository(repo_name, + repo_url, + repo_dir, + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone) + + if repository.get('is_gist'): + # dump gist information to a file as well + output_file = '{0}/gist.json'.format(repo_cwd) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(repository, f) + + continue # don't try to back anything else for a gist; it doesn't exist + + download_wiki = (args.include_wiki or args.include_everything) + if repository['has_wiki'] and download_wiki: + fetch_repository(repository['name'], + repo_url.replace('.git', '.wiki.git'), + os.path.join(repo_cwd, 'wiki'), + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone) + + if args.include_issues or args.include_everything: + backup_issues(args, repo_cwd, repository, repos_template) + + if args.include_pulls or args.include_everything: + backup_pulls(args, repo_cwd, repository, repos_template) + + if args.include_milestones or args.include_everything: + backup_milestones(args, repo_cwd, repository, repos_template) + + if args.include_labels or args.include_everything: + backup_labels(args, repo_cwd, repository, repos_template) + + if args.include_hooks or args.include_everything: + backup_hooks(args, repo_cwd, repository, repos_template) + + if args.include_releases or args.include_everything: + backup_releases(args, repo_cwd, repository, repos_template, + include_assets=args.include_assets or args.include_everything) + + if args.incremental: + open(last_update_path, 'w').write(last_update) + + +def backup_issues(args, repo_cwd, repository, 
repos_template): + has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) + if args.skip_existing and has_issues_dir: + return + + log_info('Retrieving {0} issues'.format(repository['full_name'])) + issue_cwd = os.path.join(repo_cwd, 'issues') + mkdir_p(repo_cwd, issue_cwd) + + issues = {} + issues_skipped = 0 + issues_skipped_message = '' + _issue_template = '{0}/{1}/issues'.format(repos_template, + repository['full_name']) + + should_include_pulls = args.include_pulls or args.include_everything + issue_states = ['open', 'closed'] + for issue_state in issue_states: + query_args = { + 'filter': 'all', + 'state': issue_state + } + if args.since: + query_args['since'] = args.since + + _issues = retrieve_data(args, + _issue_template, + query_args=query_args) + for issue in _issues: + # skip pull requests which are also returned as issues + # if retrieving pull requests is requested as well + if 'pull_request' in issue and should_include_pulls: + issues_skipped += 1 + continue + + issues[issue['number']] = issue + + if issues_skipped: + issues_skipped_message = ' (skipped {0} pull requests)'.format( + issues_skipped) + + log_info('Saving {0} issues to disk{1}'.format( + len(list(issues.keys())), issues_skipped_message)) + comments_template = _issue_template + '/{0}/comments' + events_template = _issue_template + '/{0}/events' + for number, issue in list(issues.items()): + if args.include_issue_comments or args.include_everything: + template = comments_template.format(number) + issues[number]['comment_data'] = retrieve_data(args, template) + if args.include_issue_events or args.include_everything: + template = events_template.format(number) + issues[number]['event_data'] = retrieve_data(args, template) + + issue_file = '{0}/{1}.json'.format(issue_cwd, number) + with codecs.open(issue_file, 'w', encoding='utf-8') as f: + json_dump(issue, f) + + +def backup_pulls(args, repo_cwd, repository, repos_template): + has_pulls_dir = 
os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) + if args.skip_existing and has_pulls_dir: + return + + log_info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa + pulls_cwd = os.path.join(repo_cwd, 'pulls') + mkdir_p(repo_cwd, pulls_cwd) + + pulls = {} + _pulls_template = '{0}/{1}/pulls'.format(repos_template, + repository['full_name']) + query_args = { + 'filter': 'all', + 'state': 'all', + 'sort': 'updated', + 'direction': 'desc', + } + + if not args.include_pull_details: + pull_states = ['open', 'closed'] + for pull_state in pull_states: + query_args['state'] = pull_state + _pulls = retrieve_data_gen(args, + _pulls_template, + query_args=query_args) + for pull in _pulls: + if args.since and pull['updated_at'] < args.since: + break + if not args.since or pull['updated_at'] >= args.since: + pulls[pull['number']] = pull + else: + _pulls = retrieve_data_gen(args, + _pulls_template, + query_args=query_args) + for pull in _pulls: + if args.since and pull['updated_at'] < args.since: + break + if not args.since or pull['updated_at'] >= args.since: + pulls[pull['number']] = retrieve_data( + args, + _pulls_template + '/{}'.format(pull['number']), + single_request=True + )[0] + + log_info('Saving {0} pull requests to disk'.format( + len(list(pulls.keys())))) + comments_template = _pulls_template + '/{0}/comments' + commits_template = _pulls_template + '/{0}/commits' + for number, pull in list(pulls.items()): + if args.include_pull_comments or args.include_everything: + template = comments_template.format(number) + pulls[number]['comment_data'] = retrieve_data(args, template) + if args.include_pull_commits or args.include_everything: + template = commits_template.format(number) + pulls[number]['commit_data'] = retrieve_data(args, template) + + pull_file = '{0}/{1}.json'.format(pulls_cwd, number) + with codecs.open(pull_file, 'w', encoding='utf-8') as f: + json_dump(pull, f) + + +def backup_milestones(args, repo_cwd, repository, repos_template): + 
milestone_cwd = os.path.join(repo_cwd, 'milestones') + if args.skip_existing and os.path.isdir(milestone_cwd): + return + + log_info('Retrieving {0} milestones'.format(repository['full_name'])) + mkdir_p(repo_cwd, milestone_cwd) + + template = '{0}/{1}/milestones'.format(repos_template, + repository['full_name']) + + query_args = { + 'state': 'all' + } + + _milestones = retrieve_data(args, template, query_args=query_args) + + milestones = {} + for milestone in _milestones: + milestones[milestone['number']] = milestone + + log_info('Saving {0} milestones to disk'.format( + len(list(milestones.keys())))) + for number, milestone in list(milestones.items()): + milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) + with codecs.open(milestone_file, 'w', encoding='utf-8') as f: + json_dump(milestone, f) + + +def backup_labels(args, repo_cwd, repository, repos_template): + label_cwd = os.path.join(repo_cwd, 'labels') + output_file = '{0}/labels.json'.format(label_cwd) + template = '{0}/{1}/labels'.format(repos_template, + repository['full_name']) + _backup_data(args, + 'labels', + template, + output_file, + label_cwd) + + +def backup_hooks(args, repo_cwd, repository, repos_template): + auth = get_auth(args) + if not auth: + log_info("Skipping hooks since no authentication provided") + return + hook_cwd = os.path.join(repo_cwd, 'hooks') + output_file = '{0}/hooks.json'.format(hook_cwd) + template = '{0}/{1}/hooks'.format(repos_template, + repository['full_name']) + try: + _backup_data(args, + 'hooks', + template, + output_file, + hook_cwd) + except SystemExit: + log_info("Unable to read hooks, skipping") + + +def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): + repository_fullname = repository['full_name'] + + # give release files somewhere to live & log intent + release_cwd = os.path.join(repo_cwd, 'releases') + log_info('Retrieving {0} releases'.format(repository_fullname)) + mkdir_p(repo_cwd, release_cwd) + + query_args = {} + 
+ release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + releases = retrieve_data(args, release_template, query_args=query_args) + + # for each release, store it + log_info('Saving {0} releases to disk'.format(len(releases))) + for release in releases: + release_name = release['tag_name'] + output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) + with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + json_dump(release, f) + + if include_assets: + assets = retrieve_data(args, release['assets_url']) + if len(assets) > 0: + # give release asset files somewhere to live & download them (not including source archives) + release_assets_cwd = os.path.join(release_cwd, release_name) + mkdir_p(release_assets_cwd) + for asset in assets: + download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) + + +def fetch_repository(name, + remote_url, + local_dir, + skip_existing=False, + bare_clone=False, + lfs_clone=False): + if bare_clone: + if os.path.exists(local_dir): + clone_exists = subprocess.check_output(['git', + 'rev-parse', + '--is-bare-repository'], + cwd=local_dir) == b"true\n" + else: + clone_exists = False + else: + clone_exists = os.path.exists(os.path.join(local_dir, '.git')) + + if clone_exists and skip_existing: + return + + masked_remote_url = mask_password(remote_url) + + initialized = subprocess.call('git ls-remote ' + remote_url, + stdout=FNULL, + stderr=FNULL, + shell=True) + if initialized == 128: + log_info("Skipping {0} ({1}) since it's not initialized".format( + name, masked_remote_url)) + return + + if clone_exists: + log_info('Updating {0} in {1}'.format(name, local_dir)) + + remotes = subprocess.check_output(['git', 'remote', 'show'], + cwd=local_dir) + remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] + + if 'origin' not in remotes: + git_command = ['git', 'remote', 'rm', 'origin'] + logging_subprocess(git_command, None, cwd=local_dir) + 
git_command = ['git', 'remote', 'add', 'origin', remote_url] + logging_subprocess(git_command, None, cwd=local_dir) + else: + git_command = ['git', 'remote', 'set-url', 'origin', remote_url] + logging_subprocess(git_command, None, cwd=local_dir) + + if lfs_clone: + git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + else: + git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + logging_subprocess(git_command, None, cwd=local_dir) + else: + log_info('Cloning {0} repository from {1} to {2}'.format( + name, + masked_remote_url, + local_dir)) + if bare_clone: + if lfs_clone: + git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir] + else: + git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + else: + if lfs_clone: + git_command = ['git', 'lfs', 'clone', remote_url, local_dir] + else: + git_command = ['git', 'clone', remote_url, local_dir] + logging_subprocess(git_command, None) + + +def backup_account(args, output_directory): + account_cwd = os.path.join(output_directory, 'account') + + if args.include_starred or args.include_everything: + output_file = "{0}/starred.json".format(account_cwd) + template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) + _backup_data(args, + "starred repositories", + template, + output_file, + account_cwd) + + if args.include_watched or args.include_everything: + output_file = "{0}/watched.json".format(account_cwd) + template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) + _backup_data(args, + "watched repositories", + template, + output_file, + account_cwd) + + if args.include_followers or args.include_everything: + output_file = "{0}/followers.json".format(account_cwd) + template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) + _backup_data(args, + "followers", + template, + output_file, + account_cwd) + + if args.include_following or args.include_everything: + output_file = 
"{0}/following.json".format(account_cwd) + template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) + _backup_data(args, + "following", + template, + output_file, + account_cwd) + + +def _backup_data(args, name, template, output_file, output_directory): + skip_existing = args.skip_existing + if not skip_existing or not os.path.exists(output_file): + log_info('Retrieving {0} {1}'.format(args.user, name)) + mkdir_p(output_directory) + data = retrieve_data(args, template) + + log_info('Writing {0} {1} to disk'.format(len(data), name)) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(data, f) + + +def json_dump(data, output_file): + json.dump(data, + output_file, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(',', ': ')) + + +def main(): + args = parse_args() + + output_directory = os.path.realpath(args.output_directory) + if not os.path.isdir(output_directory): + log_info('Create output directory {0}'.format(output_directory)) + mkdir_p(output_directory) + + if args.lfs_clone: + check_git_lfs_install() + + log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) + + authenticated_user = get_authenticated_user(args) + repositories = retrieve_repositories(args, authenticated_user) + repositories = filter_repositories(args, repositories) + backup_repositories(args, output_directory, repositories) + backup_account(args, output_directory) + + +if __name__ == '__main__': + main() From 1865941b14a7fc88670a1c3265101ffd46dc0b82 Mon Sep 17 00:00:00 2001 From: ethan Date: Wed, 12 Feb 2020 18:27:58 -0600 Subject: [PATCH 097/455] #50 update: keep main() in bin --- bin/github-backup | 35 +++++++++++++++++++++++++++++++++- github_backup/github_backup.py | 24 ----------------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 029752e..5f50c13 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -1,6 +1,39 @@ #!/usr/bin/env 
python -from github_backup.github_backup import main +import os + +from github_backup.github_backup import ( + backup_account, + backup_repositories, + check_git_lfs_install, + filter_repositories, + get_authenticated_user, + log_info, + mkdir_p, + parse_args, + retrieve_repositories, +) + + +def main(): + args = parse_args() + + output_directory = os.path.realpath(args.output_directory) + if not os.path.isdir(output_directory): + log_info('Create output directory {0}'.format(output_directory)) + mkdir_p(output_directory) + + if args.lfs_clone: + check_git_lfs_install() + + log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) + + authenticated_user = get_authenticated_user(args) + repositories = retrieve_repositories(args, authenticated_user) + repositories = filter_repositories(args, repositories) + backup_repositories(args, output_directory, repositories) + backup_account(args, output_directory) + if __name__ == '__main__': main() diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 7a96177..c61bef8 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1117,27 +1117,3 @@ def json_dump(data, output_file): sort_keys=True, indent=4, separators=(',', ': ')) - - -def main(): - args = parse_args() - - output_directory = os.path.realpath(args.output_directory) - if not os.path.isdir(output_directory): - log_info('Create output directory {0}'.format(output_directory)) - mkdir_p(output_directory) - - if args.lfs_clone: - check_git_lfs_install() - - log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) - - authenticated_user = get_authenticated_user(args) - repositories = retrieve_repositories(args, authenticated_user) - repositories = filter_repositories(args, repositories) - backup_repositories(args, output_directory, repositories) - backup_account(args, output_directory) - - -if __name__ == '__main__': - main() From a8e8841b260e0c7bad0db8e3ec4a745c6f6b9cb6 Mon Sep 17 
00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 14 Feb 2020 12:00:07 -0500 Subject: [PATCH 098/455] Release version 0.29.0 --- github_backup/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 1bf3675..9093e4e 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.28.0' +__version__ = '0.29.0' From 3f65eadee145b6e0291b1f3112266c66e4a8bbd0 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 14 Feb 2020 12:01:05 -0500 Subject: [PATCH 099/455] Release version 0.30.0 --- CHANGES.rst | 8 +++++++- github_backup/__init__.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a8655d6..12a9fc8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,15 @@ Changelog ========= +0.30.0 (2020-02-14) +------------------- +------------------- +- #50 update: keep main() in bin. [ethan] +- #50 - refactor for friendlier import. [ethan] + + 0.28.0 (2020-02-03) ------------------- ------------------------- - Remove deprecated (and removed) git lfs flags. [smiley] "--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script. 
diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 9093e4e..e187e0a 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.29.0' +__version__ = '0.30.0' From 857ad0afabcde1bec6dfb7faebc2247e6f4f2261 Mon Sep 17 00:00:00 2001 From: ethan Date: Tue, 25 Feb 2020 12:35:24 -0600 Subject: [PATCH 100/455] #123: Support Authenticating As Github Application --- bin/github-backup | 7 ++++-- github_backup/github_backup.py | 39 ++++++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 5f50c13..be929bb 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -26,9 +26,12 @@ def main(): if args.lfs_clone: check_git_lfs_install() - log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) + if not args.as_app: + log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) + authenticated_user = get_authenticated_user(args) + else: + authenticated_user = {'login': None} - authenticated_user = get_authenticated_user(args) repositories = retrieve_repositories(args, authenticated_user) repositories = filter_repositories(args, repositories) backup_repositories(args, output_directory, repositories) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index c61bef8..48c4267 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -168,7 +168,16 @@ def parse_args(): parser.add_argument('-t', '--token', dest='token', - help='personal access or OAuth token, or path to token (file://...)') # noqa + help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa + parser.add_argument('--as-app', + action='store_true', + dest='as_app', + help='authenticate as github app instead of as a user. 
' + 'Using this option will probably break the following options: ' + 'starred, all-starred, watched, gists, starred-gists, ' + 'followers, following. ' + 'Other options may be affected. ' + 'User account information will not be backed up.') parser.add_argument('-o', '--output-directory', default='.', @@ -325,7 +334,7 @@ def parse_args(): return parser.parse_args() -def get_auth(args, encode=True): +def get_auth(args, encode=True, for_git_cli=False): auth = None if args.osx_keychain_item_name: @@ -353,7 +362,13 @@ def get_auth(args, encode=True): if args.token.startswith(_path_specifier): args.token = open(args.token[len(_path_specifier):], 'rt').readline().strip() - auth = args.token + ':' + 'x-oauth-basic' + if not args.as_app: + auth = args.token + ':' + 'x-oauth-basic' + else: + if not for_git_cli: + auth = args.token + else: + auth = 'x-access-token:' + args.token elif args.username: if not args.password: args.password = getpass.getpass() @@ -399,7 +414,7 @@ def get_github_repo_url(args, repository): if args.prefer_ssh: return repository['ssh_url'] - auth = get_auth(args, False) + auth = get_auth(args, encode=False, for_git_cli=True) if auth and repository['private'] == True: repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( auth, @@ -413,14 +428,14 @@ def get_github_repo_url(args, repository): def retrieve_data_gen(args, template, query_args=None, single_request=False): - auth = get_auth(args) + auth = get_auth(args, encode=not args.as_app) query_args = get_query_args(query_args) per_page = 100 page = 0 while True: page = page + 1 - request = _construct_request(per_page, page, query_args, template, auth) # noqa + request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) @@ -430,7 +445,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): print('API request returned HTTP 502: Bad Gateway. 
Retrying in 5 seconds') retries += 1 time.sleep(5) - request = _construct_request(per_page, page, query_args, template, auth) # noqa + request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) @@ -495,7 +510,7 @@ def _get_response(request, auth, template): return r, errors -def _construct_request(per_page, page, query_args, template, auth): +def _construct_request(per_page, page, query_args, template, auth, as_app=None): querystring = urlencode(dict(list({ 'per_page': per_page, 'page': page @@ -503,7 +518,13 @@ def _construct_request(per_page, page, query_args, template, auth): request = Request(template + '?' + querystring) if auth is not None: - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + if not as_app: + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + else: + if not PY2: + auth = auth.encode('ascii') + request.add_header('Authorization', 'token '.encode('ascii') + auth) + request.add_header('Accept', 'application/vnd.github.machine-man-preview+json') log_info('Requesting {}?{}'.format(template, querystring)) return request From 523c811cc6cdb39989ad2af4330a59114a8a0d73 Mon Sep 17 00:00:00 2001 From: ethan Date: Tue, 25 Feb 2020 13:13:20 -0600 Subject: [PATCH 101/455] #123 update: changed --as-app 'help' description --- github_backup/github_backup.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 48c4267..aa5e832 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -172,12 +172,7 @@ def parse_args(): parser.add_argument('--as-app', action='store_true', dest='as_app', - help='authenticate as github app instead of as a user. 
' - 'Using this option will probably break the following options: ' - 'starred, all-starred, watched, gists, starred-gists, ' - 'followers, following. ' - 'Other options may be affected. ' - 'User account information will not be backed up.') + help='authenticate as github app instead of as a user.') parser.add_argument('-o', '--output-directory', default='.', From 85e439940831b4287b410786ce520486d9d5d5e5 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Tue, 25 Feb 2020 14:41:22 -0500 Subject: [PATCH 102/455] Release version 0.31.0 --- CHANGES.rst | 8 +++++++- github_backup/__init__.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 12a9fc8..cfe5ebe 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,14 @@ Changelog ========= -0.30.0 (2020-02-14) +0.31.0 (2020-02-25) ------------------- +------------------------ +- #123 update: changed --as-app 'help' description. [ethan] +- #123: Support Authenticating As Github Application. [ethan] + + +0.29.0 (2020-02-14) ------------------- - #50 update: keep main() in bin. [ethan] - #50 - refactor for friendlier import. 
[ethan] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index e187e0a..c3d10d7 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.30.0' +__version__ = '0.31.0' From a351cdc1034def0042810b8ef63f68861970fc16 Mon Sep 17 00:00:00 2001 From: Tom Hoover Date: Sun, 22 Mar 2020 08:48:50 -0500 Subject: [PATCH 103/455] Update README.rst to match 'github-backup -h' --- README.rst | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/README.rst b/README.rst index 12c50f9..701ab32 100644 --- a/README.rst +++ b/README.rst @@ -29,19 +29,19 @@ Usage CLI Usage is as follows:: - github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] + github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] [--as-app] [-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred] [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] - [--pull-comments] [--pull-commits] [--labels] [--hooks] - [--milestones] [--repositories] [--releases] [--assets] + [--pull-comments] [--pull-commits] [--pull-details] + [--labels] [--hooks] [--milestones] [--repositories] [--bare] [--lfs] [--wikis] [--gists] [--starred-gists] - [--skip-existing] - [-L [LANGUAGES [LANGUAGES ...]]] [-N NAME_REGEX] - [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] - [--prefer-ssh] [-v] + [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] + [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] + [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] + [--releases] [--assets] USER Backup a github account @@ -57,36 +57,36 @@ CLI Usage is as follows:: password for basic auth. If a username is given but not a password, the password will be prompted for. -t TOKEN, --token TOKEN - personal access or OAuth token, or path to token - (file://...) 
+ personal access, OAuth, or JSON Web token, or path to + token (file://...) + --as-app authenticate as github app instead of as a user. -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY directory at which to backup the repositories -i, --incremental incremental backup --starred include JSON output of starred repositories in backup - --all-starred include starred repositories in backup - --watched include watched repositories in backup + --all-starred include starred repositories in backup [*] + --watched include JSON output of watched repositories in backup --followers include JSON output of followers in backup --following include JSON output of following users in backup - --all include everything in backup + --all include everything in backup (not including [*]) --issues include issues in backup --issue-comments include issue comments in backup --issue-events include issue events in backup --pulls include pull requests in backup --pull-comments include pull request review comments in backup --pull-commits include pull request commits in backup + --pull-details include more pull request details in backup [*] --labels include labels in backup --hooks include hooks in backup (works only when authenticated) --milestones include milestones in backup --repositories include repository clone in backup - --releases include repository releases' information without assets or binaries - --assets include assets alongside release information; only applies if including releases --bare clone bare repositories --lfs clone LFS repositories (requires Git LFS to be - installed, https://git-lfs.github.com) + installed, https://git-lfs.github.com) [*] --wikis include wiki clone in backup - --gists include gists in backup - --starred-gists include starred gists in backup + --gists include gists in backup [*] + --starred-gists include starred gists in backup [*] --skip-existing skip project if a backup directory exists -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES 
[LANGUAGES ...]] only allow these languages @@ -97,8 +97,8 @@ CLI Usage is as follows:: -O, --organization whether or not this is an organization user -R REPOSITORY, --repository REPOSITORY name of repository to limit backup to - -P, --private include private repositories - -F, --fork include forked repositories + -P, --private include private repositories [*] + -F, --fork include forked repositories [*] --prefer-ssh Clone repositories using SSH instead of HTTPS -v, --version show program's version number and exit --keychain-name OSX_KEYCHAIN_ITEM_NAME @@ -107,6 +107,10 @@ CLI Usage is as follows:: --keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token + --releases include release information, not including assets or + binaries + --assets include assets alongside release information; only + applies if including releases The package can be used to backup an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, json files for issues). 
From 03b9d1b2d8249768af3fcd4a8fd91fe5d33f8224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrico=20Tr=C3=B6ger?= Date: Mon, 13 Apr 2020 22:11:48 +0200 Subject: [PATCH 104/455] Add timestamp to log messages --- github_backup/github_backup.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index aa5e832..8798c85 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -7,6 +7,7 @@ import base64 import calendar import codecs +import datetime import errno import getpass import json @@ -50,6 +51,10 @@ FNULL = open(os.devnull, 'w') +def _get_log_date(): + return datetime.datetime.isoformat(datetime.datetime.now()) + + def log_error(message): """ Log message (str) or messages (List[str]) to stderr and exit with status 1 @@ -66,7 +71,7 @@ def log_info(message): message = [message] for msg in message: - sys.stdout.write("{0}\n".format(msg)) + sys.stdout.write("{0}: {1}\n".format(_get_log_date(), msg)) def log_warning(message): @@ -77,7 +82,7 @@ def log_warning(message): message = [message] for msg in message: - sys.stderr.write("{0}\n".format(msg)) + sys.stderr.write("{0}: {1}\n".format(_get_log_date(), msg)) def logging_subprocess(popenargs, @@ -437,7 +442,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): retries = 0 while retries < 3 and status_code == 502: - print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') + log_warning('API request returned HTTP 502: Bad Gateway. 
Retrying in 5 seconds') retries += 1 time.sleep(5) request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa @@ -544,12 +549,10 @@ def _request_http_error(exc, auth, errors): delta = max(10, reset - gm_now) limit = headers.get('x-ratelimit-limit') - print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa - file=sys.stderr) + log_warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta)) # noqa if auth is None: - print('Hint: Authenticate to raise your GitHub rate limit', - file=sys.stderr) + log_info('Hint: Authenticate to raise your GitHub rate limit') time.sleep(delta) should_continue = True From fb7cc5ed534da3677d627735df4d48f1dc5285f0 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 13 Apr 2020 17:02:59 -0400 Subject: [PATCH 105/455] Release version 0.32.0 --- CHANGES.rst | 7 ++++++- github_backup/__init__.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index cfe5ebe..8d2d079 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,14 @@ Changelog ========= -0.31.0 (2020-02-25) +0.32.0 (2020-04-13) ------------------- ------------------------ +- Add timestamp to log messages. [Enrico Tröger] + + +0.31.0 (2020-02-25) +------------------- - #123 update: changed --as-app 'help' description. [ethan] - #123: Support Authenticating As Github Application. [ethan] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index c3d10d7..2ef0c52 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.31.0' +__version__ = '0.32.0' From 78098aae23ef3c439e96e876c6931c59b17fe4bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrico=20Tr=C3=B6ger?= Date: Mon, 13 Apr 2020 23:06:09 +0200 Subject: [PATCH 106/455] Add basic API request throttling A simple approach to throttle API requests and so keep within the rate limits of the API. 
Can be enabled with "--throttle-limit" to specify when throttling should start. "--throttle-pause" defines the time to sleep between further API requests. --- github_backup/github_backup.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 8798c85..178da62 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -331,6 +331,16 @@ def parse_args(): action='store_true', dest='include_assets', help='include assets alongside release information; only applies if including releases') + parser.add_argument('--throttle-limit', + dest='throttle_limit', + type=int, + default=0, + help='start throttling of GitHub API requests after this amount of API requests remain') + parser.add_argument('--throttle-pause', + dest='throttle_pause', + type=float, + default=30.0, + help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)') return parser.parse_args() @@ -439,6 +449,14 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) + # be gentle with API request limit and throttle requests if remaining requests getting low + limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0)) + if limit_remaining <= args.throttle_limit: + log_info( + 'API request limit hit: {} requests left, pausing further requests for {}s'.format( + limit_remaining, + args.throttle_pause)) + time.sleep(args.throttle_pause) retries = 0 while retries < 3 and status_code == 502: From 70f711ea68f7a341842dfa14b7b8629e90869bdc Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 13 Apr 2020 17:14:20 -0400 Subject: [PATCH 107/455] Release version 0.33.0 --- CHANGES.rst | 13 ++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 
8d2d079..0ade7d3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,20 @@ Changelog ========= -0.32.0 (2020-04-13) +0.33.0 (2020-04-13) ------------------- ------------------------ +- Add basic API request throttling. [Enrico Tröger] + + A simple approach to throttle API requests and so keep within the rate + limits of the API. Can be enabled with "--throttle-limit" to specify + when throttling should start. + "--throttle-pause" defines the time to sleep between further API + requests. + + +0.32.0 (2020-04-13) +------------------- - Add timestamp to log messages. [Enrico Tröger] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 2ef0c52..e3d0b7b 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.32.0' +__version__ = '0.33.0' From 272177c395fd6b44d70dc4a96f91b800568fbfd2 Mon Sep 17 00:00:00 2001 From: Gary Moon Date: Tue, 26 May 2020 19:59:47 -0400 Subject: [PATCH 108/455] Update the readme for new switches added in 0.33 --- README.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 701ab32..b13739c 100644 --- a/README.rst +++ b/README.rst @@ -41,7 +41,8 @@ CLI Usage is as follows:: [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] - [--releases] [--assets] + [--releases] [--assets] [--throttle-limit THROTTLE_LIMIT] + [--throttle-pause THROTTLE_PAUSE] USER Backup a github account @@ -111,6 +112,13 @@ CLI Usage is as follows:: binaries --assets include assets alongside release information; only applies if including releases + --throttle-limit THROTTLE_LIMIT + start throttling of GitHub API requests after this + amount of API requests remain + --throttle-pause THROTTLE_PAUSE + wait this amount of seconds when API request + throttling is active (default: 30.0, requires + --throttle-limit to be set) The package can be used to backup an *entire* organization or repository, 
including issues and wikis in the most appropriate format (clones for wikis, json files for issues). From 13128635cbd60282917cc3538fec16e7ce972448 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 28 May 2020 16:44:40 -0400 Subject: [PATCH 109/455] Release version 0.33.1 --- CHANGES.rst | 4 ++-- github_backup/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0ade7d3..10b4401 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,9 @@ Changelog ========= -0.33.0 (2020-04-13) +0.33.1 (2020-05-28) +------------------- ------------------- ------------------------- - Add basic API request throttling. [Enrico Tröger] A simple approach to throttle API requests and so keep within the rate diff --git a/github_backup/__init__.py b/github_backup/__init__.py index e3d0b7b..52c1b08 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.33.0' +__version__ = '0.33.1' From 356f5f674be197259e917814c7c0aee530254d79 Mon Sep 17 00:00:00 2001 From: Matt Fields Date: Tue, 7 Jul 2020 16:43:11 -0400 Subject: [PATCH 110/455] Add logic for transforming gist repository urls to ssh --- github_backup/github_backup.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 178da62..839435b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -419,7 +419,13 @@ def get_github_host(args): def get_github_repo_url(args, repository): if repository.get('is_gist'): - return repository['git_pull_url'] + if args.prefer_ssh: + # The git_pull_url value is always https for gists, so we need to transform it to ssh form + repo_url = re.sub('^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url']) + repo_url = re.sub('^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility + else: + repo_url = repository['git_pull_url'] + 
return repo_url if args.prefer_ssh: return repository['ssh_url'] From 8a00bb190304d138cd083818c04007f482f82b71 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 24 Jul 2020 13:31:03 -0400 Subject: [PATCH 111/455] Release version 0.34.0 --- CHANGES.rst | 7 ++++++- github_backup/__init__.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 10b4401..ecea949 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,13 @@ Changelog ========= -0.33.1 (2020-05-28) +0.34.0 (2020-07-24) ------------------- +------------------------ +- Add logic for transforming gist repository urls to ssh. [Matt Fields] + + +0.33.0 (2020-04-13) ------------------- - Add basic API request throttling. [Enrico Tröger] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 52c1b08..cac7112 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.33.1' +__version__ = '0.34.0' From a2115ce3e534fe2bbaf9337a8a2a9871adde6c71 Mon Sep 17 00:00:00 2001 From: Samantha Baldwin Date: Wed, 5 Aug 2020 11:53:17 -0400 Subject: [PATCH 112/455] Make API request throttling optional --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 839435b..f6fd158 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -457,7 +457,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): status_code = int(r.getcode()) # be gentle with API request limit and throttle requests if remaining requests getting low limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0)) - if limit_remaining <= args.throttle_limit: + if args.throttle_limit and limit_remaining <= args.throttle_limit: log_info( 'API request limit hit: {} requests left, pausing further requests for {}s'.format( limit_remaining, From 751b0d6e821b7d4ce212d7693a186c9dc2bfd66b Mon 
Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 5 Aug 2020 12:02:21 -0400 Subject: [PATCH 113/455] Release version 0.35.0 --- CHANGES.rst | 7 ++++++- github_backup/__init__.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index ecea949..90bf6a5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,14 @@ Changelog ========= -0.34.0 (2020-07-24) +0.35.0 (2020-08-05) ------------------- ------------------------ +- Make API request throttling optional. [Samantha Baldwin] + + +0.34.0 (2020-07-24) +------------------- - Add logic for transforming gist repository urls to ssh. [Matt Fields] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index cac7112..2670d05 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.34.0' +__version__ = '0.35.0' From d8bcbfa644ca67b6ced53cef98f207f48ba5bce0 Mon Sep 17 00:00:00 2001 From: wouter bolsterlee Date: Thu, 27 Aug 2020 17:01:56 +0200 Subject: [PATCH 114/455] Include --private flag in example By default, private repositories are not included. This is surprising. It took me a while to figure this out, and making that clear in the example can help others to be aware of that. --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index b13739c..7edb74f 100644 --- a/README.rst +++ b/README.rst @@ -153,10 +153,10 @@ Instructions on how to do this can be found on https://git-lfs.github.com. 
Examples ======== -Backup all repositories:: +Backup all repositories, including private ones:: export ACCESS_TOKEN=SOME-GITHUB-TOKEN - github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories + github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private Backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: From 607b6ca69bf6798c8c27bf30c749a7299c967a5c Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Fri, 28 Aug 2020 01:54:27 -0700 Subject: [PATCH 115/455] Add .circleci/config.yml --- .circleci/config.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..f23481c --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,23 @@ +version: 2.1 + +orbs: + python: circleci/python@0.3.2 + +jobs: + build-and-test: + executor: python/default + steps: + - checkout + - python/load-cache + - run: + command: pip install flake8 + name: Install dependencies + - python/save-cache + - run: + command: flake8 --ignore=E501 + name: Lint + +workflows: + main: + jobs: + - build-and-test From 8fd0f2b64f091e857d4a9228ee3ce1171a74807c Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:21:59 -0700 Subject: [PATCH 116/455] Do not use bare excepts --- github_backup/github_backup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index f6fd158..416d9ea 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -30,9 +30,11 @@ from urllib.request import Request from urllib.request import HTTPRedirectHandler from urllib.request import build_opener + from subprocess import SubprocessError except ImportError: # python 2 PY2 = True + from subprocess import 
CalledProcessError as SubprocessError from urlparse import urlparse from urllib import quote as urlquote from urllib import urlencode @@ -363,7 +365,7 @@ def get_auth(args, encode=True, for_git_cli=False): if not PY2: token = token.decode('utf-8') auth = token + ':' + 'x-oauth-basic' - except: + except SubprocessError: log_error('No password item matching the provided name and account could be found in the osx keychain.') elif args.osx_keychain_item_account: log_error('You must specify both name and account fields for osx keychain password items') From bb2e2b8c6f6a6aa6102f362e27bd3e84537c0cf7 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:23:44 -0700 Subject: [PATCH 117/455] Fix whitespace issues --- github_backup/github_backup.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 416d9ea..2e56279 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -424,7 +424,7 @@ def get_github_repo_url(args, repository): if args.prefer_ssh: # The git_pull_url value is always https for gists, so we need to transform it to ssh form repo_url = re.sub('^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url']) - repo_url = re.sub('^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility + repo_url = re.sub('^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility else: repo_url = repository['git_pull_url'] return repo_url @@ -497,9 +497,11 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): if single_request: break + def retrieve_data(args, template, query_args=None, single_request=False): return list(retrieve_data_gen(args, template, query_args, single_request)) + def get_query_args(query_args=None): if not query_args: query_args = {} @@ -903,18 +905,22 @@ def backup_pulls(args, repo_cwd, repository, repos_template): 
pull_states = ['open', 'closed'] for pull_state in pull_states: query_args['state'] = pull_state - _pulls = retrieve_data_gen(args, - _pulls_template, - query_args=query_args) + _pulls = retrieve_data_gen( + args, + _pulls_template, + query_args=query_args + ) for pull in _pulls: if args.since and pull['updated_at'] < args.since: break if not args.since or pull['updated_at'] >= args.since: pulls[pull['number']] = pull else: - _pulls = retrieve_data_gen(args, - _pulls_template, - query_args=query_args) + _pulls = retrieve_data_gen( + args, + _pulls_template, + query_args=query_args + ) for pull in _pulls: if args.since and pull['updated_at'] < args.since: break From fa27988c1cd8e6565d33ed196bf1124d5c4b7583 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:24:25 -0700 Subject: [PATCH 118/455] Update boolean check --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 2e56279..a1e1a20 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -433,7 +433,7 @@ def get_github_repo_url(args, repository): return repository['ssh_url'] auth = get_auth(args, encode=False, for_git_cli=True) - if auth and repository['private'] == True: + if auth and repository['private'] is True: repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( auth, get_github_host(args), From 78cff47a911a438975fce524fa5edd2285c2e06b Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:26:04 -0700 Subject: [PATCH 119/455] Fix regex string --- github_backup/github_backup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index a1e1a20..68bfb59 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -423,8 +423,8 @@ def get_github_repo_url(args, repository): if repository.get('is_gist'): if args.prefer_ssh: # The git_pull_url 
value is always https for gists, so we need to transform it to ssh form - repo_url = re.sub('^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url']) - repo_url = re.sub('^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility + repo_url = re.sub(r'^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url']) + repo_url = re.sub(r'^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility else: repo_url = repository['git_pull_url'] return repo_url From 2de96390be5b613045b89e8fcdadd7c946d75dc1 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:27:22 -0700 Subject: [PATCH 120/455] Add flake8 instructions to readme --- README.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.rst b/README.rst index 7edb74f..a53a373 100644 --- a/README.rst +++ b/README.rst @@ -166,6 +166,15 @@ Backup a single organization repository with everything else (wiki, pull request # e.g. git@github.com:docker/cli.git github-backup $ORGANIZATION -P -t $ACCESS_TOKEN -o . --all -O -R $REPO +Testing +======= + +This project currently contains no unit tests. To run linting:: + + pip install flake8 + flake8 --ignore=E501 + + .. |PyPI| image:: https://img.shields.io/pypi/v/github-backup.svg :target: https://pypi.python.org/pypi/github-backup/ .. 
|Python Versions| image:: https://img.shields.io/pypi/pyversions/github-backup.svg From 031a9844342b8357d81c936aa62d1a30785cb981 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 29 Aug 2020 02:37:48 -0400 Subject: [PATCH 121/455] Release version 0.36.0 --- CHANGES.rst | 16 +++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 90bf6a5..f4b24a2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,23 @@ Changelog ========= -0.35.0 (2020-08-05) +0.36.0 (2020-08-29) ------------------- ------------------------ +- Add flake8 instructions to readme. [Albert Wang] +- Fix regex string. [Albert Wang] +- Fix whitespace issues. [Albert Wang] +- Do not use bare excepts. [Albert Wang] +- Add .circleci/config.yml. [Albert Wang] +- Include --private flag in example. [wouter bolsterlee] + + By default, private repositories are not included. This is surprising. + It took me a while to figure this out, and making that clear in the + example can help others to be aware of that. + + +0.35.0 (2020-08-05) +------------------- - Make API request throttling optional. 
[Samantha Baldwin] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 2670d05..aae5aca 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.35.0' +__version__ = '0.36.0' From d7b85264cd517d9e74b39206ff0968519a0829fe Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:32:16 -0700 Subject: [PATCH 122/455] Remove python 2 specific logic --- github_backup/github_backup.py | 46 +++++++++------------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 68bfb59..653611d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -19,30 +19,14 @@ import sys import time import platform -PY2 = False -try: - # python 3 - from urllib.parse import urlparse - from urllib.parse import quote as urlquote - from urllib.parse import urlencode - from urllib.error import HTTPError, URLError - from urllib.request import urlopen - from urllib.request import Request - from urllib.request import HTTPRedirectHandler - from urllib.request import build_opener - from subprocess import SubprocessError -except ImportError: - # python 2 - PY2 = True - from subprocess import CalledProcessError as SubprocessError - from urlparse import urlparse - from urllib import quote as urlquote - from urllib import urlencode - from urllib2 import HTTPError, URLError - from urllib2 import urlopen - from urllib2 import Request - from urllib2 import HTTPRedirectHandler - from urllib2 import build_opener +from urllib.parse import urlparse +from urllib.parse import quote as urlquote +from urllib.parse import urlencode +from urllib.error import HTTPError, URLError +from urllib.request import urlopen +from urllib.request import Request +from urllib.request import HTTPRedirectHandler +from urllib.request import build_opener try: from . 
import __version__ @@ -362,10 +346,9 @@ def get_auth(args, encode=True, for_git_cli=False): '-s', args.osx_keychain_item_name, '-a', args.osx_keychain_item_account, '-w'], stderr=devnull).strip()) - if not PY2: - token = token.decode('utf-8') + token = token.decode('utf-8') auth = token + ':' + 'x-oauth-basic' - except SubprocessError: + except subprocess.SubprocessError: log_error('No password item matching the provided name and account could be found in the osx keychain.') elif args.osx_keychain_item_account: log_error('You must specify both name and account fields for osx keychain password items') @@ -549,8 +532,7 @@ def _construct_request(per_page, page, query_args, template, auth, as_app=None): if not as_app: request.add_header('Authorization', 'Basic '.encode('ascii') + auth) else: - if not PY2: - auth = auth.encode('ascii') + auth = auth.encode('ascii') request.add_header('Authorization', 'token '.encode('ascii') + auth) request.add_header('Accept', 'application/vnd.github.machine-man-preview+json') log_info('Requesting {}?{}'.format(template, querystring)) @@ -608,11 +590,7 @@ class S3HTTPRedirectHandler(HTTPRedirectHandler): so we should remove said header on redirect. 
""" def redirect_request(self, req, fp, code, msg, headers, newurl): - if PY2: - # HTTPRedirectHandler is an old style class - request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) - else: - request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) + request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) del request.headers['Authorization'] return request From d411e2058053c5607b2bb49002cab0e110683624 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:34:43 -0700 Subject: [PATCH 123/455] Remove python 2 specific import logic --- setup.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/setup.py b/setup.py index 85e2d5f..3930574 100644 --- a/setup.py +++ b/setup.py @@ -1,23 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os -from github_backup import __version__ - -try: - from setuptools import setup - setup # workaround for pyflakes issue #13 -except ImportError: - from distutils.core import setup +from setuptools import setup -# Hack to prevent stupid TypeError: 'NoneType' object is not callable error on -# exit of python setup.py test # in multiprocessing/util.py _exit_function when -# running python setup.py test (see -# http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html) -try: - import multiprocessing - multiprocessing -except ImportError: - pass +from github_backup import __version__ def open_file(fname): From d7f07474326610bd6a01ac63157125b0ac43d450 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:35:18 -0700 Subject: [PATCH 124/455] Remove support for python 2.7 in package classifiers --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 3930574..2197a52 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ def open_file(fname): 'Development Status :: 5 - Production/Stable', 'Topic :: System :: Archiving :: 
Backup', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ], From cb1b0b6c6bb3fb2f982f775ee831abf20916c020 Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sat, 15 Aug 2020 17:35:39 -0700 Subject: [PATCH 125/455] Add support for python 3.7 and 3.8 in package classifiers --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 2197a52..9764f42 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,8 @@ def open_file(fname): 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], description='backup a github user or organization', long_description=open_file('README.rst').read(), From 3a5ef5158decabb3c0bc478b7502089d10a236cd Mon Sep 17 00:00:00 2001 From: Gary Moon Date: Fri, 30 Oct 2020 15:39:32 -0400 Subject: [PATCH 126/455] Add ability to skip archived repositories --- github_backup/github_backup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 68bfb59..9f17110 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -278,6 +278,10 @@ def parse_args(): action='store_true', dest='include_starred_gists', help='include starred gists in backup [*]') + parser.add_argument('--skip-archived', + action='store_true', + dest='skip_archived', + help='skip project if it is archived') parser.add_argument('--skip-existing', action='store_true', dest='skip_existing', @@ -742,6 +746,8 @@ def filter_repositories(args, unfiltered_repositories): repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa if name_regex: repositories = [r for r in repositories if name_regex.match(r['name'])] + if args.skip_archived: + repositories 
= [r for r in repositories if not r.get('archived')] return repositories From 638bf7a77e6c418dffa79a8406d9b3aaa3059bca Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 1 Jan 2021 21:23:49 -0500 Subject: [PATCH 127/455] Release version 0.37.0 --- CHANGES.rst | 399 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 399 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f4b24a2..cbf1f03 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,14 +1,41 @@ Changelog ========= +0.37.0 (2021-01-01) +------------------- +------------ +- Merge pull request #158 from albertyw/python3. [Jose Diaz-Gonzalez] + + Remove support for python 2 +- Add support for python 3.7 and 3.8 in package classifiers. [Albert + Wang] +- Remove support for python 2.7 in package classifiers. [Albert Wang] +- Remove python 2 specific import logic. [Albert Wang] +- Remove python 2 specific logic. [Albert Wang] +- Merge pull request #165 from garymoon/add-skip-archived. [Jose Diaz- + Gonzalez] + + Add option to skip archived repositories +- Add ability to skip archived repositories. [Gary Moon] + + 0.36.0 (2020-08-29) ------------------- ------------------------- +- Release version 0.36.0. [Jose Diaz-Gonzalez] +- Merge pull request #157 from albertyw/lint. [Jose Diaz-Gonzalez] - Add flake8 instructions to readme. [Albert Wang] - Fix regex string. [Albert Wang] +- Update boolean check. [Albert Wang] - Fix whitespace issues. [Albert Wang] - Do not use bare excepts. [Albert Wang] +- Merge pull request #161 from albertyw/circleci-project-setup. [Jose + Diaz-Gonzalez] + + Add circleci config - Add .circleci/config.yml. [Albert Wang] +- Merge pull request #160 from wbolster/patch-1. [Jose Diaz-Gonzalez] + + Include --private flag in example - Include --private flag in example. [wouter bolsterlee] By default, private repositories are not included. This is surprising. 
@@ -18,16 +45,38 @@ Changelog 0.35.0 (2020-08-05) ------------------- +- Release version 0.35.0. [Jose Diaz-Gonzalez] +- Merge pull request #156 from samanthaq/restore-optional-throttling. + [Jose Diaz-Gonzalez] + + Make API request throttling optional - Make API request throttling optional. [Samantha Baldwin] 0.34.0 (2020-07-24) ------------------- +- Release version 0.34.0. [Jose Diaz-Gonzalez] +- Merge pull request #153 from 0x6d617474/gist_ssh. [Jose Diaz-Gonzalez] + + Add logic for transforming gist repository urls to ssh - Add logic for transforming gist repository urls to ssh. [Matt Fields] +0.33.1 (2020-05-28) +------------------- +- Release version 0.33.1. [Jose Diaz-Gonzalez] +- Merge pull request #151 from garymoon/readme-update-0.33. [Jose Diaz- + Gonzalez] +- Update the readme for new switches added in 0.33. [Gary Moon] + + 0.33.0 (2020-04-13) ------------------- +- Release version 0.33.0. [Jose Diaz-Gonzalez] +- Merge pull request #149 from eht16/simple_api_request_throttling. + [Jose Diaz-Gonzalez] + + Add basic API request throttling - Add basic API request throttling. [Enrico Tröger] A simple approach to throttle API requests and so keep within the rate @@ -39,23 +88,50 @@ Changelog 0.32.0 (2020-04-13) ------------------- +- Release version 0.32.0. [Jose Diaz-Gonzalez] +- Merge pull request #148 from eht16/logging_with_timestamp. [Jose Diaz- + Gonzalez] + + Add timestamp to log messages - Add timestamp to log messages. [Enrico Tröger] +- Merge pull request #147 from tomhoover/update-readme. [Jose Diaz- + Gonzalez] + + Update README.rst to match 'github-backup -h' +- Update README.rst to match 'github-backup -h' [Tom Hoover] 0.31.0 (2020-02-25) ------------------- +- Release version 0.31.0. [Jose Diaz-Gonzalez] +- Merge pull request #146 from timm3/upstream-123. [Jose Diaz-Gonzalez] + + Authenticate as Github App - #123 update: changed --as-app 'help' description. [ethan] - #123: Support Authenticating As Github Application. 
[ethan] +0.30.0 (2020-02-14) +------------------- +- Release version 0.30.0. [Jose Diaz-Gonzalez] + + 0.29.0 (2020-02-14) ------------------- +- Release version 0.29.0. [Jose Diaz-Gonzalez] +- Merge pull request #145 from timm3/50-v0.28.0. [Jose Diaz-Gonzalez] + + #50 - refactor for friendlier import - #50 update: keep main() in bin. [ethan] - #50 - refactor for friendlier import. [ethan] 0.28.0 (2020-02-03) ------------------- +- Release version 0.28.0. [Jose Diaz-Gonzalez] +- Merge pull request #143 from smiley/patch-1. [Jose Diaz-Gonzalez] + + Remove deprecated (and removed) "git lfs fetch" flags - Remove deprecated (and removed) git lfs flags. [smiley] "--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script. @@ -63,6 +139,11 @@ Changelog 0.27.0 (2020-01-22) ------------------- +- Release version 0.27.0. [Jose Diaz-Gonzalez] +- Merge pull request #142 from einsteinx2/issue/141-import-error- + version. [Jose Diaz-Gonzalez] + + Fixed script fails if not installed from pip - Fixed script fails if not installed from pip. [Ben Baron] At the top of the script, the line from github_backup import __version__ gets the script's version number to use if the script is called with the -v or --version flags. The problem is that if the script hasn't been installed via pip (for example I cloned the repo directly to my backup server), the script will fail due to an import exception. @@ -70,14 +151,26 @@ Changelog Also presumably it will always use the version number from pip even if running a modified version from git or a fork or something, though this does not fix that as I have no idea how to check if it's running the pip installed version or not. But at least the script will now work fine if cloned from git or just copied to another machine. closes https://github.com/josegonzalez/python-github-backup/issues/141 +- Merge pull request #136 from einsteinx2/issue/88-macos-keychain- + broken-python3. 
[Jose Diaz-Gonzalez] + + Fixed macOS keychain access when using Python 3 - Fixed macOS keychain access when using Python 3. [Ben Baron] Python 3 is returning bytes rather than a string, so the string concatenation to create the auth variable was throwing an exception which the script was interpreting to mean it couldn't find the password. Adding a conversion to string first fixed the issue. +- Merge pull request #137 from einsteinx2/issue/134-only-use-auth-token- + when-needed. [Jose Diaz-Gonzalez] + + Public repos no longer include the auth token - Public repos no longer include the auth token. [Ben Baron] When backing up repositories using an auth token and https, the GitHub personal auth token is leaked in each backed up repository. It is included in the URL of each repository's git remote url. This is not needed as they are public and can be accessed without the token and can cause issues in the future if the token is ever changed, so I think it makes more sense not to have the token stored in each repo backup. I think the token should only be "leaked" like this out of necessity, e.g. it's a private repository and the --prefer-ssh option was not chosen so https with auth token was required to perform the clone. +- Merge pull request #130 from einsteinx2/issue/129-fix-crash-on- + release-asset-download-error. [Jose Diaz-Gonzalez] + + Crash when an release asset doesn't exist - Fixed comment typo. [Ben Baron] - Switched log_info to log_warning in download_file. [Ben Baron] - Crash when an release asset doesn't exist. [Ben Baron] @@ -85,6 +178,10 @@ Changelog Currently, the script crashes whenever a release asset is unable to download (for example a 404 response). This change instead logs the failure and allows the script to continue. No retry logic is enabled, but at least it prevents the crash and allows the backup to complete. Retry logic can be implemented later if wanted. 
closes https://github.com/josegonzalez/python-github-backup/issues/129 +- Merge pull request #132 from einsteinx2/issue/126-prevent-overwriting- + release-assets. [Jose Diaz-Gonzalez] + + Separate release assets and skip re-downloading - Moved asset downloading loop inside the if block. [Ben Baron] - Separate release assets and skip re-downloading. [Ben Baron] @@ -95,21 +192,36 @@ Changelog This change also now checks if the asset file already exists on disk and skips downloading it. This drastically speeds up addiotnal syncs as it no longer downloads every single release every single time. It will now only download new releases which I believe is the expected behavior. closes https://github.com/josegonzalez/python-github-backup/issues/126 +- Merge pull request #131 from einsteinx2/improve-gitignore. [Jose Diaz- + Gonzalez] + + Improved gitignore, macOS files and IDE configs - Added newline to end of file. [Ben Baron] - Improved gitignore, macOS files and IDE configs. [Ben Baron] Ignores the annoying hidden macOS files .DS_Store and ._* as well as the IDE configuration folders for contributors using the popular Visual Studio Code and Atom IDEs (more can be added later as needed). +- Update ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] 0.26.0 (2019-09-23) ------------------- +- Release version 0.26.0. [Jose Diaz-Gonzalez] +- Merge pull request #128 from Snawoot/master. [Jose Diaz-Gonzalez] + + Workaround gist clone in `--prefer-ssh` mode - Workaround gist clone in `--prefer-ssh` mode. [Vladislav Yarmak] - Create PULL_REQUEST.md. [Jose Diaz-Gonzalez] - Create ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] +- Update README.rst. [Jose Diaz-Gonzalez] +- Update README.rst. [Jose Diaz-Gonzalez] 0.25.0 (2019-07-03) ------------------- +- Release version 0.25.0. [Jose Diaz-Gonzalez] +- Merge pull request #120 from 8h2a/patch-1. [Jose Diaz-Gonzalez] + + Issue 119: Change retrieve_data to be a generator - Issue 119: Change retrieve_data to be a generator. [2a] See issue #119. 
@@ -117,21 +229,43 @@ Changelog 0.24.0 (2019-06-27) ------------------- +- Release version 0.24.0. [Jose Diaz-Gonzalez] +- Merge pull request #117 from QuicketSolutions/master. [Jose Diaz- + Gonzalez] + + Add option for Releases +- Merge pull request #5 from QuicketSolutions/QKT-45. [Ethan Timm] - QKT-45: include assets - update readme. [Ethan Timm] update readme with flag information for including assets alongside their respective releases +- Merge pull request #4 from whwright/wip-releases. [Ethan Timm] + + Download github assets - Make assets it's own flag. [Harrison Wright] - Fix super call for python2. [Harrison Wright] - Fix redirect to s3. [Harrison Wright] - WIP: download assets. [Harrison Wright] +- Merge pull request #3 from QuicketSolutions/QKT-42. [Ethan Timm] - QKT-42: releases - add readme info. [ethan] +- Merge pull request #2 from QuicketSolutions/QKT-42. [Ethan Timm] + + QKT-42 update: shorter command flag - QKT-42 update: shorter command flag. [ethan] +- Merge pull request #1 from QuicketSolutions/QKT-42. [Ethan Timm] - QKT-42: support saving release information. [ethan] +- Merge pull request #118 from whwright/115-fix-pull-details. [Jose + Diaz-Gonzalez] + + Fix pull details - Fix pull details. [Harrison Wright] 0.23.0 (2019-06-04) ------------------- +- Release version 0.23.0. [Jose Diaz-Gonzalez] +- Merge pull request #113 from kleag/master. [Jose Diaz-Gonzalez] + + Avoid to crash in case of HTTP 502 error - Avoid to crash in case of HTTP 502 error. [Gael de Chalendar] Survive also on socket.error connections like on HTTPError or URLError. @@ -148,15 +282,32 @@ Fix Refs #106 +Other +~~~~~ +- Release version 0.22.2. [Jose Diaz-Gonzalez] +- Merge pull request #107 from josegonzalez/patch-1. [Jose Diaz- + Gonzalez] + + fix: warn instead of error + 0.22.1 (2019-02-21) ------------------- +- Release version 0.22.1. [Jose Diaz-Gonzalez] +- Merge pull request #106 from jstetic/master. 
[Jose Diaz-Gonzalez] + + Log URL error - Log URL error https://github.com/josegonzalez/python-github- backup/issues/105. [JOHN STETIC] 0.22.0 (2019-02-01) ------------------- +- Release version 0.22.0. [Jose Diaz-Gonzalez] +- Merge pull request #103 from whwright/98-better-logging. [Jose Diaz- + Gonzalez] + + Fix accidental system exit with better logging strategy - Remove unnecessary sys.exit call. [W. Harrison Wright] - Add org check to avoid incorrect log output. [W. Harrison Wright] - Fix accidental system exit with better logging strategy. [W. Harrison @@ -165,6 +316,10 @@ Fix 0.21.1 (2018-12-25) ------------------- +- Release version 0.21.1. [Jose Diaz-Gonzalez] +- Merge pull request #101 from ecki/patch-2. [Jose Diaz-Gonzalez] + + Mark options which are not included in --all - Mark options which are not included in --all. [Bernd] As discussed in Issue #100 @@ -172,12 +327,22 @@ Fix 0.21.0 (2018-11-28) ------------------- +- Release version 0.21.0. [Jose Diaz-Gonzalez] +- Merge pull request #97 from whwright/94-fix-user-repos. [Jose Diaz- + Gonzalez] + + Correctly download repos when user arg != authenticated user - Correctly download repos when user arg != authenticated user. [W. Harrison Wright] 0.20.1 (2018-09-29) ------------------- +- Release version 0.20.1. [Jose Diaz-Gonzalez] +- Merge pull request #92 from whwright/87-fix-starred-bug. [Jose Diaz- + Gonzalez] + + Clone the specified user's starred repos/gists, not the authenticated user - Clone the specified user's gists, not the authenticated user. [W. Harrison Wright] - Clone the specified user's starred repos, not the authenticated user. @@ -186,6 +351,7 @@ Fix 0.20.0 (2018-03-24) ------------------- +- Release version 0.20.0. [Jose Diaz-Gonzalez] - Chore: drop Python 2.6. [Jose Diaz-Gonzalez] - Feat: simplify release script. [Jose Diaz-Gonzalez] @@ -197,15 +363,33 @@ Fix ~~~ - Cleanup pep8 violations. [Jose Diaz-Gonzalez] +Other +~~~~~ +- Release version 0.19.2. 
[Jose Diaz-Gonzalez] + + +0.19.1 (2018-03-24) +------------------- +- Release version 0.19.1. [Jose Diaz-Gonzalez] + 0.19.0 (2018-03-24) ------------------- +- Release version 0.19.0. [Jose Diaz-Gonzalez] +- Merge pull request #77 from mayflower/pull-details. [Jose Diaz- + Gonzalez] + + Pull Details - Add additional output for the current request. [Robin Gloster] This is useful to have some progress indication for huge repositories. - Add option to backup additional PR details. [Robin Gloster] Some payload is only included when requesting a single pull request +- Merge pull request #84 from johbo/fix-python36-skip-existing. [Jose + Diaz-Gonzalez] + + Mark string as binary in comparison for skip_existing - Mark string as binary in comparison for skip_existing. [Johannes Bornhold] @@ -216,11 +400,20 @@ Fix 0.18.0 (2018-02-22) ------------------- +- Release version 0.18.0. [Jose Diaz-Gonzalez] +- Merge pull request #82 from sgreene570/add-followers. [Jose Diaz- + Gonzalez] + + Add option to fetch followers/following JSON data - Add option to fetch followers/following JSON data. [Stephen Greene] 0.17.0 (2018-02-20) ------------------- +- Release version 0.17.0. [Jose Diaz-Gonzalez] +- Merge pull request #81 from whwright/gists. [Jose Diaz-Gonzalez] + + Add ability to back up gists - Short circuit gists backup process. [W. Harrison Wright] - Formatting. [W. Harrison Wright] - Add ability to backup gists. [W. Harrison Wright] @@ -228,41 +421,94 @@ Fix 0.16.0 (2018-01-22) ------------------- +- Release version 0.16.0. [Jose Diaz-Gonzalez] +- Merge pull request #78 from whwright/clone-starred-repos. [Jose Diaz- + Gonzalez] + + Clone starred repos +- Update README.rst. [Jose Diaz-Gonzalez] +- Update documentation. [W. Harrison Wright] - Change option to --all-starred. [W. Harrison Wright] - JK don't update documentation. [W. Harrison Wright] +- Update documentation. [W. Harrison Wright] - Put starred clone repoistories under a new option. [W. Harrison Wright] - Add comment. 
[W. Harrison Wright] - Add ability to clone starred repos. [W. Harrison Wright] +0.15.0 (2017-12-11) +------------------- +- Release version 0.15.0. [Jose Diaz-Gonzalez] +- Merge pull request #75 from slibby/slibby-patch-windows. [Jose Diaz- + Gonzalez] + + update check_io() to allow scripts to run on Windows +- Update logging_subprocess function. [Sam Libby] + + 1. added newline for return + 2. added one-time warning (once per subprocess) +- Update check_io() to allow scripts to run on Windows. [Sam Libby] + + 0.14.1 (2017-10-11) ------------------- +- Release version 0.14.1. [Jose Diaz-Gonzalez] +- Merge pull request #70 from epfremmer/patch-1. [Jose Diaz-Gonzalez] + + Fix arg not defined error - Fix arg not defined error. [Edward Pfremmer] 0.14.0 (2017-10-11) ------------------- +- Release version 0.14.0. [Jose Diaz-Gonzalez] +- Merge pull request #68 from pieterclaerhout/master. [Jose Diaz- + Gonzalez] + + Added support for LFS clones +- Updated the readme. [pieterclaerhout] - Added a check to see if git-lfs is installed when doing an LFS clone. [pieterclaerhout] - Added support for LFS clones. [pieterclaerhout] +- Merge pull request #66 from albertyw/python3. [Jose Diaz-Gonzalez] + + Explicitly support python 3 - Add pypi info to readme. [Albert Wang] - Explicitly support python 3 in package description. [Albert Wang] +- Merge pull request #65 from mumblez/master. [Jose Diaz-Gonzalez] + + add couple examples to help new users - Add couple examples to help new users. [Yusuf Tran] 0.13.2 (2017-05-06) ------------------- +- Release version 0.13.2. [Jose Diaz-Gonzalez] +- Merge pull request #64 from karlicoss/fix-remotes. [Jose Diaz- + Gonzalez] + + Fix remotes while updating repository - Fix remotes while updating repository. [Dima Gerasimov] 0.13.1 (2017-04-11) ------------------- +- Release version 0.13.1. [Jose Diaz-Gonzalez] +- Merge pull request #61 from McNetic/fix_empty_updated_at. 
[Jose Diaz- + Gonzalez] + + Fix error when repository has no updated_at value - Fix error when repository has no updated_at value. [Nicolai Ehemann] 0.13.0 (2017-04-05) ------------------- +- Release version 0.13.0. [Jose Diaz-Gonzalez] +- Merge pull request #59 from martintoreilly/master. [Jose Diaz- + Gonzalez] + + Add support for storing PAT in OSX keychain - Add OS check for OSX specific keychain args. [Martin O'Reilly] Keychain arguments are only supported on Mac OSX. @@ -280,11 +526,19 @@ Fix 0.12.1 (2017-03-27) ------------------- +- Release version 0.12.1. [Jose Diaz-Gonzalez] +- Merge pull request #57 from acdha/reuse-existing-remotes. [Jose Diaz- + Gonzalez] + + Avoid remote branch name churn - Avoid remote branch name churn. [Chris Adams] This avoids the backup output having lots of "[new branch]" messages because removing the old remote name removed all of the existing branch references. +- Merge pull request #55 from amaczuga/master. [Jose Diaz-Gonzalez] + + Fix detection of bare git directories - Fix detection of bare git directories. [Andrzej Maczuga] @@ -299,22 +553,49 @@ Fix Other ~~~~~ +- Release version 0.12.0. [Jose Diaz-Gonzalez] - Pep8: E501 line too long (83 > 79 characters) [Jose Diaz-Gonzalez] - Pep8: E128 continuation line under-indented for visual indent. [Jose Diaz-Gonzalez] +- Merge pull request #54 from amaczuga/master. [Jose Diaz-Gonzalez] + + Support archivization using bare git clones - Support archivization using bare git clones. [Andrzej Maczuga] +- Merge pull request #53 from trel/master. [Jose Diaz-Gonzalez] + + fix typo, 3x - Fix typo, 3x. [Terrell Russell] 0.11.0 (2016-10-26) ------------------- +- Release version 0.11.0. [Jose Diaz-Gonzalez] +- Merge pull request #52 from bjodah/fix-gh-51. [Jose Diaz-Gonzalez] + + Support --token file:///home/user/token.txt (fixes gh-51) - Support --token file:///home/user/token.txt (fixes gh-51) [Björn Dahlgren] +- Merge pull request #48 from albertyw/python3. 
[Jose Diaz-Gonzalez] + + Support Python 3 - Fix some linting. [Albert Wang] - Fix byte/string conversion for python 3. [Albert Wang] - Support python 3. [Albert Wang] +- Merge pull request #46 from remram44/encode-password. [Jose Diaz- + Gonzalez] + + Encode special characters in password - Encode special characters in password. [Remi Rampin] +- Merge pull request #45 from remram44/cli-programname. [Jose Diaz- + Gonzalez] + + Fix program name +- Update README.rst. [Remi Rampin] - Don't pretend program name is "Github Backup" [Remi Rampin] +- Merge pull request #44 from remram44/readme-git-https. [Jose Diaz- + Gonzalez] + + Don't install over insecure connection - Don't install over insecure connection. [Remi Rampin] The git:// protocol is unauthenticated and unencrypted, and no longer advertised by GitHub. Using HTTPS shouldn't impact performance. @@ -322,6 +603,10 @@ Other 0.10.3 (2016-08-21) ------------------- +- Release version 0.10.3. [Jose Diaz-Gonzalez] +- Merge pull request #30 from jonasrmichel/master. [Jose Diaz-Gonzalez] + + Fixes #29 - Fixes #29. [Jonas Michel] Reporting an error when the user's rate limit is exceeded causes @@ -339,13 +624,23 @@ Other 0.10.2 (2016-08-21) ------------------- +- Release version 0.10.2. [Jose Diaz-Gonzalez] - Add a note regarding git version requirement. [Jose Diaz-Gonzalez] Closes #37 +0.10.1 (2016-08-21) +------------------- +- Release version 0.10.1. [Jose Diaz-Gonzalez] + + 0.10.0 (2016-08-18) ------------------- +- Release version 0.10.0. [Jose Diaz-Gonzalez] +- Merge pull request #42 from robertwb/master. [Jose Diaz-Gonzalez] + + Implement incremental updates - Implement incremental updates. [Robert Bradshaw] Guarded with an --incremental flag. @@ -358,11 +653,21 @@ Other 0.9.0 (2016-03-29) ------------------ +- Release version 0.9.0. [Jose Diaz-Gonzalez] +- Merge pull request #36 from zlabjp/fix-cloning-private-repos. 
[Jose + Diaz-Gonzalez] + + Fix cloning private repos with basic auth or token - Fix cloning private repos with basic auth or token. [Kazuki Suda] 0.8.0 (2016-02-14) ------------------ +- Release version 0.8.0. [Jose Diaz-Gonzalez] +- Merge pull request #35 from eht16/issue23_store_pullrequests_once. + [Jose Diaz-Gonzalez] + + Don't store issues which are actually pull requests - Don't store issues which are actually pull requests. [Enrico Tröger] This prevents storing pull requests twice since the Github API returns @@ -373,31 +678,65 @@ Other 0.7.0 (2016-02-02) ------------------ +- Release version 0.7.0. [Jose Diaz-Gonzalez] +- Merge pull request #32 from albertyw/soft-fail-hooks. [Jose Diaz- + Gonzalez] + + Softly fail if not able to read hooks - Softly fail if not able to read hooks. [Albert Wang] +- Merge pull request #33 from albertyw/update-readme. [Jose Diaz- + Gonzalez] + + Add note about 2-factor auth in readme - Add note about 2-factor auth. [Albert Wang] +- Merge pull request #31 from albertyw/fix-private-repos. [Jose Diaz- + Gonzalez] + + Fix reading user's private repositories - Make user repository search go through endpoint capable of reading private repositories. [Albert Wang] +- Merge pull request #28 from alexmojaki/getpass. [Jose Diaz-Gonzalez] + + Prompt for password if only username given +- Update README with new CLI usage. [Alex Hall] - Prompt for password if only username given. [Alex Hall] 0.6.0 (2015-11-10) ------------------ +- Release version 0.6.0. [Jose Diaz-Gonzalez] - Force proper remote url. [Jose Diaz-Gonzalez] +- Merge pull request #24 from eht16/add_backup_hooks. [Jose Diaz- + Gonzalez] + + Add backup hooks - Improve error handling in case of HTTP errors. [Enrico Tröger] In case of a HTTP status code 404, the returned 'r' was never assigned. In case of URL errors which are not timeouts, we probably should bail out. - Add --hooks to also include web hooks into the backup. 
[Enrico Tröger] +- Merge pull request #22 from eht16/issue_17_create_output_directory. + [Jose Diaz-Gonzalez] + + Create the user specified output directory if it does not exist - Create the user specified output directory if it does not exist. [Enrico Tröger] Fixes #17. +- Merge pull request #21 from eht16/fix_get_response_missing_auth. [Jose + Diaz-Gonzalez] + + Add missing auth argument to _get_response() - Add missing auth argument to _get_response() [Enrico Tröger] When running unauthenticated and Github starts rate-limiting the client, github-backup crashes because the used auth variable in _get_response() was not available. This change should fix it. +- Merge pull request #20 from + eht16/improve_error_msg_on_non_existing_repo. [Jose Diaz-Gonzalez] + + Add repository URL to error message for non-existing repositories - Add repository URL to error message for non-existing repositories. [Enrico Tröger] @@ -408,28 +747,69 @@ Other 0.5.0 (2015-10-10) ------------------ +- Release version 0.5.0. [Jose Diaz-Gonzalez] - Add release script. [Jose Diaz-Gonzalez] - Refactor to both simplify codepath as well as follow PEP8 standards. [Jose Diaz-Gonzalez] +- Merge pull request #19 from Embed-Engineering/retry-timeout. [Jose + Diaz-Gonzalez] + + Retry 3 times when the connection times out - Retry 3 times when the connection times out. [Mathijs Jonker] +- Merge pull request #15 from kromkrom/master. [Jose Diaz-Gonzalez] + + Preserve Unicode characters in the output file +- Update github-backup. [Kirill Grushetsky] +- Update github-backup. [Kirill Grushetsky] - Made unicode output defalut. [Kirill Grushetsky] - Import alphabetised. [Kirill Grushetsky] - Preserve Unicode characters in the output file. [Kirill Grushetsky] Added option to preserve Unicode characters in the output file +- Merge pull request #14 from aensley/master. [Jose Diaz-Gonzalez] + + Added backup of labels and milestones. - Josegonzales/python-github-backup#12 Added backup of labels and milestones. 
[aensley] +- Merge pull request #11 from Embed-Engineering/master. [Jose Diaz- + Gonzalez] + + Added test for uninitialized repo's (or wiki's) - Fixed indent. [Mathijs Jonker] +- Update github-backup. [mjonker-embed] - Skip unitialized repo's. [mjonker-embed] These gave me errors which caused mails from crontab. +- Merge pull request #10 from Embed-Engineering/master. [Jose Diaz- + Gonzalez] + + Added prefer-ssh - Added prefer-ssh. [mjonker-embed] Was needed for my back-up setup, code includes this but readme wasn't updated +- Merge pull request #9 from acdha/ratelimit-retries. [Jose Diaz- + Gonzalez] + + Retry API requests which failed due to rate-limiting - Retry API requests which failed due to rate-limiting. [Chris Adams] This allows operation to continue, albeit at a slower pace, if you have enough data to trigger the API rate limits +- Release 0.4.0. [Jose Diaz-Gonzalez] +- Merge pull request #7 from acdha/repo-backup-overhaul. [Jose Diaz- + Gonzalez] + + Repo backup overhaul +- Update repository back up handling for wikis. [Chris Adams] + + * Now wikis will follow the same logic as the main repo + checkout for --prefer-ssh. + * The regular repository and wiki paths both use the same + function to handle either cloning or updating a local copy + of the remote repo + * All git updates will now use “git fetch --all --tags” + to ensure that tags and branches other than master will + also be backed up - Logging_subprocess: always log when a command fails. [Chris Adams] Previously git clones could fail without any indication @@ -444,10 +824,19 @@ Other The previous commit used the wrong URL for a private repo. This was masked by the lack of error loging in logging_subprocess (which will be in a separate branch) +- Merge pull request #6 from acdha/allow-clone-over-ssh. [Jose Diaz- + Gonzalez] + + Add an option to prefer checkouts over SSH - Add an option to prefer checkouts over SSH. 
[Chris Adams] This is really useful with private repos to avoid being nagged for credentials for every repository +- Release 0.3.0. [Jose Diaz-Gonzalez] +- Merge pull request #4 from klaude/pull_request_support. [Jose Diaz- + Gonzalez] + + Add pull request support - Add pull request support. [Kevin Laude] Back up reporitory pull requests by passing the --include-pulls @@ -459,6 +848,10 @@ Other Pull requests are automatically backed up when the --all argument is uesd. +- Merge pull request #5 from klaude/github-enterprise-support. [Jose + Diaz-Gonzalez] + + Add GitHub Enterprise Support - Add GitHub Enterprise support. [Kevin Laude] Pass the -H or --github-host argument with a GitHub Enterprise hostname @@ -468,9 +861,13 @@ Other 0.2.0 (2014-09-22) ------------------ +- Release 0.2.0. [Jose Diaz-Gonzalez] - Add support for retrieving repositories. Closes #1. [Jose Diaz- Gonzalez] - Fix PEP8 violations. [Jose Diaz-Gonzalez] +- Merge pull request #2 from johnyf/master. [Jose Diaz-Gonzalez] + + add authorization to header only if specified by user - Add authorization to header only if specified by user. [Ioannis Filippidis] - Fill out readme more. [Jose Diaz-Gonzalez] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index aae5aca..8935b5b 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.36.0' +__version__ = '0.37.0' From 613576dd251611e8fc231756b1e0c6bfbde59ee7 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 1 Jan 2021 21:26:54 -0500 Subject: [PATCH 128/455] fix: use twine for releases The old method of releasing to pypi broke for whatever reason and switching to a supported toolchain is easier than debugging the old one. 
Additionally: - Update gitchangelog - Fix license entry - Set long description type - Gitignore the temporary readme file --- .gitignore | 1 + release | 10 ++++++++-- setup.py | 3 ++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 102f70c..52a12ea 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ doc/aws_hostname.1 .vscode .atom +README \ No newline at end of file diff --git a/release b/release index a36a2c7..c48de82 100755 --- a/release +++ b/release @@ -22,7 +22,7 @@ CYAN="\033[0;36m" # cyan pip install wheel > /dev/null command -v gitchangelog >/dev/null 2>&1 || { - echo -e "${RED}WARNING: Missing gitchangelog binary, please run: pip install gitchangelog==2.2.0${COLOR_OFF}\n" + echo -e "${RED}WARNING: Missing gitchangelog binary, please run: pip install gitchangelog==3.0.4${COLOR_OFF}\n" exit 1 } @@ -31,6 +31,11 @@ command -v rst-lint > /dev/null || { exit 1 } +command -v twine > /dev/null || { + echo -e "${RED}WARNING: Missing twine binary, please run: pip install twine==3.2.0${COLOR_OFF}\n" + exit 1 +} + if [[ "$@" != "major" ]] && [[ "$@" != "minor" ]] && [[ "$@" != "patch" ]]; then echo -e "${RED}WARNING: Invalid release type, must specify 'major', 'minor', or 'patch'${COLOR_OFF}\n" exit 1 @@ -125,7 +130,8 @@ git push -q origin master && git push -q --tags if [[ "$PUBLIC" == "true" ]]; then echo -e "${YELLOW}--->${COLOR_OFF} Creating python release" cp README.rst README - python setup.py sdist bdist_wheel upload > /dev/null + python setup.py sdist bdist_wheel > /dev/null + twine upload dist/* rm README fi diff --git a/setup.py b/setup.py index 9764f42..98a67b7 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ def open_file(fname): packages=['github_backup'], scripts=['bin/github-backup'], url='http://github.com/josegonzalez/python-github-backup', - license=open('LICENSE.txt').read(), + license='MIT', classifiers=[ 'Development Status :: 5 - Production/Stable', 'Topic :: System :: Archiving :: 
Backup', @@ -30,6 +30,7 @@ def open_file(fname): ], description='backup a github user or organization', long_description=open_file('README.rst').read(), + long_description_content_type='text/x-rst', install_requires=open_file('requirements.txt').readlines(), zip_safe=True, ) From 977424c153c822e73bb4f8e2e1c6e210ba570a82 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 1 Jan 2021 21:28:25 -0500 Subject: [PATCH 129/455] Release version 0.37.1 --- CHANGES.rst | 20 +++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index cbf1f03..99febdb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,27 @@ Changelog ========= -0.37.0 (2021-01-01) +0.37.1 (2021-01-01) ------------------- ------------ + +Fix +~~~ +- Use twine for releases. [Jose Diaz-Gonzalez] + + The old method of releasing to pypi broke for whatever reason and switching to a supported toolchain is easier than debugging the old one. + + Additionally: + + - Update gitchangelog + - Fix license entry + - Set long description type + - Gitignore the temporary readme file + + +0.37.0 (2021-01-02) +------------------- +- Release version 0.37.0. [Jose Diaz-Gonzalez] - Merge pull request #158 from albertyw/python3. 
[Jose Diaz-Gonzalez] Remove support for python 2 diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 8935b5b..a4b3835 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.37.0' +__version__ = '0.37.1' From 794ccf3996462c8de06c8439373fd6e31db4da04 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 1 Jan 2021 21:30:54 -0500 Subject: [PATCH 130/455] fix: use distutils.core on error --- setup.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 98a67b7..3b4df41 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os -from setuptools import setup - from github_backup import __version__ +try: + from setuptools import setup + setup # workaround for pyflakes issue #13 +except ImportError: + from distutils.core import setup + +# Hack to prevent stupid TypeError: 'NoneType' object is not callable error on +# exit of python setup.py test # in multiprocessing/util.py _exit_function when +# running python setup.py test (see +# http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html) +try: + import multiprocessing + multiprocessing +except ImportError: + pass + def open_file(fname): return open(os.path.join(os.path.dirname(__file__), fname)) From 5a71bc5e5a611f0f63129b0a56e8bf02a33cafc2 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 1 Jan 2021 21:31:06 -0500 Subject: [PATCH 131/455] Release version 0.37.2 --- CHANGES.rst | 14 +++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 99febdb..d5a4a62 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,18 @@ Changelog ========= -0.37.1 (2021-01-01) +0.37.2 (2021-01-01) ------------------- ------------ +Fix +~~~ +- Use distutils.core on error. 
[Jose Diaz-Gonzalez] + + +0.37.1 (2021-01-02) +------------------- + Fix ~~~ - Use twine for releases. [Jose Diaz-Gonzalez] @@ -18,6 +26,10 @@ Fix - Set long description type - Gitignore the temporary readme file +Other +~~~~~ +- Release version 0.37.1. [Jose Diaz-Gonzalez] + 0.37.0 (2021-01-02) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index a4b3835..89a93b1 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.37.1' +__version__ = '0.37.2' From 4e571d07353c663bf799e140df025810c6d2cd2e Mon Sep 17 00:00:00 2001 From: Rick van Schijndel Date: Sun, 3 Jan 2021 23:05:45 +0100 Subject: [PATCH 132/455] Change broken link to a fork to a working link to upstream --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index a53a373..b7cd93b 100644 --- a/README.rst +++ b/README.rst @@ -178,4 +178,4 @@ This project currently contains no unit tests. To run linting:: .. |PyPI| image:: https://img.shields.io/pypi/v/github-backup.svg :target: https://pypi.python.org/pypi/github-backup/ .. |Python Versions| image:: https://img.shields.io/pypi/pyversions/github-backup.svg - :target: https://github.com/albertyw/github-backup + :target: https://github.com/josegonzalez/python-github-backup From f62c4eaf8b8b6da35974f75adc9480d03eb80305 Mon Sep 17 00:00:00 2001 From: Samantha Baldwin Date: Fri, 12 Feb 2021 00:46:06 -0500 Subject: [PATCH 133/455] fix: Always clone with OAuth token when provided Github Enterprise servers with 'Anonymous Git read access' disabled cause `git ls-remote` to fail (128) for a repo's `clone_url`. Using the OAuth token when provided allows cloning private AND public repos when Anonymous Git read access is disabled. 
--- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index dc69e68..762e583 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -420,7 +420,7 @@ def get_github_repo_url(args, repository): return repository['ssh_url'] auth = get_auth(args, encode=False, for_git_cli=True) - if auth and repository['private'] is True: + if auth: repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( auth, get_github_host(args), From 943e84e3d9779f67e55321abe432308b62431be5 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 13 Feb 2021 03:55:02 -0500 Subject: [PATCH 134/455] Release version 0.38.0 --- CHANGES.rst | 32 +++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d5a4a62..26e8ab1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,14 +1,44 @@ Changelog ========= -0.37.2 (2021-01-01) +0.38.0 (2021-02-13) ------------------- ------------ +Fix +~~~ +- Always clone with OAuth token when provided. [Samantha Baldwin] + + Github Enterprise servers with 'Anonymous Git read access' disabled + cause `git ls-remote` to fail (128) for a repo's `clone_url`. Using the + OAuth token when provided allows cloning private AND public repos when + Anonymous Git read access is disabled. + +Other +~~~~~ +- Merge pull request #172 from samanthaq/always-use-oauth-when-provided. + [Jose Diaz-Gonzalez] + + fix: Always clone with OAuth token when provided +- Merge pull request #170 from Mindavi/bugfix/broken-url. [Jose Diaz- + Gonzalez] + + Fix broken and incorrect link to github repository +- Change broken link to a fork to a working link to upstream. [Rick van + Schijndel] + + +0.37.2 (2021-01-02) +------------------- + Fix ~~~ - Use distutils.core on error. [Jose Diaz-Gonzalez] +Other +~~~~~ +- Release version 0.37.2. 
[Jose Diaz-Gonzalez] + 0.37.1 (2021-01-02) ------------------- diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 89a93b1..457618b 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.37.2' +__version__ = '0.38.0' From 480ce3ce2ae5886f27767d3bd885f6385e79a75f Mon Sep 17 00:00:00 2001 From: Gallo Feliz <40487446+gallofeliz@users.noreply.github.com> Date: Tue, 16 Feb 2021 13:13:51 +0100 Subject: [PATCH 135/455] Try to make compatible code with direct Python call ; reduce the hard link of the code with the cli --- bin/github-backup | 10 ++++++++-- github_backup/github_backup.py | 36 +++++++++++++--------------------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index be929bb..dfef030 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -1,6 +1,6 @@ #!/usr/bin/env python -import os +import os, sys, logging from github_backup.github_backup import ( backup_account, @@ -9,11 +9,13 @@ from github_backup.github_backup import ( filter_repositories, get_authenticated_user, log_info, + log_warning, mkdir_p, parse_args, retrieve_repositories, ) +logging.basicConfig(format='%(asctime)s.%(msecs)03d: %(message)s', datefmt='%Y-%m-%dT%H:%M:%S') def main(): args = parse_args() @@ -39,4 +41,8 @@ def main(): if __name__ == '__main__': - main() + try: + main() + except Exception as e: + log_warning(str(e)) + sys.exit(1) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 762e583..a966297 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -17,6 +17,7 @@ import select import subprocess import sys +import logging import time import platform from urllib.parse import urlparse @@ -40,15 +41,6 @@ def _get_log_date(): return datetime.datetime.isoformat(datetime.datetime.now()) - -def log_error(message): - """ - Log message (str) or messages (List[str]) to stderr and exit with status 1 - """ - 
log_warning(message) - sys.exit(1) - - def log_info(message): """ Log message (str) or messages (List[str]) to stdout @@ -57,7 +49,7 @@ def log_info(message): message = [message] for msg in message: - sys.stdout.write("{0}: {1}\n".format(_get_log_date(), msg)) + logging.info(msg) def log_warning(message): @@ -68,7 +60,7 @@ def log_warning(message): message = [message] for msg in message: - sys.stderr.write("{0}: {1}\n".format(_get_log_date(), msg)) + logging.warning(msg) def logging_subprocess(popenargs, @@ -140,7 +132,7 @@ def mask_password(url, secret='*****'): return url.replace(parsed.password, secret) -def parse_args(): +def parse_args(args = None): parser = argparse.ArgumentParser(description='Backup a github account') parser.add_argument('user', metavar='USER', @@ -331,7 +323,7 @@ def parse_args(): type=float, default=30.0, help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)') - return parser.parse_args() + return parser.parse_args(args) def get_auth(args, encode=True, for_git_cli=False): @@ -339,10 +331,10 @@ def get_auth(args, encode=True, for_git_cli=False): if args.osx_keychain_item_name: if not args.osx_keychain_item_account: - log_error('You must specify both name and account fields for osx keychain password items') + raise Exception('You must specify both name and account fields for osx keychain password items') else: if platform.system() != 'Darwin': - log_error("Keychain arguments are only supported on Mac OSX") + raise Exception("Keychain arguments are only supported on Mac OSX") try: with open(os.devnull, 'w') as devnull: token = (subprocess.check_output([ @@ -353,9 +345,9 @@ def get_auth(args, encode=True, for_git_cli=False): token = token.decode('utf-8') auth = token + ':' + 'x-oauth-basic' except subprocess.SubprocessError: - log_error('No password item matching the provided name and account could be found in the osx keychain.') + raise Exception('No password item matching 
the provided name and account could be found in the osx keychain.') elif args.osx_keychain_item_account: - log_error('You must specify both name and account fields for osx keychain password items') + raise Exception('You must specify both name and account fields for osx keychain password items') elif args.token: _path_specifier = 'file://' if args.token.startswith(_path_specifier): @@ -377,7 +369,7 @@ def get_auth(args, encode=True, for_git_cli=False): password = urlquote(args.password) auth = args.username + ':' + password elif args.password: - log_error('You must specify a username for basic auth') + raise Exception('You must specify a username for basic auth') if not auth: return None @@ -466,7 +458,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): if status_code != 200: template = 'API request returned HTTP {0}: {1}' errors.append(template.format(status_code, r.reason)) - log_error(errors) + raise Exception(', '.join(errors)) response = json.loads(r.read().decode('utf-8')) if len(errors) == 0: @@ -479,7 +471,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): yield response if len(errors) > 0: - log_error(errors) + raise Exception(', '.join(errors)) if single_request: break @@ -582,7 +574,7 @@ def _request_url_error(template, retry_timeout): if retry_timeout >= 0: return True - log_error('{} timed out to much, skipping!') + raise Exception('{} timed out to much, skipping!') return False @@ -640,7 +632,7 @@ def get_authenticated_user(args): def check_git_lfs_install(): exit_code = subprocess.call(['git', 'lfs', 'version']) if exit_code != 0: - log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + raise Exception('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') def retrieve_repositories(args, authenticated_user): From fa7148d38f0d6dd078780cc1b648874a4bac6a34 Mon Sep 17 
00:00:00 2001 From: Gallo Feliz <40487446+gallofeliz@users.noreply.github.com> Date: Tue, 16 Feb 2021 13:25:16 +0100 Subject: [PATCH 136/455] fix: fix missing INFO logs --- bin/github-backup | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/github-backup b/bin/github-backup index dfef030..25f6ddd 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -15,7 +15,11 @@ from github_backup.github_backup import ( retrieve_repositories, ) -logging.basicConfig(format='%(asctime)s.%(msecs)03d: %(message)s', datefmt='%Y-%m-%dT%H:%M:%S') +logging.basicConfig( + format='%(asctime)s.%(msecs)03d: %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S', + level=logging.INFO +) def main(): args = parse_args() From 664c2a765ea05ba0c92dda7f4113c16ae90f0957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Torres=20Cogollo?= Date: Wed, 3 Mar 2021 11:36:44 +0100 Subject: [PATCH 137/455] Fixed release_name with slash bug --- github_backup/github_backup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 762e583..ac851b7 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1009,7 +1009,8 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F log_info('Saving {0} releases to disk'.format(len(releases))) for release in releases: release_name = release['tag_name'] - output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) + release_name_safe = release_name.replace('/', '__') + output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name_safe)) with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: json_dump(release, f) @@ -1017,7 +1018,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F assets = retrieve_data(args, release['assets_url']) if len(assets) > 0: # give release asset files somewhere to live & download them (not including source 
archives) - release_assets_cwd = os.path.join(release_cwd, release_name) + release_assets_cwd = os.path.join(release_cwd, release_name_safe) mkdir_p(release_assets_cwd) for asset in assets: download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) From 853b7c46a19ce71125a127ea711ebc022fe754d3 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 18 Mar 2021 23:16:04 -0400 Subject: [PATCH 138/455] Release version 0.39.0 --- CHANGES.rst | 25 ++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 26e8ab1..47b335d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,32 @@ Changelog ========= -0.38.0 (2021-02-13) +0.39.0 (2021-03-18) ------------------- ------------ +Fix +~~~ +- Fix missing INFO logs. [Gallo Feliz] + +Other +~~~~~ +- Merge pull request #173 from gallofeliz/make-compatible-python-call. + [Jose Diaz-Gonzalez] + + Try to make compatible code with direct Python call ; reduce the hard link of the code with the cli +- Try to make compatible code with direct Python call ; reduce the hard + link of the code with the cli. [Gallo Feliz] +- Merge pull request #174 from atorrescogollo/master. [Jose Diaz- + Gonzalez] + + Fixed release_name with slash bug +- Fixed release_name with slash bug. [Álvaro Torres Cogollo] + + +0.38.0 (2021-02-13) +------------------- + Fix ~~~ - Always clone with OAuth token when provided. [Samantha Baldwin] @@ -16,6 +38,7 @@ Fix Other ~~~~~ +- Release version 0.38.0. [Jose Diaz-Gonzalez] - Merge pull request #172 from samanthaq/always-use-oauth-when-provided. 
[Jose Diaz-Gonzalez] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 457618b..31a9ee7 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.38.0' +__version__ = '0.39.0' From cc52587f5248161504d312188cbdf04b63fd0cb2 Mon Sep 17 00:00:00 2001 From: Jacek Nykis Date: Thu, 1 Jul 2021 14:39:10 +0100 Subject: [PATCH 139/455] Add retry on certain network errors This change includes certain network level errors in the retry logic. It partially address #110 but I think more comprehensive fix would be useful. --- github_backup/github_backup.py | 38 +++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 98296d1..18bb3d5 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -28,6 +28,7 @@ from urllib.request import Request from urllib.request import HTTPRedirectHandler from urllib.request import build_opener +from http.client import IncompleteRead try: from . 
import __version__ @@ -436,6 +437,21 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) + # Check if we got correct data + try: + response = json.loads(r.read().decode('utf-8')) + except IncompleteRead: + log_warning("Incomplete read error detected") + read_error = True + except json.decoder.JSONDecodeError: + log_warning("JSON decode error detected") + read_error = True + except TimeoutError: + log_warning("Tiemout error detected") + read_error = True + else: + read_error = False + # be gentle with API request limit and throttle requests if remaining requests getting low limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0)) if args.throttle_limit and limit_remaining <= args.throttle_limit: @@ -446,21 +462,37 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): time.sleep(args.throttle_pause) retries = 0 - while retries < 3 and status_code == 502: - log_warning('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') + while retries < 3 and (status_code == 502 or read_error): + log_warning('API request failed. 
Retrying in 5 seconds') retries += 1 time.sleep(5) request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) + try: + response = json.loads(r.read().decode('utf-8')) + read_error = False + except IncompleteRead: + log_warning("Incomplete read error detected") + read_error = True + except json.decoder.JSONDecodeError: + log_warning("JSON decode error detected") + read_error = True + except TimeoutError: + log_warning("Tiemout error detected") + read_error = True if status_code != 200: template = 'API request returned HTTP {0}: {1}' errors.append(template.format(status_code, r.reason)) raise Exception(', '.join(errors)) - response = json.loads(r.read().decode('utf-8')) + if read_error: + template = 'API request problem reading response for {0}' + errors.append(template.format(request)) + raise Exception(', '.join(errors)) + if len(errors) == 0: if type(response) == list: for resp in response: From 0a30a92fe4fb071ae8a7cc08eaef701cfc630ec1 Mon Sep 17 00:00:00 2001 From: Jonas Date: Tue, 6 Jul 2021 06:21:06 +0200 Subject: [PATCH 140/455] pull changes from remote use `git pull` to pull actual files from the remote instead of using `fetch` for only the metadata --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 98296d1..4d0c647 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1066,7 +1066,7 @@ def fetch_repository(name, if lfs_clone: git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] else: - git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + git_command = ['git', 'pull', '--all', '--force', '--tags', '--prune'] logging_subprocess(git_command, None, cwd=local_dir) else: log_info('Cloning {0} repository from {1} to {2}'.format( From 
18e78a4d66120961590836e63d1fa939e4d036f3 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 12 Jul 2021 00:44:33 -0400 Subject: [PATCH 141/455] Release version 0.40.0 --- github_backup/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 31a9ee7..eb9b6f1 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.39.0' +__version__ = '0.40.0' From b1acfed83a1b1244f4a4d836d4b1739b82edaa42 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Wed, 14 Jul 2021 10:53:14 -0500 Subject: [PATCH 142/455] Revert to fetch --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 4ef8b7e..18bb3d5 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1098,7 +1098,7 @@ def fetch_repository(name, if lfs_clone: git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] else: - git_command = ['git', 'pull', '--all', '--force', '--tags', '--prune'] + git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] logging_subprocess(git_command, None, cwd=local_dir) else: log_info('Cloning {0} repository from {1} to {2}'.format( From f7f9ffd017b2d23f17d08e263085f19e8301fda4 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 22 Sep 2021 12:29:08 -0400 Subject: [PATCH 143/455] Release version 0.40.1 --- github_backup/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index eb9b6f1..5e1c3f3 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.40.0' +__version__ = '0.40.1' From 64562f2460ef76927563ffa4f42bce7d685d1669 Mon Sep 17 00:00:00 2001 From: atinary-afoulon <87645799+atinary-afoulon@users.noreply.github.com> Date: Mon, 13 Dec 2021 14:33:21 +0100 Subject: [PATCH 
144/455] Fix lint issues raised by Flake8 According to job: [ https://app.circleci.com/pipelines/github/josegonzalez/python-github-backup/30/workflows/74eb93f2-2505-435d-b728-03b3cc04c14a/jobs/23 ] Failed on the following checks: ./github_backup/github_backup.py:20:1: F811 redefinition of unused 'logging' from line 14 ./github_backup/github_backup.py:45:1: E302 expected 2 blank lines, found 1 ./github_backup/github_backup.py:136:20: E251 unexpected spaces around keyword / parameter equals --- github_backup/github_backup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 18bb3d5..e9217cc 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -11,7 +11,6 @@ import errno import getpass import json -import logging import os import re import select @@ -42,6 +41,7 @@ def _get_log_date(): return datetime.datetime.isoformat(datetime.datetime.now()) + def log_info(message): """ Log message (str) or messages (List[str]) to stdout @@ -133,7 +133,7 @@ def mask_password(url, secret='*****'): return url.replace(parsed.password, secret) -def parse_args(args = None): +def parse_args(args=None): parser = argparse.ArgumentParser(description='Backup a github account') parser.add_argument('user', metavar='USER', From 9e1800f56ec899c8b00daac464967ca312f76520 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 29 Dec 2021 12:49:10 -0500 Subject: [PATCH 145/455] Release version 0.40.2 --- github_backup/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 5e1c3f3..b228564 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.40.1' +__version__ = '0.40.2' From 885e94a102affb6de77c9a7df7ff665149b71571 Mon Sep 17 00:00:00 2001 From: Louis Parisot Date: Thu, 3 Feb 2022 11:45:59 +0100 Subject: [PATCH 146/455] git lfs clone doe snot 
respect --mirror --- github_backup/github_backup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index e9217cc..873a40c 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1106,16 +1106,17 @@ def fetch_repository(name, masked_remote_url, local_dir)) if bare_clone: + git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + logging_subprocess(git_command, None) if lfs_clone: - git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir] - else: - git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + logging_subprocess(git_command, None, cwd=local_dir) else: if lfs_clone: git_command = ['git', 'lfs', 'clone', remote_url, local_dir] else: git_command = ['git', 'clone', remote_url, local_dir] - logging_subprocess(git_command, None) + logging_subprocess(git_command, None) def backup_account(args, output_directory): From 63441ebfbc9f4e5f59922670a5167da4b880e00a Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Wed, 2 Mar 2022 02:36:41 -0500 Subject: [PATCH 147/455] Release version 0.41.0 --- github_backup/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index b228564..9f86a39 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.40.2' +__version__ = '0.41.0' From 125cfca05eae096053fd1598f67aac916727bf01 Mon Sep 17 00:00:00 2001 From: Harrison Wright Date: Wed, 23 Mar 2022 19:05:36 -0500 Subject: [PATCH 148/455] Refactor logging and add support for quiet flag --- bin/github-backup | 23 +++---- github_backup/github_backup.py | 106 +++++++++++++-------------------- 2 files changed, 56 insertions(+), 73 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 25f6ddd..faea49f 100755 --- a/bin/github-backup +++ 
b/bin/github-backup @@ -2,38 +2,41 @@ import os, sys, logging +logging.basicConfig( + format='%(asctime)s.%(msecs)03d: %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S', + level=logging.INFO +) + from github_backup.github_backup import ( backup_account, backup_repositories, check_git_lfs_install, filter_repositories, get_authenticated_user, - log_info, - log_warning, + logger, mkdir_p, parse_args, retrieve_repositories, ) -logging.basicConfig( - format='%(asctime)s.%(msecs)03d: %(message)s', - datefmt='%Y-%m-%dT%H:%M:%S', - level=logging.INFO -) def main(): args = parse_args() + if args.quiet: + logger.setLevel(logging.WARNING) + output_directory = os.path.realpath(args.output_directory) if not os.path.isdir(output_directory): - log_info('Create output directory {0}'.format(output_directory)) + logger.info('Create output directory {0}'.format(output_directory)) mkdir_p(output_directory) if args.lfs_clone: check_git_lfs_install() if not args.as_app: - log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) + logger.info('Backing up user {0} to {1}'.format(args.user, output_directory)) authenticated_user = get_authenticated_user(args) else: authenticated_user = {'login': None} @@ -48,5 +51,5 @@ if __name__ == '__main__': try: main() except Exception as e: - log_warning(str(e)) + logger.warning(str(e)) sys.exit(1) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index e9217cc..fd4003d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -7,7 +7,6 @@ import base64 import calendar import codecs -import datetime import errno import getpass import json @@ -37,31 +36,7 @@ FNULL = open(os.devnull, 'w') - -def _get_log_date(): - return datetime.datetime.isoformat(datetime.datetime.now()) - - -def log_info(message): - """ - Log message (str) or messages (List[str]) to stdout - """ - if type(message) == str: - message = [message] - - for msg in message: - logging.info(msg) - - -def log_warning(message): - 
""" - Log message (str) or messages (List[str]) to stderr - """ - if type(message) == str: - message = [message] - - for msg in message: - logging.warning(msg) +logger = logging.getLogger(__name__) def logging_subprocess(popenargs, @@ -77,7 +52,7 @@ def logging_subprocess(popenargs, child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs) if sys.platform == 'win32': - log_info("Windows operating system detected - no subprocess logging will be returned") + logger.info("Windows operating system detected - no subprocess logging will be returned") log_level = {child.stdout: stdout_log_level, child.stderr: stderr_log_level} @@ -139,6 +114,11 @@ def parse_args(args=None): metavar='USER', type=str, help='github username') + parser.add_argument('-q', + '--quiet', + action='store_true', + dest='quiet', + help='supress non-error log messages') parser.add_argument('-u', '--username', dest='username', @@ -441,13 +421,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): try: response = json.loads(r.read().decode('utf-8')) except IncompleteRead: - log_warning("Incomplete read error detected") + logger.warning("Incomplete read error detected") read_error = True except json.decoder.JSONDecodeError: - log_warning("JSON decode error detected") + logger.warning("JSON decode error detected") read_error = True except TimeoutError: - log_warning("Tiemout error detected") + logger.warning("Tiemout error detected") read_error = True else: read_error = False @@ -455,7 +435,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): # be gentle with API request limit and throttle requests if remaining requests getting low limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0)) if args.throttle_limit and limit_remaining <= args.throttle_limit: - log_info( + logger.info( 'API request limit hit: {} requests left, pausing further requests for {}s'.format( limit_remaining, args.throttle_pause)) @@ 
-463,7 +443,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): retries = 0 while retries < 3 and (status_code == 502 or read_error): - log_warning('API request failed. Retrying in 5 seconds') + logger.warning('API request failed. Retrying in 5 seconds') retries += 1 time.sleep(5) request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa @@ -474,13 +454,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): response = json.loads(r.read().decode('utf-8')) read_error = False except IncompleteRead: - log_warning("Incomplete read error detected") + logger.warning("Incomplete read error detected") read_error = True except json.decoder.JSONDecodeError: - log_warning("JSON decode error detected") + logger.warning("JSON decode error detected") read_error = True except TimeoutError: - log_warning("Tiemout error detected") + logger.warning("Tiemout error detected") read_error = True if status_code != 200: @@ -532,12 +512,12 @@ def _get_response(request, auth, template): errors, should_continue = _request_http_error(exc, auth, errors) # noqa r = exc except URLError as e: - log_warning(e.reason) + logger.warning(e.reason) should_continue = _request_url_error(template, retry_timeout) if not should_continue: raise except socket.error as e: - log_warning(e.strerror) + logger.warning(e.strerror) should_continue = _request_url_error(template, retry_timeout) if not should_continue: raise @@ -563,7 +543,7 @@ def _construct_request(per_page, page, query_args, template, auth, as_app=None): auth = auth.encode('ascii') request.add_header('Authorization', 'token '.encode('ascii') + auth) request.add_header('Accept', 'application/vnd.github.machine-man-preview+json') - log_info('Requesting {}?{}'.format(template, querystring)) + logger.info('Requesting {}?{}'.format(template, querystring)) return request @@ -587,10 +567,10 @@ def _request_http_error(exc, auth, errors): delta = max(10, 
reset - gm_now) limit = headers.get('x-ratelimit-limit') - log_warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta)) # noqa + logger.warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta)) # noqa if auth is None: - log_info('Hint: Authenticate to raise your GitHub rate limit') + logger.info('Hint: Authenticate to raise your GitHub rate limit') time.sleep(delta) should_continue = True @@ -600,7 +580,7 @@ def _request_http_error(exc, auth, errors): def _request_url_error(template, retry_timeout): # Incase of a connection timing out, we can retry a few time # But we won't crash and not back-up the rest now - log_info('{} timed out'.format(template)) + logger.info('{} timed out'.format(template)) retry_timeout -= 1 if retry_timeout >= 0: @@ -645,14 +625,14 @@ def download_file(url, path, auth): f.write(chunk) except HTTPError as exc: # Gracefully handle 404 responses (and others) when downloading from S3 - log_warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) + logger.warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) except URLError as e: # Gracefully handle other URL errors - log_warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) + logger.warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) except socket.error as e: # Gracefully handle socket errors # TODO: Implement retry logic - log_warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) + logger.warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) def get_authenticated_user(args): @@ -668,7 +648,7 @@ def check_git_lfs_install(): def retrieve_repositories(args, authenticated_user): - log_info('Retrieving repositories') + logger.info('Retrieving repositories') single_request = False if args.user == 
authenticated_user['login']: # we must use the /user/repos API to be able to access private repos @@ -676,7 +656,7 @@ def retrieve_repositories(args, authenticated_user): get_github_api_host(args)) else: if args.private and not args.organization: - log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') + logger.warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') template = 'https://{0}/users/{1}/repos'.format( get_github_api_host(args), args.user) @@ -724,7 +704,7 @@ def retrieve_repositories(args, authenticated_user): def filter_repositories(args, unfiltered_repositories): - log_info('Filtering repositories') + logger.info('Filtering repositories') repositories = [] for r in unfiltered_repositories: @@ -755,7 +735,7 @@ def filter_repositories(args, unfiltered_repositories): def backup_repositories(args, output_directory, repositories): - log_info('Backing up repositories') + logger.info('Backing up repositories') repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) if args.incremental: @@ -837,7 +817,7 @@ def backup_issues(args, repo_cwd, repository, repos_template): if args.skip_existing and has_issues_dir: return - log_info('Retrieving {0} issues'.format(repository['full_name'])) + logger.info('Retrieving {0} issues'.format(repository['full_name'])) issue_cwd = os.path.join(repo_cwd, 'issues') mkdir_p(repo_cwd, issue_cwd) @@ -873,7 +853,7 @@ def backup_issues(args, repo_cwd, repository, repos_template): issues_skipped_message = ' (skipped {0} pull requests)'.format( issues_skipped) - log_info('Saving {0} issues to disk{1}'.format( + logger.info('Saving {0} issues to disk{1}'.format( len(list(issues.keys())), issues_skipped_message)) comments_template = _issue_template + '/{0}/comments' events_template = _issue_template + '/{0}/events' @@ -895,7 +875,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): if 
args.skip_existing and has_pulls_dir: return - log_info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa + logger.info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa pulls_cwd = os.path.join(repo_cwd, 'pulls') mkdir_p(repo_cwd, pulls_cwd) @@ -939,7 +919,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): single_request=True )[0] - log_info('Saving {0} pull requests to disk'.format( + logger.info('Saving {0} pull requests to disk'.format( len(list(pulls.keys())))) comments_template = _pulls_template + '/{0}/comments' commits_template = _pulls_template + '/{0}/commits' @@ -961,7 +941,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template): if args.skip_existing and os.path.isdir(milestone_cwd): return - log_info('Retrieving {0} milestones'.format(repository['full_name'])) + logger.info('Retrieving {0} milestones'.format(repository['full_name'])) mkdir_p(repo_cwd, milestone_cwd) template = '{0}/{1}/milestones'.format(repos_template, @@ -977,7 +957,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template): for milestone in _milestones: milestones[milestone['number']] = milestone - log_info('Saving {0} milestones to disk'.format( + logger.info('Saving {0} milestones to disk'.format( len(list(milestones.keys())))) for number, milestone in list(milestones.items()): milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) @@ -1000,7 +980,7 @@ def backup_labels(args, repo_cwd, repository, repos_template): def backup_hooks(args, repo_cwd, repository, repos_template): auth = get_auth(args) if not auth: - log_info("Skipping hooks since no authentication provided") + logger.info("Skipping hooks since no authentication provided") return hook_cwd = os.path.join(repo_cwd, 'hooks') output_file = '{0}/hooks.json'.format(hook_cwd) @@ -1013,7 +993,7 @@ def backup_hooks(args, repo_cwd, repository, repos_template): output_file, hook_cwd) except SystemExit: - log_info("Unable to read hooks, 
skipping") + logger.info("Unable to read hooks, skipping") def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): @@ -1021,7 +1001,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F # give release files somewhere to live & log intent release_cwd = os.path.join(repo_cwd, 'releases') - log_info('Retrieving {0} releases'.format(repository_fullname)) + logger.info('Retrieving {0} releases'.format(repository_fullname)) mkdir_p(repo_cwd, release_cwd) query_args = {} @@ -1030,7 +1010,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F releases = retrieve_data(args, release_template, query_args=query_args) # for each release, store it - log_info('Saving {0} releases to disk'.format(len(releases))) + logger.info('Saving {0} releases to disk'.format(len(releases))) for release in releases: release_name = release['tag_name'] release_name_safe = release_name.replace('/', '__') @@ -1075,12 +1055,12 @@ def fetch_repository(name, stderr=FNULL, shell=True) if initialized == 128: - log_info("Skipping {0} ({1}) since it's not initialized".format( + logger.info("Skipping {0} ({1}) since it's not initialized".format( name, masked_remote_url)) return if clone_exists: - log_info('Updating {0} in {1}'.format(name, local_dir)) + logger.info('Updating {0} in {1}'.format(name, local_dir)) remotes = subprocess.check_output(['git', 'remote', 'show'], cwd=local_dir) @@ -1101,7 +1081,7 @@ def fetch_repository(name, git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] logging_subprocess(git_command, None, cwd=local_dir) else: - log_info('Cloning {0} repository from {1} to {2}'.format( + logger.info('Cloning {0} repository from {1} to {2}'.format( name, masked_remote_url, local_dir)) @@ -1161,11 +1141,11 @@ def backup_account(args, output_directory): def _backup_data(args, name, template, output_file, output_directory): skip_existing = args.skip_existing if not skip_existing or 
not os.path.exists(output_file): - log_info('Retrieving {0} {1}'.format(args.user, name)) + logger.info('Retrieving {0} {1}'.format(args.user, name)) mkdir_p(output_directory) data = retrieve_data(args, template) - log_info('Writing {0} {1} to disk'.format(len(data), name)) + logger.info('Writing {0} {1} to disk'.format(len(data), name)) with codecs.open(output_file, 'w', encoding='utf-8') as f: json_dump(data, f) From 3d5eb359e2956c2a784596ed87ff32c255795bdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrico=20Tr=C3=B6ger?= Date: Thu, 26 May 2022 10:04:36 +0200 Subject: [PATCH 149/455] Add --log-level command line argument Support changing the log level to the desired value easily. For example, this is useful to suppress progress messages but keep logging warnings and errors. --- README.rst | 21 ++++++++++++--------- bin/github-backup | 5 +++++ github_backup/github_backup.py | 5 +++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index b7cd93b..e1a6d01 100644 --- a/README.rst +++ b/README.rst @@ -30,15 +30,15 @@ Usage CLI Usage is as follows:: github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] [--as-app] - [-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred] - [--watched] [--followers] [--following] [--all] - [--issues] [--issue-comments] [--issue-events] [--pulls] - [--pull-comments] [--pull-commits] [--pull-details] - [--labels] [--hooks] [--milestones] [--repositories] - [--bare] [--lfs] [--wikis] [--gists] [--starred-gists] - [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] - [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] - [-P] [-F] [--prefer-ssh] [-v] + [-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i] [--starred] + [--all-starred] [--watched] [--followers] [--following] + [--all] [--issues] [--issue-comments] [--issue-events] + [--pulls] [--pull-comments] [--pull-commits] + [--pull-details] [--labels] [--hooks] [--milestones] + [--repositories] [--bare] [--lfs] [--wikis] [--gists] + [--starred-gists] 
[--skip-archived] [--skip-existing] + [-L [LANGUAGES ...]] [-N NAME_REGEX] [-H GITHUB_HOST] + [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] [--releases] [--assets] [--throttle-limit THROTTLE_LIMIT] @@ -63,6 +63,9 @@ CLI Usage is as follows:: --as-app authenticate as github app instead of as a user. -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY directory at which to backup the repositories + -l LOG_LEVEL, --log-level LOG_LEVEL + log level to use (default: info, possible levels: + debug, info, warning, error, critical) -i, --incremental incremental backup --starred include JSON output of starred repositories in backup --all-starred include starred repositories in backup [*] diff --git a/bin/github-backup b/bin/github-backup index 25f6ddd..8d2698b 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -32,6 +32,11 @@ def main(): if args.lfs_clone: check_git_lfs_install() + if args.log_level: + log_level = logging.getLevelName(args.log_level.upper()) + if isinstance(log_level, int): + logging.root.setLevel(log_level) + if not args.as_app: log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) authenticated_user = get_authenticated_user(args) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 873a40c..76f369a 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -162,6 +162,11 @@ def parse_args(args=None): default='.', dest='output_directory', help='directory at which to backup the repositories') + parser.add_argument('-l', + '--log-level', + default='info', + dest='log_level', + help='log level to use (default: info, possible levels: debug, info, warning, error, critical)') parser.add_argument('-i', '--incremental', action='store_true', From f8a16ee0f85d4fa694f7acc8f8419fb799a63a7b Mon Sep 17 00:00:00 2001 From: kornpisey Date: Mon, 30 May 2022 13:46:41 +0900 Subject: [PATCH 150/455] 
added --no-prune option to disable prune option when doing git fetch --- github_backup/github_backup.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 873a40c..5ea1d42 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -239,6 +239,10 @@ def parse_args(args=None): action='store_true', dest='bare_clone', help='clone bare repositories') + parser.add_argument('--no-prune', + action='store_true', + dest='no_prune', + help='disable prune option for git fetch') parser.add_argument('--lfs', action='store_true', dest='lfs_clone', @@ -790,7 +794,8 @@ def backup_repositories(args, output_directory, repositories): repo_dir, skip_existing=args.skip_existing, bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) + lfs_clone=args.lfs_clone, + no_prune=args.no_prune) if repository.get('is_gist'): # dump gist information to a file as well @@ -807,8 +812,9 @@ def backup_repositories(args, output_directory, repositories): os.path.join(repo_cwd, 'wiki'), skip_existing=args.skip_existing, bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) - + lfs_clone=args.lfs_clone, + no_prune=args.no_prune + ) if args.include_issues or args.include_everything: backup_issues(args, repo_cwd, repository, repos_template) @@ -1053,7 +1059,8 @@ def fetch_repository(name, local_dir, skip_existing=False, bare_clone=False, - lfs_clone=False): + lfs_clone=False, + no_prune=False): if bare_clone: if os.path.exists(local_dir): clone_exists = subprocess.check_output(['git', @@ -1099,6 +1106,8 @@ def fetch_repository(name, git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] else: git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + if no_prune: + git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) else: log_info('Cloning {0} repository from {1} to {2}'.format( @@ -1110,6 +1119,8 @@ def fetch_repository(name, 
logging_subprocess(git_command, None) if lfs_clone: git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + if no_prune: + git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) else: if lfs_clone: From 75ec773a6f9033951a2a548a3b0276d25c431a72 Mon Sep 17 00:00:00 2001 From: kornpisey Date: Mon, 30 May 2022 13:50:23 +0900 Subject: [PATCH 151/455] fix bug forever retry when request url error --- github_backup/github_backup.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 873a40c..371917b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -533,12 +533,12 @@ def _get_response(request, auth, template): r = exc except URLError as e: log_warning(e.reason) - should_continue = _request_url_error(template, retry_timeout) + should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise except socket.error as e: log_warning(e.strerror) - should_continue = _request_url_error(template, retry_timeout) + should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise @@ -598,16 +598,15 @@ def _request_http_error(exc, auth, errors): def _request_url_error(template, retry_timeout): - # Incase of a connection timing out, we can retry a few time + # In case of a connection timing out, we can retry a few time # But we won't crash and not back-up the rest now log_info('{} timed out'.format(template)) retry_timeout -= 1 if retry_timeout >= 0: - return True + return True, retry_timeout raise Exception('{} timed out to much, skipping!') - return False class S3HTTPRedirectHandler(HTTPRedirectHandler): From b629a865f45928cc85de2483231a98bbb1a24dc8 Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 12 Jul 2022 18:24:27 +0200 Subject: [PATCH 152/455] Backup regular pull request comments as well Before, only review comments were backed up; regular comments need 
to be fetched via issue API. Fixes: https://github.com/josegonzalez/python-github-backup/issues/150 --- github_backup/github_backup.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 873a40c..4634fb8 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -902,6 +902,8 @@ def backup_pulls(args, repo_cwd, repository, repos_template): pulls = {} _pulls_template = '{0}/{1}/pulls'.format(repos_template, repository['full_name']) + _issue_template = '{0}/{1}/issues'.format(repos_template, + repository['full_name']) query_args = { 'filter': 'all', 'state': 'all', @@ -941,10 +943,17 @@ def backup_pulls(args, repo_cwd, repository, repos_template): log_info('Saving {0} pull requests to disk'.format( len(list(pulls.keys())))) + # Comments from pulls API are only _review_ comments + # regular comments need to be fetched via issue API. + # For backwards compatibility with versions <= 0.41.0 + # keep name "comment_data" for review comments + comments_regular_template = _issue_template + '/{0}/comments' comments_template = _pulls_template + '/{0}/comments' commits_template = _pulls_template + '/{0}/commits' for number, pull in list(pulls.items()): if args.include_pull_comments or args.include_everything: + template = comments_regular_template.format(number) + pulls[number]['comment_regular_data'] = retrieve_data(args, template) template = comments_template.format(number) pulls[number]['comment_data'] = retrieve_data(args, template) if args.include_pull_commits or args.include_everything: From 753a26d0d6380130db67cfee21685fa9bb05637b Mon Sep 17 00:00:00 2001 From: npounder Date: Fri, 23 Sep 2022 14:35:02 +0100 Subject: [PATCH 153/455] add option to exclude repositories --- README.rst | 6 ++++-- github_backup/github_backup.py | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index b7cd93b..689c0b6 100644 --- a/README.rst +++ 
b/README.rst @@ -41,8 +41,8 @@ CLI Usage is as follows:: [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] - [--releases] [--assets] [--throttle-limit THROTTLE_LIMIT] - [--throttle-pause THROTTLE_PAUSE] + [--releases] [--assets] [--exclude [REPOSITORY [REPOSITORY ...]] + [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] USER Backup a github account @@ -112,6 +112,8 @@ CLI Usage is as follows:: binaries --assets include assets alongside release information; only applies if including releases + --exclude [REPOSITORY [REPOSITORY ...]] + names of repositories to exclude from backup. --throttle-limit THROTTLE_LIMIT start throttling of GitHub API requests after this amount of API requests remain diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 873a40c..393c198 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -324,6 +324,10 @@ def parse_args(args=None): type=float, default=30.0, help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)') + parser.add_argument('--exclude', + dest='exclude', + help='names of repositories to exclude', + nargs="*") return parser.parse_args(args) @@ -750,6 +754,8 @@ def filter_repositories(args, unfiltered_repositories): repositories = [r for r in repositories if name_regex.match(r['name'])] if args.skip_archived: repositories = [r for r in repositories if not r.get('archived')] + if args.exclude: + repositories = [r for r in repositories if r['name'] not in args.exclude] return repositories From 07e32b186c98a1941213b38864fcddc8aa29f827 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 28 Nov 2022 00:25:13 -0500 Subject: [PATCH 154/455] Release version 0.42.0 --- github_backup/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 
9f86a39..ccd8b38 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.41.0' +__version__ = '0.42.0' From fbb977acf4bf8a63336369a11aef441084f0355a Mon Sep 17 00:00:00 2001 From: Ken Bailey <12869442+kenbailey@users.noreply.github.com> Date: Tue, 28 Feb 2023 15:44:14 -0700 Subject: [PATCH 155/455] Check both updated_at and pushed_at properties Check both updated_at and pushed_at dates to get the last_update to reduce data retrieved on incremental api calls using since. --- github_backup/github_backup.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 738831b..e2a0a7d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -768,7 +768,6 @@ def backup_repositories(args, output_directory, repositories): repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) if args.incremental: - last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa last_update_path = os.path.join(output_directory, 'last_update') if os.path.exists(last_update_path): args.since = open(last_update_path).read().strip() @@ -777,7 +776,13 @@ def backup_repositories(args, output_directory, repositories): else: args.since = None + last_update = '0000-00-00T00:00:00Z' for repository in repositories: + if 'updated_at' in repository and repository['updated_at'] > last_update: + last_update = repository['updated_at'] + elif 'pushed_at' in repository and repository['pushed_at'] > last_update: + last_update = repository['pushed_at'] + if repository.get('is_gist'): repo_cwd = os.path.join(output_directory, 'gists', repository['id']) elif repository.get('is_starred'): @@ -840,6 +845,9 @@ def backup_repositories(args, output_directory, repositories): include_assets=args.include_assets or args.include_everything) if args.incremental: + if 
last_update == '0000-00-00T00:00:00Z': + last_update = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime()) + open(last_update_path, 'w').write(last_update) From 60cb484a19448542b7cf7c523cd01dac56f30074 Mon Sep 17 00:00:00 2001 From: froggleston Date: Wed, 22 Mar 2023 14:53:07 +0000 Subject: [PATCH 156/455] Add support for fine-grained tokens --- github_backup/github_backup.py | 59 ++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 738831b..a024cc5 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -150,9 +150,13 @@ def parse_args(args=None): 'If a username is given but not a password, the ' 'password will be prompted for.') parser.add_argument('-t', - '--token', - dest='token', + '--token-classic', + dest='token_classic', help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa + parser.add_argument('-f', + '--token-fine', + dest='token_fine', + help='fine-grained personal access token (github_pat_....)') # noqa parser.add_argument('--as-app', action='store_true', dest='as_app', @@ -357,18 +361,23 @@ def get_auth(args, encode=True, for_git_cli=False): raise Exception('No password item matching the provided name and account could be found in the osx keychain.') elif args.osx_keychain_item_account: raise Exception('You must specify both name and account fields for osx keychain password items') - elif args.token: + elif args.token_fine: + if args.token_fine.startswith("github_pat_"): + auth = args.token_fine + else: + raise Exception("Fine-grained token supplied does not look like a GitHub PAT") + elif args.token_classic: _path_specifier = 'file://' - if args.token.startswith(_path_specifier): - args.token = open(args.token[len(_path_specifier):], - 'rt').readline().strip() + if args.token_classic.startswith(_path_specifier): + args.token_classic = 
open(args.token_classic[len(_path_specifier):], + 'rt').readline().strip() if not args.as_app: - auth = args.token + ':' + 'x-oauth-basic' + auth = args.token_classic + ':' + 'x-oauth-basic' else: if not for_git_cli: - auth = args.token + auth = args.token_classic else: - auth = 'x-access-token:' + args.token + auth = 'x-access-token:' + args.token_classic elif args.username: if not args.password: args.password = getpass.getpass() @@ -383,7 +392,7 @@ def get_auth(args, encode=True, for_git_cli=False): if not auth: return None - if not encode: + if not encode or args.token_fine is not None: return auth return base64.b64encode(auth.encode('ascii')) @@ -421,12 +430,19 @@ def get_github_repo_url(args, repository): return repository['ssh_url'] auth = get_auth(args, encode=False, for_git_cli=True) - if auth: - repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( - auth, - get_github_host(args), - repository['owner']['login'], - repository['name']) + if auth: + if args.token_fine is None: + repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( + auth, + get_github_host(args), + repository['owner']['login'], + repository['name']) + else: + repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( + "oauth2:"+auth, + get_github_host(args), + repository['owner']['login'], + repository['name']) else: repo_url = repository['clone_url'] @@ -441,7 +457,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): while True: page = page + 1 - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app, fine=True if args.token_fine is not None else False) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) @@ -474,7 +490,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): log_warning('API request failed. 
Retrying in 5 seconds') retries += 1 time.sleep(5) - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app, fine=True if args.token_fine is not None else False) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) @@ -557,7 +573,7 @@ def _get_response(request, auth, template): return r, errors -def _construct_request(per_page, page, query_args, template, auth, as_app=None): +def _construct_request(per_page, page, query_args, template, auth, as_app=None, fine=False): querystring = urlencode(dict(list({ 'per_page': per_page, 'page': page @@ -566,7 +582,10 @@ def _construct_request(per_page, page, query_args, template, auth, as_app=None): request = Request(template + '?' + querystring) if auth is not None: if not as_app: - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + if fine: + request.add_header('Authorization', 'token ' + auth) + else: + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) else: auth = auth.encode('ascii') request.add_header('Authorization', 'token '.encode('ascii') + auth) From 61275c61b246452dc2867b9ee83aa9da2c746530 Mon Sep 17 00:00:00 2001 From: Robert Davey Date: Tue, 28 Mar 2023 16:52:48 +0100 Subject: [PATCH 157/455] Update README.rst Add flags and example for fine-grained tokens --- README.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 689c0b6..aadd645 100644 --- a/README.rst +++ b/README.rst @@ -29,7 +29,8 @@ Usage CLI Usage is as follows:: - github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] [--as-app] + github-backup [-h] [-u USERNAME] [-p PASSWORD] + [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [--as-app] [-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred] [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] 
@@ -57,7 +58,9 @@ CLI Usage is as follows:: -p PASSWORD, --password PASSWORD password for basic auth. If a username is given but not a password, the password will be prompted for. - -t TOKEN, --token TOKEN + -f TOKEN_FINE, --token-fine TOKEN + fine-grained personal access token + -t TOKEN_CLASSIC, --token-classic TOKEN personal access, OAuth, or JSON Web token, or path to token (file://...) --as-app authenticate as github app instead of as a user. @@ -160,13 +163,13 @@ Backup all repositories, including private ones:: export ACCESS_TOKEN=SOME-GITHUB-TOKEN github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private -Backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: +Use a fine-grained access token to backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: export ACCESS_TOKEN=SOME-GITHUB-TOKEN ORGANIZATION=docker REPO=cli # e.g. git@github.com:docker/cli.git - github-backup $ORGANIZATION -P -t $ACCESS_TOKEN -o . --all -O -R $REPO + github-backup $ORGANIZATION -P -f $ACCESS_TOKEN -o . --all -O -R $REPO Testing ======= From cca8a851ad4370c9d421029baba89dc077d17b50 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:32:39 -0400 Subject: [PATCH 158/455] chore: run black --- github_backup/__init__.py | 2 +- github_backup/github_backup.py | 1255 ++++++++++++++++++-------------- setup.py | 38 +- 3 files changed, 719 insertions(+), 576 deletions(-) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index ccd8b38..92717f7 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.42.0' +__version__ = "0.42.0" diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0a7765a..7bf021d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -31,11 +31,12 @@ try: from . 
import __version__ + VERSION = __version__ except ImportError: - VERSION = 'unknown' + VERSION = "unknown" -FNULL = open(os.devnull, 'w') +FNULL = open(os.devnull, "w") def _get_log_date(): @@ -64,31 +65,32 @@ def log_warning(message): logging.warning(msg) -def logging_subprocess(popenargs, - logger, - stdout_log_level=logging.DEBUG, - stderr_log_level=logging.ERROR, - **kwargs): +def logging_subprocess( + popenargs, + logger, + stdout_log_level=logging.DEBUG, + stderr_log_level=logging.ERROR, + **kwargs +): """ Variant of subprocess.call that accepts a logger instead of stdout/stderr, and logs stdout messages via logger.debug and stderr messages via logger.error. """ - child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, **kwargs) - if sys.platform == 'win32': - log_info("Windows operating system detected - no subprocess logging will be returned") + child = subprocess.Popen( + popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs + ) + if sys.platform == "win32": + log_info( + "Windows operating system detected - no subprocess logging will be returned" + ) - log_level = {child.stdout: stdout_log_level, - child.stderr: stderr_log_level} + log_level = {child.stdout: stdout_log_level, child.stderr: stderr_log_level} def check_io(): - if sys.platform == 'win32': + if sys.platform == "win32": return - ready_to_read = select.select([child.stdout, child.stderr], - [], - [], - 1000)[0] + ready_to_read = select.select([child.stdout, child.stderr], [], [], 1000)[0] for io in ready_to_read: line = io.readline() if not logger: @@ -105,8 +107,8 @@ def check_io(): rc = child.wait() if rc != 0: - print('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr) - print('\t', ' '.join(popenargs), file=sys.stderr) + print("{} returned {}:".format(popenargs[0], rc), file=sys.stderr) + print("\t", " ".join(popenargs), file=sys.stderr) return rc @@ -122,221 +124,301 @@ def mkdir_p(*args): raise -def mask_password(url, secret='*****'): 
+def mask_password(url, secret="*****"): parsed = urlparse(url) if not parsed.password: return url - elif parsed.password == 'x-oauth-basic': + elif parsed.password == "x-oauth-basic": return url.replace(parsed.username, secret) return url.replace(parsed.password, secret) def parse_args(args=None): - parser = argparse.ArgumentParser(description='Backup a github account') - parser.add_argument('user', - metavar='USER', - type=str, - help='github username') - parser.add_argument('-u', - '--username', - dest='username', - help='username for basic auth') - parser.add_argument('-p', - '--password', - dest='password', - help='password for basic auth. ' - 'If a username is given but not a password, the ' - 'password will be prompted for.') - parser.add_argument('-t', - '--token', - dest='token', - help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa - parser.add_argument('--as-app', - action='store_true', - dest='as_app', - help='authenticate as github app instead of as a user.') - parser.add_argument('-o', - '--output-directory', - default='.', - dest='output_directory', - help='directory at which to backup the repositories') - parser.add_argument('-l', - '--log-level', - default='info', - dest='log_level', - help='log level to use (default: info, possible levels: debug, info, warning, error, critical)') - parser.add_argument('-i', - '--incremental', - action='store_true', - dest='incremental', - help='incremental backup') - parser.add_argument('--starred', - action='store_true', - dest='include_starred', - help='include JSON output of starred repositories in backup') - parser.add_argument('--all-starred', - action='store_true', - dest='all_starred', - help='include starred repositories in backup [*]') - parser.add_argument('--watched', - action='store_true', - dest='include_watched', - help='include JSON output of watched repositories in backup') - parser.add_argument('--followers', - action='store_true', - dest='include_followers', - 
help='include JSON output of followers in backup') - parser.add_argument('--following', - action='store_true', - dest='include_following', - help='include JSON output of following users in backup') - parser.add_argument('--all', - action='store_true', - dest='include_everything', - help='include everything in backup (not including [*])') - parser.add_argument('--issues', - action='store_true', - dest='include_issues', - help='include issues in backup') - parser.add_argument('--issue-comments', - action='store_true', - dest='include_issue_comments', - help='include issue comments in backup') - parser.add_argument('--issue-events', - action='store_true', - dest='include_issue_events', - help='include issue events in backup') - parser.add_argument('--pulls', - action='store_true', - dest='include_pulls', - help='include pull requests in backup') - parser.add_argument('--pull-comments', - action='store_true', - dest='include_pull_comments', - help='include pull request review comments in backup') - parser.add_argument('--pull-commits', - action='store_true', - dest='include_pull_commits', - help='include pull request commits in backup') - parser.add_argument('--pull-details', - action='store_true', - dest='include_pull_details', - help='include more pull request details in backup [*]') - parser.add_argument('--labels', - action='store_true', - dest='include_labels', - help='include labels in backup') - parser.add_argument('--hooks', - action='store_true', - dest='include_hooks', - help='include hooks in backup (works only when authenticated)') # noqa - parser.add_argument('--milestones', - action='store_true', - dest='include_milestones', - help='include milestones in backup') - parser.add_argument('--repositories', - action='store_true', - dest='include_repository', - help='include repository clone in backup') - parser.add_argument('--bare', - action='store_true', - dest='bare_clone', - help='clone bare repositories') - parser.add_argument('--no-prune', - 
action='store_true', - dest='no_prune', - help='disable prune option for git fetch') - parser.add_argument('--lfs', - action='store_true', - dest='lfs_clone', - help='clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]') - parser.add_argument('--wikis', - action='store_true', - dest='include_wiki', - help='include wiki clone in backup') - parser.add_argument('--gists', - action='store_true', - dest='include_gists', - help='include gists in backup [*]') - parser.add_argument('--starred-gists', - action='store_true', - dest='include_starred_gists', - help='include starred gists in backup [*]') - parser.add_argument('--skip-archived', - action='store_true', - dest='skip_archived', - help='skip project if it is archived') - parser.add_argument('--skip-existing', - action='store_true', - dest='skip_existing', - help='skip project if a backup directory exists') - parser.add_argument('-L', - '--languages', - dest='languages', - help='only allow these languages', - nargs='*') - parser.add_argument('-N', - '--name-regex', - dest='name_regex', - help='python regex to match names against') - parser.add_argument('-H', - '--github-host', - dest='github_host', - help='GitHub Enterprise hostname') - parser.add_argument('-O', - '--organization', - action='store_true', - dest='organization', - help='whether or not this is an organization user') - parser.add_argument('-R', - '--repository', - dest='repository', - help='name of repository to limit backup to') - parser.add_argument('-P', '--private', - action='store_true', - dest='private', - help='include private repositories [*]') - parser.add_argument('-F', '--fork', - action='store_true', - dest='fork', - help='include forked repositories [*]') - parser.add_argument('--prefer-ssh', - action='store_true', - help='Clone repositories using SSH instead of HTTPS') - parser.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + VERSION) - parser.add_argument('--keychain-name', - 
dest='osx_keychain_item_name', - help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--keychain-account', - dest='osx_keychain_item_account', - help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--releases', - action='store_true', - dest='include_releases', - help='include release information, not including assets or binaries' - ) - parser.add_argument('--assets', - action='store_true', - dest='include_assets', - help='include assets alongside release information; only applies if including releases') - parser.add_argument('--throttle-limit', - dest='throttle_limit', - type=int, - default=0, - help='start throttling of GitHub API requests after this amount of API requests remain') - parser.add_argument('--throttle-pause', - dest='throttle_pause', - type=float, - default=30.0, - help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)') - parser.add_argument('--exclude', - dest='exclude', - help='names of repositories to exclude', - nargs="*") + parser = argparse.ArgumentParser(description="Backup a github account") + parser.add_argument("user", metavar="USER", type=str, help="github username") + parser.add_argument( + "-u", "--username", dest="username", help="username for basic auth" + ) + parser.add_argument( + "-p", + "--password", + dest="password", + help="password for basic auth. 
" + "If a username is given but not a password, the " + "password will be prompted for.", + ) + parser.add_argument( + "-t", + "--token", + dest="token", + help="personal access, OAuth, or JSON Web token, or path to token (file://...)", + ) # noqa + parser.add_argument( + "--as-app", + action="store_true", + dest="as_app", + help="authenticate as github app instead of as a user.", + ) + parser.add_argument( + "-o", + "--output-directory", + default=".", + dest="output_directory", + help="directory at which to backup the repositories", + ) + parser.add_argument( + "-l", + "--log-level", + default="info", + dest="log_level", + help="log level to use (default: info, possible levels: debug, info, warning, error, critical)", + ) + parser.add_argument( + "-i", + "--incremental", + action="store_true", + dest="incremental", + help="incremental backup", + ) + parser.add_argument( + "--starred", + action="store_true", + dest="include_starred", + help="include JSON output of starred repositories in backup", + ) + parser.add_argument( + "--all-starred", + action="store_true", + dest="all_starred", + help="include starred repositories in backup [*]", + ) + parser.add_argument( + "--watched", + action="store_true", + dest="include_watched", + help="include JSON output of watched repositories in backup", + ) + parser.add_argument( + "--followers", + action="store_true", + dest="include_followers", + help="include JSON output of followers in backup", + ) + parser.add_argument( + "--following", + action="store_true", + dest="include_following", + help="include JSON output of following users in backup", + ) + parser.add_argument( + "--all", + action="store_true", + dest="include_everything", + help="include everything in backup (not including [*])", + ) + parser.add_argument( + "--issues", + action="store_true", + dest="include_issues", + help="include issues in backup", + ) + parser.add_argument( + "--issue-comments", + action="store_true", + dest="include_issue_comments", + 
help="include issue comments in backup", + ) + parser.add_argument( + "--issue-events", + action="store_true", + dest="include_issue_events", + help="include issue events in backup", + ) + parser.add_argument( + "--pulls", + action="store_true", + dest="include_pulls", + help="include pull requests in backup", + ) + parser.add_argument( + "--pull-comments", + action="store_true", + dest="include_pull_comments", + help="include pull request review comments in backup", + ) + parser.add_argument( + "--pull-commits", + action="store_true", + dest="include_pull_commits", + help="include pull request commits in backup", + ) + parser.add_argument( + "--pull-details", + action="store_true", + dest="include_pull_details", + help="include more pull request details in backup [*]", + ) + parser.add_argument( + "--labels", + action="store_true", + dest="include_labels", + help="include labels in backup", + ) + parser.add_argument( + "--hooks", + action="store_true", + dest="include_hooks", + help="include hooks in backup (works only when authenticated)", + ) # noqa + parser.add_argument( + "--milestones", + action="store_true", + dest="include_milestones", + help="include milestones in backup", + ) + parser.add_argument( + "--repositories", + action="store_true", + dest="include_repository", + help="include repository clone in backup", + ) + parser.add_argument( + "--bare", action="store_true", dest="bare_clone", help="clone bare repositories" + ) + parser.add_argument( + "--no-prune", + action="store_true", + dest="no_prune", + help="disable prune option for git fetch", + ) + parser.add_argument( + "--lfs", + action="store_true", + dest="lfs_clone", + help="clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]", + ) + parser.add_argument( + "--wikis", + action="store_true", + dest="include_wiki", + help="include wiki clone in backup", + ) + parser.add_argument( + "--gists", + action="store_true", + dest="include_gists", + help="include gists 
in backup [*]", + ) + parser.add_argument( + "--starred-gists", + action="store_true", + dest="include_starred_gists", + help="include starred gists in backup [*]", + ) + parser.add_argument( + "--skip-archived", + action="store_true", + dest="skip_archived", + help="skip project if it is archived", + ) + parser.add_argument( + "--skip-existing", + action="store_true", + dest="skip_existing", + help="skip project if a backup directory exists", + ) + parser.add_argument( + "-L", + "--languages", + dest="languages", + help="only allow these languages", + nargs="*", + ) + parser.add_argument( + "-N", + "--name-regex", + dest="name_regex", + help="python regex to match names against", + ) + parser.add_argument( + "-H", "--github-host", dest="github_host", help="GitHub Enterprise hostname" + ) + parser.add_argument( + "-O", + "--organization", + action="store_true", + dest="organization", + help="whether or not this is an organization user", + ) + parser.add_argument( + "-R", + "--repository", + dest="repository", + help="name of repository to limit backup to", + ) + parser.add_argument( + "-P", + "--private", + action="store_true", + dest="private", + help="include private repositories [*]", + ) + parser.add_argument( + "-F", + "--fork", + action="store_true", + dest="fork", + help="include forked repositories [*]", + ) + parser.add_argument( + "--prefer-ssh", + action="store_true", + help="Clone repositories using SSH instead of HTTPS", + ) + parser.add_argument( + "-v", "--version", action="version", version="%(prog)s " + VERSION + ) + parser.add_argument( + "--keychain-name", + dest="osx_keychain_item_name", + help="OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token", + ) + parser.add_argument( + "--keychain-account", + dest="osx_keychain_item_account", + help="OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token", + ) + parser.add_argument( + "--releases", + 
action="store_true", + dest="include_releases", + help="include release information, not including assets or binaries", + ) + parser.add_argument( + "--assets", + action="store_true", + dest="include_assets", + help="include assets alongside release information; only applies if including releases", + ) + parser.add_argument( + "--throttle-limit", + dest="throttle_limit", + type=int, + default=0, + help="start throttling of GitHub API requests after this amount of API requests remain", + ) + parser.add_argument( + "--throttle-pause", + dest="throttle_pause", + type=float, + default=30.0, + help="wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)", + ) + parser.add_argument( + "--exclude", dest="exclude", help="names of repositories to exclude", nargs="*" + ) return parser.parse_args(args) @@ -345,35 +427,49 @@ def get_auth(args, encode=True, for_git_cli=False): if args.osx_keychain_item_name: if not args.osx_keychain_item_account: - raise Exception('You must specify both name and account fields for osx keychain password items') + raise Exception( + "You must specify both name and account fields for osx keychain password items" + ) else: - if platform.system() != 'Darwin': + if platform.system() != "Darwin": raise Exception("Keychain arguments are only supported on Mac OSX") try: - with open(os.devnull, 'w') as devnull: - token = (subprocess.check_output([ - 'security', 'find-generic-password', - '-s', args.osx_keychain_item_name, - '-a', args.osx_keychain_item_account, - '-w'], stderr=devnull).strip()) - token = token.decode('utf-8') - auth = token + ':' + 'x-oauth-basic' + with open(os.devnull, "w") as devnull: + token = subprocess.check_output( + [ + "security", + "find-generic-password", + "-s", + args.osx_keychain_item_name, + "-a", + args.osx_keychain_item_account, + "-w", + ], + stderr=devnull, + ).strip() + token = token.decode("utf-8") + auth = token + ":" + "x-oauth-basic" except 
subprocess.SubprocessError: - raise Exception('No password item matching the provided name and account could be found in the osx keychain.') + raise Exception( + "No password item matching the provided name and account could be found in the osx keychain." + ) elif args.osx_keychain_item_account: - raise Exception('You must specify both name and account fields for osx keychain password items') + raise Exception( + "You must specify both name and account fields for osx keychain password items" + ) elif args.token: - _path_specifier = 'file://' + _path_specifier = "file://" if args.token.startswith(_path_specifier): - args.token = open(args.token[len(_path_specifier):], - 'rt').readline().strip() + args.token = ( + open(args.token[len(_path_specifier) :], "rt").readline().strip() + ) if not args.as_app: - auth = args.token + ':' + 'x-oauth-basic' + auth = args.token + ":" + "x-oauth-basic" else: if not for_git_cli: auth = args.token else: - auth = 'x-access-token:' + args.token + auth = "x-access-token:" + args.token elif args.username: if not args.password: args.password = getpass.getpass() @@ -381,9 +477,9 @@ def get_auth(args, encode=True, for_git_cli=False): password = args.password else: password = urlquote(args.password) - auth = args.username + ':' + password + auth = args.username + ":" + password elif args.password: - raise Exception('You must specify a username for basic auth') + raise Exception("You must specify a username for basic auth") if not auth: return None @@ -391,14 +487,14 @@ def get_auth(args, encode=True, for_git_cli=False): if not encode: return auth - return base64.b64encode(auth.encode('ascii')) + return base64.b64encode(auth.encode("ascii")) def get_github_api_host(args): if args.github_host: - host = args.github_host + '/api/v3' + host = args.github_host + "/api/v3" else: - host = 'api.github.com' + host = "api.github.com" return host @@ -407,33 +503,40 @@ def get_github_host(args): if args.github_host: host = args.github_host else: - host 
= 'github.com' + host = "github.com" return host def get_github_repo_url(args, repository): - if repository.get('is_gist'): + if repository.get("is_gist"): if args.prefer_ssh: # The git_pull_url value is always https for gists, so we need to transform it to ssh form - repo_url = re.sub(r'^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url']) - repo_url = re.sub(r'^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility + repo_url = re.sub( + r"^https?:\/\/(.+)\/(.+)\.git$", + r"git@\1:\2.git", + repository["git_pull_url"], + ) + repo_url = re.sub( + r"^git@gist\.", "git@", repo_url + ) # strip gist subdomain for better hostkey compatibility else: - repo_url = repository['git_pull_url'] + repo_url = repository["git_pull_url"] return repo_url if args.prefer_ssh: - return repository['ssh_url'] + return repository["ssh_url"] auth = get_auth(args, encode=False, for_git_cli=True) if auth: - repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( + repo_url = "https://{0}@{1}/{2}/{3}.git".format( auth, get_github_host(args), - repository['owner']['login'], - repository['name']) + repository["owner"]["login"], + repository["name"], + ) else: - repo_url = repository['clone_url'] + repo_url = repository["clone_url"] return repo_url @@ -446,13 +549,15 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): while True: page = page + 1 - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request( + per_page, page, query_args, template, auth, as_app=args.as_app + ) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) # Check if we got correct data try: - response = json.loads(r.read().decode('utf-8')) + response = json.loads(r.read().decode("utf-8")) except IncompleteRead: log_warning("Incomplete read error detected") read_error = True @@ -466,25 +571,28 @@ def retrieve_data_gen(args, template, 
query_args=None, single_request=False): read_error = False # be gentle with API request limit and throttle requests if remaining requests getting low - limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0)) + limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0)) if args.throttle_limit and limit_remaining <= args.throttle_limit: log_info( - 'API request limit hit: {} requests left, pausing further requests for {}s'.format( - limit_remaining, - args.throttle_pause)) + "API request limit hit: {} requests left, pausing further requests for {}s".format( + limit_remaining, args.throttle_pause + ) + ) time.sleep(args.throttle_pause) retries = 0 while retries < 3 and (status_code == 502 or read_error): - log_warning('API request failed. Retrying in 5 seconds') + log_warning("API request failed. Retrying in 5 seconds") retries += 1 time.sleep(5) - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request( + per_page, page, query_args, template, auth, as_app=args.as_app + ) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) try: - response = json.loads(r.read().decode('utf-8')) + response = json.loads(r.read().decode("utf-8")) read_error = False except IncompleteRead: log_warning("Incomplete read error detected") @@ -497,14 +605,14 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): read_error = True if status_code != 200: - template = 'API request returned HTTP {0}: {1}' + template = "API request returned HTTP {0}: {1}" errors.append(template.format(status_code, r.reason)) - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if read_error: - template = 'API request problem reading response for {0}' + template = "API request problem reading response for {0}" errors.append(template.format(request)) - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if len(errors) == 0: if 
type(response) == list: @@ -516,7 +624,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): yield response if len(errors) > 0: - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if single_request: break @@ -563,20 +671,24 @@ def _get_response(request, auth, template): def _construct_request(per_page, page, query_args, template, auth, as_app=None): - querystring = urlencode(dict(list({ - 'per_page': per_page, - 'page': page - }.items()) + list(query_args.items()))) + querystring = urlencode( + dict( + list({"per_page": per_page, "page": page}.items()) + + list(query_args.items()) + ) + ) - request = Request(template + '?' + querystring) + request = Request(template + "?" + querystring) if auth is not None: if not as_app: - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + request.add_header("Authorization", "Basic ".encode("ascii") + auth) else: - auth = auth.encode('ascii') - request.add_header('Authorization', 'token '.encode('ascii') + auth) - request.add_header('Accept', 'application/vnd.github.machine-man-preview+json') - log_info('Requesting {}?{}'.format(template, querystring)) + auth = auth.encode("ascii") + request.add_header("Authorization", "token ".encode("ascii") + auth) + request.add_header( + "Accept", "application/vnd.github.machine-man-preview+json" + ) + log_info("Requesting {}?{}".format(template, querystring)) return request @@ -587,7 +699,7 @@ def _request_http_error(exc, auth, errors): should_continue = False headers = exc.headers - limit_remaining = int(headers.get('x-ratelimit-remaining', 0)) + limit_remaining = int(headers.get("x-ratelimit-remaining", 0)) if exc.code == 403 and limit_remaining < 1: # The X-RateLimit-Reset header includes a @@ -595,15 +707,19 @@ def _request_http_error(exc, auth, errors): # so we can calculate how long to wait rather # than inefficiently polling: gm_now = calendar.timegm(time.gmtime()) - reset = int(headers.get('x-ratelimit-reset', 0)) 
or gm_now + reset = int(headers.get("x-ratelimit-reset", 0)) or gm_now # We'll never sleep for less than 10 seconds: delta = max(10, reset - gm_now) - limit = headers.get('x-ratelimit-limit') - log_warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta)) # noqa + limit = headers.get("x-ratelimit-limit") + log_warning( + "Exceeded rate limit of {} requests; waiting {} seconds to reset".format( + limit, delta + ) + ) # noqa if auth is None: - log_info('Hint: Authenticate to raise your GitHub rate limit') + log_info("Hint: Authenticate to raise your GitHub rate limit") time.sleep(delta) should_continue = True @@ -613,13 +729,13 @@ def _request_http_error(exc, auth, errors): def _request_url_error(template, retry_timeout): # In case of a connection timing out, we can retry a few time # But we won't crash and not back-up the rest now - log_info('{} timed out'.format(template)) + log_info("{} timed out".format(template)) retry_timeout -= 1 if retry_timeout >= 0: return True, retry_timeout - raise Exception('{} timed out to much, skipping!') + raise Exception("{} timed out to much, skipping!") class S3HTTPRedirectHandler(HTTPRedirectHandler): @@ -629,9 +745,12 @@ class S3HTTPRedirectHandler(HTTPRedirectHandler): urllib will add the Authorization header to the redirected request to S3, which will result in a 400, so we should remove said header on redirect. 
""" + def redirect_request(self, req, fp, code, msg, headers, newurl): - request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) - del request.headers['Authorization'] + request = super(S3HTTPRedirectHandler, self).redirect_request( + req, fp, code, msg, headers, newurl + ) + del request.headers["Authorization"] return request @@ -641,15 +760,15 @@ def download_file(url, path, auth): return request = Request(url) - request.add_header('Accept', 'application/octet-stream') - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + request.add_header("Accept", "application/octet-stream") + request.add_header("Authorization", "Basic ".encode("ascii") + auth) opener = build_opener(S3HTTPRedirectHandler) try: response = opener.open(request) chunk_size = 16 * 1024 - with open(path, 'wb') as f: + with open(path, "wb") as f: while True: chunk = response.read(chunk_size) if not chunk: @@ -657,91 +776,110 @@ def download_file(url, path, auth): f.write(chunk) except HTTPError as exc: # Gracefully handle 404 responses (and others) when downloading from S3 - log_warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) + log_warning( + "Skipping download of asset {0} due to HTTPError: {1}".format( + url, exc.reason + ) + ) except URLError as e: # Gracefully handle other URL errors - log_warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) + log_warning( + "Skipping download of asset {0} due to URLError: {1}".format(url, e.reason) + ) except socket.error as e: # Gracefully handle socket errors # TODO: Implement retry logic - log_warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) + log_warning( + "Skipping download of asset {0} due to socker error: {1}".format( + url, e.strerror + ) + ) def get_authenticated_user(args): - template = 'https://{0}/user'.format(get_github_api_host(args)) + template = 
"https://{0}/user".format(get_github_api_host(args)) data = retrieve_data(args, template, single_request=True) return data[0] def check_git_lfs_install(): - exit_code = subprocess.call(['git', 'lfs', 'version']) + exit_code = subprocess.call(["git", "lfs", "version"]) if exit_code != 0: - raise Exception('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + raise Exception( + "The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com." + ) def retrieve_repositories(args, authenticated_user): - log_info('Retrieving repositories') + log_info("Retrieving repositories") single_request = False - if args.user == authenticated_user['login']: + if args.user == authenticated_user["login"]: # we must use the /user/repos API to be able to access private repos - template = 'https://{0}/user/repos'.format( - get_github_api_host(args)) + template = "https://{0}/user/repos".format(get_github_api_host(args)) else: if args.private and not args.organization: - log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') - template = 'https://{0}/users/{1}/repos'.format( - get_github_api_host(args), - args.user) + log_warning( + "Authenticated user is different from user being backed up, thus private repositories cannot be accessed" + ) + template = "https://{0}/users/{1}/repos".format( + get_github_api_host(args), args.user + ) if args.organization: - template = 'https://{0}/orgs/{1}/repos'.format( - get_github_api_host(args), - args.user) + template = "https://{0}/orgs/{1}/repos".format( + get_github_api_host(args), args.user + ) if args.repository: single_request = True - template = 'https://{0}/repos/{1}/{2}'.format( - get_github_api_host(args), - args.user, - args.repository) + template = "https://{0}/repos/{1}/{2}".format( + get_github_api_host(args), args.user, args.repository + ) repos = retrieve_data(args, 
template, single_request=single_request) if args.all_starred: - starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(args), args.user) + starred_template = "https://{0}/users/{1}/starred".format( + get_github_api_host(args), args.user + ) starred_repos = retrieve_data(args, starred_template, single_request=False) # flag each repo as starred for downstream processing for item in starred_repos: - item.update({'is_starred': True}) + item.update({"is_starred": True}) repos.extend(starred_repos) if args.include_gists: - gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(args), args.user) + gists_template = "https://{0}/users/{1}/gists".format( + get_github_api_host(args), args.user + ) gists = retrieve_data(args, gists_template, single_request=False) # flag each repo as a gist for downstream processing for item in gists: - item.update({'is_gist': True}) + item.update({"is_gist": True}) repos.extend(gists) if args.include_starred_gists: - starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) - starred_gists = retrieve_data(args, starred_gists_template, single_request=False) + starred_gists_template = "https://{0}/gists/starred".format( + get_github_api_host(args) + ) + starred_gists = retrieve_data( + args, starred_gists_template, single_request=False + ) # flag each repo as a starred gist for downstream processing for item in starred_gists: - item.update({'is_gist': True, - 'is_starred': True}) + item.update({"is_gist": True, "is_starred": True}) repos.extend(starred_gists) return repos def filter_repositories(args, unfiltered_repositories): - log_info('Filtering repositories') + log_info("Filtering repositories") repositories = [] for r in unfiltered_repositories: # gists can be anonymous, so need to safely check owner - if r.get('owner', {}).get('login') == args.user or r.get('is_starred'): + if r.get("owner", {}).get("login") == args.user or r.get("is_starred"): repositories.append(r) 
name_regex = None @@ -753,27 +891,33 @@ def filter_repositories(args, unfiltered_repositories): languages = [x.lower() for x in args.languages] if not args.fork: - repositories = [r for r in repositories if not r.get('fork')] + repositories = [r for r in repositories if not r.get("fork")] if not args.private: - repositories = [r for r in repositories if not r.get('private') or r.get('public')] + repositories = [ + r for r in repositories if not r.get("private") or r.get("public") + ] if languages: - repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa + repositories = [ + r + for r in repositories + if r.get("language") and r.get("language").lower() in languages + ] # noqa if name_regex: - repositories = [r for r in repositories if name_regex.match(r['name'])] + repositories = [r for r in repositories if name_regex.match(r["name"])] if args.skip_archived: - repositories = [r for r in repositories if not r.get('archived')] + repositories = [r for r in repositories if not r.get("archived")] if args.exclude: - repositories = [r for r in repositories if r['name'] not in args.exclude] + repositories = [r for r in repositories if r["name"] not in args.exclude] return repositories def backup_repositories(args, output_directory, repositories): - log_info('Backing up repositories') - repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) + log_info("Backing up repositories") + repos_template = "https://{0}/repos".format(get_github_api_host(args)) if args.incremental: - last_update_path = os.path.join(output_directory, 'last_update') + last_update_path = os.path.join(output_directory, "last_update") if os.path.exists(last_update_path): args.since = open(last_update_path).read().strip() else: @@ -781,55 +925,70 @@ def backup_repositories(args, output_directory, repositories): else: args.since = None - last_update = '0000-00-00T00:00:00Z' + last_update = "0000-00-00T00:00:00Z" for repository in 
repositories: - if 'updated_at' in repository and repository['updated_at'] > last_update: - last_update = repository['updated_at'] - elif 'pushed_at' in repository and repository['pushed_at'] > last_update: - last_update = repository['pushed_at'] - - if repository.get('is_gist'): - repo_cwd = os.path.join(output_directory, 'gists', repository['id']) - elif repository.get('is_starred'): + if "updated_at" in repository and repository["updated_at"] > last_update: + last_update = repository["updated_at"] + elif "pushed_at" in repository and repository["pushed_at"] > last_update: + last_update = repository["pushed_at"] + + if repository.get("is_gist"): + repo_cwd = os.path.join(output_directory, "gists", repository["id"]) + elif repository.get("is_starred"): # put starred repos in -o/starred/${owner}/${repo} to prevent collision of # any repositories with the same name - repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + repo_cwd = os.path.join( + output_directory, + "starred", + repository["owner"]["login"], + repository["name"], + ) else: - repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) + repo_cwd = os.path.join( + output_directory, "repositories", repository["name"] + ) - repo_dir = os.path.join(repo_cwd, 'repository') + repo_dir = os.path.join(repo_cwd, "repository") repo_url = get_github_repo_url(args, repository) - include_gists = (args.include_gists or args.include_starred_gists) - if (args.include_repository or args.include_everything) \ - or (include_gists and repository.get('is_gist')): - repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') - fetch_repository(repo_name, - repo_url, - repo_dir, - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone, - no_prune=args.no_prune) - - if repository.get('is_gist'): + include_gists = args.include_gists or args.include_starred_gists + if 
(args.include_repository or args.include_everything) or ( + include_gists and repository.get("is_gist") + ): + repo_name = ( + repository.get("name") + if not repository.get("is_gist") + else repository.get("id") + ) + fetch_repository( + repo_name, + repo_url, + repo_dir, + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) + + if repository.get("is_gist"): # dump gist information to a file as well - output_file = '{0}/gist.json'.format(repo_cwd) - with codecs.open(output_file, 'w', encoding='utf-8') as f: + output_file = "{0}/gist.json".format(repo_cwd) + with codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(repository, f) continue # don't try to back anything else for a gist; it doesn't exist - download_wiki = (args.include_wiki or args.include_everything) - if repository['has_wiki'] and download_wiki: - fetch_repository(repository['name'], - repo_url.replace('.git', '.wiki.git'), - os.path.join(repo_cwd, 'wiki'), - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone, - no_prune=args.no_prune - ) + download_wiki = args.include_wiki or args.include_everything + if repository["has_wiki"] and download_wiki: + fetch_repository( + repository["name"], + repo_url.replace(".git", ".wiki.git"), + os.path.join(repo_cwd, "wiki"), + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) if args.include_issues or args.include_everything: backup_issues(args, repo_cwd, repository, repos_template) @@ -846,188 +1005,169 @@ def backup_repositories(args, output_directory, repositories): backup_hooks(args, repo_cwd, repository, repos_template) if args.include_releases or args.include_everything: - backup_releases(args, repo_cwd, repository, repos_template, - include_assets=args.include_assets or args.include_everything) + backup_releases( + args, + repo_cwd, + repository, + repos_template, + 
include_assets=args.include_assets or args.include_everything, + ) if args.incremental: - if last_update == '0000-00-00T00:00:00Z': - last_update = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime()) - - open(last_update_path, 'w').write(last_update) + if last_update == "0000-00-00T00:00:00Z": + last_update = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.localtime()) + + open(last_update_path, "w").write(last_update) def backup_issues(args, repo_cwd, repository, repos_template): - has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) + has_issues_dir = os.path.isdir("{0}/issues/.git".format(repo_cwd)) if args.skip_existing and has_issues_dir: return - log_info('Retrieving {0} issues'.format(repository['full_name'])) - issue_cwd = os.path.join(repo_cwd, 'issues') + log_info("Retrieving {0} issues".format(repository["full_name"])) + issue_cwd = os.path.join(repo_cwd, "issues") mkdir_p(repo_cwd, issue_cwd) issues = {} issues_skipped = 0 - issues_skipped_message = '' - _issue_template = '{0}/{1}/issues'.format(repos_template, - repository['full_name']) + issues_skipped_message = "" + _issue_template = "{0}/{1}/issues".format(repos_template, repository["full_name"]) should_include_pulls = args.include_pulls or args.include_everything - issue_states = ['open', 'closed'] + issue_states = ["open", "closed"] for issue_state in issue_states: - query_args = { - 'filter': 'all', - 'state': issue_state - } + query_args = {"filter": "all", "state": issue_state} if args.since: - query_args['since'] = args.since + query_args["since"] = args.since - _issues = retrieve_data(args, - _issue_template, - query_args=query_args) + _issues = retrieve_data(args, _issue_template, query_args=query_args) for issue in _issues: # skip pull requests which are also returned as issues # if retrieving pull requests is requested as well - if 'pull_request' in issue and should_include_pulls: + if "pull_request" in issue and should_include_pulls: issues_skipped += 1 continue - 
issues[issue['number']] = issue + issues[issue["number"]] = issue if issues_skipped: - issues_skipped_message = ' (skipped {0} pull requests)'.format( - issues_skipped) + issues_skipped_message = " (skipped {0} pull requests)".format(issues_skipped) - log_info('Saving {0} issues to disk{1}'.format( - len(list(issues.keys())), issues_skipped_message)) - comments_template = _issue_template + '/{0}/comments' - events_template = _issue_template + '/{0}/events' + log_info( + "Saving {0} issues to disk{1}".format( + len(list(issues.keys())), issues_skipped_message + ) + ) + comments_template = _issue_template + "/{0}/comments" + events_template = _issue_template + "/{0}/events" for number, issue in list(issues.items()): if args.include_issue_comments or args.include_everything: template = comments_template.format(number) - issues[number]['comment_data'] = retrieve_data(args, template) + issues[number]["comment_data"] = retrieve_data(args, template) if args.include_issue_events or args.include_everything: template = events_template.format(number) - issues[number]['event_data'] = retrieve_data(args, template) + issues[number]["event_data"] = retrieve_data(args, template) - issue_file = '{0}/{1}.json'.format(issue_cwd, number) - with codecs.open(issue_file, 'w', encoding='utf-8') as f: + issue_file = "{0}/{1}.json".format(issue_cwd, number) + with codecs.open(issue_file, "w", encoding="utf-8") as f: json_dump(issue, f) def backup_pulls(args, repo_cwd, repository, repos_template): - has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) + has_pulls_dir = os.path.isdir("{0}/pulls/.git".format(repo_cwd)) if args.skip_existing and has_pulls_dir: return - log_info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa - pulls_cwd = os.path.join(repo_cwd, 'pulls') + log_info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa + pulls_cwd = os.path.join(repo_cwd, "pulls") mkdir_p(repo_cwd, pulls_cwd) pulls = {} - _pulls_template = 
'{0}/{1}/pulls'.format(repos_template, - repository['full_name']) - _issue_template = '{0}/{1}/issues'.format(repos_template, - repository['full_name']) + _pulls_template = "{0}/{1}/pulls".format(repos_template, repository["full_name"]) + _issue_template = "{0}/{1}/issues".format(repos_template, repository["full_name"]) query_args = { - 'filter': 'all', - 'state': 'all', - 'sort': 'updated', - 'direction': 'desc', + "filter": "all", + "state": "all", + "sort": "updated", + "direction": "desc", } if not args.include_pull_details: - pull_states = ['open', 'closed'] + pull_states = ["open", "closed"] for pull_state in pull_states: - query_args['state'] = pull_state - _pulls = retrieve_data_gen( - args, - _pulls_template, - query_args=query_args - ) + query_args["state"] = pull_state + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull['updated_at'] < args.since: + if args.since and pull["updated_at"] < args.since: break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = pull + if not args.since or pull["updated_at"] >= args.since: + pulls[pull["number"]] = pull else: - _pulls = retrieve_data_gen( - args, - _pulls_template, - query_args=query_args - ) + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull['updated_at'] < args.since: + if args.since and pull["updated_at"] < args.since: break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = retrieve_data( + if not args.since or pull["updated_at"] >= args.since: + pulls[pull["number"]] = retrieve_data( args, - _pulls_template + '/{}'.format(pull['number']), - single_request=True + _pulls_template + "/{}".format(pull["number"]), + single_request=True, )[0] - log_info('Saving {0} pull requests to disk'.format( - len(list(pulls.keys())))) + log_info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) # Comments 
from pulls API are only _review_ comments # regular comments need to be fetched via issue API. # For backwards compatibility with versions <= 0.41.0 # keep name "comment_data" for review comments - comments_regular_template = _issue_template + '/{0}/comments' - comments_template = _pulls_template + '/{0}/comments' - commits_template = _pulls_template + '/{0}/commits' + comments_regular_template = _issue_template + "/{0}/comments" + comments_template = _pulls_template + "/{0}/comments" + commits_template = _pulls_template + "/{0}/commits" for number, pull in list(pulls.items()): if args.include_pull_comments or args.include_everything: template = comments_regular_template.format(number) - pulls[number]['comment_regular_data'] = retrieve_data(args, template) + pulls[number]["comment_regular_data"] = retrieve_data(args, template) template = comments_template.format(number) - pulls[number]['comment_data'] = retrieve_data(args, template) + pulls[number]["comment_data"] = retrieve_data(args, template) if args.include_pull_commits or args.include_everything: template = commits_template.format(number) - pulls[number]['commit_data'] = retrieve_data(args, template) + pulls[number]["commit_data"] = retrieve_data(args, template) - pull_file = '{0}/{1}.json'.format(pulls_cwd, number) - with codecs.open(pull_file, 'w', encoding='utf-8') as f: + pull_file = "{0}/{1}.json".format(pulls_cwd, number) + with codecs.open(pull_file, "w", encoding="utf-8") as f: json_dump(pull, f) def backup_milestones(args, repo_cwd, repository, repos_template): - milestone_cwd = os.path.join(repo_cwd, 'milestones') + milestone_cwd = os.path.join(repo_cwd, "milestones") if args.skip_existing and os.path.isdir(milestone_cwd): return - log_info('Retrieving {0} milestones'.format(repository['full_name'])) + log_info("Retrieving {0} milestones".format(repository["full_name"])) mkdir_p(repo_cwd, milestone_cwd) - template = '{0}/{1}/milestones'.format(repos_template, - repository['full_name']) + template = 
"{0}/{1}/milestones".format(repos_template, repository["full_name"]) - query_args = { - 'state': 'all' - } + query_args = {"state": "all"} _milestones = retrieve_data(args, template, query_args=query_args) milestones = {} for milestone in _milestones: - milestones[milestone['number']] = milestone + milestones[milestone["number"]] = milestone - log_info('Saving {0} milestones to disk'.format( - len(list(milestones.keys())))) + log_info("Saving {0} milestones to disk".format(len(list(milestones.keys())))) for number, milestone in list(milestones.items()): - milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) - with codecs.open(milestone_file, 'w', encoding='utf-8') as f: + milestone_file = "{0}/{1}.json".format(milestone_cwd, number) + with codecs.open(milestone_file, "w", encoding="utf-8") as f: json_dump(milestone, f) def backup_labels(args, repo_cwd, repository, repos_template): - label_cwd = os.path.join(repo_cwd, 'labels') - output_file = '{0}/labels.json'.format(label_cwd) - template = '{0}/{1}/labels'.format(repos_template, - repository['full_name']) - _backup_data(args, - 'labels', - template, - output_file, - label_cwd) + label_cwd = os.path.join(repo_cwd, "labels") + output_file = "{0}/labels.json".format(label_cwd) + template = "{0}/{1}/labels".format(repos_template, repository["full_name"]) + _backup_data(args, "labels", template, output_file, label_cwd) def backup_hooks(args, repo_cwd, repository, repos_template): @@ -1035,184 +1175,185 @@ def backup_hooks(args, repo_cwd, repository, repos_template): if not auth: log_info("Skipping hooks since no authentication provided") return - hook_cwd = os.path.join(repo_cwd, 'hooks') - output_file = '{0}/hooks.json'.format(hook_cwd) - template = '{0}/{1}/hooks'.format(repos_template, - repository['full_name']) + hook_cwd = os.path.join(repo_cwd, "hooks") + output_file = "{0}/hooks.json".format(hook_cwd) + template = "{0}/{1}/hooks".format(repos_template, repository["full_name"]) try: - _backup_data(args, 
- 'hooks', - template, - output_file, - hook_cwd) + _backup_data(args, "hooks", template, output_file, hook_cwd) except SystemExit: log_info("Unable to read hooks, skipping") def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): - repository_fullname = repository['full_name'] + repository_fullname = repository["full_name"] # give release files somewhere to live & log intent - release_cwd = os.path.join(repo_cwd, 'releases') - log_info('Retrieving {0} releases'.format(repository_fullname)) + release_cwd = os.path.join(repo_cwd, "releases") + log_info("Retrieving {0} releases".format(repository_fullname)) mkdir_p(repo_cwd, release_cwd) query_args = {} - release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + release_template = "{0}/{1}/releases".format(repos_template, repository_fullname) releases = retrieve_data(args, release_template, query_args=query_args) # for each release, store it - log_info('Saving {0} releases to disk'.format(len(releases))) + log_info("Saving {0} releases to disk".format(len(releases))) for release in releases: - release_name = release['tag_name'] - release_name_safe = release_name.replace('/', '__') - output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name_safe)) - with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + release_name = release["tag_name"] + release_name_safe = release_name.replace("/", "__") + output_filepath = os.path.join( + release_cwd, "{0}.json".format(release_name_safe) + ) + with codecs.open(output_filepath, "w+", encoding="utf-8") as f: json_dump(release, f) if include_assets: - assets = retrieve_data(args, release['assets_url']) + assets = retrieve_data(args, release["assets_url"]) if len(assets) > 0: # give release asset files somewhere to live & download them (not including source archives) release_assets_cwd = os.path.join(release_cwd, release_name_safe) mkdir_p(release_assets_cwd) for asset in assets: - 
download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) - - -def fetch_repository(name, - remote_url, - local_dir, - skip_existing=False, - bare_clone=False, - lfs_clone=False, - no_prune=False): + download_file( + asset["url"], + os.path.join(release_assets_cwd, asset["name"]), + get_auth(args), + ) + + +def fetch_repository( + name, + remote_url, + local_dir, + skip_existing=False, + bare_clone=False, + lfs_clone=False, + no_prune=False, +): if bare_clone: if os.path.exists(local_dir): - clone_exists = subprocess.check_output(['git', - 'rev-parse', - '--is-bare-repository'], - cwd=local_dir) == b"true\n" + clone_exists = ( + subprocess.check_output( + ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir + ) + == b"true\n" + ) else: clone_exists = False else: - clone_exists = os.path.exists(os.path.join(local_dir, '.git')) + clone_exists = os.path.exists(os.path.join(local_dir, ".git")) if clone_exists and skip_existing: return masked_remote_url = mask_password(remote_url) - initialized = subprocess.call('git ls-remote ' + remote_url, - stdout=FNULL, - stderr=FNULL, - shell=True) + initialized = subprocess.call( + "git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True + ) if initialized == 128: - log_info("Skipping {0} ({1}) since it's not initialized".format( - name, masked_remote_url)) + log_info( + "Skipping {0} ({1}) since it's not initialized".format( + name, masked_remote_url + ) + ) return if clone_exists: - log_info('Updating {0} in {1}'.format(name, local_dir)) + log_info("Updating {0} in {1}".format(name, local_dir)) - remotes = subprocess.check_output(['git', 'remote', 'show'], - cwd=local_dir) - remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] + remotes = subprocess.check_output(["git", "remote", "show"], cwd=local_dir) + remotes = [i.strip() for i in remotes.decode("utf-8").splitlines()] - if 'origin' not in remotes: - git_command = ['git', 'remote', 'rm', 'origin'] + if 
"origin" not in remotes: + git_command = ["git", "remote", "rm", "origin"] logging_subprocess(git_command, None, cwd=local_dir) - git_command = ['git', 'remote', 'add', 'origin', remote_url] + git_command = ["git", "remote", "add", "origin", remote_url] logging_subprocess(git_command, None, cwd=local_dir) else: - git_command = ['git', 'remote', 'set-url', 'origin', remote_url] + git_command = ["git", "remote", "set-url", "origin", remote_url] logging_subprocess(git_command, None, cwd=local_dir) if lfs_clone: - git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + git_command = ["git", "lfs", "fetch", "--all", "--prune"] else: - git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + git_command = ["git", "fetch", "--all", "--force", "--tags", "--prune"] if no_prune: git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) else: - log_info('Cloning {0} repository from {1} to {2}'.format( - name, - masked_remote_url, - local_dir)) + log_info( + "Cloning {0} repository from {1} to {2}".format( + name, masked_remote_url, local_dir + ) + ) if bare_clone: - git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + git_command = ["git", "clone", "--mirror", remote_url, local_dir] logging_subprocess(git_command, None) if lfs_clone: - git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + git_command = ["git", "lfs", "fetch", "--all", "--prune"] if no_prune: git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) else: if lfs_clone: - git_command = ['git', 'lfs', 'clone', remote_url, local_dir] + git_command = ["git", "lfs", "clone", remote_url, local_dir] else: - git_command = ['git', 'clone', remote_url, local_dir] + git_command = ["git", "clone", remote_url, local_dir] logging_subprocess(git_command, None) def backup_account(args, output_directory): - account_cwd = os.path.join(output_directory, 'account') + account_cwd = os.path.join(output_directory, "account") if args.include_starred or 
args.include_everything: output_file = "{0}/starred.json".format(account_cwd) - template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) - _backup_data(args, - "starred repositories", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/starred".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "starred repositories", template, output_file, account_cwd) if args.include_watched or args.include_everything: output_file = "{0}/watched.json".format(account_cwd) - template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) - _backup_data(args, - "watched repositories", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/subscriptions".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "watched repositories", template, output_file, account_cwd) if args.include_followers or args.include_everything: output_file = "{0}/followers.json".format(account_cwd) - template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) - _backup_data(args, - "followers", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/followers".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "followers", template, output_file, account_cwd) if args.include_following or args.include_everything: output_file = "{0}/following.json".format(account_cwd) - template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) - _backup_data(args, - "following", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/following".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "following", template, output_file, account_cwd) def _backup_data(args, name, template, output_file, output_directory): skip_existing = args.skip_existing if not skip_existing or not os.path.exists(output_file): - log_info('Retrieving {0} 
{1}'.format(args.user, name)) + log_info("Retrieving {0} {1}".format(args.user, name)) mkdir_p(output_directory) data = retrieve_data(args, template) - log_info('Writing {0} {1} to disk'.format(len(data), name)) - with codecs.open(output_file, 'w', encoding='utf-8') as f: + log_info("Writing {0} {1} to disk".format(len(data), name)) + with codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(data, f) def json_dump(data, output_file): - json.dump(data, - output_file, - ensure_ascii=False, - sort_keys=True, - indent=4, - separators=(',', ': ')) + json.dump( + data, + output_file, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(",", ": "), + ) diff --git a/setup.py b/setup.py index 3b4df41..898e4fb 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ try: from setuptools import setup + setup # workaround for pyflakes issue #13 except ImportError: from distutils.core import setup @@ -15,6 +16,7 @@ # http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html) try: import multiprocessing + multiprocessing except ImportError: pass @@ -25,26 +27,26 @@ def open_file(fname): setup( - name='github-backup', + name="github-backup", version=__version__, - author='Jose Diaz-Gonzalez', - author_email='github-backup@josediazgonzalez.com', - packages=['github_backup'], - scripts=['bin/github-backup'], - url='http://github.com/josegonzalez/python-github-backup', - license='MIT', + author="Jose Diaz-Gonzalez", + author_email="github-backup@josediazgonzalez.com", + packages=["github_backup"], + scripts=["bin/github-backup"], + url="http://github.com/josegonzalez/python-github-backup", + license="MIT", classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Topic :: System :: Archiving :: Backup', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', + "Development Status :: 5 - 
Production/Stable", + "Topic :: System :: Archiving :: Backup", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", ], - description='backup a github user or organization', - long_description=open_file('README.rst').read(), - long_description_content_type='text/x-rst', - install_requires=open_file('requirements.txt').readlines(), + description="backup a github user or organization", + long_description=open_file("README.rst").read(), + long_description_content_type="text/x-rst", + install_requires=open_file("requirements.txt").readlines(), zip_safe=True, ) From 8b1bfd433cf90cd21872a24c5198b1948484f38c Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:34:22 -0400 Subject: [PATCH 159/455] chore: formatting --- github_backup/github_backup.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 7bf021d..b9ff345 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -460,9 +460,8 @@ def get_auth(args, encode=True, for_git_cli=False): elif args.token: _path_specifier = "file://" if args.token.startswith(_path_specifier): - args.token = ( - open(args.token[len(_path_specifier) :], "rt").readline().strip() - ) + path_specifier_len = len(_path_specifier) + args.token = open(args.token[path_specifier_len:], "rt").readline().strip() if not args.as_app: auth = args.token + ":" + "x-oauth-basic" else: @@ -673,8 +672,7 @@ def _get_response(request, auth, template): def _construct_request(per_page, page, query_args, template, auth, as_app=None): querystring = urlencode( dict( - list({"per_page": per_page, "page": page}.items()) - + list(query_args.items()) + list({"per_page": per_page, "page": page}.items()) + list(query_args.items()) ) ) From 
96a73b3fe89fc2a5412259fd5145d51091477dc5 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:35:07 -0400 Subject: [PATCH 160/455] feat: add release tagging --- .github/workflows/tagged-release.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/tagged-release.yml diff --git a/.github/workflows/tagged-release.yml b/.github/workflows/tagged-release.yml new file mode 100644 index 0000000..846c457 --- /dev/null +++ b/.github/workflows/tagged-release.yml @@ -0,0 +1,19 @@ +--- +name: "tagged-release" + +# yamllint disable-line rule:truthy +on: + push: + tags: + - '*' + +jobs: + tagged-release: + name: tagged-release + runs-on: ubuntu-20.04 + + steps: + - uses: "marvinpinto/action-automatic-releases@v1.2.1" + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + prerelease: false From 37ef0222e1efe9324ca89e5082833b683c8fb871 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:36:06 -0400 Subject: [PATCH 161/455] fix: adjust for black --- release | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release b/release index c48de82..bec6f9c 100755 --- a/release +++ b/release @@ -6,7 +6,7 @@ if [[ ! -f setup.py ]]; then exit 1 fi -PACKAGE_NAME="$(cat setup.py | grep "name='" | head | cut -d "'" -f2)" +PACKAGE_NAME="$(cat setup.py | grep "name=\"" | head | cut -d "'" -f2)" INIT_PACKAGE_NAME="$(echo "${PACKAGE_NAME//-/_}")" PUBLIC="true" From e8f027469e5db370d625137af9771eeb1903d03f Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:37:02 -0400 Subject: [PATCH 162/455] fix: adjust for black --- release | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release b/release index bec6f9c..c6e6033 100755 --- a/release +++ b/release @@ -6,7 +6,7 @@ if [[ ! 
-f setup.py ]]; then exit 1 fi -PACKAGE_NAME="$(cat setup.py | grep "name=\"" | head | cut -d "'" -f2)" +PACKAGE_NAME="$(cat setup.py | grep 'name="' | head | cut -d '"' -f2)" INIT_PACKAGE_NAME="$(echo "${PACKAGE_NAME//-/_}")" PUBLIC="true" From 82e35fb1cfd7f2d0f1b2dc4cd5cff0a23e228250 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:43:23 -0400 Subject: [PATCH 163/455] feat: commit gitchangelog.rc to repo so anyone can generate a changelog --- .gitchangelog.rc | 117 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 .gitchangelog.rc diff --git a/.gitchangelog.rc b/.gitchangelog.rc new file mode 100644 index 0000000..842973f --- /dev/null +++ b/.gitchangelog.rc @@ -0,0 +1,117 @@ +# +# Format +# +# ACTION: [AUDIENCE:] COMMIT_MSG [@TAG ...] +# +# Description +# +# ACTION is one of 'chg', 'fix', 'new' +# +# Is WHAT the change is about. +# +# 'chg' is for refactor, small improvement, cosmetic changes... +# 'fix' is for bug fixes +# 'new' is for new features, big improvement +# +# SUBJECT is optional and one of 'dev', 'usr', 'pkg', 'test', 'doc' +# +# Is WHO is concerned by the change. +# +# 'dev' is for developpers (API changes, refactors...) +# 'usr' is for final users (UI changes) +# 'pkg' is for packagers (packaging changes) +# 'test' is for testers (test only related changes) +# 'doc' is for doc guys (doc only changes) +# +# COMMIT_MSG is ... well ... the commit message itself. +# +# TAGs are additionnal adjective as 'refactor' 'minor' 'cosmetic' +# +# 'refactor' is obviously for refactoring code only +# 'minor' is for a very meaningless change (a typo, adding a comment) +# 'cosmetic' is for cosmetic driven change (re-indentation, 80-col...) +# +# Example: +# +# new: usr: support of bazaar implemented +# chg: re-indentend some lines @cosmetic +# new: dev: updated code to be compatible with last version of killer lib. +# fix: pkg: updated year of licence coverage. 
+# new: test: added a bunch of test around user usability of feature X. +# fix: typo in spelling my name in comment. @minor +# +# Please note that multi-line commit message are supported, and only the +# first line will be considered as the "summary" of the commit message. So +# tags, and other rules only applies to the summary. The body of the commit +# message will be displayed in the changelog with minor reformating. + +# +# ``ignore_regexps`` is a line of regexps +# +# Any commit having its full commit message matching any regexp listed here +# will be ignored and won't be reported in the changelog. +# +ignore_regexps = [ + r'(?i)^(Merge pull request|Merge branch|Release|Update)', +] + + +# +# ``replace_regexps`` is a dict associating a regexp pattern and its replacement +# +# It will be applied to get the summary line from the full commit message. +# +# Note that you can provide multiple replacement patterns, they will be all +# tried. If None matches, the summary line will be the full commit message. +# +replace_regexps = { + # current format (ie: 'chg: dev: my commit msg @tag1 @tag2') + + r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n@]*)(@[a-z]+\s+)*$': + r'\4', +} + + +# ``section_regexps`` is a list of 2-tuples associating a string label and a +# list of regexp +# +# Commit messages will be classified in sections thanks to this. Section +# titles are the label, and a commit is classified under this section if any +# of the regexps associated is matching. 
+# +section_regexps = [ + ('New', [ + r'^[nN]ew\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + ('Changes', [ + r'^[cC]hg\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + ('Fix', [ + r'^[fF]ix\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + ('Other', None # Match all lines + ), + +] + +# ``body_split_regexp`` is a regexp +# +# Commit message body (not the summary) if existing will be split +# (new line) on this regexp +# +body_split_regexp = r'[\n-]' + + +# ``tag_filter_regexp`` is a regexp +# +# Tags that will be used for the changelog must match this regexp. +# +# tag_filter_regexp = r'^[0-9]+$' +tag_filter_regexp = r'^(?:[vV])?[0-9\.]+$' + + +# ``unreleased_version_label`` is a string +# +# This label will be used as the changelog Title of the last set of changes +# between last valid tag and HEAD if any. +unreleased_version_label = "%%version%% (unreleased)" From 0b2330c2c44c3993b762186183944688ecc69b4e Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:43:29 -0400 Subject: [PATCH 164/455] fix: adjust for black --- release | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release b/release index c6e6033..51d1916 100755 --- a/release +++ b/release @@ -86,8 +86,8 @@ TMPFILE=$(mktemp /tmp/${tempfoo}.XXXXXX) || { exit 1 } -find_this="__version__ = '$current_version'" -replace_with="__version__ = '$next_version'" +find_this="__version__ = \"$current_version\"" +replace_with="__version__ = \"$next_version\"" echo -e "${YELLOW}--->${COLOR_OFF} Updating ${INIT_PACKAGE_NAME}/__init__.py" sed "s/$find_this/$replace_with/" ${INIT_PACKAGE_NAME}/__init__.py > $TMPFILE && mv $TMPFILE ${INIT_PACKAGE_NAME}/__init__.py From 2730fc3e5a56814293f6eda267367036841d7dee Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:44:13 -0400 Subject: [PATCH 165/455] fix: do not update readme --- release | 6 ------ 1 file changed, 6 deletions(-) diff --git a/release b/release index 
51d1916..21a14f7 100755 --- a/release +++ b/release @@ -92,12 +92,6 @@ replace_with="__version__ = \"$next_version\"" echo -e "${YELLOW}--->${COLOR_OFF} Updating ${INIT_PACKAGE_NAME}/__init__.py" sed "s/$find_this/$replace_with/" ${INIT_PACKAGE_NAME}/__init__.py > $TMPFILE && mv $TMPFILE ${INIT_PACKAGE_NAME}/__init__.py -find_this="${PACKAGE_NAME}.git@$current_version" -replace_with="${PACKAGE_NAME}.git@$next_version" - -echo -e "${YELLOW}--->${COLOR_OFF} Updating README.rst" -sed "s/$find_this/$replace_with/" README.rst > $TMPFILE && mv $TMPFILE README.rst - if [ -f docs/conf.py ]; then echo -e "${YELLOW}--->${COLOR_OFF} Updating docs" find_this="version = '${current_version}'" From 0ebaffd10248598ab9dca3bf7c497ecbbd13154e Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:44:23 -0400 Subject: [PATCH 166/455] Release version 0.43.0 --- CHANGES.rst | 490 ++++++-------------------------------- github_backup/__init__.py | 2 +- 2 files changed, 76 insertions(+), 416 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 47b335d..26a4243 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,26 +1,91 @@ Changelog ========= -0.39.0 (2021-03-18) +0.43.0 (2023-05-29) ------------------- ------------- +------------------------ Fix ~~~ -- Fix missing INFO logs. [Gallo Feliz] +- Do not update readme. [Jose Diaz-Gonzalez] +- Adjust for black. [Jose Diaz-Gonzalez] +- Adjust for black. [Jose Diaz-Gonzalez] +- Adjust for black. [Jose Diaz-Gonzalez] Other ~~~~~ -- Merge pull request #173 from gallofeliz/make-compatible-python-call. - [Jose Diaz-Gonzalez] +- Feat: commit gitchangelog.rc to repo so anyone can generate a + changelog. [Jose Diaz-Gonzalez] +- Feat: add release tagging. [Jose Diaz-Gonzalez] +- Chore: formatting. [Jose Diaz-Gonzalez] +- Chore: run black. [Jose Diaz-Gonzalez] +- Add --log-level command line argument. [Enrico Tröger] + + Support changing the log level to the desired value easily. 
+ For example, this is useful to suppress progress messages but + keep logging warnings and errors. +- Check both updated_at and pushed_at properties. [Ken Bailey] + + Check both updated_at and pushed_at dates to get the last_update to reduce data retrieved on incremental api calls using since. + + +0.42.0 (2022-11-28) +------------------- +- Add option to exclude repositories. [npounder] +- Backup regular pull request comments as well. [Oneric] + + Before, only review comments were backed up; + regular comments need to be fetched via issue API. +- Fix bug forever retry when request url error. [kornpisey] +- Added --no-prune option to disable prune option when doing git fetch. + [kornpisey] + + +0.41.0 (2022-03-02) +------------------- +- Git lfs clone doe snot respect --mirror. [Louis Parisot] + + +0.40.2 (2021-12-29) +------------------- +- Fix lint issues raised by Flake8. [atinary-afoulon] + + According to job: + [ https://app.circleci.com/pipelines/github/josegonzalez/python-github-backup/30/workflows/74eb93f2-2505-435d-b728-03b3cc04c14a/jobs/23 ] + + Failed on the following checks: + ./github_backup/github_backup.py:20:1: F811 redefinition of unused 'logging' from line 14 + ./github_backup/github_backup.py:45:1: E302 expected 2 blank lines, found 1 + ./github_backup/github_backup.py:136:20: E251 unexpected spaces around keyword / parameter equals + + +0.40.1 (2021-09-22) +------------------- +- Revert to fetch. [Harrison Wright] + + +0.40.0 (2021-07-12) +------------------- +- Add retry on certain network errors. [Jacek Nykis] + + This change includes certain network level errors in the retry logic. + It partially address #110 but I think more comprehensive fix would be useful. +- Pull changes from remote. 
[Jonas] + + use `git pull` to pull actual files from the remote instead of using `fetch` for only the metadata - Try to make compatible code with direct Python call ; reduce the hard link of the code with the cli + +0.39.0 (2021-03-19) +------------------- + +Fix +~~~ +- Fix missing INFO logs. [Gallo Feliz] + +Other +~~~~~ - Try to make compatible code with direct Python call ; reduce the hard link of the code with the cli. [Gallo Feliz] -- Merge pull request #174 from atorrescogollo/master. [Jose Diaz- - Gonzalez] - - Fixed release_name with slash bug - Fixed release_name with slash bug. [Álvaro Torres Cogollo] @@ -38,15 +103,6 @@ Fix Other ~~~~~ -- Release version 0.38.0. [Jose Diaz-Gonzalez] -- Merge pull request #172 from samanthaq/always-use-oauth-when-provided. - [Jose Diaz-Gonzalez] - - fix: Always clone with OAuth token when provided -- Merge pull request #170 from Mindavi/bugfix/broken-url. [Jose Diaz- - Gonzalez] - - Fix broken and incorrect link to github repository - Change broken link to a fork to a working link to upstream. [Rick van Schijndel] @@ -58,10 +114,6 @@ Fix ~~~ - Use distutils.core on error. [Jose Diaz-Gonzalez] -Other -~~~~~ -- Release version 0.37.2. [Jose Diaz-Gonzalez] - 0.37.1 (2021-01-02) ------------------- @@ -79,46 +131,24 @@ Fix - Set long description type - Gitignore the temporary readme file -Other -~~~~~ -- Release version 0.37.1. [Jose Diaz-Gonzalez] - 0.37.0 (2021-01-02) ------------------- -- Release version 0.37.0. [Jose Diaz-Gonzalez] -- Merge pull request #158 from albertyw/python3. [Jose Diaz-Gonzalez] - - Remove support for python 2 - Add support for python 3.7 and 3.8 in package classifiers. [Albert Wang] - Remove support for python 2.7 in package classifiers. [Albert Wang] - Remove python 2 specific import logic. [Albert Wang] - Remove python 2 specific logic. [Albert Wang] -- Merge pull request #165 from garymoon/add-skip-archived. 
[Jose Diaz- - Gonzalez] - - Add option to skip archived repositories - Add ability to skip archived repositories. [Gary Moon] 0.36.0 (2020-08-29) ------------------- -- Release version 0.36.0. [Jose Diaz-Gonzalez] -- Merge pull request #157 from albertyw/lint. [Jose Diaz-Gonzalez] - Add flake8 instructions to readme. [Albert Wang] - Fix regex string. [Albert Wang] -- Update boolean check. [Albert Wang] - Fix whitespace issues. [Albert Wang] - Do not use bare excepts. [Albert Wang] -- Merge pull request #161 from albertyw/circleci-project-setup. [Jose - Diaz-Gonzalez] - - Add circleci config - Add .circleci/config.yml. [Albert Wang] -- Merge pull request #160 from wbolster/patch-1. [Jose Diaz-Gonzalez] - - Include --private flag in example - Include --private flag in example. [wouter bolsterlee] By default, private repositories are not included. This is surprising. @@ -128,38 +158,16 @@ Other 0.35.0 (2020-08-05) ------------------- -- Release version 0.35.0. [Jose Diaz-Gonzalez] -- Merge pull request #156 from samanthaq/restore-optional-throttling. - [Jose Diaz-Gonzalez] - - Make API request throttling optional - Make API request throttling optional. [Samantha Baldwin] 0.34.0 (2020-07-24) ------------------- -- Release version 0.34.0. [Jose Diaz-Gonzalez] -- Merge pull request #153 from 0x6d617474/gist_ssh. [Jose Diaz-Gonzalez] - - Add logic for transforming gist repository urls to ssh - Add logic for transforming gist repository urls to ssh. [Matt Fields] -0.33.1 (2020-05-28) -------------------- -- Release version 0.33.1. [Jose Diaz-Gonzalez] -- Merge pull request #151 from garymoon/readme-update-0.33. [Jose Diaz- - Gonzalez] -- Update the readme for new switches added in 0.33. [Gary Moon] - - 0.33.0 (2020-04-13) ------------------- -- Release version 0.33.0. [Jose Diaz-Gonzalez] -- Merge pull request #149 from eht16/simple_api_request_throttling. - [Jose Diaz-Gonzalez] - - Add basic API request throttling - Add basic API request throttling. 
[Enrico Tröger] A simple approach to throttle API requests and so keep within the rate @@ -171,50 +179,23 @@ Other 0.32.0 (2020-04-13) ------------------- -- Release version 0.32.0. [Jose Diaz-Gonzalez] -- Merge pull request #148 from eht16/logging_with_timestamp. [Jose Diaz- - Gonzalez] - - Add timestamp to log messages - Add timestamp to log messages. [Enrico Tröger] -- Merge pull request #147 from tomhoover/update-readme. [Jose Diaz- - Gonzalez] - - Update README.rst to match 'github-backup -h' -- Update README.rst to match 'github-backup -h' [Tom Hoover] 0.31.0 (2020-02-25) ------------------- -- Release version 0.31.0. [Jose Diaz-Gonzalez] -- Merge pull request #146 from timm3/upstream-123. [Jose Diaz-Gonzalez] - - Authenticate as Github App - #123 update: changed --as-app 'help' description. [ethan] - #123: Support Authenticating As Github Application. [ethan] -0.30.0 (2020-02-14) -------------------- -- Release version 0.30.0. [Jose Diaz-Gonzalez] - - 0.29.0 (2020-02-14) ------------------- -- Release version 0.29.0. [Jose Diaz-Gonzalez] -- Merge pull request #145 from timm3/50-v0.28.0. [Jose Diaz-Gonzalez] - - #50 - refactor for friendlier import - #50 update: keep main() in bin. [ethan] - #50 - refactor for friendlier import. [ethan] 0.28.0 (2020-02-03) ------------------- -- Release version 0.28.0. [Jose Diaz-Gonzalez] -- Merge pull request #143 from smiley/patch-1. [Jose Diaz-Gonzalez] - - Remove deprecated (and removed) "git lfs fetch" flags - Remove deprecated (and removed) git lfs flags. [smiley] "--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script. @@ -222,11 +203,6 @@ Other 0.27.0 (2020-01-22) ------------------- -- Release version 0.27.0. [Jose Diaz-Gonzalez] -- Merge pull request #142 from einsteinx2/issue/141-import-error- - version. [Jose Diaz-Gonzalez] - - Fixed script fails if not installed from pip - Fixed script fails if not installed from pip. 
[Ben Baron] At the top of the script, the line from github_backup import __version__ gets the script's version number to use if the script is called with the -v or --version flags. The problem is that if the script hasn't been installed via pip (for example I cloned the repo directly to my backup server), the script will fail due to an import exception. @@ -234,26 +210,14 @@ Other Also presumably it will always use the version number from pip even if running a modified version from git or a fork or something, though this does not fix that as I have no idea how to check if it's running the pip installed version or not. But at least the script will now work fine if cloned from git or just copied to another machine. closes https://github.com/josegonzalez/python-github-backup/issues/141 -- Merge pull request #136 from einsteinx2/issue/88-macos-keychain- - broken-python3. [Jose Diaz-Gonzalez] - - Fixed macOS keychain access when using Python 3 - Fixed macOS keychain access when using Python 3. [Ben Baron] Python 3 is returning bytes rather than a string, so the string concatenation to create the auth variable was throwing an exception which the script was interpreting to mean it couldn't find the password. Adding a conversion to string first fixed the issue. -- Merge pull request #137 from einsteinx2/issue/134-only-use-auth-token- - when-needed. [Jose Diaz-Gonzalez] - - Public repos no longer include the auth token - Public repos no longer include the auth token. [Ben Baron] When backing up repositories using an auth token and https, the GitHub personal auth token is leaked in each backed up repository. It is included in the URL of each repository's git remote url. This is not needed as they are public and can be accessed without the token and can cause issues in the future if the token is ever changed, so I think it makes more sense not to have the token stored in each repo backup. I think the token should only be "leaked" like this out of necessity, e.g. 
it's a private repository and the --prefer-ssh option was not chosen so https with auth token was required to perform the clone. -- Merge pull request #130 from einsteinx2/issue/129-fix-crash-on- - release-asset-download-error. [Jose Diaz-Gonzalez] - - Crash when an release asset doesn't exist - Fixed comment typo. [Ben Baron] - Switched log_info to log_warning in download_file. [Ben Baron] - Crash when an release asset doesn't exist. [Ben Baron] @@ -261,10 +225,6 @@ Other Currently, the script crashes whenever a release asset is unable to download (for example a 404 response). This change instead logs the failure and allows the script to continue. No retry logic is enabled, but at least it prevents the crash and allows the backup to complete. Retry logic can be implemented later if wanted. closes https://github.com/josegonzalez/python-github-backup/issues/129 -- Merge pull request #132 from einsteinx2/issue/126-prevent-overwriting- - release-assets. [Jose Diaz-Gonzalez] - - Separate release assets and skip re-downloading - Moved asset downloading loop inside the if block. [Ben Baron] - Separate release assets and skip re-downloading. [Ben Baron] @@ -275,36 +235,21 @@ Other This change also now checks if the asset file already exists on disk and skips downloading it. This drastically speeds up addiotnal syncs as it no longer downloads every single release every single time. It will now only download new releases which I believe is the expected behavior. closes https://github.com/josegonzalez/python-github-backup/issues/126 -- Merge pull request #131 from einsteinx2/improve-gitignore. [Jose Diaz- - Gonzalez] - - Improved gitignore, macOS files and IDE configs - Added newline to end of file. [Ben Baron] - Improved gitignore, macOS files and IDE configs. [Ben Baron] Ignores the annoying hidden macOS files .DS_Store and ._* as well as the IDE configuration folders for contributors using the popular Visual Studio Code and Atom IDEs (more can be added later as needed). 
-- Update ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] 0.26.0 (2019-09-23) ------------------- -- Release version 0.26.0. [Jose Diaz-Gonzalez] -- Merge pull request #128 from Snawoot/master. [Jose Diaz-Gonzalez] - - Workaround gist clone in `--prefer-ssh` mode - Workaround gist clone in `--prefer-ssh` mode. [Vladislav Yarmak] - Create PULL_REQUEST.md. [Jose Diaz-Gonzalez] - Create ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] -- Update README.rst. [Jose Diaz-Gonzalez] -- Update README.rst. [Jose Diaz-Gonzalez] 0.25.0 (2019-07-03) ------------------- -- Release version 0.25.0. [Jose Diaz-Gonzalez] -- Merge pull request #120 from 8h2a/patch-1. [Jose Diaz-Gonzalez] - - Issue 119: Change retrieve_data to be a generator - Issue 119: Change retrieve_data to be a generator. [2a] See issue #119. @@ -312,43 +257,21 @@ Other 0.24.0 (2019-06-27) ------------------- -- Release version 0.24.0. [Jose Diaz-Gonzalez] -- Merge pull request #117 from QuicketSolutions/master. [Jose Diaz- - Gonzalez] - - Add option for Releases -- Merge pull request #5 from QuicketSolutions/QKT-45. [Ethan Timm] - QKT-45: include assets - update readme. [Ethan Timm] update readme with flag information for including assets alongside their respective releases -- Merge pull request #4 from whwright/wip-releases. [Ethan Timm] - - Download github assets - Make assets it's own flag. [Harrison Wright] - Fix super call for python2. [Harrison Wright] - Fix redirect to s3. [Harrison Wright] - WIP: download assets. [Harrison Wright] -- Merge pull request #3 from QuicketSolutions/QKT-42. [Ethan Timm] - QKT-42: releases - add readme info. [ethan] -- Merge pull request #2 from QuicketSolutions/QKT-42. [Ethan Timm] - - QKT-42 update: shorter command flag - QKT-42 update: shorter command flag. [ethan] -- Merge pull request #1 from QuicketSolutions/QKT-42. [Ethan Timm] - QKT-42: support saving release information. [ethan] -- Merge pull request #118 from whwright/115-fix-pull-details. 
[Jose - Diaz-Gonzalez] - - Fix pull details - Fix pull details. [Harrison Wright] 0.23.0 (2019-06-04) ------------------- -- Release version 0.23.0. [Jose Diaz-Gonzalez] -- Merge pull request #113 from kleag/master. [Jose Diaz-Gonzalez] - - Avoid to crash in case of HTTP 502 error - Avoid to crash in case of HTTP 502 error. [Gael de Chalendar] Survive also on socket.error connections like on HTTPError or URLError. @@ -365,32 +288,15 @@ Fix Refs #106 -Other -~~~~~ -- Release version 0.22.2. [Jose Diaz-Gonzalez] -- Merge pull request #107 from josegonzalez/patch-1. [Jose Diaz- - Gonzalez] - - fix: warn instead of error - 0.22.1 (2019-02-21) ------------------- -- Release version 0.22.1. [Jose Diaz-Gonzalez] -- Merge pull request #106 from jstetic/master. [Jose Diaz-Gonzalez] - - Log URL error - Log URL error https://github.com/josegonzalez/python-github- backup/issues/105. [JOHN STETIC] 0.22.0 (2019-02-01) ------------------- -- Release version 0.22.0. [Jose Diaz-Gonzalez] -- Merge pull request #103 from whwright/98-better-logging. [Jose Diaz- - Gonzalez] - - Fix accidental system exit with better logging strategy - Remove unnecessary sys.exit call. [W. Harrison Wright] - Add org check to avoid incorrect log output. [W. Harrison Wright] - Fix accidental system exit with better logging strategy. [W. Harrison @@ -399,10 +305,6 @@ Other 0.21.1 (2018-12-25) ------------------- -- Release version 0.21.1. [Jose Diaz-Gonzalez] -- Merge pull request #101 from ecki/patch-2. [Jose Diaz-Gonzalez] - - Mark options which are not included in --all - Mark options which are not included in --all. [Bernd] As discussed in Issue #100 @@ -410,22 +312,12 @@ Other 0.21.0 (2018-11-28) ------------------- -- Release version 0.21.0. [Jose Diaz-Gonzalez] -- Merge pull request #97 from whwright/94-fix-user-repos. [Jose Diaz- - Gonzalez] - - Correctly download repos when user arg != authenticated user - Correctly download repos when user arg != authenticated user. [W. 
Harrison Wright] 0.20.1 (2018-09-29) ------------------- -- Release version 0.20.1. [Jose Diaz-Gonzalez] -- Merge pull request #92 from whwright/87-fix-starred-bug. [Jose Diaz- - Gonzalez] - - Clone the specified user's starred repos/gists, not the authenticated user - Clone the specified user's gists, not the authenticated user. [W. Harrison Wright] - Clone the specified user's starred repos, not the authenticated user. @@ -434,7 +326,6 @@ Other 0.20.0 (2018-03-24) ------------------- -- Release version 0.20.0. [Jose Diaz-Gonzalez] - Chore: drop Python 2.6. [Jose Diaz-Gonzalez] - Feat: simplify release script. [Jose Diaz-Gonzalez] @@ -446,33 +337,15 @@ Fix ~~~ - Cleanup pep8 violations. [Jose Diaz-Gonzalez] -Other -~~~~~ -- Release version 0.19.2. [Jose Diaz-Gonzalez] - - -0.19.1 (2018-03-24) -------------------- -- Release version 0.19.1. [Jose Diaz-Gonzalez] - 0.19.0 (2018-03-24) ------------------- -- Release version 0.19.0. [Jose Diaz-Gonzalez] -- Merge pull request #77 from mayflower/pull-details. [Jose Diaz- - Gonzalez] - - Pull Details - Add additional output for the current request. [Robin Gloster] This is useful to have some progress indication for huge repositories. - Add option to backup additional PR details. [Robin Gloster] Some payload is only included when requesting a single pull request -- Merge pull request #84 from johbo/fix-python36-skip-existing. [Jose - Diaz-Gonzalez] - - Mark string as binary in comparison for skip_existing - Mark string as binary in comparison for skip_existing. [Johannes Bornhold] @@ -483,20 +356,11 @@ Other 0.18.0 (2018-02-22) ------------------- -- Release version 0.18.0. [Jose Diaz-Gonzalez] -- Merge pull request #82 from sgreene570/add-followers. [Jose Diaz- - Gonzalez] - - Add option to fetch followers/following JSON data - Add option to fetch followers/following JSON data. [Stephen Greene] 0.17.0 (2018-02-20) ------------------- -- Release version 0.17.0. 
[Jose Diaz-Gonzalez] -- Merge pull request #81 from whwright/gists. [Jose Diaz-Gonzalez] - - Add ability to back up gists - Short circuit gists backup process. [W. Harrison Wright] - Formatting. [W. Harrison Wright] - Add ability to backup gists. [W. Harrison Wright] @@ -504,94 +368,41 @@ Other 0.16.0 (2018-01-22) ------------------- -- Release version 0.16.0. [Jose Diaz-Gonzalez] -- Merge pull request #78 from whwright/clone-starred-repos. [Jose Diaz- - Gonzalez] - - Clone starred repos -- Update README.rst. [Jose Diaz-Gonzalez] -- Update documentation. [W. Harrison Wright] - Change option to --all-starred. [W. Harrison Wright] - JK don't update documentation. [W. Harrison Wright] -- Update documentation. [W. Harrison Wright] - Put starred clone repoistories under a new option. [W. Harrison Wright] - Add comment. [W. Harrison Wright] - Add ability to clone starred repos. [W. Harrison Wright] -0.15.0 (2017-12-11) -------------------- -- Release version 0.15.0. [Jose Diaz-Gonzalez] -- Merge pull request #75 from slibby/slibby-patch-windows. [Jose Diaz- - Gonzalez] - - update check_io() to allow scripts to run on Windows -- Update logging_subprocess function. [Sam Libby] - - 1. added newline for return - 2. added one-time warning (once per subprocess) -- Update check_io() to allow scripts to run on Windows. [Sam Libby] - - 0.14.1 (2017-10-11) ------------------- -- Release version 0.14.1. [Jose Diaz-Gonzalez] -- Merge pull request #70 from epfremmer/patch-1. [Jose Diaz-Gonzalez] - - Fix arg not defined error - Fix arg not defined error. [Edward Pfremmer] 0.14.0 (2017-10-11) ------------------- -- Release version 0.14.0. [Jose Diaz-Gonzalez] -- Merge pull request #68 from pieterclaerhout/master. [Jose Diaz- - Gonzalez] - - Added support for LFS clones -- Updated the readme. [pieterclaerhout] - Added a check to see if git-lfs is installed when doing an LFS clone. [pieterclaerhout] - Added support for LFS clones. 
[pieterclaerhout] -- Merge pull request #66 from albertyw/python3. [Jose Diaz-Gonzalez] - - Explicitly support python 3 - Add pypi info to readme. [Albert Wang] - Explicitly support python 3 in package description. [Albert Wang] -- Merge pull request #65 from mumblez/master. [Jose Diaz-Gonzalez] - - add couple examples to help new users - Add couple examples to help new users. [Yusuf Tran] 0.13.2 (2017-05-06) ------------------- -- Release version 0.13.2. [Jose Diaz-Gonzalez] -- Merge pull request #64 from karlicoss/fix-remotes. [Jose Diaz- - Gonzalez] - - Fix remotes while updating repository - Fix remotes while updating repository. [Dima Gerasimov] 0.13.1 (2017-04-11) ------------------- -- Release version 0.13.1. [Jose Diaz-Gonzalez] -- Merge pull request #61 from McNetic/fix_empty_updated_at. [Jose Diaz- - Gonzalez] - - Fix error when repository has no updated_at value - Fix error when repository has no updated_at value. [Nicolai Ehemann] 0.13.0 (2017-04-05) ------------------- -- Release version 0.13.0. [Jose Diaz-Gonzalez] -- Merge pull request #59 from martintoreilly/master. [Jose Diaz- - Gonzalez] - - Add support for storing PAT in OSX keychain - Add OS check for OSX specific keychain args. [Martin O'Reilly] Keychain arguments are only supported on Mac OSX. @@ -609,19 +420,11 @@ Other 0.12.1 (2017-03-27) ------------------- -- Release version 0.12.1. [Jose Diaz-Gonzalez] -- Merge pull request #57 from acdha/reuse-existing-remotes. [Jose Diaz- - Gonzalez] - - Avoid remote branch name churn - Avoid remote branch name churn. [Chris Adams] This avoids the backup output having lots of "[new branch]" messages because removing the old remote name removed all of the existing branch references. -- Merge pull request #55 from amaczuga/master. [Jose Diaz-Gonzalez] - - Fix detection of bare git directories - Fix detection of bare git directories. [Andrzej Maczuga] @@ -636,49 +439,22 @@ Fix Other ~~~~~ -- Release version 0.12.0. 
[Jose Diaz-Gonzalez] - Pep8: E501 line too long (83 > 79 characters) [Jose Diaz-Gonzalez] - Pep8: E128 continuation line under-indented for visual indent. [Jose Diaz-Gonzalez] -- Merge pull request #54 from amaczuga/master. [Jose Diaz-Gonzalez] - - Support archivization using bare git clones - Support archivization using bare git clones. [Andrzej Maczuga] -- Merge pull request #53 from trel/master. [Jose Diaz-Gonzalez] - - fix typo, 3x - Fix typo, 3x. [Terrell Russell] 0.11.0 (2016-10-26) ------------------- -- Release version 0.11.0. [Jose Diaz-Gonzalez] -- Merge pull request #52 from bjodah/fix-gh-51. [Jose Diaz-Gonzalez] - - Support --token file:///home/user/token.txt (fixes gh-51) - Support --token file:///home/user/token.txt (fixes gh-51) [Björn Dahlgren] -- Merge pull request #48 from albertyw/python3. [Jose Diaz-Gonzalez] - - Support Python 3 - Fix some linting. [Albert Wang] - Fix byte/string conversion for python 3. [Albert Wang] - Support python 3. [Albert Wang] -- Merge pull request #46 from remram44/encode-password. [Jose Diaz- - Gonzalez] - - Encode special characters in password - Encode special characters in password. [Remi Rampin] -- Merge pull request #45 from remram44/cli-programname. [Jose Diaz- - Gonzalez] - - Fix program name -- Update README.rst. [Remi Rampin] - Don't pretend program name is "Github Backup" [Remi Rampin] -- Merge pull request #44 from remram44/readme-git-https. [Jose Diaz- - Gonzalez] - - Don't install over insecure connection - Don't install over insecure connection. [Remi Rampin] The git:// protocol is unauthenticated and unencrypted, and no longer advertised by GitHub. Using HTTPS shouldn't impact performance. @@ -686,10 +462,6 @@ Other 0.10.3 (2016-08-21) ------------------- -- Release version 0.10.3. [Jose Diaz-Gonzalez] -- Merge pull request #30 from jonasrmichel/master. [Jose Diaz-Gonzalez] - - Fixes #29 - Fixes #29. 
[Jonas Michel] Reporting an error when the user's rate limit is exceeded causes @@ -707,23 +479,13 @@ Other 0.10.2 (2016-08-21) ------------------- -- Release version 0.10.2. [Jose Diaz-Gonzalez] - Add a note regarding git version requirement. [Jose Diaz-Gonzalez] Closes #37 -0.10.1 (2016-08-21) -------------------- -- Release version 0.10.1. [Jose Diaz-Gonzalez] - - 0.10.0 (2016-08-18) ------------------- -- Release version 0.10.0. [Jose Diaz-Gonzalez] -- Merge pull request #42 from robertwb/master. [Jose Diaz-Gonzalez] - - Implement incremental updates - Implement incremental updates. [Robert Bradshaw] Guarded with an --incremental flag. @@ -736,21 +498,11 @@ Other 0.9.0 (2016-03-29) ------------------ -- Release version 0.9.0. [Jose Diaz-Gonzalez] -- Merge pull request #36 from zlabjp/fix-cloning-private-repos. [Jose - Diaz-Gonzalez] - - Fix cloning private repos with basic auth or token - Fix cloning private repos with basic auth or token. [Kazuki Suda] 0.8.0 (2016-02-14) ------------------ -- Release version 0.8.0. [Jose Diaz-Gonzalez] -- Merge pull request #35 from eht16/issue23_store_pullrequests_once. - [Jose Diaz-Gonzalez] - - Don't store issues which are actually pull requests - Don't store issues which are actually pull requests. [Enrico Tröger] This prevents storing pull requests twice since the Github API returns @@ -761,65 +513,31 @@ Other 0.7.0 (2016-02-02) ------------------ -- Release version 0.7.0. [Jose Diaz-Gonzalez] -- Merge pull request #32 from albertyw/soft-fail-hooks. [Jose Diaz- - Gonzalez] - - Softly fail if not able to read hooks - Softly fail if not able to read hooks. [Albert Wang] -- Merge pull request #33 from albertyw/update-readme. [Jose Diaz- - Gonzalez] - - Add note about 2-factor auth in readme - Add note about 2-factor auth. [Albert Wang] -- Merge pull request #31 from albertyw/fix-private-repos. 
[Jose Diaz- - Gonzalez] - - Fix reading user's private repositories - Make user repository search go through endpoint capable of reading private repositories. [Albert Wang] -- Merge pull request #28 from alexmojaki/getpass. [Jose Diaz-Gonzalez] - - Prompt for password if only username given -- Update README with new CLI usage. [Alex Hall] - Prompt for password if only username given. [Alex Hall] 0.6.0 (2015-11-10) ------------------ -- Release version 0.6.0. [Jose Diaz-Gonzalez] - Force proper remote url. [Jose Diaz-Gonzalez] -- Merge pull request #24 from eht16/add_backup_hooks. [Jose Diaz- - Gonzalez] - - Add backup hooks - Improve error handling in case of HTTP errors. [Enrico Tröger] In case of a HTTP status code 404, the returned 'r' was never assigned. In case of URL errors which are not timeouts, we probably should bail out. - Add --hooks to also include web hooks into the backup. [Enrico Tröger] -- Merge pull request #22 from eht16/issue_17_create_output_directory. - [Jose Diaz-Gonzalez] - - Create the user specified output directory if it does not exist - Create the user specified output directory if it does not exist. [Enrico Tröger] Fixes #17. -- Merge pull request #21 from eht16/fix_get_response_missing_auth. [Jose - Diaz-Gonzalez] - - Add missing auth argument to _get_response() - Add missing auth argument to _get_response() [Enrico Tröger] When running unauthenticated and Github starts rate-limiting the client, github-backup crashes because the used auth variable in _get_response() was not available. This change should fix it. -- Merge pull request #20 from - eht16/improve_error_msg_on_non_existing_repo. [Jose Diaz-Gonzalez] - - Add repository URL to error message for non-existing repositories - Add repository URL to error message for non-existing repositories. [Enrico Tröger] @@ -830,69 +548,28 @@ Other 0.5.0 (2015-10-10) ------------------ -- Release version 0.5.0. [Jose Diaz-Gonzalez] - Add release script. 
[Jose Diaz-Gonzalez] - Refactor to both simplify codepath as well as follow PEP8 standards. [Jose Diaz-Gonzalez] -- Merge pull request #19 from Embed-Engineering/retry-timeout. [Jose - Diaz-Gonzalez] - - Retry 3 times when the connection times out - Retry 3 times when the connection times out. [Mathijs Jonker] -- Merge pull request #15 from kromkrom/master. [Jose Diaz-Gonzalez] - - Preserve Unicode characters in the output file -- Update github-backup. [Kirill Grushetsky] -- Update github-backup. [Kirill Grushetsky] - Made unicode output defalut. [Kirill Grushetsky] - Import alphabetised. [Kirill Grushetsky] - Preserve Unicode characters in the output file. [Kirill Grushetsky] Added option to preserve Unicode characters in the output file -- Merge pull request #14 from aensley/master. [Jose Diaz-Gonzalez] - - Added backup of labels and milestones. - Josegonzales/python-github-backup#12 Added backup of labels and milestones. [aensley] -- Merge pull request #11 from Embed-Engineering/master. [Jose Diaz- - Gonzalez] - - Added test for uninitialized repo's (or wiki's) - Fixed indent. [Mathijs Jonker] -- Update github-backup. [mjonker-embed] - Skip unitialized repo's. [mjonker-embed] These gave me errors which caused mails from crontab. -- Merge pull request #10 from Embed-Engineering/master. [Jose Diaz- - Gonzalez] - - Added prefer-ssh - Added prefer-ssh. [mjonker-embed] Was needed for my back-up setup, code includes this but readme wasn't updated -- Merge pull request #9 from acdha/ratelimit-retries. [Jose Diaz- - Gonzalez] - - Retry API requests which failed due to rate-limiting - Retry API requests which failed due to rate-limiting. [Chris Adams] This allows operation to continue, albeit at a slower pace, if you have enough data to trigger the API rate limits -- Release 0.4.0. [Jose Diaz-Gonzalez] -- Merge pull request #7 from acdha/repo-backup-overhaul. [Jose Diaz- - Gonzalez] - - Repo backup overhaul -- Update repository back up handling for wikis. 
[Chris Adams] - - * Now wikis will follow the same logic as the main repo - checkout for --prefer-ssh. - * The regular repository and wiki paths both use the same - function to handle either cloning or updating a local copy - of the remote repo - * All git updates will now use “git fetch --all --tags” - to ensure that tags and branches other than master will - also be backed up - Logging_subprocess: always log when a command fails. [Chris Adams] Previously git clones could fail without any indication @@ -907,19 +584,10 @@ Other The previous commit used the wrong URL for a private repo. This was masked by the lack of error loging in logging_subprocess (which will be in a separate branch) -- Merge pull request #6 from acdha/allow-clone-over-ssh. [Jose Diaz- - Gonzalez] - - Add an option to prefer checkouts over SSH - Add an option to prefer checkouts over SSH. [Chris Adams] This is really useful with private repos to avoid being nagged for credentials for every repository -- Release 0.3.0. [Jose Diaz-Gonzalez] -- Merge pull request #4 from klaude/pull_request_support. [Jose Diaz- - Gonzalez] - - Add pull request support - Add pull request support. [Kevin Laude] Back up reporitory pull requests by passing the --include-pulls @@ -931,10 +599,6 @@ Other Pull requests are automatically backed up when the --all argument is uesd. -- Merge pull request #5 from klaude/github-enterprise-support. [Jose - Diaz-Gonzalez] - - Add GitHub Enterprise Support - Add GitHub Enterprise support. [Kevin Laude] Pass the -H or --github-host argument with a GitHub Enterprise hostname @@ -944,13 +608,9 @@ Other 0.2.0 (2014-09-22) ------------------ -- Release 0.2.0. [Jose Diaz-Gonzalez] - Add support for retrieving repositories. Closes #1. [Jose Diaz- Gonzalez] - Fix PEP8 violations. [Jose Diaz-Gonzalez] -- Merge pull request #2 from johnyf/master. [Jose Diaz-Gonzalez] - - add authorization to header only if specified by user - Add authorization to header only if specified by user. 
[Ioannis Filippidis] - Fill out readme more. [Jose Diaz-Gonzalez] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 92717f7..1a92310 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.42.0" +__version__ = "0.43.0" From f3340cd9eb0d45f6ddc3074047fca4c322d70f31 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:45:49 -0400 Subject: [PATCH 167/455] chore: add release requirements --- release-requirements.txt | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 release-requirements.txt diff --git a/release-requirements.txt b/release-requirements.txt new file mode 100644 index 0000000..1571464 --- /dev/null +++ b/release-requirements.txt @@ -0,0 +1,31 @@ +bleach==6.0.0 +certifi==2023.5.7 +charset-normalizer==3.1.0 +colorama==0.4.6 +docutils==0.20.1 +flake8==6.0.0 +gitchangelog==3.0.4 +idna==3.4 +importlib-metadata==6.6.0 +jaraco.classes==3.2.3 +keyring==23.13.1 +markdown-it-py==2.2.0 +mccabe==0.7.0 +mdurl==0.1.2 +more-itertools==9.1.0 +pkginfo==1.9.6 +pycodestyle==2.10.0 +pyflakes==3.0.1 +Pygments==2.15.1 +readme-renderer==37.3 +requests==2.31.0 +requests-toolbelt==1.0.0 +restructuredtext-lint==1.4.0 +rfc3986==2.0.0 +rich==13.3.5 +six==1.16.0 +tqdm==4.65.0 +twine==4.0.2 +urllib3==2.0.2 +webencodings==0.5.1 +zipp==3.15.0 From f1cf4cd31576530888abe2578681a4d8070a28bb Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Mon, 29 May 2023 18:45:57 -0400 Subject: [PATCH 168/455] Release version 0.43.1 --- CHANGES.rst | 7 ++++++- github_backup/__init__.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 26a4243..b4f774b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,14 @@ Changelog ========= -0.43.0 (2023-05-29) +0.43.1 (2023-05-29) ------------------- ------------------------ +- Chore: add release requirements. 
[Jose Diaz-Gonzalez] + + +0.43.0 (2023-05-29) +------------------- Fix ~~~ diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 1a92310..fb8a056 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.43.0" +__version__ = "0.43.1" From 12799bb72cccb9c961489cad801b9830797754a2 Mon Sep 17 00:00:00 2001 From: Zhymabek Roman <61125068+ZhymabekRoman@users.noreply.github.com> Date: Fri, 23 Jun 2023 21:27:52 +0600 Subject: [PATCH 169/455] fix: minor typo fix --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index b0ae62f..f7ce874 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ github-backup |PyPI| |Python Versions| - This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and cannot sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. + This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and can not sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. 
backup a github user or organization From a06c3e9fd35fc3c32f760ade7cac0ad708582f01 Mon Sep 17 00:00:00 2001 From: ZhymabekRoman Date: Sun, 25 Jun 2023 10:38:31 +0600 Subject: [PATCH 170/455] fix: refactor logging Based on #195 --- bin/github-backup | 22 +++---- github_backup/github_backup.py | 102 ++++++++++++--------------------- 2 files changed, 49 insertions(+), 75 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index 8d2698b..2fb3a9a 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -2,31 +2,31 @@ import os, sys, logging +logging.basicConfig( + format='%(asctime)s.%(msecs)03d: %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S', + level=logging.INFO +) + from github_backup.github_backup import ( backup_account, backup_repositories, check_git_lfs_install, filter_repositories, get_authenticated_user, - log_info, - log_warning, + logger, mkdir_p, parse_args, retrieve_repositories, ) -logging.basicConfig( - format='%(asctime)s.%(msecs)03d: %(message)s', - datefmt='%Y-%m-%dT%H:%M:%S', - level=logging.INFO -) def main(): args = parse_args() output_directory = os.path.realpath(args.output_directory) if not os.path.isdir(output_directory): - log_info('Create output directory {0}'.format(output_directory)) + logger.info('Create output directory {0}'.format(output_directory)) mkdir_p(output_directory) if args.lfs_clone: @@ -35,10 +35,10 @@ def main(): if args.log_level: log_level = logging.getLevelName(args.log_level.upper()) if isinstance(log_level, int): - logging.root.setLevel(log_level) + logger.root.setLevel(log_level) if not args.as_app: - log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) + logger.info('Backing up user {0} to {1}'.format(args.user, output_directory)) authenticated_user = get_authenticated_user(args) else: authenticated_user = {'login': None} @@ -53,5 +53,5 @@ if __name__ == '__main__': try: main() except Exception as e: - log_warning(str(e)) + logger.error(str(e)) sys.exit(1) diff --git 
a/github_backup/github_backup.py b/github_backup/github_backup.py index b9ff345..25202d3 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -7,7 +7,6 @@ import base64 import calendar import codecs -import datetime import errno import getpass import json @@ -37,32 +36,7 @@ VERSION = "unknown" FNULL = open(os.devnull, "w") - - -def _get_log_date(): - return datetime.datetime.isoformat(datetime.datetime.now()) - - -def log_info(message): - """ - Log message (str) or messages (List[str]) to stdout - """ - if type(message) == str: - message = [message] - - for msg in message: - logging.info(msg) - - -def log_warning(message): - """ - Log message (str) or messages (List[str]) to stderr - """ - if type(message) == str: - message = [message] - - for msg in message: - logging.warning(msg) +logger = logging.getLogger(__name__) def logging_subprocess( @@ -81,7 +55,7 @@ def logging_subprocess( popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs ) if sys.platform == "win32": - log_info( + logger.info( "Windows operating system detected - no subprocess logging will be returned" ) @@ -558,13 +532,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): try: response = json.loads(r.read().decode("utf-8")) except IncompleteRead: - log_warning("Incomplete read error detected") + logger.warning("Incomplete read error detected") read_error = True except json.decoder.JSONDecodeError: - log_warning("JSON decode error detected") + logger.warning("JSON decode error detected") read_error = True except TimeoutError: - log_warning("Tiemout error detected") + logger.warning("Tiemout error detected") read_error = True else: read_error = False @@ -572,7 +546,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): # be gentle with API request limit and throttle requests if remaining requests getting low limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0)) if args.throttle_limit and 
limit_remaining <= args.throttle_limit: - log_info( + logger.info( "API request limit hit: {} requests left, pausing further requests for {}s".format( limit_remaining, args.throttle_pause ) @@ -581,7 +555,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): retries = 0 while retries < 3 and (status_code == 502 or read_error): - log_warning("API request failed. Retrying in 5 seconds") + logger.warning("API request failed. Retrying in 5 seconds") retries += 1 time.sleep(5) request = _construct_request( @@ -594,13 +568,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): response = json.loads(r.read().decode("utf-8")) read_error = False except IncompleteRead: - log_warning("Incomplete read error detected") + logger.warning("Incomplete read error detected") read_error = True except json.decoder.JSONDecodeError: - log_warning("JSON decode error detected") + logger.warning("JSON decode error detected") read_error = True except TimeoutError: - log_warning("Tiemout error detected") + logger.warning("Tiemout error detected") read_error = True if status_code != 200: @@ -652,12 +626,12 @@ def _get_response(request, auth, template): errors, should_continue = _request_http_error(exc, auth, errors) # noqa r = exc except URLError as e: - log_warning(e.reason) + logger.warning(e.reason) should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise except socket.error as e: - log_warning(e.strerror) + logger.warning(e.strerror) should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise @@ -686,7 +660,7 @@ def _construct_request(per_page, page, query_args, template, auth, as_app=None): request.add_header( "Accept", "application/vnd.github.machine-man-preview+json" ) - log_info("Requesting {}?{}".format(template, querystring)) + logger.info("Requesting {}?{}".format(template, querystring)) return request @@ -710,14 +684,14 @@ def 
_request_http_error(exc, auth, errors): delta = max(10, reset - gm_now) limit = headers.get("x-ratelimit-limit") - log_warning( + logger.warning( "Exceeded rate limit of {} requests; waiting {} seconds to reset".format( limit, delta ) ) # noqa if auth is None: - log_info("Hint: Authenticate to raise your GitHub rate limit") + logger.info("Hint: Authenticate to raise your GitHub rate limit") time.sleep(delta) should_continue = True @@ -727,7 +701,7 @@ def _request_http_error(exc, auth, errors): def _request_url_error(template, retry_timeout): # In case of a connection timing out, we can retry a few time # But we won't crash and not back-up the rest now - log_info("{} timed out".format(template)) + logger.info("{} timed out".format(template)) retry_timeout -= 1 if retry_timeout >= 0: @@ -774,20 +748,20 @@ def download_file(url, path, auth): f.write(chunk) except HTTPError as exc: # Gracefully handle 404 responses (and others) when downloading from S3 - log_warning( + logger.warning( "Skipping download of asset {0} due to HTTPError: {1}".format( url, exc.reason ) ) except URLError as e: # Gracefully handle other URL errors - log_warning( + logger.warning( "Skipping download of asset {0} due to URLError: {1}".format(url, e.reason) ) except socket.error as e: # Gracefully handle socket errors # TODO: Implement retry logic - log_warning( + logger.warning( "Skipping download of asset {0} due to socker error: {1}".format( url, e.strerror ) @@ -809,14 +783,14 @@ def check_git_lfs_install(): def retrieve_repositories(args, authenticated_user): - log_info("Retrieving repositories") + logger.info("Retrieving repositories") single_request = False if args.user == authenticated_user["login"]: # we must use the /user/repos API to be able to access private repos template = "https://{0}/user/repos".format(get_github_api_host(args)) else: if args.private and not args.organization: - log_warning( + logger.warning( "Authenticated user is different from user being backed up, thus 
private repositories cannot be accessed" ) template = "https://{0}/users/{1}/repos".format( @@ -872,7 +846,7 @@ def retrieve_repositories(args, authenticated_user): def filter_repositories(args, unfiltered_repositories): - log_info("Filtering repositories") + logger.info("Filtering repositories") repositories = [] for r in unfiltered_repositories: @@ -911,7 +885,7 @@ def filter_repositories(args, unfiltered_repositories): def backup_repositories(args, output_directory, repositories): - log_info("Backing up repositories") + logger.info("Backing up repositories") repos_template = "https://{0}/repos".format(get_github_api_host(args)) if args.incremental: @@ -1023,7 +997,7 @@ def backup_issues(args, repo_cwd, repository, repos_template): if args.skip_existing and has_issues_dir: return - log_info("Retrieving {0} issues".format(repository["full_name"])) + logger.info("Retrieving {0} issues".format(repository["full_name"])) issue_cwd = os.path.join(repo_cwd, "issues") mkdir_p(repo_cwd, issue_cwd) @@ -1052,7 +1026,7 @@ def backup_issues(args, repo_cwd, repository, repos_template): if issues_skipped: issues_skipped_message = " (skipped {0} pull requests)".format(issues_skipped) - log_info( + logger.info( "Saving {0} issues to disk{1}".format( len(list(issues.keys())), issues_skipped_message ) @@ -1077,7 +1051,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): if args.skip_existing and has_pulls_dir: return - log_info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa + logger.info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa pulls_cwd = os.path.join(repo_cwd, "pulls") mkdir_p(repo_cwd, pulls_cwd) @@ -1113,7 +1087,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): single_request=True, )[0] - log_info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) + logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) # Comments from pulls API are only _review_ 
comments # regular comments need to be fetched via issue API. # For backwards compatibility with versions <= 0.41.0 @@ -1141,7 +1115,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template): if args.skip_existing and os.path.isdir(milestone_cwd): return - log_info("Retrieving {0} milestones".format(repository["full_name"])) + logger.info("Retrieving {0} milestones".format(repository["full_name"])) mkdir_p(repo_cwd, milestone_cwd) template = "{0}/{1}/milestones".format(repos_template, repository["full_name"]) @@ -1154,7 +1128,7 @@ def backup_milestones(args, repo_cwd, repository, repos_template): for milestone in _milestones: milestones[milestone["number"]] = milestone - log_info("Saving {0} milestones to disk".format(len(list(milestones.keys())))) + logger.info("Saving {0} milestones to disk".format(len(list(milestones.keys())))) for number, milestone in list(milestones.items()): milestone_file = "{0}/{1}.json".format(milestone_cwd, number) with codecs.open(milestone_file, "w", encoding="utf-8") as f: @@ -1171,7 +1145,7 @@ def backup_labels(args, repo_cwd, repository, repos_template): def backup_hooks(args, repo_cwd, repository, repos_template): auth = get_auth(args) if not auth: - log_info("Skipping hooks since no authentication provided") + logger.info("Skipping hooks since no authentication provided") return hook_cwd = os.path.join(repo_cwd, "hooks") output_file = "{0}/hooks.json".format(hook_cwd) @@ -1179,7 +1153,7 @@ def backup_hooks(args, repo_cwd, repository, repos_template): try: _backup_data(args, "hooks", template, output_file, hook_cwd) except SystemExit: - log_info("Unable to read hooks, skipping") + logger.info("Unable to read hooks, skipping") def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): @@ -1187,7 +1161,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F # give release files somewhere to live & log intent release_cwd = os.path.join(repo_cwd, "releases") - 
log_info("Retrieving {0} releases".format(repository_fullname)) + logger.info("Retrieving {0} releases".format(repository_fullname)) mkdir_p(repo_cwd, release_cwd) query_args = {} @@ -1196,7 +1170,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F releases = retrieve_data(args, release_template, query_args=query_args) # for each release, store it - log_info("Saving {0} releases to disk".format(len(releases))) + logger.info("Saving {0} releases to disk".format(len(releases))) for release in releases: release_name = release["tag_name"] release_name_safe = release_name.replace("/", "__") @@ -1251,7 +1225,7 @@ def fetch_repository( "git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True ) if initialized == 128: - log_info( + logger.info( "Skipping {0} ({1}) since it's not initialized".format( name, masked_remote_url ) @@ -1259,7 +1233,7 @@ def fetch_repository( return if clone_exists: - log_info("Updating {0} in {1}".format(name, local_dir)) + logger.info("Updating {0} in {1}".format(name, local_dir)) remotes = subprocess.check_output(["git", "remote", "show"], cwd=local_dir) remotes = [i.strip() for i in remotes.decode("utf-8").splitlines()] @@ -1281,7 +1255,7 @@ def fetch_repository( git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) else: - log_info( + logger.info( "Cloning {0} repository from {1} to {2}".format( name, masked_remote_url, local_dir ) @@ -1337,11 +1311,11 @@ def backup_account(args, output_directory): def _backup_data(args, name, template, output_file, output_directory): skip_existing = args.skip_existing if not skip_existing or not os.path.exists(output_file): - log_info("Retrieving {0} {1}".format(args.user, name)) + logger.info("Retrieving {0} {1}".format(args.user, name)) mkdir_p(output_directory) data = retrieve_data(args, template) - log_info("Writing {0} {1} to disk".format(len(data), name)) + logger.info("Writing {0} {1} to disk".format(len(data), name)) with 
codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(data, f) From 68e718010f1027507f4aa4f718a8880821ccda50 Mon Sep 17 00:00:00 2001 From: ZhymabekRoman Date: Sun, 25 Jun 2023 10:39:16 +0600 Subject: [PATCH 171/455] fix: add forgotten variable formatting --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 25202d3..81386e0 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -707,7 +707,7 @@ def _request_url_error(template, retry_timeout): if retry_timeout >= 0: return True, retry_timeout - raise Exception("{} timed out to much, skipping!") + raise Exception("{} timed out to much, skipping!".format(template)) class S3HTTPRedirectHandler(HTTPRedirectHandler): From a8a583bed115585337b5de080c1cf4156d665a3d Mon Sep 17 00:00:00 2001 From: ZhymabekRoman Date: Sun, 25 Jun 2023 10:41:48 +0600 Subject: [PATCH 172/455] fix: minor cosmetic changes --- github_backup/github_backup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 81386e0..4558504 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -701,13 +701,13 @@ def _request_http_error(exc, auth, errors): def _request_url_error(template, retry_timeout): # In case of a connection timing out, we can retry a few time # But we won't crash and not back-up the rest now - logger.info("{} timed out".format(template)) + logger.info("'{}' timed out".format(template)) retry_timeout -= 1 if retry_timeout >= 0: return True, retry_timeout - raise Exception("{} timed out to much, skipping!".format(template)) + raise Exception("'{}' timed out to much, skipping!".format(template)) class S3HTTPRedirectHandler(HTTPRedirectHandler): From 96e6f58159385ca9580cef50edda88accc21f761 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Jul 2023 23:20:45 +0000 Subject: [PATCH 173/455] Bump certifi from 2023.5.7 to 2023.7.22 Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.5.7 to 2023.7.22. - [Commits](https://github.com/certifi/python-certifi/compare/2023.05.07...2023.07.22) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 1571464..cb1dcbb 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,5 +1,5 @@ bleach==6.0.0 -certifi==2023.5.7 +certifi==2023.7.22 charset-normalizer==3.1.0 colorama==0.4.6 docutils==0.20.1 From f12b8775093630b1acc088b349d8dcaca1955d4c Mon Sep 17 00:00:00 2001 From: Halvor Holsten Strand Date: Fri, 29 Sep 2023 14:01:53 +0200 Subject: [PATCH 174/455] Keep backwards compatability by going back to "--token" for classic. Allow "file://" uri for "--token-fine". --- README.rst | 7 ++++--- github_backup/github_backup.py | 8 +++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index aadd645..7e9e592 100644 --- a/README.rst +++ b/README.rst @@ -58,9 +58,10 @@ CLI Usage is as follows:: -p PASSWORD, --password PASSWORD password for basic auth. If a username is given but not a password, the password will be prompted for. - -f TOKEN_FINE, --token-fine TOKEN - fine-grained personal access token - -t TOKEN_CLASSIC, --token-classic TOKEN + -f TOKEN_FINE, --token-fine TOKEN_FINE + fine-grained personal access token or path to token + (file://...) + -t TOKEN_CLASSIC, --token TOKEN_CLASSIC personal access, OAuth, or JSON Web token, or path to token (file://...) --as-app authenticate as github app instead of as a user. 
diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index a024cc5..bc1387b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -150,7 +150,7 @@ def parse_args(args=None): 'If a username is given but not a password, the ' 'password will be prompted for.') parser.add_argument('-t', - '--token-classic', + '--token', dest='token_classic', help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa parser.add_argument('-f', @@ -362,6 +362,11 @@ def get_auth(args, encode=True, for_git_cli=False): elif args.osx_keychain_item_account: raise Exception('You must specify both name and account fields for osx keychain password items') elif args.token_fine: + _path_specifier = 'file://' + if args.token_fine.startswith(_path_specifier): + args.token_fine = open(args.token_fine[len(_path_specifier):], + 'rt').readline().strip() + if args.token_fine.startswith("github_pat_"): auth = args.token_fine else: @@ -371,6 +376,7 @@ def get_auth(args, encode=True, for_git_cli=False): if args.token_classic.startswith(_path_specifier): args.token_classic = open(args.token_classic[len(_path_specifier):], 'rt').readline().strip() + if not args.as_app: auth = args.token_classic + ':' + 'x-oauth-basic' else: From a9d35c0fd52659ead22446bffe22567784444ac5 Mon Sep 17 00:00:00 2001 From: Halvor Holsten Strand Date: Fri, 29 Sep 2023 14:40:16 +0200 Subject: [PATCH 175/455] Ran black. 
--- github_backup/github_backup.py | 51 ++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 7337783..ec03d3f 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -127,13 +127,13 @@ def parse_args(args=None): "-t", "--token", dest="token_classic", - help="personal access, OAuth, or JSON Web token, or path to token (file://...)" + help="personal access, OAuth, or JSON Web token, or path to token (file://...)", ) # noqa parser.add_argument( "-f", - '--token-fine', + "--token-fine", dest="token_fine", - help="fine-grained personal access token (github_pat_....), or path to token (file://...)" + help="fine-grained personal access token (github_pat_....), or path to token (file://...)", ) # noqa parser.add_argument( "--as-app", @@ -436,22 +436,28 @@ def get_auth(args, encode=True, for_git_cli=False): elif args.osx_keychain_item_account: raise Exception( "You must specify both name and account fields for osx keychain password items" - ) + ) elif args.token_fine: _path_specifier = "file://" if args.token_fine.startswith(_path_specifier): - args.token_fine = open(args.token_fine[len(_path_specifier):], - "rt").readline().strip() + args.token_fine = ( + open(args.token_fine[len(_path_specifier) :], "rt").readline().strip() + ) if args.token_fine.startswith("github_pat_"): auth = args.token_fine else: - raise Exception("Fine-grained token supplied does not look like a GitHub PAT") + raise Exception( + "Fine-grained token supplied does not look like a GitHub PAT" + ) elif args.token_classic: _path_specifier = "file://" if args.token_classic.startswith(_path_specifier): - args.token_classic = open(args.token_classic[len(_path_specifier):], - "rt").readline().strip() + args.token_classic = ( + open(args.token_classic[len(_path_specifier) :], "rt") + .readline() + .strip() + ) if not args.as_app: auth = args.token_classic + ":" + 
"x-oauth-basic" @@ -518,7 +524,7 @@ def get_github_repo_url(args, repository): return repository["ssh_url"] auth = get_auth(args, encode=False, for_git_cli=True) - if auth: + if auth: if args.token_fine is None: repo_url = "https://{0}@{1}/{2}/{3}.git".format( auth, @@ -528,7 +534,7 @@ def get_github_repo_url(args, repository): ) else: repo_url = "https://{0}@{1}/{2}/{3}.git".format( - "oauth2:"+auth, + "oauth2:" + auth, get_github_host(args), repository["owner"]["login"], repository["name"], @@ -548,7 +554,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): while True: page = page + 1 request = _construct_request( - per_page, page, query_args, template, auth, as_app=args.as_app, fine=True if args.token_fine is not None else False + per_page, + page, + query_args, + template, + auth, + as_app=args.as_app, + fine=True if args.token_fine is not None else False, ) # noqa r, errors = _get_response(request, auth, template) @@ -584,7 +596,13 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): retries += 1 time.sleep(5) request = _construct_request( - per_page, page, query_args, template, auth, as_app=args.as_app, fine=True if args.token_fine is not None else False + per_page, + page, + query_args, + template, + auth, + as_app=args.as_app, + fine=True if args.token_fine is not None else False, ) # noqa r, errors = _get_response(request, auth, template) @@ -668,10 +686,13 @@ def _get_response(request, auth, template): return r, errors -def _construct_request(per_page, page, query_args, template, auth, as_app=None, fine=False): +def _construct_request( + per_page, page, query_args, template, auth, as_app=None, fine=False +): querystring = urlencode( dict( - list({"per_page": per_page, "page": page}.items()) + list(query_args.items()) + list({"per_page": per_page, "page": page}.items()) + + list(query_args.items()) ) ) From 15de769d674647afe0aecf4cdd0b021b40533f03 Mon Sep 17 00:00:00 2001 From: Halvor Holsten 
Strand Date: Sun, 1 Oct 2023 22:22:15 +0200 Subject: [PATCH 176/455] Simplified one if/elif scenario. Extracted file reading of another if/elif scenario. --- github_backup/github_backup.py | 41 +++++++++++++--------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index ec03d3f..b05d1fb 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -36,6 +36,7 @@ VERSION = "unknown" FNULL = open(os.devnull, "w") +FILE_URI_PREFIX = "file://" logger = logging.getLogger(__name__) @@ -438,11 +439,8 @@ def get_auth(args, encode=True, for_git_cli=False): "You must specify both name and account fields for osx keychain password items" ) elif args.token_fine: - _path_specifier = "file://" - if args.token_fine.startswith(_path_specifier): - args.token_fine = ( - open(args.token_fine[len(_path_specifier) :], "rt").readline().strip() - ) + if args.token_fine.startswith(FILE_URI_PREFIX): + args.token_fine = read_file_contents(args.token_fine) if args.token_fine.startswith("github_pat_"): auth = args.token_fine @@ -451,13 +449,8 @@ def get_auth(args, encode=True, for_git_cli=False): "Fine-grained token supplied does not look like a GitHub PAT" ) elif args.token_classic: - _path_specifier = "file://" - if args.token_classic.startswith(_path_specifier): - args.token_classic = ( - open(args.token_classic[len(_path_specifier) :], "rt") - .readline() - .strip() - ) + if args.token_classic.startswith(FILE_URI_PREFIX): + args.token_classic = read_file_contents(args.token_classic) if not args.as_app: auth = args.token_classic + ":" + "x-oauth-basic" @@ -504,6 +497,10 @@ def get_github_host(args): return host +def read_file_contents(file_uri): + result = open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() + + def get_github_repo_url(args, repository): if repository.get("is_gist"): if args.prefer_ssh: @@ -525,20 +522,12 @@ def get_github_repo_url(args, repository): 
auth = get_auth(args, encode=False, for_git_cli=True) if auth: - if args.token_fine is None: - repo_url = "https://{0}@{1}/{2}/{3}.git".format( - auth, - get_github_host(args), - repository["owner"]["login"], - repository["name"], - ) - else: - repo_url = "https://{0}@{1}/{2}/{3}.git".format( - "oauth2:" + auth, - get_github_host(args), - repository["owner"]["login"], - repository["name"], - ) + repo_url = "https://{0}@{1}/{2}/{3}.git".format( + auth if args.token_fine is None else "oauth2:" + auth, + get_github_host(args), + repository["owner"]["login"], + repository["name"], + ) else: repo_url = repository["clone_url"] From b277baa6ea50144e80841c52b7812ccbf90a4fd5 Mon Sep 17 00:00:00 2001 From: Halvor Holsten Strand Date: Mon, 2 Oct 2023 09:14:40 +0200 Subject: [PATCH 177/455] Update github_backup.py --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index b05d1fb..bc42a20 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -498,7 +498,7 @@ def get_github_host(args): def read_file_contents(file_uri): - result = open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() + return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() def get_github_repo_url(args, repository): From 6f3be3d0e89bae3659684c95fbb08c075f52ace8 Mon Sep 17 00:00:00 2001 From: Halvor Holsten Strand Date: Sat, 7 Oct 2023 19:02:52 +0200 Subject: [PATCH 178/455] Suggested modification to fix win32 logging failure, due to local variable scope. Logger does not appear to have any utility within "logging_subprocess". 
--- github_backup/github_backup.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index bc42a20..5c6b610 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -42,7 +42,6 @@ def logging_subprocess( popenargs, - logger, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs @@ -1278,12 +1277,12 @@ def fetch_repository( if "origin" not in remotes: git_command = ["git", "remote", "rm", "origin"] - logging_subprocess(git_command, None, cwd=local_dir) + logging_subprocess(git_command, cwd=local_dir) git_command = ["git", "remote", "add", "origin", remote_url] - logging_subprocess(git_command, None, cwd=local_dir) + logging_subprocess(git_command, cwd=local_dir) else: git_command = ["git", "remote", "set-url", "origin", remote_url] - logging_subprocess(git_command, None, cwd=local_dir) + logging_subprocess(git_command, cwd=local_dir) if lfs_clone: git_command = ["git", "lfs", "fetch", "--all", "--prune"] @@ -1291,7 +1290,7 @@ def fetch_repository( git_command = ["git", "fetch", "--all", "--force", "--tags", "--prune"] if no_prune: git_command.pop() - logging_subprocess(git_command, None, cwd=local_dir) + logging_subprocess(git_command, cwd=local_dir) else: logger.info( "Cloning {0} repository from {1} to {2}".format( @@ -1300,18 +1299,18 @@ def fetch_repository( ) if bare_clone: git_command = ["git", "clone", "--mirror", remote_url, local_dir] - logging_subprocess(git_command, None) + logging_subprocess(git_command) if lfs_clone: git_command = ["git", "lfs", "fetch", "--all", "--prune"] if no_prune: git_command.pop() - logging_subprocess(git_command, None, cwd=local_dir) + logging_subprocess(git_command, cwd=local_dir) else: if lfs_clone: git_command = ["git", "lfs", "clone", remote_url, local_dir] else: git_command = ["git", "clone", remote_url, local_dir] - logging_subprocess(git_command, None) + 
logging_subprocess(git_command) def backup_account(args, output_directory): From 7437e3abb1aed35b3dfd2f208bbff8c5a00e523c Mon Sep 17 00:00:00 2001 From: Halvor Holsten Strand Date: Mon, 9 Oct 2023 12:01:32 +0200 Subject: [PATCH 179/455] Merge pull request, while keeping -q --quiet flag. Most changes were already included, only adjusted with black formatting. --- .gitchangelog.rc | 117 +++ .github/workflows/tagged-release.yml | 19 + CHANGES.rst | 495 ++-------- README.rst | 31 +- bin/github-backup | 7 +- github_backup/__init__.py | 2 +- github_backup/github_backup.py | 1311 +++++++++++++++----------- release | 12 +- release-requirements.txt | 31 + setup.py | 38 +- 10 files changed, 1061 insertions(+), 1002 deletions(-) create mode 100644 .gitchangelog.rc create mode 100644 .github/workflows/tagged-release.yml create mode 100644 release-requirements.txt diff --git a/.gitchangelog.rc b/.gitchangelog.rc new file mode 100644 index 0000000..842973f --- /dev/null +++ b/.gitchangelog.rc @@ -0,0 +1,117 @@ +# +# Format +# +# ACTION: [AUDIENCE:] COMMIT_MSG [@TAG ...] +# +# Description +# +# ACTION is one of 'chg', 'fix', 'new' +# +# Is WHAT the change is about. +# +# 'chg' is for refactor, small improvement, cosmetic changes... +# 'fix' is for bug fixes +# 'new' is for new features, big improvement +# +# SUBJECT is optional and one of 'dev', 'usr', 'pkg', 'test', 'doc' +# +# Is WHO is concerned by the change. +# +# 'dev' is for developpers (API changes, refactors...) +# 'usr' is for final users (UI changes) +# 'pkg' is for packagers (packaging changes) +# 'test' is for testers (test only related changes) +# 'doc' is for doc guys (doc only changes) +# +# COMMIT_MSG is ... well ... the commit message itself. +# +# TAGs are additionnal adjective as 'refactor' 'minor' 'cosmetic' +# +# 'refactor' is obviously for refactoring code only +# 'minor' is for a very meaningless change (a typo, adding a comment) +# 'cosmetic' is for cosmetic driven change (re-indentation, 80-col...) 
+# +# Example: +# +# new: usr: support of bazaar implemented +# chg: re-indentend some lines @cosmetic +# new: dev: updated code to be compatible with last version of killer lib. +# fix: pkg: updated year of licence coverage. +# new: test: added a bunch of test around user usability of feature X. +# fix: typo in spelling my name in comment. @minor +# +# Please note that multi-line commit message are supported, and only the +# first line will be considered as the "summary" of the commit message. So +# tags, and other rules only applies to the summary. The body of the commit +# message will be displayed in the changelog with minor reformating. + +# +# ``ignore_regexps`` is a line of regexps +# +# Any commit having its full commit message matching any regexp listed here +# will be ignored and won't be reported in the changelog. +# +ignore_regexps = [ + r'(?i)^(Merge pull request|Merge branch|Release|Update)', +] + + +# +# ``replace_regexps`` is a dict associating a regexp pattern and its replacement +# +# It will be applied to get the summary line from the full commit message. +# +# Note that you can provide multiple replacement patterns, they will be all +# tried. If None matches, the summary line will be the full commit message. +# +replace_regexps = { + # current format (ie: 'chg: dev: my commit msg @tag1 @tag2') + + r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n@]*)(@[a-z]+\s+)*$': + r'\4', +} + + +# ``section_regexps`` is a list of 2-tuples associating a string label and a +# list of regexp +# +# Commit messages will be classified in sections thanks to this. Section +# titles are the label, and a commit is classified under this section if any +# of the regexps associated is matching. 
+# +section_regexps = [ + ('New', [ + r'^[nN]ew\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + ('Changes', [ + r'^[cC]hg\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + ('Fix', [ + r'^[fF]ix\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', + ]), + ('Other', None # Match all lines + ), + +] + +# ``body_split_regexp`` is a regexp +# +# Commit message body (not the summary) if existing will be split +# (new line) on this regexp +# +body_split_regexp = r'[\n-]' + + +# ``tag_filter_regexp`` is a regexp +# +# Tags that will be used for the changelog must match this regexp. +# +# tag_filter_regexp = r'^[0-9]+$' +tag_filter_regexp = r'^(?:[vV])?[0-9\.]+$' + + +# ``unreleased_version_label`` is a string +# +# This label will be used as the changelog Title of the last set of changes +# between last valid tag and HEAD if any. +unreleased_version_label = "%%version%% (unreleased)" diff --git a/.github/workflows/tagged-release.yml b/.github/workflows/tagged-release.yml new file mode 100644 index 0000000..846c457 --- /dev/null +++ b/.github/workflows/tagged-release.yml @@ -0,0 +1,19 @@ +--- +name: "tagged-release" + +# yamllint disable-line rule:truthy +on: + push: + tags: + - '*' + +jobs: + tagged-release: + name: tagged-release + runs-on: ubuntu-20.04 + + steps: + - uses: "marvinpinto/action-automatic-releases@v1.2.1" + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + prerelease: false diff --git a/CHANGES.rst b/CHANGES.rst index 47b335d..b4f774b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,26 +1,96 @@ Changelog ========= -0.39.0 (2021-03-18) +0.43.1 (2023-05-29) +------------------- +------------------------ +- Chore: add release requirements. [Jose Diaz-Gonzalez] + + +0.43.0 (2023-05-29) ------------------- ------------- Fix ~~~ -- Fix missing INFO logs. [Gallo Feliz] +- Do not update readme. [Jose Diaz-Gonzalez] +- Adjust for black. [Jose Diaz-Gonzalez] +- Adjust for black. [Jose Diaz-Gonzalez] +- Adjust for black. 
[Jose Diaz-Gonzalez] Other ~~~~~ -- Merge pull request #173 from gallofeliz/make-compatible-python-call. - [Jose Diaz-Gonzalez] +- Feat: commit gitchangelog.rc to repo so anyone can generate a + changelog. [Jose Diaz-Gonzalez] +- Feat: add release tagging. [Jose Diaz-Gonzalez] +- Chore: formatting. [Jose Diaz-Gonzalez] +- Chore: run black. [Jose Diaz-Gonzalez] +- Add --log-level command line argument. [Enrico Tröger] + + Support changing the log level to the desired value easily. + For example, this is useful to suppress progress messages but + keep logging warnings and errors. +- Check both updated_at and pushed_at properties. [Ken Bailey] + + Check both updated_at and pushed_at dates to get the last_update to reduce data retrieved on incremental api calls using since. + + +0.42.0 (2022-11-28) +------------------- +- Add option to exclude repositories. [npounder] +- Backup regular pull request comments as well. [Oneric] + + Before, only review comments were backed up; + regular comments need to be fetched via issue API. +- Fix bug forever retry when request url error. [kornpisey] +- Added --no-prune option to disable prune option when doing git fetch. + [kornpisey] + + +0.41.0 (2022-03-02) +------------------- +- Git lfs clone doe snot respect --mirror. [Louis Parisot] + + +0.40.2 (2021-12-29) +------------------- +- Fix lint issues raised by Flake8. [atinary-afoulon] + + According to job: + [ https://app.circleci.com/pipelines/github/josegonzalez/python-github-backup/30/workflows/74eb93f2-2505-435d-b728-03b3cc04c14a/jobs/23 ] + + Failed on the following checks: + ./github_backup/github_backup.py:20:1: F811 redefinition of unused 'logging' from line 14 + ./github_backup/github_backup.py:45:1: E302 expected 2 blank lines, found 1 + ./github_backup/github_backup.py:136:20: E251 unexpected spaces around keyword / parameter equals + + +0.40.1 (2021-09-22) +------------------- +- Revert to fetch. 
[Harrison Wright] + + +0.40.0 (2021-07-12) +------------------- +- Add retry on certain network errors. [Jacek Nykis] + + This change includes certain network level errors in the retry logic. + It partially address #110 but I think more comprehensive fix would be useful. +- Pull changes from remote. [Jonas] + + use `git pull` to pull actual files from the remote instead of using `fetch` for only the metadata + + +0.39.0 (2021-03-19) +------------------- - Try to make compatible code with direct Python call ; reduce the hard link of the code with the cli +Fix +~~~ +- Fix missing INFO logs. [Gallo Feliz] + +Other +~~~~~ - Try to make compatible code with direct Python call ; reduce the hard link of the code with the cli. [Gallo Feliz] -- Merge pull request #174 from atorrescogollo/master. [Jose Diaz- - Gonzalez] - - Fixed release_name with slash bug - Fixed release_name with slash bug. [Álvaro Torres Cogollo] @@ -38,15 +108,6 @@ Fix Other ~~~~~ -- Release version 0.38.0. [Jose Diaz-Gonzalez] -- Merge pull request #172 from samanthaq/always-use-oauth-when-provided. - [Jose Diaz-Gonzalez] - - fix: Always clone with OAuth token when provided -- Merge pull request #170 from Mindavi/bugfix/broken-url. [Jose Diaz- - Gonzalez] - - Fix broken and incorrect link to github repository - Change broken link to a fork to a working link to upstream. [Rick van Schijndel] @@ -58,10 +119,6 @@ Fix ~~~ - Use distutils.core on error. [Jose Diaz-Gonzalez] -Other -~~~~~ -- Release version 0.37.2. [Jose Diaz-Gonzalez] - 0.37.1 (2021-01-02) ------------------- @@ -79,46 +136,24 @@ Fix - Set long description type - Gitignore the temporary readme file -Other -~~~~~ -- Release version 0.37.1. [Jose Diaz-Gonzalez] - 0.37.0 (2021-01-02) ------------------- -- Release version 0.37.0. [Jose Diaz-Gonzalez] -- Merge pull request #158 from albertyw/python3. [Jose Diaz-Gonzalez] - - Remove support for python 2 - Add support for python 3.7 and 3.8 in package classifiers. 
[Albert Wang] - Remove support for python 2.7 in package classifiers. [Albert Wang] - Remove python 2 specific import logic. [Albert Wang] - Remove python 2 specific logic. [Albert Wang] -- Merge pull request #165 from garymoon/add-skip-archived. [Jose Diaz- - Gonzalez] - - Add option to skip archived repositories - Add ability to skip archived repositories. [Gary Moon] 0.36.0 (2020-08-29) ------------------- -- Release version 0.36.0. [Jose Diaz-Gonzalez] -- Merge pull request #157 from albertyw/lint. [Jose Diaz-Gonzalez] - Add flake8 instructions to readme. [Albert Wang] - Fix regex string. [Albert Wang] -- Update boolean check. [Albert Wang] - Fix whitespace issues. [Albert Wang] - Do not use bare excepts. [Albert Wang] -- Merge pull request #161 from albertyw/circleci-project-setup. [Jose - Diaz-Gonzalez] - - Add circleci config - Add .circleci/config.yml. [Albert Wang] -- Merge pull request #160 from wbolster/patch-1. [Jose Diaz-Gonzalez] - - Include --private flag in example - Include --private flag in example. [wouter bolsterlee] By default, private repositories are not included. This is surprising. @@ -128,38 +163,16 @@ Other 0.35.0 (2020-08-05) ------------------- -- Release version 0.35.0. [Jose Diaz-Gonzalez] -- Merge pull request #156 from samanthaq/restore-optional-throttling. - [Jose Diaz-Gonzalez] - - Make API request throttling optional - Make API request throttling optional. [Samantha Baldwin] 0.34.0 (2020-07-24) ------------------- -- Release version 0.34.0. [Jose Diaz-Gonzalez] -- Merge pull request #153 from 0x6d617474/gist_ssh. [Jose Diaz-Gonzalez] - - Add logic for transforming gist repository urls to ssh - Add logic for transforming gist repository urls to ssh. [Matt Fields] -0.33.1 (2020-05-28) -------------------- -- Release version 0.33.1. [Jose Diaz-Gonzalez] -- Merge pull request #151 from garymoon/readme-update-0.33. [Jose Diaz- - Gonzalez] -- Update the readme for new switches added in 0.33. 
[Gary Moon] - - 0.33.0 (2020-04-13) ------------------- -- Release version 0.33.0. [Jose Diaz-Gonzalez] -- Merge pull request #149 from eht16/simple_api_request_throttling. - [Jose Diaz-Gonzalez] - - Add basic API request throttling - Add basic API request throttling. [Enrico Tröger] A simple approach to throttle API requests and so keep within the rate @@ -171,50 +184,23 @@ Other 0.32.0 (2020-04-13) ------------------- -- Release version 0.32.0. [Jose Diaz-Gonzalez] -- Merge pull request #148 from eht16/logging_with_timestamp. [Jose Diaz- - Gonzalez] - - Add timestamp to log messages - Add timestamp to log messages. [Enrico Tröger] -- Merge pull request #147 from tomhoover/update-readme. [Jose Diaz- - Gonzalez] - - Update README.rst to match 'github-backup -h' -- Update README.rst to match 'github-backup -h' [Tom Hoover] 0.31.0 (2020-02-25) ------------------- -- Release version 0.31.0. [Jose Diaz-Gonzalez] -- Merge pull request #146 from timm3/upstream-123. [Jose Diaz-Gonzalez] - - Authenticate as Github App - #123 update: changed --as-app 'help' description. [ethan] - #123: Support Authenticating As Github Application. [ethan] -0.30.0 (2020-02-14) -------------------- -- Release version 0.30.0. [Jose Diaz-Gonzalez] - - 0.29.0 (2020-02-14) ------------------- -- Release version 0.29.0. [Jose Diaz-Gonzalez] -- Merge pull request #145 from timm3/50-v0.28.0. [Jose Diaz-Gonzalez] - - #50 - refactor for friendlier import - #50 update: keep main() in bin. [ethan] - #50 - refactor for friendlier import. [ethan] 0.28.0 (2020-02-03) ------------------- -- Release version 0.28.0. [Jose Diaz-Gonzalez] -- Merge pull request #143 from smiley/patch-1. [Jose Diaz-Gonzalez] - - Remove deprecated (and removed) "git lfs fetch" flags - Remove deprecated (and removed) git lfs flags. [smiley] "--tags" and "--force" were removed at some point from "git lfs fetch". This broke our backup script. 
@@ -222,11 +208,6 @@ Other 0.27.0 (2020-01-22) ------------------- -- Release version 0.27.0. [Jose Diaz-Gonzalez] -- Merge pull request #142 from einsteinx2/issue/141-import-error- - version. [Jose Diaz-Gonzalez] - - Fixed script fails if not installed from pip - Fixed script fails if not installed from pip. [Ben Baron] At the top of the script, the line from github_backup import __version__ gets the script's version number to use if the script is called with the -v or --version flags. The problem is that if the script hasn't been installed via pip (for example I cloned the repo directly to my backup server), the script will fail due to an import exception. @@ -234,26 +215,14 @@ Other Also presumably it will always use the version number from pip even if running a modified version from git or a fork or something, though this does not fix that as I have no idea how to check if it's running the pip installed version or not. But at least the script will now work fine if cloned from git or just copied to another machine. closes https://github.com/josegonzalez/python-github-backup/issues/141 -- Merge pull request #136 from einsteinx2/issue/88-macos-keychain- - broken-python3. [Jose Diaz-Gonzalez] - - Fixed macOS keychain access when using Python 3 - Fixed macOS keychain access when using Python 3. [Ben Baron] Python 3 is returning bytes rather than a string, so the string concatenation to create the auth variable was throwing an exception which the script was interpreting to mean it couldn't find the password. Adding a conversion to string first fixed the issue. -- Merge pull request #137 from einsteinx2/issue/134-only-use-auth-token- - when-needed. [Jose Diaz-Gonzalez] - - Public repos no longer include the auth token - Public repos no longer include the auth token. [Ben Baron] When backing up repositories using an auth token and https, the GitHub personal auth token is leaked in each backed up repository. It is included in the URL of each repository's git remote url. 
This is not needed as they are public and can be accessed without the token and can cause issues in the future if the token is ever changed, so I think it makes more sense not to have the token stored in each repo backup. I think the token should only be "leaked" like this out of necessity, e.g. it's a private repository and the --prefer-ssh option was not chosen so https with auth token was required to perform the clone. -- Merge pull request #130 from einsteinx2/issue/129-fix-crash-on- - release-asset-download-error. [Jose Diaz-Gonzalez] - - Crash when an release asset doesn't exist - Fixed comment typo. [Ben Baron] - Switched log_info to log_warning in download_file. [Ben Baron] - Crash when an release asset doesn't exist. [Ben Baron] @@ -261,10 +230,6 @@ Other Currently, the script crashes whenever a release asset is unable to download (for example a 404 response). This change instead logs the failure and allows the script to continue. No retry logic is enabled, but at least it prevents the crash and allows the backup to complete. Retry logic can be implemented later if wanted. closes https://github.com/josegonzalez/python-github-backup/issues/129 -- Merge pull request #132 from einsteinx2/issue/126-prevent-overwriting- - release-assets. [Jose Diaz-Gonzalez] - - Separate release assets and skip re-downloading - Moved asset downloading loop inside the if block. [Ben Baron] - Separate release assets and skip re-downloading. [Ben Baron] @@ -275,36 +240,21 @@ Other This change also now checks if the asset file already exists on disk and skips downloading it. This drastically speeds up addiotnal syncs as it no longer downloads every single release every single time. It will now only download new releases which I believe is the expected behavior. closes https://github.com/josegonzalez/python-github-backup/issues/126 -- Merge pull request #131 from einsteinx2/improve-gitignore. 
[Jose Diaz- - Gonzalez] - - Improved gitignore, macOS files and IDE configs - Added newline to end of file. [Ben Baron] - Improved gitignore, macOS files and IDE configs. [Ben Baron] Ignores the annoying hidden macOS files .DS_Store and ._* as well as the IDE configuration folders for contributors using the popular Visual Studio Code and Atom IDEs (more can be added later as needed). -- Update ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] 0.26.0 (2019-09-23) ------------------- -- Release version 0.26.0. [Jose Diaz-Gonzalez] -- Merge pull request #128 from Snawoot/master. [Jose Diaz-Gonzalez] - - Workaround gist clone in `--prefer-ssh` mode - Workaround gist clone in `--prefer-ssh` mode. [Vladislav Yarmak] - Create PULL_REQUEST.md. [Jose Diaz-Gonzalez] - Create ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] -- Update README.rst. [Jose Diaz-Gonzalez] -- Update README.rst. [Jose Diaz-Gonzalez] 0.25.0 (2019-07-03) ------------------- -- Release version 0.25.0. [Jose Diaz-Gonzalez] -- Merge pull request #120 from 8h2a/patch-1. [Jose Diaz-Gonzalez] - - Issue 119: Change retrieve_data to be a generator - Issue 119: Change retrieve_data to be a generator. [2a] See issue #119. @@ -312,43 +262,21 @@ Other 0.24.0 (2019-06-27) ------------------- -- Release version 0.24.0. [Jose Diaz-Gonzalez] -- Merge pull request #117 from QuicketSolutions/master. [Jose Diaz- - Gonzalez] - - Add option for Releases -- Merge pull request #5 from QuicketSolutions/QKT-45. [Ethan Timm] - QKT-45: include assets - update readme. [Ethan Timm] update readme with flag information for including assets alongside their respective releases -- Merge pull request #4 from whwright/wip-releases. [Ethan Timm] - - Download github assets - Make assets it's own flag. [Harrison Wright] - Fix super call for python2. [Harrison Wright] - Fix redirect to s3. [Harrison Wright] - WIP: download assets. [Harrison Wright] -- Merge pull request #3 from QuicketSolutions/QKT-42. [Ethan Timm] - QKT-42: releases - add readme info. 
[ethan] -- Merge pull request #2 from QuicketSolutions/QKT-42. [Ethan Timm] - - QKT-42 update: shorter command flag - QKT-42 update: shorter command flag. [ethan] -- Merge pull request #1 from QuicketSolutions/QKT-42. [Ethan Timm] - QKT-42: support saving release information. [ethan] -- Merge pull request #118 from whwright/115-fix-pull-details. [Jose - Diaz-Gonzalez] - - Fix pull details - Fix pull details. [Harrison Wright] 0.23.0 (2019-06-04) ------------------- -- Release version 0.23.0. [Jose Diaz-Gonzalez] -- Merge pull request #113 from kleag/master. [Jose Diaz-Gonzalez] - - Avoid to crash in case of HTTP 502 error - Avoid to crash in case of HTTP 502 error. [Gael de Chalendar] Survive also on socket.error connections like on HTTPError or URLError. @@ -365,32 +293,15 @@ Fix Refs #106 -Other -~~~~~ -- Release version 0.22.2. [Jose Diaz-Gonzalez] -- Merge pull request #107 from josegonzalez/patch-1. [Jose Diaz- - Gonzalez] - - fix: warn instead of error - 0.22.1 (2019-02-21) ------------------- -- Release version 0.22.1. [Jose Diaz-Gonzalez] -- Merge pull request #106 from jstetic/master. [Jose Diaz-Gonzalez] - - Log URL error - Log URL error https://github.com/josegonzalez/python-github- backup/issues/105. [JOHN STETIC] 0.22.0 (2019-02-01) ------------------- -- Release version 0.22.0. [Jose Diaz-Gonzalez] -- Merge pull request #103 from whwright/98-better-logging. [Jose Diaz- - Gonzalez] - - Fix accidental system exit with better logging strategy - Remove unnecessary sys.exit call. [W. Harrison Wright] - Add org check to avoid incorrect log output. [W. Harrison Wright] - Fix accidental system exit with better logging strategy. [W. Harrison @@ -399,10 +310,6 @@ Other 0.21.1 (2018-12-25) ------------------- -- Release version 0.21.1. [Jose Diaz-Gonzalez] -- Merge pull request #101 from ecki/patch-2. [Jose Diaz-Gonzalez] - - Mark options which are not included in --all - Mark options which are not included in --all. 
[Bernd] As discussed in Issue #100 @@ -410,22 +317,12 @@ Other 0.21.0 (2018-11-28) ------------------- -- Release version 0.21.0. [Jose Diaz-Gonzalez] -- Merge pull request #97 from whwright/94-fix-user-repos. [Jose Diaz- - Gonzalez] - - Correctly download repos when user arg != authenticated user - Correctly download repos when user arg != authenticated user. [W. Harrison Wright] 0.20.1 (2018-09-29) ------------------- -- Release version 0.20.1. [Jose Diaz-Gonzalez] -- Merge pull request #92 from whwright/87-fix-starred-bug. [Jose Diaz- - Gonzalez] - - Clone the specified user's starred repos/gists, not the authenticated user - Clone the specified user's gists, not the authenticated user. [W. Harrison Wright] - Clone the specified user's starred repos, not the authenticated user. @@ -434,7 +331,6 @@ Other 0.20.0 (2018-03-24) ------------------- -- Release version 0.20.0. [Jose Diaz-Gonzalez] - Chore: drop Python 2.6. [Jose Diaz-Gonzalez] - Feat: simplify release script. [Jose Diaz-Gonzalez] @@ -446,33 +342,15 @@ Fix ~~~ - Cleanup pep8 violations. [Jose Diaz-Gonzalez] -Other -~~~~~ -- Release version 0.19.2. [Jose Diaz-Gonzalez] - - -0.19.1 (2018-03-24) -------------------- -- Release version 0.19.1. [Jose Diaz-Gonzalez] - 0.19.0 (2018-03-24) ------------------- -- Release version 0.19.0. [Jose Diaz-Gonzalez] -- Merge pull request #77 from mayflower/pull-details. [Jose Diaz- - Gonzalez] - - Pull Details - Add additional output for the current request. [Robin Gloster] This is useful to have some progress indication for huge repositories. - Add option to backup additional PR details. [Robin Gloster] Some payload is only included when requesting a single pull request -- Merge pull request #84 from johbo/fix-python36-skip-existing. [Jose - Diaz-Gonzalez] - - Mark string as binary in comparison for skip_existing - Mark string as binary in comparison for skip_existing. 
[Johannes Bornhold] @@ -483,20 +361,11 @@ Other 0.18.0 (2018-02-22) ------------------- -- Release version 0.18.0. [Jose Diaz-Gonzalez] -- Merge pull request #82 from sgreene570/add-followers. [Jose Diaz- - Gonzalez] - - Add option to fetch followers/following JSON data - Add option to fetch followers/following JSON data. [Stephen Greene] 0.17.0 (2018-02-20) ------------------- -- Release version 0.17.0. [Jose Diaz-Gonzalez] -- Merge pull request #81 from whwright/gists. [Jose Diaz-Gonzalez] - - Add ability to back up gists - Short circuit gists backup process. [W. Harrison Wright] - Formatting. [W. Harrison Wright] - Add ability to backup gists. [W. Harrison Wright] @@ -504,94 +373,41 @@ Other 0.16.0 (2018-01-22) ------------------- -- Release version 0.16.0. [Jose Diaz-Gonzalez] -- Merge pull request #78 from whwright/clone-starred-repos. [Jose Diaz- - Gonzalez] - - Clone starred repos -- Update README.rst. [Jose Diaz-Gonzalez] -- Update documentation. [W. Harrison Wright] - Change option to --all-starred. [W. Harrison Wright] - JK don't update documentation. [W. Harrison Wright] -- Update documentation. [W. Harrison Wright] - Put starred clone repoistories under a new option. [W. Harrison Wright] - Add comment. [W. Harrison Wright] - Add ability to clone starred repos. [W. Harrison Wright] -0.15.0 (2017-12-11) -------------------- -- Release version 0.15.0. [Jose Diaz-Gonzalez] -- Merge pull request #75 from slibby/slibby-patch-windows. [Jose Diaz- - Gonzalez] - - update check_io() to allow scripts to run on Windows -- Update logging_subprocess function. [Sam Libby] - - 1. added newline for return - 2. added one-time warning (once per subprocess) -- Update check_io() to allow scripts to run on Windows. [Sam Libby] - - 0.14.1 (2017-10-11) ------------------- -- Release version 0.14.1. [Jose Diaz-Gonzalez] -- Merge pull request #70 from epfremmer/patch-1. [Jose Diaz-Gonzalez] - - Fix arg not defined error - Fix arg not defined error. 
[Edward Pfremmer] 0.14.0 (2017-10-11) ------------------- -- Release version 0.14.0. [Jose Diaz-Gonzalez] -- Merge pull request #68 from pieterclaerhout/master. [Jose Diaz- - Gonzalez] - - Added support for LFS clones -- Updated the readme. [pieterclaerhout] - Added a check to see if git-lfs is installed when doing an LFS clone. [pieterclaerhout] - Added support for LFS clones. [pieterclaerhout] -- Merge pull request #66 from albertyw/python3. [Jose Diaz-Gonzalez] - - Explicitly support python 3 - Add pypi info to readme. [Albert Wang] - Explicitly support python 3 in package description. [Albert Wang] -- Merge pull request #65 from mumblez/master. [Jose Diaz-Gonzalez] - - add couple examples to help new users - Add couple examples to help new users. [Yusuf Tran] 0.13.2 (2017-05-06) ------------------- -- Release version 0.13.2. [Jose Diaz-Gonzalez] -- Merge pull request #64 from karlicoss/fix-remotes. [Jose Diaz- - Gonzalez] - - Fix remotes while updating repository - Fix remotes while updating repository. [Dima Gerasimov] 0.13.1 (2017-04-11) ------------------- -- Release version 0.13.1. [Jose Diaz-Gonzalez] -- Merge pull request #61 from McNetic/fix_empty_updated_at. [Jose Diaz- - Gonzalez] - - Fix error when repository has no updated_at value - Fix error when repository has no updated_at value. [Nicolai Ehemann] 0.13.0 (2017-04-05) ------------------- -- Release version 0.13.0. [Jose Diaz-Gonzalez] -- Merge pull request #59 from martintoreilly/master. [Jose Diaz- - Gonzalez] - - Add support for storing PAT in OSX keychain - Add OS check for OSX specific keychain args. [Martin O'Reilly] Keychain arguments are only supported on Mac OSX. @@ -609,19 +425,11 @@ Other 0.12.1 (2017-03-27) ------------------- -- Release version 0.12.1. [Jose Diaz-Gonzalez] -- Merge pull request #57 from acdha/reuse-existing-remotes. [Jose Diaz- - Gonzalez] - - Avoid remote branch name churn - Avoid remote branch name churn. 
[Chris Adams] This avoids the backup output having lots of "[new branch]" messages because removing the old remote name removed all of the existing branch references. -- Merge pull request #55 from amaczuga/master. [Jose Diaz-Gonzalez] - - Fix detection of bare git directories - Fix detection of bare git directories. [Andrzej Maczuga] @@ -636,49 +444,22 @@ Fix Other ~~~~~ -- Release version 0.12.0. [Jose Diaz-Gonzalez] - Pep8: E501 line too long (83 > 79 characters) [Jose Diaz-Gonzalez] - Pep8: E128 continuation line under-indented for visual indent. [Jose Diaz-Gonzalez] -- Merge pull request #54 from amaczuga/master. [Jose Diaz-Gonzalez] - - Support archivization using bare git clones - Support archivization using bare git clones. [Andrzej Maczuga] -- Merge pull request #53 from trel/master. [Jose Diaz-Gonzalez] - - fix typo, 3x - Fix typo, 3x. [Terrell Russell] 0.11.0 (2016-10-26) ------------------- -- Release version 0.11.0. [Jose Diaz-Gonzalez] -- Merge pull request #52 from bjodah/fix-gh-51. [Jose Diaz-Gonzalez] - - Support --token file:///home/user/token.txt (fixes gh-51) - Support --token file:///home/user/token.txt (fixes gh-51) [Björn Dahlgren] -- Merge pull request #48 from albertyw/python3. [Jose Diaz-Gonzalez] - - Support Python 3 - Fix some linting. [Albert Wang] - Fix byte/string conversion for python 3. [Albert Wang] - Support python 3. [Albert Wang] -- Merge pull request #46 from remram44/encode-password. [Jose Diaz- - Gonzalez] - - Encode special characters in password - Encode special characters in password. [Remi Rampin] -- Merge pull request #45 from remram44/cli-programname. [Jose Diaz- - Gonzalez] - - Fix program name -- Update README.rst. [Remi Rampin] - Don't pretend program name is "Github Backup" [Remi Rampin] -- Merge pull request #44 from remram44/readme-git-https. [Jose Diaz- - Gonzalez] - - Don't install over insecure connection - Don't install over insecure connection. 
[Remi Rampin] The git:// protocol is unauthenticated and unencrypted, and no longer advertised by GitHub. Using HTTPS shouldn't impact performance. @@ -686,10 +467,6 @@ Other 0.10.3 (2016-08-21) ------------------- -- Release version 0.10.3. [Jose Diaz-Gonzalez] -- Merge pull request #30 from jonasrmichel/master. [Jose Diaz-Gonzalez] - - Fixes #29 - Fixes #29. [Jonas Michel] Reporting an error when the user's rate limit is exceeded causes @@ -707,23 +484,13 @@ Other 0.10.2 (2016-08-21) ------------------- -- Release version 0.10.2. [Jose Diaz-Gonzalez] - Add a note regarding git version requirement. [Jose Diaz-Gonzalez] Closes #37 -0.10.1 (2016-08-21) -------------------- -- Release version 0.10.1. [Jose Diaz-Gonzalez] - - 0.10.0 (2016-08-18) ------------------- -- Release version 0.10.0. [Jose Diaz-Gonzalez] -- Merge pull request #42 from robertwb/master. [Jose Diaz-Gonzalez] - - Implement incremental updates - Implement incremental updates. [Robert Bradshaw] Guarded with an --incremental flag. @@ -736,21 +503,11 @@ Other 0.9.0 (2016-03-29) ------------------ -- Release version 0.9.0. [Jose Diaz-Gonzalez] -- Merge pull request #36 from zlabjp/fix-cloning-private-repos. [Jose - Diaz-Gonzalez] - - Fix cloning private repos with basic auth or token - Fix cloning private repos with basic auth or token. [Kazuki Suda] 0.8.0 (2016-02-14) ------------------ -- Release version 0.8.0. [Jose Diaz-Gonzalez] -- Merge pull request #35 from eht16/issue23_store_pullrequests_once. - [Jose Diaz-Gonzalez] - - Don't store issues which are actually pull requests - Don't store issues which are actually pull requests. [Enrico Tröger] This prevents storing pull requests twice since the Github API returns @@ -761,65 +518,31 @@ Other 0.7.0 (2016-02-02) ------------------ -- Release version 0.7.0. [Jose Diaz-Gonzalez] -- Merge pull request #32 from albertyw/soft-fail-hooks. [Jose Diaz- - Gonzalez] - - Softly fail if not able to read hooks - Softly fail if not able to read hooks. 
[Albert Wang] -- Merge pull request #33 from albertyw/update-readme. [Jose Diaz- - Gonzalez] - - Add note about 2-factor auth in readme - Add note about 2-factor auth. [Albert Wang] -- Merge pull request #31 from albertyw/fix-private-repos. [Jose Diaz- - Gonzalez] - - Fix reading user's private repositories - Make user repository search go through endpoint capable of reading private repositories. [Albert Wang] -- Merge pull request #28 from alexmojaki/getpass. [Jose Diaz-Gonzalez] - - Prompt for password if only username given -- Update README with new CLI usage. [Alex Hall] - Prompt for password if only username given. [Alex Hall] 0.6.0 (2015-11-10) ------------------ -- Release version 0.6.0. [Jose Diaz-Gonzalez] - Force proper remote url. [Jose Diaz-Gonzalez] -- Merge pull request #24 from eht16/add_backup_hooks. [Jose Diaz- - Gonzalez] - - Add backup hooks - Improve error handling in case of HTTP errors. [Enrico Tröger] In case of a HTTP status code 404, the returned 'r' was never assigned. In case of URL errors which are not timeouts, we probably should bail out. - Add --hooks to also include web hooks into the backup. [Enrico Tröger] -- Merge pull request #22 from eht16/issue_17_create_output_directory. - [Jose Diaz-Gonzalez] - - Create the user specified output directory if it does not exist - Create the user specified output directory if it does not exist. [Enrico Tröger] Fixes #17. -- Merge pull request #21 from eht16/fix_get_response_missing_auth. [Jose - Diaz-Gonzalez] - - Add missing auth argument to _get_response() - Add missing auth argument to _get_response() [Enrico Tröger] When running unauthenticated and Github starts rate-limiting the client, github-backup crashes because the used auth variable in _get_response() was not available. This change should fix it. -- Merge pull request #20 from - eht16/improve_error_msg_on_non_existing_repo. 
[Jose Diaz-Gonzalez] - - Add repository URL to error message for non-existing repositories - Add repository URL to error message for non-existing repositories. [Enrico Tröger] @@ -830,69 +553,28 @@ Other 0.5.0 (2015-10-10) ------------------ -- Release version 0.5.0. [Jose Diaz-Gonzalez] - Add release script. [Jose Diaz-Gonzalez] - Refactor to both simplify codepath as well as follow PEP8 standards. [Jose Diaz-Gonzalez] -- Merge pull request #19 from Embed-Engineering/retry-timeout. [Jose - Diaz-Gonzalez] - - Retry 3 times when the connection times out - Retry 3 times when the connection times out. [Mathijs Jonker] -- Merge pull request #15 from kromkrom/master. [Jose Diaz-Gonzalez] - - Preserve Unicode characters in the output file -- Update github-backup. [Kirill Grushetsky] -- Update github-backup. [Kirill Grushetsky] - Made unicode output defalut. [Kirill Grushetsky] - Import alphabetised. [Kirill Grushetsky] - Preserve Unicode characters in the output file. [Kirill Grushetsky] Added option to preserve Unicode characters in the output file -- Merge pull request #14 from aensley/master. [Jose Diaz-Gonzalez] - - Added backup of labels and milestones. - Josegonzales/python-github-backup#12 Added backup of labels and milestones. [aensley] -- Merge pull request #11 from Embed-Engineering/master. [Jose Diaz- - Gonzalez] - - Added test for uninitialized repo's (or wiki's) - Fixed indent. [Mathijs Jonker] -- Update github-backup. [mjonker-embed] - Skip unitialized repo's. [mjonker-embed] These gave me errors which caused mails from crontab. -- Merge pull request #10 from Embed-Engineering/master. [Jose Diaz- - Gonzalez] - - Added prefer-ssh - Added prefer-ssh. [mjonker-embed] Was needed for my back-up setup, code includes this but readme wasn't updated -- Merge pull request #9 from acdha/ratelimit-retries. [Jose Diaz- - Gonzalez] - - Retry API requests which failed due to rate-limiting - Retry API requests which failed due to rate-limiting. 
[Chris Adams] This allows operation to continue, albeit at a slower pace, if you have enough data to trigger the API rate limits -- Release 0.4.0. [Jose Diaz-Gonzalez] -- Merge pull request #7 from acdha/repo-backup-overhaul. [Jose Diaz- - Gonzalez] - - Repo backup overhaul -- Update repository back up handling for wikis. [Chris Adams] - - * Now wikis will follow the same logic as the main repo - checkout for --prefer-ssh. - * The regular repository and wiki paths both use the same - function to handle either cloning or updating a local copy - of the remote repo - * All git updates will now use “git fetch --all --tags” - to ensure that tags and branches other than master will - also be backed up - Logging_subprocess: always log when a command fails. [Chris Adams] Previously git clones could fail without any indication @@ -907,19 +589,10 @@ Other The previous commit used the wrong URL for a private repo. This was masked by the lack of error loging in logging_subprocess (which will be in a separate branch) -- Merge pull request #6 from acdha/allow-clone-over-ssh. [Jose Diaz- - Gonzalez] - - Add an option to prefer checkouts over SSH - Add an option to prefer checkouts over SSH. [Chris Adams] This is really useful with private repos to avoid being nagged for credentials for every repository -- Release 0.3.0. [Jose Diaz-Gonzalez] -- Merge pull request #4 from klaude/pull_request_support. [Jose Diaz- - Gonzalez] - - Add pull request support - Add pull request support. [Kevin Laude] Back up reporitory pull requests by passing the --include-pulls @@ -931,10 +604,6 @@ Other Pull requests are automatically backed up when the --all argument is uesd. -- Merge pull request #5 from klaude/github-enterprise-support. [Jose - Diaz-Gonzalez] - - Add GitHub Enterprise Support - Add GitHub Enterprise support. [Kevin Laude] Pass the -H or --github-host argument with a GitHub Enterprise hostname @@ -944,13 +613,9 @@ Other 0.2.0 (2014-09-22) ------------------ -- Release 0.2.0. 
[Jose Diaz-Gonzalez] - Add support for retrieving repositories. Closes #1. [Jose Diaz- Gonzalez] - Fix PEP8 violations. [Jose Diaz-Gonzalez] -- Merge pull request #2 from johnyf/master. [Jose Diaz-Gonzalez] - - add authorization to header only if specified by user - Add authorization to header only if specified by user. [Ioannis Filippidis] - Fill out readme more. [Jose Diaz-Gonzalez] diff --git a/README.rst b/README.rst index b7cd93b..2e4dfa4 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ github-backup |PyPI| |Python Versions| - This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and cannot sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. + This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and can not sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. 
backup a github user or organization @@ -29,20 +29,21 @@ Usage CLI Usage is as follows:: - github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN] [--as-app] - [-o OUTPUT_DIRECTORY] [-i] [--starred] [--all-starred] - [--watched] [--followers] [--following] [--all] - [--issues] [--issue-comments] [--issue-events] [--pulls] + github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN_CLASSIC] + [-f TOKEN_FINE] [--as-app] [-o OUTPUT_DIRECTORY] + [-l LOG_LEVEL] [-i] [--starred] [--all-starred] + [--watched] [--followers] [--following] [--all] [--issues] + [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--pull-details] [--labels] [--hooks] [--milestones] [--repositories] [--bare] [--lfs] [--wikis] [--gists] [--starred-gists] - [--skip-existing] [-L [LANGUAGES [LANGUAGES ...]]] + [--skip-archived] [--skip-existing] [-L [LANGUAGES ...]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] - [--releases] [--assets] [--throttle-limit THROTTLE_LIMIT] - [--throttle-pause THROTTLE_PAUSE] + [--releases] [--assets] [--exclude [REPOSITORY [REPOSITORY ...]] + [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] USER Backup a github account @@ -57,12 +58,18 @@ CLI Usage is as follows:: -p PASSWORD, --password PASSWORD password for basic auth. If a username is given but not a password, the password will be prompted for. - -t TOKEN, --token TOKEN + -f TOKEN_FINE, --token-fine TOKEN_FINE + fine-grained personal access token or path to token + (file://...) + -t TOKEN_CLASSIC, --token TOKEN_CLASSIC personal access, OAuth, or JSON Web token, or path to token (file://...) --as-app authenticate as github app instead of as a user. 
-o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY directory at which to backup the repositories + -l LOG_LEVEL, --log-level LOG_LEVEL + log level to use (default: info, possible levels: + debug, info, warning, error, critical) -i, --incremental incremental backup --starred include JSON output of starred repositories in backup --all-starred include starred repositories in backup [*] @@ -112,6 +119,8 @@ CLI Usage is as follows:: binaries --assets include assets alongside release information; only applies if including releases + --exclude [REPOSITORY [REPOSITORY ...]] + names of repositories to exclude from backup. --throttle-limit THROTTLE_LIMIT start throttling of GitHub API requests after this amount of API requests remain @@ -158,13 +167,13 @@ Backup all repositories, including private ones:: export ACCESS_TOKEN=SOME-GITHUB-TOKEN github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private -Backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: +Use a fine-grained access token to backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: export ACCESS_TOKEN=SOME-GITHUB-TOKEN ORGANIZATION=docker REPO=cli # e.g. git@github.com:docker/cli.git - github-backup $ORGANIZATION -P -t $ACCESS_TOKEN -o . --all -O -R $REPO + github-backup $ORGANIZATION -P -f $ACCESS_TOKEN -o . 
--all -O -R $REPO Testing ======= diff --git a/bin/github-backup b/bin/github-backup index faea49f..b983cdf 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -35,6 +35,11 @@ def main(): if args.lfs_clone: check_git_lfs_install() + if args.log_level: + log_level = logging.getLevelName(args.log_level.upper()) + if isinstance(log_level, int): + logger.root.setLevel(log_level) + if not args.as_app: logger.info('Backing up user {0} to {1}'.format(args.user, output_directory)) authenticated_user = get_authenticated_user(args) @@ -51,5 +56,5 @@ if __name__ == '__main__': try: main() except Exception as e: - logger.warning(str(e)) + logger.error(str(e)) sys.exit(1) diff --git a/github_backup/__init__.py b/github_backup/__init__.py index b228564..fb8a056 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = '0.40.2' +__version__ = "0.43.1" diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index fd4003d..1d79b9b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -30,40 +30,42 @@ try: from . import __version__ + VERSION = __version__ except ImportError: - VERSION = 'unknown' - -FNULL = open(os.devnull, 'w') + VERSION = "unknown" +FNULL = open(os.devnull, "w") +FILE_URI_PREFIX = "file://" logger = logging.getLogger(__name__) -def logging_subprocess(popenargs, - logger, - stdout_log_level=logging.DEBUG, - stderr_log_level=logging.ERROR, - **kwargs): +def logging_subprocess( + popenargs, + logger, + stdout_log_level=logging.DEBUG, + stderr_log_level=logging.ERROR, + **kwargs +): """ Variant of subprocess.call that accepts a logger instead of stdout/stderr, and logs stdout messages via logger.debug and stderr messages via logger.error. 
""" - child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, **kwargs) - if sys.platform == 'win32': - logger.info("Windows operating system detected - no subprocess logging will be returned") + child = subprocess.Popen( + popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs + ) + if sys.platform == "win32": + logger.info( + "Windows operating system detected - no subprocess logging will be returned" + ) - log_level = {child.stdout: stdout_log_level, - child.stderr: stderr_log_level} + log_level = {child.stdout: stdout_log_level, child.stderr: stderr_log_level} def check_io(): - if sys.platform == 'win32': + if sys.platform == "win32": return - ready_to_read = select.select([child.stdout, child.stderr], - [], - [], - 1000)[0] + ready_to_read = select.select([child.stdout, child.stderr], [], [], 1000)[0] for io in ready_to_read: line = io.readline() if not logger: @@ -80,8 +82,8 @@ def check_io(): rc = child.wait() if rc != 0: - print('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr) - print('\t', ' '.join(popenargs), file=sys.stderr) + print("{} returned {}:".format(popenargs[0], rc), file=sys.stderr) + print("\t", " ".join(popenargs), file=sys.stderr) return rc @@ -97,213 +99,314 @@ def mkdir_p(*args): raise -def mask_password(url, secret='*****'): +def mask_password(url, secret="*****"): parsed = urlparse(url) if not parsed.password: return url - elif parsed.password == 'x-oauth-basic': + elif parsed.password == "x-oauth-basic": return url.replace(parsed.username, secret) return url.replace(parsed.password, secret) def parse_args(args=None): - parser = argparse.ArgumentParser(description='Backup a github account') - parser.add_argument('user', - metavar='USER', - type=str, - help='github username') - parser.add_argument('-q', - '--quiet', - action='store_true', - dest='quiet', - help='supress non-error log messages') - parser.add_argument('-u', - '--username', - dest='username', - help='username for basic 
auth') - parser.add_argument('-p', - '--password', - dest='password', - help='password for basic auth. ' - 'If a username is given but not a password, the ' - 'password will be prompted for.') - parser.add_argument('-t', - '--token', - dest='token', - help='personal access, OAuth, or JSON Web token, or path to token (file://...)') # noqa - parser.add_argument('--as-app', - action='store_true', - dest='as_app', - help='authenticate as github app instead of as a user.') - parser.add_argument('-o', - '--output-directory', - default='.', - dest='output_directory', - help='directory at which to backup the repositories') - parser.add_argument('-i', - '--incremental', - action='store_true', - dest='incremental', - help='incremental backup') - parser.add_argument('--starred', - action='store_true', - dest='include_starred', - help='include JSON output of starred repositories in backup') - parser.add_argument('--all-starred', - action='store_true', - dest='all_starred', - help='include starred repositories in backup [*]') - parser.add_argument('--watched', - action='store_true', - dest='include_watched', - help='include JSON output of watched repositories in backup') - parser.add_argument('--followers', - action='store_true', - dest='include_followers', - help='include JSON output of followers in backup') - parser.add_argument('--following', - action='store_true', - dest='include_following', - help='include JSON output of following users in backup') - parser.add_argument('--all', - action='store_true', - dest='include_everything', - help='include everything in backup (not including [*])') - parser.add_argument('--issues', - action='store_true', - dest='include_issues', - help='include issues in backup') - parser.add_argument('--issue-comments', - action='store_true', - dest='include_issue_comments', - help='include issue comments in backup') - parser.add_argument('--issue-events', - action='store_true', - dest='include_issue_events', - help='include issue events in backup') 
- parser.add_argument('--pulls', - action='store_true', - dest='include_pulls', - help='include pull requests in backup') - parser.add_argument('--pull-comments', - action='store_true', - dest='include_pull_comments', - help='include pull request review comments in backup') - parser.add_argument('--pull-commits', - action='store_true', - dest='include_pull_commits', - help='include pull request commits in backup') - parser.add_argument('--pull-details', - action='store_true', - dest='include_pull_details', - help='include more pull request details in backup [*]') - parser.add_argument('--labels', - action='store_true', - dest='include_labels', - help='include labels in backup') - parser.add_argument('--hooks', - action='store_true', - dest='include_hooks', - help='include hooks in backup (works only when authenticated)') # noqa - parser.add_argument('--milestones', - action='store_true', - dest='include_milestones', - help='include milestones in backup') - parser.add_argument('--repositories', - action='store_true', - dest='include_repository', - help='include repository clone in backup') - parser.add_argument('--bare', - action='store_true', - dest='bare_clone', - help='clone bare repositories') - parser.add_argument('--lfs', - action='store_true', - dest='lfs_clone', - help='clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]') - parser.add_argument('--wikis', - action='store_true', - dest='include_wiki', - help='include wiki clone in backup') - parser.add_argument('--gists', - action='store_true', - dest='include_gists', - help='include gists in backup [*]') - parser.add_argument('--starred-gists', - action='store_true', - dest='include_starred_gists', - help='include starred gists in backup [*]') - parser.add_argument('--skip-archived', - action='store_true', - dest='skip_archived', - help='skip project if it is archived') - parser.add_argument('--skip-existing', - action='store_true', - dest='skip_existing', - help='skip 
project if a backup directory exists') - parser.add_argument('-L', - '--languages', - dest='languages', - help='only allow these languages', - nargs='*') - parser.add_argument('-N', - '--name-regex', - dest='name_regex', - help='python regex to match names against') - parser.add_argument('-H', - '--github-host', - dest='github_host', - help='GitHub Enterprise hostname') - parser.add_argument('-O', - '--organization', - action='store_true', - dest='organization', - help='whether or not this is an organization user') - parser.add_argument('-R', - '--repository', - dest='repository', - help='name of repository to limit backup to') - parser.add_argument('-P', '--private', - action='store_true', - dest='private', - help='include private repositories [*]') - parser.add_argument('-F', '--fork', - action='store_true', - dest='fork', - help='include forked repositories [*]') - parser.add_argument('--prefer-ssh', - action='store_true', - help='Clone repositories using SSH instead of HTTPS') - parser.add_argument('-v', '--version', - action='version', - version='%(prog)s ' + VERSION) - parser.add_argument('--keychain-name', - dest='osx_keychain_item_name', - help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--keychain-account', - dest='osx_keychain_item_account', - help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') - parser.add_argument('--releases', - action='store_true', - dest='include_releases', - help='include release information, not including assets or binaries' - ) - parser.add_argument('--assets', - action='store_true', - dest='include_assets', - help='include assets alongside release information; only applies if including releases') - parser.add_argument('--throttle-limit', - dest='throttle_limit', - type=int, - default=0, - help='start throttling of GitHub API requests after this amount of API requests remain') - 
parser.add_argument('--throttle-pause', - dest='throttle_pause', - type=float, - default=30.0, - help='wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)') + parser = argparse.ArgumentParser(description="Backup a github account") + parser.add_argument("user", metavar="USER", type=str, help="github username") + parser.add_argument( + "-u", "--username", dest="username", help="username for basic auth" + ) + parser.add_argument( + "-p", + "--password", + dest="password", + help="password for basic auth. " + "If a username is given but not a password, the " + "password will be prompted for.", + ) + parser.add_argument( + "-t", + "--token", + dest="token_classic", + help="personal access, OAuth, or JSON Web token, or path to token (file://...)", + ) # noqa + parser.add_argument( + "-f", + "--token-fine", + dest="token_fine", + help="fine-grained personal access token (github_pat_....), or path to token (file://...)", + ) # noqa + parser.add_argument( + "-q", + "--quiet", + action="store_true", + dest="quiet", + help="supress log messages less severe than warning, e.g. 
info", + ) + parser.add_argument( + "--as-app", + action="store_true", + dest="as_app", + help="authenticate as github app instead of as a user.", + ) + parser.add_argument( + "-o", + "--output-directory", + default=".", + dest="output_directory", + help="directory at which to backup the repositories", + ) + parser.add_argument( + "-l", + "--log-level", + default="info", + dest="log_level", + help="log level to use (default: info, possible levels: debug, info, warning, error, critical)", + ) + parser.add_argument( + "-i", + "--incremental", + action="store_true", + dest="incremental", + help="incremental backup", + ) + parser.add_argument( + "--starred", + action="store_true", + dest="include_starred", + help="include JSON output of starred repositories in backup", + ) + parser.add_argument( + "--all-starred", + action="store_true", + dest="all_starred", + help="include starred repositories in backup [*]", + ) + parser.add_argument( + "--watched", + action="store_true", + dest="include_watched", + help="include JSON output of watched repositories in backup", + ) + parser.add_argument( + "--followers", + action="store_true", + dest="include_followers", + help="include JSON output of followers in backup", + ) + parser.add_argument( + "--following", + action="store_true", + dest="include_following", + help="include JSON output of following users in backup", + ) + parser.add_argument( + "--all", + action="store_true", + dest="include_everything", + help="include everything in backup (not including [*])", + ) + parser.add_argument( + "--issues", + action="store_true", + dest="include_issues", + help="include issues in backup", + ) + parser.add_argument( + "--issue-comments", + action="store_true", + dest="include_issue_comments", + help="include issue comments in backup", + ) + parser.add_argument( + "--issue-events", + action="store_true", + dest="include_issue_events", + help="include issue events in backup", + ) + parser.add_argument( + "--pulls", + 
action="store_true", + dest="include_pulls", + help="include pull requests in backup", + ) + parser.add_argument( + "--pull-comments", + action="store_true", + dest="include_pull_comments", + help="include pull request review comments in backup", + ) + parser.add_argument( + "--pull-commits", + action="store_true", + dest="include_pull_commits", + help="include pull request commits in backup", + ) + parser.add_argument( + "--pull-details", + action="store_true", + dest="include_pull_details", + help="include more pull request details in backup [*]", + ) + parser.add_argument( + "--labels", + action="store_true", + dest="include_labels", + help="include labels in backup", + ) + parser.add_argument( + "--hooks", + action="store_true", + dest="include_hooks", + help="include hooks in backup (works only when authenticated)", + ) # noqa + parser.add_argument( + "--milestones", + action="store_true", + dest="include_milestones", + help="include milestones in backup", + ) + parser.add_argument( + "--repositories", + action="store_true", + dest="include_repository", + help="include repository clone in backup", + ) + parser.add_argument( + "--bare", action="store_true", dest="bare_clone", help="clone bare repositories" + ) + parser.add_argument( + "--no-prune", + action="store_true", + dest="no_prune", + help="disable prune option for git fetch", + ) + parser.add_argument( + "--lfs", + action="store_true", + dest="lfs_clone", + help="clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]", + ) + parser.add_argument( + "--wikis", + action="store_true", + dest="include_wiki", + help="include wiki clone in backup", + ) + parser.add_argument( + "--gists", + action="store_true", + dest="include_gists", + help="include gists in backup [*]", + ) + parser.add_argument( + "--starred-gists", + action="store_true", + dest="include_starred_gists", + help="include starred gists in backup [*]", + ) + parser.add_argument( + "--skip-archived", + 
action="store_true", + dest="skip_archived", + help="skip project if it is archived", + ) + parser.add_argument( + "--skip-existing", + action="store_true", + dest="skip_existing", + help="skip project if a backup directory exists", + ) + parser.add_argument( + "-L", + "--languages", + dest="languages", + help="only allow these languages", + nargs="*", + ) + parser.add_argument( + "-N", + "--name-regex", + dest="name_regex", + help="python regex to match names against", + ) + parser.add_argument( + "-H", "--github-host", dest="github_host", help="GitHub Enterprise hostname" + ) + parser.add_argument( + "-O", + "--organization", + action="store_true", + dest="organization", + help="whether or not this is an organization user", + ) + parser.add_argument( + "-R", + "--repository", + dest="repository", + help="name of repository to limit backup to", + ) + parser.add_argument( + "-P", + "--private", + action="store_true", + dest="private", + help="include private repositories [*]", + ) + parser.add_argument( + "-F", + "--fork", + action="store_true", + dest="fork", + help="include forked repositories [*]", + ) + parser.add_argument( + "--prefer-ssh", + action="store_true", + help="Clone repositories using SSH instead of HTTPS", + ) + parser.add_argument( + "-v", "--version", action="version", version="%(prog)s " + VERSION + ) + parser.add_argument( + "--keychain-name", + dest="osx_keychain_item_name", + help="OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token", + ) + parser.add_argument( + "--keychain-account", + dest="osx_keychain_item_account", + help="OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token", + ) + parser.add_argument( + "--releases", + action="store_true", + dest="include_releases", + help="include release information, not including assets or binaries", + ) + parser.add_argument( + "--assets", + action="store_true", + dest="include_assets", + help="include 
assets alongside release information; only applies if including releases", + ) + parser.add_argument( + "--throttle-limit", + dest="throttle_limit", + type=int, + default=0, + help="start throttling of GitHub API requests after this amount of API requests remain", + ) + parser.add_argument( + "--throttle-pause", + dest="throttle_pause", + type=float, + default=30.0, + help="wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set)", + ) + parser.add_argument( + "--exclude", dest="exclude", help="names of repositories to exclude", nargs="*" + ) return parser.parse_args(args) @@ -312,35 +415,57 @@ def get_auth(args, encode=True, for_git_cli=False): if args.osx_keychain_item_name: if not args.osx_keychain_item_account: - raise Exception('You must specify both name and account fields for osx keychain password items') + raise Exception( + "You must specify both name and account fields for osx keychain password items" + ) else: - if platform.system() != 'Darwin': + if platform.system() != "Darwin": raise Exception("Keychain arguments are only supported on Mac OSX") try: - with open(os.devnull, 'w') as devnull: - token = (subprocess.check_output([ - 'security', 'find-generic-password', - '-s', args.osx_keychain_item_name, - '-a', args.osx_keychain_item_account, - '-w'], stderr=devnull).strip()) - token = token.decode('utf-8') - auth = token + ':' + 'x-oauth-basic' + with open(os.devnull, "w") as devnull: + token = subprocess.check_output( + [ + "security", + "find-generic-password", + "-s", + args.osx_keychain_item_name, + "-a", + args.osx_keychain_item_account, + "-w", + ], + stderr=devnull, + ).strip() + token = token.decode("utf-8") + auth = token + ":" + "x-oauth-basic" except subprocess.SubprocessError: - raise Exception('No password item matching the provided name and account could be found in the osx keychain.') + raise Exception( + "No password item matching the provided name and account could be found 
in the osx keychain." + ) elif args.osx_keychain_item_account: - raise Exception('You must specify both name and account fields for osx keychain password items') - elif args.token: - _path_specifier = 'file://' - if args.token.startswith(_path_specifier): - args.token = open(args.token[len(_path_specifier):], - 'rt').readline().strip() + raise Exception( + "You must specify both name and account fields for osx keychain password items" + ) + elif args.token_fine: + if args.token_fine.startswith(FILE_URI_PREFIX): + args.token_fine = read_file_contents(args.token_fine) + + if args.token_fine.startswith("github_pat_"): + auth = args.token_fine + else: + raise Exception( + "Fine-grained token supplied does not look like a GitHub PAT" + ) + elif args.token_classic: + if args.token_classic.startswith(FILE_URI_PREFIX): + args.token_classic = read_file_contents(args.token_classic) + if not args.as_app: - auth = args.token + ':' + 'x-oauth-basic' + auth = args.token_classic + ":" + "x-oauth-basic" else: if not for_git_cli: - auth = args.token + auth = args.token_classic else: - auth = 'x-access-token:' + args.token + auth = "x-access-token:" + args.token_classic elif args.username: if not args.password: args.password = getpass.getpass() @@ -348,24 +473,24 @@ def get_auth(args, encode=True, for_git_cli=False): password = args.password else: password = urlquote(args.password) - auth = args.username + ':' + password + auth = args.username + ":" + password elif args.password: - raise Exception('You must specify a username for basic auth') + raise Exception("You must specify a username for basic auth") if not auth: return None - if not encode: + if not encode or args.token_fine is not None: return auth - return base64.b64encode(auth.encode('ascii')) + return base64.b64encode(auth.encode("ascii")) def get_github_api_host(args): if args.github_host: - host = args.github_host + '/api/v3' + host = args.github_host + "/api/v3" else: - host = 'api.github.com' + host = "api.github.com" 
return host @@ -374,33 +499,44 @@ def get_github_host(args): if args.github_host: host = args.github_host else: - host = 'github.com' + host = "github.com" return host +def read_file_contents(file_uri): + return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() + + def get_github_repo_url(args, repository): - if repository.get('is_gist'): + if repository.get("is_gist"): if args.prefer_ssh: # The git_pull_url value is always https for gists, so we need to transform it to ssh form - repo_url = re.sub(r'^https?:\/\/(.+)\/(.+)\.git$', r'git@\1:\2.git', repository['git_pull_url']) - repo_url = re.sub(r'^git@gist\.', 'git@', repo_url) # strip gist subdomain for better hostkey compatibility + repo_url = re.sub( + r"^https?:\/\/(.+)\/(.+)\.git$", + r"git@\1:\2.git", + repository["git_pull_url"], + ) + repo_url = re.sub( + r"^git@gist\.", "git@", repo_url + ) # strip gist subdomain for better hostkey compatibility else: - repo_url = repository['git_pull_url'] + repo_url = repository["git_pull_url"] return repo_url if args.prefer_ssh: - return repository['ssh_url'] + return repository["ssh_url"] auth = get_auth(args, encode=False, for_git_cli=True) if auth: - repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( - auth, + repo_url = "https://{0}@{1}/{2}/{3}.git".format( + auth if args.token_fine is None else "oauth2:" + auth, get_github_host(args), - repository['owner']['login'], - repository['name']) + repository["owner"]["login"], + repository["name"], + ) else: - repo_url = repository['clone_url'] + repo_url = repository["clone_url"] return repo_url @@ -413,13 +549,21 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): while True: page = page + 1 - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request( + per_page, + page, + query_args, + template, + auth, + as_app=args.as_app, + fine=True if args.token_fine is not None else False, + ) # noqa r, errors = 
_get_response(request, auth, template) status_code = int(r.getcode()) # Check if we got correct data try: - response = json.loads(r.read().decode('utf-8')) + response = json.loads(r.read().decode("utf-8")) except IncompleteRead: logger.warning("Incomplete read error detected") read_error = True @@ -433,25 +577,34 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): read_error = False # be gentle with API request limit and throttle requests if remaining requests getting low - limit_remaining = int(r.headers.get('x-ratelimit-remaining', 0)) + limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0)) if args.throttle_limit and limit_remaining <= args.throttle_limit: logger.info( - 'API request limit hit: {} requests left, pausing further requests for {}s'.format( - limit_remaining, - args.throttle_pause)) + "API request limit hit: {} requests left, pausing further requests for {}s".format( + limit_remaining, args.throttle_pause + ) + ) time.sleep(args.throttle_pause) retries = 0 while retries < 3 and (status_code == 502 or read_error): - logger.warning('API request failed. Retrying in 5 seconds') + logger.warning("API request failed. 
Retrying in 5 seconds") retries += 1 time.sleep(5) - request = _construct_request(per_page, page, query_args, template, auth, as_app=args.as_app) # noqa + request = _construct_request( + per_page, + page, + query_args, + template, + auth, + as_app=args.as_app, + fine=True if args.token_fine is not None else False, + ) # noqa r, errors = _get_response(request, auth, template) status_code = int(r.getcode()) try: - response = json.loads(r.read().decode('utf-8')) + response = json.loads(r.read().decode("utf-8")) read_error = False except IncompleteRead: logger.warning("Incomplete read error detected") @@ -464,14 +617,14 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): read_error = True if status_code != 200: - template = 'API request returned HTTP {0}: {1}' + template = "API request returned HTTP {0}: {1}" errors.append(template.format(status_code, r.reason)) - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if read_error: - template = 'API request problem reading response for {0}' + template = "API request problem reading response for {0}" errors.append(template.format(request)) - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if len(errors) == 0: if type(response) == list: @@ -483,7 +636,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): yield response if len(errors) > 0: - raise Exception(', '.join(errors)) + raise Exception(", ".join(errors)) if single_request: break @@ -513,12 +666,12 @@ def _get_response(request, auth, template): r = exc except URLError as e: logger.warning(e.reason) - should_continue = _request_url_error(template, retry_timeout) + should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not should_continue: raise except socket.error as e: logger.warning(e.strerror) - should_continue = _request_url_error(template, retry_timeout) + should_continue, retry_timeout = _request_url_error(template, retry_timeout) if not 
should_continue: raise @@ -529,21 +682,30 @@ def _get_response(request, auth, template): return r, errors -def _construct_request(per_page, page, query_args, template, auth, as_app=None): - querystring = urlencode(dict(list({ - 'per_page': per_page, - 'page': page - }.items()) + list(query_args.items()))) +def _construct_request( + per_page, page, query_args, template, auth, as_app=None, fine=False +): + querystring = urlencode( + dict( + list({"per_page": per_page, "page": page}.items()) + + list(query_args.items()) + ) + ) - request = Request(template + '?' + querystring) + request = Request(template + "?" + querystring) if auth is not None: if not as_app: - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + if fine: + request.add_header("Authorization", "token " + auth) + else: + request.add_header("Authorization", "Basic ".encode("ascii") + auth) else: - auth = auth.encode('ascii') - request.add_header('Authorization', 'token '.encode('ascii') + auth) - request.add_header('Accept', 'application/vnd.github.machine-man-preview+json') - logger.info('Requesting {}?{}'.format(template, querystring)) + auth = auth.encode("ascii") + request.add_header("Authorization", "token ".encode("ascii") + auth) + request.add_header( + "Accept", "application/vnd.github.machine-man-preview+json" + ) + logger.info("Requesting {}?{}".format(template, querystring)) return request @@ -554,7 +716,7 @@ def _request_http_error(exc, auth, errors): should_continue = False headers = exc.headers - limit_remaining = int(headers.get('x-ratelimit-remaining', 0)) + limit_remaining = int(headers.get("x-ratelimit-remaining", 0)) if exc.code == 403 and limit_remaining < 1: # The X-RateLimit-Reset header includes a @@ -562,15 +724,19 @@ def _request_http_error(exc, auth, errors): # so we can calculate how long to wait rather # than inefficiently polling: gm_now = calendar.timegm(time.gmtime()) - reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now + reset = 
int(headers.get("x-ratelimit-reset", 0)) or gm_now # We'll never sleep for less than 10 seconds: delta = max(10, reset - gm_now) - limit = headers.get('x-ratelimit-limit') - logger.warning('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta)) # noqa + limit = headers.get("x-ratelimit-limit") + logger.warning( + "Exceeded rate limit of {} requests; waiting {} seconds to reset".format( + limit, delta + ) + ) # noqa if auth is None: - logger.info('Hint: Authenticate to raise your GitHub rate limit') + logger.info("Hint: Authenticate to raise your GitHub rate limit") time.sleep(delta) should_continue = True @@ -578,16 +744,15 @@ def _request_http_error(exc, auth, errors): def _request_url_error(template, retry_timeout): - # Incase of a connection timing out, we can retry a few time + # In case of a connection timing out, we can retry a few time # But we won't crash and not back-up the rest now - logger.info('{} timed out'.format(template)) + logger.info("'{}' timed out".format(template)) retry_timeout -= 1 if retry_timeout >= 0: - return True + return True, retry_timeout - raise Exception('{} timed out to much, skipping!') - return False + raise Exception("'{}' timed out to much, skipping!".format(template)) class S3HTTPRedirectHandler(HTTPRedirectHandler): @@ -597,9 +762,12 @@ class S3HTTPRedirectHandler(HTTPRedirectHandler): urllib will add the Authorization header to the redirected request to S3, which will result in a 400, so we should remove said header on redirect. 
""" + def redirect_request(self, req, fp, code, msg, headers, newurl): - request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) - del request.headers['Authorization'] + request = super(S3HTTPRedirectHandler, self).redirect_request( + req, fp, code, msg, headers, newurl + ) + del request.headers["Authorization"] return request @@ -609,15 +777,15 @@ def download_file(url, path, auth): return request = Request(url) - request.add_header('Accept', 'application/octet-stream') - request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + request.add_header("Accept", "application/octet-stream") + request.add_header("Authorization", "Basic ".encode("ascii") + auth) opener = build_opener(S3HTTPRedirectHandler) try: response = opener.open(request) chunk_size = 16 * 1024 - with open(path, 'wb') as f: + with open(path, "wb") as f: while True: chunk = response.read(chunk_size) if not chunk: @@ -625,91 +793,110 @@ def download_file(url, path, auth): f.write(chunk) except HTTPError as exc: # Gracefully handle 404 responses (and others) when downloading from S3 - logger.warning('Skipping download of asset {0} due to HTTPError: {1}'.format(url, exc.reason)) + logger.warning( + "Skipping download of asset {0} due to HTTPError: {1}".format( + url, exc.reason + ) + ) except URLError as e: # Gracefully handle other URL errors - logger.warning('Skipping download of asset {0} due to URLError: {1}'.format(url, e.reason)) + logger.warning( + "Skipping download of asset {0} due to URLError: {1}".format(url, e.reason) + ) except socket.error as e: # Gracefully handle socket errors # TODO: Implement retry logic - logger.warning('Skipping download of asset {0} due to socker error: {1}'.format(url, e.strerror)) + logger.warning( + "Skipping download of asset {0} due to socker error: {1}".format( + url, e.strerror + ) + ) def get_authenticated_user(args): - template = 'https://{0}/user'.format(get_github_api_host(args)) + template = 
"https://{0}/user".format(get_github_api_host(args)) data = retrieve_data(args, template, single_request=True) return data[0] def check_git_lfs_install(): - exit_code = subprocess.call(['git', 'lfs', 'version']) + exit_code = subprocess.call(["git", "lfs", "version"]) if exit_code != 0: - raise Exception('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + raise Exception( + "The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com." + ) def retrieve_repositories(args, authenticated_user): - logger.info('Retrieving repositories') + logger.info("Retrieving repositories") single_request = False - if args.user == authenticated_user['login']: + if args.user == authenticated_user["login"]: # we must use the /user/repos API to be able to access private repos - template = 'https://{0}/user/repos'.format( - get_github_api_host(args)) + template = "https://{0}/user/repos".format(get_github_api_host(args)) else: if args.private and not args.organization: - logger.warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') - template = 'https://{0}/users/{1}/repos'.format( - get_github_api_host(args), - args.user) + logger.warning( + "Authenticated user is different from user being backed up, thus private repositories cannot be accessed" + ) + template = "https://{0}/users/{1}/repos".format( + get_github_api_host(args), args.user + ) if args.organization: - template = 'https://{0}/orgs/{1}/repos'.format( - get_github_api_host(args), - args.user) + template = "https://{0}/orgs/{1}/repos".format( + get_github_api_host(args), args.user + ) if args.repository: single_request = True - template = 'https://{0}/repos/{1}/{2}'.format( - get_github_api_host(args), - args.user, - args.repository) + template = "https://{0}/repos/{1}/{2}".format( + get_github_api_host(args), args.user, args.repository + ) repos = 
retrieve_data(args, template, single_request=single_request) if args.all_starred: - starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(args), args.user) + starred_template = "https://{0}/users/{1}/starred".format( + get_github_api_host(args), args.user + ) starred_repos = retrieve_data(args, starred_template, single_request=False) # flag each repo as starred for downstream processing for item in starred_repos: - item.update({'is_starred': True}) + item.update({"is_starred": True}) repos.extend(starred_repos) if args.include_gists: - gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(args), args.user) + gists_template = "https://{0}/users/{1}/gists".format( + get_github_api_host(args), args.user + ) gists = retrieve_data(args, gists_template, single_request=False) # flag each repo as a gist for downstream processing for item in gists: - item.update({'is_gist': True}) + item.update({"is_gist": True}) repos.extend(gists) if args.include_starred_gists: - starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) - starred_gists = retrieve_data(args, starred_gists_template, single_request=False) + starred_gists_template = "https://{0}/gists/starred".format( + get_github_api_host(args) + ) + starred_gists = retrieve_data( + args, starred_gists_template, single_request=False + ) # flag each repo as a starred gist for downstream processing for item in starred_gists: - item.update({'is_gist': True, - 'is_starred': True}) + item.update({"is_gist": True, "is_starred": True}) repos.extend(starred_gists) return repos def filter_repositories(args, unfiltered_repositories): - logger.info('Filtering repositories') + logger.info("Filtering repositories") repositories = [] for r in unfiltered_repositories: # gists can be anonymous, so need to safely check owner - if r.get('owner', {}).get('login') == args.user or r.get('is_starred'): + if r.get("owner", {}).get("login") == args.user or r.get("is_starred"): 
repositories.append(r) name_regex = None @@ -721,26 +908,33 @@ def filter_repositories(args, unfiltered_repositories): languages = [x.lower() for x in args.languages] if not args.fork: - repositories = [r for r in repositories if not r.get('fork')] + repositories = [r for r in repositories if not r.get("fork")] if not args.private: - repositories = [r for r in repositories if not r.get('private') or r.get('public')] + repositories = [ + r for r in repositories if not r.get("private") or r.get("public") + ] if languages: - repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa + repositories = [ + r + for r in repositories + if r.get("language") and r.get("language").lower() in languages + ] # noqa if name_regex: - repositories = [r for r in repositories if name_regex.match(r['name'])] + repositories = [r for r in repositories if name_regex.match(r["name"])] if args.skip_archived: - repositories = [r for r in repositories if not r.get('archived')] + repositories = [r for r in repositories if not r.get("archived")] + if args.exclude: + repositories = [r for r in repositories if r["name"] not in args.exclude] return repositories def backup_repositories(args, output_directory, repositories): - logger.info('Backing up repositories') - repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) + logger.info("Backing up repositories") + repos_template = "https://{0}/repos".format(get_github_api_host(args)) if args.incremental: - last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa - last_update_path = os.path.join(output_directory, 'last_update') + last_update_path = os.path.join(output_directory, "last_update") if os.path.exists(last_update_path): args.since = open(last_update_path).read().strip() else: @@ -748,47 +942,70 @@ def backup_repositories(args, output_directory, repositories): else: args.since = None 
+ last_update = "0000-00-00T00:00:00Z" for repository in repositories: - if repository.get('is_gist'): - repo_cwd = os.path.join(output_directory, 'gists', repository['id']) - elif repository.get('is_starred'): + if "updated_at" in repository and repository["updated_at"] > last_update: + last_update = repository["updated_at"] + elif "pushed_at" in repository and repository["pushed_at"] > last_update: + last_update = repository["pushed_at"] + + if repository.get("is_gist"): + repo_cwd = os.path.join(output_directory, "gists", repository["id"]) + elif repository.get("is_starred"): # put starred repos in -o/starred/${owner}/${repo} to prevent collision of # any repositories with the same name - repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + repo_cwd = os.path.join( + output_directory, + "starred", + repository["owner"]["login"], + repository["name"], + ) else: - repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) + repo_cwd = os.path.join( + output_directory, "repositories", repository["name"] + ) - repo_dir = os.path.join(repo_cwd, 'repository') + repo_dir = os.path.join(repo_cwd, "repository") repo_url = get_github_repo_url(args, repository) - include_gists = (args.include_gists or args.include_starred_gists) - if (args.include_repository or args.include_everything) \ - or (include_gists and repository.get('is_gist')): - repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') - fetch_repository(repo_name, - repo_url, - repo_dir, - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) - - if repository.get('is_gist'): + include_gists = args.include_gists or args.include_starred_gists + if (args.include_repository or args.include_everything) or ( + include_gists and repository.get("is_gist") + ): + repo_name = ( + repository.get("name") + if not repository.get("is_gist") + else repository.get("id") + ) + 
fetch_repository( + repo_name, + repo_url, + repo_dir, + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) + + if repository.get("is_gist"): # dump gist information to a file as well - output_file = '{0}/gist.json'.format(repo_cwd) - with codecs.open(output_file, 'w', encoding='utf-8') as f: + output_file = "{0}/gist.json".format(repo_cwd) + with codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(repository, f) continue # don't try to back anything else for a gist; it doesn't exist - download_wiki = (args.include_wiki or args.include_everything) - if repository['has_wiki'] and download_wiki: - fetch_repository(repository['name'], - repo_url.replace('.git', '.wiki.git'), - os.path.join(repo_cwd, 'wiki'), - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) - + download_wiki = args.include_wiki or args.include_everything + if repository["has_wiki"] and download_wiki: + fetch_repository( + repository["name"], + repo_url.replace(".git", ".wiki.git"), + os.path.join(repo_cwd, "wiki"), + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) if args.include_issues or args.include_everything: backup_issues(args, repo_cwd, repository, repos_template) @@ -805,176 +1022,169 @@ def backup_repositories(args, output_directory, repositories): backup_hooks(args, repo_cwd, repository, repos_template) if args.include_releases or args.include_everything: - backup_releases(args, repo_cwd, repository, repos_template, - include_assets=args.include_assets or args.include_everything) + backup_releases( + args, + repo_cwd, + repository, + repos_template, + include_assets=args.include_assets or args.include_everything, + ) if args.incremental: - open(last_update_path, 'w').write(last_update) + if last_update == "0000-00-00T00:00:00Z": + last_update = time.strftime("%Y-%m-%dT%H:%M:%SZ", 
time.localtime()) + + open(last_update_path, "w").write(last_update) def backup_issues(args, repo_cwd, repository, repos_template): - has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) + has_issues_dir = os.path.isdir("{0}/issues/.git".format(repo_cwd)) if args.skip_existing and has_issues_dir: return - logger.info('Retrieving {0} issues'.format(repository['full_name'])) - issue_cwd = os.path.join(repo_cwd, 'issues') + logger.info("Retrieving {0} issues".format(repository["full_name"])) + issue_cwd = os.path.join(repo_cwd, "issues") mkdir_p(repo_cwd, issue_cwd) issues = {} issues_skipped = 0 - issues_skipped_message = '' - _issue_template = '{0}/{1}/issues'.format(repos_template, - repository['full_name']) + issues_skipped_message = "" + _issue_template = "{0}/{1}/issues".format(repos_template, repository["full_name"]) should_include_pulls = args.include_pulls or args.include_everything - issue_states = ['open', 'closed'] + issue_states = ["open", "closed"] for issue_state in issue_states: - query_args = { - 'filter': 'all', - 'state': issue_state - } + query_args = {"filter": "all", "state": issue_state} if args.since: - query_args['since'] = args.since + query_args["since"] = args.since - _issues = retrieve_data(args, - _issue_template, - query_args=query_args) + _issues = retrieve_data(args, _issue_template, query_args=query_args) for issue in _issues: # skip pull requests which are also returned as issues # if retrieving pull requests is requested as well - if 'pull_request' in issue and should_include_pulls: + if "pull_request" in issue and should_include_pulls: issues_skipped += 1 continue - issues[issue['number']] = issue + issues[issue["number"]] = issue if issues_skipped: - issues_skipped_message = ' (skipped {0} pull requests)'.format( - issues_skipped) + issues_skipped_message = " (skipped {0} pull requests)".format(issues_skipped) - logger.info('Saving {0} issues to disk{1}'.format( - len(list(issues.keys())), issues_skipped_message)) - 
comments_template = _issue_template + '/{0}/comments' - events_template = _issue_template + '/{0}/events' + logger.info( + "Saving {0} issues to disk{1}".format( + len(list(issues.keys())), issues_skipped_message + ) + ) + comments_template = _issue_template + "/{0}/comments" + events_template = _issue_template + "/{0}/events" for number, issue in list(issues.items()): if args.include_issue_comments or args.include_everything: template = comments_template.format(number) - issues[number]['comment_data'] = retrieve_data(args, template) + issues[number]["comment_data"] = retrieve_data(args, template) if args.include_issue_events or args.include_everything: template = events_template.format(number) - issues[number]['event_data'] = retrieve_data(args, template) + issues[number]["event_data"] = retrieve_data(args, template) - issue_file = '{0}/{1}.json'.format(issue_cwd, number) - with codecs.open(issue_file, 'w', encoding='utf-8') as f: + issue_file = "{0}/{1}.json".format(issue_cwd, number) + with codecs.open(issue_file, "w", encoding="utf-8") as f: json_dump(issue, f) def backup_pulls(args, repo_cwd, repository, repos_template): - has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) + has_pulls_dir = os.path.isdir("{0}/pulls/.git".format(repo_cwd)) if args.skip_existing and has_pulls_dir: return - logger.info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa - pulls_cwd = os.path.join(repo_cwd, 'pulls') + logger.info("Retrieving {0} pull requests".format(repository["full_name"])) # noqa + pulls_cwd = os.path.join(repo_cwd, "pulls") mkdir_p(repo_cwd, pulls_cwd) pulls = {} - _pulls_template = '{0}/{1}/pulls'.format(repos_template, - repository['full_name']) + _pulls_template = "{0}/{1}/pulls".format(repos_template, repository["full_name"]) + _issue_template = "{0}/{1}/issues".format(repos_template, repository["full_name"]) query_args = { - 'filter': 'all', - 'state': 'all', - 'sort': 'updated', - 'direction': 'desc', + "filter": "all", + 
"state": "all", + "sort": "updated", + "direction": "desc", } if not args.include_pull_details: - pull_states = ['open', 'closed'] + pull_states = ["open", "closed"] for pull_state in pull_states: - query_args['state'] = pull_state - _pulls = retrieve_data_gen( - args, - _pulls_template, - query_args=query_args - ) + query_args["state"] = pull_state + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull['updated_at'] < args.since: + if args.since and pull["updated_at"] < args.since: break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = pull + if not args.since or pull["updated_at"] >= args.since: + pulls[pull["number"]] = pull else: - _pulls = retrieve_data_gen( - args, - _pulls_template, - query_args=query_args - ) + _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull['updated_at'] < args.since: + if args.since and pull["updated_at"] < args.since: break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = retrieve_data( + if not args.since or pull["updated_at"] >= args.since: + pulls[pull["number"]] = retrieve_data( args, - _pulls_template + '/{}'.format(pull['number']), - single_request=True + _pulls_template + "/{}".format(pull["number"]), + single_request=True, )[0] - logger.info('Saving {0} pull requests to disk'.format( - len(list(pulls.keys())))) - comments_template = _pulls_template + '/{0}/comments' - commits_template = _pulls_template + '/{0}/commits' + logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) + # Comments from pulls API are only _review_ comments + # regular comments need to be fetched via issue API. 
+ # For backwards compatibility with versions <= 0.41.0 + # keep name "comment_data" for review comments + comments_regular_template = _issue_template + "/{0}/comments" + comments_template = _pulls_template + "/{0}/comments" + commits_template = _pulls_template + "/{0}/commits" for number, pull in list(pulls.items()): if args.include_pull_comments or args.include_everything: + template = comments_regular_template.format(number) + pulls[number]["comment_regular_data"] = retrieve_data(args, template) template = comments_template.format(number) - pulls[number]['comment_data'] = retrieve_data(args, template) + pulls[number]["comment_data"] = retrieve_data(args, template) if args.include_pull_commits or args.include_everything: template = commits_template.format(number) - pulls[number]['commit_data'] = retrieve_data(args, template) + pulls[number]["commit_data"] = retrieve_data(args, template) - pull_file = '{0}/{1}.json'.format(pulls_cwd, number) - with codecs.open(pull_file, 'w', encoding='utf-8') as f: + pull_file = "{0}/{1}.json".format(pulls_cwd, number) + with codecs.open(pull_file, "w", encoding="utf-8") as f: json_dump(pull, f) def backup_milestones(args, repo_cwd, repository, repos_template): - milestone_cwd = os.path.join(repo_cwd, 'milestones') + milestone_cwd = os.path.join(repo_cwd, "milestones") if args.skip_existing and os.path.isdir(milestone_cwd): return - logger.info('Retrieving {0} milestones'.format(repository['full_name'])) + logger.info("Retrieving {0} milestones".format(repository["full_name"])) mkdir_p(repo_cwd, milestone_cwd) - template = '{0}/{1}/milestones'.format(repos_template, - repository['full_name']) + template = "{0}/{1}/milestones".format(repos_template, repository["full_name"]) - query_args = { - 'state': 'all' - } + query_args = {"state": "all"} _milestones = retrieve_data(args, template, query_args=query_args) milestones = {} for milestone in _milestones: - milestones[milestone['number']] = milestone + 
milestones[milestone["number"]] = milestone - logger.info('Saving {0} milestones to disk'.format( - len(list(milestones.keys())))) + logger.info("Saving {0} milestones to disk".format(len(list(milestones.keys())))) for number, milestone in list(milestones.items()): - milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) - with codecs.open(milestone_file, 'w', encoding='utf-8') as f: + milestone_file = "{0}/{1}.json".format(milestone_cwd, number) + with codecs.open(milestone_file, "w", encoding="utf-8") as f: json_dump(milestone, f) def backup_labels(args, repo_cwd, repository, repos_template): - label_cwd = os.path.join(repo_cwd, 'labels') - output_file = '{0}/labels.json'.format(label_cwd) - template = '{0}/{1}/labels'.format(repos_template, - repository['full_name']) - _backup_data(args, - 'labels', - template, - output_file, - label_cwd) + label_cwd = os.path.join(repo_cwd, "labels") + output_file = "{0}/labels.json".format(label_cwd) + template = "{0}/{1}/labels".format(repos_template, repository["full_name"]) + _backup_data(args, "labels", template, output_file, label_cwd) def backup_hooks(args, repo_cwd, repository, repos_template): @@ -982,178 +1192,185 @@ def backup_hooks(args, repo_cwd, repository, repos_template): if not auth: logger.info("Skipping hooks since no authentication provided") return - hook_cwd = os.path.join(repo_cwd, 'hooks') - output_file = '{0}/hooks.json'.format(hook_cwd) - template = '{0}/{1}/hooks'.format(repos_template, - repository['full_name']) + hook_cwd = os.path.join(repo_cwd, "hooks") + output_file = "{0}/hooks.json".format(hook_cwd) + template = "{0}/{1}/hooks".format(repos_template, repository["full_name"]) try: - _backup_data(args, - 'hooks', - template, - output_file, - hook_cwd) + _backup_data(args, "hooks", template, output_file, hook_cwd) except SystemExit: logger.info("Unable to read hooks, skipping") def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): - repository_fullname = 
repository['full_name'] + repository_fullname = repository["full_name"] # give release files somewhere to live & log intent - release_cwd = os.path.join(repo_cwd, 'releases') - logger.info('Retrieving {0} releases'.format(repository_fullname)) + release_cwd = os.path.join(repo_cwd, "releases") + logger.info("Retrieving {0} releases".format(repository_fullname)) mkdir_p(repo_cwd, release_cwd) query_args = {} - release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + release_template = "{0}/{1}/releases".format(repos_template, repository_fullname) releases = retrieve_data(args, release_template, query_args=query_args) # for each release, store it - logger.info('Saving {0} releases to disk'.format(len(releases))) + logger.info("Saving {0} releases to disk".format(len(releases))) for release in releases: - release_name = release['tag_name'] - release_name_safe = release_name.replace('/', '__') - output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name_safe)) - with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + release_name = release["tag_name"] + release_name_safe = release_name.replace("/", "__") + output_filepath = os.path.join( + release_cwd, "{0}.json".format(release_name_safe) + ) + with codecs.open(output_filepath, "w+", encoding="utf-8") as f: json_dump(release, f) if include_assets: - assets = retrieve_data(args, release['assets_url']) + assets = retrieve_data(args, release["assets_url"]) if len(assets) > 0: # give release asset files somewhere to live & download them (not including source archives) release_assets_cwd = os.path.join(release_cwd, release_name_safe) mkdir_p(release_assets_cwd) for asset in assets: - download_file(asset['url'], os.path.join(release_assets_cwd, asset['name']), get_auth(args)) - - -def fetch_repository(name, - remote_url, - local_dir, - skip_existing=False, - bare_clone=False, - lfs_clone=False): + download_file( + asset["url"], + os.path.join(release_assets_cwd, 
asset["name"]), + get_auth(args), + ) + + +def fetch_repository( + name, + remote_url, + local_dir, + skip_existing=False, + bare_clone=False, + lfs_clone=False, + no_prune=False, +): if bare_clone: if os.path.exists(local_dir): - clone_exists = subprocess.check_output(['git', - 'rev-parse', - '--is-bare-repository'], - cwd=local_dir) == b"true\n" + clone_exists = ( + subprocess.check_output( + ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir + ) + == b"true\n" + ) else: clone_exists = False else: - clone_exists = os.path.exists(os.path.join(local_dir, '.git')) + clone_exists = os.path.exists(os.path.join(local_dir, ".git")) if clone_exists and skip_existing: return masked_remote_url = mask_password(remote_url) - initialized = subprocess.call('git ls-remote ' + remote_url, - stdout=FNULL, - stderr=FNULL, - shell=True) + initialized = subprocess.call( + "git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True + ) if initialized == 128: - logger.info("Skipping {0} ({1}) since it's not initialized".format( - name, masked_remote_url)) + logger.info( + "Skipping {0} ({1}) since it's not initialized".format( + name, masked_remote_url + ) + ) return if clone_exists: - logger.info('Updating {0} in {1}'.format(name, local_dir)) + logger.info("Updating {0} in {1}".format(name, local_dir)) - remotes = subprocess.check_output(['git', 'remote', 'show'], - cwd=local_dir) - remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] + remotes = subprocess.check_output(["git", "remote", "show"], cwd=local_dir) + remotes = [i.strip() for i in remotes.decode("utf-8").splitlines()] - if 'origin' not in remotes: - git_command = ['git', 'remote', 'rm', 'origin'] + if "origin" not in remotes: + git_command = ["git", "remote", "rm", "origin"] logging_subprocess(git_command, None, cwd=local_dir) - git_command = ['git', 'remote', 'add', 'origin', remote_url] + git_command = ["git", "remote", "add", "origin", remote_url] logging_subprocess(git_command, None, 
cwd=local_dir) else: - git_command = ['git', 'remote', 'set-url', 'origin', remote_url] + git_command = ["git", "remote", "set-url", "origin", remote_url] logging_subprocess(git_command, None, cwd=local_dir) if lfs_clone: - git_command = ['git', 'lfs', 'fetch', '--all', '--prune'] + git_command = ["git", "lfs", "fetch", "--all", "--prune"] else: - git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + git_command = ["git", "fetch", "--all", "--force", "--tags", "--prune"] + if no_prune: + git_command.pop() logging_subprocess(git_command, None, cwd=local_dir) else: - logger.info('Cloning {0} repository from {1} to {2}'.format( - name, - masked_remote_url, - local_dir)) + logger.info( + "Cloning {0} repository from {1} to {2}".format( + name, masked_remote_url, local_dir + ) + ) if bare_clone: + git_command = ["git", "clone", "--mirror", remote_url, local_dir] + logging_subprocess(git_command, None) if lfs_clone: - git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir] - else: - git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + git_command = ["git", "lfs", "fetch", "--all", "--prune"] + if no_prune: + git_command.pop() + logging_subprocess(git_command, None, cwd=local_dir) else: if lfs_clone: - git_command = ['git', 'lfs', 'clone', remote_url, local_dir] + git_command = ["git", "lfs", "clone", remote_url, local_dir] else: - git_command = ['git', 'clone', remote_url, local_dir] - logging_subprocess(git_command, None) + git_command = ["git", "clone", remote_url, local_dir] + logging_subprocess(git_command, None) def backup_account(args, output_directory): - account_cwd = os.path.join(output_directory, 'account') + account_cwd = os.path.join(output_directory, "account") if args.include_starred or args.include_everything: output_file = "{0}/starred.json".format(account_cwd) - template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) - _backup_data(args, - "starred repositories", - 
template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/starred".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "starred repositories", template, output_file, account_cwd) if args.include_watched or args.include_everything: output_file = "{0}/watched.json".format(account_cwd) - template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) - _backup_data(args, - "watched repositories", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/subscriptions".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "watched repositories", template, output_file, account_cwd) if args.include_followers or args.include_everything: output_file = "{0}/followers.json".format(account_cwd) - template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) - _backup_data(args, - "followers", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/followers".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "followers", template, output_file, account_cwd) if args.include_following or args.include_everything: output_file = "{0}/following.json".format(account_cwd) - template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) - _backup_data(args, - "following", - template, - output_file, - account_cwd) + template = "https://{0}/users/{1}/following".format( + get_github_api_host(args), args.user + ) + _backup_data(args, "following", template, output_file, account_cwd) def _backup_data(args, name, template, output_file, output_directory): skip_existing = args.skip_existing if not skip_existing or not os.path.exists(output_file): - logger.info('Retrieving {0} {1}'.format(args.user, name)) + logger.info("Retrieving {0} {1}".format(args.user, name)) mkdir_p(output_directory) data = retrieve_data(args, template) - logger.info('Writing {0} {1} to disk'.format(len(data), name)) - 
with codecs.open(output_file, 'w', encoding='utf-8') as f: + logger.info("Writing {0} {1} to disk".format(len(data), name)) + with codecs.open(output_file, "w", encoding="utf-8") as f: json_dump(data, f) def json_dump(data, output_file): - json.dump(data, - output_file, - ensure_ascii=False, - sort_keys=True, - indent=4, - separators=(',', ': ')) + json.dump( + data, + output_file, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(",", ": "), + ) diff --git a/release b/release index c48de82..21a14f7 100755 --- a/release +++ b/release @@ -6,7 +6,7 @@ if [[ ! -f setup.py ]]; then exit 1 fi -PACKAGE_NAME="$(cat setup.py | grep "name='" | head | cut -d "'" -f2)" +PACKAGE_NAME="$(cat setup.py | grep 'name="' | head | cut -d '"' -f2)" INIT_PACKAGE_NAME="$(echo "${PACKAGE_NAME//-/_}")" PUBLIC="true" @@ -86,18 +86,12 @@ TMPFILE=$(mktemp /tmp/${tempfoo}.XXXXXX) || { exit 1 } -find_this="__version__ = '$current_version'" -replace_with="__version__ = '$next_version'" +find_this="__version__ = \"$current_version\"" +replace_with="__version__ = \"$next_version\"" echo -e "${YELLOW}--->${COLOR_OFF} Updating ${INIT_PACKAGE_NAME}/__init__.py" sed "s/$find_this/$replace_with/" ${INIT_PACKAGE_NAME}/__init__.py > $TMPFILE && mv $TMPFILE ${INIT_PACKAGE_NAME}/__init__.py -find_this="${PACKAGE_NAME}.git@$current_version" -replace_with="${PACKAGE_NAME}.git@$next_version" - -echo -e "${YELLOW}--->${COLOR_OFF} Updating README.rst" -sed "s/$find_this/$replace_with/" README.rst > $TMPFILE && mv $TMPFILE README.rst - if [ -f docs/conf.py ]; then echo -e "${YELLOW}--->${COLOR_OFF} Updating docs" find_this="version = '${current_version}'" diff --git a/release-requirements.txt b/release-requirements.txt new file mode 100644 index 0000000..1571464 --- /dev/null +++ b/release-requirements.txt @@ -0,0 +1,31 @@ +bleach==6.0.0 +certifi==2023.5.7 +charset-normalizer==3.1.0 +colorama==0.4.6 +docutils==0.20.1 +flake8==6.0.0 +gitchangelog==3.0.4 +idna==3.4 +importlib-metadata==6.6.0 
+jaraco.classes==3.2.3 +keyring==23.13.1 +markdown-it-py==2.2.0 +mccabe==0.7.0 +mdurl==0.1.2 +more-itertools==9.1.0 +pkginfo==1.9.6 +pycodestyle==2.10.0 +pyflakes==3.0.1 +Pygments==2.15.1 +readme-renderer==37.3 +requests==2.31.0 +requests-toolbelt==1.0.0 +restructuredtext-lint==1.4.0 +rfc3986==2.0.0 +rich==13.3.5 +six==1.16.0 +tqdm==4.65.0 +twine==4.0.2 +urllib3==2.0.2 +webencodings==0.5.1 +zipp==3.15.0 diff --git a/setup.py b/setup.py index 3b4df41..898e4fb 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ try: from setuptools import setup + setup # workaround for pyflakes issue #13 except ImportError: from distutils.core import setup @@ -15,6 +16,7 @@ # http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html) try: import multiprocessing + multiprocessing except ImportError: pass @@ -25,26 +27,26 @@ def open_file(fname): setup( - name='github-backup', + name="github-backup", version=__version__, - author='Jose Diaz-Gonzalez', - author_email='github-backup@josediazgonzalez.com', - packages=['github_backup'], - scripts=['bin/github-backup'], - url='http://github.com/josegonzalez/python-github-backup', - license='MIT', + author="Jose Diaz-Gonzalez", + author_email="github-backup@josediazgonzalez.com", + packages=["github_backup"], + scripts=["bin/github-backup"], + url="http://github.com/josegonzalez/python-github-backup", + license="MIT", classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Topic :: System :: Archiving :: Backup', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', + "Development Status :: 5 - Production/Stable", + "Topic :: System :: Archiving :: Backup", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", 
], - description='backup a github user or organization', - long_description=open_file('README.rst').read(), - long_description_content_type='text/x-rst', - install_requires=open_file('requirements.txt').readlines(), + description="backup a github user or organization", + long_description=open_file("README.rst").read(), + long_description_content_type="text/x-rst", + install_requires=open_file("requirements.txt").readlines(), zip_safe=True, ) From 2bf8898545aee7a1b8d3ccba2a3c24ab3d81a58b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 20:24:18 +0000 Subject: [PATCH 180/455] Bump urllib3 from 2.0.2 to 2.0.7 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.0.2 to 2.0.7. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.0.2...2.0.7) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 1571464..506cd20 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -26,6 +26,6 @@ rich==13.3.5 six==1.16.0 tqdm==4.65.0 twine==4.0.2 -urllib3==2.0.2 +urllib3==2.0.7 webencodings==0.5.1 zipp==3.15.0 From f99894317162d5992c7e1d4b9e073a967a8a76ca Mon Sep 17 00:00:00 2001 From: Tom Plant <21111317+pl4nty@users.noreply.github.com> Date: Sat, 28 Oct 2023 16:30:31 +1100 Subject: [PATCH 181/455] feat: create Dockerfile --- Dockerfile | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6217594 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.9.18-slim + +RUN --mount=type=cache,target=/var/cache/apt \ + apt-get update && apt-get install -y git git-lfs + +WORKDIR /usr/src/app + +COPY release-requirements.txt . +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install -r release-requirements.txt + +COPY . . +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install . + +ENTRYPOINT [ "github-backup" ] From f9b627c1e404e09985b94b50519504990d4befa6 Mon Sep 17 00:00:00 2001 From: Halvor Holsten Strand Date: Sat, 28 Oct 2023 08:33:58 +0200 Subject: [PATCH 182/455] Added automatic release workflow, for use with GitHub Actions. 
--- .github/workflows/automatic-release.yml | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/automatic-release.yml diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml new file mode 100644 index 0000000..262f3c6 --- /dev/null +++ b/.github/workflows/automatic-release.yml @@ -0,0 +1,38 @@ +name: automatic-release + +on: + workflow_dispatch: + inputs: + release_type: + description: Release type + required: true + type: choice + options: + - patch + - minor + - major + +jobs: + release: + name: Release + runs-on: ubuntu-20.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup Git + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: Install prerequisites + run: pip install -r release-requirements.txt + - name: Execute release + env: + SEMVER_BUMP: ${{ github.event.inputs.release_type }} + TWINE_REPOSITORY: ${{ vars.TWINE_REPOSITORY }} + TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} + TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} + run: ./release $SEMVER_BUMP From febf380c573543a1fb6ff89bcf8fa88602ce3704 Mon Sep 17 00:00:00 2001 From: Halvor Holsten Strand Date: Sat, 28 Oct 2023 20:19:18 +0200 Subject: [PATCH 183/455] Updated to latest Ubuntu LTS while keeping setup-python to stay put on Python 3.8. 
--- .github/workflows/automatic-release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index 262f3c6..d6f04cb 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -15,7 +15,7 @@ on: jobs: release: name: Release - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout repository uses: actions/checkout@v4 From 4406ba7f0731cabfcd1ebb92f896508abc369347 Mon Sep 17 00:00:00 2001 From: Halvor Holsten Strand Date: Sun, 29 Oct 2023 20:37:20 +0100 Subject: [PATCH 184/455] Checkout everything. --- .github/workflows/automatic-release.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index d6f04cb..05682f6 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -19,6 +19,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Setup Git run: | git config --local user.email "action@github.com" From 7d03e4c9bb9c8632ae966f9cad475d416e9ee118 Mon Sep 17 00:00:00 2001 From: hozza Date: Tue, 7 Nov 2023 14:53:58 +0000 Subject: [PATCH 185/455] added verbose install instructions --- README.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 2e4dfa4..00ad2bc 100644 --- a/README.rst +++ b/README.rst @@ -12,6 +12,7 @@ Requirements ============ - GIT 1.9+ +- Python Installation ============ @@ -20,9 +21,12 @@ Using PIP via PyPI:: pip install github-backup -Using PIP via Github:: +Using PIP via Github (more likely the latest version):: pip install git+https://github.com/josegonzalez/python-github-backup.git#egg=github-backup + +*Note for Python newcomers: even after you've installed pip and python etc, (e.g. 
debian based: ``sudo apt install pip``), an installed python scripts are unlikely to be included in your ``$PATH`` by default, this means it cannot be run directly in terminal with ``$ github-backup ...``, you can either add pythons install path to your environments ``$PATH`` or call the script directly e.g. `$ ~/.local/bin/github-backup`.* + Usage ===== From f449d8bbe3494217eff4e6076f86054f428ad5f5 Mon Sep 17 00:00:00 2001 From: hozza Date: Tue, 7 Nov 2023 14:56:43 +0000 Subject: [PATCH 186/455] added details usage and examples including gotchas, errors and development instructions. --- README.rst | 191 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 154 insertions(+), 37 deletions(-) diff --git a/README.rst b/README.rst index 00ad2bc..d78aa6c 100644 --- a/README.rst +++ b/README.rst @@ -4,9 +4,7 @@ github-backup |PyPI| |Python Versions| - This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and can not sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. - -backup a github user or organization +The package can be used to backup an *entire* `Github `_ organization, repository or user account, including starred, issues and wikis in the most appropriate format (clones for wikis, json files for issues). Requirements ============ @@ -28,11 +26,137 @@ Using PIP via Github (more likely the latest version):: *Note for Python newcomers: even after you've installed pip and python etc, (e.g. debian based: ``sudo apt install pip``), an installed python scripts are unlikely to be included in your ``$PATH`` by default, this means it cannot be run directly in terminal with ``$ github-backup ...``, you can either add pythons install path to your environments ``$PATH`` or call the script directly e.g. 
`$ ~/.local/bin/github-backup`.* -Usage -===== -CLI Usage is as follows:: +Usage Details +============= + +Authentication +-------------- + +**Password-based authentication** will fail if you have two-factor authentication enabled, and will `be deprecated `_ by 2023 EOY. + +``--username`` is used for basic password authentication and separate from the position argument ``USER``, which specifies the user account you wish to backing up. + +**Classic tokens** are `slightly less secure `_ as they provide very coarse-grained permissions. + +If you need authentication for long-running backups (i.e. for private repositories etc) it is therefore recommended to use **fine-grained personal access token** ``-f TOKEN_FINE``. + + +Fine Tokens +~~~~~~~~~~~ + +Under Settings -> Developer Settings -> Personal access tokens -> Fine-grained Tokens. You can "generate new token" and choose the repository scope, either specific repos or all repos. + +You can customise the permissions for use case, but for a personal account full backup you'll need to enable the following permissions: + +**User permissions**: Read access to followers, starring, and watching. + +**Repository permissions**: Read access to actions, code, commit statuses, environments, issues, merge queues, metadata, pages, pull requests, repository advisories, and repository hooks. + + +Prefer SSH +~~~~~~~~~~ + +Using the ``-prefer-ssh`` argument will use ssh for cloning the git repos. If cloning repos is enabled with ``--repositories``, ``--all-starred``, ``--wikis``, ``--gists``, ``--starred-gists``. + +To clone with SSH, you'll need SSH authentication setup `as usual with Github `_, e.g. via SSH public and private keys. + +All other connections will still use their own protocol, e.g. API requests for issues using HTTPS. + + +Using the Keychain on Mac OSX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note: On Mac OSX the token can be stored securely in the user's keychain. To do this: + +1. 
Open Keychain from "Applications -> Utilities -> Keychain Access" +2. Add a new password item using "File -> New Password Item" +3. Enter a name in the "Keychain Item Name" box. You must provide this name to github-backup using the --keychain-name argument. +4. Enter an account name in the "Account Name" box, enter your Github username as set above. You must provide this name to github-backup using the --keychain-account argument. +5. Enter your Github personal access token in the "Password" box + +Note: When you run github-backup, you will be asked whether you want to allow "security" to use your confidential information stored in your keychain. You have two options: + +1. **Allow:** In this case you will need to click "Allow" each time you run `github-backup` +2. **Always Allow:** In this case, you will not be asked for permission when you run `github-backup` in future. This is less secure, but is required if you want to schedule `github-backup` to run automatically + + +Github Rate-limit and Throttling +-------------------------------- + +``github-backup`` will automatically throttle itself based on feedback from the Github API. The API is usually rate-limited to 5000 calls per hour, and it tells github-backup when to pause and wait until the limit is reset in the next hour. + +On a fast connection this can result in safe (~20 min) pauses and bursts of API calls and downloading periodically maxing our your connection, is this is not suitable `it has been observed `_ under real-world conditions that overriding the throttle with ``--throttle-limit 5000 --throttle-pause 0.6`` provides a smooth rate across the hour, although a ``--throttle-pause 0.72`` (3600 seconds [1 hour] / 5000 limit) is theoretically safer. + + +About Git LFS +------------- + +When you use the ``--lfs`` option, you will need to make sure you have Git LFS installed. + +Instructions on how to do this can be found on https://git-lfs.github.com. 
+ + +Gotchas / Known-issues +====================== + +All is not all +-------------- + +The ``--all`` argument does not include; cloning private repos (``-P, --private``), cloning forks (``-F, --fork``) cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs ``), cloning gists (``--starred-gists``) or starred gist repos (``--starred-gists``). + +All Starred can be very large +------------------------ + +Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. thousands of JSON issues files. + +Incremental Backup +------------------- +Incremental (``-i, --incremental``) backups in this context means, requesting only parts since the last run (successful or not). e.g. only request issues from the API since the last run. This means any blocking errors on previous runs can cause large missing chucks of data. + +Known blocking errors +--------------------- + +Some errors will block the backup and exit the script, such as receiving a 403 Forbidden error from the Github API. If the incremental argument is used, this will result in the next backup only requesting data from the API since the last blocked/failed run. It's therefore recommended to only use the incremental argument if the output/result is being actively monitored to avoid unexpected missing data in your backup. + +Starred public repo blocking with all argument +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since the ``--all`` argument includes ``--hooks``, if you use ``--all`` and ``--all-starred`` to clone a users starred public repositories, the backup will error and block the backup continuing. This is due to needing the correct permission for ``-hooks`` on public repos. + +Releases blocking error +~~~~~~~~~~~~~~~~~~~~~~~ + +A ``--releases`` (required for ``--assets``) error will sometimes block the backup. 
If you're backing up a lot of repositories with releases e.g. an organisation or ``--starred-gists`` you may need to remove ``--releases`` (and therefore ``--assets``) to complete a backup. Documented in `issue 209 `_. + + +Bare is actually mirror +----------------------- + +Using the bare clone argument (``--bare``) will actually call git's ``clone --mirror`` command. There's a subtle difference between `bare `_ and `mirror `_ clone.:: + + Compared to --bare, --mirror not only maps local branches of the source to local branches of the target, it maps all refs (including remote-tracking branches, notes etc.) and sets up a refspec configuration such that all these refs are overwritten by a git remote update in the target repository. + + +Starred gists stored with your gists +------------------------------------ + +The starred repo cloning (``--all-starred``) argument stores starred repos under a separate directory to your own repositories. Using ``--starred-gists`` will store them within the same directory as your own gists ``--gists``. + + +Skipping existing may leave you with incomplete backups +------------------------------------------------------- + +The ``--skip-existing`` argument will skip any existing backup if the directory exists, if the backup in that directory was successfully completed or not (perhaps due to a blocking error). + + +Basic Help +=========== + +Show the CLI help output:: + github-backup -h + +CLI Help output:: github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [--as-app] [-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i] [--starred] [--all-starred] @@ -134,53 +258,46 @@ CLI Usage is as follows:: --throttle-limit to be set) -The package can be used to backup an *entire* organization or repository, including issues and wikis in the most appropriate format (clones for wikis, json files for issues). 
+Github Backup Examples +======== -Authentication -============== +Backup all repositories, including private ones using a classic token:: -Note: Password-based authentication will fail if you have two-factor authentication enabled. + export ACCESS_TOKEN=SOME-GITHUB-TOKEN + github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private -Using the Keychain on Mac OSX -============================= -Note: On Mac OSX the token can be stored securely in the user's keychain. To do this: +Use a fine-grained access token to backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: -1. Open Keychain from "Applications -> Utilities -> Keychain Access" -2. Add a new password item using "File -> New Password Item" -3. Enter a name in the "Keychain Item Name" box. You must provide this name to github-backup using the --keychain-name argument. -4. Enter an account name in the "Account Name" box, enter your Github username as set above. You must provide this name to github-backup using the --keychain-account argument. -5. Enter your Github personal access token in the "Password" box + export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN + ORGANIZATION=docker + REPO=cli + # e.g. git@github.com:docker/cli.git + github-backup $ORGANIZATION -P -f $FINE_ACCESS_TOKEN -o . --all -O -R $REPO -Note: When you run github-backup, you will be asked whether you want to allow "security" to use your confidential information stored in your keychain. You have two options: +Quietly and incrementally backup most useful Github data for a user (public and private) with SSH cloning including; all issues, pulls, all public starred repos and gists (omitting "hooks", "releases" and therefore "assets" to prevent blocking) into an output folder in your home directory. *Great for a cron job. Omit "incremental" to fix a previous incomplete backup.*:: -1. 
**Allow:** In this case you will need to click "Allow" each time you run `github-backup` -2. **Always Allow:** In this case, you will not be asked for permission when you run `github-backup` in future. This is less secure, but is required if you want to schedule `github-backup` to run automatically + export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN + GH_USER=YOUR-GITHUB-USER -About Git LFS -============= + github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER + +Debug an erroring/blocking or incomplete backup into a temporary directory:: -When you use the "--lfs" option, you will need to make sure you have Git LFS installed. + export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN + GH_USER=YOUR-GITHUB-USER -Instructions on how to do this can be found on https://git-lfs.github.com. + github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER -Examples -======== -Backup all repositories, including private ones:: - export ACCESS_TOKEN=SOME-GITHUB-TOKEN - github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private -Use a fine-grained access token to backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: +Development +=========== - export ACCESS_TOKEN=SOME-GITHUB-TOKEN - ORGANIZATION=docker - REPO=cli - # e.g. git@github.com:docker/cli.git - github-backup $ORGANIZATION -P -f $ACCESS_TOKEN -o . 
--all -O -R $REPO +This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and can not sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. Testing -======= +------- This project currently contains no unit tests. To run linting:: From 9cf85b087f64d2298567705688e3489c526c6be6 Mon Sep 17 00:00:00 2001 From: hozza Date: Tue, 7 Nov 2023 15:28:39 +0000 Subject: [PATCH 187/455] fix readme formatting, spelling and layout --- README.rst | 268 ++++++++++++++++++++++++++++------------------------- 1 file changed, 141 insertions(+), 127 deletions(-) diff --git a/README.rst b/README.rst index d78aa6c..93a8c92 100644 --- a/README.rst +++ b/README.rst @@ -23,140 +23,19 @@ Using PIP via Github (more likely the latest version):: pip install git+https://github.com/josegonzalez/python-github-backup.git#egg=github-backup -*Note for Python newcomers: even after you've installed pip and python etc, (e.g. debian based: ``sudo apt install pip``), an installed python scripts are unlikely to be included in your ``$PATH`` by default, this means it cannot be run directly in terminal with ``$ github-backup ...``, you can either add pythons install path to your environments ``$PATH`` or call the script directly e.g. `$ ~/.local/bin/github-backup`.* - - - -Usage Details -============= - -Authentication --------------- - -**Password-based authentication** will fail if you have two-factor authentication enabled, and will `be deprecated `_ by 2023 EOY. - -``--username`` is used for basic password authentication and separate from the position argument ``USER``, which specifies the user account you wish to backing up. - -**Classic tokens** are `slightly less secure `_ as they provide very coarse-grained permissions. - -If you need authentication for long-running backups (i.e. 
for private repositories etc) it is therefore recommended to use **fine-grained personal access token** ``-f TOKEN_FINE``. - - -Fine Tokens -~~~~~~~~~~~ - -Under Settings -> Developer Settings -> Personal access tokens -> Fine-grained Tokens. You can "generate new token" and choose the repository scope, either specific repos or all repos. - -You can customise the permissions for use case, but for a personal account full backup you'll need to enable the following permissions: - -**User permissions**: Read access to followers, starring, and watching. - -**Repository permissions**: Read access to actions, code, commit statuses, environments, issues, merge queues, metadata, pages, pull requests, repository advisories, and repository hooks. - - -Prefer SSH -~~~~~~~~~~ - -Using the ``-prefer-ssh`` argument will use ssh for cloning the git repos. If cloning repos is enabled with ``--repositories``, ``--all-starred``, ``--wikis``, ``--gists``, ``--starred-gists``. - -To clone with SSH, you'll need SSH authentication setup `as usual with Github `_, e.g. via SSH public and private keys. - -All other connections will still use their own protocol, e.g. API requests for issues using HTTPS. - - -Using the Keychain on Mac OSX -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Note: On Mac OSX the token can be stored securely in the user's keychain. To do this: - -1. Open Keychain from "Applications -> Utilities -> Keychain Access" -2. Add a new password item using "File -> New Password Item" -3. Enter a name in the "Keychain Item Name" box. You must provide this name to github-backup using the --keychain-name argument. -4. Enter an account name in the "Account Name" box, enter your Github username as set above. You must provide this name to github-backup using the --keychain-account argument. -5. 
Enter your Github personal access token in the "Password" box - -Note: When you run github-backup, you will be asked whether you want to allow "security" to use your confidential information stored in your keychain. You have two options: - -1. **Allow:** In this case you will need to click "Allow" each time you run `github-backup` -2. **Always Allow:** In this case, you will not be asked for permission when you run `github-backup` in future. This is less secure, but is required if you want to schedule `github-backup` to run automatically - - -Github Rate-limit and Throttling --------------------------------- - -``github-backup`` will automatically throttle itself based on feedback from the Github API. The API is usually rate-limited to 5000 calls per hour, and it tells github-backup when to pause and wait until the limit is reset in the next hour. - -On a fast connection this can result in safe (~20 min) pauses and bursts of API calls and downloading periodically maxing our your connection, is this is not suitable `it has been observed `_ under real-world conditions that overriding the throttle with ``--throttle-limit 5000 --throttle-pause 0.6`` provides a smooth rate across the hour, although a ``--throttle-pause 0.72`` (3600 seconds [1 hour] / 5000 limit) is theoretically safer. - - -About Git LFS -------------- - -When you use the ``--lfs`` option, you will need to make sure you have Git LFS installed. - -Instructions on how to do this can be found on https://git-lfs.github.com. - - -Gotchas / Known-issues -====================== - -All is not all --------------- - -The ``--all`` argument does not include; cloning private repos (``-P, --private``), cloning forks (``-F, --fork``) cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs ``), cloning gists (``--starred-gists``) or starred gist repos (``--starred-gists``). 
- -All Starred can be very large ------------------------- - -Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. thousands of JSON issues files. - -Incremental Backup -------------------- - -Incremental (``-i, --incremental``) backups in this context means, requesting only parts since the last run (successful or not). e.g. only request issues from the API since the last run. This means any blocking errors on previous runs can cause large missing chucks of data. - -Known blocking errors ---------------------- - -Some errors will block the backup and exit the script, such as receiving a 403 Forbidden error from the Github API. If the incremental argument is used, this will result in the next backup only requesting data from the API since the last blocked/failed run. It's therefore recommended to only use the incremental argument if the output/result is being actively monitored to avoid unexpected missing data in your backup. - -Starred public repo blocking with all argument -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Since the ``--all`` argument includes ``--hooks``, if you use ``--all`` and ``--all-starred`` to clone a users starred public repositories, the backup will error and block the backup continuing. This is due to needing the correct permission for ``-hooks`` on public repos. - -Releases blocking error -~~~~~~~~~~~~~~~~~~~~~~~ - -A ``--releases`` (required for ``--assets``) error will sometimes block the backup. If you're backing up a lot of repositories with releases e.g. an organisation or ``--starred-gists`` you may need to remove ``--releases`` (and therefore ``--assets``) to complete a backup. Documented in `issue 209 `_. - - -Bare is actually mirror ------------------------ - -Using the bare clone argument (``--bare``) will actually call git's ``clone --mirror`` command. 
There's a subtle difference between `bare `_ and `mirror `_ clone.:: - - Compared to --bare, --mirror not only maps local branches of the source to local branches of the target, it maps all refs (including remote-tracking branches, notes etc.) and sets up a refspec configuration such that all these refs are overwritten by a git remote update in the target repository. - - -Starred gists stored with your gists ------------------------------------- - -The starred repo cloning (``--all-starred``) argument stores starred repos under a separate directory to your own repositories. Using ``--starred-gists`` will store them within the same directory as your own gists ``--gists``. - - -Skipping existing may leave you with incomplete backups -------------------------------------------------------- - -The ``--skip-existing`` argument will skip any existing backup if the directory exists, if the backup in that directory was successfully completed or not (perhaps due to a blocking error). +*Install note for python newcomers:* +After you've installed pip and python, python scripts are unlikely to be included in your ``$PATH`` by default, this means it cannot be run directly in terminal with ``$ github-backup ...``, you can either add python's install path to your environments ``$PATH`` or call the script directly e.g. using ``$ ~/.local/bin/github-backup``.* Basic Help =========== Show the CLI help output:: + github-backup -h CLI Help output:: + github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [--as-app] [-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i] [--starred] [--all-starred] @@ -258,6 +137,141 @@ CLI Help output:: --throttle-limit to be set) +Usage Details +============= + +Authentication +-------------- + +**Password-based authentication** will fail if you have two-factor authentication enabled, and will `be deprecated `_ by 2023 EOY. 
+ +``--username`` is used for basic password authentication and separate from the positional argument ``USER``, which specifies the user account you wish to backing up. + +**Classic tokens** are `slightly less secure `_ as they provide very coarse-grained permissions. + +If you need authentication for long-running backups (e.g. for a cron job) it is recommended to use **fine-grained personal access token** ``-f TOKEN_FINE``. + + +Fine Tokens +~~~~~~~~~~~ + +You can "generate new token" and choose the repository scope, either specific repos or all repos. On Github this is under *Settings -> Developer Settings -> Personal access tokens -> Fine-grained Tokens* + +Customise the permissions for your use case, but for a personal account full backup you'll need to enable the following permissions: + +**User permissions**: Read access to followers, starring, and watching. + +**Repository permissions**: Read access to actions, code, commit statuses, environments, issues, merge queues, metadata, pages, pull requests, repository advisories, and repository hooks. + + +Prefer SSH +~~~~~~~~~~ + +If cloning repos is enabled with ``--repositories``, ``--all-starred``, ``--wikis``, ``--gists``, ``--starred-gists`` using the ``-prefer-ssh`` argument will use ssh for cloning the git repos. + +To clone with SSH, you'll need SSH authentication setup `as usual with Github `_, e.g. via SSH public and private keys. + +All other connections will still use their own protocol, e.g. API requests for issues uses HTTPS. + + +Using the Keychain on Mac OSX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note: On Mac OSX the token can be stored securely in the user's keychain. To do this: + +1. Open Keychain from "Applications -> Utilities -> Keychain Access" +2. Add a new password item using "File -> New Password Item" +3. Enter a name in the "Keychain Item Name" box. You must provide this name to github-backup using the --keychain-name argument. +4. 
Enter an account name in the "Account Name" box, enter your Github username as set above. You must provide this name to github-backup using the --keychain-account argument. +5. Enter your Github personal access token in the "Password" box + +Note: When you run github-backup, you will be asked whether you want to allow "security" to use your confidential information stored in your keychain. You have two options: + +1. **Allow:** In this case you will need to click "Allow" each time you run `github-backup` +2. **Always Allow:** In this case, you will not be asked for permission when you run `github-backup` in future. This is less secure, but is required if you want to schedule `github-backup` to run automatically + + +Github Rate-limit and Throttling +-------------------------------- + +``github-backup`` will automatically throttle itself based on feedback from the Github API. + +Their API is usually rate-limited to 5000 calls per hour, and it tells github-backup when to pause and wait until a specific time when the limit is reset. + +During a large backup such as ``--all-starred``, and on a fast connection this can result in (~20 min) pauses with bursts of API calls periodically maxing out the API limit. If this is not suitable `it has been observed `_ under real-world conditions that overriding the throttle with ``--throttle-limit 5000 --throttle-pause 0.6`` provides a smooth rate across the hour, although a ``--throttle-pause 0.72`` (3600 seconds [1 hour] / 5000 limit) is theoretically safer to prevent pauses. + + +About Git LFS +------------- + +When you use the ``--lfs`` option, you will need to make sure you have Git LFS installed. + +Instructions on how to do this can be found on https://git-lfs.github.com. 
+ + +Gotchas / Known-issues +====================== + +All is not all +-------------- + +The ``--all`` argument does not include; cloning private repos (``-P, --private``), cloning forks (``-F, --fork``) cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--starred-gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. + +Cloning all starred size +------------------------ + +Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. thousands of JSON issues files, assets and the repos. + +Incremental Backup +------------------- + +Incremental (``-i, --incremental``) will request only new data from the API since the last run (successful or not). e.g. only request issues from the API since the last run. + +This means any blocking errors on previous runs can cause a large amount of missing data in backups. + +Known blocking errors +--------------------- + +Some errors will block the backup by exit the script, such as receiving a 403 Forbidden error from the Github API. + +If the incremental argument is used, this will result in the next backup only requesting API data since the last blocked/failed run. + +It's therefore recommended to only use the incremental argument if the output/result is being actively monitored to avoid unexpected missing data in a regular backup runs. + +1. **Starred public repo blocking** + + Since the ``--all`` argument includes ``--hooks``, if you use ``--all`` and ``--all-starred`` together to clone a users starred public repositories, the backup will likely error and block the backup continuing. + + This is due to needing the correct permission for ``-hooks`` on public repos. + +2. **Releases blocking** + + A known ``--releases`` (required for ``--assets``) error will sometimes block the backup. 
If you're backing up a lot of repositories with releases e.g. an organisation or ``--all-starred``. + + You may need to remove ``--releases`` (and therefore ``--assets``) to complete a backup. Documented in `issue 209 `_. + + +"bare" is actually "mirror" +-------------------------- + +Using the bare clone argument (``--bare``) will actually call git's ``clone --mirror`` command. There's a subtle difference between `bare `_ and `mirror `_ clone.:: + + Compared to --bare, --mirror not only maps local branches of the source to local branches of the target, it maps all refs (including remote-tracking branches, notes etc.) and sets up a refspec configuration such that all these refs are overwritten by a git remote update in the target repository. + + +Starred gists stored with user gists +------------------------------------ + +The starred repo cloning (``--all-starred``) argument stores starred repos separately to the users own repositories. However, using ``--starred-gists`` will store starred gists within the same directory as the users own gists ``--gists``. + + +Skip existing on incomplete backups +------------------------------------------------------- + +The ``--skip-existing`` argument will skip a backup if the directory already exists, regardless of if the backup in that directory was not successfully completed (perhaps due to a blocking error). + +This may result in unexpected missing data in a regular backup. + + Github Backup Examples ======== @@ -274,14 +288,14 @@ Use a fine-grained access token to backup a single organization repository with # e.g. git@github.com:docker/cli.git github-backup $ORGANIZATION -P -f $FINE_ACCESS_TOKEN -o . --all -O -R $REPO -Quietly and incrementally backup most useful Github data for a user (public and private) with SSH cloning including; all issues, pulls, all public starred repos and gists (omitting "hooks", "releases" and therefore "assets" to prevent blocking) into an output folder in your home directory. 
*Great for a cron job. Omit "incremental" to fix a previous incomplete backup.*:: +Quietly and incrementally backup useful Github user data (public and private repos with SSH) including; all issues, pulls, all public starred repos and gists (omitting "hooks", "releases" and therefore "assets" to prevent blocking). *Great for a cron job.*:: export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER -Debug an erroring/blocking or incomplete backup into a temporary directory:: +Debug an erroring/blocking or incomplete backup into a temporary directory. Omit "incremental" to fix a previous incomplete backup.:: export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER From f63be3be24b4d0ee894228063e04eaebed22eae8 Mon Sep 17 00:00:00 2001 From: hozza Date: Tue, 7 Nov 2023 15:46:03 +0000 Subject: [PATCH 188/455] fixed readme working and layout --- README.rst | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/README.rst b/README.rst index 93a8c92..4d1e2da 100644 --- a/README.rst +++ b/README.rst @@ -25,7 +25,7 @@ Using PIP via Github (more likely the latest version):: *Install note for python newcomers:* -After you've installed pip and python, python scripts are unlikely to be included in your ``$PATH`` by default, this means it cannot be run directly in terminal with ``$ github-backup ...``, you can either add python's install path to your environments ``$PATH`` or call the script directly e.g. 
using ``$ ~/.local/bin/github-backup``.* +Python scripts are unlikely to be included in your ``$PATH`` by default, this means it cannot be run directly in terminal with ``$ github-backup ...``, you can either add python's install path to your environments ``$PATH`` or call the script directly e.g. using ``$ ~/.local/bin/github-backup``.* Basic Help =========== @@ -195,9 +195,9 @@ Github Rate-limit and Throttling ``github-backup`` will automatically throttle itself based on feedback from the Github API. -Their API is usually rate-limited to 5000 calls per hour, and it tells github-backup when to pause and wait until a specific time when the limit is reset. +Their API is usually rate-limited to 5000 calls per hour. The API will ask github-backup to pause until a specific time when the limit is reset again (at the start of the next hour). This continues until the backup is complete. -During a large backup such as ``--all-starred``, and on a fast connection this can result in (~20 min) pauses with bursts of API calls periodically maxing out the API limit. If this is not suitable `it has been observed `_ under real-world conditions that overriding the throttle with ``--throttle-limit 5000 --throttle-pause 0.6`` provides a smooth rate across the hour, although a ``--throttle-pause 0.72`` (3600 seconds [1 hour] / 5000 limit) is theoretically safer to prevent pauses. +During a large backup such as ``--all-starred``, and on a fast connection this can result in (~20 min) pauses with bursts of API calls periodically maxing out the API limit. If this is not suitable `it has been observed `_ under real-world conditions that overriding the throttle with ``--throttle-limit 5000 --throttle-pause 0.6`` provides a smooth rate across the hour, although a ``--throttle-pause 0.72`` (3600 seconds [1 hour] / 5000 limit) is theoretically safer to prevent rate-limit pauses. About Git LFS @@ -211,20 +211,20 @@ Instructions on how to do this can be found on https://git-lfs.github.com. 
Gotchas / Known-issues ====================== -All is not all --------------- +All is not everything +--------------------- The ``--all`` argument does not include; cloning private repos (``-P, --private``), cloning forks (``-F, --fork``) cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--starred-gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. Cloning all starred size ------------------------ -Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. thousands of JSON issues files, assets and the repos. +Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. thousands of JSON issues files, assets and the repos etc. Consider just storing the links to starred repos with ``--starred``. Incremental Backup ------------------- -Incremental (``-i, --incremental``) will request only new data from the API since the last run (successful or not). e.g. only request issues from the API since the last run. +Using (``-i, --incremental``) will request only new data from the API since the last run (successful or not). e.g. only request issues from the API since the last run. This means any blocking errors on previous runs can cause a large amount of missing data in backups. @@ -233,43 +233,41 @@ Known blocking errors Some errors will block the backup by exit the script, such as receiving a 403 Forbidden error from the Github API. -If the incremental argument is used, this will result in the next backup only requesting API data since the last blocked/failed run. +If the incremental argument is used, this will result in the next backup only requesting API data since the last blocked/failed run. 
Potentially causing unexpected large amounts of missing data. -It's therefore recommended to only use the incremental argument if the output/result is being actively monitored to avoid unexpected missing data in a regular backup runs. +It's therefore recommended to only use the incremental argument if the output/result is being actively monitored, or complemented with periodic full non-incremental runs, to avoid unexpected missing data in regular backup runs. 1. **Starred public repo blocking** Since the ``--all`` argument includes ``--hooks``, if you use ``--all`` and ``--all-starred`` together to clone a users starred public repositories, the backup will likely error and block the backup continuing. - This is due to needing the correct permission for ``-hooks`` on public repos. + This is due to needing the correct permission for ``--hooks`` on public repos. 2. **Releases blocking** - A known ``--releases`` (required for ``--assets``) error will sometimes block the backup. If you're backing up a lot of repositories with releases e.g. an organisation or ``--all-starred``. + A known ``--releases`` (required for ``--assets``) error will sometimes block the backup. - You may need to remove ``--releases`` (and therefore ``--assets``) to complete a backup. Documented in `issue 209 `_. + If you're backing up a lot of repositories with releases, e.g. an organisation or ``--all-starred``, you may need to remove ``--releases`` (and therefore ``--assets``) to complete a backup. Documented in `issue 209 `_. "bare" is actually "mirror" -------------------------- -Using the bare clone argument (``--bare``) will actually call git's ``clone --mirror`` command. There's a subtle difference between `bare `_ and `mirror `_ clone.:: - - Compared to --bare, --mirror not only maps local branches of the source to local branches of the target, it maps all refs (including remote-tracking branches, notes etc.)
and sets up a refspec configuration such that all these refs are overwritten by a git remote update in the target repository. +Using the bare clone argument (``--bare``) will actually call git's ``clone --mirror`` command. There's a subtle difference between `bare `_ and `mirror `_ clone. :: + + Compared to --bare, --mirror not only maps local branches of the source to local branches of the target, it maps all refs (including remote-tracking branches, notes etc.) and sets up a refspec configuration such that all these refs are overwritten by a git remote update in the target repository. -Starred gists stored with user gists ------------------------------------- +Starred gists vs starred repo behaviour +--------------------------------------- -The starred repo cloning (``--all-starred``) argument stores starred repos separately to the users own repositories. However, using ``--starred-gists`` will store starred gists within the same directory as the users own gists ``--gists``. +The starred normal repo cloning (``--all-starred``) argument stores starred repos separately to the users own repositories. However, using ``--starred-gists`` will store starred gists within the same directory as the users own gists ``--gists``. Also, all gist repo directory names are IDs not the gist's name. Skip existing on incomplete backups ------------------------------------------------------- -The ``--skip-existing`` argument will skip a backup if the directory already exists, regardless of if the backup in that directory was not successfully completed (perhaps due to a blocking error). - -This may result in unexpected missing data in a regular backup. +The ``--skip-existing`` argument will skip a backup if the directory already exists, regardless of if the backup in that directory was not successfully completed (perhaps due to a blocking error). This may result in unexpected missing data in a regular backup. 
Github Backup Examples @@ -288,14 +286,14 @@ Use a fine-grained access token to backup a single organization repository with # e.g. git@github.com:docker/cli.git github-backup $ORGANIZATION -P -f $FINE_ACCESS_TOKEN -o . --all -O -R $REPO -Quietly and incrementally backup useful Github user data (public and private repos with SSH) including; all issues, pulls, all public starred repos and gists (omitting "hooks", "releases" and therefore "assets" to prevent blocking). *Great for a cron job.*:: +Quietly and incrementally backup useful Github user data (public and private repos with SSH) including; all issues, pulls, all public starred repos and gists (omitting "hooks", "releases" and therefore "assets" to prevent blocking). *Great for a cron job.* :: export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER -Debug an erroring/blocking or incomplete backup into a temporary directory. Omit "incremental" to fix a previous incomplete backup.:: +Debug an erroring/blocking or incomplete backup into a temporary directory. Omit "incremental" to fix a previous incomplete backup. :: export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER @@ -304,11 +302,10 @@ Debug an erroring/blocking or incomplete backup into a temporary directory. Omit - Development =========== -This project is considered feature complete for the primary maintainer. If you would like a bugfix or enhancement and can not sponsor the work, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if desired. +This project is considered feature complete for the primary maintainer @josegonzalez. 
If you would like a bugfix or enhancement, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if you'd like to sponsor the work instead. Testing ------- From a2b13c8109d469930bce5fbcc9860677b9188e25 Mon Sep 17 00:00:00 2001 From: hozza Date: Tue, 7 Nov 2023 16:08:00 +0000 Subject: [PATCH 189/455] fix readme wording and format --- README.rst | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 4d1e2da..1ad83f0 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ github-backup |PyPI| |Python Versions| -The package can be used to backup an *entire* `Github `_ organization, repository or user account, including starred, issues and wikis in the most appropriate format (clones for wikis, json files for issues). +The package can be used to backup an *entire* `Github `_ organization, repository or user account, including starred repos, issues and wikis in the most appropriate format (clones for wikis, json files for issues). Requirements ============ @@ -145,7 +145,7 @@ Authentication **Password-based authentication** will fail if you have two-factor authentication enabled, and will `be deprecated `_ by 2023 EOY. -``--username`` is used for basic password authentication and separate from the positional argument ``USER``, which specifies the user account you wish to backing up. +``--username`` is used for basic password authentication and separate from the positional argument ``USER``, which specifies the user account you wish to back up. **Classic tokens** are `slightly less secure `_ as they provide very coarse-grained permissions. @@ -155,24 +155,22 @@ If you need authentication for long-running backups (e.g. for a cron job) it is Fine Tokens ~~~~~~~~~~~ -You can "generate new token" and choose the repository scope, either specific repos or all repos. 
On Github this is under *Settings -> Developer Settings -> Personal access tokens -> Fine-grained Tokens* +You can "generate new token", choosing the repository scope by selecting specific repos or all repos. On Github this is under *Settings -> Developer Settings -> Personal access tokens -> Fine-grained Tokens* Customise the permissions for your use case, but for a personal account full backup you'll need to enable the following permissions: **User permissions**: Read access to followers, starring, and watching. -**Repository permissions**: Read access to actions, code, commit statuses, environments, issues, merge queues, metadata, pages, pull requests, repository advisories, and repository hooks. +**Repository permissions**: Read access to code, commit statuses, issues, metadata, pages, pull requests, and repository hooks. Prefer SSH ~~~~~~~~~~ -If cloning repos is enabled with ``--repositories``, ``--all-starred``, ``--wikis``, ``--gists``, ``--starred-gists`` using the ``-prefer-ssh`` argument will use ssh for cloning the git repos. +If cloning repos is enabled with ``--repositories``, ``--all-starred``, ``--wikis``, ``--gists``, ``--starred-gists`` using the ``--prefer-ssh`` argument will use ssh for cloning the git repos, but all other connections will still use their own protocol, e.g. API requests for issues uses HTTPS. To clone with SSH, you'll need SSH authentication setup `as usual with Github `_, e.g. via SSH public and private keys. -All other connections will still use their own protocol, e.g. API requests for issues uses HTTPS. - Using the Keychain on Mac OSX ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -193,11 +191,11 @@ Note: When you run github-backup, you will be asked whether you want to allow " Github Rate-limit and Throttling -------------------------------- -``github-backup`` will automatically throttle itself based on feedback from the Github API. +"github-backup" will automatically throttle itself based on feedback from the Github API. 
Their API is usually rate-limited to 5000 calls per hour. The API will ask github-backup to pause until a specific time when the limit is reset again (at the start of the next hour). This continues until the backup is complete. -During a large backup such as ``--all-starred``, and on a fast connection this can result in (~20 min) pauses with bursts of API calls periodically maxing out the API limit. If this is not suitable `it has been observed `_ under real-world conditions that overriding the throttle with ``--throttle-limit 5000 --throttle-pause 0.6`` provides a smooth rate across the hour, although a ``--throttle-pause 0.72`` (3600 seconds [1 hour] / 5000 limit) is theoretically safer to prevent rate-limit pauses. +During a large backup, such as ``--all-starred``, and on a fast connection this can result in (~20 min) pauses with bursts of API calls periodically maxing out the API limit. If this is not suitable `it has been observed `_ under real-world conditions that overriding the throttle with ``--throttle-limit 5000 --throttle-pause 0.6`` provides a smooth rate across the hour, although a ``--throttle-pause 0.72`` (3600 seconds [1 hour] / 5000 limit) is theoretically safer to prevent large rate-limit pauses. About Git LFS @@ -219,25 +217,25 @@ The ``--all`` argument does not include; cloning private repos (``-P, --private` Cloning all starred size ------------------------ -Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. thousands of JSON issues files, assets and the repos etc. Consider just storing the links to starred repos with ``--starred``. +Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. commonly starred repos can have tens of thousands of issues, many large assets and the repo itself etc. 
Consider just storing links to starred repos in JSON format with ``--starred``. Incremental Backup ------------------- -Using (``-i, --incremental``) will request only new data from the API since the last run (successful or not). e.g. only request issues from the API since the last run. +Using (``-i, --incremental``) will only request new data from the API **since the last run (successful or not)**. e.g. only request issues from the API since the last run. This means any blocking errors on previous runs can cause a large amount of missing data in backups. Known blocking errors --------------------- -Some errors will block the backup by exit the script, such as receiving a 403 Forbidden error from the Github API. +Some errors will block the backup run by exiting the script. e.g. receiving a 403 Forbidden error from the Github API. If the incremental argument is used, this will result in the next backup only requesting API data since the last blocked/failed run. Potentially causing unexpected large amounts of missing data. It's therefore recommended to only use the incremental argument if the output/result is being actively monitored, or complemented with periodic full non-incremental runs, to avoid unexpected missing data in regular backup runs. -1. **Starred public repo blocking** +1. **Starred public repo hooks blocking** Since the ``--all`` argument includes ``--hooks``, if you use ``--all`` and ``--all-starred`` together to clone a user's starred public repositories, the backup will likely error and block the backup from continuing. @@ -253,9 +251,9 @@ It's therefore recommended to only use the incremental argument if the output/re "bare" is actually "mirror" -------------------------- -Using the bare clone argument (``--bare``) will actually call git's ``clone --mirror`` command. There's a subtle difference between `bare `_ and `mirror `_ clone.
:: - - Compared to --bare, --mirror not only maps local branches of the source to local branches of the target, it maps all refs (including remote-tracking branches, notes etc.) and sets up a refspec configuration such that all these refs are overwritten by a git remote update in the target repository. +Using the bare clone argument (``--bare``) will actually call git's ``clone --mirror`` command. There's a subtle difference between `bare `_ and `mirror `_ clone. + +*From git docs "Compared to --bare, --mirror not only maps local branches of the source to local branches of the target, it maps all refs (including remote-tracking branches, notes etc.) and sets up a refspec configuration such that all these refs are overwritten by a git remote update in the target repository."* Starred gists vs starred repo behaviour @@ -267,7 +265,7 @@ The starred normal repo cloning (``--all-starred``) argument stores starred repo Skip existing on incomplete backups ------------------------------------------------------- -The ``--skip-existing`` argument will skip a backup if the directory already exists, regardless of if the backup in that directory was not successfully completed (perhaps due to a blocking error). This may result in unexpected missing data in a regular backup. +The ``--skip-existing`` argument will skip a backup if the directory already exists, even if the backup in that directory failed (perhaps due to a blocking error). This may result in unexpected missing data in a regular backup. 
Github Backup Examples @@ -293,7 +291,7 @@ Quietly and incrementally backup useful Github user data (public and private rep github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER -Debug an erroring/blocking or incomplete backup into a temporary directory. Omit "incremental" to fix a previous incomplete backup. :: +Debug an error/block or incomplete backup into a temporary directory. Omit "incremental" to fill a previous incomplete backup. :: export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER From 81876a2bb35006b0231c3b96ebc8877b56e561d1 Mon Sep 17 00:00:00 2001 From: hozza Date: Tue, 7 Nov 2023 16:08:35 +0000 Subject: [PATCH 190/455] add contributor section --- README.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.rst b/README.rst index 1ad83f0..0f388bb 100644 --- a/README.rst +++ b/README.rst @@ -305,6 +305,15 @@ Development This project is considered feature complete for the primary maintainer @josegonzalez. If you would like a bugfix or enhancement, pull requests are welcome. Feel free to contact the maintainer for consulting estimates if you'd like to sponsor the work instead. +Contributors +------------ + +A huge thanks to all the contributors! + + + + + Testing ------- From 5dd0744ce0189efdf6cf6bc5d39869215b330c97 Mon Sep 17 00:00:00 2001 From: hozza Date: Tue, 7 Nov 2023 16:12:26 +0000 Subject: [PATCH 191/455] fix rst html --- README.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 0f388bb..1493bce 100644 --- a/README.rst +++ b/README.rst @@ -310,9 +310,11 @@ Contributors A huge thanks to all the contributors! - - - +.. 
raw:: html + + + + Testing ------- From 3b0c08cdc1434075a71c965e49b933b5b3dec081 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:08:19 -0500 Subject: [PATCH 192/455] fix: correct lint issues and show errors on lint --- README.rst | 10 +++++----- release | 57 ++++++++++++++++++++++++++++-------------------------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/README.rst b/README.rst index 1493bce..6a1c170 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ Using PIP via Github (more likely the latest version):: Python scripts are unlikely to be included in your ``$PATH`` by default, this means it cannot be run directly in terminal with ``$ github-backup ...``, you can either add python's install path to your environments ``$PATH`` or call the script directly e.g. using ``$ ~/.local/bin/github-backup``.* Basic Help -=========== +========== Show the CLI help output:: @@ -220,7 +220,7 @@ Cloning all starred size Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. commonly starred repos can have tens of thousands of issues, many large assets and the repo itself etc. Consider just storing links to starred repos in JSON format with ``--starred``. Incremental Backup -------------------- +------------------ Using (``-i, --incremental``) will only request new data from the API **since the last run (successful or not)**. e.g. only request issues from the API since the last run. @@ -249,7 +249,7 @@ It's therefore recommended to only use the incremental argument if the output/re "bare" is actually "mirror" --------------------------- +--------------------------- Using the bare clone argument (``--bare``) will actually call git's ``clone --mirror`` command. There's a subtle difference between `bare `_ and `mirror `_ clone. 
@@ -263,13 +263,13 @@ The starred normal repo cloning (``--all-starred``) argument stores starred repo Skip existing on incomplete backups -------------------------------------------------------- +----------------------------------- The ``--skip-existing`` argument will skip a backup if the directory already exists, even if the backup in that directory failed (perhaps due to a blocking error). This may result in unexpected missing data in a regular backup. Github Backup Examples -======== +====================== Backup all repositories, including private ones using a classic token:: diff --git a/release b/release index 21a14f7..4238245 100755 --- a/release +++ b/release @@ -1,9 +1,10 @@ #!/usr/bin/env bash -set -eo pipefail; [[ $RELEASE_TRACE ]] && set -x +set -eo pipefail +[[ $RELEASE_TRACE ]] && set -x if [[ ! -f setup.py ]]; then - echo -e "${RED}WARNING: Missing setup.py${COLOR_OFF}\n" - exit 1 + echo -e "${RED}WARNING: Missing setup.py${COLOR_OFF}\n" + exit 1 fi PACKAGE_NAME="$(cat setup.py | grep 'name="' | head | cut -d '"' -f2)" @@ -11,27 +12,27 @@ INIT_PACKAGE_NAME="$(echo "${PACKAGE_NAME//-/_}")" PUBLIC="true" # Colors -COLOR_OFF="\033[0m" # unsets color to term fg color -RED="\033[0;31m" # red -GREEN="\033[0;32m" # green -YELLOW="\033[0;33m" # yellow -MAGENTA="\033[0;35m" # magenta -CYAN="\033[0;36m" # cyan +COLOR_OFF="\033[0m" # unsets color to term fg color +RED="\033[0;31m" # red +GREEN="\033[0;32m" # green +YELLOW="\033[0;33m" # yellow +MAGENTA="\033[0;35m" # magenta +CYAN="\033[0;36m" # cyan # ensure wheel is available -pip install wheel > /dev/null +pip install wheel >/dev/null command -v gitchangelog >/dev/null 2>&1 || { echo -e "${RED}WARNING: Missing gitchangelog binary, please run: pip install gitchangelog==3.0.4${COLOR_OFF}\n" exit 1 } -command -v rst-lint > /dev/null || { +command -v rst-lint >/dev/null || { echo -e "${RED}WARNING: Missing rst-lint binary, please run: pip install restructuredtext_lint${COLOR_OFF}\n" exit 1 } -command -v twine 
> /dev/null || { +command -v twine >/dev/null || { echo -e "${RED}WARNING: Missing twine binary, please run: pip install twine==3.2.0${COLOR_OFF}\n" exit 1 } @@ -43,41 +44,41 @@ fi echo -e "\n${GREEN}STARTING RELEASE PROCESS${COLOR_OFF}\n" -set +e; -git status | grep -Eo "working (directory|tree) clean" &> /dev/null +set +e +git status | grep -Eo "working (directory|tree) clean" &>/dev/null if [ ! $? -eq 0 ]; then # working directory is NOT clean echo -e "${RED}WARNING: You have uncomitted changes, you may have forgotten something${COLOR_OFF}\n" exit 1 fi -set -e; +set -e echo -e "${YELLOW}--->${COLOR_OFF} Updating local copy" git pull -q origin master echo -e "${YELLOW}--->${COLOR_OFF} Retrieving release versions" -current_version=$(cat ${INIT_PACKAGE_NAME}/__init__.py |grep '__version__ ='|sed 's/[^0-9.]//g') +current_version=$(cat ${INIT_PACKAGE_NAME}/__init__.py | grep '__version__ =' | sed 's/[^0-9.]//g') major=$(echo $current_version | awk '{split($0,a,"."); print a[1]}') minor=$(echo $current_version | awk '{split($0,a,"."); print a[2]}') patch=$(echo $current_version | awk '{split($0,a,"."); print a[3]}') if [[ "$@" == "major" ]]; then - major=$(($major + 1)); + major=$(($major + 1)) minor="0" patch="0" elif [[ "$@" == "minor" ]]; then - minor=$(($minor + 1)); + minor=$(($minor + 1)) patch="0" elif [[ "$@" == "patch" ]]; then - patch=$(($patch + 1)); + patch=$(($patch + 1)) fi next_version="${major}.${minor}.${patch}" -echo -e "${YELLOW} >${COLOR_OFF} ${MAGENTA}${current_version}${COLOR_OFF} -> ${MAGENTA}${next_version}${COLOR_OFF}" +echo -e "${YELLOW} >${COLOR_OFF} ${MAGENTA}${current_version}${COLOR_OFF} -> ${MAGENTA}${next_version}${COLOR_OFF}" echo -e "${YELLOW}--->${COLOR_OFF} Ensuring readme passes lint checks (if this fails, run rst-lint)" -rst-lint README.rst > /dev/null +rst-lint README.rst || exit 1 echo -e "${YELLOW}--->${COLOR_OFF} Creating necessary temp file" tempfoo=$(basename $0) @@ -90,23 +91,25 @@ find_this="__version__ = 
\"$current_version\"" replace_with="__version__ = \"$next_version\"" echo -e "${YELLOW}--->${COLOR_OFF} Updating ${INIT_PACKAGE_NAME}/__init__.py" -sed "s/$find_this/$replace_with/" ${INIT_PACKAGE_NAME}/__init__.py > $TMPFILE && mv $TMPFILE ${INIT_PACKAGE_NAME}/__init__.py +sed "s/$find_this/$replace_with/" ${INIT_PACKAGE_NAME}/__init__.py >$TMPFILE && mv $TMPFILE ${INIT_PACKAGE_NAME}/__init__.py if [ -f docs/conf.py ]; then echo -e "${YELLOW}--->${COLOR_OFF} Updating docs" find_this="version = '${current_version}'" replace_with="version = '${next_version}'" - sed "s/$find_this/$replace_with/" docs/conf.py > $TMPFILE && mv $TMPFILE docs/conf.py + sed "s/$find_this/$replace_with/" docs/conf.py >$TMPFILE && mv $TMPFILE docs/conf.py find_this="version = '${current_version}'" replace_with="release = '${next_version}'" - sed "s/$find_this/$replace_with/" docs/conf.py > $TMPFILE && mv $TMPFILE docs/conf.py + sed "s/$find_this/$replace_with/" docs/conf.py >$TMPFILE && mv $TMPFILE docs/conf.py fi echo -e "${YELLOW}--->${COLOR_OFF} Updating CHANGES.rst for new release" version_header="$next_version ($(date +%F))" -set +e; dashes=$(yes '-'|head -n ${#version_header}|tr -d '\n') ; set -e -gitchangelog |sed "4s/.*/$version_header/"|sed "5s/.*/$dashes/" > $TMPFILE && mv $TMPFILE CHANGES.rst +set +e +dashes=$(yes '-' | head -n ${#version_header} | tr -d '\n') +set -e +gitchangelog | sed "4s/.*/$version_header/" | sed "5s/.*/$dashes/" >$TMPFILE && mv $TMPFILE CHANGES.rst echo -e "${YELLOW}--->${COLOR_OFF} Adding changed files to git" git add CHANGES.rst README.rst ${INIT_PACKAGE_NAME}/__init__.py @@ -124,7 +127,7 @@ git push -q origin master && git push -q --tags if [[ "$PUBLIC" == "true" ]]; then echo -e "${YELLOW}--->${COLOR_OFF} Creating python release" cp README.rst README - python setup.py sdist bdist_wheel > /dev/null + python setup.py sdist bdist_wheel >/dev/null twine upload dist/* rm README fi From 878713a4e08061078d2f84ee537af7431f53bbd4 Mon Sep 17 00:00:00 2001 From: 
Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:22:36 -0500 Subject: [PATCH 193/455] fix: validate release before committing and uploading it --- release | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/release b/release index 4238245..14150bc 100755 --- a/release +++ b/release @@ -118,6 +118,15 @@ if [ -f docs/conf.py ]; then git add docs/conf.py; fi echo -e "${YELLOW}--->${COLOR_OFF} Creating release" git commit -q -m "Release version $next_version" +if [[ "$PUBLIC" == "true" ]]; then + echo -e "${YELLOW}--->${COLOR_OFF} Creating python release files" + cp README.rst README + python setup.py sdist bdist_wheel >/dev/null + + echo -e "${YELLOW}--->${COLOR_OFF} Validating long_description" + twine check dist/* +fi + echo -e "${YELLOW}--->${COLOR_OFF} Tagging release" git tag -a $next_version -m "Release version $next_version" @@ -125,9 +134,7 @@ echo -e "${YELLOW}--->${COLOR_OFF} Pushing release and tags to github" git push -q origin master && git push -q --tags if [[ "$PUBLIC" == "true" ]]; then - echo -e "${YELLOW}--->${COLOR_OFF} Creating python release" - cp README.rst README - python setup.py sdist bdist_wheel >/dev/null + echo -e "${YELLOW}--->${COLOR_OFF} Uploading python release" twine upload dist/* rm README fi From b60034a9d774b89ea0a975bbb410bda5a1d297e6 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:25:28 -0500 Subject: [PATCH 194/455] fix: do not use raw property in readme This is disabled on pypi. --- README.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 6a1c170..22d3062 100644 --- a/README.rst +++ b/README.rst @@ -310,11 +310,9 @@ Contibuters A huge thanks to all the contibuters! -.. raw:: html - - - - +.. 
image:: https://contrib.rocks/image?repo=josegonzalez/python-github-backup + :target: https://github.com/josegonzalez/python-github-backup/graphs/contributors + :alt: contributors Testing ------- From e0bf80a6aa1daa0ae2043362b8eb5fb21004ce27 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 9 Dec 2023 05:26:00 +0000 Subject: [PATCH 195/455] Release version 0.44.0 --- CHANGES.rst | 66 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b4f774b..7d11cbb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,73 @@ Changelog ========= -0.43.1 (2023-05-29) +0.44.0 (2023-12-09) ------------------- ------------------------ + +Fix +~~~ +- Do not use raw property in readme. [Jose Diaz-Gonzalez] + + This is disabled on pypi. +- Validate release before committing and uploading it. [Jose Diaz- + Gonzalez] +- Correct lint issues and show errors on lint. [Jose Diaz-Gonzalez] +- Minor cosmetic changes. [ZhymabekRoman] +- Add forgotten variable formatting. [ZhymabekRoman] +- Refactor logging Based on #195. [ZhymabekRoman] +- Minor typo fix. [Zhymabek Roman] + +Other +~~~~~ +- Bump certifi from 2023.5.7 to 2023.7.22. [dependabot[bot]] + + Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.5.7 to 2023.7.22. + - [Commits](https://github.com/certifi/python-certifi/compare/2023.05.07...2023.07.22) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-type: direct:production + ... +- Checkout everything. [Halvor Holsten Strand] +- Added automatic release workflow, for use with GitHub Actions. [Halvor + Holsten Strand] +- Feat: create Dockerfile. [Tom Plant] +- Fix rst html. [hozza] +- Add contributor section. [hozza] +- Fix readme wording and format. [hozza] +- Fixed readme working and layout. [hozza] +- Fix readme formatting, spelling and layout. 
[hozza] +- Added details usage and examples including gotchas, errors and + development instructions. [hozza] +- Added verbose install instructions. [hozza] +- Bump urllib3 from 2.0.2 to 2.0.7. [dependabot[bot]] + + Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.0.2 to 2.0.7. + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.0.2...2.0.7) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-type: direct:production + ... +- Suggested modification to fix win32 logging failure, due to local + variable scope. Logger does not appear to have any utility within + "logging_subprocess". [Halvor Holsten Strand] +- Simplified one if/elif scenario. Extracted file reading of another + if/elif scenario. [Halvor Holsten Strand] +- Ran black. [Halvor Holsten Strand] +- Keep backwards compatability by going back to "--token" for classic. + Allow "file://" uri for "--token-fine". [Halvor Holsten Strand] +- Add support for fine-grained tokens. [froggleston] +- Refactor logging and add support for quiet flag. [Harrison Wright] + + +0.43.1 (2023-05-29) +------------------- - Chore: add release requirements. 
[Jose Diaz-Gonzalez] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index fb8a056..1b33897 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.43.1" +__version__ = "0.44.0" From 2724f02b0ae33b6952c6aec713ad7fc8fd47c795 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:30:31 -0500 Subject: [PATCH 196/455] chore: reformat file and update flake8 --- github_backup/github_backup.py | 27 ++++++++++----------------- release-requirements.txt | 10 +++++----- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index f5b69b2..860b73a 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1,7 +1,6 @@ #!/usr/bin/env python from __future__ import print_function -import socket import argparse import base64 @@ -10,23 +9,20 @@ import errno import getpass import json +import logging import os +import platform import re import select +import socket import subprocess import sys -import logging import time -import platform -from urllib.parse import urlparse -from urllib.parse import quote as urlquote -from urllib.parse import urlencode -from urllib.error import HTTPError, URLError -from urllib.request import urlopen -from urllib.request import Request -from urllib.request import HTTPRedirectHandler -from urllib.request import build_opener from http.client import IncompleteRead +from urllib.error import HTTPError, URLError +from urllib.parse import quote as urlquote +from urllib.parse import urlencode, urlparse +from urllib.request import HTTPRedirectHandler, Request, build_opener, urlopen try: from . 
import __version__ @@ -41,10 +37,7 @@ def logging_subprocess( - popenargs, - stdout_log_level=logging.DEBUG, - stderr_log_level=logging.ERROR, - **kwargs + popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs ): """ Variant of subprocess.call that accepts a logger instead of stdout/stderr, @@ -626,12 +619,12 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): raise Exception(", ".join(errors)) if len(errors) == 0: - if type(response) == list: + if type(response) is list: for resp in response: yield resp if len(response) < per_page: break - elif type(response) == dict and single_request: + elif type(response) is dict and single_request: yield response if len(errors) > 0: diff --git a/release-requirements.txt b/release-requirements.txt index bbd7508..6f79393 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,9 +1,9 @@ bleach==6.0.0 -certifi==2023.7.22 +certifi==2023.5.7 charset-normalizer==3.1.0 colorama==0.4.6 docutils==0.20.1 -flake8==6.0.0 +flake8==6.1.0 gitchangelog==3.0.4 idna==3.4 importlib-metadata==6.6.0 @@ -14,8 +14,8 @@ mccabe==0.7.0 mdurl==0.1.2 more-itertools==9.1.0 pkginfo==1.9.6 -pycodestyle==2.10.0 -pyflakes==3.0.1 +pycodestyle==2.11.1 +pyflakes==3.1.0 Pygments==2.15.1 readme-renderer==37.3 requests==2.31.0 @@ -26,6 +26,6 @@ rich==13.3.5 six==1.16.0 tqdm==4.65.0 twine==4.0.2 -urllib3==2.0.7 +urllib3==2.0.2 webencodings==0.5.1 zipp==3.15.0 From f2b4f566a12d17682d491f721e249913341f42e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 9 Dec 2023 05:31:23 +0000 Subject: [PATCH 197/455] chore(deps): bump certifi from 2023.5.7 to 2023.7.22 Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.5.7 to 2023.7.22. - [Commits](https://github.com/certifi/python-certifi/compare/2023.05.07...2023.07.22) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 6f79393..f32754b 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,5 +1,5 @@ bleach==6.0.0 -certifi==2023.5.7 +certifi==2023.7.22 charset-normalizer==3.1.0 colorama==0.4.6 docutils==0.20.1 From 54c81de3d7f56f601643e15e4091edf6af61c589 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 9 Dec 2023 05:31:30 +0000 Subject: [PATCH 198/455] chore(deps): bump urllib3 from 2.0.2 to 2.0.7 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.0.2 to 2.0.7. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.0.2...2.0.7) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 6f79393..246b1c1 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -26,6 +26,6 @@ rich==13.3.5 six==1.16.0 tqdm==4.65.0 twine==4.0.2 -urllib3==2.0.2 +urllib3==2.0.7 webencodings==0.5.1 zipp==3.15.0 From a9f82faa1c446cbf9bc4aec689126a6357d9daa4 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:33:01 -0500 Subject: [PATCH 199/455] feat: install autopep8 --- release-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/release-requirements.txt b/release-requirements.txt index f32754b..1d873b6 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,3 +1,4 @@ +autopep8==2.0.4 bleach==6.0.0 certifi==2023.7.22 charset-normalizer==3.1.0 From fda71b0467fca0e2aa01344e1e0377ec5835a26b Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:39:35 -0500 Subject: [PATCH 200/455] tests: add lint github action workflow --- .github/workflows/lint.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..894edf3 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,28 @@ +--- +name: "lint" + +# yamllint disable-line rule:truthy +on: + push: + branches: + - "*" + +jobs: + lint: + name: tagged-release + runs-on: ubuntu-22.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + cache: "pip" + - run: pip install -r requirements.txt + - run: flake8 --ignore=E501,E203,W503 + - run: black . 
+ - run: rst-lint README.rst From 2615cab1143da7a4b839e700d3b776270232e66d Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:40:58 -0500 Subject: [PATCH 201/455] tests: install correct dependencies and rename job --- .github/workflows/lint.yml | 4 ++-- release-requirements.txt | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 894edf3..4a3221e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -9,7 +9,7 @@ on: jobs: lint: - name: tagged-release + name: lint runs-on: ubuntu-22.04 steps: @@ -22,7 +22,7 @@ jobs: with: python-version: "3.8" cache: "pip" - - run: pip install -r requirements.txt + - run: pip install -r release-requirements.txt - run: flake8 --ignore=E501,E203,W503 - run: black . - run: rst-lint README.rst diff --git a/release-requirements.txt b/release-requirements.txt index 1d873b6..2bb49df 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,9 @@ autopep8==2.0.4 +black==23.11.0 bleach==6.0.0 -certifi==2023.7.22 +certifi==2023.5.7 charset-normalizer==3.1.0 +click==8.1.7 colorama==0.4.6 docutils==0.20.1 flake8==6.1.0 @@ -14,7 +16,11 @@ markdown-it-py==2.2.0 mccabe==0.7.0 mdurl==0.1.2 more-itertools==9.1.0 +mypy-extensions==1.0.0 +packaging==23.2 +pathspec==0.11.2 pkginfo==1.9.6 +platformdirs==4.1.0 pycodestyle==2.11.1 pyflakes==3.1.0 Pygments==2.15.1 From 76ff7f3b0d2dba8ff69ea076bd40fc634a870c08 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:42:09 -0500 Subject: [PATCH 202/455] chore: remove circleci as tests now run in github actions --- .circleci/config.yml | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index f23481c..0000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2.1 - -orbs: - python: 
circleci/python@0.3.2 - -jobs: - build-and-test: - executor: python/default - steps: - - checkout - - python/load-cache - - run: - command: pip install flake8 - name: Install dependencies - - python/save-cache - - run: - command: flake8 --ignore=E501 - name: Lint - -workflows: - main: - jobs: - - build-and-test From f53f7d9b71e5b56c48f1e83eb12bf9548b63cf31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 9 Dec 2023 05:43:45 +0000 Subject: [PATCH 203/455] chore(deps): bump certifi from 2023.5.7 to 2023.7.22 Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.5.7 to 2023.7.22. - [Commits](https://github.com/certifi/python-certifi/compare/2023.05.07...2023.07.22) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 3bfce56..0ce9f96 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ autopep8==2.0.4 black==23.11.0 bleach==6.0.0 -certifi==2023.5.7 +certifi==2023.7.22 charset-normalizer==3.1.0 click==8.1.7 colorama==0.4.6 From 4700a26d9077fd6bbe720494ba1e7b55fcf89d27 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 00:45:20 -0500 Subject: [PATCH 204/455] tests: run lint on pull requests --- .github/workflows/lint.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 4a3221e..a4e282e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -3,9 +3,13 @@ name: "lint" # yamllint disable-line rule:truthy on: + pull_request: + branches: + - '*' push: branches: - - "*" + - 'main' + - 'master' jobs: lint: From 7cdf428e3ae1e3cadb8776b16fdfadae8713d542 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 
00:52:00 -0500 Subject: [PATCH 205/455] fix: use a deploy key to push tags so releases get auto-created --- .github/workflows/automatic-release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index 05682f6..f5b8f64 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -21,6 +21,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + ssh-key: ${{ secrets.DEPLOY_PRIVATE_KEY }} - name: Setup Git run: | git config --local user.email "action@github.com" From aaf45022cc7a0f3c2d456fa9c5d245110a66fd91 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 9 Dec 2023 05:53:43 +0000 Subject: [PATCH 206/455] Release version 0.44.1 --- CHANGES.rst | 56 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7d11cbb..6a15e22 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,64 @@ Changelog ========= -0.44.0 (2023-12-09) +0.44.1 (2023-12-09) ------------------- ------------------------ +Fix +~~~ +- Use a deploy key to push tags so releases get auto-created. [Jose + Diaz-Gonzalez] + +Other +~~~~~ +- Chore(deps): bump certifi from 2023.5.7 to 2023.7.22. + [dependabot[bot]] + + Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.5.7 to 2023.7.22. + - [Commits](https://github.com/certifi/python-certifi/compare/2023.05.07...2023.07.22) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-type: direct:production + ... +- Tests: run lint on pull requests. [Jose Diaz-Gonzalez] +- Chore(deps): bump urllib3 from 2.0.2 to 2.0.7. [dependabot[bot]] + + Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.0.2 to 2.0.7. 
+ - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.0.2...2.0.7) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-type: direct:production + ... +- Chore: remove circleci as tests now run in github actions. [Jose Diaz- + Gonzalez] +- Tests: install correct dependencies and rename job. [Jose Diaz- + Gonzalez] +- Tests: add lint github action workflow. [Jose Diaz-Gonzalez] +- Feat: install autopep8. [Jose Diaz-Gonzalez] +- Chore(deps): bump certifi from 2023.5.7 to 2023.7.22. + [dependabot[bot]] + + Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.5.7 to 2023.7.22. + - [Commits](https://github.com/certifi/python-certifi/compare/2023.05.07...2023.07.22) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-type: direct:production + ... +- Chore: reformat file and update flake8. [Jose Diaz-Gonzalez] + + +0.44.0 (2023-12-09) +------------------- + Fix ~~~ - Do not use raw property in readme. 
[Jose Diaz-Gonzalez] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 1b33897..7f532dc 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.44.0" +__version__ = "0.44.1" From bd65c3d5d6674f3aab31b69b1c05033e62c87661 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 12:25:51 -0500 Subject: [PATCH 207/455] feat: add dependabot config to repository --- .github/dependabot.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..88bb03b --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: +- package-ecosystem: pip + directory: "/" + schedule: + interval: daily + time: "13:00" + groups: + python-packages: + patterns: + - "*" From 96592295e1bd5999d093e030308380fa1a0fd0ab Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 12:30:45 -0500 Subject: [PATCH 208/455] chore: update gitignore --- .gitignore | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 52a12ea..f0ed9db 100644 --- a/.gitignore +++ b/.gitignore @@ -18,13 +18,13 @@ pkg # Debian Files debian/files -debian/python-aws-hostname* +debian/python-github-backup* # Sphinx build doc/_build # Generated man page -doc/aws_hostname.1 +doc/github_backup.1 # Annoying macOS files .DS_Store @@ -34,4 +34,11 @@ doc/aws_hostname.1 .vscode .atom -README \ No newline at end of file +README + +# RSA +id_rsa +id_rsa.pub + +# Virtual env +venv From 2de69beffa7b320818078ef8bf1ba985fce4607d Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 12:30:53 -0500 Subject: [PATCH 209/455] chore: format yaml --- .github/workflows/lint.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a4e282e..8a98c68 100644 
--- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -5,11 +5,11 @@ name: "lint" on: pull_request: branches: - - '*' + - "*" push: branches: - - 'main' - - 'master' + - "main" + - "master" jobs: lint: @@ -30,3 +30,4 @@ jobs: - run: flake8 --ignore=E501,E203,W503 - run: black . - run: rst-lint README.rst + - run: python setup.py sdist bdist_wheel && twine check dist/* From 4c5187bcff0aa4c93e0f386d1d92f68162ce56c2 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 12:31:28 -0500 Subject: [PATCH 210/455] chore: format python code --- bin/github-backup | 24 +++++++++++++----------- setup.py | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index b983cdf..b33d19f 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -1,12 +1,8 @@ #!/usr/bin/env python -import os, sys, logging - -logging.basicConfig( - format='%(asctime)s.%(msecs)03d: %(message)s', - datefmt='%Y-%m-%dT%H:%M:%S', - level=logging.INFO -) +import logging +import os +import sys from github_backup.github_backup import ( backup_account, @@ -20,6 +16,12 @@ from github_backup.github_backup import ( retrieve_repositories, ) +logging.basicConfig( + format="%(asctime)s.%(msecs)03d: %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S", + level=logging.INFO, +) + def main(): args = parse_args() @@ -29,7 +31,7 @@ def main(): output_directory = os.path.realpath(args.output_directory) if not os.path.isdir(output_directory): - logger.info('Create output directory {0}'.format(output_directory)) + logger.info("Create output directory {0}".format(output_directory)) mkdir_p(output_directory) if args.lfs_clone: @@ -41,10 +43,10 @@ def main(): logger.root.setLevel(log_level) if not args.as_app: - logger.info('Backing up user {0} to {1}'.format(args.user, output_directory)) + logger.info("Backing up user {0} to {1}".format(args.user, output_directory)) authenticated_user = get_authenticated_user(args) else: - authenticated_user = {'login': 
None} + authenticated_user = {"login": None} repositories = retrieve_repositories(args, authenticated_user) repositories = filter_repositories(args, repositories) @@ -52,7 +54,7 @@ def main(): backup_account(args, output_directory) -if __name__ == '__main__': +if __name__ == "__main__": try: main() except Exception as e: diff --git a/setup.py b/setup.py index 898e4fb..ebdd532 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os + from github_backup import __version__ try: From 31a6e52a5e9fbe3cfcee069a9545c238f877e1df Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 9 Dec 2023 12:34:58 -0500 Subject: [PATCH 211/455] fix: ensure wheel is installed --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 8a98c68..d3df703 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -26,7 +26,7 @@ jobs: with: python-version: "3.8" cache: "pip" - - run: pip install -r release-requirements.txt + - run: pip install -r release-requirements.txt && pip install wheel - run: flake8 --ignore=E501,E203,W503 - run: black . 
- run: rst-lint README.rst From 500c97c60e8a41ddf368279d97224bb20b69b5e4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 9 Dec 2023 18:30:23 +0000 Subject: [PATCH 212/455] chore(deps): bump the python-packages group with 15 updates Bumps the python-packages group with 15 updates: | Package | From | To | | --- | --- | --- | | [bleach](https://github.com/mozilla/bleach) | `6.0.0` | `6.1.0` | | [certifi](https://github.com/certifi/python-certifi) | `2023.7.22` | `2023.11.17` | | [charset-normalizer](https://github.com/Ousret/charset_normalizer) | `3.1.0` | `3.3.2` | | [idna](https://github.com/kjd/idna) | `3.4` | `3.6` | | [importlib-metadata](https://github.com/python/importlib_metadata) | `6.6.0` | `7.0.0` | | [jaraco-classes](https://github.com/jaraco/jaraco.classes) | `3.2.3` | `3.3.0` | | [keyring](https://github.com/jaraco/keyring) | `23.13.1` | `24.3.0` | | [markdown-it-py](https://github.com/executablebooks/markdown-it-py) | `2.2.0` | `3.0.0` | | [more-itertools](https://github.com/more-itertools/more-itertools) | `9.1.0` | `10.1.0` | | [pygments](https://github.com/pygments/pygments) | `2.15.1` | `2.17.2` | | [readme-renderer](https://github.com/pypa/readme_renderer) | `37.3` | `42.0` | | [rich](https://github.com/Textualize/rich) | `13.3.5` | `13.7.0` | | [tqdm](https://github.com/tqdm/tqdm) | `4.65.0` | `4.66.1` | | [urllib3](https://github.com/urllib3/urllib3) | `2.0.7` | `2.1.0` | | [zipp](https://github.com/jaraco/zipp) | `3.15.0` | `3.17.0` | Updates `bleach` from 6.0.0 to 6.1.0 - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) - [Commits](https://github.com/mozilla/bleach/compare/v6.0.0...v6.1.0) Updates `certifi` from 2023.7.22 to 2023.11.17 - [Commits](https://github.com/certifi/python-certifi/compare/2023.07.22...2023.11.17) Updates `charset-normalizer` from 3.1.0 to 3.3.2 - [Release notes](https://github.com/Ousret/charset_normalizer/releases) - 
[Changelog](https://github.com/Ousret/charset_normalizer/blob/master/CHANGELOG.md) - [Commits](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.3.2) Updates `idna` from 3.4 to 3.6 - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.4...v3.6) Updates `importlib-metadata` from 6.6.0 to 7.0.0 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v6.6.0...v7.0.0) Updates `jaraco-classes` from 3.2.3 to 3.3.0 - [Release notes](https://github.com/jaraco/jaraco.classes/releases) - [Changelog](https://github.com/jaraco/jaraco.classes/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/jaraco.classes/compare/v3.2.3...v3.3.0) Updates `keyring` from 23.13.1 to 24.3.0 - [Release notes](https://github.com/jaraco/keyring/releases) - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/keyring/compare/v23.13.1...v24.3.0) Updates `markdown-it-py` from 2.2.0 to 3.0.0 - [Release notes](https://github.com/executablebooks/markdown-it-py/releases) - [Changelog](https://github.com/executablebooks/markdown-it-py/blob/master/CHANGELOG.md) - [Commits](https://github.com/executablebooks/markdown-it-py/compare/v2.2.0...v3.0.0) Updates `more-itertools` from 9.1.0 to 10.1.0 - [Release notes](https://github.com/more-itertools/more-itertools/releases) - [Commits](https://github.com/more-itertools/more-itertools/compare/v9.1.0...v10.1.0) Updates `pygments` from 2.15.1 to 2.17.2 - [Release notes](https://github.com/pygments/pygments/releases) - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) - [Commits](https://github.com/pygments/pygments/compare/2.15.1...2.17.2) Updates `readme-renderer` from 37.3 to 42.0 - [Release notes](https://github.com/pypa/readme_renderer/releases) - 
[Changelog](https://github.com/pypa/readme_renderer/blob/main/CHANGES.rst) - [Commits](https://github.com/pypa/readme_renderer/compare/37.3...42.0) Updates `rich` from 13.3.5 to 13.7.0 - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v13.3.5...v13.7.0) Updates `tqdm` from 4.65.0 to 4.66.1 - [Release notes](https://github.com/tqdm/tqdm/releases) - [Commits](https://github.com/tqdm/tqdm/compare/v4.65.0...v4.66.1) Updates `urllib3` from 2.0.7 to 2.1.0 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.0.7...2.1.0) Updates `zipp` from 3.15.0 to 3.17.0 - [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.15.0...v3.17.0) --- updated-dependencies: - dependency-name: bleach dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: certifi dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: charset-normalizer dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: idna dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: importlib-metadata dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: jaraco-classes dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: keyring dependency-type: direct:production update-type: 
version-update:semver-major dependency-group: python-packages - dependency-name: markdown-it-py dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: more-itertools dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: pygments dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: readme-renderer dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: rich dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: tqdm dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: urllib3 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: zipp dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 0ce9f96..4afefe2 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,21 +1,21 @@ autopep8==2.0.4 black==23.11.0 -bleach==6.0.0 -certifi==2023.7.22 -charset-normalizer==3.1.0 +bleach==6.1.0 +certifi==2023.11.17 +charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 docutils==0.20.1 flake8==6.1.0 gitchangelog==3.0.4 -idna==3.4 -importlib-metadata==6.6.0 -jaraco.classes==3.2.3 -keyring==23.13.1 -markdown-it-py==2.2.0 +idna==3.6 +importlib-metadata==7.0.0 +jaraco.classes==3.3.0 +keyring==24.3.0 +markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 -more-itertools==9.1.0 +more-itertools==10.1.0 mypy-extensions==1.0.0 packaging==23.2 pathspec==0.11.2 @@ -23,16 +23,16 @@ pkginfo==1.9.6 platformdirs==4.1.0 pycodestyle==2.11.1 pyflakes==3.1.0 -Pygments==2.15.1 -readme-renderer==37.3 +Pygments==2.17.2 +readme-renderer==42.0 requests==2.31.0 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 -rich==13.3.5 +rich==13.7.0 six==1.16.0 -tqdm==4.65.0 +tqdm==4.66.1 twine==4.0.2 -urllib3==2.0.7 +urllib3==2.1.0 webencodings==0.5.1 -zipp==3.15.0 +zipp==3.17.0 From c8b8b270f668ce09c68da20059e297c66cef3997 Mon Sep 17 00:00:00 2001 From: 8cH9azbsFifZ Date: Sat, 30 Dec 2023 13:01:46 +0100 Subject: [PATCH 213/455] vs code --- python-github-backup.code-workspace | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 python-github-backup.code-workspace diff --git a/python-github-backup.code-workspace b/python-github-backup.code-workspace new file mode 100755 index 0000000..362d7c2 --- /dev/null +++ b/python-github-backup.code-workspace @@ -0,0 +1,7 @@ +{ + "folders": [ + { + "path": "." 
+ } + ] +} \ No newline at end of file From 7fe654129140feb6006b854236820bc4922dd602 Mon Sep 17 00:00:00 2001 From: 8cH9azbsFifZ Date: Sat, 30 Dec 2023 13:04:45 +0100 Subject: [PATCH 214/455] auto docker build --- .github/workflows/docker.yml | 75 ++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 .github/workflows/docker.yml diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..977c32d --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,75 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Create and publish a Docker image + +on: + push: + branches: + - 'master' + - 'main' + - 'dev' + + tags: + - 'v*' + - 'v*.*' + - 'v*.*.*' + - '*' + - '*.*' + - '*.*.*' + pull_request: + branches: + - 'main' + - 'dev' + + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Log in to the Container registry + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha + type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} + + - name: Build and push Docker image + uses: 
docker/build-push-action@v4 + with: + context: . + push: true + platforms: linux/amd64,linux/arm64 + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} From eb88def888df288d9427e28ad9ec5778180753b7 Mon Sep 17 00:00:00 2001 From: BrOleg5 Date: Tue, 2 Jan 2024 12:54:06 +0800 Subject: [PATCH 215/455] Add option to include certain number of the latest releases. --- README.rst | 6 +++++- github_backup/github_backup.py | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 22d3062..0944536 100644 --- a/README.rst +++ b/README.rst @@ -49,7 +49,8 @@ CLI Help output:: [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] - [--releases] [--assets] [--exclude [REPOSITORY [REPOSITORY ...]] + [--releases] [--assets] [--latest-releases INCLUDE_LATEST_RELEASES] + [--exclude [REPOSITORY [REPOSITORY ...]] [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] USER @@ -126,6 +127,9 @@ CLI Help output:: binaries --assets include assets alongside release information; only applies if including releases + --latest-releases INCLUDE_LATEST_RELEASES + include certain number of the latest releases; only + applies if including releases --exclude [REPOSITORY [REPOSITORY ...]] names of repositories to exclude from backup. --throttle-limit THROTTLE_LIMIT diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 860b73a..27c357b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -23,6 +23,7 @@ from urllib.parse import quote as urlquote from urllib.parse import urlencode, urlparse from urllib.request import HTTPRedirectHandler, Request, build_opener, urlopen +from operator import itemgetter try: from . 
import __version__ @@ -382,6 +383,13 @@ def parse_args(args=None): dest="include_assets", help="include assets alongside release information; only applies if including releases", ) + parser.add_argument( + "--latest-releases", + type=int, + default=0, + dest="include_latest_releases", + help="include certain number of the latest releases; only applies if including releases", + ) parser.add_argument( "--throttle-limit", dest="throttle_limit", @@ -1206,8 +1214,14 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F release_template = "{0}/{1}/releases".format(repos_template, repository_fullname) releases = retrieve_data(args, release_template, query_args=query_args) + if args.include_latest_releases and args.include_latest_releases < len(releases): + releases = sorted(releases, key=itemgetter('tag_name'), reverse=True) + releases = releases[:args.include_latest_releases] + logger.info("Saving the latest {0} releases to disk".format(len(releases))) + else: + logger.info("Saving {0} releases to disk".format(len(releases))) + # for each release, store it - logger.info("Saving {0} releases to disk".format(len(releases))) for release in releases: release_name = release["tag_name"] release_name_safe = release_name.replace("/", "__") From ab18e96ea802e9b0aafa5dace54eb72b26ca3387 Mon Sep 17 00:00:00 2001 From: BrOleg5 Date: Fri, 5 Jan 2024 20:26:08 +0800 Subject: [PATCH 216/455] Add option to skip prerelease versions. Replace release sorting by tag with release sorting by creation date. 
--- README.rst | 7 ++++--- github_backup/github_backup.py | 26 ++++++++++++++++++-------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index 0944536..22c39d7 100644 --- a/README.rst +++ b/README.rst @@ -125,10 +125,11 @@ CLI Help output:: keychain that holds the personal access or OAuth token --releases include release information, not including assets or binaries - --assets include assets alongside release information; only - applies if including releases --latest-releases INCLUDE_LATEST_RELEASES - include certain number of the latest releases; only + include certain number of the latest releases; + only applies if including releases + --skip-prerelease skip prerelease and draft versions; only applies if including releases + --assets include assets alongside release information; only applies if including releases --exclude [REPOSITORY [REPOSITORY ...]] names of repositories to exclude from backup. diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 27c357b..2761336 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -23,7 +23,7 @@ from urllib.parse import quote as urlquote from urllib.parse import urlencode, urlparse from urllib.request import HTTPRedirectHandler, Request, build_opener, urlopen -from operator import itemgetter +from datetime import datetime try: from . 
import __version__ @@ -377,12 +377,6 @@ def parse_args(args=None): dest="include_releases", help="include release information, not including assets or binaries", ) - parser.add_argument( - "--assets", - action="store_true", - dest="include_assets", - help="include assets alongside release information; only applies if including releases", - ) parser.add_argument( "--latest-releases", type=int, @@ -390,6 +384,18 @@ def parse_args(args=None): dest="include_latest_releases", help="include certain number of the latest releases; only applies if including releases", ) + parser.add_argument( + "--skip-prerelease", + action="store_true", + dest="skip_prerelease", + help="skip prerelease and draft versions; only applies if including releases", + ) + parser.add_argument( + "--assets", + action="store_true", + dest="include_assets", + help="include assets alongside release information; only applies if including releases", + ) parser.add_argument( "--throttle-limit", dest="throttle_limit", @@ -1214,8 +1220,12 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F release_template = "{0}/{1}/releases".format(repos_template, repository_fullname) releases = retrieve_data(args, release_template, query_args=query_args) + if args.skip_prerelease: + releases = [r for r in releases if r["prerelease"] == False and r["draft"] == False] + if args.include_latest_releases and args.include_latest_releases < len(releases): - releases = sorted(releases, key=itemgetter('tag_name'), reverse=True) + releases.sort(key=lambda item: datetime.strptime(item["created_at"], "%Y-%m-%dT%H:%M:%SZ"), \ + reverse=True) releases = releases[:args.include_latest_releases] logger.info("Saving the latest {0} releases to disk".format(len(releases))) else: From 3e9a4fa0d885ae0e348df62d40f8c36c16b4076a Mon Sep 17 00:00:00 2001 From: BrOleg5 Date: Fri, 5 Jan 2024 20:35:37 +0800 Subject: [PATCH 217/455] Update CLI help output in README. Rename destination of latest-releases argument. 
--- README.rst | 5 +++-- github_backup/github_backup.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 22c39d7..82dc479 100644 --- a/README.rst +++ b/README.rst @@ -49,7 +49,8 @@ CLI Help output:: [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] - [--releases] [--assets] [--latest-releases INCLUDE_LATEST_RELEASES] + [--releases] [--latest-releases NUMBER_OF_LATEST_RELEASES] + [--skip-prerelease] [--assets] [--exclude [REPOSITORY [REPOSITORY ...]] [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] USER @@ -125,7 +126,7 @@ CLI Help output:: keychain that holds the personal access or OAuth token --releases include release information, not including assets or binaries - --latest-releases INCLUDE_LATEST_RELEASES + --latest-releases NUMBER_OF_LATEST_RELEASES include certain number of the latest releases; only applies if including releases --skip-prerelease skip prerelease and draft versions; only applies if including releases diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 2761336..13fef00 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -381,7 +381,7 @@ def parse_args(args=None): "--latest-releases", type=int, default=0, - dest="include_latest_releases", + dest="number_of_latest_releases", help="include certain number of the latest releases; only applies if including releases", ) parser.add_argument( @@ -1223,10 +1223,10 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F if args.skip_prerelease: releases = [r for r in releases if r["prerelease"] == False and r["draft"] == False] - if args.include_latest_releases and args.include_latest_releases < len(releases): + if args.number_of_latest_releases and args.number_of_latest_releases < len(releases): releases.sort(key=lambda item: datetime.strptime(item["created_at"], 
"%Y-%m-%dT%H:%M:%SZ"), \ reverse=True) - releases = releases[:args.include_latest_releases] + releases = releases[:args.number_of_latest_releases] logger.info("Saving the latest {0} releases to disk".format(len(releases))) else: logger.info("Saving {0} releases to disk".format(len(releases))) From 09f4168db67c90dd9093b8c799ac95d8881a1345 Mon Sep 17 00:00:00 2001 From: BrOleg5 Date: Fri, 5 Jan 2024 20:46:02 +0800 Subject: [PATCH 218/455] Fix code style. --- github_backup/github_backup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 13fef00..2690b49 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -511,7 +511,7 @@ def get_github_host(args): def read_file_contents(file_uri): - return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() + return open(file_uri[len(FILE_URI_PREFIX):], "rt").readline().strip() def get_github_repo_url(args, repository): @@ -1221,10 +1221,10 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F releases = retrieve_data(args, release_template, query_args=query_args) if args.skip_prerelease: - releases = [r for r in releases if r["prerelease"] == False and r["draft"] == False] + releases = [r for r in releases if not r["prerelease"] and not r["draft"]] if args.number_of_latest_releases and args.number_of_latest_releases < len(releases): - releases.sort(key=lambda item: datetime.strptime(item["created_at"], "%Y-%m-%dT%H:%M:%SZ"), \ + releases.sort(key=lambda item: datetime.strptime(item["created_at"], "%Y-%m-%dT%H:%M:%SZ"), reverse=True) releases = releases[:args.number_of_latest_releases] logger.info("Saving the latest {0} releases to disk".format(len(releases))) From 42b836f623799fa8b1725d9b41b4d32a2301685d Mon Sep 17 00:00:00 2001 From: Moritz Federspiel Date: Tue, 16 Jan 2024 15:13:26 +0100 Subject: [PATCH 219/455] fix: Catch 404s for non-existing hooks. 
Fixes #176 Explanation: Repositories where no webhooks are defined return 404 errors. This breaks further script execution. --- github_backup/github_backup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 860b73a..d429acb 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1189,8 +1189,11 @@ def backup_hooks(args, repo_cwd, repository, repos_template): template = "{0}/{1}/hooks".format(repos_template, repository["full_name"]) try: _backup_data(args, "hooks", template, output_file, hook_cwd) - except SystemExit: - logger.info("Unable to read hooks, skipping") + except Exception as e: + if "404" in str(e): + logger.info("Unable to read hooks, skipping") + else: + raise e def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): From 3c3262ed696228ec7e52f5ef4c4d2b816d0ebc27 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 14:14:51 +0000 Subject: [PATCH 220/455] chore(deps): bump the python-packages group with 6 updates Bumps the python-packages group with 6 updates: | Package | From | To | | --- | --- | --- | | [black](https://github.com/psf/black) | `23.11.0` | `24.1.0` | | [flake8](https://github.com/pycqa/flake8) | `6.1.0` | `7.0.0` | | [importlib-metadata](https://github.com/python/importlib_metadata) | `7.0.0` | `7.0.1` | | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.1.0` | `10.2.0` | | [pathspec](https://github.com/cpburnz/python-pathspec) | `0.11.2` | `0.12.1` | | [pyflakes](https://github.com/PyCQA/pyflakes) | `3.1.0` | `3.2.0` | Updates `black` from 23.11.0 to 24.1.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/23.11.0...24.1.0) Updates `flake8` from 6.1.0 to 7.0.0 - 
[Commits](https://github.com/pycqa/flake8/compare/6.1.0...7.0.0) Updates `importlib-metadata` from 7.0.0 to 7.0.1 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v7.0.0...v7.0.1) Updates `more-itertools` from 10.1.0 to 10.2.0 - [Release notes](https://github.com/more-itertools/more-itertools/releases) - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.1.0...v10.2.0) Updates `pathspec` from 0.11.2 to 0.12.1 - [Release notes](https://github.com/cpburnz/python-pathspec/releases) - [Changelog](https://github.com/cpburnz/python-pathspec/blob/master/CHANGES.rst) - [Commits](https://github.com/cpburnz/python-pathspec/compare/v0.11.2...v0.12.1) Updates `pyflakes` from 3.1.0 to 3.2.0 - [Changelog](https://github.com/PyCQA/pyflakes/blob/main/NEWS.rst) - [Commits](https://github.com/PyCQA/pyflakes/compare/3.1.0...3.2.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: flake8 dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: importlib-metadata dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: more-itertools dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pathspec dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pyflakes dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 4afefe2..b7039c0 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,28 +1,28 @@ autopep8==2.0.4 -black==23.11.0 +black==24.1.0 bleach==6.1.0 certifi==2023.11.17 charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 docutils==0.20.1 -flake8==6.1.0 +flake8==7.0.0 gitchangelog==3.0.4 idna==3.6 -importlib-metadata==7.0.0 +importlib-metadata==7.0.1 jaraco.classes==3.3.0 keyring==24.3.0 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 -more-itertools==10.1.0 +more-itertools==10.2.0 mypy-extensions==1.0.0 packaging==23.2 -pathspec==0.11.2 +pathspec==0.12.1 pkginfo==1.9.6 platformdirs==4.1.0 pycodestyle==2.11.1 -pyflakes==3.1.0 +pyflakes==3.2.0 Pygments==2.17.2 readme-renderer==42.0 requests==2.31.0 From 0857a37440e693d24fe71c9e05113febbc51443e Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 29 Jan 2024 12:52:57 +0000 Subject: [PATCH 221/455] Release version 0.45.0 --- CHANGES.rst | 249 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 249 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 6a15e22..90dc30f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,257 @@ Changelog ========= -0.44.1 (2023-12-09) +0.45.0 (2024-01-29) ------------------- ------------------------ +Fix +~~~ +- Catch 404s for non-existing hooks. Fixes #176. [Moritz Federspiel] +- Ensure wheel is installed. [Jose Diaz-Gonzalez] + +Other +~~~~~ +- Fix code style. [BrOleg5] +- Add option to skip prerelease versions. [BrOleg5] + + Replace release sorting by tag with release sorting by creation date. +- Add option to include certain number of the latest releases. [BrOleg5] +- Auto docker build. [8cH9azbsFifZ] +- Vs code. [8cH9azbsFifZ] +- Chore(deps): bump the python-packages group with 6 updates. 
+ [dependabot[bot]] + + Bumps the python-packages group with 6 updates: + + | Package | From | To | + | --- | --- | --- | + | [black](https://github.com/psf/black) | `23.11.0` | `24.1.0` | + | [flake8](https://github.com/pycqa/flake8) | `6.1.0` | `7.0.0` | + | [importlib-metadata](https://github.com/python/importlib_metadata) | `7.0.0` | `7.0.1` | + | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.1.0` | `10.2.0` | + | [pathspec](https://github.com/cpburnz/python-pathspec) | `0.11.2` | `0.12.1` | + | [pyflakes](https://github.com/PyCQA/pyflakes) | `3.1.0` | `3.2.0` | + + + Updates `black` from 23.11.0 to 24.1.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/23.11.0...24.1.0) + + Updates `flake8` from 6.1.0 to 7.0.0 + - [Commits](https://github.com/pycqa/flake8/compare/6.1.0...7.0.0) + + Updates `importlib-metadata` from 7.0.0 to 7.0.1 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v7.0.0...v7.0.1) + + Updates `more-itertools` from 10.1.0 to 10.2.0 + - [Release notes](https://github.com/more-itertools/more-itertools/releases) + - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.1.0...v10.2.0) + + Updates `pathspec` from 0.11.2 to 0.12.1 + - [Release notes](https://github.com/cpburnz/python-pathspec/releases) + - [Changelog](https://github.com/cpburnz/python-pathspec/blob/master/CHANGES.rst) + - [Commits](https://github.com/cpburnz/python-pathspec/compare/v0.11.2...v0.12.1) + + Updates `pyflakes` from 3.1.0 to 3.2.0 + - [Changelog](https://github.com/PyCQA/pyflakes/blob/main/NEWS.rst) + - [Commits](https://github.com/PyCQA/pyflakes/compare/3.1.0...3.2.0) + + --- + updated-dependencies: + - dependency-name: black + 
dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: flake8 + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: importlib-metadata + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: more-itertools + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pathspec + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pyflakes + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 15 updates. + [dependabot[bot]] + + Bumps the python-packages group with 15 updates: + + | Package | From | To | + | --- | --- | --- | + | [bleach](https://github.com/mozilla/bleach) | `6.0.0` | `6.1.0` | + | [certifi](https://github.com/certifi/python-certifi) | `2023.7.22` | `2023.11.17` | + | [charset-normalizer](https://github.com/Ousret/charset_normalizer) | `3.1.0` | `3.3.2` | + | [idna](https://github.com/kjd/idna) | `3.4` | `3.6` | + | [importlib-metadata](https://github.com/python/importlib_metadata) | `6.6.0` | `7.0.0` | + | [jaraco-classes](https://github.com/jaraco/jaraco.classes) | `3.2.3` | `3.3.0` | + | [keyring](https://github.com/jaraco/keyring) | `23.13.1` | `24.3.0` | + | [markdown-it-py](https://github.com/executablebooks/markdown-it-py) | `2.2.0` | `3.0.0` | + | [more-itertools](https://github.com/more-itertools/more-itertools) | `9.1.0` | `10.1.0` | + | [pygments](https://github.com/pygments/pygments) | `2.15.1` | `2.17.2` | + | [readme-renderer](https://github.com/pypa/readme_renderer) | `37.3` | `42.0` | + | 
[rich](https://github.com/Textualize/rich) | `13.3.5` | `13.7.0` | + | [tqdm](https://github.com/tqdm/tqdm) | `4.65.0` | `4.66.1` | + | [urllib3](https://github.com/urllib3/urllib3) | `2.0.7` | `2.1.0` | + | [zipp](https://github.com/jaraco/zipp) | `3.15.0` | `3.17.0` | + + + Updates `bleach` from 6.0.0 to 6.1.0 + - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) + - [Commits](https://github.com/mozilla/bleach/compare/v6.0.0...v6.1.0) + + Updates `certifi` from 2023.7.22 to 2023.11.17 + - [Commits](https://github.com/certifi/python-certifi/compare/2023.07.22...2023.11.17) + + Updates `charset-normalizer` from 3.1.0 to 3.3.2 + - [Release notes](https://github.com/Ousret/charset_normalizer/releases) + - [Changelog](https://github.com/Ousret/charset_normalizer/blob/master/CHANGELOG.md) + - [Commits](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.3.2) + + Updates `idna` from 3.4 to 3.6 + - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) + - [Commits](https://github.com/kjd/idna/compare/v3.4...v3.6) + + Updates `importlib-metadata` from 6.6.0 to 7.0.0 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v6.6.0...v7.0.0) + + Updates `jaraco-classes` from 3.2.3 to 3.3.0 + - [Release notes](https://github.com/jaraco/jaraco.classes/releases) + - [Changelog](https://github.com/jaraco/jaraco.classes/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/jaraco.classes/compare/v3.2.3...v3.3.0) + + Updates `keyring` from 23.13.1 to 24.3.0 + - [Release notes](https://github.com/jaraco/keyring/releases) + - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/keyring/compare/v23.13.1...v24.3.0) + + Updates `markdown-it-py` from 2.2.0 to 3.0.0 + - [Release 
notes](https://github.com/executablebooks/markdown-it-py/releases) + - [Changelog](https://github.com/executablebooks/markdown-it-py/blob/master/CHANGELOG.md) + - [Commits](https://github.com/executablebooks/markdown-it-py/compare/v2.2.0...v3.0.0) + + Updates `more-itertools` from 9.1.0 to 10.1.0 + - [Release notes](https://github.com/more-itertools/more-itertools/releases) + - [Commits](https://github.com/more-itertools/more-itertools/compare/v9.1.0...v10.1.0) + + Updates `pygments` from 2.15.1 to 2.17.2 + - [Release notes](https://github.com/pygments/pygments/releases) + - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) + - [Commits](https://github.com/pygments/pygments/compare/2.15.1...2.17.2) + + Updates `readme-renderer` from 37.3 to 42.0 + - [Release notes](https://github.com/pypa/readme_renderer/releases) + - [Changelog](https://github.com/pypa/readme_renderer/blob/main/CHANGES.rst) + - [Commits](https://github.com/pypa/readme_renderer/compare/37.3...42.0) + + Updates `rich` from 13.3.5 to 13.7.0 + - [Release notes](https://github.com/Textualize/rich/releases) + - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) + - [Commits](https://github.com/Textualize/rich/compare/v13.3.5...v13.7.0) + + Updates `tqdm` from 4.65.0 to 4.66.1 + - [Release notes](https://github.com/tqdm/tqdm/releases) + - [Commits](https://github.com/tqdm/tqdm/compare/v4.65.0...v4.66.1) + + Updates `urllib3` from 2.0.7 to 2.1.0 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.0.7...2.1.0) + + Updates `zipp` from 3.15.0 to 3.17.0 + - [Release notes](https://github.com/jaraco/zipp/releases) + - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/zipp/compare/v3.15.0...v3.17.0) + + --- + updated-dependencies: + - dependency-name: bleach + 
dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: certifi + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: charset-normalizer + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: idna + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: importlib-metadata + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: jaraco-classes + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: keyring + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: markdown-it-py + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: more-itertools + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: pygments + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: readme-renderer + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: rich + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: tqdm + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: urllib3 + dependency-type: direct:production + update-type: 
version-update:semver-minor + dependency-group: python-packages + - dependency-name: zipp + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore: format python code. [Jose Diaz-Gonzalez] +- Chore: format yaml. [Jose Diaz-Gonzalez] +- Chore: update gitignore. [Jose Diaz-Gonzalez] +- Feat: add dependabot config to repository. [Jose Diaz-Gonzalez] + + +0.44.1 (2023-12-09) +------------------- + Fix ~~~ - Use a deploy key to push tags so releases get auto-created. [Jose diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 7f532dc..4d8afa5 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.44.1" +__version__ = "0.45.0" From e869844dba43e338631ca5a4fc15a94f9b3ead10 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 13:36:46 +0000 Subject: [PATCH 222/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). Updates `black` from 24.1.0 to 24.1.1 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/24.1.0...24.1.1) --- updated-dependencies: - dependency-name: black dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index b7039c0..902f265 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,5 +1,5 @@ autopep8==2.0.4 -black==24.1.0 +black==24.1.1 bleach==6.1.0 certifi==2023.11.17 charset-normalizer==3.3.2 From 421a7ec62b1f07ee34355786e8dff4946b35de3a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 31 Jan 2024 14:16:30 +0000 Subject: [PATCH 223/455] chore(deps): bump the python-packages group with 2 updates Bumps the python-packages group with 2 updates: [platformdirs](https://github.com/platformdirs/platformdirs) and [urllib3](https://github.com/urllib3/urllib3). Updates `platformdirs` from 4.1.0 to 4.2.0 - [Release notes](https://github.com/platformdirs/platformdirs/releases) - [Changelog](https://github.com/platformdirs/platformdirs/blob/main/CHANGES.rst) - [Commits](https://github.com/platformdirs/platformdirs/compare/4.1.0...4.2.0) Updates `urllib3` from 2.1.0 to 2.2.0 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.1.0...2.2.0) --- updated-dependencies: - dependency-name: platformdirs dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: urllib3 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 902f265..37414c3 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -20,7 +20,7 @@ mypy-extensions==1.0.0 packaging==23.2 pathspec==0.12.1 pkginfo==1.9.6 -platformdirs==4.1.0 +platformdirs==4.2.0 pycodestyle==2.11.1 pyflakes==3.2.0 Pygments==2.17.2 @@ -33,6 +33,6 @@ rich==13.7.0 six==1.16.0 tqdm==4.66.1 twine==4.0.2 -urllib3==2.1.0 +urllib3==2.2.0 webencodings==0.5.1 zipp==3.17.0 From fd2d398025968e650c0e1838407d60e0e6e3659c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 2 Feb 2024 13:26:36 +0000 Subject: [PATCH 224/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). Updates `certifi` from 2023.11.17 to 2024.2.2 - [Commits](https://github.com/certifi/python-certifi/compare/2023.11.17...2024.02.02) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 37414c3..cd64be0 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ autopep8==2.0.4 black==24.1.1 bleach==6.1.0 -certifi==2023.11.17 +certifi==2024.2.2 charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 From d1874c0bd9e79db3ef6d067228792aaa0499840d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:05:38 +0000 Subject: [PATCH 225/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [jaraco-classes](https://github.com/jaraco/jaraco.classes). Updates `jaraco-classes` from 3.3.0 to 3.3.1 - [Release notes](https://github.com/jaraco/jaraco.classes/releases) - [Changelog](https://github.com/jaraco/jaraco.classes/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/jaraco.classes/compare/v3.3.0...v3.3.1) --- updated-dependencies: - dependency-name: jaraco-classes dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index cd64be0..28943b7 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -10,7 +10,7 @@ flake8==7.0.0 gitchangelog==3.0.4 idna==3.6 importlib-metadata==7.0.1 -jaraco.classes==3.3.0 +jaraco.classes==3.3.1 keyring==24.3.0 markdown-it-py==3.0.0 mccabe==0.7.0 From 888815c2714110e31bd82d5faa72a37d3ab4e253 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 13:44:39 +0000 Subject: [PATCH 226/455] chore(deps): bump the python-packages group with 2 updates Bumps the python-packages group with 2 updates: [tqdm](https://github.com/tqdm/tqdm) and [twine](https://github.com/pypa/twine). Updates `tqdm` from 4.66.1 to 4.66.2 - [Release notes](https://github.com/tqdm/tqdm/releases) - [Commits](https://github.com/tqdm/tqdm/compare/v4.66.1...v4.66.2) Updates `twine` from 4.0.2 to 5.0.0 - [Release notes](https://github.com/pypa/twine/releases) - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) - [Commits](https://github.com/pypa/twine/compare/4.0.2...5.0.0) --- updated-dependencies: - dependency-name: tqdm dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: twine dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 28943b7..a6a791a 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -31,8 +31,8 @@ restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.7.0 six==1.16.0 -tqdm==4.66.1 -twine==4.0.2 +tqdm==4.66.2 +twine==5.0.0 urllib3==2.2.0 webencodings==0.5.1 zipp==3.17.0 From c97598c914453a7e0bff6955b0bb8651c1cbb6f0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Feb 2024 13:59:09 +0000 Subject: [PATCH 227/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). Updates `black` from 24.1.1 to 24.2.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/24.1.1...24.2.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index a6a791a..517e78a 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,5 +1,5 @@ autopep8==2.0.4 -black==24.1.1 +black==24.2.0 bleach==6.1.0 certifi==2024.2.2 charset-normalizer==3.3.2 From 0380fb8e359e6774fbde86f3c02568c1c00a4e3b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Feb 2024 13:15:55 +0000 Subject: [PATCH 228/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3). 
Updates `urllib3` from 2.2.0 to 2.2.1 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.2.0...2.2.1) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 517e78a..36d369e 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -33,6 +33,6 @@ rich==13.7.0 six==1.16.0 tqdm==4.66.2 twine==5.0.0 -urllib3==2.2.0 +urllib3==2.2.1 webencodings==0.5.1 zipp==3.17.0 From 6b6297399715858558d85124185df68b354c1fba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 Feb 2024 14:05:42 +0000 Subject: [PATCH 229/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [readme-renderer](https://github.com/pypa/readme_renderer). Updates `readme-renderer` from 42.0 to 43.0 - [Release notes](https://github.com/pypa/readme_renderer/releases) - [Changelog](https://github.com/pypa/readme_renderer/blob/main/CHANGES.rst) - [Commits](https://github.com/pypa/readme_renderer/compare/42.0...43.0) --- updated-dependencies: - dependency-name: readme-renderer dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 36d369e..6cb3a09 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -24,7 +24,7 @@ platformdirs==4.2.0 pycodestyle==2.11.1 pyflakes==3.2.0 Pygments==2.17.2 -readme-renderer==42.0 +readme-renderer==43.0 requests==2.31.0 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 From e9d76921239cdd7be8fd978396f98bd40fb1f87e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 28 Feb 2024 13:43:08 +0000 Subject: [PATCH 230/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [keyring](https://github.com/jaraco/keyring). Updates `keyring` from 24.3.0 to 24.3.1 - [Release notes](https://github.com/jaraco/keyring/releases) - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/keyring/compare/v24.3.0...v24.3.1) --- updated-dependencies: - dependency-name: keyring dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 6cb3a09..e02594c 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -11,7 +11,7 @@ gitchangelog==3.0.4 idna==3.6 importlib-metadata==7.0.1 jaraco.classes==3.3.1 -keyring==24.3.0 +keyring==24.3.1 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 From 8eba46d8a78b44a07bf0ac223e6bbea8ca419098 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Mar 2024 13:39:14 +0000 Subject: [PATCH 231/455] chore(deps): bump the python-packages group with 2 updates Bumps the python-packages group with 2 updates: [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk) and [rich](https://github.com/Textualize/rich). Updates `pkginfo` from 1.9.6 to 1.10.0 Updates `rich` from 13.7.0 to 13.7.1 - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v13.7.0...v13.7.1) --- updated-dependencies: - dependency-name: pkginfo dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: rich dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index e02594c..3e09ddf 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -19,7 +19,7 @@ more-itertools==10.2.0 mypy-extensions==1.0.0 packaging==23.2 pathspec==0.12.1 -pkginfo==1.9.6 +pkginfo==1.10.0 platformdirs==4.2.0 pycodestyle==2.11.1 pyflakes==3.2.0 @@ -29,7 +29,7 @@ requests==2.31.0 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 -rich==13.7.0 +rich==13.7.1 six==1.16.0 tqdm==4.66.2 twine==5.0.0 From d1d3d84d950b1b80cb53388e126164d0ed34e881 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:43:27 +0000 Subject: [PATCH 232/455] chore(deps): bump the python-packages group with 2 updates Bumps the python-packages group with 2 updates: [importlib-metadata](https://github.com/python/importlib_metadata) and [packaging](https://github.com/pypa/packaging). Updates `importlib-metadata` from 7.0.1 to 7.0.2 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v7.0.1...v7.0.2) Updates `packaging` from 23.2 to 24.0 - [Release notes](https://github.com/pypa/packaging/releases) - [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pypa/packaging/compare/23.2...24.0) --- updated-dependencies: - dependency-name: importlib-metadata dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: packaging dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 3e09ddf..9e5def7 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -9,7 +9,7 @@ docutils==0.20.1 flake8==7.0.0 gitchangelog==3.0.4 idna==3.6 -importlib-metadata==7.0.1 +importlib-metadata==7.0.2 jaraco.classes==3.3.1 keyring==24.3.1 markdown-it-py==3.0.0 @@ -17,7 +17,7 @@ mccabe==0.7.0 mdurl==0.1.2 more-itertools==10.2.0 mypy-extensions==1.0.0 -packaging==23.2 +packaging==24.0 pathspec==0.12.1 pkginfo==1.10.0 platformdirs==4.2.0 From 80fa92664cc5780c70f9b396a88005174bfc7627 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 14:07:08 +0000 Subject: [PATCH 233/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [zipp](https://github.com/jaraco/zipp). Updates `zipp` from 3.17.0 to 3.18.0 - [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.17.0...v3.18.0) --- updated-dependencies: - dependency-name: zipp dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 9e5def7..89f9fcb 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -35,4 +35,4 @@ tqdm==4.66.2 twine==5.0.0 urllib3==2.2.1 webencodings==0.5.1 -zipp==3.17.0 +zipp==3.18.0 From 122eb56aa11c0878471f96aab63aaa3621a6e2e9 Mon Sep 17 00:00:00 2001 From: dale-primer-e <70494025+dale-primer-e@users.noreply.github.com> Date: Thu, 14 Mar 2024 14:55:20 +1300 Subject: [PATCH 234/455] Fix error downloading assets When downloading assets using a fine grained token you will get a "can't concat str to bytes" error. This is due to the fine grained token being concatenated onto bytes in the line: `request.add_header("Authorization", "Basic ".encode("ascii") + auth)` This is better handled in the function `_construct_request` so I changed the lines that construct the request in `download_file` to use the function `_construct_request` and updated the function signature to reflect that. 
--- github_backup/github_backup.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 562313a..49e54a6 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -777,14 +777,19 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): return request -def download_file(url, path, auth): +def download_file(url, path, auth, as_app=False, fine=False): # Skip downloading release assets if they already exist on disk so we don't redownload on every sync if os.path.exists(path): return - request = Request(url) + request = _construct_request(per_page=100, + page=1, + query_args={}, + template=url, + auth=auth, + as_app=as_app, + fine=fine) request.add_header("Accept", "application/octet-stream") - request.add_header("Authorization", "Basic ".encode("ascii") + auth) opener = build_opener(S3HTTPRedirectHandler) try: @@ -1255,6 +1260,8 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F asset["url"], os.path.join(release_assets_cwd, asset["name"]), get_auth(args), + as_app=True if args.as_app is not None else False, + fine=True if args.token_fine is not None else False ) From 1eccebcb83e7715ba3d994137098174e070a05f3 Mon Sep 17 00:00:00 2001 From: dale-primer-e <70494025+dale-primer-e@users.noreply.github.com> Date: Thu, 14 Mar 2024 15:27:22 +1300 Subject: [PATCH 235/455] Fix error with as_app flag --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 49e54a6..f564458 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1260,7 +1260,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F asset["url"], os.path.join(release_assets_cwd, asset["name"]), get_auth(args), - as_app=True if args.as_app is not None else False, + 
as_app=args.as_app, fine=True if args.token_fine is not None else False ) From 9812988a4acf369a7ad2ea3171ba3dc713d85aaa Mon Sep 17 00:00:00 2001 From: dale-primer-e <70494025+dale-primer-e@users.noreply.github.com> Date: Fri, 15 Mar 2024 08:26:14 +1300 Subject: [PATCH 236/455] Remove trailing whitespaces That are triggering flake. --- github_backup/github_backup.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index f564458..ca9e88e 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -782,12 +782,12 @@ def download_file(url, path, auth, as_app=False, fine=False): if os.path.exists(path): return - request = _construct_request(per_page=100, - page=1, - query_args={}, - template=url, - auth=auth, - as_app=as_app, + request = _construct_request(per_page=100, + page=1, + query_args={}, + template=url, + auth=auth, + as_app=as_app, fine=fine) request.add_header("Accept", "application/octet-stream") opener = build_opener(S3HTTPRedirectHandler) From 436e8df0ac6f28c4b05ffdb5d0ba66a0012a7eb1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Mar 2024 14:00:44 +0000 Subject: [PATCH 237/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [zipp](https://github.com/jaraco/zipp). Updates `zipp` from 3.18.0 to 3.18.1 - [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.18.0...v3.18.1) --- updated-dependencies: - dependency-name: zipp dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 89f9fcb..4807e64 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -35,4 +35,4 @@ tqdm==4.66.2 twine==5.0.0 urllib3==2.2.1 webencodings==0.5.1 -zipp==3.18.0 +zipp==3.18.1 From d835d47c17d62e03aff086fb1702ff99e8ac6393 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sun, 17 Mar 2024 22:30:14 +0000 Subject: [PATCH 238/455] Release version 0.45.1 --- CHANGES.rst | 286 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 286 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 90dc30f..e10ab1f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,293 @@ Changelog ========= -0.45.0 (2024-01-29) +0.45.1 (2024-03-17) ------------------- ------------------------ +- Remove trailing whitespaces. [dale-primer-e] + + That are triggering flake. +- Fix error with as_app flag. [dale-primer-e] +- Fix error downloading assets. [dale-primer-e] + + When downloading assets using a fine grained token you will get a "can't + concat str to bytes" error. This is due to the fine grained token being + concatenated onto bytes in the line: + + `request.add_header("Authorization", "Basic ".encode("ascii") + auth)` + + This is better handled in the function `_construct_request` so I changed + the lines that construct the request in `download_file` to use the + function `_construct_request` and updated the function signature to + reflect that. +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [zipp](https://github.com/jaraco/zipp). 
+ + + Updates `zipp` from 3.18.0 to 3.18.1 + - [Release notes](https://github.com/jaraco/zipp/releases) + - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/zipp/compare/v3.18.0...v3.18.1) + + --- + updated-dependencies: + - dependency-name: zipp + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [zipp](https://github.com/jaraco/zipp). + + + Updates `zipp` from 3.17.0 to 3.18.0 + - [Release notes](https://github.com/jaraco/zipp/releases) + - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/zipp/compare/v3.17.0...v3.18.0) + + --- + updated-dependencies: + - dependency-name: zipp + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 2 updates. + [dependabot[bot]] + + Bumps the python-packages group with 2 updates: [importlib-metadata](https://github.com/python/importlib_metadata) and [packaging](https://github.com/pypa/packaging). 
+ + + Updates `importlib-metadata` from 7.0.1 to 7.0.2 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v7.0.1...v7.0.2) + + Updates `packaging` from 23.2 to 24.0 + - [Release notes](https://github.com/pypa/packaging/releases) + - [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst) + - [Commits](https://github.com/pypa/packaging/compare/23.2...24.0) + + --- + updated-dependencies: + - dependency-name: importlib-metadata + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: packaging + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 2 updates. + [dependabot[bot]] + + Bumps the python-packages group with 2 updates: [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk) and [rich](https://github.com/Textualize/rich). + + + Updates `pkginfo` from 1.9.6 to 1.10.0 + + Updates `rich` from 13.7.0 to 13.7.1 + - [Release notes](https://github.com/Textualize/rich/releases) + - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) + - [Commits](https://github.com/Textualize/rich/compare/v13.7.0...v13.7.1) + + --- + updated-dependencies: + - dependency-name: pkginfo + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: rich + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [keyring](https://github.com/jaraco/keyring). 
+ + + Updates `keyring` from 24.3.0 to 24.3.1 + - [Release notes](https://github.com/jaraco/keyring/releases) + - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/keyring/compare/v24.3.0...v24.3.1) + + --- + updated-dependencies: + - dependency-name: keyring + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [readme-renderer](https://github.com/pypa/readme_renderer). + + + Updates `readme-renderer` from 42.0 to 43.0 + - [Release notes](https://github.com/pypa/readme_renderer/releases) + - [Changelog](https://github.com/pypa/readme_renderer/blob/main/CHANGES.rst) + - [Commits](https://github.com/pypa/readme_renderer/compare/42.0...43.0) + + --- + updated-dependencies: + - dependency-name: readme-renderer + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3). + + + Updates `urllib3` from 2.2.0 to 2.2.1 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.2.0...2.2.1) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). 
+ + + Updates `black` from 24.1.1 to 24.2.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/24.1.1...24.2.0) + + --- + updated-dependencies: + - dependency-name: black + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 2 updates. + [dependabot[bot]] + + Bumps the python-packages group with 2 updates: [tqdm](https://github.com/tqdm/tqdm) and [twine](https://github.com/pypa/twine). + + + Updates `tqdm` from 4.66.1 to 4.66.2 + - [Release notes](https://github.com/tqdm/tqdm/releases) + - [Commits](https://github.com/tqdm/tqdm/compare/v4.66.1...v4.66.2) + + Updates `twine` from 4.0.2 to 5.0.0 + - [Release notes](https://github.com/pypa/twine/releases) + - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) + - [Commits](https://github.com/pypa/twine/compare/4.0.2...5.0.0) + + --- + updated-dependencies: + - dependency-name: tqdm + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: twine + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [jaraco-classes](https://github.com/jaraco/jaraco.classes). 
+ + + Updates `jaraco-classes` from 3.3.0 to 3.3.1 + - [Release notes](https://github.com/jaraco/jaraco.classes/releases) + - [Changelog](https://github.com/jaraco/jaraco.classes/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/jaraco.classes/compare/v3.3.0...v3.3.1) + + --- + updated-dependencies: + - dependency-name: jaraco-classes + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). + + + Updates `certifi` from 2023.11.17 to 2024.2.2 + - [Commits](https://github.com/certifi/python-certifi/compare/2023.11.17...2024.02.02) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 2 updates. + [dependabot[bot]] + + Bumps the python-packages group with 2 updates: [platformdirs](https://github.com/platformdirs/platformdirs) and [urllib3](https://github.com/urllib3/urllib3). 
+ + + Updates `platformdirs` from 4.1.0 to 4.2.0 + - [Release notes](https://github.com/platformdirs/platformdirs/releases) + - [Changelog](https://github.com/platformdirs/platformdirs/blob/main/CHANGES.rst) + - [Commits](https://github.com/platformdirs/platformdirs/compare/4.1.0...4.2.0) + + Updates `urllib3` from 2.1.0 to 2.2.0 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.1.0...2.2.0) + + --- + updated-dependencies: + - dependency-name: platformdirs + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: urllib3 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). + + + Updates `black` from 24.1.0 to 24.1.1 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/24.1.0...24.1.1) + + --- + updated-dependencies: + - dependency-name: black + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... 
+ + +0.45.0 (2024-01-29) +------------------- Fix ~~~ diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 4d8afa5..058b03f 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.45.0" +__version__ = "0.45.1" From 093db93994f714b7fda68ed61021ec071ecce81e Mon Sep 17 00:00:00 2001 From: paranerd Date: Mon, 18 Mar 2024 14:02:10 +0100 Subject: [PATCH 239/455] Bumped actions versions to latest --- .github/workflows/docker.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 977c32d..6c11f9d 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -38,16 +38,16 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Log in to the Container registry - uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -55,7 +55,7 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | @@ -66,7 +66,7 @@ jobs: type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} - name: Build and push Docker image - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . 
push: true From bba39fb4c8b6fd6b3201f6273ca68d283fd1f0da Mon Sep 17 00:00:00 2001 From: paranerd Date: Mon, 18 Mar 2024 14:07:26 +0100 Subject: [PATCH 240/455] Disable credential persistence on checkout --- .github/workflows/docker.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 6c11f9d..fb8cfa3 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -39,6 +39,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + persist-credentials: false - name: Set up QEMU uses: docker/setup-qemu-action@v3 From caff40e65baa63e96f234476f71b1901fcc8f057 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 13:35:12 +0000 Subject: [PATCH 241/455] chore(deps): bump the python-packages group with 2 updates Bumps the python-packages group with 2 updates: [autopep8](https://github.com/hhatto/autopep8) and [black](https://github.com/psf/black). Updates `autopep8` from 2.0.4 to 2.1.0 - [Release notes](https://github.com/hhatto/autopep8/releases) - [Commits](https://github.com/hhatto/autopep8/compare/v2.0.4...v2.1.0) Updates `black` from 24.2.0 to 24.3.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/24.2.0...24.3.0) --- updated-dependencies: - dependency-name: autopep8 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: black dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ...
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 4807e64..d9f5811 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,5 +1,5 @@ -autopep8==2.0.4 -black==24.2.0 +autopep8==2.1.0 +black==24.3.0 bleach==6.1.0 certifi==2024.2.2 charset-normalizer==3.3.2 From eb44c735eb8b0671bdca65f5f1f0c564f55c99a4 Mon Sep 17 00:00:00 2001 From: paranerd Date: Mon, 18 Mar 2024 14:35:37 +0100 Subject: [PATCH 242/455] Added Docker info to README --- README.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.rst b/README.rst index 82dc479..34de98f 100644 --- a/README.rst +++ b/README.rst @@ -212,6 +212,13 @@ When you use the ``--lfs`` option, you will need to make sure you have Git LFS i Instructions on how to do this can be found on https://git-lfs.github.com. +Run in Docker container +----------------------- + +To run the tool in a Docker container use the following command:: + + sudo docker run --rm -v /path/to/backup:/data --name github-backup ghcr.io/josegonzalez/python-github-backup -o /data $OPTIONS $USER + Gotchas / Known-issues ====================== From 6630b2b82e81e2ec90af7ae5ccd7bad6049bbfe5 Mon Sep 17 00:00:00 2001 From: paranerd Date: Mon, 18 Mar 2024 19:31:29 +0100 Subject: [PATCH 243/455] Scheduled dependabot for GitHub Actions --- .github/dependabot.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 88bb03b..64c2f28 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,3 +9,7 @@ updates: python-packages: patterns: - "*" +- package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" From 358d1e3d3eb4364d77e80a9b0880df8c5b80e359 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 19:59:52 +0000 Subject: [PATCH 244/455]
chore(deps): bump actions/setup-python from 4 to 5 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/automatic-release.yml | 2 +- .github/workflows/lint.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index f5b8f64..93074ed 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -27,7 +27,7 @@ jobs: git config --local user.email "action@github.com" git config --local user.name "GitHub Action" - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' - name: Install prerequisites diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d3df703..f632da2 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -22,7 +22,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.8" cache: "pip" From 8b086856780b47416a58b9fde414d425e3905133 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 21 Mar 2024 13:14:15 +0000 Subject: [PATCH 245/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [importlib-metadata](https://github.com/python/importlib_metadata). 
Updates `importlib-metadata` from 7.0.2 to 7.1.0 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v7.0.2...v7.1.0) --- updated-dependencies: - dependency-name: importlib-metadata dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index d9f5811..62ab77a 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -9,7 +9,7 @@ docutils==0.20.1 flake8==7.0.0 gitchangelog==3.0.4 idna==3.6 -importlib-metadata==7.0.2 +importlib-metadata==7.1.0 jaraco.classes==3.3.1 keyring==24.3.1 markdown-it-py==3.0.0 From 298724acfc6b02ae04c788354de06a15b5c4e2e2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Mar 2024 14:08:44 +0000 Subject: [PATCH 246/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [keyring](https://github.com/jaraco/keyring). Updates `keyring` from 24.3.1 to 25.0.0 - [Release notes](https://github.com/jaraco/keyring/releases) - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/keyring/compare/v24.3.1...v25.0.0) --- updated-dependencies: - dependency-name: keyring dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 62ab77a..cf329bc 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -11,7 +11,7 @@ gitchangelog==3.0.4 idna==3.6 importlib-metadata==7.1.0 jaraco.classes==3.3.1 -keyring==24.3.1 +keyring==25.0.0 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 From 22fa2eb97eb27cb431f73bf0717107cfdc5ea27b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 13:13:51 +0000 Subject: [PATCH 247/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [jaraco-classes](https://github.com/jaraco/jaraco.classes). Updates `jaraco-classes` from 3.3.1 to 3.4.0 - [Release notes](https://github.com/jaraco/jaraco.classes/releases) - [Changelog](https://github.com/jaraco/jaraco.classes/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/jaraco.classes/compare/v3.3.1...v3.4.0) --- updated-dependencies: - dependency-name: jaraco-classes dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index cf329bc..296a8e1 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -10,7 +10,7 @@ flake8==7.0.0 gitchangelog==3.0.4 idna==3.6 importlib-metadata==7.1.0 -jaraco.classes==3.3.1 +jaraco.classes==3.4.0 keyring==25.0.0 markdown-it-py==3.0.0 mccabe==0.7.0 From 02a07d3f0d89ff0935f28e1d46bb876cb6356b01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 13:34:48 +0000 Subject: [PATCH 248/455] chore(deps): bump the python-packages group with 1 update Bumps the python-packages group with 1 update: [keyring](https://github.com/jaraco/keyring). Updates `keyring` from 25.0.0 to 25.1.0 - [Release notes](https://github.com/jaraco/keyring/releases) - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/keyring/compare/v25.0.0...v25.1.0) --- updated-dependencies: - dependency-name: keyring dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 296a8e1..3f826dc 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -11,7 +11,7 @@ gitchangelog==3.0.4 idna==3.6 importlib-metadata==7.1.0 jaraco.classes==3.4.0 -keyring==25.0.0 +keyring==25.1.0 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 From dea87873f92a327a8fee479897cb9bf5795f9fd4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 04:16:01 +0000 Subject: [PATCH 249/455] chore(deps): bump idna from 3.6 to 3.7 Bumps [idna](https://github.com/kjd/idna) from 3.6 to 3.7. 
- [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.6...v3.7) --- updated-dependencies: - dependency-name: idna dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 3f826dc..49fb3c0 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -8,7 +8,7 @@ colorama==0.4.6 docutils==0.20.1 flake8==7.0.0 gitchangelog==3.0.4 -idna==3.6 +idna==3.7 importlib-metadata==7.1.0 jaraco.classes==3.4.0 keyring==25.1.0 From f325daa875c1508cef1baf7ced12025ee4741aca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 22:01:17 +0000 Subject: [PATCH 250/455] chore(deps): bump tqdm from 4.66.2 to 4.66.3 Bumps [tqdm](https://github.com/tqdm/tqdm) from 4.66.2 to 4.66.3. - [Release notes](https://github.com/tqdm/tqdm/releases) - [Commits](https://github.com/tqdm/tqdm/compare/v4.66.2...v4.66.3) --- updated-dependencies: - dependency-name: tqdm dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 49fb3c0..c39bc13 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -31,7 +31,7 @@ restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.7.1 six==1.16.0 -tqdm==4.66.2 +tqdm==4.66.3 twine==5.0.0 urllib3==2.2.1 webencodings==0.5.1 From 75382afeaed9dbb3a53525aee6ff21a11f9cc51f Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 4 May 2024 18:36:39 +0000 Subject: [PATCH 251/455] Release version 0.45.2 --- CHANGES.rst | 132 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 132 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e10ab1f..588c2fd 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,139 @@ Changelog ========= -0.45.1 (2024-03-17) +0.45.2 (2024-05-04) ------------------- ------------------------ +- Chore(deps): bump idna from 3.6 to 3.7. [dependabot[bot]] + + Bumps [idna](https://github.com/kjd/idna) from 3.6 to 3.7. + - [Release notes](https://github.com/kjd/idna/releases) + - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) + - [Commits](https://github.com/kjd/idna/compare/v3.6...v3.7) + + --- + updated-dependencies: + - dependency-name: idna + dependency-type: direct:production + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [keyring](https://github.com/jaraco/keyring). 
+ + + Updates `keyring` from 25.0.0 to 25.1.0 + - [Release notes](https://github.com/jaraco/keyring/releases) + - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/keyring/compare/v25.0.0...v25.1.0) + + --- + updated-dependencies: + - dependency-name: keyring + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [jaraco-classes](https://github.com/jaraco/jaraco.classes). + + + Updates `jaraco-classes` from 3.3.1 to 3.4.0 + - [Release notes](https://github.com/jaraco/jaraco.classes/releases) + - [Changelog](https://github.com/jaraco/jaraco.classes/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/jaraco.classes/compare/v3.3.1...v3.4.0) + + --- + updated-dependencies: + - dependency-name: jaraco-classes + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [keyring](https://github.com/jaraco/keyring). + + + Updates `keyring` from 24.3.1 to 25.0.0 + - [Release notes](https://github.com/jaraco/keyring/releases) + - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/keyring/compare/v24.3.1...v25.0.0) + + --- + updated-dependencies: + - dependency-name: keyring + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 1 update. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [importlib-metadata](https://github.com/python/importlib_metadata). 
+ + + Updates `importlib-metadata` from 7.0.2 to 7.1.0 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v7.0.2...v7.1.0) + + --- + updated-dependencies: + - dependency-name: importlib-metadata + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump actions/setup-python from 4 to 5. [dependabot[bot]] + + Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. + - [Release notes](https://github.com/actions/setup-python/releases) + - [Commits](https://github.com/actions/setup-python/compare/v4...v5) + + --- + updated-dependencies: + - dependency-name: actions/setup-python + dependency-type: direct:production + update-type: version-update:semver-major + ... +- Scheduled dependabot for GitHub Actions. [paranerd] +- Chore(deps): bump the python-packages group with 2 updates. + [dependabot[bot]] + + Bumps the python-packages group with 2 updates: [autopep8](https://github.com/hhatto/autopep8) and [black](https://github.com/psf/black). + + + Updates `autopep8` from 2.0.4 to 2.1.0 + - [Release notes](https://github.com/hhatto/autopep8/releases) + - [Commits](https://github.com/hhatto/autopep8/compare/v2.0.4...v2.1.0) + + Updates `black` from 24.2.0 to 24.3.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/24.2.0...24.3.0) + + --- + updated-dependencies: + - dependency-name: autopep8 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: black + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... 
+- Added Docker info to README. [paranerd] +- Disable credential persistence on checkout. [paranerd] +- Bumped actions versions to latest. [paranerd] + + +0.45.1 (2024-03-17) +------------------- - Remove trailing whitespaces. [dale-primer-e] That are triggering flake. diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 058b03f..4ce7032 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.45.1" +__version__ = "0.45.2" From 4a134ae2ecdb110b4ee6d6e2b84c99b92bee78ed Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 05:42:42 +0000 Subject: [PATCH 252/455] --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index c39bc13..41b9cc3 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -25,7 +25,7 @@ pycodestyle==2.11.1 pyflakes==3.2.0 Pygments==2.17.2 readme-renderer==43.0 -requests==2.31.0 +requests==2.32.0 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 From 8449d6352d75b0386ecf9aeb85ab9b1b3172bdfb Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 25 May 2024 04:24:32 -0400 Subject: [PATCH 253/455] chore: drop unsupported python versions and add supported ones --- setup.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index ebdd532..c4b8cf1 100644 --- a/setup.py +++ b/setup.py @@ -40,10 +40,11 @@ def open_file(fname): "Development Status :: 5 - Production/Stable", "Topic :: System :: Archiving :: Backup", "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", +
"Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], description="backup a github user or organization", long_description=open_file("README.rst").read(), From b1b3df692dcac9bfa999ec685105fd3d25549fe4 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 25 May 2024 04:32:21 -0400 Subject: [PATCH 254/455] chore: update python version used in linting --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f632da2..fb05a5c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,7 +24,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.12" cache: "pip" - run: pip install -r release-requirements.txt && pip install wheel - run: flake8 --ignore=E501,E203,W503 From 1971c97b5d065de71fbaa0e32ee2938a4fa521d1 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 25 May 2024 04:45:38 -0400 Subject: [PATCH 255/455] fix: add now missing setuptools --- release-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/release-requirements.txt b/release-requirements.txt index 41b9cc3..745ae88 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -30,6 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.7.1 +setuptools==70.0.0 six==1.16.0 tqdm==4.66.3 twine==5.0.0 From 17af2cbc288ec2cad19609b96ff9b5ce36a8b35f Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Sat, 25 May 2024 04:47:35 -0400 Subject: [PATCH 256/455] chore: update python version in release workflow --- .github/workflows/automatic-release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index 93074ed..85b01dd 100644 --- 
a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -29,7 +29,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.12' - name: Install prerequisites run: pip install -r release-requirements.txt - name: Execute release From 15eeff7879ad0bf47e05f65692a046def3bcb5cd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 25 May 2024 08:53:59 +0000 Subject: [PATCH 257/455] --- updated-dependencies: - dependency-name: autopep8 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: black dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: docutils dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: keyring dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: platformdirs dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: pygments dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: requests dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: tqdm dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: twine dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: zipp dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 745ae88..3f746e9 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,17 +1,17 @@ -autopep8==2.1.0 -black==24.3.0 +autopep8==2.1.1 +black==24.4.2 bleach==6.1.0 certifi==2024.2.2 charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 -docutils==0.20.1 +docutils==0.21.2 flake8==7.0.0 gitchangelog==3.0.4 idna==3.7 importlib-metadata==7.1.0 jaraco.classes==3.4.0 -keyring==25.1.0 +keyring==25.2.1 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 @@ -20,20 +20,20 @@ mypy-extensions==1.0.0 packaging==24.0 pathspec==0.12.1 pkginfo==1.10.0 -platformdirs==4.2.0 +platformdirs==4.2.2 pycodestyle==2.11.1 pyflakes==3.2.0 -Pygments==2.17.2 +Pygments==2.18.0 readme-renderer==43.0 -requests==2.32.0 +requests==2.32.2 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.7.1 setuptools==70.0.0 six==1.16.0 -tqdm==4.66.3 -twine==5.0.0 +tqdm==4.66.4 +twine==5.1.0 urllib3==2.2.1 webencodings==0.5.1 -zipp==3.18.1 +zipp==3.18.2 From 4948178a6377809ce16845eb5f1ebaff7a952a6a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 11 Jun 2024 13:17:57 +0000 Subject: [PATCH 258/455] chore(deps): bump the python-packages group across 1 directory with 7 updates Bumps the python-packages group with 7 updates in the / directory: | Package | From | To | | --- | --- | --- | | [autopep8](https://github.com/hhatto/autopep8) | `2.1.1` | `2.2.0` | | [certifi](https://github.com/certifi/python-certifi) | `2024.2.2` | `2024.6.2` | | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.2.0` | `10.3.0` | | [packaging](https://github.com/pypa/packaging) | `24.0` | `24.1` | | [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk) | `1.10.0` | `1.11.1` | | 
[requests](https://github.com/psf/requests) | `2.32.2` | `2.32.3` | | [zipp](https://github.com/jaraco/zipp) | `3.18.2` | `3.19.2` | Updates `autopep8` from 2.1.1 to 2.2.0 - [Release notes](https://github.com/hhatto/autopep8/releases) - [Commits](https://github.com/hhatto/autopep8/compare/v2.1.1...v2.2.0) Updates `certifi` from 2024.2.2 to 2024.6.2 - [Commits](https://github.com/certifi/python-certifi/compare/2024.02.02...2024.06.02) Updates `more-itertools` from 10.2.0 to 10.3.0 - [Release notes](https://github.com/more-itertools/more-itertools/releases) - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.2.0...v10.3.0) Updates `packaging` from 24.0 to 24.1 - [Release notes](https://github.com/pypa/packaging/releases) - [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pypa/packaging/compare/24.0...24.1) Updates `pkginfo` from 1.10.0 to 1.11.1 Updates `requests` from 2.32.2 to 2.32.3 - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.32.2...v2.32.3) Updates `zipp` from 3.18.2 to 3.19.2 - [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.18.2...v3.19.2) --- updated-dependencies: - dependency-name: autopep8 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: certifi dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: more-itertools dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: packaging dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - 
dependency-name: pkginfo dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: requests dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: zipp dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 3f746e9..812a46c 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ -autopep8==2.1.1 +autopep8==2.2.0 black==24.4.2 bleach==6.1.0 -certifi==2024.2.2 +certifi==2024.6.2 charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 @@ -15,17 +15,17 @@ keyring==25.2.1 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 -more-itertools==10.2.0 +more-itertools==10.3.0 mypy-extensions==1.0.0 -packaging==24.0 +packaging==24.1 pathspec==0.12.1 -pkginfo==1.10.0 +pkginfo==1.11.1 platformdirs==4.2.2 pycodestyle==2.11.1 pyflakes==3.2.0 Pygments==2.18.0 readme-renderer==43.0 -requests==2.32.2 +requests==2.32.3 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 @@ -36,4 +36,4 @@ tqdm==4.66.4 twine==5.1.0 urllib3==2.2.1 webencodings==0.5.1 -zipp==3.18.2 +zipp==3.19.2 From 56d3fd75bf7178eb6efbdbd093f6911154880e25 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 13:17:30 +0000 Subject: [PATCH 259/455] chore(deps): bump the python-packages group with 3 updates Bumps the python-packages group with 3 updates: [autopep8](https://github.com/hhatto/autopep8), [flake8](https://github.com/pycqa/flake8) and [pycodestyle](https://github.com/PyCQA/pycodestyle). 
Updates `autopep8` from 2.2.0 to 2.3.0 - [Release notes](https://github.com/hhatto/autopep8/releases) - [Commits](https://github.com/hhatto/autopep8/compare/v2.2.0...v2.3.0) Updates `flake8` from 7.0.0 to 7.1.0 - [Commits](https://github.com/pycqa/flake8/compare/7.0.0...7.1.0) Updates `pycodestyle` from 2.11.1 to 2.12.0 - [Release notes](https://github.com/PyCQA/pycodestyle/releases) - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.11.1...2.12.0) --- updated-dependencies: - dependency-name: autopep8 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: flake8 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pycodestyle dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 812a46c..e343716 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,4 +1,4 @@ -autopep8==2.2.0 +autopep8==2.3.0 black==24.4.2 bleach==6.1.0 certifi==2024.6.2 @@ -6,7 +6,7 @@ charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 docutils==0.21.2 -flake8==7.0.0 +flake8==7.1.0 gitchangelog==3.0.4 idna==3.7 importlib-metadata==7.1.0 @@ -21,7 +21,7 @@ packaging==24.1 pathspec==0.12.1 pkginfo==1.11.1 platformdirs==4.2.2 -pycodestyle==2.11.1 +pycodestyle==2.12.0 pyflakes==3.2.0 Pygments==2.18.0 readme-renderer==43.0 From 1e14a4eecd3ed509823dbab5b1174531435c5833 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 22:53:08 +0000 Subject: [PATCH 260/455] chore(deps): bump urllib3 from 2.2.1 to 2.2.2 Bumps 
[urllib3](https://github.com/urllib3/urllib3) from 2.2.1 to 2.2.2. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.2.1...2.2.2) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 812a46c..0f5ce8b 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -34,6 +34,6 @@ setuptools==70.0.0 six==1.16.0 tqdm==4.66.4 twine==5.1.0 -urllib3==2.2.1 +urllib3==2.2.2 webencodings==0.5.1 zipp==3.19.2 From b474e1654fa0b72b31ebc993fd0e41a63ad8397b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 13:40:17 +0000 Subject: [PATCH 261/455] chore(deps): bump the python-packages group across 1 directory with 2 updates Bumps the python-packages group with 2 updates in the / directory: [importlib-metadata](https://github.com/python/importlib_metadata) and [setuptools](https://github.com/pypa/setuptools). 
Updates `importlib-metadata` from 7.1.0 to 7.2.0 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v7.1.0...v7.2.0) Updates `setuptools` from 70.0.0 to 70.1.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.0.0...v70.1.0) --- updated-dependencies: - dependency-name: importlib-metadata dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 1740d16..d653dde 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -9,7 +9,7 @@ docutils==0.21.2 flake8==7.1.0 gitchangelog==3.0.4 idna==3.7 -importlib-metadata==7.1.0 +importlib-metadata==7.2.0 jaraco.classes==3.4.0 keyring==25.2.1 markdown-it-py==3.0.0 @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.7.1 -setuptools==70.0.0 +setuptools==70.1.0 six==1.16.0 tqdm==4.66.4 twine==5.1.0 From 09bbcfc7b129ac21459f036d99dcd0d61f19fad5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 04:42:05 +0000 Subject: [PATCH 262/455] chore(deps): bump docker/build-push-action from 5 to 6 Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 5 to 6. 
- [Release notes](https://github.com/docker/build-push-action/releases) - [Commits](https://github.com/docker/build-push-action/compare/v5...v6) --- updated-dependencies: - dependency-name: docker/build-push-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index fb8cfa3..b0607f7 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -68,7 +68,7 @@ jobs: type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} - name: Build and push Docker image - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: . push: true From 2e9db92b6891d7cc555488d783bc681663041000 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Jun 2024 13:47:45 +0000 Subject: [PATCH 263/455] chore(deps): bump the python-packages group across 1 directory with 3 updates Bumps the python-packages group with 3 updates in the / directory: [autopep8](https://github.com/hhatto/autopep8), [importlib-metadata](https://github.com/python/importlib_metadata) and [setuptools](https://github.com/pypa/setuptools). 
Updates `autopep8` from 2.3.0 to 2.3.1 - [Release notes](https://github.com/hhatto/autopep8/releases) - [Commits](https://github.com/hhatto/autopep8/compare/v2.3.0...v2.3.1) Updates `importlib-metadata` from 7.2.0 to 7.2.1 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v7.2.0...v7.2.1) Updates `setuptools` from 70.1.0 to 70.1.1 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.1.0...v70.1.1) --- updated-dependencies: - dependency-name: autopep8 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: importlib-metadata dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index d653dde..cf02f90 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,4 +1,4 @@ -autopep8==2.3.0 +autopep8==2.3.1 black==24.4.2 bleach==6.1.0 certifi==2024.6.2 @@ -9,7 +9,7 @@ docutils==0.21.2 flake8==7.1.0 gitchangelog==3.0.4 idna==3.7 -importlib-metadata==7.2.0 +importlib-metadata==7.2.1 jaraco.classes==3.4.0 keyring==25.2.1 markdown-it-py==3.0.0 @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.7.1 -setuptools==70.1.0 +setuptools==70.1.1 six==1.16.0 tqdm==4.66.4 twine==5.1.0 From f6ad29673050f3767ba2daca6566a04211317749 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 6 Jul 2024 02:24:22 +0000 Subject: [PATCH 264/455] chore(deps): bump certifi from 2024.6.2 to 2024.7.4 Bumps [certifi](https://github.com/certifi/python-certifi) from 2024.6.2 to 2024.7.4. - [Commits](https://github.com/certifi/python-certifi/compare/2024.06.02...2024.07.04) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index cf02f90..9ffeaaf 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ autopep8==2.3.1 black==24.4.2 bleach==6.1.0 -certifi==2024.6.2 +certifi==2024.7.4 charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 From aacb252e5719374ff9a2a47d54a7d1bbd8a24ddf Mon Sep 17 00:00:00 2001 From: Albert Wang Date: Sun, 21 Jul 2024 16:46:20 -0700 Subject: [PATCH 265/455] Upgrade github workflow ubuntu containers to newest LTS --- .github/workflows/automatic-release.yml | 2 +- .github/workflows/lint.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index f5b8f64..dc4e18e 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -15,7 +15,7 @@ on: jobs: release: name: Release - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d3df703..582f2d7 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -14,7 +14,7 @@ on: jobs: lint: name: lint - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - name: Checkout repository From ba46cb87e8cd2a7d0c3db8f733762cb686d5ef46 Mon Sep 17 00:00:00 2001 From: Jarl Totland Date: Wed, 24 Jul 2024 10:43:30 +0200 Subject: [PATCH 266/455] fix: do not double encode auth when retrieving release assets --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index ca9e88e..299a3a1 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1259,7 +1259,7 @@ def backup_releases(args, repo_cwd, repository, 
repos_template, include_assets=F download_file( asset["url"], os.path.join(release_assets_cwd, asset["name"]), - get_auth(args), + get_auth(args, encode=not args.as_app), as_app=args.as_app, fine=True if args.token_fine is not None else False ) From 04c70ce277565bdb1c2c0732ff5d29756b94e995 Mon Sep 17 00:00:00 2001 From: Louis Parisot Date: Tue, 10 Sep 2024 11:00:17 +0200 Subject: [PATCH 267/455] git fetch is required even when using lfs --- github_backup/github_backup.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 299a3a1..b7b8916 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1318,13 +1318,15 @@ def fetch_repository( git_command = ["git", "remote", "set-url", "origin", remote_url] logging_subprocess(git_command, cwd=local_dir) - if lfs_clone: - git_command = ["git", "lfs", "fetch", "--all", "--prune"] - else: - git_command = ["git", "fetch", "--all", "--force", "--tags", "--prune"] + git_command = ["git", "fetch", "--all", "--force", "--tags", "--prune"] if no_prune: git_command.pop() logging_subprocess(git_command, cwd=local_dir) + if lfs_clone: + git_command = ["git", "lfs", "fetch", "--all", "--prune"] + if no_prune: + git_command.pop() + logging_subprocess(git_command, cwd=local_dir) else: logger.info( "Cloning {0} repository from {1} to {2}".format( From 0846e7d8e574f400b493dc3437780b3412a6bc69 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 11 Sep 2024 18:51:53 +0000 Subject: [PATCH 268/455] Release version 0.46.0 --- CHANGES.rst | 263 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 263 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 588c2fd..cf76177 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,270 @@ Changelog ========= -0.45.2 (2024-05-04) +0.46.0 (2024-09-11) ------------------- ------------------------ + +Fix +~~~ +- Do not double 
encode auth when retrieving release assets. [Jarl + Totland] +- Add now missing setuptools. [Jose Diaz-Gonzalez] + +Other +~~~~~ +- Git fetch is required even when using lfs. [Louis Parisot] +- Upgrade github workflow ubuntu containers to newest LTS. [Albert Wang] +- Chore(deps): bump certifi from 2024.6.2 to 2024.7.4. [dependabot[bot]] + + Bumps [certifi](https://github.com/certifi/python-certifi) from 2024.6.2 to 2024.7.4. + - [Commits](https://github.com/certifi/python-certifi/compare/2024.06.02...2024.07.04) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-type: direct:production + ... +- Chore(deps): bump docker/build-push-action from 5 to 6. + [dependabot[bot]] + + Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 5 to 6. + - [Release notes](https://github.com/docker/build-push-action/releases) + - [Commits](https://github.com/docker/build-push-action/compare/v5...v6) + + --- + updated-dependencies: + - dependency-name: docker/build-push-action + dependency-type: direct:production + update-type: version-update:semver-major + ... +- Chore(deps): bump the python-packages group across 1 directory with 3 + updates. [dependabot[bot]] + + Bumps the python-packages group with 3 updates in the / directory: [autopep8](https://github.com/hhatto/autopep8), [importlib-metadata](https://github.com/python/importlib_metadata) and [setuptools](https://github.com/pypa/setuptools). 
+ + + Updates `autopep8` from 2.3.0 to 2.3.1 + - [Release notes](https://github.com/hhatto/autopep8/releases) + - [Commits](https://github.com/hhatto/autopep8/compare/v2.3.0...v2.3.1) + + Updates `importlib-metadata` from 7.2.0 to 7.2.1 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v7.2.0...v7.2.1) + + Updates `setuptools` from 70.1.0 to 70.1.1 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v70.1.0...v70.1.1) + + --- + updated-dependencies: + - dependency-name: autopep8 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: importlib-metadata + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group across 1 directory with 2 + updates. [dependabot[bot]] + + Bumps the python-packages group with 2 updates in the / directory: [importlib-metadata](https://github.com/python/importlib_metadata) and [setuptools](https://github.com/pypa/setuptools). 
+ + + Updates `importlib-metadata` from 7.1.0 to 7.2.0 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v7.1.0...v7.2.0) + + Updates `setuptools` from 70.0.0 to 70.1.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v70.0.0...v70.1.0) + + --- + updated-dependencies: + - dependency-name: importlib-metadata + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 3 updates. + [dependabot[bot]] + + Bumps the python-packages group with 3 updates: [autopep8](https://github.com/hhatto/autopep8), [flake8](https://github.com/pycqa/flake8) and [pycodestyle](https://github.com/PyCQA/pycodestyle). 
+ + + Updates `autopep8` from 2.2.0 to 2.3.0 + - [Release notes](https://github.com/hhatto/autopep8/releases) + - [Commits](https://github.com/hhatto/autopep8/compare/v2.2.0...v2.3.0) + + Updates `flake8` from 7.0.0 to 7.1.0 + - [Commits](https://github.com/pycqa/flake8/compare/7.0.0...7.1.0) + + Updates `pycodestyle` from 2.11.1 to 2.12.0 + - [Release notes](https://github.com/PyCQA/pycodestyle/releases) + - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) + - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.11.1...2.12.0) + + --- + updated-dependencies: + - dependency-name: autopep8 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: flake8 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pycodestyle + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump urllib3 from 2.2.1 to 2.2.2. [dependabot[bot]] + + Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.2.1 to 2.2.2. + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.2.1...2.2.2) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-type: direct:production + ... +- Chore(deps): bump the python-packages group across 1 directory with 7 + updates. 
[dependabot[bot]] + + Bumps the python-packages group with 7 updates in the / directory: + + | Package | From | To | + | --- | --- | --- | + | [autopep8](https://github.com/hhatto/autopep8) | `2.1.1` | `2.2.0` | + | [certifi](https://github.com/certifi/python-certifi) | `2024.2.2` | `2024.6.2` | + | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.2.0` | `10.3.0` | + | [packaging](https://github.com/pypa/packaging) | `24.0` | `24.1` | + | [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk) | `1.10.0` | `1.11.1` | + | [requests](https://github.com/psf/requests) | `2.32.2` | `2.32.3` | + | [zipp](https://github.com/jaraco/zipp) | `3.18.2` | `3.19.2` | + + + + Updates `autopep8` from 2.1.1 to 2.2.0 + - [Release notes](https://github.com/hhatto/autopep8/releases) + - [Commits](https://github.com/hhatto/autopep8/compare/v2.1.1...v2.2.0) + + Updates `certifi` from 2024.2.2 to 2024.6.2 + - [Commits](https://github.com/certifi/python-certifi/compare/2024.02.02...2024.06.02) + + Updates `more-itertools` from 10.2.0 to 10.3.0 + - [Release notes](https://github.com/more-itertools/more-itertools/releases) + - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.2.0...v10.3.0) + + Updates `packaging` from 24.0 to 24.1 + - [Release notes](https://github.com/pypa/packaging/releases) + - [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst) + - [Commits](https://github.com/pypa/packaging/compare/24.0...24.1) + + Updates `pkginfo` from 1.10.0 to 1.11.1 + + Updates `requests` from 2.32.2 to 2.32.3 + - [Release notes](https://github.com/psf/requests/releases) + - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) + - [Commits](https://github.com/psf/requests/compare/v2.32.2...v2.32.3) + + Updates `zipp` from 3.18.2 to 3.19.2 + - [Release notes](https://github.com/jaraco/zipp/releases) + - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) + - 
[Commits](https://github.com/jaraco/zipp/compare/v3.18.2...v3.19.2) + + --- + updated-dependencies: + - dependency-name: autopep8 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: certifi + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: more-itertools + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: packaging + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pkginfo + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: requests + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: zipp + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... 
+- --- updated-dependencies: - dependency-name: autopep8 dependency- + type: direct:production update-type: version-update:semver-patch + dependency-group: python-packages - dependency-name: black + dependency-type: direct:production update-type: version- + update:semver-minor dependency-group: python-packages - dependency- + name: docutils dependency-type: direct:production update-type: + version-update:semver-minor dependency-group: python-packages - + dependency-name: keyring dependency-type: direct:production + update-type: version-update:semver-minor dependency-group: python- + packages - dependency-name: platformdirs dependency-type: + direct:production update-type: version-update:semver-patch + dependency-group: python-packages - dependency-name: pygments + dependency-type: direct:production update-type: version- + update:semver-minor dependency-group: python-packages - dependency- + name: requests dependency-type: direct:production update-type: + version-update:semver-minor dependency-group: python-packages - + dependency-name: tqdm dependency-type: direct:production update- + type: version-update:semver-patch dependency-group: python-packages + - dependency-name: twine dependency-type: direct:production + update-type: version-update:semver-minor dependency-group: python- + packages - dependency-name: zipp dependency-type: direct:production + update-type: version-update:semver-patch dependency-group: python- + packages ... [dependabot[bot]] +- Chore: update python version in release workflow. [Jose Diaz-Gonzalez] +- Chore: update python version used in linting. [Jose Diaz-Gonzalez] +- Chore: drop unsupported python versions and add supported ones. [Jose + Diaz-Gonzalez] +- --- updated-dependencies: - dependency-name: requests dependency- + type: direct:production ... [dependabot[bot]] +- Chore(deps): bump tqdm from 4.66.2 to 4.66.3. [dependabot[bot]] + + Bumps [tqdm](https://github.com/tqdm/tqdm) from 4.66.2 to 4.66.3. 
+ - [Release notes](https://github.com/tqdm/tqdm/releases) + - [Commits](https://github.com/tqdm/tqdm/compare/v4.66.2...v4.66.3) + + --- + updated-dependencies: + - dependency-name: tqdm + dependency-type: direct:production + ... + + +0.45.2 (2024-05-04) +------------------- - Chore(deps): bump idna from 3.6 to 3.7. [dependabot[bot]] Bumps [idna](https://github.com/kjd/idna) from 3.6 to 3.7. diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 4ce7032..6f70987 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.45.2" +__version__ = "0.46.0" From ca3c4fa64b5285e1b5eab09e2f42ece076b41583 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Fri, 13 Sep 2024 07:26:02 +0200 Subject: [PATCH 269/455] Fix punctuation in README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 34de98f..66d0491 100644 --- a/README.rst +++ b/README.rst @@ -225,7 +225,7 @@ Gotchas / Known-issues All is not everything --------------------- -The ``--all`` argument does not include; cloning private repos (``-P, --private``), cloning forks (``-F, --fork``) cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--starred-gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. +The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--starred-gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. Cloning all starred size ------------------------ From 548a2ec4052ec91f3ff7b204ab95db7b99085ad7 Mon Sep 17 00:00:00 2001 From: John Doe Date: Sat, 21 Sep 2024 20:50:38 -0400 Subject: [PATCH 270/455] Detect empty HTTPS contexts. 
Some users are relying solely on the certifi package to provide their CA certs, as requests does this by default. This patch detects this situation and emits a clear warning as well as importing certifi to work around the situation. Fixes #162. --- github_backup/github_backup.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index b7b8916..0cb7d8d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -15,6 +15,7 @@ import re import select import socket +import ssl import subprocess import sys import time @@ -36,6 +37,18 @@ FILE_URI_PREFIX = "file://" logger = logging.getLogger(__name__) +https_ctx = ssl.create_default_context() +if not https_ctx.get_ca_certs(): + import warnings + warnings.warn('\n\nYOUR DEFAULT CA CERTS ARE EMPTY.\n' + + 'PLEASE POPULATE ANY OF:' + + ''.join([ + '\n - ' + x + for x in ssl.get_default_verify_paths() + if type(x) is str + ]) + '\n', stacklevel=2) + import certifi + https_ctx = ssl.create_default_context(cafile=certifi.where()) def logging_subprocess( popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs @@ -666,7 +679,7 @@ def _get_response(request, auth, template): while True: should_continue = False try: - r = urlopen(request) + r = urlopen(request, context=https_ctx) except HTTPError as exc: errors, should_continue = _request_http_error(exc, auth, errors) # noqa r = exc From 53f6650f61e1cae7375be42850cfc3d6ea681bb7 Mon Sep 17 00:00:00 2001 From: John Doe Date: Sat, 21 Sep 2024 21:38:23 -0400 Subject: [PATCH 271/455] KeyError fix with gists --- github_backup/github_backup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index b7b8916..b8e64d3 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -931,11 +931,11 @@ def filter_repositories(args,
unfiltered_repositories): if r.get("language") and r.get("language").lower() in languages ] # noqa if name_regex: - repositories = [r for r in repositories if name_regex.match(r["name"])] + repositories = [r for r in repositories if "name" not in r or name_regex.match(r["name"])] if args.skip_archived: repositories = [r for r in repositories if not r.get("archived")] if args.exclude: - repositories = [r for r in repositories if r["name"] not in args.exclude] + repositories = [r for r in repositories if "name" not in r or r["name"] not in args.exclude] return repositories From c8c71239c736f4b8484f33fa3bc1d33aaef8e682 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 17:57:51 +0000 Subject: [PATCH 272/455] chore(deps): bump the python-packages group across 1 directory with 20 updates Bumps the python-packages group with 20 updates in the / directory: | Package | From | To | | --- | --- | --- | | [black](https://github.com/psf/black) | `24.4.2` | `24.10.0` | | [bleach](https://github.com/mozilla/bleach) | `6.1.0` | `6.2.0` | | [certifi](https://github.com/certifi/python-certifi) | `2024.7.4` | `2024.8.30` | | [charset-normalizer](https://github.com/Ousret/charset_normalizer) | `3.3.2` | `3.4.0` | | [flake8](https://github.com/pycqa/flake8) | `7.1.0` | `7.1.1` | | [idna](https://github.com/kjd/idna) | `3.7` | `3.10` | | [importlib-metadata](https://github.com/python/importlib_metadata) | `7.2.1` | `8.5.0` | | [keyring](https://github.com/jaraco/keyring) | `25.2.1` | `25.5.0` | | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.3.0` | `10.5.0` | | [packaging](https://github.com/pypa/packaging) | `24.1` | `24.2` | | [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk) | `1.11.1` | `1.12.0` | | [platformdirs](https://github.com/tox-dev/platformdirs) | `4.2.2` | `4.3.6` | | [pycodestyle](https://github.com/PyCQA/pycodestyle) | `2.12.0` | `2.12.1` | | 
[readme-renderer](https://github.com/pypa/readme_renderer) | `43.0` | `44.0` | | [rich](https://github.com/Textualize/rich) | `13.7.1` | `13.9.4` | | [setuptools](https://github.com/pypa/setuptools) | `70.1.1` | `75.6.0` | | [tqdm](https://github.com/tqdm/tqdm) | `4.66.4` | `4.67.1` | | [twine](https://github.com/pypa/twine) | `5.1.0` | `6.0.1` | | [urllib3](https://github.com/urllib3/urllib3) | `2.2.2` | `2.2.3` | | [zipp](https://github.com/jaraco/zipp) | `3.19.2` | `3.21.0` | Updates `black` from 24.4.2 to 24.10.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/24.4.2...24.10.0) Updates `bleach` from 6.1.0 to 6.2.0 - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) - [Commits](https://github.com/mozilla/bleach/compare/v6.1.0...v6.2.0) Updates `certifi` from 2024.7.4 to 2024.8.30 - [Commits](https://github.com/certifi/python-certifi/compare/2024.07.04...2024.08.30) Updates `charset-normalizer` from 3.3.2 to 3.4.0 - [Release notes](https://github.com/Ousret/charset_normalizer/releases) - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) - [Commits](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) Updates `flake8` from 7.1.0 to 7.1.1 - [Commits](https://github.com/pycqa/flake8/compare/7.1.0...7.1.1) Updates `idna` from 3.7 to 3.10 - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.7...v3.10) Updates `importlib-metadata` from 7.2.1 to 8.5.0 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v7.2.1...v8.5.0) Updates `keyring` from 25.2.1 to 25.5.0 - [Release 
notes](https://github.com/jaraco/keyring/releases) - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/keyring/compare/v25.2.1...v25.5.0) Updates `more-itertools` from 10.3.0 to 10.5.0 - [Release notes](https://github.com/more-itertools/more-itertools/releases) - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.3.0...v10.5.0) Updates `packaging` from 24.1 to 24.2 - [Release notes](https://github.com/pypa/packaging/releases) - [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pypa/packaging/compare/24.1...24.2) Updates `pkginfo` from 1.11.1 to 1.12.0 Updates `platformdirs` from 4.2.2 to 4.3.6 - [Release notes](https://github.com/tox-dev/platformdirs/releases) - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) - [Commits](https://github.com/tox-dev/platformdirs/compare/4.2.2...4.3.6) Updates `pycodestyle` from 2.12.0 to 2.12.1 - [Release notes](https://github.com/PyCQA/pycodestyle/releases) - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.12.0...2.12.1) Updates `readme-renderer` from 43.0 to 44.0 - [Release notes](https://github.com/pypa/readme_renderer/releases) - [Changelog](https://github.com/pypa/readme_renderer/blob/main/CHANGES.rst) - [Commits](https://github.com/pypa/readme_renderer/compare/43.0...44.0) Updates `rich` from 13.7.1 to 13.9.4 - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v13.7.1...v13.9.4) Updates `setuptools` from 70.1.1 to 75.6.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.1.1...v75.6.0) Updates `tqdm` from 4.66.4 to 
4.67.1 - [Release notes](https://github.com/tqdm/tqdm/releases) - [Commits](https://github.com/tqdm/tqdm/compare/v4.66.4...v4.67.1) Updates `twine` from 5.1.0 to 6.0.1 - [Release notes](https://github.com/pypa/twine/releases) - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) - [Commits](https://github.com/pypa/twine/compare/5.1.0...6.0.1) Updates `urllib3` from 2.2.2 to 2.2.3 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.2.2...2.2.3) Updates `zipp` from 3.19.2 to 3.21.0 - [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.19.2...v3.21.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: bleach dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: certifi dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: charset-normalizer dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: flake8 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: idna dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: importlib-metadata dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: keyring dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: 
more-itertools dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: packaging dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pkginfo dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: platformdirs dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pycodestyle dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: readme-renderer dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: rich dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: tqdm dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: twine dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: urllib3 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: zipp dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 9ffeaaf..00beca4 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,39 +1,39 @@ autopep8==2.3.1 -black==24.4.2 -bleach==6.1.0 -certifi==2024.7.4 -charset-normalizer==3.3.2 +black==24.10.0 +bleach==6.2.0 +certifi==2024.8.30 +charset-normalizer==3.4.0 click==8.1.7 colorama==0.4.6 docutils==0.21.2 -flake8==7.1.0 +flake8==7.1.1 gitchangelog==3.0.4 -idna==3.7 -importlib-metadata==7.2.1 +idna==3.10 +importlib-metadata==8.5.0 jaraco.classes==3.4.0 -keyring==25.2.1 +keyring==25.5.0 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 -more-itertools==10.3.0 +more-itertools==10.5.0 mypy-extensions==1.0.0 -packaging==24.1 +packaging==24.2 pathspec==0.12.1 -pkginfo==1.11.1 -platformdirs==4.2.2 -pycodestyle==2.12.0 +pkginfo==1.12.0 +platformdirs==4.3.6 +pycodestyle==2.12.1 pyflakes==3.2.0 Pygments==2.18.0 -readme-renderer==43.0 +readme-renderer==44.0 requests==2.32.3 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 -rich==13.7.1 -setuptools==70.1.1 +rich==13.9.4 +setuptools==75.6.0 six==1.16.0 -tqdm==4.66.4 -twine==5.1.0 -urllib3==2.2.2 +tqdm==4.67.1 +twine==6.0.1 +urllib3==2.2.3 webencodings==0.5.1 -zipp==3.19.2 +zipp==3.21.0 From cb66375e1e383158cdfd72a74f93bf2ca5eeecd4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:24:14 +0000 Subject: [PATCH 273/455] chore(deps): bump six from 1.16.0 to 1.17.0 in the python-packages group Bumps the python-packages group with 1 update: [six](https://github.com/benjaminp/six). 
Updates `six` from 1.16.0 to 1.17.0 - [Changelog](https://github.com/benjaminp/six/blob/main/CHANGES) - [Commits](https://github.com/benjaminp/six/compare/1.16.0...1.17.0) --- updated-dependencies: - dependency-name: six dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 00beca4..861fd43 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -31,7 +31,7 @@ restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.9.4 setuptools==75.6.0 -six==1.16.0 +six==1.17.0 tqdm==4.67.1 twine==6.0.1 urllib3==2.2.3 From e981ce3ff95de7f5e834276fef113ac5c0c2b87c Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 9 Dec 2024 14:46:36 +0000 Subject: [PATCH 274/455] Release version 0.47.0 --- CHANGES.rst | 235 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 235 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index cf76177..879a854 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,242 @@ Changelog ========= -0.46.0 (2024-09-11) +0.47.0 (2024-12-09) ------------------- ------------------------ +- Detect empty HTTPS contexts. [John Doe] + + Some users are relying solely on the certifi package to provide their CA certs, as requests does this by default. + + This patch detects this situation and emits a clear warning as well as importing certifi to work around the situation.. + + Fixes #162 . +- Chore(deps): bump six from 1.16.0 to 1.17.0 in the python-packages + group. [dependabot[bot]] + + Bumps the python-packages group with 1 update: [six](https://github.com/benjaminp/six). 
+ + + Updates `six` from 1.16.0 to 1.17.0 + - [Changelog](https://github.com/benjaminp/six/blob/main/CHANGES) + - [Commits](https://github.com/benjaminp/six/compare/1.16.0...1.17.0) + + --- + updated-dependencies: + - dependency-name: six + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group across 1 directory with 20 + updates. [dependabot[bot]] + + Bumps the python-packages group with 20 updates in the / directory: + + | Package | From | To | + | --- | --- | --- | + | [black](https://github.com/psf/black) | `24.4.2` | `24.10.0` | + | [bleach](https://github.com/mozilla/bleach) | `6.1.0` | `6.2.0` | + | [certifi](https://github.com/certifi/python-certifi) | `2024.7.4` | `2024.8.30` | + | [charset-normalizer](https://github.com/Ousret/charset_normalizer) | `3.3.2` | `3.4.0` | + | [flake8](https://github.com/pycqa/flake8) | `7.1.0` | `7.1.1` | + | [idna](https://github.com/kjd/idna) | `3.7` | `3.10` | + | [importlib-metadata](https://github.com/python/importlib_metadata) | `7.2.1` | `8.5.0` | + | [keyring](https://github.com/jaraco/keyring) | `25.2.1` | `25.5.0` | + | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.3.0` | `10.5.0` | + | [packaging](https://github.com/pypa/packaging) | `24.1` | `24.2` | + | [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk) | `1.11.1` | `1.12.0` | + | [platformdirs](https://github.com/tox-dev/platformdirs) | `4.2.2` | `4.3.6` | + | [pycodestyle](https://github.com/PyCQA/pycodestyle) | `2.12.0` | `2.12.1` | + | [readme-renderer](https://github.com/pypa/readme_renderer) | `43.0` | `44.0` | + | [rich](https://github.com/Textualize/rich) | `13.7.1` | `13.9.4` | + | [setuptools](https://github.com/pypa/setuptools) | `70.1.1` | `75.6.0` | + | [tqdm](https://github.com/tqdm/tqdm) | `4.66.4` | `4.67.1` | + | [twine](https://github.com/pypa/twine) | `5.1.0` | `6.0.1` | + | 
[urllib3](https://github.com/urllib3/urllib3) | `2.2.2` | `2.2.3` | + | [zipp](https://github.com/jaraco/zipp) | `3.19.2` | `3.21.0` | + + + + Updates `black` from 24.4.2 to 24.10.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/24.4.2...24.10.0) + + Updates `bleach` from 6.1.0 to 6.2.0 + - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) + - [Commits](https://github.com/mozilla/bleach/compare/v6.1.0...v6.2.0) + + Updates `certifi` from 2024.7.4 to 2024.8.30 + - [Commits](https://github.com/certifi/python-certifi/compare/2024.07.04...2024.08.30) + + Updates `charset-normalizer` from 3.3.2 to 3.4.0 + - [Release notes](https://github.com/Ousret/charset_normalizer/releases) + - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) + - [Commits](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) + + Updates `flake8` from 7.1.0 to 7.1.1 + - [Commits](https://github.com/pycqa/flake8/compare/7.1.0...7.1.1) + + Updates `idna` from 3.7 to 3.10 + - [Release notes](https://github.com/kjd/idna/releases) + - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) + - [Commits](https://github.com/kjd/idna/compare/v3.7...v3.10) + + Updates `importlib-metadata` from 7.2.1 to 8.5.0 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v7.2.1...v8.5.0) + + Updates `keyring` from 25.2.1 to 25.5.0 + - [Release notes](https://github.com/jaraco/keyring/releases) + - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/keyring/compare/v25.2.1...v25.5.0) + + Updates `more-itertools` from 10.3.0 to 10.5.0 + - [Release 
notes](https://github.com/more-itertools/more-itertools/releases) + - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.3.0...v10.5.0) + + Updates `packaging` from 24.1 to 24.2 + - [Release notes](https://github.com/pypa/packaging/releases) + - [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst) + - [Commits](https://github.com/pypa/packaging/compare/24.1...24.2) + + Updates `pkginfo` from 1.11.1 to 1.12.0 + + Updates `platformdirs` from 4.2.2 to 4.3.6 + - [Release notes](https://github.com/tox-dev/platformdirs/releases) + - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) + - [Commits](https://github.com/tox-dev/platformdirs/compare/4.2.2...4.3.6) + + Updates `pycodestyle` from 2.12.0 to 2.12.1 + - [Release notes](https://github.com/PyCQA/pycodestyle/releases) + - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) + - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.12.0...2.12.1) + + Updates `readme-renderer` from 43.0 to 44.0 + - [Release notes](https://github.com/pypa/readme_renderer/releases) + - [Changelog](https://github.com/pypa/readme_renderer/blob/main/CHANGES.rst) + - [Commits](https://github.com/pypa/readme_renderer/compare/43.0...44.0) + + Updates `rich` from 13.7.1 to 13.9.4 + - [Release notes](https://github.com/Textualize/rich/releases) + - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) + - [Commits](https://github.com/Textualize/rich/compare/v13.7.1...v13.9.4) + + Updates `setuptools` from 70.1.1 to 75.6.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v70.1.1...v75.6.0) + + Updates `tqdm` from 4.66.4 to 4.67.1 + - [Release notes](https://github.com/tqdm/tqdm/releases) + - [Commits](https://github.com/tqdm/tqdm/compare/v4.66.4...v4.67.1) + + Updates `twine` from 5.1.0 to 6.0.1 + - 
[Release notes](https://github.com/pypa/twine/releases) + - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) + - [Commits](https://github.com/pypa/twine/compare/5.1.0...6.0.1) + + Updates `urllib3` from 2.2.2 to 2.2.3 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.2.2...2.2.3) + + Updates `zipp` from 3.19.2 to 3.21.0 + - [Release notes](https://github.com/jaraco/zipp/releases) + - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/zipp/compare/v3.19.2...v3.21.0) + + --- + updated-dependencies: + - dependency-name: black + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: bleach + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: certifi + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: charset-normalizer + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: flake8 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: idna + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: importlib-metadata + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: keyring + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: more-itertools + dependency-type: direct:production + update-type: 
version-update:semver-minor + dependency-group: python-packages + - dependency-name: packaging + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pkginfo + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: platformdirs + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pycodestyle + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: readme-renderer + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: rich + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: tqdm + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: twine + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: urllib3 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: zipp + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- KeyError fix with gists. [John Doe] +- Fix punctuation in README. 
[Jakub Wilk] + + +0.46.0 (2024-09-11) +------------------- Fix ~~~ diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 6f70987..bf97bc4 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.46.0" +__version__ = "0.47.0" From c39ec9c549e4bbc21b6a4dfe579f0fae5ace9524 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 13:14:44 +0000 Subject: [PATCH 275/455] chore(deps): bump certifi in the python-packages group Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). Updates `certifi` from 2024.8.30 to 2024.12.14 - [Commits](https://github.com/certifi/python-certifi/compare/2024.08.30...2024.12.14) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 861fd43..d242d7a 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ autopep8==2.3.1 black==24.10.0 bleach==6.2.0 -certifi==2024.8.30 +certifi==2024.12.14 charset-normalizer==3.4.0 click==8.1.7 colorama==0.4.6 From b0bfffde1a838acca1f41a53a966e47e43ff3a0d Mon Sep 17 00:00:00 2001 From: "Michael D. 
Adams" Date: Sat, 28 Dec 2024 10:14:37 +0000 Subject: [PATCH 276/455] Fix typo README.rst: --starred-gists that should be --gists --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 66d0491..5583bf4 100644 --- a/README.rst +++ b/README.rst @@ -225,7 +225,7 @@ Gotchas / Known-issues All is not everything --------------------- -The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--starred-gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. +The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. Cloning all starred size ------------------------ From dcb89a5c336f39d78d7648bca2efe8b75a4e86d7 Mon Sep 17 00:00:00 2001 From: Ethan White Date: Sat, 28 Dec 2024 06:41:54 -0500 Subject: [PATCH 277/455] Remove fixed release issue from known blocking errors The issue with --release producing errors documented in #209 (the linked issue) and #234 appears to have been fixed in #257. This change removes the associated warning from the README. --- README.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.rst b/README.rst index 66d0491..c07b1ef 100644 --- a/README.rst +++ b/README.rst @@ -254,12 +254,6 @@ It's therefore recommended to only use the incremental argument if the output/re This is due to needing the correct permission for ``--hooks`` on public repos. -2. **Releases blocking** - - A known ``--releases`` (required for ``--assets``) error will sometimes block the backup. 
- - If you're backing up a lot of repositories with releases e.g. an organisation or ``--all-starred``. You may need to remove ``--releases`` (and therefore ``--assets``) to complete a backup. Documented in `issue 209 `_. - "bare" is actually "mirror" --------------------------- From 3dc36917707992db227c737dcd3cf2593612477a Mon Sep 17 00:00:00 2001 From: "Michael D. Adams" Date: Sun, 29 Dec 2024 09:02:34 +0000 Subject: [PATCH 278/455] Update required permissions listed in README.rst Removed unused permissions, and changed names to those currently used by GitHub. - code: renamed to contents as used by GitHub - commit statuses: removed because not used by github-backup - pages: removed because not used by github-backup - repository hooks: renamed to webhooks as used by GitHub --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 2abd023..9e01f26 100644 --- a/README.rst +++ b/README.rst @@ -167,7 +167,7 @@ Customise the permissions for your use case, but for a personal account full bac **User permissions**: Read access to followers, starring, and watching. -**Repository permissions**: Read access to code, commit statuses, issues, metadata, pages, pull requests, and repository hooks. +**Repository permissions**: Read access to contents, issues, metadata, pull requests, and webhooks. Prefer SSH From 68fe29d1e10718f26d62817b0b496f870b5d4257 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 13:34:44 +0000 Subject: [PATCH 279/455] chore(deps): bump the python-packages group across 1 directory with 4 updates Bumps the python-packages group with 4 updates in the / directory: [charset-normalizer](https://github.com/jawah/charset_normalizer), [click](https://github.com/pallets/click), [keyring](https://github.com/jaraco/keyring) and [urllib3](https://github.com/urllib3/urllib3). 
Updates `charset-normalizer` from 3.4.0 to 3.4.1 - [Release notes](https://github.com/jawah/charset_normalizer/releases) - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) - [Commits](https://github.com/jawah/charset_normalizer/compare/3.4.0...3.4.1) Updates `click` from 8.1.7 to 8.1.8 - [Release notes](https://github.com/pallets/click/releases) - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/click/compare/8.1.7...8.1.8) Updates `keyring` from 25.5.0 to 25.6.0 - [Release notes](https://github.com/jaraco/keyring/releases) - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/keyring/compare/v25.5.0...v25.6.0) Updates `urllib3` from 2.2.3 to 2.3.0 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.2.3...2.3.0) --- updated-dependencies: - dependency-name: charset-normalizer dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: click dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: keyring dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: urllib3 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index d242d7a..00dee95 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -2,8 +2,8 @@ autopep8==2.3.1 black==24.10.0 bleach==6.2.0 certifi==2024.12.14 -charset-normalizer==3.4.0 -click==8.1.7 +charset-normalizer==3.4.1 +click==8.1.8 colorama==0.4.6 docutils==0.21.2 flake8==7.1.1 @@ -11,7 +11,7 @@ gitchangelog==3.0.4 idna==3.10 importlib-metadata==8.5.0 jaraco.classes==3.4.0 -keyring==25.5.0 +keyring==25.6.0 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 @@ -34,6 +34,6 @@ setuptools==75.6.0 six==1.17.0 tqdm==4.67.1 twine==6.0.1 -urllib3==2.2.3 +urllib3==2.3.0 webencodings==0.5.1 zipp==3.21.0 From 0e0197149e50b14b49de6f77cb7b7a7c16156a31 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 3 Jan 2025 20:07:40 -0500 Subject: [PATCH 280/455] chore: reformat file to fix lint issues --- github_backup/github_backup.py | 63 +++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index c9268cd..ebd4f01 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -19,12 +19,12 @@ import subprocess import sys import time +from datetime import datetime from http.client import IncompleteRead from urllib.error import HTTPError, URLError from urllib.parse import quote as urlquote from urllib.parse import urlencode, urlparse from urllib.request import HTTPRedirectHandler, Request, build_opener, urlopen -from datetime import datetime try: from . 
import __version__ @@ -40,16 +40,21 @@ https_ctx = ssl.create_default_context() if not https_ctx.get_ca_certs(): import warnings - warnings.warn('\n\nYOUR DEFAULT CA CERTS ARE EMPTY.\n' + - 'PLEASE POPULATE ANY OF:' + - ''.join([ - '\n - ' + x - for x in ssl.get_default_verify_paths() - if type(x) is str - ]) + '\n', stacklevel=2) + + warnings.warn( + "\n\nYOUR DEFAULT CA CERTS ARE EMPTY.\n" + + "PLEASE POPULATE ANY OF:" + + "".join( + ["\n - " + x for x in ssl.get_default_verify_paths() if type(x) is str] + ) + + "\n", + stacklevel=2, + ) import certifi + https_ctx = ssl.create_default_context(cafile=certifi.where()) + def logging_subprocess( popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs ): @@ -524,7 +529,7 @@ def get_github_host(args): def read_file_contents(file_uri): - return open(file_uri[len(FILE_URI_PREFIX):], "rt").readline().strip() + return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() def get_github_repo_url(args, repository): @@ -795,13 +800,15 @@ def download_file(url, path, auth, as_app=False, fine=False): if os.path.exists(path): return - request = _construct_request(per_page=100, - page=1, - query_args={}, - template=url, - auth=auth, - as_app=as_app, - fine=fine) + request = _construct_request( + per_page=100, + page=1, + query_args={}, + template=url, + auth=auth, + as_app=as_app, + fine=fine, + ) request.add_header("Accept", "application/octet-stream") opener = build_opener(S3HTTPRedirectHandler) @@ -944,11 +951,15 @@ def filter_repositories(args, unfiltered_repositories): if r.get("language") and r.get("language").lower() in languages ] # noqa if name_regex: - repositories = [r for r in repositories if "name" not in r or name_regex.match(r["name"])] + repositories = [ + r for r in repositories if "name" not in r or name_regex.match(r["name"]) + ] if args.skip_archived: repositories = [r for r in repositories if not r.get("archived")] if args.exclude: - repositories = [r for r in 
repositories if "name" not in r or r["name"] not in args.exclude] + repositories = [ + r for r in repositories if "name" not in r or r["name"] not in args.exclude + ] return repositories @@ -1244,10 +1255,16 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F if args.skip_prerelease: releases = [r for r in releases if not r["prerelease"] and not r["draft"]] - if args.number_of_latest_releases and args.number_of_latest_releases < len(releases): - releases.sort(key=lambda item: datetime.strptime(item["created_at"], "%Y-%m-%dT%H:%M:%SZ"), - reverse=True) - releases = releases[:args.number_of_latest_releases] + if args.number_of_latest_releases and args.number_of_latest_releases < len( + releases + ): + releases.sort( + key=lambda item: datetime.strptime( + item["created_at"], "%Y-%m-%dT%H:%M:%SZ" + ), + reverse=True, + ) + releases = releases[: args.number_of_latest_releases] logger.info("Saving the latest {0} releases to disk".format(len(releases))) else: logger.info("Saving {0} releases to disk".format(len(releases))) @@ -1274,7 +1291,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F os.path.join(release_assets_cwd, asset["name"]), get_auth(args, encode=not args.as_app), as_app=args.as_app, - fine=True if args.token_fine is not None else False + fine=True if args.token_fine is not None else False, ) From fcf21f7a2e2df8b4b92a282359617a4375ea9a9a Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 4 Jan 2025 01:15:18 +0000 Subject: [PATCH 281/455] Release version 0.48.0 --- CHANGES.rst | 75 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 879a854..b51bc68 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,82 @@ Changelog ========= -0.47.0 (2024-12-09) +0.48.0 (2025-01-04) ------------------- ------------------------ +- Chore: reformat file to fix lint issues. 
[Jose Diaz-Gonzalez] +- Chore(deps): bump the python-packages group across 1 directory with 4 + updates. [dependabot[bot]] + + Bumps the python-packages group with 4 updates in the / directory: [charset-normalizer](https://github.com/jawah/charset_normalizer), [click](https://github.com/pallets/click), [keyring](https://github.com/jaraco/keyring) and [urllib3](https://github.com/urllib3/urllib3). + + + Updates `charset-normalizer` from 3.4.0 to 3.4.1 + - [Release notes](https://github.com/jawah/charset_normalizer/releases) + - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) + - [Commits](https://github.com/jawah/charset_normalizer/compare/3.4.0...3.4.1) + + Updates `click` from 8.1.7 to 8.1.8 + - [Release notes](https://github.com/pallets/click/releases) + - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) + - [Commits](https://github.com/pallets/click/compare/8.1.7...8.1.8) + + Updates `keyring` from 25.5.0 to 25.6.0 + - [Release notes](https://github.com/jaraco/keyring/releases) + - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/keyring/compare/v25.5.0...v25.6.0) + + Updates `urllib3` from 2.2.3 to 2.3.0 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.2.3...2.3.0) + + --- + updated-dependencies: + - dependency-name: charset-normalizer + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: click + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: keyring + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: urllib3 + dependency-type: direct:production + 
update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Fix typo README.rst: --starred-gists that should be --gists. [Michael + D. Adams] +- Remove fixed release issue from known blocking errors. [Ethan White] + + The issue with --release producing errors documented in #209 (the linked issue) and #234 appears to have been fixed in #257. + + This change removes the associated warning from the README. +- Chore(deps): bump certifi in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). + + + Updates `certifi` from 2024.8.30 to 2024.12.14 + - [Commits](https://github.com/certifi/python-certifi/compare/2024.08.30...2024.12.14) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... + + +0.47.0 (2024-12-09) +------------------- - Detect empty HTTPS contexts. [John Doe] Some users are relying solely on the certifi package to provide their CA certs, as requests does this by default. diff --git a/github_backup/__init__.py b/github_backup/__init__.py index bf97bc4..3158ac8 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.47.0" +__version__ = "0.48.0" From 09bf9275d1d59b9aee018fd94d7bd26acb322d66 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 13:13:41 +0000 Subject: [PATCH 282/455] chore(deps): bump the python-packages group across 1 directory with 2 updates Bumps the python-packages group with 2 updates in the / directory: [pygments](https://github.com/pygments/pygments) and [setuptools](https://github.com/pypa/setuptools). 
Updates `pygments` from 2.18.0 to 2.19.1 - [Release notes](https://github.com/pygments/pygments/releases) - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) - [Commits](https://github.com/pygments/pygments/compare/2.18.0...2.19.1) Updates `setuptools` from 75.6.0 to 75.7.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v75.6.0...v75.7.0) --- updated-dependencies: - dependency-name: pygments dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 00dee95..20eeb25 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -23,14 +23,14 @@ pkginfo==1.12.0 platformdirs==4.3.6 pycodestyle==2.12.1 pyflakes==3.2.0 -Pygments==2.18.0 +Pygments==2.19.1 readme-renderer==44.0 requests==2.32.3 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.9.4 -setuptools==75.6.0 +setuptools==75.7.0 six==1.17.0 tqdm==4.67.1 twine==6.0.1 From a49322cf7d7ce85b3144f1dafe06eb8f90eb6830 Mon Sep 17 00:00:00 2001 From: Honza Maly Date: Thu, 16 Jan 2025 21:00:02 +0000 Subject: [PATCH 283/455] Implementing incremental by files, safer version of incremental backup. 
--- README.rst | 7 +++++++ github_backup/github_backup.py | 27 +++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 9e01f26..5dcef95 100644 --- a/README.rst +++ b/README.rst @@ -80,6 +80,7 @@ CLI Help output:: log level to use (default: info, possible levels: debug, info, warning, error, critical) -i, --incremental incremental backup + --incremental-by-files incremental backup using modified time of files --starred include JSON output of starred repositories in backup --all-starred include starred repositories in backup [*] --watched include JSON output of watched repositories in backup @@ -239,6 +240,12 @@ Using (``-i, --incremental``) will only request new data from the API **since th This means any blocking errors on previous runs can cause a large amount of missing data in backups. +Using (``--incremental-by-files``) will request new data from the API **based on when the file was modified on the filesystem**. e.g. if you modify the file yourself you may miss something. + +Still safer than the previous version. + +Specifically, issues and pull requests are handled like this. 
+ Known blocking errors --------------------- diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index ebd4f01..587c2a3 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -181,6 +181,12 @@ def parse_args(args=None): dest="incremental", help="incremental backup", ) + parser.add_argument( + "--incremental-by-files", + action="store_true", + dest="incremental_by_files", + help="incremental backup based on modification date of files", + ) parser.add_argument( "--starred", action="store_true", @@ -1114,6 +1120,13 @@ def backup_issues(args, repo_cwd, repository, repos_template): comments_template = _issue_template + "/{0}/comments" events_template = _issue_template + "/{0}/events" for number, issue in list(issues.items()): + issue_file = "{0}/{1}.json".format(issue_cwd, number) + if args.incremental_by_files and os.path.isfile(issue_file): + modified = os.path.getmtime(issue_file) + if modified > issue["updated_at"] + logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number)) + continue + if args.include_issue_comments or args.include_everything: template = comments_template.format(number) issues[number]["comment_data"] = retrieve_data(args, template) @@ -1121,9 +1134,9 @@ def backup_issues(args, repo_cwd, repository, repos_template): template = events_template.format(number) issues[number]["event_data"] = retrieve_data(args, template) - issue_file = "{0}/{1}.json".format(issue_cwd, number) - with codecs.open(issue_file, "w", encoding="utf-8") as f: + with codecs.open(issue_file + ".temp", "w", encoding="utf-8") as f: json_dump(issue, f) + os.rename(issue_file + ".temp", issue_file) # Unlike json_dump, this is atomic def backup_pulls(args, repo_cwd, repository, repos_template): @@ -1176,6 +1189,12 @@ def backup_pulls(args, repo_cwd, repository, repos_template): comments_template = _pulls_template + "/{0}/comments" commits_template = _pulls_template + "/{0}/commits" for number, 
pull in list(pulls.items()): + pull_file = "{0}/{1}.json".format(pulls_cwd, number) + if args.incremental_by_files and os.path.isfile(pull_file): + modified = os.path.getmtime(pull_file) + if modified > pull["updated_at"] + logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number)) + continue if args.include_pull_comments or args.include_everything: template = comments_regular_template.format(number) pulls[number]["comment_regular_data"] = retrieve_data(args, template) @@ -1185,9 +1204,9 @@ def backup_pulls(args, repo_cwd, repository, repos_template): template = commits_template.format(number) pulls[number]["commit_data"] = retrieve_data(args, template) - pull_file = "{0}/{1}.json".format(pulls_cwd, number) - with codecs.open(pull_file, "w", encoding="utf-8") as f: + with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f: json_dump(pull, f) + os.rename(pull_file + ".temp", pull_file) # Unlike json_dump, this is atomic def backup_milestones(args, repo_cwd, repository, repos_template): From 20e4d385a576c6716c52cafaf4f0fb2d7a1fa10f Mon Sep 17 00:00:00 2001 From: Honza Maly Date: Fri, 17 Jan 2025 07:28:49 +0000 Subject: [PATCH 284/455] Convert timestamp to string, although maybe the other way around would be better ... 
--- github_backup/github_backup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 587c2a3..d845d5b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1123,7 +1123,8 @@ def backup_issues(args, repo_cwd, repository, repos_template): issue_file = "{0}/{1}.json".format(issue_cwd, number) if args.incremental_by_files and os.path.isfile(issue_file): modified = os.path.getmtime(issue_file) - if modified > issue["updated_at"] + modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ") + if modified > issue["updated_at"]: logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number)) continue @@ -1192,7 +1193,8 @@ def backup_pulls(args, repo_cwd, repository, repos_template): pull_file = "{0}/{1}.json".format(pulls_cwd, number) if args.incremental_by_files and os.path.isfile(pull_file): modified = os.path.getmtime(pull_file) - if modified > pull["updated_at"] + modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ") + if modified > pull["updated_at"]: logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number)) continue if args.include_pull_comments or args.include_everything: From e75021db80dcffee75b6e2fb62e49de5e6412669 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 31 Jan 2025 14:01:24 +0000 Subject: [PATCH 285/455] chore(deps): bump the python-packages group across 1 directory with 7 updates Bumps the python-packages group with 7 updates in the / directory: | Package | From | To | | --- | --- | --- | | [autopep8](https://github.com/hhatto/autopep8) | `2.3.1` | `2.3.2` | | [black](https://github.com/psf/black) | `24.10.0` | `25.1.0` | | [certifi](https://github.com/certifi/python-certifi) | `2024.12.14` | `2025.1.31` | | 
[importlib-metadata](https://github.com/python/importlib_metadata) | `8.5.0` | `8.6.1` | | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.5.0` | `10.6.0` | | [setuptools](https://github.com/pypa/setuptools) | `75.7.0` | `75.8.0` | | [twine](https://github.com/pypa/twine) | `6.0.1` | `6.1.0` | Updates `autopep8` from 2.3.1 to 2.3.2 - [Release notes](https://github.com/hhatto/autopep8/releases) - [Commits](https://github.com/hhatto/autopep8/compare/v2.3.1...v2.3.2) Updates `black` from 24.10.0 to 25.1.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/24.10.0...25.1.0) Updates `certifi` from 2024.12.14 to 2025.1.31 - [Commits](https://github.com/certifi/python-certifi/compare/2024.12.14...2025.01.31) Updates `importlib-metadata` from 8.5.0 to 8.6.1 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v8.5.0...v8.6.1) Updates `more-itertools` from 10.5.0 to 10.6.0 - [Release notes](https://github.com/more-itertools/more-itertools/releases) - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.5.0...v10.6.0) Updates `setuptools` from 75.7.0 to 75.8.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v75.7.0...v75.8.0) Updates `twine` from 6.0.1 to 6.1.0 - [Release notes](https://github.com/pypa/twine/releases) - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) - [Commits](https://github.com/pypa/twine/compare/6.0.1...6.1.0) --- updated-dependencies: - dependency-name: autopep8 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - 
dependency-name: black dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: certifi dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: importlib-metadata dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: more-itertools dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: twine dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 20eeb25..bc09531 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ -autopep8==2.3.1 -black==24.10.0 +autopep8==2.3.2 +black==25.1.0 bleach==6.2.0 -certifi==2024.12.14 +certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 colorama==0.4.6 @@ -9,13 +9,13 @@ docutils==0.21.2 flake8==7.1.1 gitchangelog==3.0.4 idna==3.10 -importlib-metadata==8.5.0 +importlib-metadata==8.6.1 jaraco.classes==3.4.0 keyring==25.6.0 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 -more-itertools==10.5.0 +more-itertools==10.6.0 mypy-extensions==1.0.0 packaging==24.2 pathspec==0.12.1 @@ -30,10 +30,10 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.9.4 -setuptools==75.7.0 +setuptools==75.8.0 six==1.17.0 tqdm==4.67.1 -twine==6.0.1 +twine==6.1.0 urllib3==2.3.0 webencodings==0.5.1 zipp==3.21.0 From c92f5ef0f200cd6513e12c1a5e55c77a5394a577 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 1 Feb 2025 
07:00:56 +0000 Subject: [PATCH 286/455] Release version 0.49.0 --- CHANGES.rst | 116 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 116 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b51bc68..6e525b9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,123 @@ Changelog ========= -0.48.0 (2025-01-04) +0.49.0 (2025-02-01) ------------------- ------------------------ +- Convert timestamp to string, although maybe the other way around would + be better ... [Honza Maly] +- Implementing incremental by files, safer version of incremental + backup. [Honza Maly] +- Chore(deps): bump the python-packages group across 1 directory with 7 + updates. [dependabot[bot]] + + Bumps the python-packages group with 7 updates in the / directory: + + | Package | From | To | + | --- | --- | --- | + | [autopep8](https://github.com/hhatto/autopep8) | `2.3.1` | `2.3.2` | + | [black](https://github.com/psf/black) | `24.10.0` | `25.1.0` | + | [certifi](https://github.com/certifi/python-certifi) | `2024.12.14` | `2025.1.31` | + | [importlib-metadata](https://github.com/python/importlib_metadata) | `8.5.0` | `8.6.1` | + | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.5.0` | `10.6.0` | + | [setuptools](https://github.com/pypa/setuptools) | `75.7.0` | `75.8.0` | + | [twine](https://github.com/pypa/twine) | `6.0.1` | `6.1.0` | + + + + Updates `autopep8` from 2.3.1 to 2.3.2 + - [Release notes](https://github.com/hhatto/autopep8/releases) + - [Commits](https://github.com/hhatto/autopep8/compare/v2.3.1...v2.3.2) + + Updates `black` from 24.10.0 to 25.1.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/24.10.0...25.1.0) + + Updates `certifi` from 2024.12.14 to 2025.1.31 + - [Commits](https://github.com/certifi/python-certifi/compare/2024.12.14...2025.01.31) + + Updates 
`importlib-metadata` from 8.5.0 to 8.6.1 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v8.5.0...v8.6.1) + + Updates `more-itertools` from 10.5.0 to 10.6.0 + - [Release notes](https://github.com/more-itertools/more-itertools/releases) + - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.5.0...v10.6.0) + + Updates `setuptools` from 75.7.0 to 75.8.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v75.7.0...v75.8.0) + + Updates `twine` from 6.0.1 to 6.1.0 + - [Release notes](https://github.com/pypa/twine/releases) + - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) + - [Commits](https://github.com/pypa/twine/compare/6.0.1...6.1.0) + + --- + updated-dependencies: + - dependency-name: autopep8 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: black + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: certifi + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: importlib-metadata + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: more-itertools + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: twine + dependency-type: direct:production + 
update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group across 1 directory with 2 + updates. [dependabot[bot]] + + Bumps the python-packages group with 2 updates in the / directory: [pygments](https://github.com/pygments/pygments) and [setuptools](https://github.com/pypa/setuptools). + + + Updates `pygments` from 2.18.0 to 2.19.1 + - [Release notes](https://github.com/pygments/pygments/releases) + - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) + - [Commits](https://github.com/pygments/pygments/compare/2.18.0...2.19.1) + + Updates `setuptools` from 75.6.0 to 75.7.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v75.6.0...v75.7.0) + + --- + updated-dependencies: + - dependency-name: pygments + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... + + +0.48.0 (2025-01-04) +------------------- - Chore: reformat file to fix lint issues. [Jose Diaz-Gonzalez] - Chore(deps): bump the python-packages group across 1 directory with 4 updates. 
[dependabot[bot]] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 3158ac8..d4ace94 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.48.0" +__version__ = "0.49.0" From 552c1051e333aa35cd1946cd785a0b683b04a1d5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 19 Feb 2025 17:28:50 +0000 Subject: [PATCH 287/455] chore(deps): bump the python-packages group across 1 directory with 2 updates Bumps the python-packages group with 2 updates in the / directory: [flake8](https://github.com/pycqa/flake8) and [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk). Updates `flake8` from 7.1.1 to 7.1.2 - [Commits](https://github.com/pycqa/flake8/compare/7.1.1...7.1.2) Updates `pkginfo` from 1.12.0 to 1.12.1.2 --- updated-dependencies: - dependency-name: flake8 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: pkginfo dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index bc09531..8f9b4ac 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -6,7 +6,7 @@ charset-normalizer==3.4.1 click==8.1.8 colorama==0.4.6 docutils==0.21.2 -flake8==7.1.1 +flake8==7.1.2 gitchangelog==3.0.4 idna==3.10 importlib-metadata==8.6.1 @@ -19,7 +19,7 @@ more-itertools==10.6.0 mypy-extensions==1.0.0 packaging==24.2 pathspec==0.12.1 -pkginfo==1.12.0 +pkginfo==1.12.1.2 platformdirs==4.3.6 pycodestyle==2.12.1 pyflakes==3.2.0 From 3d354beb2403cbf97c76d798a25ce367be43dd8f Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Fri, 21 Feb 2025 22:14:37 -0500 Subject: [PATCH 288/455] chore: fix inline comments --- github_backup/github_backup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index d845d5b..29c9e58 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1137,7 +1137,7 @@ def backup_issues(args, repo_cwd, repository, repos_template): with codecs.open(issue_file + ".temp", "w", encoding="utf-8") as f: json_dump(issue, f) - os.rename(issue_file + ".temp", issue_file) # Unlike json_dump, this is atomic + os.rename(issue_file + ".temp", issue_file) # Unlike json_dump, this is atomic def backup_pulls(args, repo_cwd, repository, repos_template): @@ -1208,7 +1208,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f: json_dump(pull, f) - os.rename(pull_file + ".temp", pull_file) # Unlike json_dump, this is atomic + os.rename(pull_file + ".temp", pull_file) # Unlike json_dump, this is atomic def backup_milestones(args, repo_cwd, repository, repos_template): From 5530a1baddaea3c996a1b99add6eefbe58c2da4a Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 22 Feb 
2025 03:15:44 +0000 Subject: [PATCH 289/455] Release version 0.50.0 --- CHANGES.rst | 29 ++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 6e525b9..e7753ac 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,36 @@ Changelog ========= -0.49.0 (2025-02-01) +0.50.0 (2025-02-22) ------------------- ------------------------ +- Chore: fix inline comments. [Jose Diaz-Gonzalez] +- Chore(deps): bump the python-packages group across 1 directory with 2 + updates. [dependabot[bot]] + + Bumps the python-packages group with 2 updates in the / directory: [flake8](https://github.com/pycqa/flake8) and [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk). + + + Updates `flake8` from 7.1.1 to 7.1.2 + - [Commits](https://github.com/pycqa/flake8/compare/7.1.1...7.1.2) + + Updates `pkginfo` from 1.12.0 to 1.12.1.2 + + --- + updated-dependencies: + - dependency-name: flake8 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: pkginfo + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... + + +0.49.0 (2025-02-01) +------------------- - Convert timestamp to string, although maybe the other way around would be better ... 
[Honza Maly] - Implementing incremental by files, safer version of incremental diff --git a/github_backup/__init__.py b/github_backup/__init__.py index d4ace94..3a7c7a2 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.49.0" +__version__ = "0.50.0" From 07fd47a59612e0d8882126841d2a54030f0d1344 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 26 Feb 2025 13:19:50 +0000 Subject: [PATCH 290/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). Updates `setuptools` from 75.8.0 to 75.8.1 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v75.8.0...v75.8.1) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 8f9b4ac..a5f72a3 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.9.4 -setuptools==75.8.0 +setuptools==75.8.1 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From b818e9b95f3566f69c11aaf17affdf812eb3e791 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 27 Feb 2025 14:03:39 +0000 Subject: [PATCH 291/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). 
Updates `setuptools` from 75.8.1 to 75.8.2 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v75.8.1...v75.8.2) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index a5f72a3..656d736 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.9.4 -setuptools==75.8.1 +setuptools==75.8.2 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From 6d51d199c5345717d70ed1eaf02fc32fc1326fee Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 6 Mar 2025 01:26:22 +0000 Subject: [PATCH 292/455] Release version 0.50.1 --- CHANGES.rst | 42 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e7753ac..c3f8150 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,49 @@ Changelog ========= -0.50.0 (2025-02-22) +0.50.1 (2025-03-06) ------------------- ------------------------ +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). 
+ + + Updates `setuptools` from 75.8.1 to 75.8.2 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v75.8.1...v75.8.2) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). + + + Updates `setuptools` from 75.8.0 to 75.8.1 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v75.8.0...v75.8.1) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... + + +0.50.0 (2025-02-22) +------------------- - Chore: fix inline comments. [Jose Diaz-Gonzalez] - Chore(deps): bump the python-packages group across 1 directory with 2 updates. [dependabot[bot]] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 3a7c7a2..8341786 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.50.0" +__version__ = "0.50.1" From c1f9ea7b9b552ee7f76350bdeeaf16c656cb10b3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 14:16:58 +0000 Subject: [PATCH 293/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). 
Updates `setuptools` from 75.8.2 to 76.0.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v75.8.2...v76.0.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 656d736..3f8558e 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.9.4 -setuptools==75.8.2 +setuptools==76.0.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From 677f3d328756b34fc020f9074b402a5a45277d63 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 13:19:29 +0000 Subject: [PATCH 294/455] chore(deps): bump the python-packages group across 1 directory with 2 updates Bumps the python-packages group with 2 updates in the / directory: [platformdirs](https://github.com/tox-dev/platformdirs) and [setuptools](https://github.com/pypa/setuptools). 
Updates `platformdirs` from 4.3.6 to 4.3.7 - [Release notes](https://github.com/tox-dev/platformdirs/releases) - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) - [Commits](https://github.com/tox-dev/platformdirs/compare/4.3.6...4.3.7) Updates `setuptools` from 76.0.0 to 77.0.1 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v76.0.0...v77.0.1) --- updated-dependencies: - dependency-name: platformdirs dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 3f8558e..6078365 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -20,7 +20,7 @@ mypy-extensions==1.0.0 packaging==24.2 pathspec==0.12.1 pkginfo==1.12.1.2 -platformdirs==4.3.6 +platformdirs==4.3.7 pycodestyle==2.12.1 pyflakes==3.2.0 Pygments==2.19.1 @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.9.4 -setuptools==76.0.0 +setuptools==77.0.1 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From 2885fc682251c5f039b86996ccc41901f9f4efff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Mar 2025 14:08:27 +0000 Subject: [PATCH 295/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). 
Updates `setuptools` from 77.0.1 to 77.0.3 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v77.0.1...v77.0.3) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 6078365..403fd43 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==13.9.4 -setuptools==77.0.1 +setuptools==77.0.3 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From 1a8eb7a90608d88d537ed5d9aa82b344d4d9be18 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 14:05:39 +0000 Subject: [PATCH 296/455] chore(deps): bump the python-packages group with 5 updates Bumps the python-packages group with 5 updates: | Package | From | To | | --- | --- | --- | | [flake8](https://github.com/pycqa/flake8) | `7.1.2` | `7.2.0` | | [pycodestyle](https://github.com/PyCQA/pycodestyle) | `2.12.1` | `2.13.0` | | [pyflakes](https://github.com/PyCQA/pyflakes) | `3.2.0` | `3.3.2` | | [rich](https://github.com/Textualize/rich) | `13.9.4` | `14.0.0` | | [setuptools](https://github.com/pypa/setuptools) | `77.0.3` | `78.1.0` | Updates `flake8` from 7.1.2 to 7.2.0 - [Commits](https://github.com/pycqa/flake8/compare/7.1.2...7.2.0) Updates `pycodestyle` from 2.12.1 to 2.13.0 - [Release notes](https://github.com/PyCQA/pycodestyle/releases) - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.12.1...2.13.0) Updates `pyflakes` from 3.2.0 to 3.3.2 - 
[Changelog](https://github.com/PyCQA/pyflakes/blob/main/NEWS.rst) - [Commits](https://github.com/PyCQA/pyflakes/compare/3.2.0...3.3.2) Updates `rich` from 13.9.4 to 14.0.0 - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v13.9.4...v14.0.0) Updates `setuptools` from 77.0.3 to 78.1.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v77.0.3...v78.1.0) --- updated-dependencies: - dependency-name: flake8 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pycodestyle dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pyflakes dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: rich dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 403fd43..43d8f06 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -6,7 +6,7 @@ charset-normalizer==3.4.1 click==8.1.8 colorama==0.4.6 docutils==0.21.2 -flake8==7.1.2 +flake8==7.2.0 gitchangelog==3.0.4 idna==3.10 importlib-metadata==8.6.1 @@ -21,16 +21,16 @@ packaging==24.2 pathspec==0.12.1 pkginfo==1.12.1.2 platformdirs==4.3.7 -pycodestyle==2.12.1 -pyflakes==3.2.0 +pycodestyle==2.13.0 +pyflakes==3.3.2 Pygments==2.19.1 readme-renderer==44.0 requests==2.32.3 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 -rich==13.9.4 -setuptools==77.0.3 +rich==14.0.0 +setuptools==78.1.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From abe6192ee912d1a51309ef49b30f7ee50172a5b6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 11 Apr 2025 13:20:02 +0000 Subject: [PATCH 297/455] chore(deps): bump urllib3 in the python-packages group Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3). Updates `urllib3` from 2.3.0 to 2.4.0 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.3.0...2.4.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.4.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 43d8f06..25bf6c4 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -34,6 +34,6 @@ setuptools==78.1.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 -urllib3==2.3.0 +urllib3==2.4.0 webencodings==0.5.1 zipp==3.21.0 From b49544270e7363aafe8e1062420815031e428e2c Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 17 Apr 2025 21:07:10 -0400 Subject: [PATCH 298/455] chore: bump runs-on image from ubuntu-20.04 to ubuntu-24.04 --- .github/workflows/tagged-release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tagged-release.yml b/.github/workflows/tagged-release.yml index 846c457..131dfa6 100644 --- a/.github/workflows/tagged-release.yml +++ b/.github/workflows/tagged-release.yml @@ -10,7 +10,7 @@ on: jobs: tagged-release: name: tagged-release - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - uses: "marvinpinto/action-automatic-releases@v1.2.1" From e4bd19acea29536f27909e4c0305ebbec51753de Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 16:02:18 +0000 Subject: [PATCH 299/455] chore(deps): bump the python-packages group across 1 directory with 6 updates Bumps the python-packages group with 6 updates in the / directory: | Package | From | To | | --- | --- | --- | | [certifi](https://github.com/certifi/python-certifi) | `2025.1.31` | `2025.4.26` | | [importlib-metadata](https://github.com/python/importlib_metadata) | `8.6.1` | `8.7.0` | | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.6.0` | `10.7.0` | | [mypy-extensions](https://github.com/python/mypy_extensions) | `1.0.0` | `1.1.0` | | [packaging](https://github.com/pypa/packaging) | `24.2` | `25.0` | | [setuptools](https://github.com/pypa/setuptools) | `78.1.0` | `80.0.0` | 
Updates `certifi` from 2025.1.31 to 2025.4.26 - [Commits](https://github.com/certifi/python-certifi/compare/2025.01.31...2025.04.26) Updates `importlib-metadata` from 8.6.1 to 8.7.0 - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/v8.6.1...v8.7.0) Updates `more-itertools` from 10.6.0 to 10.7.0 - [Release notes](https://github.com/more-itertools/more-itertools/releases) - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.6.0...v10.7.0) Updates `mypy-extensions` from 1.0.0 to 1.1.0 - [Commits](https://github.com/python/mypy_extensions/compare/1.0.0...1.1.0) Updates `packaging` from 24.2 to 25.0 - [Release notes](https://github.com/pypa/packaging/releases) - [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pypa/packaging/compare/24.2...25.0) Updates `setuptools` from 78.1.0 to 80.0.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v78.1.0...v80.0.0) --- updated-dependencies: - dependency-name: certifi dependency-version: 2025.4.26 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: importlib-metadata dependency-version: 8.7.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: more-itertools dependency-version: 10.7.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: mypy-extensions dependency-version: 1.1.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: packaging dependency-version: 
'25.0' dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: setuptools dependency-version: 80.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 25bf6c4..8a6f50f 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ autopep8==2.3.2 black==25.1.0 bleach==6.2.0 -certifi==2025.1.31 +certifi==2025.4.26 charset-normalizer==3.4.1 click==8.1.8 colorama==0.4.6 @@ -9,15 +9,15 @@ docutils==0.21.2 flake8==7.2.0 gitchangelog==3.0.4 idna==3.10 -importlib-metadata==8.6.1 +importlib-metadata==8.7.0 jaraco.classes==3.4.0 keyring==25.6.0 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 -more-itertools==10.6.0 -mypy-extensions==1.0.0 -packaging==24.2 +more-itertools==10.7.0 +mypy-extensions==1.1.0 +packaging==25.0 pathspec==0.12.1 pkginfo==1.12.1.2 platformdirs==4.3.7 @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==14.0.0 -setuptools==78.1.0 +setuptools==80.0.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From e8bf4257daa9d9905e6ee1d28ec7a6c179792508 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 8 May 2025 13:14:31 +0000 Subject: [PATCH 300/455] chore(deps): bump the python-packages group across 1 directory with 3 updates Bumps the python-packages group with 3 updates in the / directory: [charset-normalizer](https://github.com/jawah/charset_normalizer), [platformdirs](https://github.com/tox-dev/platformdirs) and [setuptools](https://github.com/pypa/setuptools). 
Updates `charset-normalizer` from 3.4.1 to 3.4.2 - [Release notes](https://github.com/jawah/charset_normalizer/releases) - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) - [Commits](https://github.com/jawah/charset_normalizer/compare/3.4.1...3.4.2) Updates `platformdirs` from 4.3.7 to 4.3.8 - [Release notes](https://github.com/tox-dev/platformdirs/releases) - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) - [Commits](https://github.com/tox-dev/platformdirs/compare/4.3.7...4.3.8) Updates `setuptools` from 80.0.0 to 80.3.1 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v80.0.0...v80.3.1) --- updated-dependencies: - dependency-name: charset-normalizer dependency-version: 3.4.2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: platformdirs dependency-version: 4.3.8 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: setuptools dependency-version: 80.3.1 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 8a6f50f..0caf9f0 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -2,7 +2,7 @@ autopep8==2.3.2 black==25.1.0 bleach==6.2.0 certifi==2025.4.26 -charset-normalizer==3.4.1 +charset-normalizer==3.4.2 click==8.1.8 colorama==0.4.6 docutils==0.21.2 @@ -20,7 +20,7 @@ mypy-extensions==1.1.0 packaging==25.0 pathspec==0.12.1 pkginfo==1.12.1.2 -platformdirs==4.3.7 +platformdirs==4.3.8 pycodestyle==2.13.0 pyflakes==3.3.2 Pygments==2.19.1 @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==14.0.0 -setuptools==80.0.0 +setuptools==80.3.1 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From db69f5a5e83301d64c9c7258776ec9a896b344dd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 13:49:22 +0000 Subject: [PATCH 301/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). Updates `setuptools` from 80.3.1 to 80.4.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v80.3.1...v80.4.0) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.4.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 0caf9f0..a58a551 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==14.0.0 -setuptools==80.3.1 +setuptools==80.4.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From 17dc265385858d5dbbc7e20350d2fe0a481a67ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 22 May 2025 13:34:39 +0000 Subject: [PATCH 302/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). Updates `setuptools` from 80.4.0 to 80.8.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v80.4.0...v80.8.0) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.8.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index a58a551..bc5dfa1 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -30,7 +30,7 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==14.0.0 -setuptools==80.4.0 +setuptools==80.8.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 From 53714612d47d5a46611dda20aca866a3ce68f1bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 May 2025 13:29:45 +0000 Subject: [PATCH 303/455] chore(deps): bump the python-packages group with 2 updates Bumps the python-packages group with 2 updates: [setuptools](https://github.com/pypa/setuptools) and [zipp](https://github.com/jaraco/zipp). Updates `setuptools` from 80.8.0 to 80.9.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v80.8.0...v80.9.0) Updates `zipp` from 3.21.0 to 3.22.0 - [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.21.0...v3.22.0) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.9.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: zipp dependency-version: 3.22.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index bc5dfa1..34bb095 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -30,10 +30,10 @@ requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 rich==14.0.0 -setuptools==80.8.0 +setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 urllib3==2.4.0 webencodings==0.5.1 -zipp==3.21.0 +zipp==3.22.0 From 8f58ef6229569e9d3c51309a0811496a85fe0e3c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Jun 2025 08:54:47 +0000 Subject: [PATCH 304/455] chore(deps): bump requests from 2.32.3 to 2.32.4 Bumps [requests](https://github.com/psf/requests) from 2.32.3 to 2.32.4. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.32.3...v2.32.4) --- updated-dependencies: - dependency-name: requests dependency-version: 2.32.4 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 34bb095..3308cce 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -25,7 +25,7 @@ pycodestyle==2.13.0 pyflakes==3.3.2 Pygments==2.19.1 readme-renderer==44.0 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 From 16b5b304e7ea6dd09437fdaa9fd5e63494a1b433 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:11:08 +0000 Subject: [PATCH 305/455] chore(deps): bump the python-packages group across 1 directory with 2 updates Bumps the python-packages group with 2 updates in the / directory: [requests](https://github.com/psf/requests) and [zipp](https://github.com/jaraco/zipp). Updates `requests` from 2.32.3 to 2.32.4 - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.32.3...v2.32.4) Updates `zipp` from 3.22.0 to 3.23.0 - [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.22.0...v3.23.0) --- updated-dependencies: - dependency-name: requests dependency-version: 2.32.4 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: zipp dependency-version: 3.23.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 34bb095..5637820 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -25,7 +25,7 @@ pycodestyle==2.13.0 pyflakes==3.3.2 Pygments==2.19.1 readme-renderer==44.0 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 @@ -36,4 +36,4 @@ tqdm==4.67.1 twine==6.1.0 urllib3==2.4.0 webencodings==0.5.1 -zipp==3.22.0 +zipp==3.23.0 From 7e0f7d19302bbf009dedee1c32a5189d7c9d2dec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:11:13 +0000 Subject: [PATCH 306/455] chore(deps): bump certifi in the python-packages group Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). Updates `certifi` from 2025.4.26 to 2025.6.15 - [Commits](https://github.com/certifi/python-certifi/compare/2025.04.26...2025.06.15) --- updated-dependencies: - dependency-name: certifi dependency-version: 2025.6.15 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 5637820..21eea04 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ autopep8==2.3.2 black==25.1.0 bleach==6.2.0 -certifi==2025.4.26 +certifi==2025.6.15 charset-normalizer==3.4.2 click==8.1.8 colorama==0.4.6 From cf8b4c6b45cf7d0350784c0b57960e29a00ba71c Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 16 Jun 2025 20:32:34 +0000 Subject: [PATCH 307/455] Release version 0.50.2 --- CHANGES.rst | 388 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 388 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index c3f8150..2fddca5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,395 @@ Changelog ========= -0.50.1 (2025-03-06) +0.50.2 (2025-06-16) ------------------- ------------------------ +- Chore(deps): bump certifi in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). + + + Updates `certifi` from 2025.4.26 to 2025.6.15 + - [Commits](https://github.com/certifi/python-certifi/compare/2025.04.26...2025.06.15) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-version: 2025.6.15 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump requests from 2.32.3 to 2.32.4. [dependabot[bot]] + + Bumps [requests](https://github.com/psf/requests) from 2.32.3 to 2.32.4. 
+ - [Release notes](https://github.com/psf/requests/releases) + - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) + - [Commits](https://github.com/psf/requests/compare/v2.32.3...v2.32.4) + + --- + updated-dependencies: + - dependency-name: requests + dependency-version: 2.32.4 + dependency-type: direct:production + ... +- Chore(deps): bump the python-packages group across 1 directory with 2 + updates. [dependabot[bot]] + + Bumps the python-packages group with 2 updates in the / directory: [requests](https://github.com/psf/requests) and [zipp](https://github.com/jaraco/zipp). + + + Updates `requests` from 2.32.3 to 2.32.4 + - [Release notes](https://github.com/psf/requests/releases) + - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) + - [Commits](https://github.com/psf/requests/compare/v2.32.3...v2.32.4) + + Updates `zipp` from 3.22.0 to 3.23.0 + - [Release notes](https://github.com/jaraco/zipp/releases) + - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/zipp/compare/v3.22.0...v3.23.0) + + --- + updated-dependencies: + - dependency-name: requests + dependency-version: 2.32.4 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: zipp + dependency-version: 3.23.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 2 updates. + [dependabot[bot]] + + Bumps the python-packages group with 2 updates: [setuptools](https://github.com/pypa/setuptools) and [zipp](https://github.com/jaraco/zipp). 
+ + + Updates `setuptools` from 80.8.0 to 80.9.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v80.8.0...v80.9.0) + + Updates `zipp` from 3.21.0 to 3.22.0 + - [Release notes](https://github.com/jaraco/zipp/releases) + - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/zipp/compare/v3.21.0...v3.22.0) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-version: 80.9.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: zipp + dependency-version: 3.22.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). + + + Updates `setuptools` from 80.4.0 to 80.8.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v80.4.0...v80.8.0) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-version: 80.8.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). 
+ + + Updates `setuptools` from 80.3.1 to 80.4.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v80.3.1...v80.4.0) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-version: 80.4.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group across 1 directory with 3 + updates. [dependabot[bot]] + + Bumps the python-packages group with 3 updates in the / directory: [charset-normalizer](https://github.com/jawah/charset_normalizer), [platformdirs](https://github.com/tox-dev/platformdirs) and [setuptools](https://github.com/pypa/setuptools). + + + Updates `charset-normalizer` from 3.4.1 to 3.4.2 + - [Release notes](https://github.com/jawah/charset_normalizer/releases) + - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) + - [Commits](https://github.com/jawah/charset_normalizer/compare/3.4.1...3.4.2) + + Updates `platformdirs` from 4.3.7 to 4.3.8 + - [Release notes](https://github.com/tox-dev/platformdirs/releases) + - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) + - [Commits](https://github.com/tox-dev/platformdirs/compare/4.3.7...4.3.8) + + Updates `setuptools` from 80.0.0 to 80.3.1 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v80.0.0...v80.3.1) + + --- + updated-dependencies: + - dependency-name: charset-normalizer + dependency-version: 3.4.2 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: platformdirs + dependency-version: 4.3.8 + dependency-type: direct:production + update-type: 
version-update:semver-patch + dependency-group: python-packages + - dependency-name: setuptools + dependency-version: 80.3.1 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group across 1 directory with 6 + updates. [dependabot[bot]] + + Bumps the python-packages group with 6 updates in the / directory: + + | Package | From | To | + | --- | --- | --- | + | [certifi](https://github.com/certifi/python-certifi) | `2025.1.31` | `2025.4.26` | + | [importlib-metadata](https://github.com/python/importlib_metadata) | `8.6.1` | `8.7.0` | + | [more-itertools](https://github.com/more-itertools/more-itertools) | `10.6.0` | `10.7.0` | + | [mypy-extensions](https://github.com/python/mypy_extensions) | `1.0.0` | `1.1.0` | + | [packaging](https://github.com/pypa/packaging) | `24.2` | `25.0` | + | [setuptools](https://github.com/pypa/setuptools) | `78.1.0` | `80.0.0` | + + + + Updates `certifi` from 2025.1.31 to 2025.4.26 + - [Commits](https://github.com/certifi/python-certifi/compare/2025.01.31...2025.04.26) + + Updates `importlib-metadata` from 8.6.1 to 8.7.0 + - [Release notes](https://github.com/python/importlib_metadata/releases) + - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) + - [Commits](https://github.com/python/importlib_metadata/compare/v8.6.1...v8.7.0) + + Updates `more-itertools` from 10.6.0 to 10.7.0 + - [Release notes](https://github.com/more-itertools/more-itertools/releases) + - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.6.0...v10.7.0) + + Updates `mypy-extensions` from 1.0.0 to 1.1.0 + - [Commits](https://github.com/python/mypy_extensions/compare/1.0.0...1.1.0) + + Updates `packaging` from 24.2 to 25.0 + - [Release notes](https://github.com/pypa/packaging/releases) + - [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst) + - 
[Commits](https://github.com/pypa/packaging/compare/24.2...25.0) + + Updates `setuptools` from 78.1.0 to 80.0.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v78.1.0...v80.0.0) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-version: 2025.4.26 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: importlib-metadata + dependency-version: 8.7.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: more-itertools + dependency-version: 10.7.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: mypy-extensions + dependency-version: 1.1.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: packaging + dependency-version: '25.0' + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: setuptools + dependency-version: 80.0.0 + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore: bump runs-on image from ubuntu-20.04 to ubuntu-24.04. [Jose + Diaz-Gonzalez] +- Chore(deps): bump urllib3 in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3). 
+ + + Updates `urllib3` from 2.3.0 to 2.4.0 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.3.0...2.4.0) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-version: 2.4.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 5 updates. + [dependabot[bot]] + + Bumps the python-packages group with 5 updates: + + | Package | From | To | + | --- | --- | --- | + | [flake8](https://github.com/pycqa/flake8) | `7.1.2` | `7.2.0` | + | [pycodestyle](https://github.com/PyCQA/pycodestyle) | `2.12.1` | `2.13.0` | + | [pyflakes](https://github.com/PyCQA/pyflakes) | `3.2.0` | `3.3.2` | + | [rich](https://github.com/Textualize/rich) | `13.9.4` | `14.0.0` | + | [setuptools](https://github.com/pypa/setuptools) | `77.0.3` | `78.1.0` | + + + Updates `flake8` from 7.1.2 to 7.2.0 + - [Commits](https://github.com/pycqa/flake8/compare/7.1.2...7.2.0) + + Updates `pycodestyle` from 2.12.1 to 2.13.0 + - [Release notes](https://github.com/PyCQA/pycodestyle/releases) + - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) + - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.12.1...2.13.0) + + Updates `pyflakes` from 3.2.0 to 3.3.2 + - [Changelog](https://github.com/PyCQA/pyflakes/blob/main/NEWS.rst) + - [Commits](https://github.com/PyCQA/pyflakes/compare/3.2.0...3.3.2) + + Updates `rich` from 13.9.4 to 14.0.0 + - [Release notes](https://github.com/Textualize/rich/releases) + - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) + - [Commits](https://github.com/Textualize/rich/compare/v13.9.4...v14.0.0) + + Updates `setuptools` from 77.0.3 to 78.1.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - 
[Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v77.0.3...v78.1.0) + + --- + updated-dependencies: + - dependency-name: flake8 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pycodestyle + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pyflakes + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: rich + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). + + + Updates `setuptools` from 77.0.1 to 77.0.3 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v77.0.1...v77.0.3) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group across 1 directory with 2 + updates. [dependabot[bot]] + + Bumps the python-packages group with 2 updates in the / directory: [platformdirs](https://github.com/tox-dev/platformdirs) and [setuptools](https://github.com/pypa/setuptools). 
+ + + Updates `platformdirs` from 4.3.6 to 4.3.7 + - [Release notes](https://github.com/tox-dev/platformdirs/releases) + - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) + - [Commits](https://github.com/tox-dev/platformdirs/compare/4.3.6...4.3.7) + + Updates `setuptools` from 76.0.0 to 77.0.1 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v76.0.0...v77.0.1) + + --- + updated-dependencies: + - dependency-name: platformdirs + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). + + + Updates `setuptools` from 75.8.2 to 76.0.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v75.8.2...v76.0.0) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... + + +0.50.1 (2025-03-06) +------------------- - Chore(deps): bump setuptools in the python-packages group. 
[dependabot[bot]] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 8341786..079baa7 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.50.1" +__version__ = "0.50.2" From 7333458ee455c5ab0cd5fdf34b80b638c22a7268 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 19 Jun 2025 05:26:53 +0000 Subject: [PATCH 308/455] chore(deps): bump urllib3 from 2.4.0 to 2.5.0 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.4.0 to 2.5.0. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.4.0...2.5.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.5.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 21eea04..a409656 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -34,6 +34,6 @@ setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 -urllib3==2.4.0 +urllib3==2.5.0 webencodings==0.5.1 zipp==3.23.0 From fb8945fc094cb9087a23c2f81826b0fc5d521b2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 14:34:26 +0000 Subject: [PATCH 309/455] chore(deps): bump the python-packages group with 5 updates Bumps the python-packages group with 5 updates: | Package | From | To | | --- | --- | --- | | [flake8](https://github.com/pycqa/flake8) | `7.2.0` | `7.3.0` | | [pycodestyle](https://github.com/PyCQA/pycodestyle) | `2.13.0` | `2.14.0` | | [pyflakes](https://github.com/PyCQA/pyflakes) | `3.3.2` | `3.4.0` | | [pygments](https://github.com/pygments/pygments) | `2.19.1` | `2.19.2` | | [urllib3](https://github.com/urllib3/urllib3) 
| `2.4.0` | `2.5.0` | Updates `flake8` from 7.2.0 to 7.3.0 - [Commits](https://github.com/pycqa/flake8/compare/7.2.0...7.3.0) Updates `pycodestyle` from 2.13.0 to 2.14.0 - [Release notes](https://github.com/PyCQA/pycodestyle/releases) - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.13.0...2.14.0) Updates `pyflakes` from 3.3.2 to 3.4.0 - [Changelog](https://github.com/PyCQA/pyflakes/blob/main/NEWS.rst) - [Commits](https://github.com/PyCQA/pyflakes/compare/3.3.2...3.4.0) Updates `pygments` from 2.19.1 to 2.19.2 - [Release notes](https://github.com/pygments/pygments/releases) - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) - [Commits](https://github.com/pygments/pygments/compare/2.19.1...2.19.2) Updates `urllib3` from 2.4.0 to 2.5.0 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.4.0...2.5.0) --- updated-dependencies: - dependency-name: flake8 dependency-version: 7.3.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pycodestyle dependency-version: 2.14.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pyflakes dependency-version: 3.4.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pygments dependency-version: 2.19.2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: urllib3 dependency-version: 2.5.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 21eea04..2f5a899 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -6,7 +6,7 @@ charset-normalizer==3.4.2 click==8.1.8 colorama==0.4.6 docutils==0.21.2 -flake8==7.2.0 +flake8==7.3.0 gitchangelog==3.0.4 idna==3.10 importlib-metadata==8.7.0 @@ -21,9 +21,9 @@ packaging==25.0 pathspec==0.12.1 pkginfo==1.12.1.2 platformdirs==4.3.8 -pycodestyle==2.13.0 -pyflakes==3.3.2 -Pygments==2.19.1 +pycodestyle==2.14.0 +pyflakes==3.4.0 +Pygments==2.19.2 readme-renderer==44.0 requests==2.32.4 requests-toolbelt==1.0.0 @@ -34,6 +34,6 @@ setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 twine==6.1.0 -urllib3==2.4.0 +urllib3==2.5.0 webencodings==0.5.1 zipp==3.23.0 From 175ac19be683d5aa8b614aa5da8d1a4912050ccc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 13:55:16 +0000 Subject: [PATCH 310/455] chore(deps): bump certifi in the python-packages group Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). Updates `certifi` from 2025.6.15 to 2025.7.9 - [Commits](https://github.com/certifi/python-certifi/compare/2025.06.15...2025.07.09) --- updated-dependencies: - dependency-name: certifi dependency-version: 2025.7.9 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 2f5a899..1c766de 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ autopep8==2.3.2 black==25.1.0 bleach==6.2.0 -certifi==2025.6.15 +certifi==2025.7.9 charset-normalizer==3.4.2 click==8.1.8 colorama==0.4.6 From 1bad563e3f23d3d8b9f98721d857a660692f4847 Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Sat, 19 Jul 2025 17:17:58 -0700 Subject: [PATCH 311/455] Add conditional check for git checkout in development path Only insert development path into sys.path when running from a git checkout (when ../.git exists). This makes the script more robust by only using the development tree when available and falling back to installed package otherwise. --- bin/github-backup | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index b33d19f..c6116a1 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -4,6 +4,15 @@ import logging import os import sys + +# If we are running from a git-checkout, we can run against the development +# tree without installing. +if os.path.exists(os.path.join(os.path.dirname(__file__), "..", ".git")): + sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) + ) + + from github_backup.github_backup import ( backup_account, backup_repositories, From d820dd994d931f8dbead5e63dceef5c5b49bafa3 Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Sat, 19 Jul 2025 17:28:52 -0700 Subject: [PATCH 312/455] Fix -R flag to allow backups of repositories not owned by user Previously, using -R flag would show zero issues/PRs for repositories not owned by the primary user due to incorrect pagination parameters being added to single repository API calls. 
- Remove pagination parameters for single repository requests - Support owner/repo format in -R flag (e.g., -R owner/repo-name) - Skip filtering when specific repository is requested - Fix URL construction for requests without query parameters This enables backing up any repository, not just those owned by the primary user specified in -u flag. --- github_backup/github_backup.py | 49 +++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 29c9e58..4b2d790 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -578,10 +578,15 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): page = 0 while True: - page = page + 1 + if single_request: + request_page, request_per_page = None, None + else: + page = page + 1 + request_page, request_per_page = page, per_page + request = _construct_request( - per_page, - page, + request_per_page, + request_page, query_args, template, auth, @@ -715,14 +720,22 @@ def _get_response(request, auth, template): def _construct_request( per_page, page, query_args, template, auth, as_app=None, fine=False ): - querystring = urlencode( - dict( - list({"per_page": per_page, "page": page}.items()) - + list(query_args.items()) - ) - ) + all_query_args = {} + if per_page: + all_query_args["per_page"] = per_page + if page: + all_query_args["page"] = page + if query_args: + all_query_args.update(query_args) + + request_url = template + if all_query_args: + querystring = urlencode(all_query_args) + request_url = template + "?" + querystring + else: + querystring = "" - request = Request(template + "?" 
+ querystring) + request = Request(request_url) if auth is not None: if not as_app: if fine: @@ -735,7 +748,11 @@ def _construct_request( request.add_header( "Accept", "application/vnd.github.machine-man-preview+json" ) - logger.info("Requesting {}?{}".format(template, querystring)) + + log_url = template + if querystring: + log_url += "?" + querystring + logger.info("Requesting {}".format(log_url)) return request @@ -885,9 +902,13 @@ def retrieve_repositories(args, authenticated_user): ) if args.repository: + if "/" in args.repository: + repo_path = args.repository + else: + repo_path = "{0}/{1}".format(args.user, args.repository) single_request = True - template = "https://{0}/repos/{1}/{2}".format( - get_github_api_host(args), args.user, args.repository + template = "https://{0}/repos/{1}".format( + get_github_api_host(args), repo_path ) repos = retrieve_data(args, template, single_request=single_request) @@ -928,6 +949,8 @@ def retrieve_repositories(args, authenticated_user): def filter_repositories(args, unfiltered_repositories): + if args.repository: + return unfiltered_repositories logger.info("Filtering repositories") repositories = [] From a4f15b06d94c0481861a3cd149f3ac5b10fbefa7 Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Fri, 25 Jul 2025 11:47:08 -0700 Subject: [PATCH 313/455] Revert "Add conditional check for git checkout in development path" This reverts commit 1bad563e3f23d3d8b9f98721d857a660692f4847. --- bin/github-backup | 9 --------- 1 file changed, 9 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index c6116a1..b33d19f 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -4,15 +4,6 @@ import logging import os import sys - -# If we are running from a git-checkout, we can run against the development -# tree without installing. 
-if os.path.exists(os.path.join(os.path.dirname(__file__), "..", ".git")): - sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) - ) - - from github_backup.github_backup import ( backup_account, backup_repositories, From 82c1fc30864a23599af5a285a0a2fc1201d59f03 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 13:49:49 +0000 Subject: [PATCH 314/455] chore(deps): bump the python-packages group across 1 directory with 3 updates Bumps the python-packages group with 3 updates in the / directory: [certifi](https://github.com/certifi/python-certifi), [docutils](https://github.com/rtfd/recommonmark) and [rich](https://github.com/Textualize/rich). Updates `certifi` from 2025.7.9 to 2025.7.14 - [Commits](https://github.com/certifi/python-certifi/compare/2025.07.09...2025.07.14) Updates `docutils` from 0.21.2 to 0.22 - [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md) - [Commits](https://github.com/rtfd/recommonmark/commits) Updates `rich` from 14.0.0 to 14.1.0 - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v14.0.0...v14.1.0) --- updated-dependencies: - dependency-name: certifi dependency-version: 2025.7.14 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: docutils dependency-version: '0.22' dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: rich dependency-version: 14.1.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 1c766de..788fa95 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,11 +1,11 @@ autopep8==2.3.2 black==25.1.0 bleach==6.2.0 -certifi==2025.7.9 +certifi==2025.7.14 charset-normalizer==3.4.2 click==8.1.8 colorama==0.4.6 -docutils==0.21.2 +docutils==0.22 flake8==7.3.0 gitchangelog==3.0.4 idna==3.10 @@ -29,7 +29,7 @@ requests==2.32.4 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 -rich==14.0.0 +rich==14.1.0 setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 From 5f07157c9b417c538ead38a1902035e0ac45188f Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Fri, 8 Aug 2025 20:41:53 +0000 Subject: [PATCH 315/455] Release version 0.50.3 --- CHANGES.rst | 160 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 160 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 2fddca5..960977f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,167 @@ Changelog ========= -0.50.2 (2025-06-16) +0.50.3 (2025-08-08) ------------------- ------------------------ +- Revert "Add conditional check for git checkout in development path" + [Eric Wheeler] + + This reverts commit 1bad563e3f23d3d8b9f98721d857a660692f4847. +- Fix -R flag to allow backups of repositories not owned by user. [Eric + Wheeler] + + Previously, using -R flag would show zero issues/PRs for repositories + not owned by the primary user due to incorrect pagination parameters + being added to single repository API calls. 
+ + - Remove pagination parameters for single repository requests + - Support owner/repo format in -R flag (e.g., -R owner/repo-name) + - Skip filtering when specific repository is requested + - Fix URL construction for requests without query parameters + + This enables backing up any repository, not just those owned by the + primary user specified in -u flag. +- Add conditional check for git checkout in development path. [Eric + Wheeler] + + Only insert development path into sys.path when running from a git checkout + (when ../.git exists). This makes the script more robust by only using the + development tree when available and falling back to installed package otherwise. +- Chore(deps): bump the python-packages group across 1 directory with 3 + updates. [dependabot[bot]] + + Bumps the python-packages group with 3 updates in the / directory: [certifi](https://github.com/certifi/python-certifi), [docutils](https://github.com/rtfd/recommonmark) and [rich](https://github.com/Textualize/rich). 
+ + + Updates `certifi` from 2025.7.9 to 2025.7.14 + - [Commits](https://github.com/certifi/python-certifi/compare/2025.07.09...2025.07.14) + + Updates `docutils` from 0.21.2 to 0.22 + - [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md) + - [Commits](https://github.com/rtfd/recommonmark/commits) + + Updates `rich` from 14.0.0 to 14.1.0 + - [Release notes](https://github.com/Textualize/rich/releases) + - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) + - [Commits](https://github.com/Textualize/rich/compare/v14.0.0...v14.1.0) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-version: 2025.7.14 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: docutils + dependency-version: '0.22' + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: rich + dependency-version: 14.1.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump certifi in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). + + + Updates `certifi` from 2025.6.15 to 2025.7.9 + - [Commits](https://github.com/certifi/python-certifi/compare/2025.06.15...2025.07.09) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-version: 2025.7.9 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump urllib3 from 2.4.0 to 2.5.0. [dependabot[bot]] + + Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.4.0 to 2.5.0. 
+ - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.4.0...2.5.0) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-version: 2.5.0 + dependency-type: direct:production + ... +- Chore(deps): bump the python-packages group with 5 updates. + [dependabot[bot]] + + Bumps the python-packages group with 5 updates: + + | Package | From | To | + | --- | --- | --- | + | [flake8](https://github.com/pycqa/flake8) | `7.2.0` | `7.3.0` | + | [pycodestyle](https://github.com/PyCQA/pycodestyle) | `2.13.0` | `2.14.0` | + | [pyflakes](https://github.com/PyCQA/pyflakes) | `3.3.2` | `3.4.0` | + | [pygments](https://github.com/pygments/pygments) | `2.19.1` | `2.19.2` | + | [urllib3](https://github.com/urllib3/urllib3) | `2.4.0` | `2.5.0` | + + + Updates `flake8` from 7.2.0 to 7.3.0 + - [Commits](https://github.com/pycqa/flake8/compare/7.2.0...7.3.0) + + Updates `pycodestyle` from 2.13.0 to 2.14.0 + - [Release notes](https://github.com/PyCQA/pycodestyle/releases) + - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) + - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.13.0...2.14.0) + + Updates `pyflakes` from 3.3.2 to 3.4.0 + - [Changelog](https://github.com/PyCQA/pyflakes/blob/main/NEWS.rst) + - [Commits](https://github.com/PyCQA/pyflakes/compare/3.3.2...3.4.0) + + Updates `pygments` from 2.19.1 to 2.19.2 + - [Release notes](https://github.com/pygments/pygments/releases) + - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) + - [Commits](https://github.com/pygments/pygments/compare/2.19.1...2.19.2) + + Updates `urllib3` from 2.4.0 to 2.5.0 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.4.0...2.5.0) + + --- + 
updated-dependencies: + - dependency-name: flake8 + dependency-version: 7.3.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pycodestyle + dependency-version: 2.14.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pyflakes + dependency-version: 3.4.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pygments + dependency-version: 2.19.2 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: urllib3 + dependency-version: 2.5.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... + + +0.50.2 (2025-06-16) +------------------- - Chore(deps): bump certifi in the python-packages group. [dependabot[bot]] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 079baa7..e7d2f93 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.50.2" +__version__ = "0.50.3" From 338d5a956b4b61c3ee65517785433157a914d2c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Aug 2025 20:51:37 +0000 Subject: [PATCH 316/455] chore(deps): bump the python-packages group with 2 updates Bumps the python-packages group with 2 updates: [certifi](https://github.com/certifi/python-certifi) and [charset-normalizer](https://github.com/jawah/charset_normalizer). 
Updates `certifi` from 2025.7.14 to 2025.8.3 - [Commits](https://github.com/certifi/python-certifi/compare/2025.07.14...2025.08.03) Updates `charset-normalizer` from 3.4.2 to 3.4.3 - [Release notes](https://github.com/jawah/charset_normalizer/releases) - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) - [Commits](https://github.com/jawah/charset_normalizer/compare/3.4.2...3.4.3) --- updated-dependencies: - dependency-name: certifi dependency-version: 2025.8.3 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: charset-normalizer dependency-version: 3.4.3 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 788fa95..1769460 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,8 +1,8 @@ autopep8==2.3.2 black==25.1.0 bleach==6.2.0 -certifi==2025.7.14 -charset-normalizer==3.4.2 +certifi==2025.8.3 +charset-normalizer==3.4.3 click==8.1.8 colorama==0.4.6 docutils==0.22 From f027760ac5b701ec7edffe72e265223821f9371b Mon Sep 17 00:00:00 2001 From: Mateusz Hajder <6783135+mhajder@users.noreply.github.com> Date: Tue, 12 Aug 2025 10:18:52 +0200 Subject: [PATCH 317/455] chore: update Dockerfile to use Python 3.12 and improve dependency installation --- .dockerignore | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++ .gitignore | 4 ++- Dockerfile | 42 ++++++++++++++++++++++------- 3 files changed, 110 insertions(+), 11 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..07a3ea4 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,75 @@ +# Docker ignore file to reduce build context size + +# Temp files +*~ +~* +.*~ +\#* +.#* +*# +dist + 
+# Build files +build +dist +pkg +*.egg +*.egg-info + +# Debian Files +debian/files +debian/python-github-backup* + +# Sphinx build +doc/_build + +# Generated man page +doc/github_backup.1 + +# Annoying macOS files +.DS_Store +._* + +# IDE configuration files +.vscode +.atom +.idea +*.code-workspace + +# RSA +id_rsa +id_rsa.pub + +# Virtual env +venv +.venv + +# Git +.git +.gitignore +.gitchangelog.rc +.github + +# Documentation +*.md +!README.md + +# Environment variables files +.env +.env.* +!.env.example +*.log + +# Cache files +**/__pycache__/ +*.py[cod] + +# Docker files +docker-compose.yml +Dockerfile* + +# Other files +release +*.tar +*.zip +*.gzip diff --git a/.gitignore b/.gitignore index f0ed9db..652f035 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -*.py[oc] +*.py[cod] # Temp files *~ @@ -33,6 +33,7 @@ doc/github_backup.1 # IDE configuration files .vscode .atom +.idea README @@ -42,3 +43,4 @@ id_rsa.pub # Virtual env venv +.venv diff --git a/Dockerfile b/Dockerfile index 6217594..2c28829 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,38 @@ -FROM python:3.9.18-slim +FROM python:3.12-alpine3.22 AS builder -RUN --mount=type=cache,target=/var/cache/apt \ - apt-get update && apt-get install -y git git-lfs +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir uv -WORKDIR /usr/src/app +WORKDIR /app -COPY release-requirements.txt . -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r release-requirements.txt +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,source=requirements.txt,target=requirements.txt \ + --mount=type=bind,source=release-requirements.txt,target=release-requirements.txt \ + uv venv \ + && uv pip install -r release-requirements.txt COPY . . -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install . -ENTRYPOINT [ "github-backup" ] +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install . 
+ + +FROM python:3.12-alpine3.22 +ENV PYTHONUNBUFFERED=1 + +RUN apk add --no-cache \ + ca-certificates \ + git \ + git-lfs \ + && addgroup -g 1000 appuser \ + && adduser -D -u 1000 -G appuser appuser + +COPY --from=builder --chown=appuser:appuser /app /app + +WORKDIR /app + +USER appuser + +ENV PATH="/app/.venv/bin:$PATH" + +ENTRYPOINT ["github-backup"] From 65749bfde4d7e5910763d77f6b89719687e96969 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Aug 2025 06:33:46 +0000 Subject: [PATCH 318/455] chore(deps): bump actions/checkout from 4 to 5 Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 5. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/automatic-release.yml | 2 +- .github/workflows/docker.yml | 2 +- .github/workflows/lint.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index 4c2150e..c6eb48b 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 ssh-key: ${{ secrets.DEPLOY_PRIVATE_KEY }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index b0607f7..2c7cb38 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -38,7 +38,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: persist-credentials: false diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 541242d..03686f4 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -18,7 +18,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 - name: Setup Python From d3b67f884a21a0542a8f2e65f3233241a2e76706 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 20:54:47 +0000 Subject: [PATCH 319/455] chore(deps): bump requests in the python-packages group Bumps the python-packages group with 1 update: [requests](https://github.com/psf/requests). 
Updates `requests` from 2.32.4 to 2.32.5 - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.32.4...v2.32.5) --- updated-dependencies: - dependency-name: requests dependency-version: 2.32.5 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 1769460..2e16603 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -25,7 +25,7 @@ pycodestyle==2.14.0 pyflakes==3.4.0 Pygments==2.19.2 readme-renderer==44.0 -requests==2.32.4 +requests==2.32.5 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 From 8bfad9b5b71f2ca988db56a3300fef039c4ac691 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Aug 2025 20:52:18 +0000 Subject: [PATCH 320/455] chore(deps): bump platformdirs in the python-packages group Bumps the python-packages group with 1 update: [platformdirs](https://github.com/tox-dev/platformdirs). Updates `platformdirs` from 4.3.8 to 4.4.0 - [Release notes](https://github.com/tox-dev/platformdirs/releases) - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) - [Commits](https://github.com/tox-dev/platformdirs/compare/4.3.8...4.4.0) --- updated-dependencies: - dependency-name: platformdirs dependency-version: 4.4.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 2e16603..e02238f 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -20,7 +20,7 @@ mypy-extensions==1.1.0 packaging==25.0 pathspec==0.12.1 pkginfo==1.12.1.2 -platformdirs==4.3.8 +platformdirs==4.4.0 pycodestyle==2.14.0 pyflakes==3.4.0 Pygments==2.19.2 From 1c465f4d35f777f4d601e0fcf32131fbf6e000bd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:43:31 +0000 Subject: [PATCH 321/455] chore(deps): bump more-itertools in the python-packages group Bumps the python-packages group with 1 update: [more-itertools](https://github.com/more-itertools/more-itertools). Updates `more-itertools` from 10.7.0 to 10.8.0 - [Release notes](https://github.com/more-itertools/more-itertools/releases) - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.7.0...v10.8.0) --- updated-dependencies: - dependency-name: more-itertools dependency-version: 10.8.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index e02238f..82e6645 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -15,7 +15,7 @@ keyring==25.6.0 markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 -more-itertools==10.7.0 +more-itertools==10.8.0 mypy-extensions==1.1.0 packaging==25.0 pathspec==0.12.1 From 268a989b09b96f575e058d3c12fe6a71580c1214 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 5 Sep 2025 13:09:08 +0000 Subject: [PATCH 322/455] chore(deps): bump twine from 6.1.0 to 6.2.0 in the python-packages group Bumps the python-packages group with 1 update: [twine](https://github.com/pypa/twine). Updates `twine` from 6.1.0 to 6.2.0 - [Release notes](https://github.com/pypa/twine/releases) - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) - [Commits](https://github.com/pypa/twine/compare/6.1.0...6.2.0) --- updated-dependencies: - dependency-name: twine dependency-version: 6.2.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 82e6645..68d6bd9 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -33,7 +33,7 @@ rich==14.1.0 setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 -twine==6.1.0 +twine==6.2.0 urllib3==2.5.0 webencodings==0.5.1 zipp==3.23.0 From d3079bfb74ec4be5a8f49b28e228dc1cbb4dcc44 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 04:10:35 +0000 Subject: [PATCH 323/455] chore(deps): bump actions/setup-python from 5 to 6 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/setup-python dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/automatic-release.yml | 2 +- .github/workflows/lint.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index c6eb48b..2160206 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -27,7 +27,7 @@ jobs: git config --local user.email "action@github.com" git config --local user.name "GitHub Action" - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install prerequisites diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 03686f4..e0036e2 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -22,7 +22,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.12" cache: "pip" From 12ac519e9c1f19a42c25e7cc7aa1ba5bc508509b Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:26:53 -0400 Subject: [PATCH 324/455] chore: Rename ISSUE_TEMPLATE.md to .github/ISSUE_TEMPLATE.md --- ISSUE_TEMPLATE.md => .github/ISSUE_TEMPLATE.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ISSUE_TEMPLATE.md => .github/ISSUE_TEMPLATE.md (100%) diff --git a/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md similarity index 100% rename from ISSUE_TEMPLATE.md rename to .github/ISSUE_TEMPLATE.md From 39848e650cc15809631b31adf9df4b1fa54712e2 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:27:23 -0400 Subject: [PATCH 325/455] chore: Rename PULL_REQUEST.md to .github/PULL_REQUEST.md --- PULL_REQUEST.md => .github/PULL_REQUEST.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename PULL_REQUEST.md => .github/PULL_REQUEST.md (100%) diff --git a/PULL_REQUEST.md b/.github/PULL_REQUEST.md similarity index 100% rename from PULL_REQUEST.md rename to 
.github/PULL_REQUEST.md From 03c660724d39b92af100454629685ec442aeb521 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:30:10 -0400 Subject: [PATCH 326/455] chore: create bug template --- .github/ISSUE_TEMPLATE/bug.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug.md diff --git a/.github/ISSUE_TEMPLATE/bug.md b/.github/ISSUE_TEMPLATE/bug.md new file mode 100644 index 0000000..0d0fee5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug.md @@ -0,0 +1,28 @@ +--- +name: Bug Report +description: File a bug report. +body: + - type: markdown + attributes: + value: | + # Important notice regarding filed issues + + This project already fills my needs, and as such I have no real reason to continue it's development. This project is otherwise provided as is, and no support is given. + + If pull requests implementing bug fixes or enhancements are pushed, I am happy to review and merge them (time permitting). + + If you wish to have a bug fixed, you have a few options: + + - Fix it yourself and file a pull request. + - File a bug and hope someone else fixes it for you. + - Pay me to fix it (my rate is $200 an hour, minimum 1 hour, contact me via my [github email address](https://github.com/josegonzalez) if you want to go this route). + + In all cases, feel free to file an issue, they may be of help to others in the future. + - type: textarea + id: what-happened + attributes: + label: What happened? + description: Also tell us, what did you expect to happen? + placeholder: Tell us what you see! 
+ validations: + required: true From df4d751be27252c2d2c1bf272d3e62cb55a2da61 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:30:46 -0400 Subject: [PATCH 327/455] Rename bug.md to bug.yaml --- .github/ISSUE_TEMPLATE/{bug.md => bug.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/ISSUE_TEMPLATE/{bug.md => bug.yaml} (100%) diff --git a/.github/ISSUE_TEMPLATE/bug.md b/.github/ISSUE_TEMPLATE/bug.yaml similarity index 100% rename from .github/ISSUE_TEMPLATE/bug.md rename to .github/ISSUE_TEMPLATE/bug.yaml From 85ab54e5147ddddb0ce4dbb7dc2c144b9db18acf Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:31:38 -0400 Subject: [PATCH 328/455] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 38 ++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..dd84ea7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. 
From d6bf031bf7ae0cd5bce311a725d36fe3214a1ec8 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:32:32 -0400 Subject: [PATCH 329/455] Delete .github/ISSUE_TEMPLATE/bug_report.md --- .github/ISSUE_TEMPLATE/bug_report.md | 38 ---------------------------- 1 file changed, 38 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index dd84ea7..0000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: '' -assignees: '' - ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behavior: -1. Go to '...' -2. Click on '....' -3. Scroll down to '....' -4. See error - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. - -**Desktop (please complete the following information):** - - OS: [e.g. iOS] - - Browser [e.g. chrome, safari] - - Version [e.g. 22] - -**Smartphone (please complete the following information):** - - Device: [e.g. iPhone6] - - OS: [e.g. iOS8.1] - - Browser [e.g. stock browser, safari] - - Version [e.g. 22] - -**Additional context** -Add any other context about the problem here. 
From 3d5f61aa2279c9cef3b3e9f8e8770768362afd73 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:33:49 -0400 Subject: [PATCH 330/455] Create feature.yaml --- .github/ISSUE_TEMPLATE/feature.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/feature.yaml diff --git a/.github/ISSUE_TEMPLATE/feature.yaml b/.github/ISSUE_TEMPLATE/feature.yaml new file mode 100644 index 0000000..dbfd2c5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature.yaml @@ -0,0 +1,27 @@ +--- +name: Feature Request +description: File a feature request. +body: + - type: markdown + attributes: + value: | + # Important notice regarding filed issues + + This project already fills my needs, and as such I have no real reason to continue it's development. This project is otherwise provided as is, and no support is given. + + If pull requests implementing bug fixes or enhancements are pushed, I am happy to review and merge them (time permitting). + + If you wish to have a bug fixed, you have a few options: + + - Fix it yourself and file a pull request. + - File a bug and hope someone else fixes it for you. + - Pay me to fix it (my rate is $200 an hour, minimum 1 hour, contact me via my [github email address](https://github.com/josegonzalez) if you want to go this route). + + In all cases, feel free to file an issue, they may be of help to others in the future. + - type: textarea + id: what-would-you-like-to-happen + attributes: + label: What would you like to happen? + description: Please describe in detail how the new functionality should work as well as any issues with existing functionality. 
+ validations: + required: true From eb756d665c425fd30ae266b82809a229a7cf1d41 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:34:18 -0400 Subject: [PATCH 331/455] Delete .github/ISSUE_TEMPLATE.md --- .github/ISSUE_TEMPLATE.md | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index 734420b..0000000 --- a/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,13 +0,0 @@ -# Important notice regarding filed issues - -This project already fills my needs, and as such I have no real reason to continue it's development. This project is otherwise provided as is, and no support is given. - -If pull requests implementing bug fixes or enhancements are pushed, I am happy to review and merge them (time permitting). - -If you wish to have a bug fixed, you have a few options: - -- Fix it yourself and file a pull request. -- File a bug and hope someone else fixes it for you. -- Pay me to fix it (my rate is $200 an hour, minimum 1 hour, contact me via my [github email address](https://github.com/josegonzalez) if you want to go this route). - -In all cases, feel free to file an issue, they may be of help to others in the future. From 9d28d9c2b041aab387fc950846794ca7a374d9d9 Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Thu, 11 Sep 2025 16:34:50 -0400 Subject: [PATCH 332/455] Update feature.yaml --- .github/ISSUE_TEMPLATE/feature.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/feature.yaml b/.github/ISSUE_TEMPLATE/feature.yaml index dbfd2c5..4b1f408 100644 --- a/.github/ISSUE_TEMPLATE/feature.yaml +++ b/.github/ISSUE_TEMPLATE/feature.yaml @@ -11,11 +11,11 @@ body: If pull requests implementing bug fixes or enhancements are pushed, I am happy to review and merge them (time permitting). 
- If you wish to have a bug fixed, you have a few options: + If you wish to have a feature implemented, you have a few options: - - Fix it yourself and file a pull request. - - File a bug and hope someone else fixes it for you. - - Pay me to fix it (my rate is $200 an hour, minimum 1 hour, contact me via my [github email address](https://github.com/josegonzalez) if you want to go this route). + - Implement it yourself and file a pull request. + - File an issue and hope someone else implements it for you. + - Pay me to implement it (my rate is $200 an hour, minimum 1 hour, contact me via my [github email address](https://github.com/josegonzalez) if you want to go this route). In all cases, feel free to file an issue, they may be of help to others in the future. - type: textarea From 5bedaf825f2a161617d41e002f8ddc0af1dfee60 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:09:40 +0000 Subject: [PATCH 333/455] chore(deps): bump the python-packages group across 1 directory with 2 updates Bumps the python-packages group with 2 updates in the / directory: [black](https://github.com/psf/black) and [docutils](https://github.com/rtfd/recommonmark). Updates `black` from 25.1.0 to 25.9.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/25.1.0...25.9.0) Updates `docutils` from 0.22 to 0.22.1 - [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md) - [Commits](https://github.com/rtfd/recommonmark/commits) --- updated-dependencies: - dependency-name: black dependency-version: 25.9.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: docutils dependency-version: 0.22.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 68d6bd9..76d8fd0 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,11 +1,11 @@ autopep8==2.3.2 -black==25.1.0 +black==25.9.0 bleach==6.2.0 certifi==2025.8.3 charset-normalizer==3.4.3 click==8.1.8 colorama==0.4.6 -docutils==0.22 +docutils==0.22.1 flake8==7.3.0 gitchangelog==3.0.4 idna==3.10 From 64b5667a1690a04eb39b96305569f2a41a0e8d41 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Sep 2025 13:12:10 +0000 Subject: [PATCH 334/455] chore(deps): bump docutils in the python-packages group Bumps the python-packages group with 1 update: [docutils](https://github.com/rtfd/recommonmark). Updates `docutils` from 0.22.1 to 0.22.2 - [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md) - [Commits](https://github.com/rtfd/recommonmark/commits) --- updated-dependencies: - dependency-name: docutils dependency-version: 0.22.2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 76d8fd0..1df8412 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -5,7 +5,7 @@ certifi==2025.8.3 charset-normalizer==3.4.3 click==8.1.8 colorama==0.4.6 -docutils==0.22.1 +docutils==0.22.2 flake8==7.3.0 gitchangelog==3.0.4 idna==3.10 From 963ed3e6f605c40d83e194f7f1ad9d0594f77bd3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Oct 2025 13:53:31 +0000 Subject: [PATCH 335/455] chore(deps): bump the python-packages group with 3 updates Bumps the python-packages group with 3 updates: [certifi](https://github.com/certifi/python-certifi), [click](https://github.com/pallets/click) and [markdown-it-py](https://github.com/executablebooks/markdown-it-py). Updates `certifi` from 2025.8.3 to 2025.10.5 - [Commits](https://github.com/certifi/python-certifi/compare/2025.08.03...2025.10.05) Updates `click` from 8.1.8 to 8.3.0 - [Release notes](https://github.com/pallets/click/releases) - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/click/compare/8.1.8...8.3.0) Updates `markdown-it-py` from 3.0.0 to 4.0.0 - [Release notes](https://github.com/executablebooks/markdown-it-py/releases) - [Changelog](https://github.com/executablebooks/markdown-it-py/blob/master/CHANGELOG.md) - [Commits](https://github.com/executablebooks/markdown-it-py/compare/v3.0.0...v4.0.0) --- updated-dependencies: - dependency-name: certifi dependency-version: 2025.10.5 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: click dependency-version: 8.3.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: markdown-it-py dependency-version: 
4.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 1df8412..b5c3b26 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,9 +1,9 @@ autopep8==2.3.2 black==25.9.0 bleach==6.2.0 -certifi==2025.8.3 +certifi==2025.10.5 charset-normalizer==3.4.3 -click==8.1.8 +click==8.3.0 colorama==0.4.6 docutils==0.22.2 flake8==7.3.0 @@ -12,7 +12,7 @@ idna==3.10 importlib-metadata==8.7.0 jaraco.classes==3.4.0 keyring==25.6.0 -markdown-it-py==3.0.0 +markdown-it-py==4.0.0 mccabe==0.7.0 mdurl==0.1.2 more-itertools==10.8.0 From 90396d2bdfc0bc9e54ddf00bd6cf3435f20a7516 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 10 Oct 2025 13:09:42 +0000 Subject: [PATCH 336/455] chore(deps): bump the python-packages group across 1 directory with 2 updates Bumps the python-packages group with 2 updates in the / directory: [platformdirs](https://github.com/tox-dev/platformdirs) and [rich](https://github.com/Textualize/rich). 
Updates `platformdirs` from 4.4.0 to 4.5.0 - [Release notes](https://github.com/tox-dev/platformdirs/releases) - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) - [Commits](https://github.com/tox-dev/platformdirs/compare/4.4.0...4.5.0) Updates `rich` from 14.1.0 to 14.2.0 - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v14.1.0...v14.2.0) --- updated-dependencies: - dependency-name: platformdirs dependency-version: 4.5.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: rich dependency-version: 14.2.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index b5c3b26..f5bcdb4 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -20,7 +20,7 @@ mypy-extensions==1.1.0 packaging==25.0 pathspec==0.12.1 pkginfo==1.12.1.2 -platformdirs==4.4.0 +platformdirs==4.5.0 pycodestyle==2.14.0 pyflakes==3.4.0 Pygments==2.19.2 @@ -29,7 +29,7 @@ requests==2.32.5 requests-toolbelt==1.0.0 restructuredtext-lint==1.4.0 rfc3986==2.0.0 -rich==14.1.0 +rich==14.2.0 setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 From 38b4a2c1066f90327278f85fda1792a26d5510fc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 13:42:50 +0000 Subject: [PATCH 337/455] chore(deps): bump idna from 3.10 to 3.11 in the python-packages group Bumps the python-packages group with 1 update: [idna](https://github.com/kjd/idna). 
Updates `idna` from 3.10 to 3.11 - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.10...v3.11) --- updated-dependencies: - dependency-name: idna dependency-version: '3.11' dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index f5bcdb4..895083f 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -8,7 +8,7 @@ colorama==0.4.6 docutils==0.22.2 flake8==7.3.0 gitchangelog==3.0.4 -idna==3.10 +idna==3.11 importlib-metadata==8.7.0 jaraco.classes==3.4.0 keyring==25.6.0 From 759ec58beb24e55539f401fccfb68f83a72ffe7d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 13:10:22 +0000 Subject: [PATCH 338/455] chore(deps): bump charset-normalizer in the python-packages group Bumps the python-packages group with 1 update: [charset-normalizer](https://github.com/jawah/charset_normalizer). Updates `charset-normalizer` from 3.4.3 to 3.4.4 - [Release notes](https://github.com/jawah/charset_normalizer/releases) - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) - [Commits](https://github.com/jawah/charset_normalizer/compare/3.4.3...3.4.4) --- updated-dependencies: - dependency-name: charset-normalizer dependency-version: 3.4.4 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 895083f..6f1b161 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -2,7 +2,7 @@ autopep8==2.3.2 black==25.9.0 bleach==6.2.0 certifi==2025.10.5 -charset-normalizer==3.4.3 +charset-normalizer==3.4.4 click==8.3.0 colorama==0.4.6 docutils==0.22.2 From 4dae43c58e0f907e050c498c225ea5d40b970fd0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Oct 2025 13:11:27 +0000 Subject: [PATCH 339/455] chore(deps): bump bleach in the python-packages group Bumps the python-packages group with 1 update: [bleach](https://github.com/mozilla/bleach). Updates `bleach` from 6.2.0 to 6.3.0 - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) - [Commits](https://github.com/mozilla/bleach/compare/v6.2.0...v6.3.0) --- updated-dependencies: - dependency-name: bleach dependency-version: 6.3.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 6f1b161..bd9ebf2 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,6 +1,6 @@ autopep8==2.3.2 black==25.9.0 -bleach==6.2.0 +bleach==6.3.0 certifi==2025.10.5 charset-normalizer==3.4.4 click==8.3.0 From cd23dd1a16558b40ebdfae72f233db42e5b485f9 Mon Sep 17 00:00:00 2001 From: Rodos Date: Tue, 4 Nov 2025 10:07:22 +1100 Subject: [PATCH 340/455] feat: Enforce Python 3.8+ requirement and add multi-version CI testing - Add python_requires=">=3.8" to setup.py to enforce minimum version at install time - Update README to explicitly document Python 3.8+ requirement - Add CI matrix to test lint/build on Python 3.8-3.14 (7 versions) - Aligns with actual usage patterns (~99% of downloads on Python 3.8+) - Prevents future PRs from inadvertently using incompatible syntax This change protects users by preventing installation on unsupported Python versions and ensures contributors can see version requirements clearly. 
--- .github/workflows/lint.yml | 5 ++++- README.rst | 2 +- setup.py | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e0036e2..cf74eb7 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -15,6 +15,9 @@ jobs: lint: name: lint runs-on: ubuntu-24.04 + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - name: Checkout repository @@ -24,7 +27,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v6 with: - python-version: "3.12" + python-version: ${{ matrix.python-version }} cache: "pip" - run: pip install -r release-requirements.txt && pip install wheel - run: flake8 --ignore=E501,E203,W503 diff --git a/README.rst b/README.rst index 5dcef95..c5fafa3 100644 --- a/README.rst +++ b/README.rst @@ -9,8 +9,8 @@ The package can be used to backup an *entire* `Github `_ or Requirements ============ +- Python 3.8 or higher - GIT 1.9+ -- Python Installation ============ diff --git a/setup.py b/setup.py index c4b8cf1..6ef7551 100644 --- a/setup.py +++ b/setup.py @@ -50,5 +50,6 @@ def open_file(fname): long_description=open_file("README.rst").read(), long_description_content_type="text/x-rst", install_requires=open_file("requirements.txt").readlines(), + python_requires=">=3.8", zip_safe=True, ) From 73dc75ab952300213d4930bc93cb76067b7f87e0 Mon Sep 17 00:00:00 2001 From: Rodos Date: Tue, 4 Nov 2025 13:30:42 +1100 Subject: [PATCH 341/455] fix: Remove Python 3.8 and 3.9 from CI matrix 3.8 and 3.9 are failing because the pinned dependencies don't support them: - autopep8==2.3.2 needs Python 3.9+ - bleach==6.3.0 needs Python 3.10+ Both are EOL now anyway (3.8 in Oct 2024, 3.9 in Oct 2025). Just fixing CI to test 3.10-3.14 for now. Will do a separate PR to formally drop 3.8/3.9 support with python_requires and README updates. 
--- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index cf74eb7..02ad174 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - name: Checkout repository From 875e31819afe3ed4cd2e77cdb8b3a1f4c626a29b Mon Sep 17 00:00:00 2001 From: Rodos Date: Tue, 4 Nov 2025 13:53:41 +1100 Subject: [PATCH 342/455] feat: Drop support for Python 3.8 and 3.9 (EOL) Both Python 3.8 and 3.9 have reached end-of-life: - Python 3.8: EOL October 7, 2024 - Python 3.9: EOL October 31, 2025 Changes: - Add python_requires=">=3.10" to setup.py - Remove Python 3.8 and 3.9 from classifiers - Add Python 3.13 and 3.14 to classifiers - Update README to document Python 3.10+ requirement --- README.rst | 2 +- setup.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 5dcef95..e435951 100644 --- a/README.rst +++ b/README.rst @@ -9,8 +9,8 @@ The package can be used to backup an *entire* `Github `_ or Requirements ============ +- Python 3.10 or higher - GIT 1.9+ -- Python Installation ============ diff --git a/setup.py b/setup.py index c4b8cf1..374e6ec 100644 --- a/setup.py +++ b/setup.py @@ -40,15 +40,16 @@ def open_file(fname): "Development Status :: 5 - Production/Stable", "Topic :: System :: Archiving :: Backup", "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ], description="backup a github user or organization", 
long_description=open_file("README.rst").read(), long_description_content_type="text/x-rst", install_requires=open_file("requirements.txt").readlines(), + python_requires=">=3.10", zip_safe=True, ) From a194fa48cead59dda7f491ab6c4aeffb8a0d4c7f Mon Sep 17 00:00:00 2001 From: Rodos Date: Mon, 3 Nov 2025 13:36:15 +1100 Subject: [PATCH 343/455] feat: Add attachment download support for issues and pull requests Adds new --attachments flag that downloads user-uploaded files from issue and PR bodies and comments. Key features: - Determines attachment URLs - Tracks downloads in manifest.json with metadata - Supports --skip-existing to avoid re-downloading - Handles filename collisions with counter suffix - Smart retry logic for transient vs permanent failures - Uses Content-Disposition for correct file extensions --- README.rst | 30 +- github_backup/github_backup.py | 610 ++++++++++++++++++++++++++++++++- 2 files changed, 637 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index e435951..69d5524 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ CLI Help output:: [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] [--releases] [--latest-releases NUMBER_OF_LATEST_RELEASES] - [--skip-prerelease] [--assets] + [--skip-prerelease] [--assets] [--attachments] [--exclude [REPOSITORY [REPOSITORY ...]] [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] USER @@ -133,6 +133,9 @@ CLI Help output:: --skip-prerelease skip prerelease and draft versions; only applies if including releases --assets include assets alongside release information; only applies if including releases + --attachments download user-attachments from issues and pull requests + to issues/attachments/{issue_number}/ and + pulls/attachments/{pull_number}/ directories --exclude [REPOSITORY [REPOSITORY ...]] names of repositories to exclude from backup. 
--throttle-limit THROTTLE_LIMIT @@ -213,6 +216,29 @@ When you use the ``--lfs`` option, you will need to make sure you have Git LFS i Instructions on how to do this can be found on https://git-lfs.github.com. +About Attachments +----------------- + +When you use the ``--attachments`` option with ``--issues`` or ``--pulls``, the tool will download user-uploaded attachments (images, videos, documents, etc.) from issue and pull request descriptions and comments. In some circumstances attachments contain valuable data related to the topic, and without their backup important information or context might be lost inadvertently. + +Attachments are saved to ``issues/attachments/{issue_number}/`` and ``pulls/attachments/{pull_number}/`` directories, where ``{issue_number}`` is the GitHub issue number (e.g., issue #123 saves to ``issues/attachments/123/``). Each attachment directory contains: + +- The downloaded attachment files (named by their GitHub identifier with appropriate file extensions) +- If multiple attachments have the same filename, conflicts are resolved with numeric suffixes (e.g., ``report.pdf``, ``report_1.pdf``, ``report_2.pdf``) +- A ``manifest.json`` file documenting all downloads, including URLs, file metadata, and download status + +The tool automatically extracts file extensions from HTTP headers to ensure files can be more easily opened by your operating system. + +**Supported URL formats:** + +- Modern: ``github.com/user-attachments/{assets,files}/*`` +- Legacy: ``user-images.githubusercontent.com/*`` and ``private-user-images.githubusercontent.com/*`` +- Repo files: ``github.com/{owner}/{repo}/files/*`` (filtered to current repository) +- Repo assets: ``github.com/{owner}/{repo}/assets/*`` (filtered to current repository) + +**Repository filtering** for repo files/assets handles renamed and transferred repositories gracefully. 
URLs are included if they either match the current repository name directly, or redirect to it (e.g., ``willmcgugan/rich`` redirects to ``Textualize/rich`` after transfer). + + Run in Docker container ----------------------- @@ -303,7 +329,7 @@ Quietly and incrementally backup useful Github user data (public and private rep export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER - github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER + github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER Debug an error/block or incomplete backup into a temporary directory. Omit "incremental" to fill a previous incomplete backup. 
:: diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 4b2d790..e8d9ae0 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -420,6 +420,12 @@ def parse_args(args=None): dest="include_assets", help="include assets alongside release information; only applies if including releases", ) + parser.add_argument( + "--attachments", + action="store_true", + dest="include_attachments", + help="download user-attachments from issues and pull requests", + ) parser.add_argument( "--throttle-limit", dest="throttle_limit", @@ -814,7 +820,9 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): request = super(S3HTTPRedirectHandler, self).redirect_request( req, fp, code, msg, headers, newurl ) - del request.headers["Authorization"] + # Only delete Authorization header if it exists (attachments may not have it) + if "Authorization" in request.headers: + del request.headers["Authorization"] return request @@ -867,6 +875,598 @@ def download_file(url, path, auth, as_app=False, fine=False): ) +def download_attachment_file(url, path, auth, as_app=False, fine=False): + """Download attachment file directly (not via GitHub API). + + Similar to download_file() but for direct file URLs, not API endpoints. + Attachment URLs (user-images, user-attachments) are direct downloads, + not API endpoints, so we skip _construct_request() which adds API params. 
+ + URL Format Support & Authentication Requirements: + + | URL Format | Auth Required | Notes | + |----------------------------------------------|---------------|--------------------------| + | github.com/user-attachments/assets/* | Private only | Modern format (2024+) | + | github.com/user-attachments/files/* | Private only | Modern format (2024+) | + | user-images.githubusercontent.com/* | No (public) | Legacy CDN, all eras | + | private-user-images.githubusercontent.com/* | JWT in URL | Legacy private (5min) | + | github.com/{owner}/{repo}/files/* | Repo filter | Old repo files | + + - Modern user-attachments: Requires GitHub token auth for private repos + - Legacy public CDN: No auth needed/accepted (returns 400 with auth header) + - Legacy private CDN: Uses JWT token embedded in URL, no GitHub token needed + - Repo files: Filtered to current repository only during extraction + + Returns dict with metadata: + - success: bool + - http_status: int (200, 404, etc.) + - content_type: str or None + - original_filename: str or None (from Content-Disposition) + - size_bytes: int or None + - error: str or None + """ + import re + from datetime import datetime, timezone + + metadata = { + "url": url, + "success": False, + "http_status": None, + "content_type": None, + "original_filename": None, + "size_bytes": None, + "downloaded_at": datetime.now(timezone.utc).isoformat(), + "error": None, + } + + if os.path.exists(path): + metadata["success"] = True + metadata["http_status"] = 200 # Assume success if already exists + metadata["size_bytes"] = os.path.getsize(path) + return metadata + + # Create simple request (no API query params) + request = Request(url) + request.add_header("Accept", "application/octet-stream") + + # Add authentication header only for modern github.com/user-attachments URLs + # Legacy CDN URLs (user-images.githubusercontent.com) are public and don't need/accept auth + # Private CDN URLs (private-user-images) use JWT tokens embedded in the URL + if 
auth is not None and "github.com/user-attachments/" in url: + if not as_app: + if fine: + # Fine-grained token: plain token with "token " prefix + request.add_header("Authorization", "token " + auth) + else: + # Classic token: base64-encoded with "Basic " prefix + request.add_header("Authorization", "Basic ".encode("ascii") + auth) + else: + # App authentication + auth = auth.encode("ascii") + request.add_header("Authorization", "token ".encode("ascii") + auth) + + # Reuse S3HTTPRedirectHandler from download_file() + opener = build_opener(S3HTTPRedirectHandler) + + try: + response = opener.open(request) + metadata["http_status"] = response.getcode() + + # Extract Content-Type + content_type = response.headers.get("Content-Type", "").split(";")[0].strip() + if content_type: + metadata["content_type"] = content_type + + # Extract original filename from Content-Disposition header + # Format: attachment; filename=example.mov or attachment;filename="example.mov" + content_disposition = response.headers.get("Content-Disposition", "") + if content_disposition: + # Match: filename=something or filename="something" or filename*=UTF-8''something + match = re.search(r'filename\*?=["\']?([^"\';\r\n]+)', content_disposition) + if match: + original_filename = match.group(1).strip() + # Handle RFC 5987 encoding: filename*=UTF-8''example.mov + if "UTF-8''" in original_filename: + original_filename = original_filename.split("UTF-8''")[1] + metadata["original_filename"] = original_filename + + # Fallback: Extract filename from final URL after redirects + # This handles user-attachments/assets URLs which redirect to S3 with filename.ext + if not metadata["original_filename"]: + from urllib.parse import urlparse, unquote + + final_url = response.geturl() + parsed = urlparse(final_url) + # Get filename from path (last component before query string) + path_parts = parsed.path.split("/") + if path_parts: + # URL might be encoded, decode it + filename_from_url = unquote(path_parts[-1]) + 
# Only use if it has an extension + if "." in filename_from_url: + metadata["original_filename"] = filename_from_url + + # Download file + chunk_size = 16 * 1024 + bytes_downloaded = 0 + with open(path, "wb") as f: + while True: + chunk = response.read(chunk_size) + if not chunk: + break + f.write(chunk) + bytes_downloaded += len(chunk) + + metadata["size_bytes"] = bytes_downloaded + metadata["success"] = True + + except HTTPError as exc: + metadata["http_status"] = exc.code + metadata["error"] = str(exc.reason) + logger.warning( + "Skipping download of attachment {0} due to HTTPError: {1}".format( + url, exc.reason + ) + ) + except URLError as e: + metadata["error"] = str(e.reason) + logger.warning( + "Skipping download of attachment {0} due to URLError: {1}".format( + url, e.reason + ) + ) + except socket.error as e: + metadata["error"] = str(e.strerror) if hasattr(e, "strerror") else str(e) + logger.warning( + "Skipping download of attachment {0} due to socket error: {1}".format( + url, e.strerror if hasattr(e, "strerror") else str(e) + ) + ) + except Exception as e: + metadata["error"] = str(e) + logger.warning( + "Skipping download of attachment {0} due to error: {1}".format(url, str(e)) + ) + + return metadata + + +def extract_attachment_urls(item_data, issue_number=None, repository_full_name=None): + """Extract GitHub-hosted attachment URLs from issue/PR body and comments. + + What qualifies as an attachment? + There is no "attachment" concept in the GitHub API - it's a user behavior pattern + we've identified through analysis of real-world repositories. We define attachments as: + + - User-uploaded files hosted on GitHub's CDN domains + - Found outside of code blocks (not examples/documentation) + - Matches known GitHub attachment URL patterns + + This intentionally captures bare URLs pasted by users, not just markdown/HTML syntax. 
+ Some false positives (example URLs in documentation) may occur - these fail gracefully + with HTTP 404 and are logged in the manifest. + + Supported URL formats: + - Modern: github.com/user-attachments/{assets,files}/* + - Legacy: user-images.githubusercontent.com/* (including private-user-images) + - Repo files: github.com/{owner}/{repo}/files/* (filtered to current repo) + - Repo assets: github.com/{owner}/{repo}/assets/* (filtered to current repo) + + Repository filtering (repo files/assets only): + - Direct match: URL is for current repository → included + - Redirect match: URL redirects to current repository → included (handles renames/transfers) + - Different repo: URL is for different repository → excluded + + Code block filtering: + - Removes fenced code blocks (```) and inline code (`) before extraction + - Prevents extracting URLs from code examples and documentation snippets + + Args: + item_data: Issue or PR data dict + issue_number: Issue/PR number for logging + repository_full_name: Full repository name (owner/repo) for filtering repo-scoped URLs + """ + import re + + urls = [] + + # Define all GitHub attachment patterns + # Stop at markdown punctuation: whitespace, ), `, ", >, < + # Trailing sentence punctuation (. ! ? 
, ; : ' ") is stripped in post-processing + patterns = [ + r'https://github\.com/user-attachments/(?:assets|files)/[^\s\)`"<>]+', # Modern + r'https://(?:private-)?user-images\.githubusercontent\.com/[^\s\)`"<>]+', # Legacy CDN + ] + + # Add repo-scoped patterns (will be filtered by repository later) + # These patterns match ANY repo, then we filter to current repo with redirect checking + repo_files_pattern = r'https://github\.com/[^/]+/[^/]+/files/\d+/[^\s\)`"<>]+' + repo_assets_pattern = r'https://github\.com/[^/]+/[^/]+/assets/\d+/[^\s\)`"<>]+' + patterns.append(repo_files_pattern) + patterns.append(repo_assets_pattern) + + def clean_url(url): + """Remove trailing sentence and markdown punctuation that's not part of the URL.""" + return url.rstrip(".!?,;:'\")") + + def remove_code_blocks(text): + """Remove markdown code blocks (fenced and inline) from text. + + This prevents extracting URLs from code examples like: + - Fenced code blocks: ```code``` + - Inline code: `code` + """ + # Remove fenced code blocks first (```...```) + # DOTALL flag makes . match newlines + text = re.sub(r"```.*?```", "", text, flags=re.DOTALL) + + # Remove inline code (`...`) + # Non-greedy match between backticks + text = re.sub(r"`[^`]*`", "", text) + + return text + + def is_repo_scoped_url(url): + """Check if URL is a repo-scoped attachment (files or assets).""" + return bool( + re.match(r"https://github\.com/[^/]+/[^/]+/(?:files|assets)/\d+/", url) + ) + + def check_redirect_to_current_repo(url, current_repo): + """Check if URL redirects to current repository. + + Returns True if: + - URL is already for current repo + - URL redirects (301/302) to current repo (handles renames/transfers) + + Returns False otherwise (URL is for a different repo). 
+ """ + # Extract owner/repo from URL + match = re.match(r"https://github\.com/([^/]+)/([^/]+)/", url) + if not match: + return False + + url_owner, url_repo = match.groups() + url_repo_full = f"{url_owner}/{url_repo}" + + # Direct match - no need to check redirect + if url_repo_full.lower() == current_repo.lower(): + return True + + # Different repo - check if it redirects to current repo + # This handles repository transfers and renames + try: + import urllib.request + import urllib.error + + # Make HEAD request with redirect following disabled + # We need to manually handle redirects to see the Location header + request = urllib.request.Request(url, method="HEAD") + request.add_header("User-Agent", "python-github-backup") + + # Create opener that does NOT follow redirects + class NoRedirectHandler(urllib.request.HTTPRedirectHandler): + def redirect_request(self, req, fp, code, msg, headers, newurl): + return None # Don't follow redirects + + opener = urllib.request.build_opener(NoRedirectHandler) + + try: + _ = opener.open(request, timeout=10) + # Got 200 - URL works as-is but for different repo + return False + except urllib.error.HTTPError as e: + # Check if it's a redirect (301, 302, 307, 308) + if e.code in (301, 302, 307, 308): + location = e.headers.get("Location", "") + # Check if redirect points to current repo + if location: + redirect_match = re.match( + r"https://github\.com/([^/]+)/([^/]+)/", location + ) + if redirect_match: + redirect_owner, redirect_repo = redirect_match.groups() + redirect_repo_full = f"{redirect_owner}/{redirect_repo}" + return redirect_repo_full.lower() == current_repo.lower() + return False + except Exception: + # On any error (timeout, network issue, etc.), be conservative + # and exclude the URL to avoid downloading from wrong repos + return False + + # Extract from body + body = item_data.get("body") or "" + # Remove code blocks before searching for URLs + body_cleaned = remove_code_blocks(body) + for pattern in patterns: + 
found_urls = re.findall(pattern, body_cleaned) + urls.extend([clean_url(url) for url in found_urls]) + + # Extract from issue comments + if "comment_data" in item_data: + for comment in item_data["comment_data"]: + comment_body = comment.get("body") or "" + # Remove code blocks before searching for URLs + comment_cleaned = remove_code_blocks(comment_body) + for pattern in patterns: + found_urls = re.findall(pattern, comment_cleaned) + urls.extend([clean_url(url) for url in found_urls]) + + # Extract from PR regular comments + if "comment_regular_data" in item_data: + for comment in item_data["comment_regular_data"]: + comment_body = comment.get("body") or "" + # Remove code blocks before searching for URLs + comment_cleaned = remove_code_blocks(comment_body) + for pattern in patterns: + found_urls = re.findall(pattern, comment_cleaned) + urls.extend([clean_url(url) for url in found_urls]) + + regex_urls = list(set(urls)) # dedupe + + # Filter repo-scoped URLs to current repository only + # This handles repository transfers/renames via redirect checking + if repository_full_name: + filtered_urls = [] + for url in regex_urls: + if is_repo_scoped_url(url): + # Check if URL belongs to current repo (or redirects to it) + if check_redirect_to_current_repo(url, repository_full_name): + filtered_urls.append(url) + # else: skip URLs from other repositories + else: + # Non-repo-scoped URLs (user-attachments, CDN) - always include + filtered_urls.append(url) + regex_urls = filtered_urls + + return regex_urls + + +def extract_and_apply_extension(filepath, original_filename): + """Extract extension from original filename and rename file if needed. 
+ + Args: + filepath: Current file path (may have no extension) + original_filename: Original filename from Content-Disposition (has extension) + + Returns: + Final filepath with extension applied + """ + if not original_filename or not os.path.exists(filepath): + return filepath + + # Get extension from original filename + original_ext = os.path.splitext(original_filename)[1] + if not original_ext: + return filepath + + # Check if current file already has this extension + current_ext = os.path.splitext(filepath)[1] + if current_ext == original_ext: + return filepath + + # Rename file to add extension + new_filepath = filepath + original_ext + try: + os.rename(filepath, new_filepath) + logger.debug("Renamed {0} to {1}".format(filepath, new_filepath)) + return new_filepath + except Exception as e: + logger.warning("Could not rename {0}: {1}".format(filepath, str(e))) + return filepath + + +def get_attachment_filename(url): + """Get filename from attachment URL, handling all GitHub formats. + + Formats: + - github.com/user-attachments/assets/{uuid} → uuid (add extension later) + - github.com/user-attachments/files/{id}/{filename} → filename + - github.com/{owner}/{repo}/files/{id}/{filename} → filename + - user-images.githubusercontent.com/{user}/{hash}.{ext} → hash.ext + - private-user-images.githubusercontent.com/...?jwt=... 
→ extract from path + """ + from urllib.parse import urlparse + + parsed = urlparse(url) + path_parts = parsed.path.split("/") + + # Modern: /user-attachments/files/{id}/{filename} + if "user-attachments/files" in parsed.path: + return path_parts[-1] + + # Modern: /user-attachments/assets/{uuid} + elif "user-attachments/assets" in parsed.path: + return path_parts[-1] # extension added later via detect_and_add_extension + + # Repo files: /{owner}/{repo}/files/{id}/{filename} + elif "/files/" in parsed.path and len(path_parts) >= 2: + return path_parts[-1] + + # Legacy: user-images.githubusercontent.com/{user}/{hash-with-ext} + elif "githubusercontent.com" in parsed.netloc: + return path_parts[-1] # Already has extension usually + + # Fallback: use last path component + return path_parts[-1] if path_parts[-1] else "unknown_attachment" + + +def resolve_filename_collision(filepath): + """Resolve filename collisions using counter suffix pattern. + + If filepath exists, returns a new filepath with counter suffix. + Pattern: report.pdf → report_1.pdf → report_2.pdf + + Also protects against manifest.json collisions by treating it as reserved. 
+ + Args: + filepath: Full path to file that might exist + + Returns: + filepath that doesn't collide (may be same as input if no collision) + """ + directory = os.path.dirname(filepath) + filename = os.path.basename(filepath) + + # Protect manifest.json - it's a reserved filename + if filename == "manifest.json": + name, ext = os.path.splitext(filename) + counter = 1 + while True: + new_filename = f"{name}_{counter}{ext}" + new_filepath = os.path.join(directory, new_filename) + if not os.path.exists(new_filepath): + return new_filepath + counter += 1 + + if not os.path.exists(filepath): + return filepath + + name, ext = os.path.splitext(filename) + + counter = 1 + while True: + new_filename = f"{name}_{counter}{ext}" + new_filepath = os.path.join(directory, new_filename) + if not os.path.exists(new_filepath): + return new_filepath + counter += 1 + + +def download_attachments(args, item_cwd, item_data, number, repository, item_type="issue"): + """Download user-attachments from issue/PR body and comments with manifest. 
+ + Args: + args: Command line arguments + item_cwd: Working directory (issue_cwd or pulls_cwd) + item_data: Issue or PR data dict + number: Issue or PR number + repository: Repository dict + item_type: "issue" or "pull" for logging/manifest + """ + import json + from datetime import datetime, timezone + + item_type_display = "issue" if item_type == "issue" else "pull request" + + urls = extract_attachment_urls( + item_data, issue_number=number, repository_full_name=repository["full_name"] + ) + if not urls: + return + + attachments_dir = os.path.join(item_cwd, "attachments", str(number)) + manifest_path = os.path.join(attachments_dir, "manifest.json") + + # Load existing manifest if skip_existing is enabled + existing_urls = set() + existing_metadata = [] + if args.skip_existing and os.path.exists(manifest_path): + try: + with open(manifest_path, "r") as f: + existing_manifest = json.load(f) + all_metadata = existing_manifest.get("attachments", []) + # Only skip URLs that were successfully downloaded OR failed with permanent errors + # Retry transient failures (5xx, timeouts, network errors) + for item in all_metadata: + if item.get("success"): + existing_urls.add(item["url"]) + else: + # Check if this is a permanent failure (don't retry) or transient (retry) + http_status = item.get("http_status") + if http_status in [404, 410, 451]: + # Permanent failures - don't retry + existing_urls.add(item["url"]) + # Transient failures (5xx, auth errors, timeouts) will be retried + existing_metadata = all_metadata + except (json.JSONDecodeError, IOError): + # If manifest is corrupted, re-download everything + logger.warning( + "Corrupted manifest for {0} #{1}, will re-download".format( + item_type_display, number + ) + ) + existing_urls = set() + existing_metadata = [] + + # Filter to only new URLs + new_urls = [url for url in urls if url not in existing_urls] + + if not new_urls and existing_urls: + logger.debug( + "Skipping attachments for {0} #{1} (all {2} already 
downloaded)".format( + item_type_display, number, len(urls) + ) + ) + return + + if new_urls: + logger.info( + "Downloading {0} new attachment(s) for {1} #{2}".format( + len(new_urls), item_type_display, number + ) + ) + + mkdir_p(item_cwd, attachments_dir) + + # Collect metadata for manifest (start with existing) + attachment_metadata_list = existing_metadata[:] + + for url in new_urls: + filename = get_attachment_filename(url) + filepath = os.path.join(attachments_dir, filename) + + # Check for collision BEFORE downloading + filepath = resolve_filename_collision(filepath) + + # Download and get metadata + metadata = download_attachment_file( + url, + filepath, + get_auth(args, encode=not args.as_app), + as_app=args.as_app, + fine=args.token_fine is not None, + ) + + # Apply extension from Content-Disposition if available + if metadata["success"] and metadata.get("original_filename"): + final_filepath = extract_and_apply_extension( + filepath, metadata["original_filename"] + ) + # Check for collision again ONLY if filename changed (extension was added) + if final_filepath != filepath: + final_filepath = resolve_filename_collision(final_filepath) + # Update saved_as to reflect actual filename + metadata["saved_as"] = os.path.basename(final_filepath) + else: + metadata["saved_as"] = ( + os.path.basename(filepath) if metadata["success"] else None + ) + + attachment_metadata_list.append(metadata) + + # Write manifest + if attachment_metadata_list: + manifest = { + "issue_number": number, + "issue_type": item_type, + "repository": f"{args.user}/{args.repository}" + if hasattr(args, "repository") and args.repository + else args.user, + "manifest_updated_at": datetime.now(timezone.utc).isoformat(), + "attachments": attachment_metadata_list, + } + + manifest_path = os.path.join(attachments_dir, "manifest.json") + with open(manifest_path, "w") as f: + json.dump(manifest, f, indent=2) + logger.debug( + "Wrote manifest for {0} #{1}: {2} attachments".format( + 
item_type_display, number, len(attachment_metadata_list) + ) + ) + + def get_authenticated_user(args): template = "https://{0}/user".format(get_github_api_host(args)) data = retrieve_data(args, template, single_request=True) @@ -1157,6 +1757,10 @@ def backup_issues(args, repo_cwd, repository, repos_template): if args.include_issue_events or args.include_everything: template = events_template.format(number) issues[number]["event_data"] = retrieve_data(args, template) + if args.include_attachments: + download_attachments( + args, issue_cwd, issues[number], number, repository, item_type="issue" + ) with codecs.open(issue_file + ".temp", "w", encoding="utf-8") as f: json_dump(issue, f) @@ -1228,6 +1832,10 @@ def backup_pulls(args, repo_cwd, repository, repos_template): if args.include_pull_commits or args.include_everything: template = commits_template.format(number) pulls[number]["commit_data"] = retrieve_data(args, template) + if args.include_attachments: + download_attachments( + args, pulls_cwd, pulls[number], number, repository, item_type="pull" + ) with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f: json_dump(pull, f) From 1ed3d66777a848c37a4b5897357693290fa5b374 Mon Sep 17 00:00:00 2001 From: Rodos Date: Tue, 4 Nov 2025 09:10:22 +1100 Subject: [PATCH 344/455] refactor: Add atomic writes for attachment files and manifests --- github_backup/github_backup.py | 94 ++++++++++++++++------------------ 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index e8d9ae0..b0c2aef 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -948,6 +948,8 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False): # Reuse S3HTTPRedirectHandler from download_file() opener = build_opener(S3HTTPRedirectHandler) + temp_path = path + ".temp" + try: response = opener.open(request) metadata["http_status"] = response.getcode() @@ -986,10 +988,10 @@ def 
download_attachment_file(url, path, auth, as_app=False, fine=False): if "." in filename_from_url: metadata["original_filename"] = filename_from_url - # Download file + # Download file to temporary location chunk_size = 16 * 1024 bytes_downloaded = 0 - with open(path, "wb") as f: + with open(temp_path, "wb") as f: while True: chunk = response.read(chunk_size) if not chunk: @@ -997,6 +999,9 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False): f.write(chunk) bytes_downloaded += len(chunk) + # Atomic rename to final location + os.rename(temp_path, path) + metadata["size_bytes"] = bytes_downloaded metadata["success"] = True @@ -1027,6 +1032,12 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False): logger.warning( "Skipping download of attachment {0} due to error: {1}".format(url, str(e)) ) + # Clean up temp file if it was partially created + if os.path.exists(temp_path): + try: + os.remove(temp_path) + except Exception: + pass return metadata @@ -1222,40 +1233,6 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): return regex_urls -def extract_and_apply_extension(filepath, original_filename): - """Extract extension from original filename and rename file if needed. 
- - Args: - filepath: Current file path (may have no extension) - original_filename: Original filename from Content-Disposition (has extension) - - Returns: - Final filepath with extension applied - """ - if not original_filename or not os.path.exists(filepath): - return filepath - - # Get extension from original filename - original_ext = os.path.splitext(original_filename)[1] - if not original_ext: - return filepath - - # Check if current file already has this extension - current_ext = os.path.splitext(filepath)[1] - if current_ext == original_ext: - return filepath - - # Rename file to add extension - new_filepath = filepath + original_ext - try: - os.rename(filepath, new_filepath) - logger.debug("Renamed {0} to {1}".format(filepath, new_filepath)) - return new_filepath - except Exception as e: - logger.warning("Could not rename {0}: {1}".format(filepath, str(e))) - return filepath - - def get_attachment_filename(url): """Get filename from attachment URL, handling all GitHub formats. @@ -1333,7 +1310,9 @@ def resolve_filename_collision(filepath): counter += 1 -def download_attachments(args, item_cwd, item_data, number, repository, item_type="issue"): +def download_attachments( + args, item_cwd, item_data, number, repository, item_type="issue" +): """Download user-attachments from issue/PR body and comments with manifest. 
Args: @@ -1428,20 +1407,36 @@ def download_attachments(args, item_cwd, item_data, number, repository, item_typ fine=args.token_fine is not None, ) - # Apply extension from Content-Disposition if available + # If download succeeded but we got an extension from Content-Disposition, + # we may need to rename the file to add the extension if metadata["success"] and metadata.get("original_filename"): - final_filepath = extract_and_apply_extension( - filepath, metadata["original_filename"] - ) - # Check for collision again ONLY if filename changed (extension was added) - if final_filepath != filepath: + original_ext = os.path.splitext(metadata["original_filename"])[1] + current_ext = os.path.splitext(filepath)[1] + + # Add extension if not present + if original_ext and current_ext != original_ext: + final_filepath = filepath + original_ext + # Check for collision again with new extension final_filepath = resolve_filename_collision(final_filepath) - # Update saved_as to reflect actual filename - metadata["saved_as"] = os.path.basename(final_filepath) + logger.debug( + "Adding extension {0} to {1}".format(original_ext, filepath) + ) + + # Rename to add extension (already atomic from download) + try: + os.rename(filepath, final_filepath) + metadata["saved_as"] = os.path.basename(final_filepath) + except Exception as e: + logger.warning( + "Could not add extension to {0}: {1}".format(filepath, str(e)) + ) + metadata["saved_as"] = os.path.basename(filepath) + else: + metadata["saved_as"] = os.path.basename(filepath) + elif metadata["success"]: + metadata["saved_as"] = os.path.basename(filepath) else: - metadata["saved_as"] = ( - os.path.basename(filepath) if metadata["success"] else None - ) + metadata["saved_as"] = None attachment_metadata_list.append(metadata) @@ -1458,8 +1453,9 @@ def download_attachments(args, item_cwd, item_data, number, repository, item_typ } manifest_path = os.path.join(attachments_dir, "manifest.json") - with open(manifest_path, "w") as f: + with 
open(manifest_path + ".temp", "w") as f: json.dump(manifest, f, indent=2) + os.rename(manifest_path + ".temp", manifest_path) # Atomic write logger.debug( "Wrote manifest for {0} #{1}: {2} attachments".format( item_type_display, number, len(attachment_metadata_list) From e7880bb056307159e8c31ac7a3d917884cbcc9bc Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 6 Nov 2025 02:11:08 +0000 Subject: [PATCH 345/455] Release version 0.51.0 --- CHANGES.rst | 366 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 366 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 960977f..50cbd09 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,373 @@ Changelog ========= -0.50.3 (2025-08-08) +0.51.0 (2025-11-06) ------------------- ------------------------ + +Fix +~~~ +- Remove Python 3.8 and 3.9 from CI matrix. [Rodos] + + 3.8 and 3.9 are failing because the pinned dependencies don't support them: + - autopep8==2.3.2 needs Python 3.9+ + - bleach==6.3.0 needs Python 3.10+ + + Both are EOL now anyway (3.8 in Oct 2024, 3.9 in Oct 2025). + + Just fixing CI to test 3.10-3.14 for now. Will do a separate PR to formally + drop 3.8/3.9 support with python_requires and README updates. + +Other +~~~~~ +- Refactor: Add atomic writes for attachment files and manifests. + [Rodos] +- Feat: Add attachment download support for issues and pull requests. + [Rodos] + + Adds new --attachments flag that downloads user-uploaded files from + issue and PR bodies and comments. 
Key features: + + - Determines attachment URLs + - Tracks downloads in manifest.json with metadata + - Supports --skip-existing to avoid re-downloading + - Handles filename collisions with counter suffix + - Smart retry logic for transient vs permanent failures + - Uses Content-Disposition for correct file extensions +- Feat: Drop support for Python 3.8 and 3.9 (EOL) [Rodos] + + Both Python 3.8 and 3.9 have reached end-of-life: + - Python 3.8: EOL October 7, 2024 + - Python 3.9: EOL October 31, 2025 + + Changes: + - Add python_requires=">=3.10" to setup.py + - Remove Python 3.8 and 3.9 from classifiers + - Add Python 3.13 and 3.14 to classifiers + - Update README to document Python 3.10+ requirement +- Feat: Enforce Python 3.8+ requirement and add multi-version CI + testing. [Rodos] + + - Add python_requires=">=3.8" to setup.py to enforce minimum version at install time + - Update README to explicitly document Python 3.8+ requirement + - Add CI matrix to test lint/build on Python 3.8-3.14 (7 versions) + - Aligns with actual usage patterns (~99% of downloads on Python 3.8+) + - Prevents future PRs from inadvertently using incompatible syntax + + This change protects users by preventing installation on unsupported Python + versions and ensures contributors can see version requirements clearly. +- Chore(deps): bump bleach in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [bleach](https://github.com/mozilla/bleach). + + + Updates `bleach` from 6.2.0 to 6.3.0 + - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) + - [Commits](https://github.com/mozilla/bleach/compare/v6.2.0...v6.3.0) + + --- + updated-dependencies: + - dependency-name: bleach + dependency-version: 6.3.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump charset-normalizer in the python-packages group. 
+ [dependabot[bot]] + + Bumps the python-packages group with 1 update: [charset-normalizer](https://github.com/jawah/charset_normalizer). + + + Updates `charset-normalizer` from 3.4.3 to 3.4.4 + - [Release notes](https://github.com/jawah/charset_normalizer/releases) + - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) + - [Commits](https://github.com/jawah/charset_normalizer/compare/3.4.3...3.4.4) + + --- + updated-dependencies: + - dependency-name: charset-normalizer + dependency-version: 3.4.4 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump idna from 3.10 to 3.11 in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [idna](https://github.com/kjd/idna). + + + Updates `idna` from 3.10 to 3.11 + - [Release notes](https://github.com/kjd/idna/releases) + - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) + - [Commits](https://github.com/kjd/idna/compare/v3.10...v3.11) + + --- + updated-dependencies: + - dependency-name: idna + dependency-version: '3.11' + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group across 1 directory with 2 + updates. [dependabot[bot]] + + Bumps the python-packages group with 2 updates in the / directory: [platformdirs](https://github.com/tox-dev/platformdirs) and [rich](https://github.com/Textualize/rich). 
+ + + Updates `platformdirs` from 4.4.0 to 4.5.0 + - [Release notes](https://github.com/tox-dev/platformdirs/releases) + - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) + - [Commits](https://github.com/tox-dev/platformdirs/compare/4.4.0...4.5.0) + + Updates `rich` from 14.1.0 to 14.2.0 + - [Release notes](https://github.com/Textualize/rich/releases) + - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) + - [Commits](https://github.com/Textualize/rich/compare/v14.1.0...v14.2.0) + + --- + updated-dependencies: + - dependency-name: platformdirs + dependency-version: 4.5.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: rich + dependency-version: 14.2.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 3 updates. + [dependabot[bot]] + + Bumps the python-packages group with 3 updates: [certifi](https://github.com/certifi/python-certifi), [click](https://github.com/pallets/click) and [markdown-it-py](https://github.com/executablebooks/markdown-it-py). 
+ + + Updates `certifi` from 2025.8.3 to 2025.10.5 + - [Commits](https://github.com/certifi/python-certifi/compare/2025.08.03...2025.10.05) + + Updates `click` from 8.1.8 to 8.3.0 + - [Release notes](https://github.com/pallets/click/releases) + - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) + - [Commits](https://github.com/pallets/click/compare/8.1.8...8.3.0) + + Updates `markdown-it-py` from 3.0.0 to 4.0.0 + - [Release notes](https://github.com/executablebooks/markdown-it-py/releases) + - [Changelog](https://github.com/executablebooks/markdown-it-py/blob/master/CHANGELOG.md) + - [Commits](https://github.com/executablebooks/markdown-it-py/compare/v3.0.0...v4.0.0) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-version: 2025.10.5 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: click + dependency-version: 8.3.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: markdown-it-py + dependency-version: 4.0.0 + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump docutils in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [docutils](https://github.com/rtfd/recommonmark). + + + Updates `docutils` from 0.22.1 to 0.22.2 + - [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md) + - [Commits](https://github.com/rtfd/recommonmark/commits) + + --- + updated-dependencies: + - dependency-name: docutils + dependency-version: 0.22.2 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group across 1 directory with 2 + updates. 
[dependabot[bot]] + + Bumps the python-packages group with 2 updates in the / directory: [black](https://github.com/psf/black) and [docutils](https://github.com/rtfd/recommonmark). + + + Updates `black` from 25.1.0 to 25.9.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/25.1.0...25.9.0) + + Updates `docutils` from 0.22 to 0.22.1 + - [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md) + - [Commits](https://github.com/rtfd/recommonmark/commits) + + --- + updated-dependencies: + - dependency-name: black + dependency-version: 25.9.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: docutils + dependency-version: 0.22.1 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Delete .github/ISSUE_TEMPLATE.md. [Jose Diaz-Gonzalez] +- Create feature.yaml. [Jose Diaz-Gonzalez] +- Delete .github/ISSUE_TEMPLATE/bug_report.md. [Jose Diaz-Gonzalez] +- Rename bug.md to bug.yaml. [Jose Diaz-Gonzalez] +- Chore: create bug template. [Jose Diaz-Gonzalez] +- Chore: Rename PULL_REQUEST.md to .github/PULL_REQUEST.md. [Jose Diaz- + Gonzalez] +- Chore: Rename ISSUE_TEMPLATE.md to .github/ISSUE_TEMPLATE.md. [Jose + Diaz-Gonzalez] +- Chore(deps): bump actions/setup-python from 5 to 6. [dependabot[bot]] + + Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6. + - [Release notes](https://github.com/actions/setup-python/releases) + - [Commits](https://github.com/actions/setup-python/compare/v5...v6) + + --- + updated-dependencies: + - dependency-name: actions/setup-python + dependency-version: '6' + dependency-type: direct:production + update-type: version-update:semver-major + ... 
+- Chore(deps): bump twine from 6.1.0 to 6.2.0 in the python-packages + group. [dependabot[bot]] + + Bumps the python-packages group with 1 update: [twine](https://github.com/pypa/twine). + + + Updates `twine` from 6.1.0 to 6.2.0 + - [Release notes](https://github.com/pypa/twine/releases) + - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) + - [Commits](https://github.com/pypa/twine/compare/6.1.0...6.2.0) + + --- + updated-dependencies: + - dependency-name: twine + dependency-version: 6.2.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump more-itertools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [more-itertools](https://github.com/more-itertools/more-itertools). + + + Updates `more-itertools` from 10.7.0 to 10.8.0 + - [Release notes](https://github.com/more-itertools/more-itertools/releases) + - [Commits](https://github.com/more-itertools/more-itertools/compare/v10.7.0...v10.8.0) + + --- + updated-dependencies: + - dependency-name: more-itertools + dependency-version: 10.8.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump platformdirs in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [platformdirs](https://github.com/tox-dev/platformdirs). + + + Updates `platformdirs` from 4.3.8 to 4.4.0 + - [Release notes](https://github.com/tox-dev/platformdirs/releases) + - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) + - [Commits](https://github.com/tox-dev/platformdirs/compare/4.3.8...4.4.0) + + --- + updated-dependencies: + - dependency-name: platformdirs + dependency-version: 4.4.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... 
+- Chore(deps): bump actions/checkout from 4 to 5. [dependabot[bot]] + + Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 5. + - [Release notes](https://github.com/actions/checkout/releases) + - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) + - [Commits](https://github.com/actions/checkout/compare/v4...v5) + + --- + updated-dependencies: + - dependency-name: actions/checkout + dependency-version: '5' + dependency-type: direct:production + update-type: version-update:semver-major + ... +- Chore(deps): bump requests in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [requests](https://github.com/psf/requests). + + + Updates `requests` from 2.32.4 to 2.32.5 + - [Release notes](https://github.com/psf/requests/releases) + - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) + - [Commits](https://github.com/psf/requests/compare/v2.32.4...v2.32.5) + + --- + updated-dependencies: + - dependency-name: requests + dependency-version: 2.32.5 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore: update Dockerfile to use Python 3.12 and improve dependency + installation. [Mateusz Hajder] +- Chore(deps): bump the python-packages group with 2 updates. + [dependabot[bot]] + + Bumps the python-packages group with 2 updates: [certifi](https://github.com/certifi/python-certifi) and [charset-normalizer](https://github.com/jawah/charset_normalizer). 
+ + + Updates `certifi` from 2025.7.14 to 2025.8.3 + - [Commits](https://github.com/certifi/python-certifi/compare/2025.07.14...2025.08.03) + + Updates `charset-normalizer` from 3.4.2 to 3.4.3 + - [Release notes](https://github.com/jawah/charset_normalizer/releases) + - [Changelog](https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md) + - [Commits](https://github.com/jawah/charset_normalizer/compare/3.4.2...3.4.3) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-version: 2025.8.3 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: charset-normalizer + dependency-version: 3.4.3 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... + + +0.50.3 (2025-08-08) +------------------- - Revert "Add conditional check for git checkout in development path" [Eric Wheeler] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index e7d2f93..d942e9e 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.50.3" +__version__ = "0.51.0" From c8c585cbb5634ebd4db7c85a4fca1742d48537b2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 6 Nov 2025 13:09:51 +0000 Subject: [PATCH 346/455] chore(deps): bump docutils in the python-packages group Bumps the python-packages group with 1 update: [docutils](https://github.com/rtfd/recommonmark). Updates `docutils` from 0.22.2 to 0.22.3 - [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md) - [Commits](https://github.com/rtfd/recommonmark/commits) --- updated-dependencies: - dependency-name: docutils dependency-version: 0.22.3 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index bd9ebf2..8e05be0 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -5,7 +5,7 @@ certifi==2025.10.5 charset-normalizer==3.4.4 click==8.3.0 colorama==0.4.6 -docutils==0.22.2 +docutils==0.22.3 flake8==7.3.0 gitchangelog==3.0.4 idna==3.11 From 56db3ff0e81a63324e31935f1d669e4bfd3d5426 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 13:59:47 +0000 Subject: [PATCH 347/455] chore(deps): bump black in the python-packages group Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). Updates `black` from 25.9.0 to 25.11.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/25.9.0...25.11.0) --- updated-dependencies: - dependency-name: black dependency-version: 25.11.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 8e05be0..b3e9f19 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,5 +1,5 @@ autopep8==2.3.2 -black==25.9.0 +black==25.11.0 bleach==6.3.0 certifi==2025.10.5 charset-normalizer==3.4.4 From a98ff7f23df8bb6356ec30a4c7e22bc39d9ee771 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Nov 2025 13:11:06 +0000 Subject: [PATCH 348/455] chore(deps): bump certifi in the python-packages group Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). 
Updates `certifi` from 2025.10.5 to 2025.11.12 - [Commits](https://github.com/certifi/python-certifi/compare/2025.10.05...2025.11.12) --- updated-dependencies: - dependency-name: certifi dependency-version: 2025.11.12 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index b3e9f19..0a695b3 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,7 +1,7 @@ autopep8==2.3.2 black==25.11.0 bleach==6.3.0 -certifi==2025.10.5 +certifi==2025.11.12 charset-normalizer==3.4.4 click==8.3.0 colorama==0.4.6 From 7a9455db88884571faef1f17044003c4e6460836 Mon Sep 17 00:00:00 2001 From: Rodos Date: Fri, 14 Nov 2025 10:17:08 +1100 Subject: [PATCH 349/455] fix: Prevent duplicate attachment downloads Fixes bug where attachments were downloaded multiple times with incremented filenames (file.mov, file_1.mov, file_2.mov) when running backups without --skip-existing flag. I should not have used the --skip-existing flag for attachments, it did not do what I thought it did. The correct approach is to always use the manifest to guide what has already been downloaded and what now needs to be done. 
--- github_backup/github_backup.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index b0c2aef..d1828d0 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -919,12 +919,6 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False): "error": None, } - if os.path.exists(path): - metadata["success"] = True - metadata["http_status"] = 200 # Assume success if already exists - metadata["size_bytes"] = os.path.getsize(path) - return metadata - # Create simple request (no API query params) request = Request(url) request.add_header("Accept", "application/octet-stream") @@ -1337,10 +1331,10 @@ def download_attachments( attachments_dir = os.path.join(item_cwd, "attachments", str(number)) manifest_path = os.path.join(attachments_dir, "manifest.json") - # Load existing manifest if skip_existing is enabled + # Load existing manifest to prevent duplicate downloads existing_urls = set() existing_metadata = [] - if args.skip_existing and os.path.exists(manifest_path): + if os.path.exists(manifest_path): try: with open(manifest_path, "r") as f: existing_manifest = json.load(f) @@ -1395,9 +1389,6 @@ def download_attachments( filename = get_attachment_filename(url) filepath = os.path.join(attachments_dir, filename) - # Check for collision BEFORE downloading - filepath = resolve_filename_collision(filepath) - # Download and get metadata metadata = download_attachment_file( url, From e4d1c789937fe1ccf7934613ccfbc63fd8b8ab9b Mon Sep 17 00:00:00 2001 From: Rodos Date: Fri, 14 Nov 2025 10:23:29 +1100 Subject: [PATCH 350/455] test: Add pytest infrastructure and attachment tests In making my last fix to attachments, I found it challenging not having tests to ensure there was no regression. Added pytest with minimal setup and isolated configuration. Created a separate test workflow to keep tests isolated from linting. 
Tests cover the key elements of the attachment logic: - URL extraction from issue bodies - Filename extraction from different URL types - Filename collision resolution - Manifest duplicate prevention --- .github/workflows/test.yml | 33 ++++ pytest.ini | 6 + release-requirements.txt | 1 + tests/__init__.py | 1 + tests/test_attachments.py | 353 +++++++++++++++++++++++++++++++++++++ 5 files changed, 394 insertions(+) create mode 100644 .github/workflows/test.yml create mode 100644 pytest.ini create mode 100644 tests/__init__.py create mode 100644 tests/test_attachments.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..fb43350 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,33 @@ +--- +name: "test" + +# yamllint disable-line rule:truthy +on: + pull_request: + branches: + - "*" + push: + branches: + - "main" + - "master" + +jobs: + test: + name: test + runs-on: ubuntu-24.04 + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + fetch-depth: 0 + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - run: pip install -r release-requirements.txt + - run: pytest tests/ -v diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..a1edb37 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,6 @@ +[pytest] +testpaths = tests +python_files = test_*.py +python_classes = Test* +python_functions = test_* +addopts = -v diff --git a/release-requirements.txt b/release-requirements.txt index b3e9f19..2a9b2ba 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -8,6 +8,7 @@ colorama==0.4.6 docutils==0.22.3 flake8==7.3.0 gitchangelog==3.0.4 +pytest==8.3.3 idna==3.11 importlib-metadata==8.7.0 jaraco.classes==3.4.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..5675dbd --- /dev/null +++ 
b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for python-github-backup.""" diff --git a/tests/test_attachments.py b/tests/test_attachments.py new file mode 100644 index 0000000..07c1b33 --- /dev/null +++ b/tests/test_attachments.py @@ -0,0 +1,353 @@ +"""Behavioral tests for attachment functionality.""" + +import json +import os +import tempfile +from pathlib import Path +from unittest.mock import Mock + +import pytest + +from github_backup import github_backup + + +@pytest.fixture +def attachment_test_setup(tmp_path): + """Fixture providing setup and helper for attachment download tests.""" + from unittest.mock import patch + + issue_cwd = tmp_path / "issues" + issue_cwd.mkdir() + + # Mock args + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = None + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.user = "testuser" + args.repository = "testrepo" + + repository = {"full_name": "testuser/testrepo"} + + def call_download(issue_data, issue_number=123): + """Call download_attachments with mocked HTTP downloads. + + Returns list of URLs that were actually downloaded. + """ + downloaded_urls = [] + + def mock_download(url, path, auth, as_app, fine): + downloaded_urls.append(url) + return { + "success": True, + "saved_as": os.path.basename(path), + "url": url, + } + + with patch( + "github_backup.github_backup.download_attachment_file", + side_effect=mock_download, + ): + github_backup.download_attachments( + args, str(issue_cwd), issue_data, issue_number, repository + ) + + return downloaded_urls + + return { + "issue_cwd": str(issue_cwd), + "args": args, + "repository": repository, + "call_download": call_download, + } + + +class TestURLExtraction: + """Test URL extraction with realistic issue content.""" + + def test_mixed_urls(self): + issue_data = { + "body": """ + ## Bug Report + + When uploading files, I see this error. 
Here's a screenshot: + https://github.com/user-attachments/assets/abc123def456 + + The logs show: https://github.com/user-attachments/files/789/error-log.txt + + This is similar to https://github.com/someorg/somerepo/issues/42 but different. + + You can also see the video at https://user-images.githubusercontent.com/12345/video-demo.mov + + Here's how to reproduce: + ```bash + # Don't extract this example URL: + curl https://github.com/user-attachments/assets/example999 + ``` + + More info at https://docs.example.com/guide + + Also see this inline code `https://github.com/user-attachments/files/111/inline.pdf` should not extract. + + Final attachment: https://github.com/user-attachments/files/222/report.pdf. + """, + "comment_data": [ + { + "body": "Here's another attachment: https://private-user-images.githubusercontent.com/98765/secret.png?jwt=token123" + }, + { + "body": """ + Example code: + ```python + url = "https://github.com/user-attachments/assets/code-example" + ``` + But this is real: https://github.com/user-attachments/files/333/actual.zip + """ + }, + ], + } + + # Extract URLs + urls = github_backup.extract_attachment_urls(issue_data) + + expected_urls = [ + "https://github.com/user-attachments/assets/abc123def456", + "https://github.com/user-attachments/files/789/error-log.txt", + "https://user-images.githubusercontent.com/12345/video-demo.mov", + "https://github.com/user-attachments/files/222/report.pdf", + "https://private-user-images.githubusercontent.com/98765/secret.png?jwt=token123", + "https://github.com/user-attachments/files/333/actual.zip", + ] + + assert set(urls) == set(expected_urls) + + def test_trailing_punctuation_stripped(self): + """URLs with trailing punctuation should have punctuation stripped.""" + issue_data = { + "body": """ + See this file: https://github.com/user-attachments/files/1/doc.pdf. + And this one (https://github.com/user-attachments/files/2/image.png). + Check it out! 
https://github.com/user-attachments/files/3/data.csv! + """ + } + + urls = github_backup.extract_attachment_urls(issue_data) + + expected = [ + "https://github.com/user-attachments/files/1/doc.pdf", + "https://github.com/user-attachments/files/2/image.png", + "https://github.com/user-attachments/files/3/data.csv", + ] + assert set(urls) == set(expected) + + def test_deduplication_across_body_and_comments(self): + """Same URL in body and comments should only appear once.""" + duplicate_url = "https://github.com/user-attachments/assets/abc123" + + issue_data = { + "body": f"First mention: {duplicate_url}", + "comment_data": [ + {"body": f"Second mention: {duplicate_url}"}, + {"body": f"Third mention: {duplicate_url}"}, + ], + } + + urls = github_backup.extract_attachment_urls(issue_data) + + assert set(urls) == {duplicate_url} + + +class TestFilenameExtraction: + """Test filename extraction from different URL types.""" + + def test_modern_assets_url(self): + """Modern assets URL returns UUID.""" + url = "https://github.com/user-attachments/assets/abc123def456" + filename = github_backup.get_attachment_filename(url) + assert filename == "abc123def456" + + def test_modern_files_url(self): + """Modern files URL returns filename.""" + url = "https://github.com/user-attachments/files/12345/report.pdf" + filename = github_backup.get_attachment_filename(url) + assert filename == "report.pdf" + + def test_legacy_cdn_url(self): + """Legacy CDN URL returns filename with extension.""" + url = "https://user-images.githubusercontent.com/123456/abc-def.png" + filename = github_backup.get_attachment_filename(url) + assert filename == "abc-def.png" + + def test_private_cdn_url(self): + """Private CDN URL returns filename.""" + url = "https://private-user-images.githubusercontent.com/98765/secret.png?jwt=token123" + filename = github_backup.get_attachment_filename(url) + assert filename == "secret.png" + + def test_repo_files_url(self): + """Repo-scoped files URL returns filename.""" 
+ url = "https://github.com/owner/repo/files/789/document.txt" + filename = github_backup.get_attachment_filename(url) + assert filename == "document.txt" + + +class TestFilenameCollision: + """Test filename collision resolution.""" + + def test_collision_behavior(self): + """Test filename collision resolution with real files.""" + with tempfile.TemporaryDirectory() as tmpdir: + # No collision - file doesn't exist + result = github_backup.resolve_filename_collision( + os.path.join(tmpdir, "report.pdf") + ) + assert result == os.path.join(tmpdir, "report.pdf") + + # Create the file, now collision exists + Path(os.path.join(tmpdir, "report.pdf")).touch() + result = github_backup.resolve_filename_collision( + os.path.join(tmpdir, "report.pdf") + ) + assert result == os.path.join(tmpdir, "report_1.pdf") + + # Create report_1.pdf too + Path(os.path.join(tmpdir, "report_1.pdf")).touch() + result = github_backup.resolve_filename_collision( + os.path.join(tmpdir, "report.pdf") + ) + assert result == os.path.join(tmpdir, "report_2.pdf") + + def test_manifest_reserved(self): + """manifest.json is always treated as reserved.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Even if manifest.json doesn't exist, should get manifest_1.json + result = github_backup.resolve_filename_collision( + os.path.join(tmpdir, "manifest.json") + ) + assert result == os.path.join(tmpdir, "manifest_1.json") + + +class TestManifestDuplicatePrevention: + """Test that manifest prevents duplicate downloads (the bug fix).""" + + def test_manifest_filters_existing_urls(self, attachment_test_setup): + """URLs in manifest are not re-downloaded.""" + setup = attachment_test_setup + + # Create manifest with existing URLs + attachments_dir = os.path.join(setup["issue_cwd"], "attachments", "123") + os.makedirs(attachments_dir) + manifest_path = os.path.join(attachments_dir, "manifest.json") + + manifest = { + "attachments": [ + { + "url": "https://github.com/user-attachments/assets/old1", + 
"success": True, + "saved_as": "old1.pdf", + }, + { + "url": "https://github.com/user-attachments/assets/old2", + "success": True, + "saved_as": "old2.pdf", + }, + ] + } + with open(manifest_path, "w") as f: + json.dump(manifest, f) + + # Issue data with 2 old URLs and 1 new URL + issue_data = { + "body": """ + Old: https://github.com/user-attachments/assets/old1 + Old: https://github.com/user-attachments/assets/old2 + New: https://github.com/user-attachments/assets/new1 + """ + } + + downloaded_urls = setup["call_download"](issue_data) + + # Should only download the NEW URL (old ones filtered by manifest) + assert len(downloaded_urls) == 1 + assert downloaded_urls[0] == "https://github.com/user-attachments/assets/new1" + + def test_no_manifest_downloads_all(self, attachment_test_setup): + """Without manifest, all URLs should be downloaded.""" + setup = attachment_test_setup + + # Issue data with 2 URLs + issue_data = { + "body": """ + https://github.com/user-attachments/assets/url1 + https://github.com/user-attachments/assets/url2 + """ + } + + downloaded_urls = setup["call_download"](issue_data) + + # Should download ALL URLs (no manifest to filter) + assert len(downloaded_urls) == 2 + assert set(downloaded_urls) == { + "https://github.com/user-attachments/assets/url1", + "https://github.com/user-attachments/assets/url2", + } + + def test_manifest_skips_permanent_failures(self, attachment_test_setup): + """Manifest skips permanent failures (404, 410) but retries transient (503).""" + setup = attachment_test_setup + + # Create manifest with different failure types + attachments_dir = os.path.join(setup["issue_cwd"], "attachments", "123") + os.makedirs(attachments_dir) + manifest_path = os.path.join(attachments_dir, "manifest.json") + + manifest = { + "attachments": [ + { + "url": "https://github.com/user-attachments/assets/success", + "success": True, + "saved_as": "success.pdf", + }, + { + "url": "https://github.com/user-attachments/assets/notfound", + "success": 
False, + "http_status": 404, + }, + { + "url": "https://github.com/user-attachments/assets/gone", + "success": False, + "http_status": 410, + }, + { + "url": "https://github.com/user-attachments/assets/unavailable", + "success": False, + "http_status": 503, + }, + ] + } + with open(manifest_path, "w") as f: + json.dump(manifest, f) + + # Issue data has all 4 URLs + issue_data = { + "body": """ + https://github.com/user-attachments/assets/success + https://github.com/user-attachments/assets/notfound + https://github.com/user-attachments/assets/gone + https://github.com/user-attachments/assets/unavailable + """ + } + + downloaded_urls = setup["call_download"](issue_data) + + # Should only retry 503 (transient failure) + # Success, 404, and 410 should be skipped + assert len(downloaded_urls) == 1 + assert ( + downloaded_urls[0] + == "https://github.com/user-attachments/assets/unavailable" + ) From 1ec0820936c420b52e77eaefdf903098e2f2cb8d Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sun, 16 Nov 2025 02:01:39 +0000 Subject: [PATCH 351/455] Release version 0.51.1 --- CHANGES.rst | 90 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 50cbd09..269a77b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,98 @@ Changelog ========= -0.51.0 (2025-11-06) +0.51.1 (2025-11-16) ------------------- ------------------------ +Fix +~~~ +- Prevent duplicate attachment downloads. [Rodos] + + Fixes bug where attachments were downloaded multiple times with + incremented filenames (file.mov, file_1.mov, file_2.mov) when + running backups without --skip-existing flag. + + I should not have used the --skip-existing flag for attachments, + it did not do what I thought it did. + + The correct approach is to always use the manifest to guide what + has already been downloaded and what now needs to be done. 
+ +Other +~~~~~ +- Chore(deps): bump certifi in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [certifi](https://github.com/certifi/python-certifi). + + + Updates `certifi` from 2025.10.5 to 2025.11.12 + - [Commits](https://github.com/certifi/python-certifi/compare/2025.10.05...2025.11.12) + + --- + updated-dependencies: + - dependency-name: certifi + dependency-version: 2025.11.12 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Test: Add pytest infrastructure and attachment tests. [Rodos] + + In making my last fix to attachments, I found it challenging not + having tests to ensure there was no regression. + + Added pytest with minimal setup and isolated configuration. Created + a separate test workflow to keep tests isolated from linting. + + Tests cover the key elements of the attachment logic: + - URL extraction from issue bodies + - Filename extraction from different URL types + - Filename collision resolution + - Manifest duplicate prevention +- Chore(deps): bump black in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). + + + Updates `black` from 25.9.0 to 25.11.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/25.9.0...25.11.0) + + --- + updated-dependencies: + - dependency-name: black + dependency-version: 25.11.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... +- Chore(deps): bump docutils in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [docutils](https://github.com/rtfd/recommonmark). 
+ + + Updates `docutils` from 0.22.2 to 0.22.3 + - [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md) + - [Commits](https://github.com/rtfd/recommonmark/commits) + + --- + updated-dependencies: + - dependency-name: docutils + dependency-version: 0.22.3 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... + + +0.51.0 (2025-11-06) +------------------- + Fix ~~~ - Remove Python 3.8 and 3.9 from CI matrix. [Rodos] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index d942e9e..d280604 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.51.0" +__version__ = "0.51.1" From 90ba839c7d7e121ac5bc3865e2f9f3e02a9774ec Mon Sep 17 00:00:00 2001 From: Rodos Date: Thu, 13 Nov 2025 15:46:06 +1100 Subject: [PATCH 352/455] fix: Improve CA certificate detection with fallback chain The previous implementation incorrectly assumed empty get_ca_certs() meant broken SSL, causing false failures in GitHub Codespaces and other directory-based cert systems where certificates exist but aren't pre-loaded. It would then attempt to import certifi as a workaround, but certifi wasn't listed in requirements.txt, causing the fallback to fail with ImportError even though the system certificates would have worked fine. This commit replaces the naive check with a layered fallback approach that checks multiple certificate sources. First it checks for pre-loaded system certs (file-based systems). Then it verifies system cert paths exist (directory-based systems like Ubuntu/Debian/Codespaces). Finally it attempts to use certifi as an optional fallback only if needed. This approach eliminates hard dependencies (certifi is now optional), works in GitHub Codespaces without any setup, and fails gracefully with clear hints for resolution when SSL is actually broken rather than failing with ModuleNotFoundError. 
Fixes #444 --- github_backup/github_backup.py | 41 +++++++++++++++++++++------------- requirements.txt | 1 - 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index b0c2aef..b69ba4a 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -37,22 +37,33 @@ FILE_URI_PREFIX = "file://" logger = logging.getLogger(__name__) +# Setup SSL context with fallback chain https_ctx = ssl.create_default_context() -if not https_ctx.get_ca_certs(): - import warnings - - warnings.warn( - "\n\nYOUR DEFAULT CA CERTS ARE EMPTY.\n" - + "PLEASE POPULATE ANY OF:" - + "".join( - ["\n - " + x for x in ssl.get_default_verify_paths() if type(x) is str] - ) - + "\n", - stacklevel=2, - ) - import certifi - - https_ctx = ssl.create_default_context(cafile=certifi.where()) +if https_ctx.get_ca_certs(): + # Layer 1: Certificates pre-loaded from system (file-based) + pass +else: + paths = ssl.get_default_verify_paths() + if (paths.cafile and os.path.exists(paths.cafile)) or ( + paths.capath and os.path.exists(paths.capath) + ): + # Layer 2: Cert paths exist, will be lazy-loaded on first use (directory-based) + pass + else: + # Layer 3: Try certifi package as optional fallback + try: + import certifi + + https_ctx = ssl.create_default_context(cafile=certifi.where()) + except ImportError: + # All layers failed - no certificates available anywhere + sys.exit( + "\nERROR: No CA certificates found. Cannot connect to GitHub over SSL.\n\n" + "Solutions you can explore:\n" + " 1. pip install certifi\n" + " 2. Alpine: apk add ca-certificates\n" + " 3. 
Debian/Ubuntu: apt-get install ca-certificates\n\n" + ) def logging_subprocess( diff --git a/requirements.txt b/requirements.txt index 8b13789..e69de29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +0,0 @@ - From 72d35a9b94a22b4a3fe4589749d6f9b4fc8d3970 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sun, 16 Nov 2025 23:55:36 +0000 Subject: [PATCH 353/455] Release version 0.51.2 --- CHANGES.rst | 30 +++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 269a77b..ce23331 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,38 @@ Changelog ========= -0.51.1 (2025-11-16) +0.51.2 (2025-11-16) ------------------- ------------------------ +Fix +~~~ +- Improve CA certificate detection with fallback chain. [Rodos] + + The previous implementation incorrectly assumed empty get_ca_certs() + meant broken SSL, causing false failures in GitHub Codespaces and other + directory-based cert systems where certificates exist but aren't pre-loaded. + It would then attempt to import certifi as a workaround, but certifi wasn't + listed in requirements.txt, causing the fallback to fail with ImportError + even though the system certificates would have worked fine. + + This commit replaces the naive check with a layered fallback approach that + checks multiple certificate sources. First it checks for pre-loaded system + certs (file-based systems). Then it verifies system cert paths exist + (directory-based systems like Ubuntu/Debian/Codespaces). Finally it attempts + to use certifi as an optional fallback only if needed. + + This approach eliminates hard dependencies (certifi is now optional), works + in GitHub Codespaces without any setup, and fails gracefully with clear hints + for resolution when SSL is actually broken rather than failing with + ModuleNotFoundError. 
+ + Fixes #444 + + +0.51.1 (2025-11-16) +------------------- + Fix ~~~ - Prevent duplicate attachment downloads. [Rodos] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index d280604..210a2d0 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.51.1" +__version__ = "0.51.2" From 755182967749cfdd482bb311812bc97442265941 Mon Sep 17 00:00:00 2001 From: Helio Machado <0x2b3bfa0+git@googlemail.com> Date: Mon, 17 Nov 2025 02:09:29 +0100 Subject: [PATCH 354/455] Use cursor based pagination --- github_backup/github_backup.py | 69 ++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 8abca62..14f0ed8 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -592,27 +592,26 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): auth = get_auth(args, encode=not args.as_app) query_args = get_query_args(query_args) per_page = 100 - page = 0 + next_url = None while True: if single_request: - request_page, request_per_page = None, None + request_per_page = None else: - page = page + 1 - request_page, request_per_page = page, per_page + request_per_page = per_page request = _construct_request( request_per_page, - request_page, query_args, - template, + next_url or template, auth, as_app=args.as_app, fine=True if args.token_fine is not None else False, ) # noqa - r, errors = _get_response(request, auth, template) + r, errors = _get_response(request, auth, next_url or template) status_code = int(r.getcode()) + # Check if we got correct data try: response = json.loads(r.read().decode("utf-8")) @@ -644,15 +643,14 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): retries += 1 time.sleep(5) request = _construct_request( - per_page, - page, + request_per_page, query_args, - template, + next_url or template, auth, 
as_app=args.as_app, fine=True if args.token_fine is not None else False, ) # noqa - r, errors = _get_response(request, auth, template) + r, errors = _get_response(request, auth, next_url or template) status_code = int(r.getcode()) try: @@ -682,7 +680,16 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): if type(response) is list: for resp in response: yield resp - if len(response) < per_page: + # Parse Link header for next page URL (cursor-based pagination) + link_header = r.headers.get("Link", "") + next_url = None + if link_header: + # Parse Link header: <url>; rel="next" + for link in link_header.split(","): + if 'rel="next"' in link: + next_url = link[link.find("<") + 1:link.find(">")] + break + if not next_url: break elif type(response) is dict and single_request: yield response @@ -735,22 +742,27 @@ def _get_response(request, auth, template): def _construct_request( - per_page, page, query_args, template, auth, as_app=None, fine=False + per_page, query_args, template, auth, as_app=None, fine=False ): - all_query_args = {} - if per_page: - all_query_args["per_page"] = per_page - if page: - all_query_args["page"] = page - if query_args: - all_query_args.update(query_args) - - request_url = template - if all_query_args: - querystring = urlencode(all_query_args) - request_url = template + "?" + querystring + # If template is already a full URL with query params (from Link header), use it directly + if "?" in template and template.startswith("http"): + request_url = template + # Extract query string for logging + querystring = template.split("?", 1)[1] else: - querystring = "" + # Build URL with query parameters + all_query_args = {} + if per_page: + all_query_args["per_page"] = per_page + if query_args: + all_query_args.update(query_args) + + request_url = template + if all_query_args: + querystring = urlencode(all_query_args) + request_url = template + "?"
+ querystring + else: + querystring = "" request = Request(request_url) if auth is not None: @@ -766,7 +778,7 @@ def _construct_request( "Accept", "application/vnd.github.machine-man-preview+json" ) - log_url = template + log_url = template if "?" not in template else template.split("?")[0] if querystring: log_url += "?" + querystring logger.info("Requesting {}".format(log_url)) @@ -843,8 +855,7 @@ def download_file(url, path, auth, as_app=False, fine=False): return request = _construct_request( - per_page=100, - page=1, + per_page=None, query_args={}, template=url, auth=auth, From 5af522a34841bf7d56221449bac2a7dc3c8d97b1 Mon Sep 17 00:00:00 2001 From: Rodos Date: Mon, 17 Nov 2025 17:14:29 +1100 Subject: [PATCH 355/455] test: Add pagination tests for cursor and page-based Link headers --- tests/test_pagination.py | 153 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 tests/test_pagination.py diff --git a/tests/test_pagination.py b/tests/test_pagination.py new file mode 100644 index 0000000..0d5bd82 --- /dev/null +++ b/tests/test_pagination.py @@ -0,0 +1,153 @@ +"""Tests for Link header pagination handling.""" + +import json +from unittest.mock import Mock, patch + +import pytest + +from github_backup import github_backup + + +class MockHTTPResponse: + """Mock HTTP response for paginated API calls.""" + + def __init__(self, data, link_header=None): + self._content = json.dumps(data).encode("utf-8") + self._link_header = link_header + self._read = False + self.reason = "OK" + + def getcode(self): + return 200 + + def read(self): + if self._read: + return b"" + self._read = True + return self._content + + def get_header(self, name, default=None): + """Mock method for headers.get().""" + return self.headers.get(name, default) + + @property + def headers(self): + headers = {"x-ratelimit-remaining": "5000"} + if self._link_header: + headers["Link"] = self._link_header + return headers + + +@pytest.fixture +def mock_args(): + 
"""Mock args for retrieve_data_gen.""" + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = "fake_token" + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + return args + + +def test_cursor_based_pagination(mock_args): + """Link header with 'after' cursor parameter works correctly.""" + + # Simulate issues endpoint behavior: returns cursor in Link header + responses = [ + # Issues endpoint returns 'after' cursor parameter (not 'page') + MockHTTPResponse( + data=[{"issue": i} for i in range(1, 101)], # Page 1 contents + link_header='<https://api.github.com/repos/owner/repo/issues?per_page=100&after=ABC123>; rel="next"', + ), + MockHTTPResponse( + data=[{"issue": i} for i in range(101, 151)], # Page 2 contents + link_header=None, # No Link header - signals end of pagination + ), + ] + requests_made = [] + + def mock_urlopen(request, *args, **kwargs): + url = request.get_full_url() + requests_made.append(url) + return responses[len(requests_made) - 1] + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + results = list( + github_backup.retrieve_data_gen( + mock_args, "https://api.github.com/repos/owner/repo/issues" + ) + ) + + # Verify all items retrieved and cursor was used in second request + assert len(results) == 150 + assert len(requests_made) == 2 + assert "after=ABC123" in requests_made[1] + + +def test_page_based_pagination(mock_args): + """Link header with 'page' parameter works correctly.""" + + # Simulate pulls/repos endpoint behavior: returns page numbers in Link header + responses = [ + # Pulls endpoint uses traditional 'page' parameter (not cursor) + MockHTTPResponse( + data=[{"pull": i} for i in range(1, 101)], # Page 1 contents + link_header='<https://api.github.com/repos/owner/repo/pulls?per_page=100&page=2>; rel="next"', + ), + MockHTTPResponse( + data=[{"pull": i} for i in range(101, 181)], # Page 2 contents + link_header=None, # No Link header - signals end of pagination + ), + ] + requests_made = []
+ + def mock_urlopen(request, *args, **kwargs): + url = request.get_full_url() + requests_made.append(url) + return responses[len(requests_made) - 1] + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + results = list( + github_backup.retrieve_data_gen( + mock_args, "https://api.github.com/repos/owner/repo/pulls" + ) + ) + + # Verify all items retrieved and page parameter was used (not cursor) + assert len(results) == 180 + assert len(requests_made) == 2 + assert "page=2" in requests_made[1] + assert "after" not in requests_made[1] + + +def test_no_link_header_stops_pagination(mock_args): + """Pagination stops when Link header is absent.""" + + # Simulate endpoint with results that fit in a single page + responses = [ + MockHTTPResponse( + data=[{"label": i} for i in range(1, 51)], # Page contents + link_header=None, # No Link header - signals end of pagination + ) + ] + requests_made = [] + + def mock_urlopen(request, *args, **kwargs): + requests_made.append(request.get_full_url()) + return responses[len(requests_made) - 1] + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + results = list( + github_backup.retrieve_data_gen( + mock_args, "https://api.github.com/repos/owner/repo/labels" + ) + ) + + # Verify pagination stopped after first request + assert len(results) == 50 + assert len(requests_made) == 1 From 9ef496efada55c9e8eced5183037e1a1935db140 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Tue, 18 Nov 2025 06:55:36 +0000 Subject: [PATCH 356/455] Release version 0.51.3 --- CHANGES.rst | 9 ++++++++- github_backup/__init__.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index ce23331..3c7c16f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,16 @@ Changelog ========= -0.51.2 (2025-11-16) +0.51.3 (2025-11-18) ------------------- ------------------------ +- Test: Add pagination tests for cursor and page-based Link headers. 
+ [Rodos] +- Use cursor based pagination. [Helio Machado] + + +0.51.2 (2025-11-16) +------------------- Fix ~~~ diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 210a2d0..378947a 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.51.2" +__version__ = "0.51.3" From d3edef06227521169bf20bbd98fc8e28788ae57a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 18 Nov 2025 13:24:06 +0000 Subject: [PATCH 357/455] chore(deps): bump the python-packages group with 3 updates Bumps the python-packages group with 3 updates: [click](https://github.com/pallets/click), [pytest](https://github.com/pytest-dev/pytest) and [keyring](https://github.com/jaraco/keyring). Updates `click` from 8.3.0 to 8.3.1 - [Release notes](https://github.com/pallets/click/releases) - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/click/compare/8.3.0...8.3.1) Updates `pytest` from 8.3.3 to 9.0.1 - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/8.3.3...9.0.1) Updates `keyring` from 25.6.0 to 25.7.0 - [Release notes](https://github.com/jaraco/keyring/releases) - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/keyring/compare/v25.6.0...v25.7.0) --- updated-dependencies: - dependency-name: click dependency-version: 8.3.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: pytest dependency-version: 9.0.1 dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages - dependency-name: keyring dependency-version: 25.7.0 dependency-type: direct:production update-type: version-update:semver-minor 
dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 3a1d550..aedbf64 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -3,16 +3,16 @@ black==25.11.0 bleach==6.3.0 certifi==2025.11.12 charset-normalizer==3.4.4 -click==8.3.0 +click==8.3.1 colorama==0.4.6 docutils==0.22.3 flake8==7.3.0 gitchangelog==3.0.4 -pytest==8.3.3 +pytest==9.0.1 idna==3.11 importlib-metadata==8.7.0 jaraco.classes==3.4.0 -keyring==25.6.0 +keyring==25.7.0 markdown-it-py==4.0.0 mccabe==0.7.0 mdurl==0.1.2 From c3855a94f1bf5866f41f84b15b2e50c53f9717be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 04:09:25 +0000 Subject: [PATCH 358/455] chore(deps): bump actions/checkout from 5 to 6 Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to 6. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/automatic-release.yml | 2 +- .github/workflows/docker.yml | 2 +- .github/workflows/lint.yml | 2 +- .github/workflows/test.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index 2160206..60c0b41 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 0 ssh-key: ${{ secrets.DEPLOY_PRIVATE_KEY }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 2c7cb38..f367b99 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -38,7 +38,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: persist-credentials: false diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 02ad174..0ca0aa2 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 0 - name: Setup Python diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fb43350..0c8b3af 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 0 - name: Setup Python From 9f6b401171afa2614aa1c9ea8e8756f8e0c8c257 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 14:58:52 +0000 Subject: [PATCH 359/455] chore(deps): bump restructuredtext-lint in the python-packages group Bumps the python-packages group with 1 update: 
[restructuredtext-lint](https://github.com/twolfson/restructuredtext-lint). Updates `restructuredtext-lint` from 1.4.0 to 2.0.2 - [Changelog](https://github.com/twolfson/restructuredtext-lint/blob/master/CHANGELOG.rst) - [Commits](https://github.com/twolfson/restructuredtext-lint/compare/1.4.0...2.0.2) --- updated-dependencies: - dependency-name: restructuredtext-lint dependency-version: 2.0.2 dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index aedbf64..76df516 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -28,7 +28,7 @@ Pygments==2.19.2 readme-renderer==44.0 requests==2.32.5 requests-toolbelt==1.0.0 -restructuredtext-lint==1.4.0 +restructuredtext-lint==2.0.2 rfc3986==2.0.0 rich==14.2.0 setuptools==80.9.0 From 7840528fe25f95b7ed4f0aacab602288f1f73c74 Mon Sep 17 00:00:00 2001 From: Rodos Date: Sat, 29 Nov 2025 09:19:23 +1100 Subject: [PATCH 360/455] Skip DMCA'd repos which return a 451 response Log a warning and the link to the DMCA notice. Continue backing up other repositories instead of crashing. 
Closes #163 --- github_backup/github_backup.py | 87 +++++++++++++------- tests/test_http_451.py | 143 +++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+), 29 deletions(-) create mode 100644 tests/test_http_451.py diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 14f0ed8..dcf79e8 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -37,6 +37,15 @@ FILE_URI_PREFIX = "file://" logger = logging.getLogger(__name__) + +class RepositoryUnavailableError(Exception): + """Raised when a repository is unavailable due to legal reasons (e.g., DMCA takedown).""" + + def __init__(self, message, dmca_url=None): + super().__init__(message) + self.dmca_url = dmca_url + + # Setup SSL context with fallback chain https_ctx = ssl.create_default_context() if https_ctx.get_ca_certs(): @@ -612,6 +621,19 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): status_code = int(r.getcode()) + # Handle DMCA takedown (HTTP 451) - raise exception to skip entire repository + if status_code == 451: + dmca_url = None + try: + response_data = json.loads(r.read().decode("utf-8")) + dmca_url = response_data.get("block", {}).get("html_url") + except Exception: + pass + raise RepositoryUnavailableError( + "Repository unavailable due to legal reasons (HTTP 451)", + dmca_url=dmca_url + ) + # Check if we got correct data try: response = json.loads(r.read().decode("utf-8")) @@ -1668,40 +1690,47 @@ def backup_repositories(args, output_directory, repositories): continue # don't try to back anything else for a gist; it doesn't exist - download_wiki = args.include_wiki or args.include_everything - if repository["has_wiki"] and download_wiki: - fetch_repository( - repository["name"], - repo_url.replace(".git", ".wiki.git"), - os.path.join(repo_cwd, "wiki"), - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone, - no_prune=args.no_prune, - ) - if args.include_issues or 
args.include_everything: - backup_issues(args, repo_cwd, repository, repos_template) + try: + download_wiki = args.include_wiki or args.include_everything + if repository["has_wiki"] and download_wiki: + fetch_repository( + repository["name"], + repo_url.replace(".git", ".wiki.git"), + os.path.join(repo_cwd, "wiki"), + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone, + no_prune=args.no_prune, + ) + if args.include_issues or args.include_everything: + backup_issues(args, repo_cwd, repository, repos_template) - if args.include_pulls or args.include_everything: - backup_pulls(args, repo_cwd, repository, repos_template) + if args.include_pulls or args.include_everything: + backup_pulls(args, repo_cwd, repository, repos_template) - if args.include_milestones or args.include_everything: - backup_milestones(args, repo_cwd, repository, repos_template) + if args.include_milestones or args.include_everything: + backup_milestones(args, repo_cwd, repository, repos_template) - if args.include_labels or args.include_everything: - backup_labels(args, repo_cwd, repository, repos_template) + if args.include_labels or args.include_everything: + backup_labels(args, repo_cwd, repository, repos_template) - if args.include_hooks or args.include_everything: - backup_hooks(args, repo_cwd, repository, repos_template) + if args.include_hooks or args.include_everything: + backup_hooks(args, repo_cwd, repository, repos_template) - if args.include_releases or args.include_everything: - backup_releases( - args, - repo_cwd, - repository, - repos_template, - include_assets=args.include_assets or args.include_everything, - ) + if args.include_releases or args.include_everything: + backup_releases( + args, + repo_cwd, + repository, + repos_template, + include_assets=args.include_assets or args.include_everything, + ) + except RepositoryUnavailableError as e: + logger.warning(f"Repository {repository['full_name']} is unavailable (HTTP 451)") + if 
e.dmca_url: + logger.warning(f"DMCA notice: {e.dmca_url}") + logger.info(f"Skipping remaining resources for {repository['full_name']}") + continue if args.incremental: if last_update == "0000-00-00T00:00:00Z": diff --git a/tests/test_http_451.py b/tests/test_http_451.py new file mode 100644 index 0000000..7feca1d --- /dev/null +++ b/tests/test_http_451.py @@ -0,0 +1,143 @@ +"""Tests for HTTP 451 (DMCA takedown) handling.""" + +import json +from unittest.mock import Mock, patch + +import pytest + +from github_backup import github_backup + + +class TestHTTP451Exception: + """Test suite for HTTP 451 DMCA takedown exception handling.""" + + def test_repository_unavailable_error_raised(self): + """HTTP 451 should raise RepositoryUnavailableError with DMCA URL.""" + # Create mock args + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = None + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + + # Mock HTTPError 451 response + mock_response = Mock() + mock_response.getcode.return_value = 451 + + dmca_data = { + "message": "Repository access blocked", + "block": { + "reason": "dmca", + "created_at": "2024-11-12T14:38:04Z", + "html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" + } + } + mock_response.read.return_value = json.dumps(dmca_data).encode("utf-8") + mock_response.headers = {"x-ratelimit-remaining": "5000"} + mock_response.reason = "Unavailable For Legal Reasons" + + def mock_get_response(request, auth, template): + return mock_response, [] + + with patch("github_backup.github_backup._get_response", side_effect=mock_get_response): + with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: + list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) + + # Check exception has DMCA URL + assert exc_info.value.dmca_url == 
"https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" + assert "451" in str(exc_info.value) + + def test_repository_unavailable_error_without_dmca_url(self): + """HTTP 451 without DMCA details should still raise exception.""" + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = None + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + + mock_response = Mock() + mock_response.getcode.return_value = 451 + mock_response.read.return_value = b'{"message": "Blocked"}' + mock_response.headers = {"x-ratelimit-remaining": "5000"} + mock_response.reason = "Unavailable For Legal Reasons" + + def mock_get_response(request, auth, template): + return mock_response, [] + + with patch("github_backup.github_backup._get_response", side_effect=mock_get_response): + with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: + list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) + + # Exception raised even without DMCA URL + assert exc_info.value.dmca_url is None + assert "451" in str(exc_info.value) + + def test_repository_unavailable_error_with_malformed_json(self): + """HTTP 451 with malformed JSON should still raise exception.""" + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = None + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + + mock_response = Mock() + mock_response.getcode.return_value = 451 + mock_response.read.return_value = b"invalid json {" + mock_response.headers = {"x-ratelimit-remaining": "5000"} + mock_response.reason = "Unavailable For Legal Reasons" + + def mock_get_response(request, auth, template): + return mock_response, [] + + with 
patch("github_backup.github_backup._get_response", side_effect=mock_get_response): + with pytest.raises(github_backup.RepositoryUnavailableError): + list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) + + def test_other_http_errors_unchanged(self): + """Other HTTP errors should still raise generic Exception.""" + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = None + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + + mock_response = Mock() + mock_response.getcode.return_value = 404 + mock_response.read.return_value = b'{"message": "Not Found"}' + mock_response.headers = {"x-ratelimit-remaining": "5000"} + mock_response.reason = "Not Found" + + def mock_get_response(request, auth, template): + return mock_response, [] + + with patch("github_backup.github_backup._get_response", side_effect=mock_get_response): + # Should raise generic Exception, not RepositoryUnavailableError + with pytest.raises(Exception) as exc_info: + list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/notfound/issues")) + + assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError) + assert "404" in str(exc_info.value) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 8b7512c8d845ab3e845b807cdf9baa6357571af4 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Fri, 28 Nov 2025 23:39:09 +0000 Subject: [PATCH 361/455] Release version 0.52.0 --- CHANGES.rst | 83 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3c7c16f..396dfe8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,90 @@ Changelog ========= -0.51.3 (2025-11-18) +0.52.0 (2025-11-28) ------------------- ------------------------ +- Skip DMCA'd repos which 
return a 451 response. [Rodos] + + Log a warning and the link to the DMCA notice. Continue backing up + other repositories instead of crashing. + + Closes #163 +- Chore(deps): bump restructuredtext-lint in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [restructuredtext-lint](https://github.com/twolfson/restructuredtext-lint). + + + Updates `restructuredtext-lint` from 1.4.0 to 2.0.2 + - [Changelog](https://github.com/twolfson/restructuredtext-lint/blob/master/CHANGELOG.rst) + - [Commits](https://github.com/twolfson/restructuredtext-lint/compare/1.4.0...2.0.2) + + --- + updated-dependencies: + - dependency-name: restructuredtext-lint + dependency-version: 2.0.2 + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump actions/checkout from 5 to 6. [dependabot[bot]] + + Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to 6. + - [Release notes](https://github.com/actions/checkout/releases) + - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) + - [Commits](https://github.com/actions/checkout/compare/v5...v6) + + --- + updated-dependencies: + - dependency-name: actions/checkout + dependency-version: '6' + dependency-type: direct:production + update-type: version-update:semver-major + ... +- Chore(deps): bump the python-packages group with 3 updates. + [dependabot[bot]] + + Bumps the python-packages group with 3 updates: [click](https://github.com/pallets/click), [pytest](https://github.com/pytest-dev/pytest) and [keyring](https://github.com/jaraco/keyring). 
+ + + Updates `click` from 8.3.0 to 8.3.1 + - [Release notes](https://github.com/pallets/click/releases) + - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) + - [Commits](https://github.com/pallets/click/compare/8.3.0...8.3.1) + + Updates `pytest` from 8.3.3 to 9.0.1 + - [Release notes](https://github.com/pytest-dev/pytest/releases) + - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) + - [Commits](https://github.com/pytest-dev/pytest/compare/8.3.3...9.0.1) + + Updates `keyring` from 25.6.0 to 25.7.0 + - [Release notes](https://github.com/jaraco/keyring/releases) + - [Changelog](https://github.com/jaraco/keyring/blob/main/NEWS.rst) + - [Commits](https://github.com/jaraco/keyring/compare/v25.6.0...v25.7.0) + + --- + updated-dependencies: + - dependency-name: click + dependency-version: 8.3.1 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: pytest + dependency-version: 9.0.1 + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + - dependency-name: keyring + dependency-version: 25.7.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... + + +0.51.3 (2025-11-18) +------------------- - Test: Add pagination tests for cursor and page-based Link headers. [Rodos] - Use cursor based pagination. 
[Helio Machado] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 378947a..aa21288 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.51.3" +__version__ = "0.52.0" From 5739ac074551171b22e74bb32705b6a10ca5ce39 Mon Sep 17 00:00:00 2001 From: Rodos Date: Sat, 29 Nov 2025 16:50:53 +1100 Subject: [PATCH 362/455] Avoid rewriting unchanged JSON files for labels, milestones, releases, hooks, followers, and following This change reduces unnecessary writes when backing up metadata that changes infrequently. The implementation compares existing file content before writing and skips the write if the content is identical, preserving file timestamps. Key changes: - Added json_dump_if_changed() helper that compares content before writing - Uses atomic writes (temp file + rename) for all metadata files - NOT applied to issues/pulls (they use incremental_by_files logic) - Made log messages consistent and past tense ("Saved" instead of "Saving") - Added informative logging showing skip counts Fixes #133 --- github_backup/github_backup.py | 96 ++++++++++++-- tests/test_json_dump_if_changed.py | 198 +++++++++++++++++++++++++++++ 2 files changed, 283 insertions(+), 11 deletions(-) create mode 100644 tests/test_json_dump_if_changed.py diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index dcf79e8..9d39a64 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1898,11 +1898,21 @@ def backup_milestones(args, repo_cwd, repository, repos_template): for milestone in _milestones: milestones[milestone["number"]] = milestone - logger.info("Saving {0} milestones to disk".format(len(list(milestones.keys())))) + written_count = 0 for number, milestone in list(milestones.items()): milestone_file = "{0}/{1}.json".format(milestone_cwd, number) - with codecs.open(milestone_file, "w", encoding="utf-8") as f: - json_dump(milestone, f) + if json_dump_if_changed(milestone, 
milestone_file): + written_count += 1 + + total = len(milestones) + if written_count == total: + logger.info("Saved {0} milestones to disk".format(total)) + elif written_count == 0: + logger.info("{0} milestones unchanged, skipped write".format(total)) + else: + logger.info("Saved {0} of {1} milestones to disk ({2} unchanged)".format( + written_count, total, total - written_count + )) def backup_labels(args, repo_cwd, repository, repos_template): @@ -1955,19 +1965,17 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F reverse=True, ) releases = releases[: args.number_of_latest_releases] - logger.info("Saving the latest {0} releases to disk".format(len(releases))) - else: - logger.info("Saving {0} releases to disk".format(len(releases))) # for each release, store it + written_count = 0 for release in releases: release_name = release["tag_name"] release_name_safe = release_name.replace("/", "__") output_filepath = os.path.join( release_cwd, "{0}.json".format(release_name_safe) ) - with codecs.open(output_filepath, "w+", encoding="utf-8") as f: - json_dump(release, f) + if json_dump_if_changed(release, output_filepath): + written_count += 1 if include_assets: assets = retrieve_data(args, release["assets_url"]) @@ -1984,6 +1992,17 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F fine=True if args.token_fine is not None else False, ) + # Log the results + total = len(releases) + if written_count == total: + logger.info("Saved {0} releases to disk".format(total)) + elif written_count == 0: + logger.info("{0} releases unchanged, skipped write".format(total)) + else: + logger.info("Saved {0} of {1} releases to disk ({2} unchanged)".format( + written_count, total, total - written_count + )) + def fetch_repository( name, @@ -2108,9 +2127,10 @@ def _backup_data(args, name, template, output_file, output_directory): mkdir_p(output_directory) data = retrieve_data(args, template) - logger.info("Writing {0} {1} to 
disk".format(len(data), name)) - with codecs.open(output_file, "w", encoding="utf-8") as f: - json_dump(data, f) + if json_dump_if_changed(data, output_file): + logger.info("Saved {0} {1} to disk".format(len(data), name)) + else: + logger.info("{0} {1} unchanged, skipped write".format(len(data), name)) def json_dump(data, output_file): @@ -2122,3 +2142,57 @@ def json_dump(data, output_file): indent=4, separators=(",", ": "), ) + + +def json_dump_if_changed(data, output_file_path): + """ + Write JSON data to file only if content has changed. + + Compares the serialized JSON data with the existing file content + and only writes if different. This prevents unnecessary file + modification timestamp updates and disk writes. + + Uses atomic writes (temp file + rename) to prevent corruption + if the process is interrupted during the write. + + Args: + data: The data to serialize as JSON + output_file_path: The path to the output file + + Returns: + True if file was written (content changed or new file) + False if write was skipped (content unchanged) + """ + # Serialize new data with consistent formatting matching json_dump() + new_content = json.dumps( + data, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(",", ": "), + ) + + # Check if file exists and compare content + if os.path.exists(output_file_path): + try: + with codecs.open(output_file_path, "r", encoding="utf-8") as f: + existing_content = f.read() + if existing_content == new_content: + logger.debug( + "Content unchanged, skipping write: {0}".format(output_file_path) + ) + return False + except (OSError, UnicodeDecodeError) as e: + # If we can't read the existing file, write the new one + logger.debug( + "Error reading existing file {0}, will overwrite: {1}".format( + output_file_path, e + ) + ) + + # Write the file atomically using temp file + rename + temp_file = output_file_path + ".temp" + with codecs.open(temp_file, "w", encoding="utf-8") as f: + f.write(new_content) + 
os.rename(temp_file, output_file_path) # Atomic on POSIX systems + return True diff --git a/tests/test_json_dump_if_changed.py b/tests/test_json_dump_if_changed.py new file mode 100644 index 0000000..426baee --- /dev/null +++ b/tests/test_json_dump_if_changed.py @@ -0,0 +1,198 @@ +"""Tests for json_dump_if_changed functionality.""" + +import codecs +import json +import os +import tempfile + +import pytest + +from github_backup import github_backup + + +class TestJsonDumpIfChanged: + """Test suite for json_dump_if_changed function.""" + + def test_writes_new_file(self): + """Should write file when it doesn't exist.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + test_data = {"key": "value", "number": 42} + + result = github_backup.json_dump_if_changed(test_data, output_file) + + assert result is True + assert os.path.exists(output_file) + + # Verify content matches expected format + with codecs.open(output_file, "r", encoding="utf-8") as f: + content = f.read() + loaded = json.loads(content) + assert loaded == test_data + + def test_skips_unchanged_file(self): + """Should skip write when content is identical.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + test_data = {"key": "value", "number": 42} + + # First write + result1 = github_backup.json_dump_if_changed(test_data, output_file) + assert result1 is True + + # Get the initial mtime + mtime1 = os.path.getmtime(output_file) + + # Second write with same data + result2 = github_backup.json_dump_if_changed(test_data, output_file) + assert result2 is False + + # File should not have been modified + mtime2 = os.path.getmtime(output_file) + assert mtime1 == mtime2 + + def test_writes_when_content_changed(self): + """Should write file when content has changed.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + test_data1 = {"key": "value1"} + test_data2 = 
{"key": "value2"} + + # First write + result1 = github_backup.json_dump_if_changed(test_data1, output_file) + assert result1 is True + + # Second write with different data + result2 = github_backup.json_dump_if_changed(test_data2, output_file) + assert result2 is True + + # Verify new content + with codecs.open(output_file, "r", encoding="utf-8") as f: + loaded = json.load(f) + assert loaded == test_data2 + + def test_uses_consistent_formatting(self): + """Should use same JSON formatting as json_dump.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + test_data = {"z": "last", "a": "first", "m": "middle"} + + github_backup.json_dump_if_changed(test_data, output_file) + + with codecs.open(output_file, "r", encoding="utf-8") as f: + content = f.read() + + # Check for consistent formatting: + # - sorted keys + # - 4-space indent + # - comma-colon-space separator + expected = json.dumps( + test_data, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(",", ": "), + ) + assert content == expected + + def test_atomic_write_always_used(self): + """Should always use temp file and rename for atomic writes.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + test_data = {"key": "value"} + + result = github_backup.json_dump_if_changed(test_data, output_file) + + assert result is True + assert os.path.exists(output_file) + + # Temp file should not exist after atomic write + temp_file = output_file + ".temp" + assert not os.path.exists(temp_file) + + # Verify content + with codecs.open(output_file, "r", encoding="utf-8") as f: + loaded = json.load(f) + assert loaded == test_data + + def test_handles_unicode_content(self): + """Should correctly handle Unicode content.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + test_data = { + "emoji": "🚀", + "chinese": "你好", + "arabic": "مرحبا", + "cyrillic": "Привет", + 
} + + result = github_backup.json_dump_if_changed(test_data, output_file) + assert result is True + + # Verify Unicode is preserved + with codecs.open(output_file, "r", encoding="utf-8") as f: + loaded = json.load(f) + assert loaded == test_data + + # Second write should skip + result2 = github_backup.json_dump_if_changed(test_data, output_file) + assert result2 is False + + def test_handles_complex_nested_data(self): + """Should handle complex nested data structures.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + test_data = { + "users": [ + {"id": 1, "name": "Alice", "tags": ["admin", "user"]}, + {"id": 2, "name": "Bob", "tags": ["user"]}, + ], + "metadata": {"version": "1.0", "nested": {"deep": {"value": 42}}}, + } + + result = github_backup.json_dump_if_changed(test_data, output_file) + assert result is True + + # Verify structure is preserved + with codecs.open(output_file, "r", encoding="utf-8") as f: + loaded = json.load(f) + assert loaded == test_data + + def test_overwrites_on_unicode_decode_error(self): + """Should overwrite if existing file has invalid UTF-8.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + test_data = {"key": "value"} + + # Write invalid UTF-8 bytes + with open(output_file, "wb") as f: + f.write(b"\xff\xfe invalid utf-8") + + # Should catch UnicodeDecodeError and overwrite + result = github_backup.json_dump_if_changed(test_data, output_file) + assert result is True + + # Verify new content was written + with codecs.open(output_file, "r", encoding="utf-8") as f: + loaded = json.load(f) + assert loaded == test_data + + def test_key_order_independence(self): + """Should treat differently-ordered dicts as same if keys/values match.""" + with tempfile.TemporaryDirectory() as tmpdir: + output_file = os.path.join(tmpdir, "test.json") + + # Write first dict + data1 = {"z": 1, "a": 2, "m": 3} + github_backup.json_dump_if_changed(data1, 
output_file) + + # Try to write same data but different order + data2 = {"a": 2, "m": 3, "z": 1} + result = github_backup.json_dump_if_changed(data2, output_file) + + # Should skip because content is the same (keys are sorted) + assert result is False + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 6ad1959d437afc8349f605f5f5d816ebdf0ab8e2 Mon Sep 17 00:00:00 2001 From: Rodos Date: Sat, 29 Nov 2025 21:16:22 +1100 Subject: [PATCH 363/455] fix: case-sensitive username filtering causing silent backup failures GitHub's API accepts usernames in any case but returns canonical case. The case-sensitive comparison in filter_repositories() filtered out all repositories when user-provided case didn't match GitHub's canonical case. Changed to case-insensitive comparison. Fixes #198 --- github_backup/github_backup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index dcf79e8..a54e299 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1587,7 +1587,9 @@ def filter_repositories(args, unfiltered_repositories): repositories = [] for r in unfiltered_repositories: # gists can be anonymous, so need to safely check owner - if r.get("owner", {}).get("login") == args.user or r.get("is_starred"): + # Use case-insensitive comparison to match GitHub's case-insensitive username behavior + owner_login = r.get("owner", {}).get("login", "") + if owner_login.lower() == args.user.lower() or r.get("is_starred"): repositories.append(r) name_regex = None From ff2681e1960f0176f176bb22b0c4682d74d89b6f Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sun, 30 Nov 2025 04:30:48 +0000 Subject: [PATCH 364/455] Release version 0.53.0 --- CHANGES.rst | 37 ++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 396dfe8..b84d655 100644 --- a/CHANGES.rst +++ 
b/CHANGES.rst @@ -1,9 +1,44 @@ Changelog ========= -0.52.0 (2025-11-28) +0.53.0 (2025-11-30) ------------------- ------------------------ + +Fix +~~~ +- Case-sensitive username filtering causing silent backup failures. + [Rodos] + + GitHub's API accepts usernames in any case but returns canonical case. + The case-sensitive comparison in filter_repositories() filtered out all + repositories when user-provided case didn't match GitHub's canonical case. + + Changed to case-insensitive comparison. + + Fixes #198 + +Other +~~~~~ +- Avoid rewriting unchanged JSON files for labels, milestones, releases, + hooks, followers, and following. [Rodos] + + This change reduces unnecessary writes when backing up metadata that changes + infrequently. The implementation compares existing file content before writing + and skips the write if the content is identical, preserving file timestamps. + + Key changes: + - Added json_dump_if_changed() helper that compares content before writing + - Uses atomic writes (temp file + rename) for all metadata files + - NOT applied to issues/pulls (they use incremental_by_files logic) + - Made log messages consistent and past tense ("Saved" instead of "Saving") + - Added informative logging showing skip counts + + Fixes #133 + + +0.52.0 (2025-11-28) +------------------- - Skip DMCA'd repos which return a 451 response. [Rodos] Log a warning and the link to the DMCA notice. 
Continue backing up diff --git a/github_backup/__init__.py b/github_backup/__init__.py index aa21288..3c5da5f 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.52.0" +__version__ = "0.53.0" From bf28b46954395a1e5e27c766743735dee6c73033 Mon Sep 17 00:00:00 2001 From: Rodos Date: Mon, 1 Dec 2025 15:53:26 +1100 Subject: [PATCH 365/455] docs: update README testing section and add fetch vs pull explanation --- README.rst | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 69d5524..9836107 100644 --- a/README.rst +++ b/README.rst @@ -308,6 +308,25 @@ Skip existing on incomplete backups The ``--skip-existing`` argument will skip a backup if the directory already exists, even if the backup in that directory failed (perhaps due to a blocking error). This may result in unexpected missing data in a regular backup. +Updates use fetch, not pull +--------------------------- + +When updating an existing repository backup, ``github-backup`` uses ``git fetch`` rather than ``git pull``. This is intentional - a backup tool should reliably download data without risk of failure. Using ``git pull`` would require handling merge conflicts, which adds complexity and could cause backups to fail unexpectedly. + +With fetch, **all branches and commits are downloaded** safely into remote-tracking branches. The working directory files won't change, but your backup is complete. + +If you look at files directly (e.g., ``cat README.md``), you'll see the old content. The new data is in the remote-tracking branches (confusingly named "remote" but stored locally). To view or use the latest files:: + + git show origin/main:README.md # view a file + git merge origin/main # update working directory + +All branches are backed up as remote refs (``origin/main``, ``origin/feature-branch``, etc.). 
+ +If you want to browse files directly without merging, consider using ``--bare`` which skips the working directory entirely - the backup is just the git data. + +See `#269 `_ for more discussion. + + Github Backup Examples ====================== @@ -357,7 +376,12 @@ A huge thanks to all the contibuters! Testing ------- -This project currently contains no unit tests. To run linting:: +To run the test suite:: + + pip install pytest + pytest + +To run linting:: pip install flake8 flake8 --ignore=E501 From 12802103c470402c0ceccbbb1d8b767bd4ffcc82 Mon Sep 17 00:00:00 2001 From: Rodos Date: Mon, 1 Dec 2025 16:11:11 +1100 Subject: [PATCH 366/455] fix: send INFO/DEBUG to stdout, WARNING/ERROR to stderr Fixes #182 --- bin/github-backup | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/bin/github-backup b/bin/github-backup index b33d19f..d685bc9 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -16,12 +16,23 @@ from github_backup.github_backup import ( retrieve_repositories, ) -logging.basicConfig( - format="%(asctime)s.%(msecs)03d: %(message)s", +# INFO and DEBUG go to stdout, WARNING and above go to stderr +log_format = logging.Formatter( + fmt="%(asctime)s.%(msecs)03d: %(message)s", datefmt="%Y-%m-%dT%H:%M:%S", - level=logging.INFO, ) +stdout_handler = logging.StreamHandler(sys.stdout) +stdout_handler.setLevel(logging.DEBUG) +stdout_handler.addFilter(lambda r: r.levelno < logging.WARNING) +stdout_handler.setFormatter(log_format) + +stderr_handler = logging.StreamHandler(sys.stderr) +stderr_handler.setLevel(logging.WARNING) +stderr_handler.setFormatter(log_format) + +logging.basicConfig(level=logging.INFO, handlers=[stdout_handler, stderr_handler]) + def main(): args = parse_args() From 2a9d86a6bf2f1de3989e6a411b5a7dc326546e79 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 3 Dec 2025 02:17:59 +0000 Subject: [PATCH 367/455] Release version 0.54.0 --- CHANGES.rst | 17 ++++++++++++++++- github_backup/__init__.py | 2 +- 2 
files changed, 17 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b84d655..1b02e0d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,25 @@ Changelog ========= -0.53.0 (2025-11-30) +0.54.0 (2025-12-03) ------------------- ------------------------ +Fix +~~~ +- Send INFO/DEBUG to stdout, WARNING/ERROR to stderr. [Rodos] + + Fixes #182 + +Other +~~~~~ +- Docs: update README testing section and add fetch vs pull explanation. + [Rodos] + + +0.53.0 (2025-11-30) +------------------- + Fix ~~~ - Case-sensitive username filtering causing silent backup failures. diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 3c5da5f..450ee12 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.53.0" +__version__ = "0.54.0" From 899ab5fdc286bd4064b78411e15a8cf44be4568c Mon Sep 17 00:00:00 2001 From: Rodos Date: Thu, 4 Dec 2025 10:07:43 +1100 Subject: [PATCH 368/455] fix: warn and skip when --starred-gists used for different user GitHub's API only allows retrieving starred gists for the authenticated user. Previously, using --starred-gists when backing up a different user would silently return no relevant data. Now warns and skips the retrieval entirely when the target user differs from the authenticated user. Uses case-insensitive comparison to match GitHub's username handling. Fixes #93 --- README.rst | 2 ++ github_backup/github_backup.py | 26 ++++++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 9836107..a33db61 100644 --- a/README.rst +++ b/README.rst @@ -301,6 +301,8 @@ Starred gists vs starred repo behaviour The starred normal repo cloning (``--all-starred``) argument stores starred repos separately to the users own repositories. However, using ``--starred-gists`` will store starred gists within the same directory as the users own gists ``--gists``. Also, all gist repo directory names are IDs not the gist's name. 
+Note: ``--starred-gists`` only retrieves starred gists for the authenticated user, not the target user, due to a GitHub API limitation. + Skip existing on incomplete backups ----------------------------------- diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0ad55d1..cdb536d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1565,16 +1565,22 @@ def retrieve_repositories(args, authenticated_user): repos.extend(gists) if args.include_starred_gists: - starred_gists_template = "https://{0}/gists/starred".format( - get_github_api_host(args) - ) - starred_gists = retrieve_data( - args, starred_gists_template, single_request=False - ) - # flag each repo as a starred gist for downstream processing - for item in starred_gists: - item.update({"is_gist": True, "is_starred": True}) - repos.extend(starred_gists) + if not authenticated_user.get("login") or args.user.lower() != authenticated_user["login"].lower(): + logger.warning( + "Cannot retrieve starred gists for '%s'. GitHub only allows access to the authenticated user's starred gists.", + args.user, + ) + else: + starred_gists_template = "https://{0}/gists/starred".format( + get_github_api_host(args) + ) + starred_gists = retrieve_data( + args, starred_gists_template, single_request=False + ) + # flag each repo as a starred gist for downstream processing + for item in starred_gists: + item.update({"is_gist": True, "is_starred": True}) + repos.extend(starred_gists) return repos From fdfaaec1ba072b0a98d1981b55de5ccb213e9625 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 6 Dec 2025 04:51:42 +0000 Subject: [PATCH 369/455] chore(deps): bump urllib3 from 2.5.0 to 2.6.0 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.5.0 to 2.6.0. 
- [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.5.0...2.6.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.6.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 76df516..b1323a0 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -35,6 +35,6 @@ setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 twine==6.2.0 -urllib3==2.5.0 +urllib3==2.6.0 webencodings==0.5.1 zipp==3.23.0 From aba048a3e983074b2a0fba0d3e304c00cd090d79 Mon Sep 17 00:00:00 2001 From: Rodos Date: Sun, 7 Dec 2025 21:20:54 +1100 Subject: [PATCH 370/455] fix: warn when --private used without authentication --- bin/github-backup | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bin/github-backup b/bin/github-backup index d685bc9..dcac622 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -9,6 +9,7 @@ from github_backup.github_backup import ( backup_repositories, check_git_lfs_install, filter_repositories, + get_auth, get_authenticated_user, logger, mkdir_p, @@ -37,6 +38,12 @@ logging.basicConfig(level=logging.INFO, handlers=[stdout_handler, stderr_handler def main(): args = parse_args() + if args.private and not get_auth(args): + logger.warning( + "The --private flag has no effect without authentication. " + "Use -t/--token, -f/--token-fine, or -u/--username to authenticate." 
+ ) + if args.quiet: logger.setLevel(logging.WARNING) From 6e2a7e521ca1e9b8aae58bbe4eaebbb107d828bb Mon Sep 17 00:00:00 2001 From: Rodos Date: Sun, 7 Dec 2025 21:21:14 +1100 Subject: [PATCH 371/455] fix: --all-starred now clones repos without --repositories --- github_backup/github_backup.py | 14 ++- tests/test_all_starred.py | 161 +++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 8 deletions(-) create mode 100644 tests/test_all_starred.py diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index cdb536d..bbacdae 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -561,7 +561,7 @@ def get_github_host(args): def read_file_contents(file_uri): - return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() + return open(file_uri[len(FILE_URI_PREFIX):], "rt").readline().strip() def get_github_repo_url(args, repository): @@ -1672,9 +1672,10 @@ def backup_repositories(args, output_directory, repositories): repo_url = get_github_repo_url(args, repository) include_gists = args.include_gists or args.include_starred_gists + include_starred = args.all_starred and repository.get("is_starred") if (args.include_repository or args.include_everything) or ( include_gists and repository.get("is_gist") - ): + ) or include_starred: repo_name = ( repository.get("name") if not repository.get("is_gist") @@ -2023,12 +2024,9 @@ def fetch_repository( ): if bare_clone: if os.path.exists(local_dir): - clone_exists = ( - subprocess.check_output( - ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir - ) - == b"true\n" - ) + clone_exists = subprocess.check_output( + ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir + ) == b"true\n" else: clone_exists = False else: diff --git a/tests/test_all_starred.py b/tests/test_all_starred.py new file mode 100644 index 0000000..f59a67e --- /dev/null +++ b/tests/test_all_starred.py @@ -0,0 +1,161 @@ +"""Tests for --all-starred flag behavior (issue #225).""" 
+ +import pytest +from unittest.mock import Mock, patch + +from github_backup import github_backup + + +class TestAllStarredCloning: + """Test suite for --all-starred repository cloning behavior. + + Issue #225: --all-starred should clone starred repos without requiring --repositories. + """ + + def _create_mock_args(self, **overrides): + """Create a mock args object with sensible defaults.""" + args = Mock() + args.user = "testuser" + args.output_directory = "/tmp/backup" + args.include_repository = False + args.include_everything = False + args.include_gists = False + args.include_starred_gists = False + args.all_starred = False + args.skip_existing = False + args.bare_clone = False + args.lfs_clone = False + args.no_prune = False + args.include_wiki = False + args.include_issues = False + args.include_issue_comments = False + args.include_issue_events = False + args.include_pulls = False + args.include_pull_comments = False + args.include_pull_commits = False + args.include_pull_details = False + args.include_labels = False + args.include_hooks = False + args.include_milestones = False + args.include_releases = False + args.include_assets = False + args.include_attachments = False + args.incremental = False + args.incremental_by_files = False + args.github_host = None + args.prefer_ssh = False + args.token_classic = None + args.token_fine = None + args.username = None + args.password = None + args.as_app = False + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + + for key, value in overrides.items(): + setattr(args, key, value) + + return args + + @patch('github_backup.github_backup.fetch_repository') + @patch('github_backup.github_backup.get_github_repo_url') + def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch): + """--all-starred should clone starred repos without --repositories flag. + + This is the core fix for issue #225. 
+ """ + args = self._create_mock_args(all_starred=True) + mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git" + + # A starred repository (is_starred flag set by retrieve_repositories) + starred_repo = { + "name": "awesome-project", + "full_name": "otheruser/awesome-project", + "owner": {"login": "otheruser"}, + "private": False, + "fork": False, + "has_wiki": False, + "is_starred": True, # This flag is set for starred repos + } + + with patch('github_backup.github_backup.mkdir_p'): + github_backup.backup_repositories(args, "/tmp/backup", [starred_repo]) + + # fetch_repository should be called for the starred repo + assert mock_fetch.called, "--all-starred should trigger repository cloning" + mock_fetch.assert_called_once() + call_args = mock_fetch.call_args + assert call_args[0][0] == "awesome-project" # repo name + + @patch('github_backup.github_backup.fetch_repository') + @patch('github_backup.github_backup.get_github_repo_url') + def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch): + """Starred repos should NOT be cloned if --all-starred is not set.""" + args = self._create_mock_args(all_starred=False) + mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git" + + starred_repo = { + "name": "awesome-project", + "full_name": "otheruser/awesome-project", + "owner": {"login": "otheruser"}, + "private": False, + "fork": False, + "has_wiki": False, + "is_starred": True, + } + + with patch('github_backup.github_backup.mkdir_p'): + github_backup.backup_repositories(args, "/tmp/backup", [starred_repo]) + + # fetch_repository should NOT be called + assert not mock_fetch.called, "Starred repos should not be cloned without --all-starred" + + @patch('github_backup.github_backup.fetch_repository') + @patch('github_backup.github_backup.get_github_repo_url') + def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch): + """Non-starred repos should NOT be 
cloned when only --all-starred is set.""" + args = self._create_mock_args(all_starred=True) + mock_get_url.return_value = "https://github.com/testuser/my-project.git" + + # A regular (non-starred) repository + regular_repo = { + "name": "my-project", + "full_name": "testuser/my-project", + "owner": {"login": "testuser"}, + "private": False, + "fork": False, + "has_wiki": False, + # No is_starred flag + } + + with patch('github_backup.github_backup.mkdir_p'): + github_backup.backup_repositories(args, "/tmp/backup", [regular_repo]) + + # fetch_repository should NOT be called for non-starred repos + assert not mock_fetch.called, "Non-starred repos should not be cloned with only --all-starred" + + @patch('github_backup.github_backup.fetch_repository') + @patch('github_backup.github_backup.get_github_repo_url') + def test_repositories_flag_still_works(self, mock_get_url, mock_fetch): + """--repositories flag should still clone repos as before.""" + args = self._create_mock_args(include_repository=True) + mock_get_url.return_value = "https://github.com/testuser/my-project.git" + + regular_repo = { + "name": "my-project", + "full_name": "testuser/my-project", + "owner": {"login": "testuser"}, + "private": False, + "fork": False, + "has_wiki": False, + } + + with patch('github_backup.github_backup.mkdir_p'): + github_backup.backup_repositories(args, "/tmp/backup", [regular_repo]) + + # fetch_repository should be called + assert mock_fetch.called, "--repositories should trigger repository cloning" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 58ad1c2378691802dbdf9e23d2137ea73bcc4690 Mon Sep 17 00:00:00 2001 From: Rodos Date: Sun, 7 Dec 2025 21:21:26 +1100 Subject: [PATCH 372/455] docs: fix RST formatting in Known blocking errors section --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index a33db61..9fd35fd 100644 --- a/README.rst +++ b/README.rst @@ -281,11 +281,11 @@ If the incremental 
argument is used, this will result in the next backup only re It's therefore recommended to only use the incremental argument if the output/result is being actively monitored, or complimented with periodic full non-incremental runs, to avoid unexpected missing data in a regular backup runs. -1. **Starred public repo hooks blocking** +**Starred public repo hooks blocking** - Since the ``--all`` argument includes ``--hooks``, if you use ``--all`` and ``--all-starred`` together to clone a users starred public repositories, the backup will likely error and block the backup continuing. +Since the ``--all`` argument includes ``--hooks``, if you use ``--all`` and ``--all-starred`` together to clone a users starred public repositories, the backup will likely error and block the backup continuing. - This is due to needing the correct permission for ``--hooks`` on public repos. +This is due to needing the correct permission for ``--hooks`` on public repos. "bare" is actually "mirror" From b80049e96e5d57e869203e09dc9db1e39329c68c Mon Sep 17 00:00:00 2001 From: Rodos Date: Sun, 7 Dec 2025 21:21:37 +1100 Subject: [PATCH 373/455] test: add missing test coverage for case sensitivity fix --- tests/test_case_sensitivity.py | 112 +++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 tests/test_case_sensitivity.py diff --git a/tests/test_case_sensitivity.py b/tests/test_case_sensitivity.py new file mode 100644 index 0000000..1398d0d --- /dev/null +++ b/tests/test_case_sensitivity.py @@ -0,0 +1,112 @@ +"""Tests for case-insensitive username/organization filtering.""" + +import pytest +from unittest.mock import Mock + +from github_backup import github_backup + + +class TestCaseSensitivity: + """Test suite for case-insensitive username matching in filter_repositories.""" + + def test_filter_repositories_case_insensitive_user(self): + """Should filter repositories case-insensitively for usernames. 
+ + Reproduces issue #198 where typing 'iamrodos' fails to match + repositories with owner.login='Iamrodos' (the canonical case from GitHub API). + """ + # Simulate user typing lowercase username + args = Mock() + args.user = "iamrodos" # lowercase (what user typed) + args.repository = None + args.name_regex = None + args.languages = None + args.exclude = None + args.fork = False + args.private = False + args.public = False + args.all = True + + # Simulate GitHub API returning canonical case + repos = [ + { + "name": "repo1", + "owner": {"login": "Iamrodos"}, # Capital I (canonical from API) + "private": False, + "fork": False, + }, + { + "name": "repo2", + "owner": {"login": "Iamrodos"}, + "private": False, + "fork": False, + }, + ] + + filtered = github_backup.filter_repositories(args, repos) + + # Should match despite case difference + assert len(filtered) == 2 + assert filtered[0]["name"] == "repo1" + assert filtered[1]["name"] == "repo2" + + def test_filter_repositories_case_insensitive_org(self): + """Should filter repositories case-insensitively for organizations. + + Tests the example from issue #198 where 'prai-org' doesn't match 'PRAI-Org'. 
+ """ + args = Mock() + args.user = "prai-org" # lowercase (what user typed) + args.repository = None + args.name_regex = None + args.languages = None + args.exclude = None + args.fork = False + args.private = False + args.public = False + args.all = True + + repos = [ + { + "name": "repo1", + "owner": {"login": "PRAI-Org"}, # Different case (canonical from API) + "private": False, + "fork": False, + }, + ] + + filtered = github_backup.filter_repositories(args, repos) + + # Should match despite case difference + assert len(filtered) == 1 + assert filtered[0]["name"] == "repo1" + + def test_filter_repositories_case_variations(self): + """Should handle various case combinations correctly.""" + args = Mock() + args.user = "TeSt-UsEr" # Mixed case + args.repository = None + args.name_regex = None + args.languages = None + args.exclude = None + args.fork = False + args.private = False + args.public = False + args.all = True + + repos = [ + {"name": "repo1", "owner": {"login": "test-user"}, "private": False, "fork": False}, + {"name": "repo2", "owner": {"login": "TEST-USER"}, "private": False, "fork": False}, + {"name": "repo3", "owner": {"login": "TeSt-UsEr"}, "private": False, "fork": False}, + {"name": "repo4", "owner": {"login": "other-user"}, "private": False, "fork": False}, + ] + + filtered = github_backup.filter_repositories(args, repos) + + # Should match first 3 (all case variations of same user) + assert len(filtered) == 3 + assert set(r["name"] for r in filtered) == {"repo1", "repo2", "repo3"} + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 1d6d474408968f728b11aa50c55ec9bb7ddf068e Mon Sep 17 00:00:00 2001 From: Rodos Date: Sun, 7 Dec 2025 21:50:49 +1100 Subject: [PATCH 374/455] fix: improve error messages for inaccessible repos and empty wikis --- github_backup/github_backup.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 
bbacdae..0282809 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -2041,11 +2041,14 @@ def fetch_repository( "git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True ) if initialized == 128: - logger.info( - "Skipping {0} ({1}) since it's not initialized".format( - name, masked_remote_url + if ".wiki.git" in remote_url: + logger.info( + "Skipping {0} wiki (wiki is enabled but has no content)".format(name) + ) + else: + logger.info( + "Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)".format(name) ) - ) return if clone_exists: From eb5779ac23ba68dbe05981d1ded2a72500767504 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sun, 7 Dec 2025 13:59:35 +0000 Subject: [PATCH 375/455] Release version 0.55.0 --- CHANGES.rst | 41 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 1b02e0d..f15dd59 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,49 @@ Changelog ========= -0.54.0 (2025-12-03) +0.55.0 (2025-12-07) ------------------- ------------------------ +Fix +~~~ +- Improve error messages for inaccessible repos and empty wikis. [Rodos] +- --all-starred now clones repos without --repositories. [Rodos] +- Warn when --private used without authentication. [Rodos] +- Warn and skip when --starred-gists used for different user. [Rodos] + + GitHub's API only allows retrieving starred gists for the authenticated + user. Previously, using --starred-gists when backing up a different user + would silently return no relevant data. + + Now warns and skips the retrieval entirely when the target user differs + from the authenticated user. Uses case-insensitive comparison to match + GitHub's username handling. + + Fixes #93 + +Other +~~~~~ +- Test: add missing test coverage for case sensitivity fix. [Rodos] +- Docs: fix RST formatting in Known blocking errors section. 
[Rodos] +- Chore(deps): bump urllib3 from 2.5.0 to 2.6.0. [dependabot[bot]] + + Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.5.0 to 2.6.0. + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.5.0...2.6.0) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-version: 2.6.0 + dependency-type: direct:production + ... + + +0.54.0 (2025-12-03) +------------------- + Fix ~~~ - Send INFO/DEBUG to stdout, WARNING/ERROR to stderr. [Rodos] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 450ee12..8b19221 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.54.0" +__version__ = "0.55.0" From 2fbe8d272c2230d20e6a4d1ed13a40f47c53857a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Dec 2025 13:09:32 +0000 Subject: [PATCH 376/455] chore(deps): bump the python-packages group with 3 updates Bumps the python-packages group with 3 updates: [black](https://github.com/psf/black), [pytest](https://github.com/pytest-dev/pytest) and [platformdirs](https://github.com/tox-dev/platformdirs). 
Updates `black` from 25.11.0 to 25.12.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/25.11.0...25.12.0) Updates `pytest` from 9.0.1 to 9.0.2 - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/9.0.1...9.0.2) Updates `platformdirs` from 4.5.0 to 4.5.1 - [Release notes](https://github.com/tox-dev/platformdirs/releases) - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) - [Commits](https://github.com/tox-dev/platformdirs/compare/4.5.0...4.5.1) --- updated-dependencies: - dependency-name: black dependency-version: 25.12.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: pytest dependency-version: 9.0.2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: platformdirs dependency-version: 4.5.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index b1323a0..d6e9b8e 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,5 +1,5 @@ autopep8==2.3.2 -black==25.11.0 +black==25.12.0 bleach==6.3.0 certifi==2025.11.12 charset-normalizer==3.4.4 @@ -8,7 +8,7 @@ colorama==0.4.6 docutils==0.22.3 flake8==7.3.0 gitchangelog==3.0.4 -pytest==9.0.1 +pytest==9.0.2 idna==3.11 importlib-metadata==8.7.0 jaraco.classes==3.4.0 @@ -21,7 +21,7 @@ mypy-extensions==1.1.0 packaging==25.0 pathspec==0.12.1 pkginfo==1.12.1.2 -platformdirs==4.5.0 +platformdirs==4.5.1 pycodestyle==2.14.0 pyflakes==3.4.0 Pygments==2.19.2 From 6d74af9126829b698a83cbe244093c9831b64f79 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 13:10:12 +0000 Subject: [PATCH 377/455] chore(deps): bump urllib3 in the python-packages group Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3). Updates `urllib3` from 2.6.0 to 2.6.1 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.6.0...2.6.1) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.6.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index d6e9b8e..5ca68cb 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -35,6 +35,6 @@ setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 twine==6.2.0 -urllib3==2.6.0 +urllib3==2.6.1 webencodings==0.5.1 zipp==3.23.0 From 75e6f56773c0afc2d1bd1f8976603e673b6d1378 Mon Sep 17 00:00:00 2001 From: Rodos Date: Thu, 11 Dec 2025 20:27:03 +1100 Subject: [PATCH 378/455] docs: add "Restoring from Backup" section to README Clarifies that this tool is backup-only with no inbuilt restore. Documents that git repos can be pushed back, but issues/PRs have GitHub API limitations affecting all backup tools. Closes #246 --- README.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.rst b/README.rst index 9fd35fd..f7bd30b 100644 --- a/README.rst +++ b/README.rst @@ -360,6 +360,25 @@ Debug an error/block or incomplete backup into a temporary directory. Omit "incr github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER +Restoring from Backup +===================== + +This tool creates backups only, there is no inbuilt restore command. + +**Git repositories, wikis, and gists** can be restored by pushing them back to GitHub as you would any git repository. For example, to restore a bare repository backup:: + + cd /tmp/white-house/repositories/petitions/repository + git push --mirror git@github.com:WhiteHouse/petitions.git + +**Issues, pull requests, comments, and other metadata** are saved as JSON files for archival purposes. 
The GitHub API does not support recreating this data faithfully, creating issues via the API has limitations: + +- New issue/PR numbers are assigned (original numbers cannot be set) +- Timestamps reflect creation time (original dates cannot be set) +- The API caller becomes the author (original authors cannot be set) +- Cross-references between issues and PRs will break + +These are GitHub API limitations that affect all backup and migration tools, not just this one. Recreating issues with these limitations via the GitHub API is an exercise for the reader. The JSON backups remain useful for searching, auditing, or manual reference. + Development =========== From e745b557557b808e19509df49352742af25c6201 Mon Sep 17 00:00:00 2001 From: Rodos Date: Thu, 11 Dec 2025 20:55:24 +1100 Subject: [PATCH 379/455] fix: replace deprecated git lfs clone with git clone + git lfs fetch --all git lfs clone is deprecated - modern git clone handles LFS automatically. Using git lfs fetch --all ensures all LFS objects across all refs are backed up, matching the existing bare clone behavior and providing complete LFS backups. Closes #379 --- README.rst | 2 ++ github_backup/github_backup.py | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 9fd35fd..5630681 100644 --- a/README.rst +++ b/README.rst @@ -215,6 +215,8 @@ When you use the ``--lfs`` option, you will need to make sure you have Git LFS i Instructions on how to do this can be found on https://git-lfs.github.com. +LFS objects are fetched for all refs, not just the current checkout, ensuring a complete backup of all LFS content across all branches and history. 
+ About Attachments ----------------- diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0282809..f706741 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -2090,11 +2090,13 @@ def fetch_repository( git_command.pop() logging_subprocess(git_command, cwd=local_dir) else: - if lfs_clone: - git_command = ["git", "lfs", "clone", remote_url, local_dir] - else: - git_command = ["git", "clone", remote_url, local_dir] + git_command = ["git", "clone", remote_url, local_dir] logging_subprocess(git_command) + if lfs_clone: + git_command = ["git", "lfs", "fetch", "--all", "--prune"] + if no_prune: + git_command.pop() + logging_subprocess(git_command, cwd=local_dir) def backup_account(args, output_directory): From 3684756eaa8e7dfa799d12d01d4d2e65115345a3 Mon Sep 17 00:00:00 2001 From: Rodos Date: Thu, 11 Dec 2025 21:18:23 +1100 Subject: [PATCH 380/455] fix: add Windows support with entry_points and os.replace - Replace os.rename() with os.replace() for atomic file operations on Windows (os.rename fails if destination exists on Windows) - Add entry_points console_scripts for proper .exe generation on Windows - Create github_backup/cli.py with main() entry point - Add github_backup/__main__.py for python -m github_backup support - Keep bin/github-backup as thin wrapper for backwards compatibility Closes #112 --- bin/github-backup | 78 +++++--------------------------- github_backup/__main__.py | 13 ++++++ github_backup/cli.py | 82 ++++++++++++++++++++++++++++++++++ github_backup/github_backup.py | 12 ++--- setup.py | 6 ++- 5 files changed, 116 insertions(+), 75 deletions(-) create mode 100644 github_backup/__main__.py create mode 100644 github_backup/cli.py diff --git a/bin/github-backup b/bin/github-backup index dcac622..c922888 100755 --- a/bin/github-backup +++ b/bin/github-backup @@ -1,76 +1,18 @@ #!/usr/bin/env python +""" +Backwards-compatible wrapper script. 
-import logging -import os -import sys - -from github_backup.github_backup import ( - backup_account, - backup_repositories, - check_git_lfs_install, - filter_repositories, - get_auth, - get_authenticated_user, - logger, - mkdir_p, - parse_args, - retrieve_repositories, -) - -# INFO and DEBUG go to stdout, WARNING and above go to stderr -log_format = logging.Formatter( - fmt="%(asctime)s.%(msecs)03d: %(message)s", - datefmt="%Y-%m-%dT%H:%M:%S", -) - -stdout_handler = logging.StreamHandler(sys.stdout) -stdout_handler.setLevel(logging.DEBUG) -stdout_handler.addFilter(lambda r: r.levelno < logging.WARNING) -stdout_handler.setFormatter(log_format) - -stderr_handler = logging.StreamHandler(sys.stderr) -stderr_handler.setLevel(logging.WARNING) -stderr_handler.setFormatter(log_format) - -logging.basicConfig(level=logging.INFO, handlers=[stdout_handler, stderr_handler]) - +The recommended way to run github-backup is via the installed command +(pip install github-backup) or python -m github_backup. -def main(): - args = parse_args() +This script is kept for backwards compatibility with existing installations +that may reference this path directly. +""" - if args.private and not get_auth(args): - logger.warning( - "The --private flag has no effect without authentication. " - "Use -t/--token, -f/--token-fine, or -u/--username to authenticate." 
- ) - - if args.quiet: - logger.setLevel(logging.WARNING) - - output_directory = os.path.realpath(args.output_directory) - if not os.path.isdir(output_directory): - logger.info("Create output directory {0}".format(output_directory)) - mkdir_p(output_directory) - - if args.lfs_clone: - check_git_lfs_install() - - if args.log_level: - log_level = logging.getLevelName(args.log_level.upper()) - if isinstance(log_level, int): - logger.root.setLevel(log_level) - - if not args.as_app: - logger.info("Backing up user {0} to {1}".format(args.user, output_directory)) - authenticated_user = get_authenticated_user(args) - else: - authenticated_user = {"login": None} - - repositories = retrieve_repositories(args, authenticated_user) - repositories = filter_repositories(args, repositories) - backup_repositories(args, output_directory, repositories) - backup_account(args, output_directory) +import sys +from github_backup.cli import main +from github_backup.github_backup import logger if __name__ == "__main__": try: diff --git a/github_backup/__main__.py b/github_backup/__main__.py new file mode 100644 index 0000000..0b4a7c3 --- /dev/null +++ b/github_backup/__main__.py @@ -0,0 +1,13 @@ +"""Allow running as: python -m github_backup""" + +import sys + +from github_backup.cli import main +from github_backup.github_backup import logger + +if __name__ == "__main__": + try: + main() + except Exception as e: + logger.error(str(e)) + sys.exit(1) diff --git a/github_backup/cli.py b/github_backup/cli.py new file mode 100644 index 0000000..98f8d4a --- /dev/null +++ b/github_backup/cli.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +"""Command-line interface for github-backup.""" + +import logging +import os +import sys + +from github_backup.github_backup import ( + backup_account, + backup_repositories, + check_git_lfs_install, + filter_repositories, + get_auth, + get_authenticated_user, + logger, + mkdir_p, + parse_args, + retrieve_repositories, +) + +# INFO and DEBUG go to stdout, WARNING and 
above go to stderr +log_format = logging.Formatter( + fmt="%(asctime)s.%(msecs)03d: %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S", +) + +stdout_handler = logging.StreamHandler(sys.stdout) +stdout_handler.setLevel(logging.DEBUG) +stdout_handler.addFilter(lambda r: r.levelno < logging.WARNING) +stdout_handler.setFormatter(log_format) + +stderr_handler = logging.StreamHandler(sys.stderr) +stderr_handler.setLevel(logging.WARNING) +stderr_handler.setFormatter(log_format) + +logging.basicConfig(level=logging.INFO, handlers=[stdout_handler, stderr_handler]) + + +def main(): + """Main entry point for github-backup CLI.""" + args = parse_args() + + if args.private and not get_auth(args): + logger.warning( + "The --private flag has no effect without authentication. " + "Use -t/--token, -f/--token-fine, or -u/--username to authenticate." + ) + + if args.quiet: + logger.setLevel(logging.WARNING) + + output_directory = os.path.realpath(args.output_directory) + if not os.path.isdir(output_directory): + logger.info("Create output directory {0}".format(output_directory)) + mkdir_p(output_directory) + + if args.lfs_clone: + check_git_lfs_install() + + if args.log_level: + log_level = logging.getLevelName(args.log_level.upper()) + if isinstance(log_level, int): + logger.root.setLevel(log_level) + + if not args.as_app: + logger.info("Backing up user {0} to {1}".format(args.user, output_directory)) + authenticated_user = get_authenticated_user(args) + else: + authenticated_user = {"login": None} + + repositories = retrieve_repositories(args, authenticated_user) + repositories = filter_repositories(args, repositories) + backup_repositories(args, output_directory, repositories) + backup_account(args, output_directory) + + +if __name__ == "__main__": + try: + main() + except Exception as e: + logger.error(str(e)) + sys.exit(1) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0282809..14dd167 100644 --- a/github_backup/github_backup.py +++ 
b/github_backup/github_backup.py @@ -1038,7 +1038,7 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False): bytes_downloaded += len(chunk) # Atomic rename to final location - os.rename(temp_path, path) + os.replace(temp_path, path) metadata["size_bytes"] = bytes_downloaded metadata["success"] = True @@ -1459,7 +1459,7 @@ def download_attachments( # Rename to add extension (already atomic from download) try: - os.rename(filepath, final_filepath) + os.replace(filepath, final_filepath) metadata["saved_as"] = os.path.basename(final_filepath) except Exception as e: logger.warning( @@ -1490,7 +1490,7 @@ def download_attachments( manifest_path = os.path.join(attachments_dir, "manifest.json") with open(manifest_path + ".temp", "w") as f: json.dump(manifest, f, indent=2) - os.rename(manifest_path + ".temp", manifest_path) # Atomic write + os.replace(manifest_path + ".temp", manifest_path) # Atomic write logger.debug( "Wrote manifest for {0} #{1}: {2} attachments".format( item_type_display, number, len(attachment_metadata_list) @@ -1811,7 +1811,7 @@ def backup_issues(args, repo_cwd, repository, repos_template): with codecs.open(issue_file + ".temp", "w", encoding="utf-8") as f: json_dump(issue, f) - os.rename(issue_file + ".temp", issue_file) # Unlike json_dump, this is atomic + os.replace(issue_file + ".temp", issue_file) # Atomic write def backup_pulls(args, repo_cwd, repository, repos_template): @@ -1886,7 +1886,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f: json_dump(pull, f) - os.rename(pull_file + ".temp", pull_file) # Unlike json_dump, this is atomic + os.replace(pull_file + ".temp", pull_file) # Atomic write def backup_milestones(args, repo_cwd, repository, repos_template): @@ -2203,5 +2203,5 @@ def json_dump_if_changed(data, output_file_path): temp_file = output_file_path + ".temp" with codecs.open(temp_file, "w", encoding="utf-8") as f: f.write(new_content) - 
os.rename(temp_file, output_file_path) # Atomic on POSIX systems + os.replace(temp_file, output_file_path) # Atomic write return True diff --git a/setup.py b/setup.py index 374e6ec..7835a32 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,11 @@ def open_file(fname): author="Jose Diaz-Gonzalez", author_email="github-backup@josediazgonzalez.com", packages=["github_backup"], - scripts=["bin/github-backup"], + entry_points={ + "console_scripts": [ + "github-backup=github_backup.cli:main", + ], + }, url="http://github.com/josegonzalez/python-github-backup", license="MIT", classifiers=[ From 2bb83d6d8b710dee274521b23cbc003e0c0240df Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 11 Dec 2025 16:50:28 +0000 Subject: [PATCH 381/455] Release version 0.56.0 --- CHANGES.rst | 96 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f15dd59..37bdefc 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,104 @@ Changelog ========= -0.55.0 (2025-12-07) +0.56.0 (2025-12-11) ------------------- ------------------------ +Fix +~~~ +- Replace deprecated git lfs clone with git clone + git lfs fetch --all. + [Rodos] + + git lfs clone is deprecated - modern git clone handles LFS automatically. + Using git lfs fetch --all ensures all LFS objects across all refs are + backed up, matching the existing bare clone behavior and providing + complete LFS backups. + + Closes #379 +- Add Windows support with entry_points and os.replace. 
[Rodos] + + - Replace os.rename() with os.replace() for atomic file operations + on Windows (os.rename fails if destination exists on Windows) + - Add entry_points console_scripts for proper .exe generation on Windows + - Create github_backup/cli.py with main() entry point + - Add github_backup/__main__.py for python -m github_backup support + - Keep bin/github-backup as thin wrapper for backwards compatibility + + Closes #112 + +Other +~~~~~ +- Docs: add "Restoring from Backup" section to README. [Rodos] + + Clarifies that this tool is backup-only with no inbuilt restore. + Documents that git repos can be pushed back, but issues/PRs have + GitHub API limitations affecting all backup tools. + + Closes #246 +- Chore(deps): bump urllib3 in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3). + + + Updates `urllib3` from 2.6.0 to 2.6.1 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.6.0...2.6.1) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-version: 2.6.1 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump the python-packages group with 3 updates. + [dependabot[bot]] + + Bumps the python-packages group with 3 updates: [black](https://github.com/psf/black), [pytest](https://github.com/pytest-dev/pytest) and [platformdirs](https://github.com/tox-dev/platformdirs). 
+ + + Updates `black` from 25.11.0 to 25.12.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/25.11.0...25.12.0) + + Updates `pytest` from 9.0.1 to 9.0.2 + - [Release notes](https://github.com/pytest-dev/pytest/releases) + - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) + - [Commits](https://github.com/pytest-dev/pytest/compare/9.0.1...9.0.2) + + Updates `platformdirs` from 4.5.0 to 4.5.1 + - [Release notes](https://github.com/tox-dev/platformdirs/releases) + - [Changelog](https://github.com/tox-dev/platformdirs/blob/main/CHANGES.rst) + - [Commits](https://github.com/tox-dev/platformdirs/compare/4.5.0...4.5.1) + + --- + updated-dependencies: + - dependency-name: black + dependency-version: 25.12.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: pytest + dependency-version: 9.0.2 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + - dependency-name: platformdirs + dependency-version: 4.5.1 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... + + +0.55.0 (2025-12-07) +------------------- + Fix ~~~ - Improve error messages for inaccessible repos and empty wikis. 
[Rodos] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 8b19221..9dc8116 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.55.0" +__version__ = "0.56.0" From 3a513b6646e37e1c40ed066956b66079261e1b2e Mon Sep 17 00:00:00 2001 From: Rodos Date: Fri, 12 Dec 2025 09:55:13 +1100 Subject: [PATCH 382/455] docs: add stdin token example to README Add example showing how to pipe a token from stdin using file:///dev/stdin to avoid storing tokens in environment variables or command history. Closes #187 --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 9fd35fd..55e21c8 100644 --- a/README.rst +++ b/README.rst @@ -359,6 +359,9 @@ Debug an error/block or incomplete backup into a temporary directory. Omit "incr github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER +Pipe a token from stdin to avoid storing it in environment variables or command history (Unix-like systems only):: + + my-secret-manager get github-token | github-backup user -t file:///dev/stdin -o /backup --repositories Development From ef990483e2bcc76257776b02fbcf239943d09897 Mon Sep 17 00:00:00 2001 From: Rodos Date: Fri, 12 Dec 2025 10:25:49 +1100 Subject: [PATCH 383/455] Add GitHub Apps documentation and remove outdated header - Add GitHub Apps authentication section with setup steps and CI/CD workflow example using actions/create-github-app-token - Remove outdated machine-man-preview header (graduated 2020) Closes #189 --- README.rst | 31 +++++++++++++++++++++++++++++++ github_backup/github_backup.py | 3 --- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 55e21c8..272b606 100644 --- 
a/README.rst +++ b/README.rst @@ -174,6 +174,37 @@ Customise the permissions for your use case, but for a personal account full bac **Repository permissions**: Read access to contents, issues, metadata, pull requests, and webhooks. +GitHub Apps +~~~~~~~~~~~ + +GitHub Apps are ideal for organization backups in CI/CD. Tokens are scoped to specific repositories and expire after 1 hour. + +**One-time setup:** + +1. Create a GitHub App at *Settings -> Developer Settings -> GitHub Apps -> New GitHub App* +2. Set a name and homepage URL (can be any URL) +3. Uncheck "Webhook > Active" (not needed for backups) +4. Set permissions (same as fine-grained tokens above) +5. Click "Create GitHub App", then note the **App ID** shown on the next page +6. Under "Private keys", click "Generate a private key" and save the downloaded file +7. Go to *Install App* in your app's settings +8. Select the account/organization and which repositories to back up + +**CI/CD usage with GitHub Actions:** + +Store the App ID as a repository variable and the private key contents as a secret, then use ``actions/create-github-app-token``:: + + - uses: actions/create-github-app-token@v1 + id: app-token + with: + app-id: ${{ vars.APP_ID }} + private-key: ${{ secrets.APP_PRIVATE_KEY }} + + - run: github-backup myorg -t ${{ steps.app-token.outputs.token }} --as-app -o ./backup --all + +Note: Installation tokens expire after 1 hour. For long-running backups, use a fine-grained personal access token instead. + + Prefer SSH ~~~~~~~~~~ diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0282809..21daa20 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -796,9 +796,6 @@ def _construct_request( else: auth = auth.encode("ascii") request.add_header("Authorization", "token ".encode("ascii") + auth) - request.add_header( - "Accept", "application/vnd.github.machine-man-preview+json" - ) log_url = template if "?" 
not in template else template.split("?")[0] if querystring: From f6e2f40b0986260a20eed20e29fe124c53d32941 Mon Sep 17 00:00:00 2001 From: Rodos Date: Fri, 12 Dec 2025 16:14:47 +1100 Subject: [PATCH 384/455] Add --skip-assets-on flag to skip release asset downloads (#135) Allow users to skip downloading release assets for specific repositories while still backing up release metadata. Useful for starred repos with large assets (e.g. syncthing with 27GB+). Usage: --skip-assets-on repo1 repo2 owner/repo3 Features: - Space-separated repos (consistent with --exclude) - Case-insensitive matching - Supports both repo name and owner/repo format --- README.rst | 7 +- github_backup/github_backup.py | 102 +++++++---- tests/test_skip_assets_on.py | 320 +++++++++++++++++++++++++++++++++ 3 files changed, 397 insertions(+), 32 deletions(-) create mode 100644 tests/test_skip_assets_on.py diff --git a/README.rst b/README.rst index f292c87..506b67b 100644 --- a/README.rst +++ b/README.rst @@ -50,8 +50,8 @@ CLI Help output:: [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] [--releases] [--latest-releases NUMBER_OF_LATEST_RELEASES] - [--skip-prerelease] [--assets] [--attachments] - [--exclude [REPOSITORY [REPOSITORY ...]] + [--skip-prerelease] [--assets] [--skip-assets-on [REPO ...]] + [--attachments] [--exclude [REPOSITORY [REPOSITORY ...]] [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] USER @@ -133,6 +133,9 @@ CLI Help output:: --skip-prerelease skip prerelease and draft versions; only applies if including releases --assets include assets alongside release information; only applies if including releases + --skip-assets-on [REPO ...] + skip asset downloads for these repositories (e.g. 
+ --skip-assets-on repo1 owner/repo2) --attachments download user-attachments from issues and pull requests to issues/attachments/{issue_number}/ and pulls/attachments/{pull_number}/ directories diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0782514..b9c23a7 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -440,6 +440,12 @@ def parse_args(args=None): dest="include_assets", help="include assets alongside release information; only applies if including releases", ) + parser.add_argument( + "--skip-assets-on", + dest="skip_assets_on", + nargs="*", + help="skip asset downloads for these repositories", + ) parser.add_argument( "--attachments", action="store_true", @@ -561,7 +567,7 @@ def get_github_host(args): def read_file_contents(file_uri): - return open(file_uri[len(FILE_URI_PREFIX):], "rt").readline().strip() + return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() def get_github_repo_url(args, repository): @@ -631,7 +637,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): pass raise RepositoryUnavailableError( "Repository unavailable due to legal reasons (HTTP 451)", - dmca_url=dmca_url + dmca_url=dmca_url, ) # Check if we got correct data @@ -709,7 +715,7 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False): # Parse Link header: ; rel="next" for link in link_header.split(","): if 'rel="next"' in link: - next_url = link[link.find("<") + 1:link.find(">")] + next_url = link[link.find("<") + 1 : link.find(">")] break if not next_url: break @@ -763,9 +769,7 @@ def _get_response(request, auth, template): return r, errors -def _construct_request( - per_page, query_args, template, auth, as_app=None, fine=False -): +def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False): # If template is already a full URL with query params (from Link header), use it directly if "?" 
in template and template.startswith("http"): request_url = template @@ -1480,9 +1484,11 @@ def download_attachments( manifest = { "issue_number": number, "issue_type": item_type, - "repository": f"{args.user}/{args.repository}" - if hasattr(args, "repository") and args.repository - else args.user, + "repository": ( + f"{args.user}/{args.repository}" + if hasattr(args, "repository") and args.repository + else args.user + ), "manifest_updated_at": datetime.now(timezone.utc).isoformat(), "attachments": attachment_metadata_list, } @@ -1538,9 +1544,7 @@ def retrieve_repositories(args, authenticated_user): else: repo_path = "{0}/{1}".format(args.user, args.repository) single_request = True - template = "https://{0}/repos/{1}".format( - get_github_api_host(args), repo_path - ) + template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path) repos = retrieve_data(args, template, single_request=single_request) @@ -1565,7 +1569,10 @@ def retrieve_repositories(args, authenticated_user): repos.extend(gists) if args.include_starred_gists: - if not authenticated_user.get("login") or args.user.lower() != authenticated_user["login"].lower(): + if ( + not authenticated_user.get("login") + or args.user.lower() != authenticated_user["login"].lower() + ): logger.warning( "Cannot retrieve starred gists for '%s'. 
GitHub only allows access to the authenticated user's starred gists.", args.user, @@ -1673,9 +1680,11 @@ def backup_repositories(args, output_directory, repositories): include_gists = args.include_gists or args.include_starred_gists include_starred = args.all_starred and repository.get("is_starred") - if (args.include_repository or args.include_everything) or ( - include_gists and repository.get("is_gist") - ) or include_starred: + if ( + (args.include_repository or args.include_everything) + or (include_gists and repository.get("is_gist")) + or include_starred + ): repo_name = ( repository.get("name") if not repository.get("is_gist") @@ -1735,7 +1744,9 @@ def backup_repositories(args, output_directory, repositories): include_assets=args.include_assets or args.include_everything, ) except RepositoryUnavailableError as e: - logger.warning(f"Repository {repository['full_name']} is unavailable (HTTP 451)") + logger.warning( + f"Repository {repository['full_name']} is unavailable (HTTP 451)" + ) if e.dmca_url: logger.warning(f"DMCA notice: {e.dmca_url}") logger.info(f"Skipping remaining resources for {repository['full_name']}") @@ -1795,7 +1806,11 @@ def backup_issues(args, repo_cwd, repository, repos_template): modified = os.path.getmtime(issue_file) modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ") if modified > issue["updated_at"]: - logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number)) + logger.info( + "Skipping issue {0} because it wasn't modified since last backup".format( + number + ) + ) continue if args.include_issue_comments or args.include_everything: @@ -1869,7 +1884,11 @@ def backup_pulls(args, repo_cwd, repository, repos_template): modified = os.path.getmtime(pull_file) modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ") if modified > pull["updated_at"]: - logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number)) + 
logger.info( + "Skipping pull request {0} because it wasn't modified since last backup".format( + number + ) + ) continue if args.include_pull_comments or args.include_everything: template = comments_regular_template.format(number) @@ -1919,9 +1938,11 @@ def backup_milestones(args, repo_cwd, repository, repos_template): elif written_count == 0: logger.info("{0} milestones unchanged, skipped write".format(total)) else: - logger.info("Saved {0} of {1} milestones to disk ({2} unchanged)".format( - written_count, total, total - written_count - )) + logger.info( + "Saved {0} of {1} milestones to disk ({2} unchanged)".format( + written_count, total, total - written_count + ) + ) def backup_labels(args, repo_cwd, repository, repos_template): @@ -1975,6 +1996,20 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F ) releases = releases[: args.number_of_latest_releases] + # Check if this repo should skip asset downloads (case-insensitive) + skip_assets = False + if include_assets: + repo_name = repository.get("name", "").lower() + repo_full_name = repository.get("full_name", "").lower() + skip_repos = [r.lower() for r in (args.skip_assets_on or [])] + skip_assets = repo_name in skip_repos or repo_full_name in skip_repos + if skip_assets: + logger.info( + "Skipping assets for {0} ({1} releases) due to --skip-assets-on".format( + repository.get("name"), len(releases) + ) + ) + # for each release, store it written_count = 0 for release in releases: @@ -1986,7 +2021,7 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F if json_dump_if_changed(release, output_filepath): written_count += 1 - if include_assets: + if include_assets and not skip_assets: assets = retrieve_data(args, release["assets_url"]) if len(assets) > 0: # give release asset files somewhere to live & download them (not including source archives) @@ -2008,9 +2043,11 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F 
elif written_count == 0: logger.info("{0} releases unchanged, skipped write".format(total)) else: - logger.info("Saved {0} of {1} releases to disk ({2} unchanged)".format( - written_count, total, total - written_count - )) + logger.info( + "Saved {0} of {1} releases to disk ({2} unchanged)".format( + written_count, total, total - written_count + ) + ) def fetch_repository( @@ -2024,9 +2061,12 @@ def fetch_repository( ): if bare_clone: if os.path.exists(local_dir): - clone_exists = subprocess.check_output( - ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir - ) == b"true\n" + clone_exists = ( + subprocess.check_output( + ["git", "rev-parse", "--is-bare-repository"], cwd=local_dir + ) + == b"true\n" + ) else: clone_exists = False else: @@ -2047,7 +2087,9 @@ def fetch_repository( ) else: logger.info( - "Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)".format(name) + "Skipping {0} (repository not accessible - may be empty, private, or credentials invalid)".format( + name + ) ) return diff --git a/tests/test_skip_assets_on.py b/tests/test_skip_assets_on.py new file mode 100644 index 0000000..2437e05 --- /dev/null +++ b/tests/test_skip_assets_on.py @@ -0,0 +1,320 @@ +"""Tests for --skip-assets-on flag behavior (issue #135).""" + +import pytest +from unittest.mock import Mock, patch + +from github_backup import github_backup + + +class TestSkipAssetsOn: + """Test suite for --skip-assets-on flag. + + Issue #135: Allow skipping asset downloads for specific repositories + while still backing up release metadata. 
+ """ + + def _create_mock_args(self, **overrides): + """Create a mock args object with sensible defaults.""" + args = Mock() + args.user = "testuser" + args.output_directory = "/tmp/backup" + args.include_repository = False + args.include_everything = False + args.include_gists = False + args.include_starred_gists = False + args.all_starred = False + args.skip_existing = False + args.bare_clone = False + args.lfs_clone = False + args.no_prune = False + args.include_wiki = False + args.include_issues = False + args.include_issue_comments = False + args.include_issue_events = False + args.include_pulls = False + args.include_pull_comments = False + args.include_pull_commits = False + args.include_pull_details = False + args.include_labels = False + args.include_hooks = False + args.include_milestones = False + args.include_releases = True + args.include_assets = True + args.skip_assets_on = [] + args.include_attachments = False + args.incremental = False + args.incremental_by_files = False + args.github_host = None + args.prefer_ssh = False + args.token_classic = "test-token" + args.token_fine = None + args.username = None + args.password = None + args.as_app = False + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.skip_prerelease = False + args.number_of_latest_releases = None + + for key, value in overrides.items(): + setattr(args, key, value) + + return args + + def _create_mock_repository(self, name="test-repo", owner="testuser"): + """Create a mock repository object.""" + return { + "name": name, + "full_name": f"{owner}/{name}", + "owner": {"login": owner}, + "private": False, + "fork": False, + "has_wiki": False, + } + + def _create_mock_release(self, tag="v1.0.0"): + """Create a mock release object.""" + return { + "tag_name": tag, + "name": tag, + "prerelease": False, + "draft": False, + "assets_url": f"https://api.github.com/repos/testuser/test-repo/releases/{tag}/assets", + } + + def _create_mock_asset(self, 
name="asset.zip"): + """Create a mock asset object.""" + return { + "name": name, + "url": f"https://api.github.com/repos/testuser/test-repo/releases/assets/{name}", + } + + +class TestSkipAssetsOnArgumentParsing(TestSkipAssetsOn): + """Tests for --skip-assets-on argument parsing.""" + + def test_skip_assets_on_not_set_defaults_to_none(self): + """When --skip-assets-on is not specified, it should default to None.""" + args = github_backup.parse_args(["testuser"]) + assert args.skip_assets_on is None + + def test_skip_assets_on_single_repo(self): + """Single --skip-assets-on should create list with one item.""" + args = github_backup.parse_args(["testuser", "--skip-assets-on", "big-repo"]) + assert args.skip_assets_on == ["big-repo"] + + def test_skip_assets_on_multiple_repos(self): + """Multiple repos can be specified space-separated (like --exclude).""" + args = github_backup.parse_args( + [ + "testuser", + "--skip-assets-on", + "big-repo", + "another-repo", + "owner/third-repo", + ] + ) + assert args.skip_assets_on == ["big-repo", "another-repo", "owner/third-repo"] + + +class TestSkipAssetsOnBehavior(TestSkipAssetsOn): + """Tests for --skip-assets-on behavior in backup_releases.""" + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_assets_downloaded_when_not_skipped( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Assets should be downloaded when repo is not in skip list.""" + args = self._create_mock_args(skip_assets_on=[]) + repository = self._create_mock_repository(name="normal-repo") + release = self._create_mock_release() + asset = self._create_mock_asset() + + mock_json_dump.return_value = True + mock_retrieve.side_effect = [ + [release], # First call: get releases + [asset], # Second call: get assets + ] + + with patch("os.path.join", side_effect=lambda 
*args: "/".join(args)): + github_backup.backup_releases( + args, + "/tmp/backup/repositories/normal-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should have been called for the asset + mock_download.assert_called_once() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_assets_skipped_when_repo_name_matches( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Assets should be skipped when repo name is in skip list.""" + args = self._create_mock_args(skip_assets_on=["big-repo"]) + repository = self._create_mock_repository(name="big-repo") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/big-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should NOT have been called + mock_download.assert_not_called() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_assets_skipped_when_full_name_matches( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Assets should be skipped when owner/repo format matches.""" + args = self._create_mock_args(skip_assets_on=["otheruser/big-repo"]) + repository = self._create_mock_repository(name="big-repo", owner="otheruser") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/big-repo", + repository, + 
"https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should NOT have been called + mock_download.assert_not_called() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_case_insensitive_matching( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Skip matching should be case-insensitive.""" + # User types uppercase, repo name is lowercase + args = self._create_mock_args(skip_assets_on=["BIG-REPO"]) + repository = self._create_mock_repository(name="big-repo") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/big-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should NOT have been called (case-insensitive match) + assert not mock_download.called + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_multiple_skip_repos( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Multiple repos in skip list should all be skipped.""" + args = self._create_mock_args(skip_assets_on=["repo1", "repo2", "repo3"]) + repository = self._create_mock_repository(name="repo2") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/repo2", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file should NOT have been called + mock_download.assert_not_called() + + 
@patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_release_metadata_still_saved_when_assets_skipped( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Release JSON should still be saved even when assets are skipped.""" + args = self._create_mock_args(skip_assets_on=["big-repo"]) + repository = self._create_mock_repository(name="big-repo") + release = self._create_mock_release() + + mock_json_dump.return_value = True + mock_retrieve.return_value = [release] + + github_backup.backup_releases( + args, + "/tmp/backup/repositories/big-repo", + repository, + "https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # json_dump_if_changed should have been called for release metadata + mock_json_dump.assert_called_once() + # But download_file should NOT have been called + mock_download.assert_not_called() + + @patch("github_backup.github_backup.download_file") + @patch("github_backup.github_backup.retrieve_data") + @patch("github_backup.github_backup.mkdir_p") + @patch("github_backup.github_backup.json_dump_if_changed") + def test_non_matching_repo_still_downloads_assets( + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + ): + """Repos not in skip list should still download assets.""" + args = self._create_mock_args(skip_assets_on=["other-repo"]) + repository = self._create_mock_repository(name="normal-repo") + release = self._create_mock_release() + asset = self._create_mock_asset() + + mock_json_dump.return_value = True + mock_retrieve.side_effect = [ + [release], # First call: get releases + [asset], # Second call: get assets + ] + + with patch("os.path.join", side_effect=lambda *args: "/".join(args)): + github_backup.backup_releases( + args, + "/tmp/backup/repositories/normal-repo", + repository, + 
"https://api.github.com/repos/{owner}/{repo}", + include_assets=True, + ) + + # download_file SHOULD have been called + mock_download.assert_called_once() + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From ba852b58307cbb1a44f8d383fe0dbfd54fc41c5b Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Fri, 12 Dec 2025 11:07:14 +0000 Subject: [PATCH 385/455] Release version 0.57.0 --- CHANGES.rst | 33 ++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 37bdefc..1a8809e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,40 @@ Changelog ========= -0.56.0 (2025-12-11) +0.57.0 (2025-12-12) ------------------- ------------------------ +- Add GitHub Apps documentation and remove outdated header. [Rodos] + + - Add GitHub Apps authentication section with setup steps + and CI/CD workflow example using actions/create-github-app-token + - Remove outdated machine-man-preview header (graduated 2020) + + Closes #189 +- Docs: add stdin token example to README. [Rodos] + + Add example showing how to pipe a token from stdin using + file:///dev/stdin to avoid storing tokens in environment + variables or command history. + + Closes #187 +- Add --skip-assets-on flag to skip release asset downloads (#135) + [Rodos] + + Allow users to skip downloading release assets for specific repositories + while still backing up release metadata. Useful for starred repos with + large assets (e.g. syncthing with 27GB+). 
+ + Usage: --skip-assets-on repo1 repo2 owner/repo3 + + Features: + - Space-separated repos (consistent with --exclude) + - Case-insensitive matching + - Supports both repo name and owner/repo format + + +0.56.0 (2025-12-11) +------------------- Fix ~~~ diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 9dc8116..6e6e624 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.56.0" +__version__ = "0.57.0" From 59a70ff11aaa0c60c10d0116e6962118d70f46e5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Dec 2025 13:09:29 +0000 Subject: [PATCH 386/455] chore(deps): bump urllib3 in the python-packages group Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3). Updates `urllib3` from 2.6.1 to 2.6.2 - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.6.1...2.6.2) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.6.2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 5ca68cb..7a478f8 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -35,6 +35,6 @@ setuptools==80.9.0 six==1.17.0 tqdm==4.67.1 twine==6.2.0 -urllib3==2.6.1 +urllib3==2.6.2 webencodings==0.5.1 zipp==3.23.0 From 241949137deead07b8d4e0c7a4a1a28b7cedbf61 Mon Sep 17 00:00:00 2001 From: Rodos Date: Sat, 13 Dec 2025 11:22:53 +1100 Subject: [PATCH 387/455] chore: remove transitive deps from release-requirements.txt --- release-requirements.txt | 45 +++++++++------------------------------- 1 file changed, 10 insertions(+), 35 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 7a478f8..dd2d73f 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,40 +1,15 @@ +# Linting & Formatting autopep8==2.3.2 black==25.12.0 -bleach==6.3.0 -certifi==2025.11.12 -charset-normalizer==3.4.4 -click==8.3.1 -colorama==0.4.6 -docutils==0.22.3 flake8==7.3.0 -gitchangelog==3.0.4 + +# Testing pytest==9.0.2 -idna==3.11 -importlib-metadata==8.7.0 -jaraco.classes==3.4.0 -keyring==25.7.0 -markdown-it-py==4.0.0 -mccabe==0.7.0 -mdurl==0.1.2 -more-itertools==10.8.0 -mypy-extensions==1.1.0 -packaging==25.0 -pathspec==0.12.1 -pkginfo==1.12.1.2 -platformdirs==4.5.1 -pycodestyle==2.14.0 -pyflakes==3.4.0 -Pygments==2.19.2 -readme-renderer==44.0 -requests==2.32.5 -requests-toolbelt==1.0.0 -restructuredtext-lint==2.0.2 -rfc3986==2.0.0 -rich==14.2.0 -setuptools==80.9.0 -six==1.17.0 -tqdm==4.67.1 + +# Release & Publishing twine==6.2.0 -urllib3==2.6.2 -webencodings==0.5.1 -zipp==3.23.0 +gitchangelog==3.0.4 +setuptools==80.9.0 + +# Documentation +restructuredtext-lint==2.0.2 From 46140b0ff13dd512960f42365b35d5ebd011aff6 Mon Sep 17 00:00:00 2001 From: Rodos Date: Tue, 16 Dec 2025 21:44:16 +1100 Subject: [PATCH 388/455] Fix retry logic for HTTP 5xx errors and 
network failures Refactors error handling to retry all 5xx errors (not just 502), network errors (URLError, socket.error, IncompleteRead), and JSON parse errors with exponential backoff and jitter. Respects retry-after and rate limit headers per GitHub API requirements. Consolidates retry logic into make_request_with_retry() wrapper and adds clear logging for retry attempts and failures. Removes dead code from 2016 (errors list, _request_http_error, _request_url_error) that was intentionally disabled in commit 1e5a9048 to fix #29. Fixes #140, #110, #138 --- github_backup/github_backup.py | 369 +++++++++++++++------------------ tests/test_http_451.py | 55 +---- tests/test_pagination.py | 20 +- tests/test_retrieve_data.py | 365 ++++++++++++++++++++++++++++++++ 4 files changed, 545 insertions(+), 264 deletions(-) create mode 100644 tests/test_retrieve_data.py diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 4bd38ce..34d529a 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -12,6 +12,7 @@ import logging import os import platform +import random import re import select import socket @@ -19,6 +20,7 @@ import subprocess import sys import time +from collections.abc import Generator from datetime import datetime from http.client import IncompleteRead from urllib.error import HTTPError, URLError @@ -74,6 +76,9 @@ def __init__(self, message, dmca_url=None): " 3. 
Debian/Ubuntu: apt-get install ca-certificates\n\n" ) +# Retry configuration +MAX_RETRIES = 5 + def logging_subprocess( popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs @@ -603,170 +608,178 @@ def get_github_repo_url(args, repository): return repo_url -def retrieve_data_gen(args, template, query_args=None, single_request=False): - auth = get_auth(args, encode=not args.as_app) - query_args = get_query_args(query_args) - per_page = 100 - next_url = None +def calculate_retry_delay(attempt, headers): + """Calculate delay before next retry with exponential backoff.""" + # Respect retry-after header if present + if retry_after := headers.get("retry-after"): + return int(retry_after) - while True: - if single_request: - request_per_page = None - else: - request_per_page = per_page + # Respect rate limit reset time + if int(headers.get("x-ratelimit-remaining", 1)) < 1: + reset_time = int(headers.get("x-ratelimit-reset", 0)) + return max(10, reset_time - calendar.timegm(time.gmtime())) - request = _construct_request( - request_per_page, - query_args, - next_url or template, - auth, - as_app=args.as_app, - fine=True if args.token_fine is not None else False, - ) # noqa - r, errors = _get_response(request, auth, next_url or template) + # Exponential backoff with jitter for server errors (1s base, 120s max) + delay = min(1.0 * (2**attempt), 120.0) + return delay + random.uniform(0, delay * 0.1) - status_code = int(r.getcode()) - # Handle DMCA takedown (HTTP 451) - raise exception to skip entire repository - if status_code == 451: - dmca_url = None - try: - response_data = json.loads(r.read().decode("utf-8")) - dmca_url = response_data.get("block", {}).get("html_url") - except Exception: - pass - raise RepositoryUnavailableError( - "Repository unavailable due to legal reasons (HTTP 451)", - dmca_url=dmca_url, - ) +def retrieve_data(args, template, query_args=None, paginated=True): + """ + Fetch the data from GitHub API. 
- # Check if we got correct data - try: - response = json.loads(r.read().decode("utf-8")) - except IncompleteRead: - logger.warning("Incomplete read error detected") - read_error = True - except json.decoder.JSONDecodeError: - logger.warning("JSON decode error detected") - read_error = True - except TimeoutError: - logger.warning("Tiemout error detected") - read_error = True - else: - read_error = False + Handle both single requests and pagination with yield of individual dicts. + Handles throttling, retries, read errors, and DMCA takedowns. + """ + query_args = query_args or {} + auth = get_auth(args, encode=not args.as_app) + per_page = 100 - # be gentle with API request limit and throttle requests if remaining requests getting low - limit_remaining = int(r.headers.get("x-ratelimit-remaining", 0)) - if args.throttle_limit and limit_remaining <= args.throttle_limit: - logger.info( - "API request limit hit: {} requests left, pausing further requests for {}s".format( - limit_remaining, args.throttle_pause + def _extract_next_page_url(link_header): + for link in link_header.split(","): + if 'rel="next"' in link: + return link[link.find("<") + 1:link.find(">")] + return None + + def fetch_all() -> Generator[dict, None, None]: + next_url = None + + while True: + # FIRST: Fetch response + + for attempt in range(MAX_RETRIES): + request = _construct_request( + per_page=per_page if paginated else None, + query_args=query_args, + template=next_url or template, + auth=auth, + as_app=args.as_app, + fine=args.token_fine is not None, ) - ) - time.sleep(args.throttle_pause) - - retries = 0 - while retries < 3 and (status_code == 502 or read_error): - logger.warning("API request failed. 
Retrying in 5 seconds") - retries += 1 - time.sleep(5) - request = _construct_request( - request_per_page, - query_args, - next_url or template, - auth, - as_app=args.as_app, - fine=True if args.token_fine is not None else False, - ) # noqa - r, errors = _get_response(request, auth, next_url or template) - - status_code = int(r.getcode()) - try: - response = json.loads(r.read().decode("utf-8")) - read_error = False - except IncompleteRead: - logger.warning("Incomplete read error detected") - read_error = True - except json.decoder.JSONDecodeError: - logger.warning("JSON decode error detected") - read_error = True - except TimeoutError: - logger.warning("Tiemout error detected") - read_error = True - - if status_code != 200: - template = "API request returned HTTP {0}: {1}" - errors.append(template.format(status_code, r.reason)) - raise Exception(", ".join(errors)) - - if read_error: - template = "API request problem reading response for {0}" - errors.append(template.format(request)) - raise Exception(", ".join(errors)) - - if len(errors) == 0: - if type(response) is list: - for resp in response: - yield resp - # Parse Link header for next page URL (cursor-based pagination) - link_header = r.headers.get("Link", "") - next_url = None - if link_header: - # Parse Link header: ; rel="next" - for link in link_header.split(","): - if 'rel="next"' in link: - next_url = link[link.find("<") + 1 : link.find(">")] - break - if not next_url: - break - elif type(response) is dict and single_request: - yield response + http_response = make_request_with_retry(request, auth) + + match http_response.getcode(): + case 200: + # Success - Parse JSON response + try: + response = json.loads(http_response.read().decode("utf-8")) + break # Exit retry loop and handle the data returned + except ( + IncompleteRead, + json.decoder.JSONDecodeError, + TimeoutError, + ) as e: + logger.warning(f"{type(e).__name__} reading response") + if attempt < MAX_RETRIES - 1: + delay = 
calculate_retry_delay(attempt, {}) + logger.warning( + f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RETRIES})" + ) + time.sleep(delay) + continue # Next retry attempt + + case 451: + # DMCA takedown - extract URL if available, then raise + dmca_url = None + try: + response_data = json.loads( + http_response.read().decode("utf-8") + ) + dmca_url = response_data.get("block", {}).get("html_url") + except Exception: + pass + raise RepositoryUnavailableError( + "Repository unavailable due to legal reasons (HTTP 451)", + dmca_url=dmca_url, + ) + + case _: + raise Exception( + f"API request returned HTTP {http_response.getcode()}: {http_response.reason}" + ) + else: + logger.error( + f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}" + ) + raise Exception( + f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}" + ) + + # SECOND: Process and paginate - if len(errors) > 0: - raise Exception(", ".join(errors)) + # Pause before next request if rate limit is low + if ( + remaining := int(http_response.headers.get("x-ratelimit-remaining", 0)) + ) <= (args.throttle_limit or 0): + if args.throttle_limit: + logger.info( + f"Throttling: {remaining} requests left, pausing {args.throttle_pause}s" + ) + time.sleep(args.throttle_pause) - if single_request: - break + # Yield results + if isinstance(response, list): + yield from response + elif isinstance(response, dict): + yield response + # Check for more pages + if not paginated or not ( + next_url := _extract_next_page_url( + http_response.headers.get("Link", "") + ) + ): + break # No more data -def retrieve_data(args, template, query_args=None, single_request=False): - return list(retrieve_data_gen(args, template, query_args, single_request)) + return list(fetch_all()) -def get_query_args(query_args=None): - if not query_args: - query_args = {} - return query_args +def make_request_with_retry(request, auth): + """Make HTTP request with automatic retry for 
transient errors.""" + def is_retryable_status(status_code, headers): + # Server errors are always retryable + if status_code in (500, 502, 503, 504): + return True + # Rate limit (403/429) is retryable if limit exhausted + if status_code in (403, 429): + return int(headers.get("x-ratelimit-remaining", 1)) < 1 + return False -def _get_response(request, auth, template): - retry_timeout = 3 - errors = [] - # We'll make requests in a loop so we can - # delay and retry in the case of rate-limiting - while True: - should_continue = False + for attempt in range(MAX_RETRIES): try: - r = urlopen(request, context=https_ctx) + return urlopen(request, context=https_ctx) + except HTTPError as exc: - errors, should_continue = _request_http_error(exc, auth, errors) # noqa - r = exc - except URLError as e: - logger.warning(e.reason) - should_continue, retry_timeout = _request_url_error(template, retry_timeout) - if not should_continue: - raise - except socket.error as e: - logger.warning(e.strerror) - should_continue, retry_timeout = _request_url_error(template, retry_timeout) - if not should_continue: + # HTTPError can be used as a response-like object + if not is_retryable_status(exc.code, exc.headers): + raise # Non-retryable error + + if attempt >= MAX_RETRIES - 1: + logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts") raise - if should_continue: - continue + delay = calculate_retry_delay(attempt, exc.headers) + logger.warning( + f"HTTP {exc.code}, retrying in {delay:.1f}s " + f"(attempt {attempt + 1}/{MAX_RETRIES})" + ) + if auth is None and exc.code in (403, 429): + logger.info("Hint: Authenticate to raise your GitHub rate limit") + time.sleep(delay) - break - return r, errors + except (URLError, socket.error) as e: + if attempt >= MAX_RETRIES - 1: + logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e}") + raise + delay = calculate_retry_delay(attempt, {}) + logger.warning( + f"Connection error: {e}, retrying in {delay:.1f}s " + 
f"(attempt {attempt + 1}/{MAX_RETRIES})" + ) + time.sleep(delay) + + raise Exception(f"Request failed after {MAX_RETRIES} attempts") # pragma: no cover def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False): @@ -808,52 +821,6 @@ def _construct_request(per_page, query_args, template, auth, as_app=None, fine=F return request -def _request_http_error(exc, auth, errors): - # HTTPError behaves like a Response so we can - # check the status code and headers to see exactly - # what failed. - - should_continue = False - headers = exc.headers - limit_remaining = int(headers.get("x-ratelimit-remaining", 0)) - - if exc.code == 403 and limit_remaining < 1: - # The X-RateLimit-Reset header includes a - # timestamp telling us when the limit will reset - # so we can calculate how long to wait rather - # than inefficiently polling: - gm_now = calendar.timegm(time.gmtime()) - reset = int(headers.get("x-ratelimit-reset", 0)) or gm_now - # We'll never sleep for less than 10 seconds: - delta = max(10, reset - gm_now) - - limit = headers.get("x-ratelimit-limit") - logger.warning( - "Exceeded rate limit of {} requests; waiting {} seconds to reset".format( - limit, delta - ) - ) # noqa - - if auth is None: - logger.info("Hint: Authenticate to raise your GitHub rate limit") - - time.sleep(delta) - should_continue = True - return errors, should_continue - - -def _request_url_error(template, retry_timeout): - # In case of a connection timing out, we can retry a few time - # But we won't crash and not back-up the rest now - logger.info("'{}' timed out".format(template)) - retry_timeout -= 1 - - if retry_timeout >= 0: - return True, retry_timeout - - raise Exception("'{}' timed out to much, skipping!".format(template)) - - class S3HTTPRedirectHandler(HTTPRedirectHandler): """ A subclassed redirect handler for downloading Github assets from S3. 
@@ -1503,7 +1470,7 @@ def download_attachments( def get_authenticated_user(args): template = "https://{0}/user".format(get_github_api_host(args)) - data = retrieve_data(args, template, single_request=True) + data = retrieve_data(args, template, paginated=False) return data[0] @@ -1517,7 +1484,7 @@ def check_git_lfs_install(): def retrieve_repositories(args, authenticated_user): logger.info("Retrieving repositories") - single_request = False + paginated = True if args.user == authenticated_user["login"]: # we must use the /user/repos API to be able to access private repos template = "https://{0}/user/repos".format(get_github_api_host(args)) @@ -1540,16 +1507,16 @@ def retrieve_repositories(args, authenticated_user): repo_path = args.repository else: repo_path = "{0}/{1}".format(args.user, args.repository) - single_request = True + paginated = False template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path) - repos = retrieve_data(args, template, single_request=single_request) + repos = retrieve_data(args, template, paginated=paginated) if args.all_starred: starred_template = "https://{0}/users/{1}/starred".format( get_github_api_host(args), args.user ) - starred_repos = retrieve_data(args, starred_template, single_request=False) + starred_repos = retrieve_data(args, starred_template) # flag each repo as starred for downstream processing for item in starred_repos: item.update({"is_starred": True}) @@ -1559,7 +1526,7 @@ def retrieve_repositories(args, authenticated_user): gists_template = "https://{0}/users/{1}/gists".format( get_github_api_host(args), args.user ) - gists = retrieve_data(args, gists_template, single_request=False) + gists = retrieve_data(args, gists_template) # flag each repo as a gist for downstream processing for item in gists: item.update({"is_gist": True}) @@ -1578,9 +1545,7 @@ def retrieve_repositories(args, authenticated_user): starred_gists_template = "https://{0}/gists/starred".format( get_github_api_host(args) ) - 
starred_gists = retrieve_data( - args, starred_gists_template, single_request=False - ) + starred_gists = retrieve_data(args, starred_gists_template) # flag each repo as a starred gist for downstream processing for item in starred_gists: item.update({"is_gist": True, "is_starred": True}) @@ -1849,14 +1814,14 @@ def backup_pulls(args, repo_cwd, repository, repos_template): pull_states = ["open", "closed"] for pull_state in pull_states: query_args["state"] = pull_state - _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) + _pulls = retrieve_data(args, _pulls_template, query_args=query_args) for pull in _pulls: if args.since and pull["updated_at"] < args.since: break if not args.since or pull["updated_at"] >= args.since: pulls[pull["number"]] = pull else: - _pulls = retrieve_data_gen(args, _pulls_template, query_args=query_args) + _pulls = retrieve_data(args, _pulls_template, query_args=query_args) for pull in _pulls: if args.since and pull["updated_at"] < args.since: break @@ -1864,7 +1829,7 @@ def backup_pulls(args, repo_cwd, repository, repos_template): pulls[pull["number"]] = retrieve_data( args, _pulls_template + "/{}".format(pull["number"]), - single_request=True, + paginated=False, )[0] logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) diff --git a/tests/test_http_451.py b/tests/test_http_451.py index 7feca1d..51218d2 100644 --- a/tests/test_http_451.py +++ b/tests/test_http_451.py @@ -13,7 +13,6 @@ class TestHTTP451Exception: def test_repository_unavailable_error_raised(self): """HTTP 451 should raise RepositoryUnavailableError with DMCA URL.""" - # Create mock args args = Mock() args.as_app = False args.token_fine = None @@ -25,7 +24,6 @@ def test_repository_unavailable_error_raised(self): args.throttle_limit = None args.throttle_pause = 0 - # Mock HTTPError 451 response mock_response = Mock() mock_response.getcode.return_value = 451 @@ -41,14 +39,10 @@ def test_repository_unavailable_error_raised(self): 
mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - def mock_get_response(request, auth, template): - return mock_response, [] - - with patch("github_backup.github_backup._get_response", side_effect=mock_get_response): + with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: - list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) + github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") - # Check exception has DMCA URL assert exc_info.value.dmca_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" assert "451" in str(exc_info.value) @@ -71,14 +65,10 @@ def test_repository_unavailable_error_without_dmca_url(self): mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - def mock_get_response(request, auth, template): - return mock_response, [] - - with patch("github_backup.github_backup._get_response", side_effect=mock_get_response): + with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: - list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) + github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") - # Exception raised even without DMCA URL assert exc_info.value.dmca_url is None assert "451" in str(exc_info.value) @@ -101,42 +91,9 @@ def test_repository_unavailable_error_with_malformed_json(self): mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - def mock_get_response(request, auth, template): - return mock_response, [] - - with patch("github_backup.github_backup._get_response", 
side_effect=mock_get_response): + with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): with pytest.raises(github_backup.RepositoryUnavailableError): - list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/dmca/issues")) - - def test_other_http_errors_unchanged(self): - """Other HTTP errors should still raise generic Exception.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.username = None - args.password = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - - mock_response = Mock() - mock_response.getcode.return_value = 404 - mock_response.read.return_value = b'{"message": "Not Found"}' - mock_response.headers = {"x-ratelimit-remaining": "5000"} - mock_response.reason = "Not Found" - - def mock_get_response(request, auth, template): - return mock_response, [] - - with patch("github_backup.github_backup._get_response", side_effect=mock_get_response): - # Should raise generic Exception, not RepositoryUnavailableError - with pytest.raises(Exception) as exc_info: - list(github_backup.retrieve_data_gen(args, "https://api.github.com/repos/test/notfound/issues")) - - assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError) - assert "404" in str(exc_info.value) + github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") if __name__ == "__main__": diff --git a/tests/test_pagination.py b/tests/test_pagination.py index 0d5bd82..75dfccd 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -40,7 +40,7 @@ def headers(self): @pytest.fixture def mock_args(): - """Mock args for retrieve_data_gen.""" + """Mock args for retrieve_data.""" args = Mock() args.as_app = False args.token_fine = None @@ -77,10 +77,8 @@ def mock_urlopen(request, *args, **kwargs): return responses[len(requests_made) - 1] with 
patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - results = list( - github_backup.retrieve_data_gen( - mock_args, "https://api.github.com/repos/owner/repo/issues" - ) + results = github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/owner/repo/issues" ) # Verify all items retrieved and cursor was used in second request @@ -112,10 +110,8 @@ def mock_urlopen(request, *args, **kwargs): return responses[len(requests_made) - 1] with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - results = list( - github_backup.retrieve_data_gen( - mock_args, "https://api.github.com/repos/owner/repo/pulls" - ) + results = github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/owner/repo/pulls" ) # Verify all items retrieved and page parameter was used (not cursor) @@ -142,10 +138,8 @@ def mock_urlopen(request, *args, **kwargs): return responses[len(requests_made) - 1] with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - results = list( - github_backup.retrieve_data_gen( - mock_args, "https://api.github.com/repos/owner/repo/labels" - ) + results = github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/owner/repo/labels" ) # Verify pagination stopped after first request diff --git a/tests/test_retrieve_data.py b/tests/test_retrieve_data.py new file mode 100644 index 0000000..c358ff0 --- /dev/null +++ b/tests/test_retrieve_data.py @@ -0,0 +1,365 @@ +"""Tests for retrieve_data function.""" + +import json +import socket +from unittest.mock import Mock, patch +from urllib.error import HTTPError, URLError + +import pytest + +from github_backup import github_backup +from github_backup.github_backup import ( + MAX_RETRIES, + calculate_retry_delay, + make_request_with_retry, +) + + +class TestCalculateRetryDelay: + def test_respects_retry_after_header(self): + headers = {'retry-after': '30'} + assert calculate_retry_delay(0, headers) == 30 + + def 
test_respects_rate_limit_reset(self): + import time + import calendar + # Set reset time 60 seconds in the future + future_reset = calendar.timegm(time.gmtime()) + 60 + headers = { + 'x-ratelimit-remaining': '0', + 'x-ratelimit-reset': str(future_reset) + } + delay = calculate_retry_delay(0, headers) + # Should be approximately 60 seconds (with some tolerance for execution time) + assert 55 <= delay <= 65 + + def test_exponential_backoff(self): + delay_0 = calculate_retry_delay(0, {}) + delay_1 = calculate_retry_delay(1, {}) + delay_2 = calculate_retry_delay(2, {}) + # Base delay is 1s, so delays should be roughly 1, 2, 4 (plus jitter) + assert 0.9 <= delay_0 <= 1.2 # ~1s + up to 10% jitter + assert 1.8 <= delay_1 <= 2.4 # ~2s + up to 10% jitter + assert 3.6 <= delay_2 <= 4.8 # ~4s + up to 10% jitter + + def test_max_delay_cap(self): + # Very high attempt number should not exceed 120s + jitter + delay = calculate_retry_delay(100, {}) + assert delay <= 120 * 1.1 # 120s max + 10% jitter + + def test_minimum_rate_limit_delay(self): + import time + import calendar + # Set reset time in the past (already reset) + past_reset = calendar.timegm(time.gmtime()) - 100 + headers = { + 'x-ratelimit-remaining': '0', + 'x-ratelimit-reset': str(past_reset) + } + delay = calculate_retry_delay(0, headers) + # Should be minimum 10 seconds even if reset time is in past + assert delay >= 10 + + +class TestRetrieveDataRetry: + """Tests for retry behavior in retrieve_data.""" + + @pytest.fixture + def mock_args(self): + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = "fake_token" + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + return args + + def test_json_parse_error_retries_and_fails(self, mock_args): + """HTTP 200 with invalid JSON should retry and eventually fail.""" + mock_response = Mock() + 
mock_response.getcode.return_value = 200 + mock_response.read.return_value = b"not valid json {" + mock_response.headers = {"x-ratelimit-remaining": "5000"} + + call_count = 0 + + def mock_make_request(*args, **kwargs): + nonlocal call_count + call_count += 1 + return mock_response + + with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request): + with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): # No delay in tests + with pytest.raises(Exception) as exc_info: + github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + + assert "Failed to read response after" in str(exc_info.value) + assert call_count == MAX_RETRIES + + def test_json_parse_error_recovers_on_retry(self, mock_args): + """HTTP 200 with invalid JSON should succeed if retry returns valid JSON.""" + bad_response = Mock() + bad_response.getcode.return_value = 200 + bad_response.read.return_value = b"not valid json {" + bad_response.headers = {"x-ratelimit-remaining": "5000"} + + good_response = Mock() + good_response.getcode.return_value = 200 + good_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8") + good_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""} + + responses = [bad_response, bad_response, good_response] + call_count = 0 + + def mock_make_request(*args, **kwargs): + nonlocal call_count + result = responses[call_count] + call_count += 1 + return result + + with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request): + with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + result = github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + + assert result == [{"id": 1}] + assert call_count == 3 # Failed twice, succeeded on third + + def test_http_error_raises_exception(self, mock_args): + """Non-success HTTP status codes should raise Exception.""" + mock_response = Mock() 
+ mock_response.getcode.return_value = 404 + mock_response.read.return_value = b'{"message": "Not Found"}' + mock_response.headers = {"x-ratelimit-remaining": "5000"} + mock_response.reason = "Not Found" + + with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with pytest.raises(Exception) as exc_info: + github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/notfound/issues") + + assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError) + assert "404" in str(exc_info.value) + + +class TestMakeRequestWithRetry: + """Tests for HTTP error retry behavior in make_request_with_retry.""" + + def test_502_error_retries_and_succeeds(self): + """HTTP 502 should retry and succeed if subsequent request works.""" + good_response = Mock() + good_response.read.return_value = b'{"ok": true}' + + call_count = 0 + fail_count = MAX_RETRIES - 1 # Fail all but last attempt + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count <= fail_count: + raise HTTPError( + url="https://api.github.com/test", + code=502, + msg="Bad Gateway", + hdrs={"x-ratelimit-remaining": "5000"}, + fp=None, + ) + return good_response + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + result = make_request_with_retry(Mock(), None) + + assert result == good_response + assert call_count == MAX_RETRIES + + def test_503_error_retries_until_exhausted(self): + """HTTP 503 should retry MAX_RETRIES times then raise.""" + call_count = 0 + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + raise HTTPError( + url="https://api.github.com/test", + code=503, + msg="Service Unavailable", + hdrs={"x-ratelimit-remaining": "5000"}, + fp=None, + ) + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with 
patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with pytest.raises(HTTPError) as exc_info: + make_request_with_retry(Mock(), None) + + assert exc_info.value.code == 503 + assert call_count == MAX_RETRIES + + def test_404_error_not_retried(self): + """HTTP 404 should not be retried - raise immediately.""" + call_count = 0 + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + raise HTTPError( + url="https://api.github.com/test", + code=404, + msg="Not Found", + hdrs={"x-ratelimit-remaining": "5000"}, + fp=None, + ) + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with pytest.raises(HTTPError) as exc_info: + make_request_with_retry(Mock(), None) + + assert exc_info.value.code == 404 + assert call_count == 1 # No retries + + def test_rate_limit_403_retried_when_remaining_zero(self): + """HTTP 403 with x-ratelimit-remaining=0 should retry.""" + good_response = Mock() + call_count = 0 + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise HTTPError( + url="https://api.github.com/test", + code=403, + msg="Forbidden", + hdrs={"x-ratelimit-remaining": "0"}, + fp=None, + ) + return good_response + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + result = make_request_with_retry(Mock(), None) + + assert result == good_response + assert call_count == 2 + + def test_403_not_retried_when_remaining_nonzero(self): + """HTTP 403 with x-ratelimit-remaining>0 should not retry (permission error).""" + call_count = 0 + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + raise HTTPError( + url="https://api.github.com/test", + code=403, + msg="Forbidden", + hdrs={"x-ratelimit-remaining": "5000"}, + fp=None, + ) + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with 
pytest.raises(HTTPError) as exc_info: + make_request_with_retry(Mock(), None) + + assert exc_info.value.code == 403 + assert call_count == 1 # No retries + + def test_connection_error_retries_and_succeeds(self): + """URLError (connection error) should retry and succeed if subsequent request works.""" + good_response = Mock() + call_count = 0 + fail_count = MAX_RETRIES - 1 # Fail all but last attempt + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count <= fail_count: + raise URLError("Connection refused") + return good_response + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + result = make_request_with_retry(Mock(), None) + + assert result == good_response + assert call_count == MAX_RETRIES + + def test_socket_error_retries_until_exhausted(self): + """socket.error should retry MAX_RETRIES times then raise.""" + call_count = 0 + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + raise socket.error("Connection reset by peer") + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with pytest.raises(socket.error): + make_request_with_retry(Mock(), None) + + assert call_count == MAX_RETRIES + + +class TestRetrieveDataThrottling: + """Tests for throttling behavior in retrieve_data.""" + + @pytest.fixture + def mock_args(self): + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = "fake_token" + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = 10 # Throttle when remaining <= 10 + args.throttle_pause = 5 # Pause 5 seconds + return args + + def test_throttling_pauses_when_rate_limit_low(self, mock_args): + """Should pause when x-ratelimit-remaining is at 
or below throttle_limit.""" + mock_response = Mock() + mock_response.getcode.return_value = 200 + mock_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8") + mock_response.headers = {"x-ratelimit-remaining": "5", "Link": ""} # Below throttle_limit + + with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch("github_backup.github_backup.time.sleep") as mock_sleep: + github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + + mock_sleep.assert_called_once_with(5) # throttle_pause value + + +class TestRetrieveDataSingleItem: + """Tests for single item (dict) responses in retrieve_data.""" + + @pytest.fixture + def mock_args(self): + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = "fake_token" + args.username = None + args.password = None + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + return args + + def test_dict_response_returned_as_list(self, mock_args): + """Single dict response should be returned as a list with one item.""" + mock_response = Mock() + mock_response.getcode.return_value = 200 + mock_response.read.return_value = json.dumps({"login": "testuser", "id": 123}).encode("utf-8") + mock_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""} + + with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + result = github_backup.retrieve_data(mock_args, "https://api.github.com/user") + + assert result == [{"login": "testuser", "id": 123}] From c70cc43f5774fd2cbbff126255604b2e159c3cc5 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Tue, 16 Dec 2025 15:17:23 +0000 Subject: [PATCH 389/455] Release version 0.58.0 --- CHANGES.rst | 31 ++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 
1a8809e..697b39f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,38 @@ Changelog ========= -0.57.0 (2025-12-12) +0.58.0 (2025-12-16) ------------------- ------------------------ +- Fix retry logic for HTTP 5xx errors and network failures. [Rodos] + + Refactors error handling to retry all 5xx errors (not just 502), network errors (URLError, socket.error, IncompleteRead), and JSON parse errors with exponential backoff and jitter. Respects retry-after and rate limit headers per GitHub API requirements. Consolidates retry logic into make_request_with_retry() wrapper and adds clear logging for retry attempts and failures. Removes dead code from 2016 (errors list, _request_http_error, _request_url_error) that was intentionally disabled in commit 1e5a9048 to fix #29. + + Fixes #140, #110, #138 +- Chore: remove transitive deps from release-requirements.txt. [Rodos] +- Chore(deps): bump urllib3 in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [urllib3](https://github.com/urllib3/urllib3). + + + Updates `urllib3` from 2.6.1 to 2.6.2 + - [Release notes](https://github.com/urllib3/urllib3/releases) + - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) + - [Commits](https://github.com/urllib3/urllib3/compare/2.6.1...2.6.2) + + --- + updated-dependencies: + - dependency-name: urllib3 + dependency-version: 2.6.2 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... + + +0.57.0 (2025-12-12) +------------------- - Add GitHub Apps documentation and remove outdated header. 
[Rodos] - Add GitHub Apps authentication section with setup steps diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 6e6e624..45dbfca 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.57.0" +__version__ = "0.58.0" From db36c3c137ced1469c8ccf6f5619d10bb04d169a Mon Sep 17 00:00:00 2001 From: Rodos Date: Sat, 20 Dec 2025 19:16:11 +1100 Subject: [PATCH 390/455] chore: remove deprecated -u/-p password authentication options --- README.rst | 90 +++++++++++++++++----------------- github_backup/cli.py | 2 +- github_backup/github_backup.py | 23 --------- tests/test_all_starred.py | 2 - tests/test_attachments.py | 2 - tests/test_http_451.py | 6 --- tests/test_pagination.py | 2 - tests/test_retrieve_data.py | 6 --- tests/test_skip_assets_on.py | 2 - 9 files changed, 47 insertions(+), 88 deletions(-) diff --git a/README.rst b/README.rst index e4300a7..943f8ec 100644 --- a/README.rst +++ b/README.rst @@ -36,23 +36,26 @@ Show the CLI help output:: CLI Help output:: - github-backup [-h] [-u USERNAME] [-p PASSWORD] [-t TOKEN_CLASSIC] - [-f TOKEN_FINE] [--as-app] [-o OUTPUT_DIRECTORY] - [-l LOG_LEVEL] [-i] [--starred] [--all-starred] - [--watched] [--followers] [--following] [--all] [--issues] - [--issue-comments] [--issue-events] [--pulls] + github-backup [-h] [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [-q] [--as-app] + [-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i] + [--incremental-by-files] + [--starred] [--all-starred] + [--watched] [--followers] [--following] [--all] + [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--pull-details] [--labels] [--hooks] [--milestones] [--repositories] - [--bare] [--lfs] [--wikis] [--gists] [--starred-gists] - [--skip-archived] [--skip-existing] [-L [LANGUAGES ...]] - [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] - [-P] [-F] [--prefer-ssh] [-v] + [--bare] [--no-prune] [--lfs] [--wikis] [--gists] + [--starred-gists] [--skip-archived] 
[--skip-existing] + [-L [LANGUAGES ...]] [-N NAME_REGEX] [-H GITHUB_HOST] + [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] [--keychain-account OSX_KEYCHAIN_ITEM_ACCOUNT] [--releases] [--latest-releases NUMBER_OF_LATEST_RELEASES] - [--skip-prerelease] [--assets] [--skip-assets-on [REPO ...]] - [--attachments] [--exclude [REPOSITORY [REPOSITORY ...]] - [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] + [--skip-prerelease] [--assets] + [--skip-assets-on [SKIP_ASSETS_ON ...]] [--attachments] + [--throttle-limit THROTTLE_LIMIT] + [--throttle-pause THROTTLE_PAUSE] + [--exclude [EXCLUDE ...]] USER Backup a github account @@ -60,27 +63,25 @@ CLI Help output:: positional arguments: USER github username - optional arguments: + options: -h, --help show this help message and exit - -u USERNAME, --username USERNAME - username for basic auth - -p PASSWORD, --password PASSWORD - password for basic auth. If a username is given but - not a password, the password will be prompted for. - -f TOKEN_FINE, --token-fine TOKEN_FINE - fine-grained personal access token or path to token - (file://...) - -t TOKEN_CLASSIC, --token TOKEN_CLASSIC + -t, --token TOKEN_CLASSIC personal access, OAuth, or JSON Web token, or path to token (file://...) + -f, --token-fine TOKEN_FINE + fine-grained personal access token (github_pat_....), + or path to token (file://...) + -q, --quiet supress log messages less severe than warning, e.g. + info --as-app authenticate as github app instead of as a user. 
- -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY + -o, --output-directory OUTPUT_DIRECTORY directory at which to backup the repositories - -l LOG_LEVEL, --log-level LOG_LEVEL + -l, --log-level LOG_LEVEL log level to use (default: info, possible levels: debug, info, warning, error, critical) -i, --incremental incremental backup - --incremental-by-files incremental backup using modified time of files + --incremental-by-files + incremental backup based on modification date of files --starred include JSON output of starred repositories in backup --all-starred include starred repositories in backup [*] --watched include JSON output of watched repositories in backup @@ -100,20 +101,22 @@ CLI Help output:: --milestones include milestones in backup --repositories include repository clone in backup --bare clone bare repositories + --no-prune disable prune option for git fetch --lfs clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*] --wikis include wiki clone in backup --gists include gists in backup [*] --starred-gists include starred gists in backup [*] + --skip-archived skip project if it is archived --skip-existing skip project if a backup directory exists - -L [LANGUAGES [LANGUAGES ...]], --languages [LANGUAGES [LANGUAGES ...]] + -L, --languages [LANGUAGES ...] 
only allow these languages - -N NAME_REGEX, --name-regex NAME_REGEX + -N, --name-regex NAME_REGEX python regex to match names against - -H GITHUB_HOST, --github-host GITHUB_HOST + -H, --github-host GITHUB_HOST GitHub Enterprise hostname -O, --organization whether or not this is an organization user - -R REPOSITORY, --repository REPOSITORY + -R, --repository REPOSITORY name of repository to limit backup to -P, --private include private repositories [*] -F, --fork include forked repositories [*] @@ -128,19 +131,16 @@ CLI Help output:: --releases include release information, not including assets or binaries --latest-releases NUMBER_OF_LATEST_RELEASES - include certain number of the latest releases; - only applies if including releases - --skip-prerelease skip prerelease and draft versions; only applies if including releases + include certain number of the latest releases; only + applies if including releases + --skip-prerelease skip prerelease and draft versions; only applies if + including releases --assets include assets alongside release information; only applies if including releases - --skip-assets-on [REPO ...] - skip asset downloads for these repositories (e.g. - --skip-assets-on repo1 owner/repo2) - --attachments download user-attachments from issues and pull requests - to issues/attachments/{issue_number}/ and - pulls/attachments/{pull_number}/ directories - --exclude [REPOSITORY [REPOSITORY ...]] - names of repositories to exclude from backup. + --skip-assets-on [SKIP_ASSETS_ON ...] + skip asset downloads for these repositories + --attachments download user-attachments from issues and pull + requests --throttle-limit THROTTLE_LIMIT start throttling of GitHub API requests after this amount of API requests remain @@ -148,6 +148,8 @@ CLI Help output:: wait this amount of seconds when API request throttling is active (default: 30.0, requires --throttle-limit to be set) + --exclude [EXCLUDE ...] 
+ names of repositories to exclude Usage Details @@ -156,13 +158,13 @@ Usage Details Authentication -------------- -**Password-based authentication** will fail if you have two-factor authentication enabled, and will `be deprecated `_ by 2023 EOY. +GitHub requires token-based authentication for API access. Password authentication was `removed in November 2020 `_. -``--username`` is used for basic password authentication and separate from the positional argument ``USER``, which specifies the user account you wish to back up. +The positional argument ``USER`` specifies the user or organization account you wish to back up. -**Classic tokens** are `slightly less secure `_ as they provide very coarse-grained permissions. +**Fine-grained tokens** (``-f TOKEN_FINE``) are recommended for most use cases, especially long-running backups (e.g. cron jobs), as they provide precise permission control. -If you need authentication for long-running backups (e.g. for a cron job) it is recommended to use **fine-grained personal access token** ``-f TOKEN_FINE``. +**Classic tokens** (``-t TOKEN``) are `slightly less secure `_ as they provide very coarse-grained permissions. Fine Tokens diff --git a/github_backup/cli.py b/github_backup/cli.py index 98f8d4a..54849d4 100644 --- a/github_backup/cli.py +++ b/github_backup/cli.py @@ -43,7 +43,7 @@ def main(): if args.private and not get_auth(args): logger.warning( "The --private flag has no effect without authentication. " - "Use -t/--token, -f/--token-fine, or -u/--username to authenticate." + "Use -t/--token or -f/--token-fine to authenticate." 
) if args.quiet: diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 34d529a..d62afc3 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -7,7 +7,6 @@ import calendar import codecs import errno -import getpass import json import logging import os @@ -24,7 +23,6 @@ from datetime import datetime from http.client import IncompleteRead from urllib.error import HTTPError, URLError -from urllib.parse import quote as urlquote from urllib.parse import urlencode, urlparse from urllib.request import HTTPRedirectHandler, Request, build_opener, urlopen @@ -149,17 +147,6 @@ def mask_password(url, secret="*****"): def parse_args(args=None): parser = argparse.ArgumentParser(description="Backup a github account") parser.add_argument("user", metavar="USER", type=str, help="github username") - parser.add_argument( - "-u", "--username", dest="username", help="username for basic auth" - ) - parser.add_argument( - "-p", - "--password", - dest="password", - help="password for basic auth. 
" - "If a username is given but not a password, the " - "password will be prompted for.", - ) parser.add_argument( "-t", "--token", @@ -533,16 +520,6 @@ def get_auth(args, encode=True, for_git_cli=False): auth = args.token_classic else: auth = "x-access-token:" + args.token_classic - elif args.username: - if not args.password: - args.password = getpass.getpass() - if encode: - password = args.password - else: - password = urlquote(args.password) - auth = args.username + ":" + password - elif args.password: - raise Exception("You must specify a username for basic auth") if not auth: return None diff --git a/tests/test_all_starred.py b/tests/test_all_starred.py index f59a67e..0fab048 100644 --- a/tests/test_all_starred.py +++ b/tests/test_all_starred.py @@ -46,8 +46,6 @@ def _create_mock_args(self, **overrides): args.prefer_ssh = False args.token_classic = None args.token_fine = None - args.username = None - args.password = None args.as_app = False args.osx_keychain_item_name = None args.osx_keychain_item_account = None diff --git a/tests/test_attachments.py b/tests/test_attachments.py index 07c1b33..b338caf 100644 --- a/tests/test_attachments.py +++ b/tests/test_attachments.py @@ -24,8 +24,6 @@ def attachment_test_setup(tmp_path): args.as_app = False args.token_fine = None args.token_classic = None - args.username = None - args.password = None args.osx_keychain_item_name = None args.osx_keychain_item_account = None args.user = "testuser" diff --git a/tests/test_http_451.py b/tests/test_http_451.py index 51218d2..d53d65c 100644 --- a/tests/test_http_451.py +++ b/tests/test_http_451.py @@ -17,8 +17,6 @@ def test_repository_unavailable_error_raised(self): args.as_app = False args.token_fine = None args.token_classic = None - args.username = None - args.password = None args.osx_keychain_item_name = None args.osx_keychain_item_account = None args.throttle_limit = None @@ -52,8 +50,6 @@ def test_repository_unavailable_error_without_dmca_url(self): args.as_app = False 
args.token_fine = None args.token_classic = None - args.username = None - args.password = None args.osx_keychain_item_name = None args.osx_keychain_item_account = None args.throttle_limit = None @@ -78,8 +74,6 @@ def test_repository_unavailable_error_with_malformed_json(self): args.as_app = False args.token_fine = None args.token_classic = None - args.username = None - args.password = None args.osx_keychain_item_name = None args.osx_keychain_item_account = None args.throttle_limit = None diff --git a/tests/test_pagination.py b/tests/test_pagination.py index 75dfccd..831b913 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -45,8 +45,6 @@ def mock_args(): args.as_app = False args.token_fine = None args.token_classic = "fake_token" - args.username = None - args.password = None args.osx_keychain_item_name = None args.osx_keychain_item_account = None args.throttle_limit = None diff --git a/tests/test_retrieve_data.py b/tests/test_retrieve_data.py index c358ff0..adb1152 100644 --- a/tests/test_retrieve_data.py +++ b/tests/test_retrieve_data.py @@ -70,8 +70,6 @@ def mock_args(self): args.as_app = False args.token_fine = None args.token_classic = "fake_token" - args.username = None - args.password = None args.osx_keychain_item_name = None args.osx_keychain_item_account = None args.throttle_limit = None @@ -313,8 +311,6 @@ def mock_args(self): args.as_app = False args.token_fine = None args.token_classic = "fake_token" - args.username = None - args.password = None args.osx_keychain_item_name = None args.osx_keychain_item_account = None args.throttle_limit = 10 # Throttle when remaining <= 10 @@ -344,8 +340,6 @@ def mock_args(self): args.as_app = False args.token_fine = None args.token_classic = "fake_token" - args.username = None - args.password = None args.osx_keychain_item_name = None args.osx_keychain_item_account = None args.throttle_limit = None diff --git a/tests/test_skip_assets_on.py b/tests/test_skip_assets_on.py index 2437e05..ce28287 100644 
--- a/tests/test_skip_assets_on.py +++ b/tests/test_skip_assets_on.py @@ -48,8 +48,6 @@ def _create_mock_args(self, **overrides): args.prefer_ssh = False args.token_classic = "test-token" args.token_fine = None - args.username = None - args.password = None args.as_app = False args.osx_keychain_item_name = None args.osx_keychain_item_account = None From 3c43e0f481e6f4a9f5885ca92e9c87552f3010ee Mon Sep 17 00:00:00 2001 From: Rodos Date: Sat, 20 Dec 2025 18:04:25 +1100 Subject: [PATCH 391/455] Add --starred-skip-size-over flag to limit starred repo size (#108) Allow users to skip starred repositories exceeding a size threshold when using --all-starred. Size is specified in MB and checked against the GitHub API's repository size field. - Only affects starred repos; user's own repos always included - Logs each skipped repo with name and size Closes #108 --- README.rst | 20 ++- github_backup/github_backup.py | 26 ++++ tests/test_case_sensitivity.py | 6 + tests/test_starred_skip_size_over.py | 224 +++++++++++++++++++++++++++ 4 files changed, 272 insertions(+), 4 deletions(-) create mode 100644 tests/test_starred_skip_size_over.py diff --git a/README.rst b/README.rst index 943f8ec..ffa80ac 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ CLI Help output:: github-backup [-h] [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [-q] [--as-app] [-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i] [--incremental-by-files] - [--starred] [--all-starred] + [--starred] [--all-starred] [--starred-skip-size-over MB] [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--pull-details] @@ -84,6 +84,8 @@ CLI Help output:: incremental backup based on modification date of files --starred include JSON output of starred repositories in backup --all-starred include starred repositories in backup [*] + --starred-skip-size-over MB + skip starred repositories larger than this size in MB --watched include JSON output of 
watched repositories in backup --followers include JSON output of followers in backup --following include JSON output of following users in backup @@ -292,10 +294,20 @@ All is not everything The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. -Cloning all starred size ------------------------- +Starred repository size +----------------------- + +Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space. + +To see your starred repositories sorted by size (requires `GitHub CLI `_):: + + gh api user/starred --paginate --jq 'sort_by(-.size)[]|"\(.full_name) \(.size/1024|round)MB"' + +To limit which starred repositories are cloned, use ``--starred-skip-size-over SIZE`` where SIZE is in MB. For example, ``--starred-skip-size-over 500`` will skip any starred repository where the git repository size (code and history) exceeds 500 MB. Note that this size limit only applies to the repository itself, not issues, release assets or other metadata. This filter only affects starred repositories; your own repositories are always included regardless of size. + +For finer control, avoid using ``--assets`` with starred repos, or use ``--skip-assets-on`` for specific repositories with large release binaries. -Using the ``--all-starred`` argument to clone all starred repositories may use a large amount of storage space, especially if ``--all`` or more arguments are used. e.g. commonly starred repos can have tens of thousands of issues, many large assets and the repo itself etc. Consider just storing links to starred repos in JSON format with ``--starred``. +Alternatively, consider just storing links to starred repos in JSON format with ``--starred``. 
Incremental Backup ------------------ diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index d62afc3..1d4e354 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -211,6 +211,13 @@ def parse_args(args=None): dest="all_starred", help="include starred repositories in backup [*]", ) + parser.add_argument( + "--starred-skip-size-over", + type=int, + metavar="MB", + dest="starred_skip_size_over", + help="skip starred repositories larger than this size in MB", + ) parser.add_argument( "--watched", action="store_true", @@ -1570,6 +1577,25 @@ def filter_repositories(args, unfiltered_repositories): ] if args.skip_archived: repositories = [r for r in repositories if not r.get("archived")] + if args.starred_skip_size_over is not None: + if args.starred_skip_size_over <= 0: + logger.warning( + "--starred-skip-size-over must be greater than 0, ignoring" + ) + else: + size_limit_kb = args.starred_skip_size_over * 1024 + filtered = [] + for r in repositories: + if r.get("is_starred") and r.get("size", 0) > size_limit_kb: + size_mb = r.get("size", 0) / 1024 + logger.info( + "Skipping starred repo {0} ({1:.0f} MB) due to --starred-skip-size-over {2}".format( + r.get("full_name", r.get("name")), size_mb, args.starred_skip_size_over + ) + ) + else: + filtered.append(r) + repositories = filtered if args.exclude: repositories = [ r for r in repositories if "name" not in r or r["name"] not in args.exclude diff --git a/tests/test_case_sensitivity.py b/tests/test_case_sensitivity.py index 1398d0d..058a7df 100644 --- a/tests/test_case_sensitivity.py +++ b/tests/test_case_sensitivity.py @@ -26,6 +26,8 @@ def test_filter_repositories_case_insensitive_user(self): args.private = False args.public = False args.all = True + args.skip_archived = False + args.starred_skip_size_over = None # Simulate GitHub API returning canonical case repos = [ @@ -65,6 +67,8 @@ def test_filter_repositories_case_insensitive_org(self): args.private = False 
args.public = False args.all = True + args.skip_archived = False + args.starred_skip_size_over = None repos = [ { @@ -93,6 +97,8 @@ def test_filter_repositories_case_variations(self): args.private = False args.public = False args.all = True + args.skip_archived = False + args.starred_skip_size_over = None repos = [ {"name": "repo1", "owner": {"login": "test-user"}, "private": False, "fork": False}, diff --git a/tests/test_starred_skip_size_over.py b/tests/test_starred_skip_size_over.py new file mode 100644 index 0000000..2deb72a --- /dev/null +++ b/tests/test_starred_skip_size_over.py @@ -0,0 +1,224 @@ +"""Tests for --starred-skip-size-over flag behavior (issue #108).""" + +import pytest +from unittest.mock import Mock + +from github_backup import github_backup + + +class TestStarredSkipSizeOver: + """Test suite for --starred-skip-size-over flag. + + Issue #108: Allow restricting size of starred repositories before cloning. + The size is based on the GitHub API's 'size' field (in KB), but the CLI + argument accepts MB for user convenience. 
+ """ + + def _create_mock_args(self, **overrides): + """Create a mock args object with sensible defaults.""" + args = Mock() + args.user = "testuser" + args.repository = None + args.name_regex = None + args.languages = None + args.fork = False + args.private = False + args.skip_archived = False + args.starred_skip_size_over = None + args.exclude = None + + for key, value in overrides.items(): + setattr(args, key, value) + + return args + + +class TestStarredSkipSizeOverArgumentParsing(TestStarredSkipSizeOver): + """Tests for --starred-skip-size-over argument parsing.""" + + def test_starred_skip_size_over_not_set_defaults_to_none(self): + """When --starred-skip-size-over is not specified, it should default to None.""" + args = github_backup.parse_args(["testuser"]) + assert args.starred_skip_size_over is None + + def test_starred_skip_size_over_accepts_integer(self): + """--starred-skip-size-over should accept an integer value.""" + args = github_backup.parse_args(["testuser", "--starred-skip-size-over", "500"]) + assert args.starred_skip_size_over == 500 + + def test_starred_skip_size_over_rejects_non_integer(self): + """--starred-skip-size-over should reject non-integer values.""" + with pytest.raises(SystemExit): + github_backup.parse_args(["testuser", "--starred-skip-size-over", "abc"]) + + +class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): + """Tests for --starred-skip-size-over filtering behavior.""" + + def test_starred_repo_under_limit_is_kept(self): + """Starred repos under the size limit should be kept.""" + args = self._create_mock_args(starred_skip_size_over=500) + + repos = [ + { + "name": "small-repo", + "owner": {"login": "otheruser"}, + "size": 100 * 1024, # 100 MB in KB + "is_starred": True, + } + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 1 + assert result[0]["name"] == "small-repo" + + def test_starred_repo_over_limit_is_filtered(self): + """Starred repos over the size limit should be 
filtered out.""" + args = self._create_mock_args(starred_skip_size_over=500) + + repos = [ + { + "name": "huge-repo", + "owner": {"login": "otheruser"}, + "size": 600 * 1024, # 600 MB in KB + "is_starred": True, + } + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 0 + + def test_own_repo_over_limit_is_kept(self): + """User's own repos should not be affected by the size limit.""" + args = self._create_mock_args(starred_skip_size_over=500) + + repos = [ + { + "name": "my-huge-repo", + "owner": {"login": "testuser"}, + "size": 600 * 1024, # 600 MB in KB + # No is_starred flag - this is the user's own repo + } + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 1 + assert result[0]["name"] == "my-huge-repo" + + def test_starred_repo_at_exact_limit_is_kept(self): + """Starred repos at exactly the size limit should be kept.""" + args = self._create_mock_args(starred_skip_size_over=500) + + repos = [ + { + "name": "exact-limit-repo", + "owner": {"login": "otheruser"}, + "size": 500 * 1024, # Exactly 500 MB in KB + "is_starred": True, + } + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 1 + assert result[0]["name"] == "exact-limit-repo" + + def test_mixed_repos_filtered_correctly(self): + """Mix of own and starred repos should be filtered correctly.""" + args = self._create_mock_args(starred_skip_size_over=500) + + repos = [ + { + "name": "my-huge-repo", + "owner": {"login": "testuser"}, + "size": 1000 * 1024, # 1 GB - own repo, should be kept + }, + { + "name": "starred-small", + "owner": {"login": "otheruser"}, + "size": 100 * 1024, # 100 MB - under limit + "is_starred": True, + }, + { + "name": "starred-huge", + "owner": {"login": "anotheruser"}, + "size": 2000 * 1024, # 2 GB - over limit + "is_starred": True, + }, + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 2 + names = [r["name"] for r in result] + assert 
"my-huge-repo" in names + assert "starred-small" in names + assert "starred-huge" not in names + + def test_no_size_limit_keeps_all_starred(self): + """When no size limit is set, all starred repos should be kept.""" + args = self._create_mock_args(starred_skip_size_over=None) + + repos = [ + { + "name": "huge-starred-repo", + "owner": {"login": "otheruser"}, + "size": 10000 * 1024, # 10 GB + "is_starred": True, + } + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 1 + + def test_repo_without_size_field_is_kept(self): + """Repos without a size field should be kept (size defaults to 0).""" + args = self._create_mock_args(starred_skip_size_over=500) + + repos = [ + { + "name": "no-size-repo", + "owner": {"login": "otheruser"}, + "is_starred": True, + # No size field + } + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 1 + + def test_zero_value_warns_and_is_ignored(self, caplog): + """Zero value should warn and keep all repos.""" + args = self._create_mock_args(starred_skip_size_over=0) + + repos = [ + { + "name": "huge-starred-repo", + "owner": {"login": "otheruser"}, + "size": 10000 * 1024, # 10 GB + "is_starred": True, + } + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 1 + assert "must be greater than 0" in caplog.text + + def test_negative_value_warns_and_is_ignored(self, caplog): + """Negative value should warn and keep all repos.""" + args = self._create_mock_args(starred_skip_size_over=-5) + + repos = [ + { + "name": "huge-starred-repo", + "owner": {"login": "otheruser"}, + "size": 10000 * 1024, # 10 GB + "is_starred": True, + } + ] + + result = github_backup.filter_repositories(args, repos) + assert len(result) == 1 + assert "must be greater than 0" in caplog.text + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 81a72ac8af02a39b79bf74c37bbd21938294c9d8 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sun, 21 Dec 2025 
23:48:36 +0000 Subject: [PATCH 392/455] Release version 0.59.0 --- CHANGES.rst | 19 ++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 697b39f..a6a1c4d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,26 @@ Changelog ========= -0.58.0 (2025-12-16) +0.59.0 (2025-12-21) ------------------- ------------------------ +- Add --starred-skip-size-over flag to limit starred repo size (#108) + [Rodos] + + Allow users to skip starred repositories exceeding a size threshold + when using --all-starred. Size is specified in MB and checked against + the GitHub API's repository size field. + + - Only affects starred repos; user's own repos always included + - Logs each skipped repo with name and size + + Closes #108 +- Chore: remove deprecated -u/-p password authentication options. + [Rodos] + + +0.58.0 (2025-12-16) +------------------- - Fix retry logic for HTTP 5xx errors and network failures. [Rodos] Refactors error handling to retry all 5xx errors (not just 502), network errors (URLError, socket.error, IncompleteRead), and JSON parse errors with exponential backoff and jitter. Respects retry-after and rate limit headers per GitHub API requirements. Consolidates retry logic into make_request_with_retry() wrapper and adds clear logging for retry attempts and failures. Removes dead code from 2016 (errors list, _request_http_error, _request_url_error) that was intentionally disabled in commit 1e5a9048 to fix #29. diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 45dbfca..25dbb4b 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.58.0" +__version__ = "0.59.0" From 89502c326d0aab93d4e60b7103f5738593d93d6b Mon Sep 17 00:00:00 2001 From: michaelmartinez Date: Mon, 22 Dec 2025 14:23:02 -0800 Subject: [PATCH 393/455] update retry logic and logging ### What 1. configureable retry count 2. 
additional logging ### Why 1. pass retry count as a command line arg; default 5 2. show details when api requests fail ### Testing before merge compiles cleanly ### Validation after merge compile and test ### Issue addressed by this PR https://github.com/stellar/ops/issues/2039 --- github_backup/cli.py | 2 ++ github_backup/github_backup.py | 21 ++++++++++++++++----- github_backup/max_retries.py | 1 + 3 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 github_backup/max_retries.py diff --git a/github_backup/cli.py b/github_backup/cli.py index 54849d4..cdc9c5f 100644 --- a/github_backup/cli.py +++ b/github_backup/cli.py @@ -4,6 +4,7 @@ import logging import os import sys +from github_backup import max_retries from github_backup.github_backup import ( backup_account, @@ -39,6 +40,7 @@ def main(): """Main entry point for github-backup CLI.""" args = parse_args() + max_retries.MAX_RETRIES = args.max_retries if args.private and not get_auth(args): logger.warning( diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 1d4e354..13cda22 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -25,6 +25,7 @@ from urllib.error import HTTPError, URLError from urllib.parse import urlencode, urlparse from urllib.request import HTTPRedirectHandler, Request, build_opener, urlopen +from github_backup import max_retries try: from . 
import __version__ @@ -75,7 +76,7 @@ def __init__(self, message, dmca_url=None): ) # Retry configuration -MAX_RETRIES = 5 +MAX_RETRIES = max_retries.MAX_RETRIES def logging_subprocess( @@ -468,6 +469,13 @@ def parse_args(args=None): parser.add_argument( "--exclude", dest="exclude", help="names of repositories to exclude", nargs="*" ) + parser.add_argument( + "--retries", + dest="max_retries", + type=int, + default=5, + help="maximum number of retries for API calls (default: 5)", + ) return parser.parse_args(args) @@ -737,16 +745,19 @@ def is_retryable_status(status_code, headers): except HTTPError as exc: # HTTPError can be used as a response-like object if not is_retryable_status(exc.code, exc.headers): + logger.error(f"API Error: {exc.code} {exc.reason} for {request.full_url}") raise # Non-retryable error if attempt >= MAX_RETRIES - 1: logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts") + logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts for {request.full_url}") raise delay = calculate_retry_delay(attempt, exc.headers) logger.warning( - f"HTTP {exc.code}, retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{MAX_RETRIES})" + f"HTTP {exc.code} ({exc.reason}), retrying in {delay:.1f}s " + f"(attempt {attempt + 1}/{MAX_RETRIES}) for {request.full_url}" + ) if auth is None and exc.code in (403, 429): logger.info("Hint: Authenticate to raise your GitHub rate limit") @@ -754,12 +765,12 @@ def is_retryable_status(status_code, headers): except (URLError, socket.error) as e: if attempt >= MAX_RETRIES - 1: - logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e}") + logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e} for {request.full_url}") raise delay = calculate_retry_delay(attempt, {}) logger.warning( f"Connection error: {e}, retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{MAX_RETRIES})" + f"(attempt {attempt + 1}/{MAX_RETRIES}) for {request.full_url}" ) time.sleep(delay) diff --git 
a/github_backup/max_retries.py b/github_backup/max_retries.py new file mode 100644 index 0000000..3bd0f5d --- /dev/null +++ b/github_backup/max_retries.py @@ -0,0 +1 @@ +MAX_RETRIES=None From 8b1b632d8962a868f7ebfb1d2c38bde93983ee58 Mon Sep 17 00:00:00 2001 From: michaelmartinez Date: Mon, 22 Dec 2025 14:47:26 -0800 Subject: [PATCH 394/455] max_retries 5 --- github_backup/max_retries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/max_retries.py b/github_backup/max_retries.py index 3bd0f5d..43594f7 100644 --- a/github_backup/max_retries.py +++ b/github_backup/max_retries.py @@ -1 +1 @@ -MAX_RETRIES=None +MAX_RETRIES=5 From 1f2ec016d561e0c73faa22519730dc47aaf70d44 Mon Sep 17 00:00:00 2001 From: michaelmartinez Date: Mon, 22 Dec 2025 16:13:12 -0800 Subject: [PATCH 395/455] readme, simplify the logic a bit --- github_backup/github_backup.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 13cda22..23bb836 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -75,9 +75,6 @@ def __init__(self, message, dmca_url=None): " 3. 
Debian/Ubuntu: apt-get install ca-certificates\n\n" ) -# Retry configuration -MAX_RETRIES = max_retries.MAX_RETRIES - def logging_subprocess( popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs @@ -639,7 +636,7 @@ def fetch_all() -> Generator[dict, None, None]: while True: # FIRST: Fetch response - for attempt in range(MAX_RETRIES): + for attempt in range(max_retries.MAX_RETRIES): request = _construct_request( per_page=per_page if paginated else None, query_args=query_args, @@ -662,10 +659,10 @@ def fetch_all() -> Generator[dict, None, None]: TimeoutError, ) as e: logger.warning(f"{type(e).__name__} reading response") - if attempt < MAX_RETRIES - 1: + if attempt < max_retries.MAX_RETRIES - 1: delay = calculate_retry_delay(attempt, {}) logger.warning( - f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RETRIES})" + f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{max_retries.MAX_RETRIES})" ) time.sleep(delay) continue # Next retry attempt @@ -691,10 +688,10 @@ def fetch_all() -> Generator[dict, None, None]: ) else: logger.error( - f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}" + f"Failed to read response after {max_retries.MAX_RETRIES} attempts for {next_url or template}" ) raise Exception( - f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}" + f"Failed to read response after {max_retries.MAX_RETRIES} attempts for {next_url or template}" ) # SECOND: Process and paginate @@ -738,7 +735,7 @@ def is_retryable_status(status_code, headers): return int(headers.get("x-ratelimit-remaining", 1)) < 1 return False - for attempt in range(MAX_RETRIES): + for attempt in range(max_retries.MAX_RETRIES): try: return urlopen(request, context=https_ctx) @@ -748,15 +745,14 @@ def is_retryable_status(status_code, headers): logger.error(f"API Error: {exc.code} {exc.reason} for {request.full_url}") raise # Non-retryable error - if attempt >= MAX_RETRIES - 1: - logger.error(f"HTTP 
{exc.code} failed after {MAX_RETRIES} attempts") - logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts for {request.full_url}") + if attempt >= max_retries.MAX_RETRIES - 1: + logger.error(f"HTTP {exc.code} failed after {max_retries.MAX_RETRIES} attempts for {request.full_url}") raise delay = calculate_retry_delay(attempt, exc.headers) logger.warning( f"HTTP {exc.code} ({exc.reason}), retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{MAX_RETRIES}) for {request.full_url}" + f"(attempt {attempt + 1}/{max_retries.MAX_RETRIES}) for {request.full_url}" ) if auth is None and exc.code in (403, 429): @@ -764,17 +760,17 @@ def is_retryable_status(status_code, headers): time.sleep(delay) except (URLError, socket.error) as e: - if attempt >= MAX_RETRIES - 1: - logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e} for {request.full_url}") + if attempt >= max_retries.MAX_RETRIES - 1: + logger.error(f"Connection error failed after {max_retries.MAX_RETRIES} attempts: {e} for {request.full_url}") raise delay = calculate_retry_delay(attempt, {}) logger.warning( f"Connection error: {e}, retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{MAX_RETRIES}) for {request.full_url}" + f"(attempt {attempt + 1}/{max_retries.MAX_RETRIES}) for {request.full_url}" ) time.sleep(delay) - raise Exception(f"Request failed after {MAX_RETRIES} attempts") # pragma: no cover + raise Exception(f"Request failed after {max_retries.MAX_RETRIES} attempts") # pragma: no cover def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False): From f9827da342a5306ed904acfac116d0afeaab4109 Mon Sep 17 00:00:00 2001 From: michaelmartinez Date: Tue, 23 Dec 2025 08:53:54 -0800 Subject: [PATCH 396/455] don't use a global variable, pass the args instead --- github_backup/cli.py | 2 -- github_backup/github_backup.py | 32 +++++++++++++++----------------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/github_backup/cli.py b/github_backup/cli.py 
index cdc9c5f..54849d4 100644 --- a/github_backup/cli.py +++ b/github_backup/cli.py @@ -4,7 +4,6 @@ import logging import os import sys -from github_backup import max_retries from github_backup.github_backup import ( backup_account, @@ -40,7 +39,6 @@ def main(): """Main entry point for github-backup CLI.""" args = parse_args() - max_retries.MAX_RETRIES = args.max_retries if args.private and not get_auth(args): logger.warning( diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 23bb836..7aaf722 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -25,7 +25,6 @@ from urllib.error import HTTPError, URLError from urllib.parse import urlencode, urlparse from urllib.request import HTTPRedirectHandler, Request, build_opener, urlopen -from github_backup import max_retries try: from . import __version__ @@ -636,7 +635,7 @@ def fetch_all() -> Generator[dict, None, None]: while True: # FIRST: Fetch response - for attempt in range(max_retries.MAX_RETRIES): + for attempt in range(args.max_retries): request = _construct_request( per_page=per_page if paginated else None, query_args=query_args, @@ -645,7 +644,7 @@ def fetch_all() -> Generator[dict, None, None]: as_app=args.as_app, fine=args.token_fine is not None, ) - http_response = make_request_with_retry(request, auth) + http_response = make_request_with_retry(request, auth, args.max_retries) match http_response.getcode(): case 200: @@ -659,10 +658,10 @@ def fetch_all() -> Generator[dict, None, None]: TimeoutError, ) as e: logger.warning(f"{type(e).__name__} reading response") - if attempt < max_retries.MAX_RETRIES - 1: + if attempt < args.max_retries - 1: delay = calculate_retry_delay(attempt, {}) logger.warning( - f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{max_retries.MAX_RETRIES})" + f"Retrying read in {delay:.1f}s (attempt {attempt + 1}/{args.max_retries})" ) time.sleep(delay) continue # Next retry attempt @@ -688,10 +687,10 @@ def fetch_all() -> 
Generator[dict, None, None]: ) else: logger.error( - f"Failed to read response after {max_retries.MAX_RETRIES} attempts for {next_url or template}" + f"Failed to read response after {args.max_retries} attempts for {next_url or template}" ) raise Exception( - f"Failed to read response after {max_retries.MAX_RETRIES} attempts for {next_url or template}" + f"Failed to read response after {args.max_retries} attempts for {next_url or template}" ) # SECOND: Process and paginate @@ -723,7 +722,7 @@ def fetch_all() -> Generator[dict, None, None]: return list(fetch_all()) -def make_request_with_retry(request, auth): +def make_request_with_retry(request, auth, max_retries=5): """Make HTTP request with automatic retry for transient errors.""" def is_retryable_status(status_code, headers): @@ -735,7 +734,7 @@ def is_retryable_status(status_code, headers): return int(headers.get("x-ratelimit-remaining", 1)) < 1 return False - for attempt in range(max_retries.MAX_RETRIES): + for attempt in range(max_retries): try: return urlopen(request, context=https_ctx) @@ -745,32 +744,31 @@ def is_retryable_status(status_code, headers): logger.error(f"API Error: {exc.code} {exc.reason} for {request.full_url}") raise # Non-retryable error - if attempt >= max_retries.MAX_RETRIES - 1: - logger.error(f"HTTP {exc.code} failed after {max_retries.MAX_RETRIES} attempts for {request.full_url}") + if attempt >= max_retries - 1: + logger.error(f"HTTP {exc.code} failed after {max_retries} attempts for {request.full_url}") raise delay = calculate_retry_delay(attempt, exc.headers) logger.warning( f"HTTP {exc.code} ({exc.reason}), retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{max_retries.MAX_RETRIES}) for {request.full_url}" - + f"(attempt {attempt + 1}/{max_retries}) for {request.full_url}" ) if auth is None and exc.code in (403, 429): logger.info("Hint: Authenticate to raise your GitHub rate limit") time.sleep(delay) except (URLError, socket.error) as e: - if attempt >= max_retries.MAX_RETRIES - 
1: - logger.error(f"Connection error failed after {max_retries.MAX_RETRIES} attempts: {e} for {request.full_url}") + if attempt >= max_retries - 1: + logger.error(f"Connection error failed after {max_retries} attempts: {e} for {request.full_url}") raise delay = calculate_retry_delay(attempt, {}) logger.warning( f"Connection error: {e}, retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{max_retries.MAX_RETRIES}) for {request.full_url}" + f"(attempt {attempt + 1}/{max_retries}) for {request.full_url}" ) time.sleep(delay) - raise Exception(f"Request failed after {max_retries.MAX_RETRIES} attempts") # pragma: no cover + raise Exception(f"Request failed after {max_retries} attempts") # pragma: no cover def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False): From 8b21e2501c8111cd3aa2a67ceec1ea1b9ec746dc Mon Sep 17 00:00:00 2001 From: michaelmartinez Date: Tue, 23 Dec 2025 08:55:52 -0800 Subject: [PATCH 397/455] readme --- README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index ffa80ac..df31e28 100644 --- a/README.rst +++ b/README.rst @@ -152,7 +152,8 @@ CLI Help output:: --throttle-limit to be set) --exclude [EXCLUDE ...] 
names of repositories to exclude - + --retries MAX_RETRIES + maximum number of retries for API calls (default: 5) Usage Details ============= From 5ab3852476d4c387f04473a1ea1b1b76cd6a4878 Mon Sep 17 00:00:00 2001 From: michaelmartinez Date: Tue, 23 Dec 2025 08:57:57 -0800 Subject: [PATCH 398/455] rm max_retries.py --- github_backup/max_retries.py | 1 - 1 file changed, 1 deletion(-) delete mode 100644 github_backup/max_retries.py diff --git a/github_backup/max_retries.py b/github_backup/max_retries.py deleted file mode 100644 index 43594f7..0000000 --- a/github_backup/max_retries.py +++ /dev/null @@ -1 +0,0 @@ -MAX_RETRIES=5 From 44b0003ec9766759f39e23084db1ba152d90d1a1 Mon Sep 17 00:00:00 2001 From: michaelmartinez Date: Tue, 23 Dec 2025 14:07:38 -0800 Subject: [PATCH 399/455] updates to the tests, and fixes to the retry --- github_backup/github_backup.py | 59 ++++++--- tests/test_http_451.py | 41 ++++-- tests/test_pagination.py | 1 + tests/test_retrieve_data.py | 235 +++++++++++++++++++++++++++------ 4 files changed, 266 insertions(+), 70 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 7aaf722..12b354b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -141,6 +141,17 @@ def mask_password(url, secret="*****"): return url.replace(parsed.password, secret) +def non_negative_int(value): + """Argparse type validator for non-negative integers.""" + try: + ivalue = int(value) + except ValueError: + raise argparse.ArgumentTypeError(f"'{value}' is not a valid integer") + if ivalue < 0: + raise argparse.ArgumentTypeError(f"{value} must be 0 or greater") + return ivalue + + def parse_args(args=None): parser = argparse.ArgumentParser(description="Backup a github account") parser.add_argument("user", metavar="USER", type=str, help="github username") @@ -468,7 +479,7 @@ def parse_args(args=None): parser.add_argument( "--retries", dest="max_retries", - type=int, + type=non_negative_int, default=5, 
help="maximum number of retries for API calls (default: 5)", ) @@ -626,7 +637,7 @@ def retrieve_data(args, template, query_args=None, paginated=True): def _extract_next_page_url(link_header): for link in link_header.split(","): if 'rel="next"' in link: - return link[link.find("<") + 1:link.find(">")] + return link[link.find("<") + 1 : link.find(">")] return None def fetch_all() -> Generator[dict, None, None]: @@ -635,7 +646,7 @@ def fetch_all() -> Generator[dict, None, None]: while True: # FIRST: Fetch response - for attempt in range(args.max_retries): + for attempt in range(args.max_retries + 1): request = _construct_request( per_page=per_page if paginated else None, query_args=query_args, @@ -658,10 +669,10 @@ def fetch_all() -> Generator[dict, None, None]: TimeoutError, ) as e: logger.warning(f"{type(e).__name__} reading response") - if attempt < args.max_retries - 1: + if attempt < args.max_retries: delay = calculate_retry_delay(attempt, {}) logger.warning( - f"Retrying read in {delay:.1f}s (attempt {attempt + 1}/{args.max_retries})" + f"Retrying read in {delay:.1f}s (attempt {attempt + 1}/{args.max_retries + 1})" ) time.sleep(delay) continue # Next retry attempt @@ -687,10 +698,10 @@ def fetch_all() -> Generator[dict, None, None]: ) else: logger.error( - f"Failed to read response after {args.max_retries} attempts for {next_url or template}" + f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}" ) raise Exception( - f"Failed to read response after {args.max_retries} attempts for {next_url or template}" + f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}" ) # SECOND: Process and paginate @@ -734,41 +745,49 @@ def is_retryable_status(status_code, headers): return int(headers.get("x-ratelimit-remaining", 1)) < 1 return False - for attempt in range(max_retries): + for attempt in range(max_retries + 1): try: return urlopen(request, context=https_ctx) except HTTPError as exc: # HTTPError 
can be used as a response-like object if not is_retryable_status(exc.code, exc.headers): - logger.error(f"API Error: {exc.code} {exc.reason} for {request.full_url}") + logger.error( + f"API Error: {exc.code} {exc.reason} for {request.full_url}" + ) raise # Non-retryable error - if attempt >= max_retries - 1: - logger.error(f"HTTP {exc.code} failed after {max_retries} attempts for {request.full_url}") + if attempt >= max_retries: + logger.error( + f"HTTP {exc.code} failed after {max_retries + 1} attempts for {request.full_url}" + ) raise delay = calculate_retry_delay(attempt, exc.headers) logger.warning( f"HTTP {exc.code} ({exc.reason}), retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{max_retries}) for {request.full_url}" + f"(attempt {attempt + 1}/{max_retries + 1}) for {request.full_url}" ) if auth is None and exc.code in (403, 429): logger.info("Hint: Authenticate to raise your GitHub rate limit") time.sleep(delay) except (URLError, socket.error) as e: - if attempt >= max_retries - 1: - logger.error(f"Connection error failed after {max_retries} attempts: {e} for {request.full_url}") + if attempt >= max_retries: + logger.error( + f"Connection error failed after {max_retries + 1} attempts: {e} for {request.full_url}" + ) raise delay = calculate_retry_delay(attempt, {}) logger.warning( f"Connection error: {e}, retrying in {delay:.1f}s " - f"(attempt {attempt + 1}/{max_retries}) for {request.full_url}" + f"(attempt {attempt + 1}/{max_retries + 1}) for {request.full_url}" ) time.sleep(delay) - raise Exception(f"Request failed after {max_retries} attempts") # pragma: no cover + raise Exception( + f"Request failed after {max_retries + 1} attempts" + ) # pragma: no cover def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False): @@ -1584,9 +1603,7 @@ def filter_repositories(args, unfiltered_repositories): repositories = [r for r in repositories if not r.get("archived")] if args.starred_skip_size_over is not None: if 
args.starred_skip_size_over <= 0: - logger.warning( - "--starred-skip-size-over must be greater than 0, ignoring" - ) + logger.warning("--starred-skip-size-over must be greater than 0, ignoring") else: size_limit_kb = args.starred_skip_size_over * 1024 filtered = [] @@ -1595,7 +1612,9 @@ def filter_repositories(args, unfiltered_repositories): size_mb = r.get("size", 0) / 1024 logger.info( "Skipping starred repo {0} ({1:.0f} MB) due to --starred-skip-size-over {2}".format( - r.get("full_name", r.get("name")), size_mb, args.starred_skip_size_over + r.get("full_name", r.get("name")), + size_mb, + args.starred_skip_size_over, ) ) else: diff --git a/tests/test_http_451.py b/tests/test_http_451.py index d53d65c..bb825f7 100644 --- a/tests/test_http_451.py +++ b/tests/test_http_451.py @@ -21,6 +21,7 @@ def test_repository_unavailable_error_raised(self): args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = 5 mock_response = Mock() mock_response.getcode.return_value = 451 @@ -30,18 +31,26 @@ def test_repository_unavailable_error_raised(self): "block": { "reason": "dmca", "created_at": "2024-11-12T14:38:04Z", - "html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" - } + "html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md", + }, } mock_response.read.return_value = json.dumps(dmca_data).encode("utf-8") mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: - github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") - - assert exc_info.value.dmca_url == 
"https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/dmca/issues" + ) + + assert ( + exc_info.value.dmca_url + == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" + ) assert "451" in str(exc_info.value) def test_repository_unavailable_error_without_dmca_url(self): @@ -54,6 +63,7 @@ def test_repository_unavailable_error_without_dmca_url(self): args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = 5 mock_response = Mock() mock_response.getcode.return_value = 451 @@ -61,9 +71,14 @@ def test_repository_unavailable_error_without_dmca_url(self): mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: - github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/dmca/issues" + ) assert exc_info.value.dmca_url is None assert "451" in str(exc_info.value) @@ -78,6 +93,7 @@ def test_repository_unavailable_error_with_malformed_json(self): args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = 5 mock_response = Mock() mock_response.getcode.return_value = 451 @@ -85,9 +101,14 @@ def test_repository_unavailable_error_with_malformed_json(self): mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Unavailable For Legal Reasons" - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + 
return_value=mock_response, + ): with pytest.raises(github_backup.RepositoryUnavailableError): - github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues") + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/dmca/issues" + ) if __name__ == "__main__": diff --git a/tests/test_pagination.py b/tests/test_pagination.py index 831b913..e35ff38 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -49,6 +49,7 @@ def mock_args(): args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = 5 return args diff --git a/tests/test_retrieve_data.py b/tests/test_retrieve_data.py index adb1152..fa82bd7 100644 --- a/tests/test_retrieve_data.py +++ b/tests/test_retrieve_data.py @@ -9,26 +9,27 @@ from github_backup import github_backup from github_backup.github_backup import ( - MAX_RETRIES, calculate_retry_delay, make_request_with_retry, ) +# Default retry count used in tests (matches argparse default) +# With max_retries=5, total attempts = 6 (1 initial + 5 retries) +DEFAULT_MAX_RETRIES = 5 + class TestCalculateRetryDelay: def test_respects_retry_after_header(self): - headers = {'retry-after': '30'} + headers = {"retry-after": "30"} assert calculate_retry_delay(0, headers) == 30 def test_respects_rate_limit_reset(self): import time import calendar + # Set reset time 60 seconds in the future future_reset = calendar.timegm(time.gmtime()) + 60 - headers = { - 'x-ratelimit-remaining': '0', - 'x-ratelimit-reset': str(future_reset) - } + headers = {"x-ratelimit-remaining": "0", "x-ratelimit-reset": str(future_reset)} delay = calculate_retry_delay(0, headers) # Should be approximately 60 seconds (with some tolerance for execution time) assert 55 <= delay <= 65 @@ -50,12 +51,10 @@ def test_max_delay_cap(self): def test_minimum_rate_limit_delay(self): import time import calendar + # Set reset time in the past (already reset) past_reset = calendar.timegm(time.gmtime()) - 100 - 
headers = { - 'x-ratelimit-remaining': '0', - 'x-ratelimit-reset': str(past_reset) - } + headers = {"x-ratelimit-remaining": "0", "x-ratelimit-reset": str(past_reset)} delay = calculate_retry_delay(0, headers) # Should be minimum 10 seconds even if reset time is in past assert delay >= 10 @@ -74,6 +73,7 @@ def mock_args(self): args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = DEFAULT_MAX_RETRIES return args def test_json_parse_error_retries_and_fails(self, mock_args): @@ -90,13 +90,22 @@ def mock_make_request(*args, **kwargs): call_count += 1 return mock_response - with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): # No delay in tests + with patch( + "github_backup.github_backup.make_request_with_retry", + side_effect=mock_make_request, + ): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): # No delay in tests with pytest.raises(Exception) as exc_info: - github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/test/repo/issues" + ) assert "Failed to read response after" in str(exc_info.value) - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts def test_json_parse_error_recovers_on_retry(self, mock_args): """HTTP 200 with invalid JSON should succeed if retry returns valid JSON.""" @@ -119,9 +128,16 @@ def mock_make_request(*args, **kwargs): call_count += 1 return result - with patch("github_backup.github_backup.make_request_with_retry", side_effect=mock_make_request): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): - result = github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + with patch( + 
"github_backup.github_backup.make_request_with_retry", + side_effect=mock_make_request, + ): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): + result = github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/test/repo/issues" + ) assert result == [{"id": 1}] assert call_count == 3 # Failed twice, succeeded on third @@ -134,11 +150,18 @@ def test_http_error_raises_exception(self, mock_args): mock_response.headers = {"x-ratelimit-remaining": "5000"} mock_response.reason = "Not Found" - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): with pytest.raises(Exception) as exc_info: - github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/notfound/issues") + github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/test/notfound/issues" + ) - assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError) + assert not isinstance( + exc_info.value, github_backup.RepositoryUnavailableError + ) assert "404" in str(exc_info.value) @@ -151,7 +174,7 @@ def test_502_error_retries_and_succeeds(self): good_response.read.return_value = b'{"ok": true}' call_count = 0 - fail_count = MAX_RETRIES - 1 # Fail all but last attempt + fail_count = DEFAULT_MAX_RETRIES # Fail all retries, succeed on last attempt def mock_urlopen(*args, **kwargs): nonlocal call_count @@ -167,14 +190,18 @@ def mock_urlopen(*args, **kwargs): return good_response with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): result = make_request_with_retry(Mock(), None) assert result == good_response - assert call_count == MAX_RETRIES + assert ( + call_count == 
DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts def test_503_error_retries_until_exhausted(self): - """HTTP 503 should retry MAX_RETRIES times then raise.""" + """HTTP 503 should make 1 initial + DEFAULT_MAX_RETRIES retry attempts then raise.""" call_count = 0 def mock_urlopen(*args, **kwargs): @@ -189,12 +216,16 @@ def mock_urlopen(*args, **kwargs): ) with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): with pytest.raises(HTTPError) as exc_info: make_request_with_retry(Mock(), None) assert exc_info.value.code == 503 - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts def test_404_error_not_retried(self): """HTTP 404 should not be retried - raise immediately.""" @@ -237,7 +268,9 @@ def mock_urlopen(*args, **kwargs): return good_response with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): result = make_request_with_retry(Mock(), None) assert result == good_response @@ -269,7 +302,7 @@ def test_connection_error_retries_and_succeeds(self): """URLError (connection error) should retry and succeed if subsequent request works.""" good_response = Mock() call_count = 0 - fail_count = MAX_RETRIES - 1 # Fail all but last attempt + fail_count = DEFAULT_MAX_RETRIES # Fail all retries, succeed on last attempt def mock_urlopen(*args, **kwargs): nonlocal call_count @@ -279,14 +312,18 @@ def mock_urlopen(*args, **kwargs): return good_response with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with 
patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): result = make_request_with_retry(Mock(), None) assert result == good_response - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts def test_socket_error_retries_until_exhausted(self): - """socket.error should retry MAX_RETRIES times then raise.""" + """socket.error should make 1 initial + DEFAULT_MAX_RETRIES retry attempts then raise.""" call_count = 0 def mock_urlopen(*args, **kwargs): @@ -295,11 +332,15 @@ def mock_urlopen(*args, **kwargs): raise socket.error("Connection reset by peer") with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): - with patch("github_backup.github_backup.calculate_retry_delay", return_value=0): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): with pytest.raises(socket.error): make_request_with_retry(Mock(), None) - assert call_count == MAX_RETRIES + assert ( + call_count == DEFAULT_MAX_RETRIES + 1 + ) # 1 initial + 5 retries = 6 attempts class TestRetrieveDataThrottling: @@ -315,6 +356,7 @@ def mock_args(self): args.osx_keychain_item_account = None args.throttle_limit = 10 # Throttle when remaining <= 10 args.throttle_pause = 5 # Pause 5 seconds + args.max_retries = DEFAULT_MAX_RETRIES return args def test_throttling_pauses_when_rate_limit_low(self, mock_args): @@ -322,11 +364,19 @@ def test_throttling_pauses_when_rate_limit_low(self, mock_args): mock_response = Mock() mock_response.getcode.return_value = 200 mock_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8") - mock_response.headers = {"x-ratelimit-remaining": "5", "Link": ""} # Below throttle_limit - - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): + mock_response.headers = { + "x-ratelimit-remaining": "5", + "Link": "", + } # Below throttle_limit + + with patch( + 
"github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): with patch("github_backup.github_backup.time.sleep") as mock_sleep: - github_backup.retrieve_data(mock_args, "https://api.github.com/repos/test/repo/issues") + github_backup.retrieve_data( + mock_args, "https://api.github.com/repos/test/repo/issues" + ) mock_sleep.assert_called_once_with(5) # throttle_pause value @@ -344,16 +394,121 @@ def mock_args(self): args.osx_keychain_item_account = None args.throttle_limit = None args.throttle_pause = 0 + args.max_retries = DEFAULT_MAX_RETRIES return args def test_dict_response_returned_as_list(self, mock_args): """Single dict response should be returned as a list with one item.""" mock_response = Mock() mock_response.getcode.return_value = 200 - mock_response.read.return_value = json.dumps({"login": "testuser", "id": 123}).encode("utf-8") + mock_response.read.return_value = json.dumps( + {"login": "testuser", "id": 123} + ).encode("utf-8") mock_response.headers = {"x-ratelimit-remaining": "5000", "Link": ""} - with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response): - result = github_backup.retrieve_data(mock_args, "https://api.github.com/user") + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): + result = github_backup.retrieve_data( + mock_args, "https://api.github.com/user" + ) assert result == [{"login": "testuser", "id": 123}] + + +class TestRetriesCliArgument: + """Tests for --retries CLI argument validation and behavior.""" + + def test_retries_argument_accepted(self): + """--retries flag should be accepted and parsed correctly.""" + args = github_backup.parse_args(["--retries", "3", "testuser"]) + assert args.max_retries == 3 + + def test_retries_default_value(self): + """--retries should default to 5 if not specified.""" + args = github_backup.parse_args(["testuser"]) + assert args.max_retries == 5 + + def 
test_retries_zero_is_valid(self): + """--retries 0 should be valid and mean 1 attempt (no retries).""" + args = github_backup.parse_args(["--retries", "0", "testuser"]) + assert args.max_retries == 0 + + def test_retries_negative_rejected(self): + """--retries with negative value should be rejected by argparse.""" + with pytest.raises(SystemExit): + github_backup.parse_args(["--retries", "-1", "testuser"]) + + def test_retries_non_integer_rejected(self): + """--retries with non-integer value should be rejected by argparse.""" + with pytest.raises(SystemExit): + github_backup.parse_args(["--retries", "abc", "testuser"]) + + def test_retries_one_with_transient_error_succeeds(self): + """--retries 1 should allow one retry after initial failure.""" + good_response = Mock() + good_response.read.return_value = b'{"ok": true}' + + call_count = 0 + + def mock_urlopen(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise HTTPError( + url="https://api.github.com/test", + code=502, + msg="Bad Gateway", + hdrs={"x-ratelimit-remaining": "5000"}, + fp=None, + ) + return good_response + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): + result = make_request_with_retry(Mock(), None, max_retries=1) + + assert result == good_response + assert call_count == 2 # 1 initial + 1 retry = 2 attempts + + def test_custom_retry_count_limits_attempts(self): + """Custom --retries value should limit actual retry attempts.""" + args = Mock() + args.as_app = False + args.token_fine = None + args.token_classic = "fake_token" + args.osx_keychain_item_name = None + args.osx_keychain_item_account = None + args.throttle_limit = None + args.throttle_pause = 0 + args.max_retries = 2 # 2 retries = 3 total attempts (1 initial + 2 retries) + + mock_response = Mock() + mock_response.getcode.return_value = 200 + mock_response.read.return_value = b"not valid 
json {" + mock_response.headers = {"x-ratelimit-remaining": "5000"} + + call_count = 0 + + def mock_make_request(*args, **kwargs): + nonlocal call_count + call_count += 1 + return mock_response + + with patch( + "github_backup.github_backup.make_request_with_retry", + side_effect=mock_make_request, + ): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): + with pytest.raises(Exception) as exc_info: + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/repo/issues" + ) + + assert "Failed to read response after 3 attempts" in str(exc_info.value) + assert call_count == 3 # 1 initial + 2 retries = 3 attempts From 858731ebbd609c9eb5caecce9bbb8b5e04b490bb Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 24 Dec 2025 00:45:01 +0000 Subject: [PATCH 400/455] Release version 0.60.0 --- CHANGES.rst | 11 ++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a6a1c4d..ee2a1d4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,18 @@ Changelog ========= -0.59.0 (2025-12-21) +0.60.0 (2025-12-24) ------------------- ------------------------ +- Rm max_retries.py. [michaelmartinez] +- Readme. [michaelmartinez] +- Don't use a global variable, pass the args instead. [michaelmartinez] +- Readme, simplify the logic a bit. [michaelmartinez] +- Max_retries 5. 
[michaelmartinez] + + +0.59.0 (2025-12-21) +------------------- - Add --starred-skip-size-over flag to limit starred repo size (#108) [Rodos] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 25dbb4b..5684ec7 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.59.0" +__version__ = "0.60.0" From 9a6f0b4c21be4f9157a110b96a5561d672dbf6b1 Mon Sep 17 00:00:00 2001 From: Lukas Bestle Date: Fri, 9 Jan 2026 21:04:21 +0100 Subject: [PATCH 401/455] feat: Backup of repository security advisories --- README.rst | 10 ++++---- github_backup/github_backup.py | 44 ++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index df31e28..8e00d49 100644 --- a/README.rst +++ b/README.rst @@ -43,9 +43,9 @@ CLI Help output:: [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--pull-details] - [--labels] [--hooks] [--milestones] [--repositories] - [--bare] [--no-prune] [--lfs] [--wikis] [--gists] - [--starred-gists] [--skip-archived] [--skip-existing] + [--labels] [--hooks] [--milestones] [--security-advisories] + [--repositories] [--bare] [--no-prune] [--lfs] [--wikis] + [--gists] [--starred-gists] [--skip-archived] [--skip-existing] [-L [LANGUAGES ...]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] @@ -101,6 +101,8 @@ CLI Help output:: --hooks include hooks in backup (works only when authenticated) --milestones include milestones in backup + --security-advisories + include security advisories in backup --repositories include repository clone in backup --bare clone bare repositories --no-prune disable prune option for git fetch @@ -401,7 +403,7 @@ Quietly and incrementally backup useful Github user data (public and private rep export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN 
GH_USER=YOUR-GITHUB-USER - github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER + github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --security-advisories --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER Debug an error/block or incomplete backup into a temporary directory. Omit "incremental" to fill a previous incomplete backup. :: diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 12b354b..8a60f66 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -310,6 +310,12 @@ def parse_args(args=None): dest="include_milestones", help="include milestones in backup", ) + parser.add_argument( + "--security-advisories", + action="store_true", + dest="include_security_advisories", + help="include security advisories in backup", + ) parser.add_argument( "--repositories", action="store_true", @@ -1718,6 +1724,9 @@ def backup_repositories(args, output_directory, repositories): if args.include_milestones or args.include_everything: backup_milestones(args, repo_cwd, repository, repos_template) + if args.include_security_advisories or args.include_everything: + backup_security_advisories(args, repo_cwd, repository, repos_template) + if args.include_labels or args.include_everything: backup_labels(args, repo_cwd, repository, repos_template) @@ -1934,6 +1943,41 @@ def backup_milestones(args, repo_cwd, repository, repos_template): ) +def backup_security_advisories(args, repo_cwd, 
repository, repos_template): + advisory_cwd = os.path.join(repo_cwd, "security-advisories") + if args.skip_existing and os.path.isdir(advisory_cwd): + return + + logger.info("Retrieving {0} security advisories".format(repository["full_name"])) + mkdir_p(repo_cwd, advisory_cwd) + + template = "{0}/{1}/security-advisories".format(repos_template, repository["full_name"]) + + _advisories = retrieve_data(args, template) + + advisories = {} + for advisory in _advisories: + advisories[advisory["ghsa_id"]] = advisory + + written_count = 0 + for ghsa_id, advisory in list(advisories.items()): + advisory_file = "{0}/{1}.json".format(advisory_cwd, ghsa_id) + if json_dump_if_changed(advisory, advisory_file): + written_count += 1 + + total = len(advisories) + if written_count == total: + logger.info("Saved {0} security advisories to disk".format(total)) + elif written_count == 0: + logger.info("{0} security advisories unchanged, skipped write".format(total)) + else: + logger.info( + "Saved {0} of {1} security advisories to disk ({2} unchanged)".format( + written_count, total, total - written_count + ) + ) + + def backup_labels(args, repo_cwd, repository, repos_template): label_cwd = os.path.join(repo_cwd, "labels") output_file = "{0}/labels.json".format(label_cwd) From a175ac3ed90cbcb5aa29785f8ce5adc7567e9123 Mon Sep 17 00:00:00 2001 From: Lukas Bestle Date: Sat, 10 Jan 2026 11:12:42 +0100 Subject: [PATCH 402/455] test: Adapt tests to new argument --- tests/test_all_starred.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_all_starred.py b/tests/test_all_starred.py index 0fab048..297d148 100644 --- a/tests/test_all_starred.py +++ b/tests/test_all_starred.py @@ -37,6 +37,7 @@ def _create_mock_args(self, **overrides): args.include_labels = False args.include_hooks = False args.include_milestones = False + args.include_security_advisories = False args.include_releases = False args.include_assets = False args.include_attachments = False From 
b3d35f9d9f7f3c1223c2eb94a8e0cd3c8a466e79 Mon Sep 17 00:00:00 2001 From: Lukas Bestle Date: Sat, 10 Jan 2026 15:44:37 +0100 Subject: [PATCH 403/455] docs: Add missing `--retries` argument to README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index df31e28..f5149e6 100644 --- a/README.rst +++ b/README.rst @@ -55,7 +55,7 @@ CLI Help output:: [--skip-assets-on [SKIP_ASSETS_ON ...]] [--attachments] [--throttle-limit THROTTLE_LIMIT] [--throttle-pause THROTTLE_PAUSE] - [--exclude [EXCLUDE ...]] + [--exclude [EXCLUDE ...]] [--retries MAX_RETRIES] USER Backup a github account From c63fb37d30fc5547f39c2ba798c30a97545ea285 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 12 Jan 2026 16:30:28 +0000 Subject: [PATCH 404/455] Release version 0.61.0 --- CHANGES.rst | 9 ++++++++- github_backup/__init__.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index ee2a1d4..0e66663 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,16 @@ Changelog ========= -0.60.0 (2025-12-24) +0.61.0 (2026-01-12) ------------------- ------------------------ +- Docs: Add missing `--retries` argument to README. [Lukas Bestle] +- Test: Adapt tests to new argument. [Lukas Bestle] +- Feat: Backup of repository security advisories. [Lukas Bestle] + + +0.60.0 (2025-12-24) +------------------- - Rm max_retries.py. [michaelmartinez] - Readme. [michaelmartinez] - Don't use a global variable, pass the args instead. 
[michaelmartinez] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 5684ec7..a076e5d 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.60.0" +__version__ = "0.61.0" From fce4abb74ae729679d5a6dc7b0b5cf57044efcf2 Mon Sep 17 00:00:00 2001 From: Rodos Date: Tue, 13 Jan 2026 13:15:38 +1100 Subject: [PATCH 405/455] Fix fine-grained PAT attachment downloads for private repos (#477) Fine-grained personal access tokens cannot download attachments from private repositories directly due to a GitHub platform limitation. This adds a workaround for image attachments (/assets/ URLs) using GitHub's Markdown API to convert URLs to JWT-signed URLs that can be downloaded without authentication. Changes: - Add get_jwt_signed_url_via_markdown_api() function - Detect fine-grained token + private repo + /assets/ URL upfront - Use JWT workaround for those cases, mark success with jwt_workaround flag - Skip download with skipped_at when workaround fails - Add startup warning when using --attachments with fine-grained tokens - Document limitation in README (file attachments still fail) - Add 6 unit tests for JWT workaround logic --- README.rst | 2 + github_backup/cli.py | 10 +++ github_backup/github_backup.py | 108 ++++++++++++++++++++++++-- tests/test_attachments.py | 136 +++++++++++++++++++++++++++++++++ 4 files changed, 248 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index e2c8fc2..c23027d 100644 --- a/README.rst +++ b/README.rst @@ -281,6 +281,8 @@ The tool automatically extracts file extensions from HTTP headers to ensure file **Repository filtering** for repo files/assets handles renamed and transferred repositories gracefully. URLs are included if they either match the current repository name directly, or redirect to it (e.g., ``willmcgugan/rich`` redirects to ``Textualize/rich`` after transfer). 
+**Fine-grained token limitation:** Due to a GitHub platform limitation, fine-grained personal access tokens (``github_pat_...``) cannot download attachments from private repositories directly. This affects both ``/assets/`` (images) and ``/files/`` (documents) URLs. The tool implements a workaround for image attachments using GitHub's Markdown API, which converts URLs to temporary JWT-signed URLs that can be downloaded. However, this workaround only works for images - document attachments (PDFs, text files, etc.) will fail with 404 errors when using fine-grained tokens on private repos. For full attachment support on private repositories, use a classic token (``-t``) instead of a fine-grained token (``-f``). See `#477 `_ for details. + Run in Docker container ----------------------- diff --git a/github_backup/cli.py b/github_backup/cli.py index 54849d4..987ae71 100644 --- a/github_backup/cli.py +++ b/github_backup/cli.py @@ -46,6 +46,16 @@ def main(): "Use -t/--token or -f/--token-fine to authenticate." ) + # Issue #477: Fine-grained PATs cannot download all attachment types from + # private repos. Image attachments will be retried via Markdown API workaround. + if args.include_attachments and args.token_fine: + logger.warning( + "Using --attachments with fine-grained token. Due to GitHub platform " + "limitations, file attachments (PDFs, etc.) from private repos may fail. " + "Image attachments will be retried via workaround. For full attachment " + "support, use --token-classic instead." 
+ ) + if args.quiet: logger.setLevel(logging.WARNING) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 8a60f66..705f013 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1062,6 +1062,65 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False): return metadata +def get_jwt_signed_url_via_markdown_api(url, token, repo_context): + """Convert a user-attachments/assets URL to a JWT-signed URL via Markdown API. + + GitHub's Markdown API renders image URLs and returns HTML containing + JWT-signed private-user-images.githubusercontent.com URLs that work + without token authentication. + + This is a workaround for issue #477 where fine-grained PATs cannot + download user-attachments URLs from private repos directly. + + Limitations: + - Only works for /assets/ URLs (images) + - Does NOT work for /files/ URLs (PDFs, text files, etc.) + - JWT URLs expire after ~5 minutes + + Args: + url: The github.com/user-attachments/assets/UUID URL + token: Raw fine-grained PAT (github_pat_...) 
+ repo_context: Repository context as "owner/repo" + + Returns: + str: JWT-signed URL from private-user-images.githubusercontent.com + None: If conversion fails + """ + + try: + payload = json.dumps( + {"text": f"![img]({url})", "mode": "gfm", "context": repo_context} + ).encode("utf-8") + + request = Request("https://api.github.com/markdown", data=payload, method="POST") + request.add_header("Authorization", f"token {token}") + request.add_header("Content-Type", "application/json") + request.add_header("Accept", "application/vnd.github+json") + + html = urlopen(request, timeout=30).read().decode("utf-8") + + # Parse JWT-signed URL from HTML response + # Format: + if match := re.search( + r'src="(https://private-user-images\.githubusercontent\.com/[^"]+)"', html + ): + jwt_url = match.group(1) + logger.debug("Converted attachment URL to JWT-signed URL via Markdown API") + return jwt_url + + logger.debug("Markdown API response did not contain JWT-signed URL") + return None + + except HTTPError as e: + logger.debug( + "Markdown API request failed with HTTP {0}: {1}".format(e.code, e.reason) + ) + return None + except Exception as e: + logger.debug("Markdown API request failed: {0}".format(str(e))) + return None + + def extract_attachment_urls(item_data, issue_number=None, repository_full_name=None): """Extract GitHub-hosted attachment URLs from issue/PR body and comments. @@ -1415,15 +1474,46 @@ def download_attachments( filename = get_attachment_filename(url) filepath = os.path.join(attachments_dir, filename) - # Download and get metadata - metadata = download_attachment_file( - url, - filepath, - get_auth(args, encode=not args.as_app), - as_app=args.as_app, - fine=args.token_fine is not None, + # Issue #477: Fine-grained PATs cannot download user-attachments/assets + # from private repos directly (404). Use Markdown API workaround to get + # a JWT-signed URL. Only works for /assets/ (images), not /files/. 
+ needs_jwt = ( + args.token_fine is not None + and repository.get("private", False) + and "github.com/user-attachments/assets/" in url ) + if not needs_jwt: + # NORMAL download path + metadata = download_attachment_file( + url, + filepath, + get_auth(args, encode=not args.as_app), + as_app=args.as_app, + fine=args.token_fine is not None, + ) + elif jwt_url := get_jwt_signed_url_via_markdown_api( + url, args.token_fine, repository["full_name"] + ): + # JWT needed and extracted, download via JWT + metadata = download_attachment_file( + jwt_url, filepath, auth=None, as_app=False, fine=False + ) + metadata["url"] = url # Apply back the original URL + metadata["jwt_workaround"] = True + else: + # Markdown API workaround failed - skip download we know will fail + metadata = { + "url": url, + "success": False, + "skipped_at": datetime.now(timezone.utc).isoformat(), + "error": "Fine-grained token cannot download private repo attachments. " + "Markdown API workaround failed. Use --token-classic instead.", + } + logger.warning( + "Skipping attachment {0}: {1}".format(url, metadata["error"]) + ) + # If download succeeded but we got an extension from Content-Disposition, # we may need to rename the file to add the extension if metadata["success"] and metadata.get("original_filename"): @@ -1951,7 +2041,9 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template): logger.info("Retrieving {0} security advisories".format(repository["full_name"])) mkdir_p(repo_cwd, advisory_cwd) - template = "{0}/{1}/security-advisories".format(repos_template, repository["full_name"]) + template = "{0}/{1}/security-advisories".format( + repos_template, repository["full_name"] + ) _advisories = retrieve_data(args, template) diff --git a/tests/test_attachments.py b/tests/test_attachments.py index b338caf..4613984 100644 --- a/tests/test_attachments.py +++ b/tests/test_attachments.py @@ -349,3 +349,139 @@ def test_manifest_skips_permanent_failures(self, attachment_test_setup): 
downloaded_urls[0] == "https://github.com/user-attachments/assets/unavailable" ) + + +class TestJWTWorkaround: + """Test JWT workaround for fine-grained tokens on private repos (issue #477).""" + + def test_markdown_api_extracts_jwt_url(self): + """Markdown API response with JWT URL is extracted correctly.""" + from unittest.mock import patch, Mock + + html_response = '''

img

''' + + mock_response = Mock() + mock_response.read.return_value = html_response.encode("utf-8") + + with patch("github_backup.github_backup.urlopen", return_value=mock_response): + result = github_backup.get_jwt_signed_url_via_markdown_api( + "https://github.com/user-attachments/assets/abc123", + "github_pat_token", + "owner/repo" + ) + + assert result == "https://private-user-images.githubusercontent.com/123/abc.png?jwt=eyJhbGciOiJ" + + def test_markdown_api_returns_none_on_http_error(self): + """HTTP errors return None.""" + from unittest.mock import patch + from urllib.error import HTTPError + + with patch("github_backup.github_backup.urlopen", side_effect=HTTPError(None, 403, "Forbidden", {}, None)): + result = github_backup.get_jwt_signed_url_via_markdown_api( + "https://github.com/user-attachments/assets/abc123", + "github_pat_token", + "owner/repo" + ) + + assert result is None + + def test_markdown_api_returns_none_when_no_jwt_url(self): + """Response without JWT URL returns None.""" + from unittest.mock import patch, Mock + + mock_response = Mock() + mock_response.read.return_value = b"

No image here

" + + with patch("github_backup.github_backup.urlopen", return_value=mock_response): + result = github_backup.get_jwt_signed_url_via_markdown_api( + "https://github.com/user-attachments/assets/abc123", + "github_pat_token", + "owner/repo" + ) + + assert result is None + + def test_needs_jwt_only_for_fine_grained_private_assets(self): + """needs_jwt is True only for fine-grained + private + /assets/ URL.""" + assets_url = "https://github.com/user-attachments/assets/abc123" + files_url = "https://github.com/user-attachments/files/123/doc.pdf" + + # Fine-grained + private + assets = True + assert ( + "github_pat_" is not None + and True # private + and "github.com/user-attachments/assets/" in assets_url + ) is True + + # Fine-grained + private + files = False + assert ( + "github_pat_" is not None + and True + and "github.com/user-attachments/assets/" in files_url + ) is False + + # Fine-grained + public + assets = False + assert ( + "github_pat_" is not None + and False # public + and "github.com/user-attachments/assets/" in assets_url + ) is False + + def test_jwt_workaround_sets_manifest_flag(self, attachment_test_setup): + """Successful JWT workaround sets jwt_workaround flag in manifest.""" + from unittest.mock import patch, Mock + + setup = attachment_test_setup + setup["args"].token_fine = "github_pat_test" + setup["repository"]["private"] = True + + issue_data = {"body": "https://github.com/user-attachments/assets/abc123"} + + jwt_url = "https://private-user-images.githubusercontent.com/123/abc.png?jwt=token" + + with patch( + "github_backup.github_backup.get_jwt_signed_url_via_markdown_api", + return_value=jwt_url + ), patch( + "github_backup.github_backup.download_attachment_file", + return_value={"success": True, "http_status": 200, "url": jwt_url} + ): + github_backup.download_attachments( + setup["args"], setup["issue_cwd"], issue_data, 123, setup["repository"] + ) + + manifest_path = os.path.join(setup["issue_cwd"], "attachments", "123", "manifest.json") 
+ with open(manifest_path) as f: + manifest = json.load(f) + + assert manifest["attachments"][0]["jwt_workaround"] is True + assert manifest["attachments"][0]["url"] == "https://github.com/user-attachments/assets/abc123" + + def test_jwt_workaround_failure_uses_skipped_at(self, attachment_test_setup): + """Failed JWT workaround uses skipped_at instead of downloaded_at.""" + from unittest.mock import patch + + setup = attachment_test_setup + setup["args"].token_fine = "github_pat_test" + setup["repository"]["private"] = True + + issue_data = {"body": "https://github.com/user-attachments/assets/abc123"} + + with patch( + "github_backup.github_backup.get_jwt_signed_url_via_markdown_api", + return_value=None # Markdown API failed + ): + github_backup.download_attachments( + setup["args"], setup["issue_cwd"], issue_data, 123, setup["repository"] + ) + + manifest_path = os.path.join(setup["issue_cwd"], "attachments", "123", "manifest.json") + with open(manifest_path) as f: + manifest = json.load(f) + + attachment = manifest["attachments"][0] + assert attachment["success"] is False + assert "skipped_at" in attachment + assert "downloaded_at" not in attachment + assert "Use --token-classic" in attachment["error"] From ab0eebb175009a07727bd23eb78b5e9f9e0f13bc Mon Sep 17 00:00:00 2001 From: Rodos Date: Tue, 13 Jan 2026 13:43:45 +1100 Subject: [PATCH 406/455] Refactor test fixtures to use shared create_args helper Uses the real parse_args() function to get CLI defaults, so when new arguments are added they're automatically available to all tests. Changes: - Add tests/conftest.py with create_args fixture - Update 8 test files to use shared fixture - Remove duplicate _create_mock_args methods - Remove redundant @pytest.fixture mock_args definitions This eliminates the need to update multiple test files when adding new CLI arguments. 
--- tests/conftest.py | 25 ++++++++ tests/test_all_starred.py | 62 +++----------------- tests/test_attachments.py | 72 +++++++++++------------ tests/test_case_sensitivity.py | 46 ++------------- tests/test_http_451.py | 36 ++---------- tests/test_pagination.py | 34 ++++------- tests/test_retrieve_data.py | 87 +++++++++------------------- tests/test_skip_assets_on.py | 76 +++++------------------- tests/test_starred_skip_size_over.py | 75 +++++++++--------------- 9 files changed, 158 insertions(+), 355 deletions(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..b36fe64 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,25 @@ +"""Shared pytest fixtures for github-backup tests.""" + +import pytest + +from github_backup.github_backup import parse_args + + +@pytest.fixture +def create_args(): + """Factory fixture that creates args with real CLI defaults. + + Uses the actual argument parser so new CLI args are automatically + available with their defaults - no test updates needed. + + Usage: + def test_something(self, create_args): + args = create_args(include_releases=True, user="myuser") + """ + def _create(**overrides): + # Use real parser to get actual defaults + args = parse_args(["testuser"]) + for key, value in overrides.items(): + setattr(args, key, value) + return args + return _create diff --git a/tests/test_all_starred.py b/tests/test_all_starred.py index 297d148..9776926 100644 --- a/tests/test_all_starred.py +++ b/tests/test_all_starred.py @@ -1,7 +1,7 @@ """Tests for --all-starred flag behavior (issue #225).""" import pytest -from unittest.mock import Mock, patch +from unittest.mock import patch from github_backup import github_backup @@ -12,58 +12,14 @@ class TestAllStarredCloning: Issue #225: --all-starred should clone starred repos without requiring --repositories. 
""" - def _create_mock_args(self, **overrides): - """Create a mock args object with sensible defaults.""" - args = Mock() - args.user = "testuser" - args.output_directory = "/tmp/backup" - args.include_repository = False - args.include_everything = False - args.include_gists = False - args.include_starred_gists = False - args.all_starred = False - args.skip_existing = False - args.bare_clone = False - args.lfs_clone = False - args.no_prune = False - args.include_wiki = False - args.include_issues = False - args.include_issue_comments = False - args.include_issue_events = False - args.include_pulls = False - args.include_pull_comments = False - args.include_pull_commits = False - args.include_pull_details = False - args.include_labels = False - args.include_hooks = False - args.include_milestones = False - args.include_security_advisories = False - args.include_releases = False - args.include_assets = False - args.include_attachments = False - args.incremental = False - args.incremental_by_files = False - args.github_host = None - args.prefer_ssh = False - args.token_classic = None - args.token_fine = None - args.as_app = False - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - - for key, value in overrides.items(): - setattr(args, key, value) - - return args - @patch('github_backup.github_backup.fetch_repository') @patch('github_backup.github_backup.get_github_repo_url') - def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch): + def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_fetch, create_args): """--all-starred should clone starred repos without --repositories flag. This is the core fix for issue #225. 
""" - args = self._create_mock_args(all_starred=True) + args = create_args(all_starred=True) mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git" # A starred repository (is_starred flag set by retrieve_repositories) @@ -88,9 +44,9 @@ def test_all_starred_clones_without_repositories_flag(self, mock_get_url, mock_f @patch('github_backup.github_backup.fetch_repository') @patch('github_backup.github_backup.get_github_repo_url') - def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch): + def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mock_fetch, create_args): """Starred repos should NOT be cloned if --all-starred is not set.""" - args = self._create_mock_args(all_starred=False) + args = create_args(all_starred=False) mock_get_url.return_value = "https://github.com/otheruser/awesome-project.git" starred_repo = { @@ -111,9 +67,9 @@ def test_starred_repo_not_cloned_without_all_starred_flag(self, mock_get_url, mo @patch('github_backup.github_backup.fetch_repository') @patch('github_backup.github_backup.get_github_repo_url') - def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch): + def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, mock_fetch, create_args): """Non-starred repos should NOT be cloned when only --all-starred is set.""" - args = self._create_mock_args(all_starred=True) + args = create_args(all_starred=True) mock_get_url.return_value = "https://github.com/testuser/my-project.git" # A regular (non-starred) repository @@ -135,9 +91,9 @@ def test_non_starred_repo_not_cloned_with_only_all_starred(self, mock_get_url, m @patch('github_backup.github_backup.fetch_repository') @patch('github_backup.github_backup.get_github_repo_url') - def test_repositories_flag_still_works(self, mock_get_url, mock_fetch): + def test_repositories_flag_still_works(self, mock_get_url, mock_fetch, create_args): """--repositories flag 
should still clone repos as before.""" - args = self._create_mock_args(include_repository=True) + args = create_args(include_repository=True) mock_get_url.return_value = "https://github.com/testuser/my-project.git" regular_repo = { diff --git a/tests/test_attachments.py b/tests/test_attachments.py index 4613984..241a08f 100644 --- a/tests/test_attachments.py +++ b/tests/test_attachments.py @@ -4,7 +4,7 @@ import os import tempfile from pathlib import Path -from unittest.mock import Mock +from unittest.mock import Mock, patch import pytest @@ -12,22 +12,13 @@ @pytest.fixture -def attachment_test_setup(tmp_path): +def attachment_test_setup(tmp_path, create_args): """Fixture providing setup and helper for attachment download tests.""" - from unittest.mock import patch - issue_cwd = tmp_path / "issues" issue_cwd.mkdir() - # Mock args - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.user = "testuser" - args.repository = "testrepo" + # Create args using shared fixture + args = create_args(user="testuser", repository="testrepo") repository = {"full_name": "testuser/testrepo"} @@ -356,9 +347,12 @@ class TestJWTWorkaround: def test_markdown_api_extracts_jwt_url(self): """Markdown API response with JWT URL is extracted correctly.""" - from unittest.mock import patch, Mock - - html_response = '''

img

''' + html_response = ( + '

' + ) mock_response = Mock() mock_response.read.return_value = html_response.encode("utf-8") @@ -370,14 +364,18 @@ def test_markdown_api_extracts_jwt_url(self): "owner/repo" ) - assert result == "https://private-user-images.githubusercontent.com/123/abc.png?jwt=eyJhbGciOiJ" + expected = ( + "https://private-user-images.githubusercontent.com" + "/123/abc.png?jwt=eyJhbGciOiJ" + ) + assert result == expected def test_markdown_api_returns_none_on_http_error(self): """HTTP errors return None.""" - from unittest.mock import patch from urllib.error import HTTPError - with patch("github_backup.github_backup.urlopen", side_effect=HTTPError(None, 403, "Forbidden", {}, None)): + error = HTTPError("http://test", 403, "Forbidden", {}, None) + with patch("github_backup.github_backup.urlopen", side_effect=error): result = github_backup.get_jwt_signed_url_via_markdown_api( "https://github.com/user-attachments/assets/abc123", "github_pat_token", @@ -388,8 +386,6 @@ def test_markdown_api_returns_none_on_http_error(self): def test_markdown_api_returns_none_when_no_jwt_url(self): """Response without JWT URL returns None.""" - from unittest.mock import patch, Mock - mock_response = Mock() mock_response.read.return_value = b"

No image here

" @@ -406,32 +402,36 @@ def test_needs_jwt_only_for_fine_grained_private_assets(self): """needs_jwt is True only for fine-grained + private + /assets/ URL.""" assets_url = "https://github.com/user-attachments/assets/abc123" files_url = "https://github.com/user-attachments/files/123/doc.pdf" + token_fine = "github_pat_test" + private = True + public = False # Fine-grained + private + assets = True - assert ( - "github_pat_" is not None - and True # private + needs_jwt = ( + token_fine is not None + and private and "github.com/user-attachments/assets/" in assets_url - ) is True + ) + assert needs_jwt is True # Fine-grained + private + files = False - assert ( - "github_pat_" is not None - and True + needs_jwt = ( + token_fine is not None + and private and "github.com/user-attachments/assets/" in files_url - ) is False + ) + assert needs_jwt is False # Fine-grained + public + assets = False - assert ( - "github_pat_" is not None - and False # public + needs_jwt = ( + token_fine is not None + and public and "github.com/user-attachments/assets/" in assets_url - ) is False + ) + assert needs_jwt is False def test_jwt_workaround_sets_manifest_flag(self, attachment_test_setup): """Successful JWT workaround sets jwt_workaround flag in manifest.""" - from unittest.mock import patch, Mock - setup = attachment_test_setup setup["args"].token_fine = "github_pat_test" setup["repository"]["private"] = True @@ -460,8 +460,6 @@ def test_jwt_workaround_sets_manifest_flag(self, attachment_test_setup): def test_jwt_workaround_failure_uses_skipped_at(self, attachment_test_setup): """Failed JWT workaround uses skipped_at instead of downloaded_at.""" - from unittest.mock import patch - setup = attachment_test_setup setup["args"].token_fine = "github_pat_test" setup["repository"]["private"] = True diff --git a/tests/test_case_sensitivity.py b/tests/test_case_sensitivity.py index 058a7df..795c14b 100644 --- a/tests/test_case_sensitivity.py +++ b/tests/test_case_sensitivity.py @@ -1,7 +1,6 
@@ """Tests for case-insensitive username/organization filtering.""" import pytest -from unittest.mock import Mock from github_backup import github_backup @@ -9,25 +8,14 @@ class TestCaseSensitivity: """Test suite for case-insensitive username matching in filter_repositories.""" - def test_filter_repositories_case_insensitive_user(self): + def test_filter_repositories_case_insensitive_user(self, create_args): """Should filter repositories case-insensitively for usernames. Reproduces issue #198 where typing 'iamrodos' fails to match repositories with owner.login='Iamrodos' (the canonical case from GitHub API). """ # Simulate user typing lowercase username - args = Mock() - args.user = "iamrodos" # lowercase (what user typed) - args.repository = None - args.name_regex = None - args.languages = None - args.exclude = None - args.fork = False - args.private = False - args.public = False - args.all = True - args.skip_archived = False - args.starred_skip_size_over = None + args = create_args(user="iamrodos") # Simulate GitHub API returning canonical case repos = [ @@ -52,23 +40,12 @@ def test_filter_repositories_case_insensitive_user(self): assert filtered[0]["name"] == "repo1" assert filtered[1]["name"] == "repo2" - def test_filter_repositories_case_insensitive_org(self): + def test_filter_repositories_case_insensitive_org(self, create_args): """Should filter repositories case-insensitively for organizations. Tests the example from issue #198 where 'prai-org' doesn't match 'PRAI-Org'. 
""" - args = Mock() - args.user = "prai-org" # lowercase (what user typed) - args.repository = None - args.name_regex = None - args.languages = None - args.exclude = None - args.fork = False - args.private = False - args.public = False - args.all = True - args.skip_archived = False - args.starred_skip_size_over = None + args = create_args(user="prai-org") repos = [ { @@ -85,20 +62,9 @@ def test_filter_repositories_case_insensitive_org(self): assert len(filtered) == 1 assert filtered[0]["name"] == "repo1" - def test_filter_repositories_case_variations(self): + def test_filter_repositories_case_variations(self, create_args): """Should handle various case combinations correctly.""" - args = Mock() - args.user = "TeSt-UsEr" # Mixed case - args.repository = None - args.name_regex = None - args.languages = None - args.exclude = None - args.fork = False - args.private = False - args.public = False - args.all = True - args.skip_archived = False - args.starred_skip_size_over = None + args = create_args(user="TeSt-UsEr") repos = [ {"name": "repo1", "owner": {"login": "test-user"}, "private": False, "fork": False}, diff --git a/tests/test_http_451.py b/tests/test_http_451.py index bb825f7..b556069 100644 --- a/tests/test_http_451.py +++ b/tests/test_http_451.py @@ -11,17 +11,9 @@ class TestHTTP451Exception: """Test suite for HTTP 451 DMCA takedown exception handling.""" - def test_repository_unavailable_error_raised(self): + def test_repository_unavailable_error_raised(self, create_args): """HTTP 451 should raise RepositoryUnavailableError with DMCA URL.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 5 + args = create_args() mock_response = Mock() mock_response.getcode.return_value = 451 @@ -53,17 +45,9 @@ def test_repository_unavailable_error_raised(self): ) assert "451" in 
str(exc_info.value) - def test_repository_unavailable_error_without_dmca_url(self): + def test_repository_unavailable_error_without_dmca_url(self, create_args): """HTTP 451 without DMCA details should still raise exception.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 5 + args = create_args() mock_response = Mock() mock_response.getcode.return_value = 451 @@ -83,17 +67,9 @@ def test_repository_unavailable_error_without_dmca_url(self): assert exc_info.value.dmca_url is None assert "451" in str(exc_info.value) - def test_repository_unavailable_error_with_malformed_json(self): + def test_repository_unavailable_error_with_malformed_json(self, create_args): """HTTP 451 with malformed JSON should still raise exception.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = None - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 5 + args = create_args() mock_response = Mock() mock_response.getcode.return_value = 451 diff --git a/tests/test_pagination.py b/tests/test_pagination.py index e35ff38..1931042 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -1,9 +1,7 @@ """Tests for Link header pagination handling.""" import json -from unittest.mock import Mock, patch - -import pytest +from unittest.mock import patch from github_backup import github_backup @@ -38,23 +36,9 @@ def headers(self): return headers -@pytest.fixture -def mock_args(): - """Mock args for retrieve_data.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries 
= 5 - return args - - -def test_cursor_based_pagination(mock_args): +def test_cursor_based_pagination(create_args): """Link header with 'after' cursor parameter works correctly.""" + args = create_args(token_classic="fake_token") # Simulate issues endpoint behavior: returns cursor in Link header responses = [ @@ -77,7 +61,7 @@ def mock_urlopen(request, *args, **kwargs): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): results = github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/owner/repo/issues" + args, "https://api.github.com/repos/owner/repo/issues" ) # Verify all items retrieved and cursor was used in second request @@ -86,8 +70,9 @@ def mock_urlopen(request, *args, **kwargs): assert "after=ABC123" in requests_made[1] -def test_page_based_pagination(mock_args): +def test_page_based_pagination(create_args): """Link header with 'page' parameter works correctly.""" + args = create_args(token_classic="fake_token") # Simulate pulls/repos endpoint behavior: returns page numbers in Link header responses = [ @@ -110,7 +95,7 @@ def mock_urlopen(request, *args, **kwargs): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): results = github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/owner/repo/pulls" + args, "https://api.github.com/repos/owner/repo/pulls" ) # Verify all items retrieved and page parameter was used (not cursor) @@ -120,8 +105,9 @@ def mock_urlopen(request, *args, **kwargs): assert "after" not in requests_made[1] -def test_no_link_header_stops_pagination(mock_args): +def test_no_link_header_stops_pagination(create_args): """Pagination stops when Link header is absent.""" + args = create_args(token_classic="fake_token") # Simulate endpoint with results that fit in a single page responses = [ @@ -138,7 +124,7 @@ def mock_urlopen(request, *args, **kwargs): with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): results = 
github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/owner/repo/labels" + args, "https://api.github.com/repos/owner/repo/labels" ) # Verify pagination stopped after first request diff --git a/tests/test_retrieve_data.py b/tests/test_retrieve_data.py index fa82bd7..159f06e 100644 --- a/tests/test_retrieve_data.py +++ b/tests/test_retrieve_data.py @@ -63,21 +63,9 @@ def test_minimum_rate_limit_delay(self): class TestRetrieveDataRetry: """Tests for retry behavior in retrieve_data.""" - @pytest.fixture - def mock_args(self): - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = DEFAULT_MAX_RETRIES - return args - - def test_json_parse_error_retries_and_fails(self, mock_args): + def test_json_parse_error_retries_and_fails(self, create_args): """HTTP 200 with invalid JSON should retry and eventually fail.""" + args = create_args(token_classic="fake_token") mock_response = Mock() mock_response.getcode.return_value = 200 mock_response.read.return_value = b"not valid json {" @@ -85,7 +73,7 @@ def test_json_parse_error_retries_and_fails(self, mock_args): call_count = 0 - def mock_make_request(*args, **kwargs): + def mock_make_request(*a, **kw): nonlocal call_count call_count += 1 return mock_response @@ -99,7 +87,7 @@ def mock_make_request(*args, **kwargs): ): # No delay in tests with pytest.raises(Exception) as exc_info: github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/test/repo/issues" + args, "https://api.github.com/repos/test/repo/issues" ) assert "Failed to read response after" in str(exc_info.value) @@ -107,8 +95,9 @@ def mock_make_request(*args, **kwargs): call_count == DEFAULT_MAX_RETRIES + 1 ) # 1 initial + 5 retries = 6 attempts - def test_json_parse_error_recovers_on_retry(self, mock_args): + def 
test_json_parse_error_recovers_on_retry(self, create_args): """HTTP 200 with invalid JSON should succeed if retry returns valid JSON.""" + args = create_args(token_classic="fake_token") bad_response = Mock() bad_response.getcode.return_value = 200 bad_response.read.return_value = b"not valid json {" @@ -122,7 +111,7 @@ def test_json_parse_error_recovers_on_retry(self, mock_args): responses = [bad_response, bad_response, good_response] call_count = 0 - def mock_make_request(*args, **kwargs): + def mock_make_request(*a, **kw): nonlocal call_count result = responses[call_count] call_count += 1 @@ -136,14 +125,15 @@ def mock_make_request(*args, **kwargs): "github_backup.github_backup.calculate_retry_delay", return_value=0 ): result = github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/test/repo/issues" + args, "https://api.github.com/repos/test/repo/issues" ) assert result == [{"id": 1}] assert call_count == 3 # Failed twice, succeeded on third - def test_http_error_raises_exception(self, mock_args): + def test_http_error_raises_exception(self, create_args): """Non-success HTTP status codes should raise Exception.""" + args = create_args(token_classic="fake_token") mock_response = Mock() mock_response.getcode.return_value = 404 mock_response.read.return_value = b'{"message": "Not Found"}' @@ -156,7 +146,7 @@ def test_http_error_raises_exception(self, mock_args): ): with pytest.raises(Exception) as exc_info: github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/test/notfound/issues" + args, "https://api.github.com/repos/test/notfound/issues" ) assert not isinstance( @@ -346,21 +336,13 @@ def mock_urlopen(*args, **kwargs): class TestRetrieveDataThrottling: """Tests for throttling behavior in retrieve_data.""" - @pytest.fixture - def mock_args(self): - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - 
args.throttle_limit = 10 # Throttle when remaining <= 10 - args.throttle_pause = 5 # Pause 5 seconds - args.max_retries = DEFAULT_MAX_RETRIES - return args - - def test_throttling_pauses_when_rate_limit_low(self, mock_args): + def test_throttling_pauses_when_rate_limit_low(self, create_args): """Should pause when x-ratelimit-remaining is at or below throttle_limit.""" + args = create_args( + token_classic="fake_token", + throttle_limit=10, + throttle_pause=5, + ) mock_response = Mock() mock_response.getcode.return_value = 200 mock_response.read.return_value = json.dumps([{"id": 1}]).encode("utf-8") @@ -375,7 +357,7 @@ def test_throttling_pauses_when_rate_limit_low(self, mock_args): ): with patch("github_backup.github_backup.time.sleep") as mock_sleep: github_backup.retrieve_data( - mock_args, "https://api.github.com/repos/test/repo/issues" + args, "https://api.github.com/repos/test/repo/issues" ) mock_sleep.assert_called_once_with(5) # throttle_pause value @@ -384,21 +366,9 @@ def test_throttling_pauses_when_rate_limit_low(self, mock_args): class TestRetrieveDataSingleItem: """Tests for single item (dict) responses in retrieve_data.""" - @pytest.fixture - def mock_args(self): - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = DEFAULT_MAX_RETRIES - return args - - def test_dict_response_returned_as_list(self, mock_args): + def test_dict_response_returned_as_list(self, create_args): """Single dict response should be returned as a list with one item.""" + args = create_args(token_classic="fake_token") mock_response = Mock() mock_response.getcode.return_value = 200 mock_response.read.return_value = json.dumps( @@ -411,7 +381,7 @@ def test_dict_response_returned_as_list(self, mock_args): return_value=mock_response, ): result = github_backup.retrieve_data( - mock_args, 
"https://api.github.com/user" + args, "https://api.github.com/user" ) assert result == [{"login": "testuser", "id": 123}] @@ -474,17 +444,12 @@ def mock_urlopen(*args, **kwargs): assert result == good_response assert call_count == 2 # 1 initial + 1 retry = 2 attempts - def test_custom_retry_count_limits_attempts(self): + def test_custom_retry_count_limits_attempts(self, create_args): """Custom --retries value should limit actual retry attempts.""" - args = Mock() - args.as_app = False - args.token_fine = None - args.token_classic = "fake_token" - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.throttle_limit = None - args.throttle_pause = 0 - args.max_retries = 2 # 2 retries = 3 total attempts (1 initial + 2 retries) + args = create_args( + token_classic="fake_token", + max_retries=2, # 2 retries = 3 total attempts (1 initial + 2 retries) + ) mock_response = Mock() mock_response.getcode.return_value = 200 diff --git a/tests/test_skip_assets_on.py b/tests/test_skip_assets_on.py index ce28287..519750e 100644 --- a/tests/test_skip_assets_on.py +++ b/tests/test_skip_assets_on.py @@ -1,7 +1,7 @@ """Tests for --skip-assets-on flag behavior (issue #135).""" import pytest -from unittest.mock import Mock, patch +from unittest.mock import patch from github_backup import github_backup @@ -13,52 +13,6 @@ class TestSkipAssetsOn: while still backing up release metadata. 
""" - def _create_mock_args(self, **overrides): - """Create a mock args object with sensible defaults.""" - args = Mock() - args.user = "testuser" - args.output_directory = "/tmp/backup" - args.include_repository = False - args.include_everything = False - args.include_gists = False - args.include_starred_gists = False - args.all_starred = False - args.skip_existing = False - args.bare_clone = False - args.lfs_clone = False - args.no_prune = False - args.include_wiki = False - args.include_issues = False - args.include_issue_comments = False - args.include_issue_events = False - args.include_pulls = False - args.include_pull_comments = False - args.include_pull_commits = False - args.include_pull_details = False - args.include_labels = False - args.include_hooks = False - args.include_milestones = False - args.include_releases = True - args.include_assets = True - args.skip_assets_on = [] - args.include_attachments = False - args.incremental = False - args.incremental_by_files = False - args.github_host = None - args.prefer_ssh = False - args.token_classic = "test-token" - args.token_fine = None - args.as_app = False - args.osx_keychain_item_name = None - args.osx_keychain_item_account = None - args.skip_prerelease = False - args.number_of_latest_releases = None - - for key, value in overrides.items(): - setattr(args, key, value) - - return args - def _create_mock_repository(self, name="test-repo", owner="testuser"): """Create a mock repository object.""" return { @@ -123,10 +77,10 @@ class TestSkipAssetsOnBehavior(TestSkipAssetsOn): @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_assets_downloaded_when_not_skipped( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Assets should be downloaded when repo is not in skip list.""" - args = self._create_mock_args(skip_assets_on=[]) + args = 
create_args(skip_assets_on=[]) repository = self._create_mock_repository(name="normal-repo") release = self._create_mock_release() asset = self._create_mock_asset() @@ -154,10 +108,10 @@ def test_assets_downloaded_when_not_skipped( @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_assets_skipped_when_repo_name_matches( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Assets should be skipped when repo name is in skip list.""" - args = self._create_mock_args(skip_assets_on=["big-repo"]) + args = create_args(skip_assets_on=["big-repo"]) repository = self._create_mock_repository(name="big-repo") release = self._create_mock_release() @@ -180,10 +134,10 @@ def test_assets_skipped_when_repo_name_matches( @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_assets_skipped_when_full_name_matches( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Assets should be skipped when owner/repo format matches.""" - args = self._create_mock_args(skip_assets_on=["otheruser/big-repo"]) + args = create_args(skip_assets_on=["otheruser/big-repo"]) repository = self._create_mock_repository(name="big-repo", owner="otheruser") release = self._create_mock_release() @@ -206,11 +160,11 @@ def test_assets_skipped_when_full_name_matches( @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_case_insensitive_matching( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Skip matching should be case-insensitive.""" # User types uppercase, repo name is lowercase - args = self._create_mock_args(skip_assets_on=["BIG-REPO"]) + 
args = create_args(skip_assets_on=["BIG-REPO"]) repository = self._create_mock_repository(name="big-repo") release = self._create_mock_release() @@ -233,10 +187,10 @@ def test_case_insensitive_matching( @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_multiple_skip_repos( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Multiple repos in skip list should all be skipped.""" - args = self._create_mock_args(skip_assets_on=["repo1", "repo2", "repo3"]) + args = create_args(skip_assets_on=["repo1", "repo2", "repo3"]) repository = self._create_mock_repository(name="repo2") release = self._create_mock_release() @@ -259,10 +213,10 @@ def test_multiple_skip_repos( @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_release_metadata_still_saved_when_assets_skipped( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Release JSON should still be saved even when assets are skipped.""" - args = self._create_mock_args(skip_assets_on=["big-repo"]) + args = create_args(skip_assets_on=["big-repo"]) repository = self._create_mock_repository(name="big-repo") release = self._create_mock_release() @@ -287,10 +241,10 @@ def test_release_metadata_still_saved_when_assets_skipped( @patch("github_backup.github_backup.mkdir_p") @patch("github_backup.github_backup.json_dump_if_changed") def test_non_matching_repo_still_downloads_assets( - self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download + self, mock_json_dump, mock_mkdir, mock_retrieve, mock_download, create_args ): """Repos not in skip list should still download assets.""" - args = self._create_mock_args(skip_assets_on=["other-repo"]) + args = create_args(skip_assets_on=["other-repo"]) repository = 
self._create_mock_repository(name="normal-repo") release = self._create_mock_release() asset = self._create_mock_asset() diff --git a/tests/test_starred_skip_size_over.py b/tests/test_starred_skip_size_over.py index 2deb72a..250d191 100644 --- a/tests/test_starred_skip_size_over.py +++ b/tests/test_starred_skip_size_over.py @@ -1,39 +1,11 @@ """Tests for --starred-skip-size-over flag behavior (issue #108).""" import pytest -from unittest.mock import Mock from github_backup import github_backup -class TestStarredSkipSizeOver: - """Test suite for --starred-skip-size-over flag. - - Issue #108: Allow restricting size of starred repositories before cloning. - The size is based on the GitHub API's 'size' field (in KB), but the CLI - argument accepts MB for user convenience. - """ - - def _create_mock_args(self, **overrides): - """Create a mock args object with sensible defaults.""" - args = Mock() - args.user = "testuser" - args.repository = None - args.name_regex = None - args.languages = None - args.fork = False - args.private = False - args.skip_archived = False - args.starred_skip_size_over = None - args.exclude = None - - for key, value in overrides.items(): - setattr(args, key, value) - - return args - - -class TestStarredSkipSizeOverArgumentParsing(TestStarredSkipSizeOver): +class TestStarredSkipSizeOverArgumentParsing: """Tests for --starred-skip-size-over argument parsing.""" def test_starred_skip_size_over_not_set_defaults_to_none(self): @@ -52,12 +24,17 @@ def test_starred_skip_size_over_rejects_non_integer(self): github_backup.parse_args(["testuser", "--starred-skip-size-over", "abc"]) -class TestStarredSkipSizeOverFiltering(TestStarredSkipSizeOver): - """Tests for --starred-skip-size-over filtering behavior.""" +class TestStarredSkipSizeOverFiltering: + """Tests for --starred-skip-size-over filtering behavior. + + Issue #108: Allow restricting size of starred repositories before cloning. 
+ The size is based on the GitHub API's 'size' field (in KB), but the CLI + argument accepts MB for user convenience. + """ - def test_starred_repo_under_limit_is_kept(self): + def test_starred_repo_under_limit_is_kept(self, create_args): """Starred repos under the size limit should be kept.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -72,9 +49,9 @@ def test_starred_repo_under_limit_is_kept(self): assert len(result) == 1 assert result[0]["name"] == "small-repo" - def test_starred_repo_over_limit_is_filtered(self): + def test_starred_repo_over_limit_is_filtered(self, create_args): """Starred repos over the size limit should be filtered out.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -88,9 +65,9 @@ def test_starred_repo_over_limit_is_filtered(self): result = github_backup.filter_repositories(args, repos) assert len(result) == 0 - def test_own_repo_over_limit_is_kept(self): + def test_own_repo_over_limit_is_kept(self, create_args): """User's own repos should not be affected by the size limit.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -105,9 +82,9 @@ def test_own_repo_over_limit_is_kept(self): assert len(result) == 1 assert result[0]["name"] == "my-huge-repo" - def test_starred_repo_at_exact_limit_is_kept(self): + def test_starred_repo_at_exact_limit_is_kept(self, create_args): """Starred repos at exactly the size limit should be kept.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -122,9 +99,9 @@ def test_starred_repo_at_exact_limit_is_kept(self): assert len(result) == 1 assert result[0]["name"] == "exact-limit-repo" - def test_mixed_repos_filtered_correctly(self): + def test_mixed_repos_filtered_correctly(self, create_args): """Mix of own 
and starred repos should be filtered correctly.""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -153,9 +130,9 @@ def test_mixed_repos_filtered_correctly(self): assert "starred-small" in names assert "starred-huge" not in names - def test_no_size_limit_keeps_all_starred(self): + def test_no_size_limit_keeps_all_starred(self, create_args): """When no size limit is set, all starred repos should be kept.""" - args = self._create_mock_args(starred_skip_size_over=None) + args = create_args(starred_skip_size_over=None) repos = [ { @@ -169,9 +146,9 @@ def test_no_size_limit_keeps_all_starred(self): result = github_backup.filter_repositories(args, repos) assert len(result) == 1 - def test_repo_without_size_field_is_kept(self): + def test_repo_without_size_field_is_kept(self, create_args): """Repos without a size field should be kept (size defaults to 0).""" - args = self._create_mock_args(starred_skip_size_over=500) + args = create_args(starred_skip_size_over=500) repos = [ { @@ -185,9 +162,9 @@ def test_repo_without_size_field_is_kept(self): result = github_backup.filter_repositories(args, repos) assert len(result) == 1 - def test_zero_value_warns_and_is_ignored(self, caplog): + def test_zero_value_warns_and_is_ignored(self, create_args, caplog): """Zero value should warn and keep all repos.""" - args = self._create_mock_args(starred_skip_size_over=0) + args = create_args(starred_skip_size_over=0) repos = [ { @@ -202,9 +179,9 @@ def test_zero_value_warns_and_is_ignored(self, caplog): assert len(result) == 1 assert "must be greater than 0" in caplog.text - def test_negative_value_warns_and_is_ignored(self, caplog): + def test_negative_value_warns_and_is_ignored(self, create_args, caplog): """Negative value should warn and keep all repos.""" - args = self._create_mock_args(starred_skip_size_over=-5) + args = create_args(starred_skip_size_over=-5) repos = [ { From 
6780d3ad6c86228f6eaf06f5656efdbee6870d9f Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Tue, 13 Jan 2026 23:10:05 +0000 Subject: [PATCH 407/455] Release version 0.61.1 --- CHANGES.rst | 37 ++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0e66663..e44cd3f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,44 @@ Changelog ========= -0.61.0 (2026-01-12) +0.61.1 (2026-01-13) ------------------- ------------------------ +- Refactor test fixtures to use shared create_args helper. [Rodos] + + Uses the real parse_args() function to get CLI defaults, so when + new arguments are added they're automatically available to all tests. + + Changes: + - Add tests/conftest.py with create_args fixture + - Update 8 test files to use shared fixture + - Remove duplicate _create_mock_args methods + - Remove redundant @pytest.fixture mock_args definitions + + This eliminates the need to update multiple test files when + adding new CLI arguments. +- Fix fine-grained PAT attachment downloads for private repos (#477) + [Rodos] + + Fine-grained personal access tokens cannot download attachments from + private repositories directly due to a GitHub platform limitation. + + This adds a workaround for image attachments (/assets/ URLs) using + GitHub's Markdown API to convert URLs to JWT-signed URLs that can be + downloaded without authentication. 
+ + Changes: + - Add get_jwt_signed_url_via_markdown_api() function + - Detect fine-grained token + private repo + /assets/ URL upfront + - Use JWT workaround for those cases, mark success with jwt_workaround flag + - Skip download with skipped_at when workaround fails + - Add startup warning when using --attachments with fine-grained tokens + - Document limitation in README (file attachments still fail) + - Add 6 unit tests for JWT workaround logic + + +0.61.0 (2026-01-12) +------------------- - Docs: Add missing `--retries` argument to README. [Lukas Bestle] - Test: Adapt tests to new argument. [Lukas Bestle] - Feat: Backup of repository security advisories. [Lukas Bestle] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index a076e5d..daa1407 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.61.0" +__version__ = "0.61.1" From 93e505c07da4cf02e4257933c003471a2ecc53f8 Mon Sep 17 00:00:00 2001 From: Lukas Bestle Date: Wed, 14 Jan 2026 21:01:59 +0100 Subject: [PATCH 408/455] fix: Handle 404 errors on security advisories --- github_backup/github_backup.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 705f013..9d96f3b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -2045,7 +2045,13 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template): repos_template, repository["full_name"] ) - _advisories = retrieve_data(args, template) + try: + _advisories = retrieve_data(args, template) + except Exception as e: + if "404" in str(e): + logger.info("Security advisories are not available for this repository, skipping") + return + raise advisories = {} for advisory in _advisories: From c6fa8c76955e881cbcc5fa9b9cf301e114fdcea7 Mon Sep 17 00:00:00 2001 From: Lukas Bestle Date: Wed, 14 Jan 2026 21:02:51 +0100 Subject: [PATCH 409/455] feat: Only make security 
advisory dir if successful Avoids empty directories for private repos --- github_backup/github_backup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 9d96f3b..fdc18f9 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -2039,7 +2039,6 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template): return logger.info("Retrieving {0} security advisories".format(repository["full_name"])) - mkdir_p(repo_cwd, advisory_cwd) template = "{0}/{1}/security-advisories".format( repos_template, repository["full_name"] @@ -2053,6 +2052,8 @@ def backup_security_advisories(args, repo_cwd, repository, repos_template): return raise + mkdir_p(repo_cwd, advisory_cwd) + advisories = {} for advisory in _advisories: advisories[advisory["ghsa_id"]] = advisory From 856ad5db415f0df0e94462b7929c264ec2aeb818 Mon Sep 17 00:00:00 2001 From: Lukas Bestle Date: Wed, 14 Jan 2026 21:03:17 +0100 Subject: [PATCH 410/455] fix: Skip security advisories for private repos unless explicitly requested --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index fdc18f9..346d541 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1814,7 +1814,7 @@ def backup_repositories(args, output_directory, repositories): if args.include_milestones or args.include_everything: backup_milestones(args, repo_cwd, repository, repos_template) - if args.include_security_advisories or args.include_everything: + if args.include_security_advisories or (args.include_everything and not repository["Private"]): backup_security_advisories(args, repo_cwd, repository, repos_template) if args.include_labels or args.include_everything: From 1181f811b704d58e971a7686240694c63c3e6a50 Mon Sep 17 00:00:00 2001 From: Lukas Bestle Date: Fri, 16 Jan 2026 08:52:45 +0100 
Subject: [PATCH 411/455] docs: Explain security advisories in README --- README.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.rst b/README.rst index c23027d..cd7be1f 100644 --- a/README.rst +++ b/README.rst @@ -284,6 +284,17 @@ The tool automatically extracts file extensions from HTTP headers to ensure file **Fine-grained token limitation:** Due to a GitHub platform limitation, fine-grained personal access tokens (``github_pat_...``) cannot download attachments from private repositories directly. This affects both ``/assets/`` (images) and ``/files/`` (documents) URLs. The tool implements a workaround for image attachments using GitHub's Markdown API, which converts URLs to temporary JWT-signed URLs that can be downloaded. However, this workaround only works for images - document attachments (PDFs, text files, etc.) will fail with 404 errors when using fine-grained tokens on private repos. For full attachment support on private repositories, use a classic token (``-t``) instead of a fine-grained token (``-f``). See `#477 `_ for details. +About security advisories +------------------------- + +GitHub security advisories are only available in public repositories. GitHub does not provide the respective API endpoint for private repositories. + +Therefore the logic is implemented as follows: +- Security advisories are included in the `--all` option. +- If only the `--all` option was provided, backups of security advisories are skipped for private repositories. +- If the `--security-advisories` option is provided (on its own or in addition to `--all`), a backup of security advisories is attempted for all repositories, with graceful handling if the GitHub API doesn't return any. 
+ + Run in Docker container ----------------------- From e6283f93847b5378bf6f2800d8b15fb60ac44b61 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jan 2026 14:50:28 +0000 Subject: [PATCH 412/455] chore(deps): bump black in the python-packages group Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). Updates `black` from 25.12.0 to 26.1.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/25.12.0...26.1.0) --- updated-dependencies: - dependency-name: black dependency-version: 26.1.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index dd2d73f..1d3c36f 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,6 +1,6 @@ # Linting & Formatting autopep8==2.3.2 -black==25.12.0 +black==26.1.0 flake8==7.3.0 # Testing From 712d22d124d2922a4a4a3f35433ccf2a8903392c Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 19 Jan 2026 17:40:27 +0000 Subject: [PATCH 413/455] Release version 0.61.2 --- CHANGES.rst | 38 +++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e44cd3f..1811a4f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,45 @@ Changelog ========= -0.61.1 (2026-01-13) +0.61.2 (2026-01-19) ------------------- ------------------------ + +Fix +~~~ +- Skip security advisories for private repos unless explicitly + requested. [Lukas Bestle] +- Handle 404 errors on security advisories. [Lukas Bestle] + +Other +~~~~~ +- Chore(deps): bump black in the python-packages group. 
+ [dependabot[bot]] + + Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). + + + Updates `black` from 25.12.0 to 26.1.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/25.12.0...26.1.0) + + --- + updated-dependencies: + - dependency-name: black + dependency-version: 26.1.0 + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Docs: Explain security advisories in README. [Lukas Bestle] +- Feat: Only make security advisory dir if successful. [Lukas Bestle] + + Avoids empty directories for private repos + + +0.61.1 (2026-01-13) +------------------- - Refactor test fixtures to use shared create_args helper. [Rodos] Uses the real parse_args() function to get CLI defaults, so when diff --git a/github_backup/__init__.py b/github_backup/__init__.py index daa1407..bbe1689 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.61.1" +__version__ = "0.61.2" From 0d8a504b024a73096f00175ffbac51a8100cf08c Mon Sep 17 00:00:00 2001 From: Rodos Date: Wed, 21 Jan 2026 21:12:03 +1100 Subject: [PATCH 414/455] Fix KeyError: 'Private' when using --all flag (#481) The repository dictionary uses lowercase "private" key. Use .get() with the correct case to match the pattern used elsewhere in the codebase. The bug only affects --all users since --security-advisories short-circuits before the key access. 
--- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 346d541..0b7e1f8 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1814,7 +1814,7 @@ def backup_repositories(args, output_directory, repositories): if args.include_milestones or args.include_everything: backup_milestones(args, repo_cwd, repository, repos_template) - if args.include_security_advisories or (args.include_everything and not repository["Private"]): + if args.include_security_advisories or (args.include_everything and not repository.get("private", False)): backup_security_advisories(args, repo_cwd, repository, repos_template) if args.include_labels or args.include_everything: From 2f5e7c2dcfa0446d7dd2ae9368e4397b4a878c0e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Jan 2026 13:05:17 +0000 Subject: [PATCH 415/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). Updates `setuptools` from 80.9.0 to 80.10.1 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v80.9.0...v80.10.1) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.10.1 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 1d3c36f..1a533c0 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -9,7 +9,7 @@ pytest==9.0.2 # Release & Publishing twine==6.2.0 gitchangelog==3.0.4 -setuptools==80.9.0 +setuptools==80.10.1 # Documentation restructuredtext-lint==2.0.2 From 9be6282719862f58dd59a6a29b61e45b95e31296 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Sat, 24 Jan 2026 05:45:42 +0000 Subject: [PATCH 416/455] Release version 0.61.3 --- CHANGES.rst | 32 +++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 1811a4f..094f1ee 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,39 @@ Changelog ========= -0.61.2 (2026-01-19) +0.61.3 (2026-01-24) ------------------- ------------------------ +- Fix KeyError: 'Private' when using --all flag (#481) [Rodos] + + The repository dictionary uses lowercase "private" key. Use .get() with + the correct case to match the pattern used elsewhere in the codebase. + + The bug only affects --all users since --security-advisories short-circuits + before the key access. +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). + + + Updates `setuptools` from 80.9.0 to 80.10.1 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v80.9.0...v80.10.1) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-version: 80.10.1 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + ... 
+ + +0.61.2 (2026-01-19) +------------------- Fix ~~~ diff --git a/github_backup/__init__.py b/github_backup/__init__.py index bbe1689..ce11d35 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.61.2" +__version__ = "0.61.3" From be900d1f3ffb0a0a010cad0d6c0e9ac22d14ed65 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:08:53 +0000 Subject: [PATCH 417/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). Updates `setuptools` from 80.10.1 to 80.10.2 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v80.10.1...v80.10.2) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.10.2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 1a533c0..4c614e9 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -9,7 +9,7 @@ pytest==9.0.2 # Release & Publishing twine==6.2.0 gitchangelog==3.0.4 -setuptools==80.10.1 +setuptools==80.10.2 # Documentation restructuredtext-lint==2.0.2 From 6268a4c5c6116929c380f58d227529ef97d700a9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:31:40 +0000 Subject: [PATCH 418/455] chore(deps): bump setuptools in the python-packages group Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). 
Updates `setuptools` from 80.10.2 to 82.0.0 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v80.10.2...v82.0.0) --- updated-dependencies: - dependency-name: setuptools dependency-version: 82.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 4c614e9..6742290 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -9,7 +9,7 @@ pytest==9.0.2 # Release & Publishing twine==6.2.0 gitchangelog==3.0.4 -setuptools==80.10.2 +setuptools==82.0.0 # Documentation restructuredtext-lint==2.0.2 From 0162f7ed465ebaf459b694060948b464dbf62c22 Mon Sep 17 00:00:00 2001 From: Rodos Date: Mon, 16 Feb 2026 10:12:36 +1100 Subject: [PATCH 419/455] Fix HTTP 451 DMCA and 403 TOS handling regression (#487) The DMCA handling added in PR #454 had a bug: make_request_with_retry() raises HTTPError before retrieve_data() could check the status code via getcode(), making the case 451 handler dead code. This also affected HTTP 403 TOS violations (e.g. jumoog/MagiskOnWSA). Fix by catching HTTPError in retrieve_data() and converting 451 and blocked 403 responses (identified by "block" key in response body) to RepositoryUnavailableError. Non-block 403s (permissions, scopes) still propagate as HTTPError. Also handle RepositoryUnavailableError in retrieve_repositories() for the --repository case. Rewrote tests to mock urlopen (not make_request_with_retry) to exercise the real code path that was previously untested. 
Closes #487 --- github_backup/github_backup.py | 123 +++++++++++++--------- tests/test_http_451.py | 180 ++++++++++++++++++++++++++------- tests/test_retrieve_data.py | 22 ++++ 3 files changed, 245 insertions(+), 80 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 0b7e1f8..ada2d40 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -39,11 +39,11 @@ class RepositoryUnavailableError(Exception): - """Raised when a repository is unavailable due to legal reasons (e.g., DMCA takedown).""" + """Raised when a repository is unavailable due to legal reasons (e.g., DMCA takedown, TOS violation).""" - def __init__(self, message, dmca_url=None): + def __init__(self, message, legal_url=None): super().__init__(message) - self.dmca_url = dmca_url + self.legal_url = legal_url # Setup SSL context with fallback chain @@ -647,6 +647,14 @@ def _extract_next_page_url(link_header): return None def fetch_all() -> Generator[dict, None, None]: + def _extract_legal_url(response_body_bytes): + """Extract DMCA/legal notice URL from GitHub API error response body.""" + try: + data = json.loads(response_body_bytes.decode("utf-8")) + return data.get("block", {}).get("html_url") + except Exception: + return None + next_url = None while True: @@ -661,47 +669,66 @@ def fetch_all() -> Generator[dict, None, None]: as_app=args.as_app, fine=args.token_fine is not None, ) - http_response = make_request_with_retry(request, auth, args.max_retries) - - match http_response.getcode(): - case 200: - # Success - Parse JSON response - try: - response = json.loads(http_response.read().decode("utf-8")) - break # Exit retry loop and handle the data returned - except ( - IncompleteRead, - json.decoder.JSONDecodeError, - TimeoutError, - ) as e: - logger.warning(f"{type(e).__name__} reading response") - if attempt < args.max_retries: - delay = calculate_retry_delay(attempt, {}) - logger.warning( - f"Retrying read in {delay:.1f}s (attempt 
{attempt + 1}/{args.max_retries + 1})" - ) - time.sleep(delay) - continue # Next retry attempt - - case 451: - # DMCA takedown - extract URL if available, then raise - dmca_url = None - try: - response_data = json.loads( - http_response.read().decode("utf-8") - ) - dmca_url = response_data.get("block", {}).get("html_url") - except Exception: - pass + try: + http_response = make_request_with_retry( + request, auth, args.max_retries + ) + except HTTPError as exc: + if exc.code == 451: + legal_url = _extract_legal_url(exc.read()) raise RepositoryUnavailableError( - "Repository unavailable due to legal reasons (HTTP 451)", - dmca_url=dmca_url, + f"Repository unavailable due to legal reasons (HTTP {exc.code})", + legal_url=legal_url, ) + elif exc.code == 403: + # Rate-limit 403s (x-ratelimit-remaining=0) are retried + # by make_request_with_retry — re-raise if exhausted. + if int(exc.headers.get("x-ratelimit-remaining", 1)) < 1: + raise + # Only convert to RepositoryUnavailableError if GitHub + # indicates a TOS/DMCA block (response contains "block" + # key). Other 403s (permissions, scopes) should propagate. + body = exc.read() + try: + data = json.loads(body.decode("utf-8")) + except Exception: + data = {} + if "block" in data: + raise RepositoryUnavailableError( + "Repository access blocked (HTTP 403)", + legal_url=data.get("block", {}).get("html_url"), + ) + raise + else: + raise + + # urlopen raises HTTPError for non-2xx, so only success gets here. + # Guard against unexpected status codes from proxies, future Python + # changes, or other edge cases we haven't considered. 
+ status = http_response.getcode() + if status != 200: + raise Exception( + f"Unexpected HTTP {status} from {next_url or template} " + f"(expected non-2xx to raise HTTPError)" + ) - case _: - raise Exception( - f"API request returned HTTP {http_response.getcode()}: {http_response.reason}" + # Parse JSON response + try: + response = json.loads(http_response.read().decode("utf-8")) + break # Exit retry loop and handle the data returned + except ( + IncompleteRead, + json.decoder.JSONDecodeError, + TimeoutError, + ) as e: + logger.warning(f"{type(e).__name__} reading response") + if attempt < args.max_retries: + delay = calculate_retry_delay(attempt, {}) + logger.warning( + f"Retrying read in {delay:.1f}s (attempt {attempt + 1}/{args.max_retries + 1})" ) + time.sleep(delay) + continue # Next retry attempt else: logger.error( f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}" @@ -1614,7 +1641,13 @@ def retrieve_repositories(args, authenticated_user): paginated = False template = "https://{0}/repos/{1}".format(get_github_api_host(args), repo_path) - repos = retrieve_data(args, template, paginated=paginated) + try: + repos = retrieve_data(args, template, paginated=paginated) + except RepositoryUnavailableError as e: + logger.warning(f"Repository is unavailable: {e}") + if e.legal_url: + logger.warning(f"Legal notice: {e.legal_url}") + return [] if args.all_starred: starred_template = "https://{0}/users/{1}/starred".format( @@ -1832,11 +1865,9 @@ def backup_repositories(args, output_directory, repositories): include_assets=args.include_assets or args.include_everything, ) except RepositoryUnavailableError as e: - logger.warning( - f"Repository {repository['full_name']} is unavailable (HTTP 451)" - ) - if e.dmca_url: - logger.warning(f"DMCA notice: {e.dmca_url}") + logger.warning(f"Repository {repository['full_name']} is unavailable: {e}") + if e.legal_url: + logger.warning(f"Legal notice: {e.legal_url}") logger.info(f"Skipping 
remaining resources for {repository['full_name']}") continue diff --git a/tests/test_http_451.py b/tests/test_http_451.py index b556069..bba866e 100644 --- a/tests/test_http_451.py +++ b/tests/test_http_451.py @@ -1,13 +1,28 @@ -"""Tests for HTTP 451 (DMCA takedown) handling.""" +"""Tests for HTTP 451 (DMCA takedown) and HTTP 403 (TOS) handling.""" +import io import json -from unittest.mock import Mock, patch +from unittest.mock import patch +from urllib.error import HTTPError import pytest from github_backup import github_backup +def _make_http_error(code, body_bytes, msg="Error", headers=None): + """Create an HTTPError with a readable body (like a real urllib response).""" + if headers is None: + headers = {"x-ratelimit-remaining": "5000"} + return HTTPError( + url="https://api.github.com/repos/test/repo", + code=code, + msg=msg, + hdrs=headers, + fp=io.BytesIO(body_bytes), + ) + + class TestHTTP451Exception: """Test suite for HTTP 451 DMCA takedown exception handling.""" @@ -15,9 +30,6 @@ def test_repository_unavailable_error_raised(self, create_args): """HTTP 451 should raise RepositoryUnavailableError with DMCA URL.""" args = create_args() - mock_response = Mock() - mock_response.getcode.return_value = 451 - dmca_data = { "message": "Repository access blocked", "block": { @@ -26,66 +38,166 @@ def test_repository_unavailable_error_raised(self, create_args): "html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md", }, } - mock_response.read.return_value = json.dumps(dmca_data).encode("utf-8") - mock_response.headers = {"x-ratelimit-remaining": "5000"} - mock_response.reason = "Unavailable For Legal Reasons" - - with patch( - "github_backup.github_backup.make_request_with_retry", - return_value=mock_response, - ): + body = json.dumps(dmca_data).encode("utf-8") + + def mock_urlopen(*a, **kw): + raise _make_http_error(451, body, msg="Unavailable For Legal Reasons") + + with patch("github_backup.github_backup.urlopen", 
side_effect=mock_urlopen): with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: github_backup.retrieve_data( args, "https://api.github.com/repos/test/dmca/issues" ) assert ( - exc_info.value.dmca_url + exc_info.value.legal_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md" ) assert "451" in str(exc_info.value) - def test_repository_unavailable_error_without_dmca_url(self, create_args): + def test_repository_unavailable_error_without_legal_url(self, create_args): """HTTP 451 without DMCA details should still raise exception.""" args = create_args() - mock_response = Mock() - mock_response.getcode.return_value = 451 - mock_response.read.return_value = b'{"message": "Blocked"}' - mock_response.headers = {"x-ratelimit-remaining": "5000"} - mock_response.reason = "Unavailable For Legal Reasons" + def mock_urlopen(*a, **kw): + raise _make_http_error(451, b'{"message": "Blocked"}') - with patch( - "github_backup.github_backup.make_request_with_retry", - return_value=mock_response, - ): + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: github_backup.retrieve_data( args, "https://api.github.com/repos/test/dmca/issues" ) - assert exc_info.value.dmca_url is None + assert exc_info.value.legal_url is None assert "451" in str(exc_info.value) def test_repository_unavailable_error_with_malformed_json(self, create_args): """HTTP 451 with malformed JSON should still raise exception.""" args = create_args() - mock_response = Mock() - mock_response.getcode.return_value = 451 - mock_response.read.return_value = b"invalid json {" - mock_response.headers = {"x-ratelimit-remaining": "5000"} - mock_response.reason = "Unavailable For Legal Reasons" + def mock_urlopen(*a, **kw): + raise _make_http_error(451, b"invalid json {") - with patch( - "github_backup.github_backup.make_request_with_retry", - return_value=mock_response, - ): + 
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): with pytest.raises(github_backup.RepositoryUnavailableError): github_backup.retrieve_data( args, "https://api.github.com/repos/test/dmca/issues" ) +class TestHTTP403TOS: + """Test suite for HTTP 403 TOS violation handling.""" + + def test_403_tos_raises_repository_unavailable(self, create_args): + """HTTP 403 (non-rate-limit) should raise RepositoryUnavailableError.""" + args = create_args() + + tos_data = { + "message": "Repository access blocked", + "block": { + "reason": "tos", + "html_url": "https://github.com/contact/tos-violation", + }, + } + body = json.dumps(tos_data).encode("utf-8") + + def mock_urlopen(*a, **kw): + raise _make_http_error( + 403, + body, + msg="Forbidden", + headers={"x-ratelimit-remaining": "5000"}, + ) + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info: + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/blocked/issues" + ) + + assert ( + exc_info.value.legal_url == "https://github.com/contact/tos-violation" + ) + assert "403" in str(exc_info.value) + + def test_403_permission_denied_not_converted(self, create_args): + """HTTP 403 without 'block' in body should propagate as HTTPError, not RepositoryUnavailableError.""" + args = create_args() + + body = json.dumps({"message": "Must have admin rights to Repository."}).encode( + "utf-8" + ) + + def mock_urlopen(*a, **kw): + raise _make_http_error( + 403, + body, + msg="Forbidden", + headers={"x-ratelimit-remaining": "5000"}, + ) + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with pytest.raises(HTTPError) as exc_info: + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/private/issues" + ) + + assert exc_info.value.code == 403 + + def test_403_rate_limit_not_converted(self, create_args): + """HTTP 403 with rate limit exhausted should 
NOT become RepositoryUnavailableError.""" + args = create_args() + + call_count = 0 + + def mock_urlopen(*a, **kw): + nonlocal call_count + call_count += 1 + raise _make_http_error( + 403, + b'{"message": "rate limit"}', + msg="Forbidden", + headers={"x-ratelimit-remaining": "0"}, + ) + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with patch( + "github_backup.github_backup.calculate_retry_delay", return_value=0 + ): + with pytest.raises(HTTPError) as exc_info: + github_backup.retrieve_data( + args, "https://api.github.com/repos/test/ratelimit/issues" + ) + + assert exc_info.value.code == 403 + # Should have retried (not raised immediately as RepositoryUnavailableError) + assert call_count > 1 + + +class TestRetrieveRepositoriesUnavailable: + """Test that retrieve_repositories handles RepositoryUnavailableError gracefully.""" + + def test_unavailable_repo_returns_empty_list(self, create_args): + """retrieve_repositories should return [] when the repo is unavailable.""" + args = create_args(repository="blocked-repo") + + def mock_urlopen(*a, **kw): + raise _make_http_error( + 451, + json.dumps( + { + "message": "Blocked", + "block": {"html_url": "https://example.com/dmca"}, + } + ).encode("utf-8"), + msg="Unavailable For Legal Reasons", + ) + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + repos = github_backup.retrieve_repositories(args, {"login": None}) + + assert repos == [] + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/test_retrieve_data.py b/tests/test_retrieve_data.py index 159f06e..014c309 100644 --- a/tests/test_retrieve_data.py +++ b/tests/test_retrieve_data.py @@ -288,6 +288,28 @@ def mock_urlopen(*args, **kwargs): assert exc_info.value.code == 403 assert call_count == 1 # No retries + def test_451_error_not_retried(self): + """HTTP 451 should not be retried - raise immediately.""" + call_count = 0 + + def mock_urlopen(*args, **kwargs): + nonlocal 
call_count + call_count += 1 + raise HTTPError( + url="https://api.github.com/test", + code=451, + msg="Unavailable For Legal Reasons", + hdrs={"x-ratelimit-remaining": "5000"}, + fp=None, + ) + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + with pytest.raises(HTTPError) as exc_info: + make_request_with_retry(Mock(), None) + + assert exc_info.value.code == 451 + assert call_count == 1 # No retries + def test_connection_error_retries_and_succeeds(self): """URLError (connection error) should retry and succeed if subsequent request works.""" good_response = Mock() From 60067650b070b73f8d1821064c8edc9affa6884c Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 16 Feb 2026 05:46:39 +0000 Subject: [PATCH 420/455] Release version 0.61.4 --- CHANGES.rst | 61 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 094f1ee..808da6b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,68 @@ Changelog ========= -0.61.3 (2026-01-24) +0.61.4 (2026-02-16) ------------------- ------------------------ +- Fix HTTP 451 DMCA and 403 TOS handling regression (#487) [Rodos] + + The DMCA handling added in PR #454 had a bug: make_request_with_retry() + raises HTTPError before retrieve_data() could check the status code via + getcode(), making the case 451 handler dead code. This also affected + HTTP 403 TOS violations (e.g. jumoog/MagiskOnWSA). + + Fix by catching HTTPError in retrieve_data() and converting 451 and + blocked 403 responses (identified by "block" key in response body) to + RepositoryUnavailableError. Non-block 403s (permissions, scopes) still + propagate as HTTPError. Also handle RepositoryUnavailableError in + retrieve_repositories() for the --repository case. + + Rewrote tests to mock urlopen (not make_request_with_retry) to exercise + the real code path that was previously untested. 
+ + Closes #487 +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). + + + Updates `setuptools` from 80.10.2 to 82.0.0 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v80.10.2...v82.0.0) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-version: 82.0.0 + dependency-type: direct:production + update-type: version-update:semver-major + dependency-group: python-packages + ... +- Chore(deps): bump setuptools in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [setuptools](https://github.com/pypa/setuptools). + + + Updates `setuptools` from 80.10.1 to 80.10.2 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v80.10.1...v80.10.2) + + --- + updated-dependencies: + - dependency-name: setuptools + dependency-version: 80.10.2 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... + + +0.61.3 (2026-01-24) +------------------- - Fix KeyError: 'Private' when using --all flag (#481) [Rodos] The repository dictionary uses lowercase "private" key. 
Use .get() with diff --git a/github_backup/__init__.py b/github_backup/__init__.py index ce11d35..03f7dee 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.61.3" +__version__ = "0.61.4" From f54a5458f6db668a5ff4d6395d792e00d20999e7 Mon Sep 17 00:00:00 2001 From: Rodos Date: Wed, 18 Feb 2026 20:10:48 +1100 Subject: [PATCH 421/455] Fix empty repository crash due to None timestamp comparison (#489) Empty repositories have None for pushed_at/updated_at, causing a TypeError when compared to the last_update string. Use .get() with truthiness check to skip None timestamps in incremental tracking. --- github_backup/github_backup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index ada2d40..4d5394e 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1772,9 +1772,9 @@ def backup_repositories(args, output_directory, repositories): last_update = "0000-00-00T00:00:00Z" for repository in repositories: - if "updated_at" in repository and repository["updated_at"] > last_update: + if repository.get("updated_at") and repository["updated_at"] > last_update: last_update = repository["updated_at"] - elif "pushed_at" in repository and repository["pushed_at"] > last_update: + elif repository.get("pushed_at") and repository["pushed_at"] > last_update: last_update = repository["pushed_at"] if repository.get("is_gist"): From 68af1d406a5ee0249829b24972e0d9bc77320a5a Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 18 Feb 2026 21:04:32 +0000 Subject: [PATCH 422/455] Release version 0.61.5 --- CHANGES.rst | 12 +++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 808da6b..6041b9e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,19 @@ Changelog ========= -0.61.4 (2026-02-16) +0.61.5 (2026-02-18) ------------------- 
------------------------ +- Fix empty repository crash due to None timestamp comparison (#489) + [Rodos] + + Empty repositories have None for pushed_at/updated_at, causing a + TypeError when compared to the last_update string. Use .get() with + truthiness check to skip None timestamps in incremental tracking. + + +0.61.4 (2026-02-16) +------------------- - Fix HTTP 451 DMCA and 403 TOS handling regression (#487) [Rodos] The DMCA handling added in PR #454 had a bug: make_request_with_retry() diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 03f7dee..294be4d 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.61.4" +__version__ = "0.61.5" From 8a0553a5b175a9f91449e6a29b37ceffeff26c1e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Mar 2026 04:33:49 +0000 Subject: [PATCH 423/455] chore(deps): bump docker/metadata-action from 5 to 6 Bumps [docker/metadata-action](https://github.com/docker/metadata-action) from 5 to 6. - [Release notes](https://github.com/docker/metadata-action/releases) - [Commits](https://github.com/docker/metadata-action/compare/v5...v6) --- updated-dependencies: - dependency-name: docker/metadata-action dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f367b99..1aa81fe 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -57,7 +57,7 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@v6 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | From 7f1807aaf82ac3565e1e4f1261644b376d0a5600 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Mar 2026 04:33:53 +0000 Subject: [PATCH 424/455] chore(deps): bump docker/setup-buildx-action from 3 to 4 Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 3 to 4. - [Release notes](https://github.com/docker/setup-buildx-action/releases) - [Commits](https://github.com/docker/setup-buildx-action/compare/v3...v4) --- updated-dependencies: - dependency-name: docker/setup-buildx-action dependency-version: '4' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f367b99..b9103c5 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -46,7 +46,7 @@ jobs: uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Log in to the Container registry uses: docker/login-action@v3 From cceef92346fb8c6fb672b29b8f0917e95cbcb591 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Mar 2026 04:33:55 +0000 Subject: [PATCH 425/455] chore(deps): bump docker/setup-qemu-action from 3 to 4 Bumps [docker/setup-qemu-action](https://github.com/docker/setup-qemu-action) from 3 to 4. - [Release notes](https://github.com/docker/setup-qemu-action/releases) - [Commits](https://github.com/docker/setup-qemu-action/compare/v3...v4) --- updated-dependencies: - dependency-name: docker/setup-qemu-action dependency-version: '4' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f367b99..749ed52 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -43,7 +43,7 @@ jobs: persist-credentials: false - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 From 5758e489e82305bfcdc02cf643c6c543b489ebb7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Mar 2026 04:33:58 +0000 Subject: [PATCH 426/455] chore(deps): bump docker/build-push-action from 6 to 7 Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 6 to 7. - [Release notes](https://github.com/docker/build-push-action/releases) - [Commits](https://github.com/docker/build-push-action/compare/v6...v7) --- updated-dependencies: - dependency-name: docker/build-push-action dependency-version: '7' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f367b99..00fdec3 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -68,7 +68,7 @@ jobs: type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} - name: Build and push Docker image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v7 with: context: . 
push: true From d5be07ec809c9c0ca7bfafc80345f09c9baf532b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:28:37 +0000 Subject: [PATCH 427/455] chore(deps): bump the python-packages group with 2 updates Bumps the python-packages group with 2 updates: [black](https://github.com/psf/black) and [setuptools](https://github.com/pypa/setuptools). Updates `black` from 26.1.0 to 26.3.0 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/26.1.0...26.3.0) Updates `setuptools` from 82.0.0 to 82.0.1 - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v82.0.0...v82.0.1) --- updated-dependencies: - dependency-name: black dependency-version: 26.3.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: setuptools dependency-version: 82.0.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release-requirements.txt b/release-requirements.txt index 6742290..65a036b 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,6 +1,6 @@ # Linting & Formatting autopep8==2.3.2 -black==26.1.0 +black==26.3.0 flake8==7.3.0 # Testing @@ -9,7 +9,7 @@ pytest==9.0.2 # Release & Publishing twine==6.2.0 gitchangelog==3.0.4 -setuptools==82.0.0 +setuptools==82.0.1 # Documentation restructuredtext-lint==2.0.2 From 3d961d11184f1fc384a8be290347b1de1e5064fe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Mar 2026 17:26:41 +0000 Subject: [PATCH 428/455] chore(deps): bump docker/login-action from 3 to 4 Bumps [docker/login-action](https://github.com/docker/login-action) from 3 to 4. - [Release notes](https://github.com/docker/login-action/releases) - [Commits](https://github.com/docker/login-action/compare/v3...v4) --- updated-dependencies: - dependency-name: docker/login-action dependency-version: '4' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 9508f94..4e5c89b 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -49,7 +49,7 @@ jobs: uses: docker/setup-buildx-action@v4 - name: Log in to the Container registry - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} From f85c759e5df58bb5c1c680943bedbf03b9141afb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 13:05:24 +0000 Subject: [PATCH 429/455] chore(deps): bump black in the python-packages group Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). Updates `black` from 26.3.0 to 26.3.1 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/26.3.0...26.3.1) --- updated-dependencies: - dependency-name: black dependency-version: 26.3.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index 65a036b..ddc1430 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,6 +1,6 @@ # Linting & Formatting autopep8==2.3.2 -black==26.3.0 +black==26.3.1 flake8==7.3.0 # Testing From 9fde6ed1ffff0660b8ead272c4993bd472312762 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 Apr 2026 13:05:48 +0000 Subject: [PATCH 430/455] chore(deps): bump pytest in the python-packages group Bumps the python-packages group with 1 update: [pytest](https://github.com/pytest-dev/pytest). Updates `pytest` from 9.0.2 to 9.0.3 - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/9.0.2...9.0.3) --- updated-dependencies: - dependency-name: pytest dependency-version: 9.0.3 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages ... 
Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index ddc1430..ad8bc5c 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -4,7 +4,7 @@ black==26.3.1 flake8==7.3.0 # Testing -pytest==9.0.2 +pytest==9.0.3 # Release & Publishing twine==6.2.0 From f4117990b29b8f50ad3c57c86c5af1f9700c1b9c Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 26 Apr 2026 13:42:14 +0200 Subject: [PATCH 431/455] Add --token-from-gh authentication option --- CHANGES.rst | 5 +++ README.rst | 7 ++-- github_backup/github_backup.py | 48 +++++++++++++++++++++++-- tests/test_auth.py | 65 ++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 tests/test_auth.py diff --git a/CHANGES.rst b/CHANGES.rst index 6041b9e..364bd3d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,11 @@ Changelog ========= +Unreleased +---------- +- Add ``--token-from-gh`` to read authentication from ``gh auth token``. + + 0.61.5 (2026-02-18) ------------------- ------------------------ diff --git a/README.rst b/README.rst index cd7be1f..030f260 100644 --- a/README.rst +++ b/README.rst @@ -36,8 +36,8 @@ Show the CLI help output:: CLI Help output:: - github-backup [-h] [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [-q] [--as-app] - [-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i] + github-backup [-h] [-t TOKEN_CLASSIC] [-f TOKEN_FINE] [--token-from-gh] + [-q] [--as-app] [-o OUTPUT_DIRECTORY] [-l LOG_LEVEL] [-i] [--incremental-by-files] [--starred] [--all-starred] [--starred-skip-size-over MB] [--watched] [--followers] [--following] [--all] @@ -71,6 +71,7 @@ CLI Help output:: -f, --token-fine TOKEN_FINE fine-grained personal access token (github_pat_....), or path to token (file://...) + --token-from-gh read token from GitHub CLI (gh auth token) -q, --quiet supress log messages less severe than warning, e.g. 
info --as-app authenticate as github app instead of as a user. @@ -171,6 +172,8 @@ The positional argument ``USER`` specifies the user or organization account you **Classic tokens** (``-t TOKEN``) are `slightly less secure `_ as they provide very coarse-grained permissions. +If you already authenticate with the `GitHub CLI `_, you can use ``--token-from-gh`` to read the token with ``gh auth token`` instead of passing a token directly. This avoids placing the token in shell history or process arguments. When ``--github-host`` is set, the token is read with ``gh auth token --hostname HOST``. + Fine Tokens ~~~~~~~~~~~ diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 4d5394e..fd2fd99 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -167,6 +167,12 @@ def parse_args(args=None): dest="token_fine", help="fine-grained personal access token (github_pat_....), or path to token (file://...)", ) # noqa + parser.add_argument( + "--token-from-gh", + action="store_true", + dest="token_from_gh", + help="read token from GitHub CLI (gh auth token)", + ) parser.add_argument( "-q", "--quiet", @@ -537,8 +543,14 @@ def get_auth(args, encode=True, for_git_cli=False): raise Exception( "Fine-grained token supplied does not look like a GitHub PAT" ) - elif args.token_classic: - if args.token_classic.startswith(FILE_URI_PREFIX): + elif args.token_classic or args.token_from_gh: + if args.token_from_gh: + if args.as_app: + raise Exception( + "--token-from-gh cannot be used with --as-app; provide the app token with --token instead" + ) + args.token_classic = read_token_from_gh_cli(args) + elif args.token_classic.startswith(FILE_URI_PREFIX): args.token_classic = read_file_contents(args.token_classic) if not args.as_app: @@ -580,6 +592,38 @@ def read_file_contents(file_uri): return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() +def read_token_from_gh_cli(args): + cached_token = getattr(args, 
"_token_from_gh_value", None) + if cached_token: + return cached_token + + command = ["gh", "auth", "token"] + if args.github_host: + command.extend(["--hostname", get_github_host(args)]) + + try: + token = subprocess.check_output(command, stderr=subprocess.PIPE).decode( + "utf-8" + ).strip() + except FileNotFoundError: + raise Exception( + "Unable to read token from GitHub CLI: 'gh' executable not found" + ) + except subprocess.CalledProcessError as e: + stderr = e.stderr.decode("utf-8", errors="replace").strip() + if stderr: + raise Exception( + "Unable to read token from GitHub CLI: {0}".format(stderr) + ) + raise Exception("Unable to read token from GitHub CLI") + + if not token: + raise Exception("Unable to read token from GitHub CLI: token was empty") + + args._token_from_gh_value = token + return token + + def get_github_repo_url(args, repository): if repository.get("is_gist"): if args.prefer_ssh: diff --git a/tests/test_auth.py b/tests/test_auth.py new file mode 100644 index 0000000..504c822 --- /dev/null +++ b/tests/test_auth.py @@ -0,0 +1,65 @@ +"""Tests for authentication helpers.""" + +from unittest.mock import patch + +import pytest + +from github_backup import github_backup + + +def test_token_from_gh_flag_parses(): + args = github_backup.parse_args(["--token-from-gh", "testuser"]) + assert args.token_from_gh is True + + +def test_get_auth_reads_token_from_gh_cli(create_args): + args = create_args(token_from_gh=True) + + with patch( + "github_backup.github_backup.subprocess.check_output", + return_value=b"gho_test_token\n", + ) as mock_check_output: + auth = github_backup.get_auth(args, encode=False) + + assert auth == "gho_test_token:x-oauth-basic" + mock_check_output.assert_called_once_with( + ["gh", "auth", "token"], stderr=github_backup.subprocess.PIPE + ) + + +def test_get_auth_reads_token_from_gh_cli_for_enterprise_host(create_args): + args = create_args(token_from_gh=True, github_host="ghe.example.com") + + with patch( + 
"github_backup.github_backup.subprocess.check_output", + return_value=b"gho_enterprise_token\n", + ) as mock_check_output: + auth = github_backup.get_auth(args, encode=False) + + assert auth == "gho_enterprise_token:x-oauth-basic" + mock_check_output.assert_called_once_with( + ["gh", "auth", "token", "--hostname", "ghe.example.com"], + stderr=github_backup.subprocess.PIPE, + ) + + +def test_token_from_gh_is_cached(create_args): + args = create_args(token_from_gh=True) + + with patch( + "github_backup.github_backup.subprocess.check_output", + return_value=b"gho_cached_token\n", + ) as mock_check_output: + assert github_backup.get_auth(args, encode=False) == "gho_cached_token:x-oauth-basic" + assert github_backup.get_auth(args, encode=False) == "gho_cached_token:x-oauth-basic" + + mock_check_output.assert_called_once() + + +def test_token_from_gh_rejects_as_app(create_args): + args = create_args(token_from_gh=True, as_app=True) + + with pytest.raises(Exception) as exc_info: + github_backup.get_auth(args, encode=False) + + assert "--token-from-gh cannot be used with --as-app" in str(exc_info.value) From 4d022d94d0c7656a481651d8310a23e97a7db7fd Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 26 Apr 2026 13:45:29 +0200 Subject: [PATCH 432/455] Add support for discussions Closes #290 --- CHANGES.rst | 2 + README.rst | 34 ++- github_backup/github_backup.py | 495 +++++++++++++++++++++++++++++-- github_backup/graphql_queries.py | 292 ++++++++++++++++++ tests/test_auth.py | 10 + tests/test_discussions.py | 222 ++++++++++++++ tests/test_retrieve_data.py | 28 ++ 7 files changed, 1042 insertions(+), 41 deletions(-) create mode 100644 github_backup/graphql_queries.py create mode 100644 tests/test_discussions.py diff --git a/CHANGES.rst b/CHANGES.rst index 364bd3d..50f8d54 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,6 +3,8 @@ Changelog Unreleased ---------- +- Add GitHub Discussions backups via GraphQL, including comments, replies, + optional attachment downloads, 
and per-repository incremental checkpoints. - Add ``--token-from-gh`` to read authentication from ``gh auth token``. diff --git a/README.rst b/README.rst index 030f260..4135743 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ github-backup |PyPI| |Python Versions| -The package can be used to backup an *entire* `Github `_ organization, repository or user account, including starred repos, issues and wikis in the most appropriate format (clones for wikis, json files for issues). +The package can be used to backup an *entire* `Github `_ organization, repository or user account, including starred repos, issues, discussions and wikis in the most appropriate format (clones for wikis, json files for issues and discussions). Requirements ============ @@ -44,8 +44,9 @@ CLI Help output:: [--issues] [--issue-comments] [--issue-events] [--pulls] [--pull-comments] [--pull-commits] [--pull-details] [--labels] [--hooks] [--milestones] [--security-advisories] - [--repositories] [--bare] [--no-prune] [--lfs] [--wikis] - [--gists] [--starred-gists] [--skip-archived] [--skip-existing] + [--discussions] [--repositories] [--bare] [--no-prune] + [--lfs] [--wikis] [--gists] [--starred-gists] + [--skip-archived] [--skip-existing] [-L [LANGUAGES ...]] [-N NAME_REGEX] [-H GITHUB_HOST] [-O] [-R REPOSITORY] [-P] [-F] [--prefer-ssh] [-v] [--keychain-name OSX_KEYCHAIN_ITEM_NAME] @@ -104,6 +105,7 @@ CLI Help output:: --milestones include milestones in backup --security-advisories include security advisories in backup + --discussions include discussions in backup --repositories include repository clone in backup --bare clone bare repositories --no-prune disable prune option for git fetch @@ -144,8 +146,8 @@ CLI Help output:: applies if including releases --skip-assets-on [SKIP_ASSETS_ON ...] 
skip asset downloads for these repositories - --attachments download user-attachments from issues and pull - requests + --attachments download user-attachments from issues, pull requests, + and discussions --throttle-limit THROTTLE_LIMIT start throttling of GitHub API requests after this amount of API requests remain @@ -184,7 +186,7 @@ Customise the permissions for your use case, but for a personal account full bac **User permissions**: Read access to followers, starring, and watching. -**Repository permissions**: Read access to contents, issues, metadata, pull requests, and webhooks. +**Repository permissions**: Read access to contents, discussions, issues, metadata, pull requests, and webhooks. GitHub Apps @@ -265,9 +267,9 @@ LFS objects are fetched for all refs, not just the current checkout, ensuring a About Attachments ----------------- -When you use the ``--attachments`` option with ``--issues`` or ``--pulls``, the tool will download user-uploaded attachments (images, videos, documents, etc.) from issue and pull request descriptions and comments. In some circumstances attachments contain valuable data related to the topic, and without their backup important information or context might be lost inadvertently. +When you use the ``--attachments`` option with ``--issues``, ``--pulls`` or ``--discussions``, the tool will download user-uploaded attachments (images, videos, documents, etc.) from issue, pull request and discussion descriptions and comments. In some circumstances attachments contain valuable data related to the topic, and without their backup important information or context might be lost inadvertently. -Attachments are saved to ``issues/attachments/{issue_number}/`` and ``pulls/attachments/{pull_number}/`` directories, where ``{issue_number}`` is the GitHub issue number (e.g., issue #123 saves to ``issues/attachments/123/``). 
Each attachment directory contains: +Attachments are saved to ``issues/attachments/{issue_number}/``, ``pulls/attachments/{pull_number}/`` and ``discussions/attachments/{discussion_number}/`` directories, where ``{issue_number}`` is the GitHub issue number (e.g., issue #123 saves to ``issues/attachments/123/``). Each attachment directory contains: - The downloaded attachment files (named by their GitHub identifier with appropriate file extensions) - If multiple attachments have the same filename, conflicts are resolved with numeric suffixes (e.g., ``report.pdf``, ``report_1.pdf``, ``report_2.pdf``) @@ -287,6 +289,16 @@ The tool automatically extracts file extensions from HTTP headers to ensure file **Fine-grained token limitation:** Due to a GitHub platform limitation, fine-grained personal access tokens (``github_pat_...``) cannot download attachments from private repositories directly. This affects both ``/assets/`` (images) and ``/files/`` (documents) URLs. The tool implements a workaround for image attachments using GitHub's Markdown API, which converts URLs to temporary JWT-signed URLs that can be downloaded. However, this workaround only works for images - document attachments (PDFs, text files, etc.) will fail with 404 errors when using fine-grained tokens on private repos. For full attachment support on private repositories, use a classic token (``-t``) instead of a fine-grained token (``-f``). See `#477 `_ for details. +About Discussions +----------------- + +GitHub Discussions are backed up with GitHub's GraphQL API because the REST API does not expose discussions. Use ``--discussions`` to save each discussion as JSON under ``repositories/{repo}/discussions/{number}.json``. Discussion backups include the discussion body and metadata, category information, comments, and comment replies. + +``--discussions`` is included in ``--all``. Unlike most REST API-backed resources, discussions require authentication because GitHub's GraphQL API requires a token. 
Fine-grained personal access tokens and GitHub Apps need read access to the repository's Discussions permission. + +Incremental backups use a per-repository checkpoint at ``repositories/{repo}/discussions/last_update`` based on discussion ``updatedAt`` timestamps. This is separate from the repository-level ``last_update`` file so discussion activity is not missed if the repository's own update timestamp does not change. If you enable ``--discussions`` on an existing incremental backup, the first run performs a full discussions backup for each repository and creates the discussions checkpoint for future runs. + + About security advisories ------------------------- @@ -419,14 +431,14 @@ Quietly and incrementally backup useful Github user data (public and private rep export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER - github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --security-advisories --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER + github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --security-advisories --discussions --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER Debug an error/block or incomplete backup into a temporary directory. Omit "incremental" to fill a previous incomplete backup. 
:: export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER - github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER + github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --discussions --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER Pipe a token from stdin to avoid storing it in environment variables or command history (Unix-like systems only):: @@ -442,7 +454,7 @@ This tool creates backups only, there is no inbuilt restore command. cd /tmp/white-house/repositories/petitions/repository git push --mirror git@github.com:WhiteHouse/petitions.git -**Issues, pull requests, comments, and other metadata** are saved as JSON files for archival purposes. The GitHub API does not support recreating this data faithfully, creating issues via the API has limitations: +**Issues, pull requests, discussions, comments, and other metadata** are saved as JSON files for archival purposes. 
The GitHub API does not support recreating this data faithfully, creating issues via the API has limitations: - New issue/PR numbers are assigned (original numbers cannot be set) - Timestamps reflect creation time (original dates cannot be set) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index fd2fd99..c1245bd 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -33,6 +33,13 @@ except ImportError: VERSION = "unknown" +from .graphql_queries import ( + DISCUSSION_DETAIL_QUERY, + DISCUSSION_LIST_QUERY, + DISCUSSION_PAGE_SIZE, + DISCUSSION_REPLIES_QUERY, +) + FNULL = open(os.devnull, "w") FILE_URI_PREFIX = "file://" logger = logging.getLogger(__name__) @@ -322,6 +329,12 @@ def parse_args(args=None): dest="include_security_advisories", help="include security advisories in backup", ) + parser.add_argument( + "--discussions", + action="store_true", + dest="include_discussions", + help="include discussions in backup", + ) parser.add_argument( "--repositories", action="store_true", @@ -469,7 +482,7 @@ def parse_args(args=None): "--attachments", action="store_true", dest="include_attachments", - help="download user-attachments from issues and pull requests", + help="download user-attachments from issues, pull requests, and discussions", ) parser.add_argument( "--throttle-limit", @@ -579,6 +592,31 @@ def get_github_api_host(args): return host +def get_github_graphql_url(args): + if args.github_host: + return "https://{0}/api/graphql".format(args.github_host) + + return "https://api.github.com/graphql" + + +def get_graphql_auth(args): + auth = get_auth(args, encode=False) + if not auth: + return None + + # GraphQL expects a bearer token. Classic tokens and keychain tokens use + # "token:x-oauth-basic" for REST Basic auth, so strip the synthetic + # password before sending the GraphQL Authorization header. 
+ if ( + not getattr(args, "as_app", False) + and getattr(args, "token_fine", None) is None + and ":" in auth + ): + auth = auth.split(":", 1)[0] + + return auth + + def get_github_host(args): if args.github_host: host = args.github_host @@ -810,6 +848,87 @@ def _extract_legal_url(response_body_bytes): return list(fetch_all()) +def retrieve_graphql_data(args, query, variables=None, log_context=None): + """Fetch data from GitHub's GraphQL API.""" + auth = get_graphql_auth(args) + if not auth: + raise Exception("GitHub GraphQL API requires authentication") + + variables = variables or {} + payload = json.dumps( + {"query": query, "variables": variables}, ensure_ascii=False + ).encode("utf-8") + endpoint = get_github_graphql_url(args) + + for attempt in range(args.max_retries + 1): + request = Request(endpoint, data=payload, method="POST") + request.add_header("Accept", "application/json") + request.add_header("Content-Type", "application/json") + request.add_header("Authorization", "bearer " + auth) + log_url = endpoint + if log_context: + log_url = "{0} ({1})".format(log_url, log_context) + logger.info("Requesting {0}".format(log_url)) + + http_response = make_request_with_retry(request, auth, args.max_retries) + + status = http_response.getcode() + if status != 200: + raise Exception( + f"Unexpected HTTP {status} from {endpoint} " + f"(expected non-2xx to raise HTTPError)" + ) + + try: + response = json.loads(http_response.read().decode("utf-8")) + except (IncompleteRead, json.decoder.JSONDecodeError, TimeoutError) as e: + logger.warning(f"{type(e).__name__} reading GraphQL response") + if attempt < args.max_retries: + delay = calculate_retry_delay(attempt, {}) + logger.warning( + f"Retrying GraphQL read in {delay:.1f}s " + f"(attempt {attempt + 1}/{args.max_retries + 1})" + ) + time.sleep(delay) + continue + raise Exception( + f"Failed to read GraphQL response after {args.max_retries + 1} " + f"attempts for {endpoint}" + ) + + if ( + remaining := 
int(http_response.headers.get("x-ratelimit-remaining", 0)) + ) <= (args.throttle_limit or 0): + if args.throttle_limit: + logger.info( + f"Throttling: {remaining} requests left, pausing {args.throttle_pause}s" + ) + time.sleep(args.throttle_pause) + + errors = response.get("errors") or [] + if errors: + if any(error.get("type") == "RATE_LIMITED" for error in errors): + if attempt < args.max_retries: + delay = calculate_retry_delay(attempt, http_response.headers) + logger.warning( + f"GraphQL rate limit hit, retrying in {delay:.1f}s " + f"(attempt {attempt + 1}/{args.max_retries + 1})" + ) + time.sleep(delay) + continue + + messages = "; ".join( + error.get("message", str(error)) for error in errors + ) + raise Exception("GraphQL Error: {0}".format(messages)) + + return response.get("data", {}) + + raise Exception( + f"GraphQL request failed after {args.max_retries + 1} attempts" + ) # pragma: no cover + + def make_request_with_retry(request, auth, max_retries=5): """Make HTTP request with automatic retry for transient errors.""" @@ -1193,7 +1312,7 @@ def get_jwt_signed_url_via_markdown_api(url, token, repo_context): def extract_attachment_urls(item_data, issue_number=None, repository_full_name=None): - """Extract GitHub-hosted attachment URLs from issue/PR body and comments. + """Extract GitHub-hosted attachment URLs from issue/PR/discussion body and comments. What qualifies as an attachment? 
There is no "attachment" concept in the GitHub API - it's a user behavior pattern @@ -1335,33 +1454,29 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): # and exclude the URL to avoid downloading from wrong repos return False + def extract_from_text(text): + text_cleaned = remove_code_blocks(text or "") + for pattern in patterns: + found_urls = re.findall(pattern, text_cleaned) + urls.extend([clean_url(url) for url in found_urls]) + + def extract_from_comments(comments): + for comment in comments: + extract_from_text(comment.get("body") or "") + # GitHub Discussions support one level of replies. Issues and pull + # requests don't have reply_data, so this is a no-op for them. + extract_from_comments(comment.get("reply_data") or []) + # Extract from body - body = item_data.get("body") or "" - # Remove code blocks before searching for URLs - body_cleaned = remove_code_blocks(body) - for pattern in patterns: - found_urls = re.findall(pattern, body_cleaned) - urls.extend([clean_url(url) for url in found_urls]) - - # Extract from issue comments + extract_from_text(item_data.get("body") or "") + + # Extract from issue comments and discussion comments if "comment_data" in item_data: - for comment in item_data["comment_data"]: - comment_body = comment.get("body") or "" - # Remove code blocks before searching for URLs - comment_cleaned = remove_code_blocks(comment_body) - for pattern in patterns: - found_urls = re.findall(pattern, comment_cleaned) - urls.extend([clean_url(url) for url in found_urls]) + extract_from_comments(item_data["comment_data"]) # Extract from PR regular comments if "comment_regular_data" in item_data: - for comment in item_data["comment_regular_data"]: - comment_body = comment.get("body") or "" - # Remove code blocks before searching for URLs - comment_cleaned = remove_code_blocks(comment_body) - for pattern in patterns: - found_urls = re.findall(pattern, comment_cleaned) - urls.extend([clean_url(url) for url in found_urls]) + 
extract_from_comments(item_data["comment_regular_data"]) regex_urls = list(set(urls)) # dedupe @@ -1463,20 +1578,24 @@ def resolve_filename_collision(filepath): def download_attachments( args, item_cwd, item_data, number, repository, item_type="issue" ): - """Download user-attachments from issue/PR body and comments with manifest. + """Download user-attachments from issue/PR/discussion body and comments with manifest. Args: args: Command line arguments - item_cwd: Working directory (issue_cwd or pulls_cwd) - item_data: Issue or PR data dict - number: Issue or PR number + item_cwd: Working directory (issue_cwd, pulls_cwd, or discussion_cwd) + item_data: Issue, PR, or discussion data dict + number: Issue, PR, or discussion number repository: Repository dict - item_type: "issue" or "pull" for logging/manifest + item_type: "issue", "pull", or "discussion" for logging/manifest """ import json from datetime import datetime, timezone - item_type_display = "issue" if item_type == "issue" else "pull request" + item_type_display = { + "issue": "issue", + "pull": "pull request", + "discussion": "discussion", + }.get(item_type, item_type) urls = extract_attachment_urls( item_data, issue_number=number, repository_full_name=repository["full_name"] @@ -1621,6 +1740,8 @@ def download_attachments( # Write manifest if attachment_metadata_list: manifest = { + "item_number": number, + "item_type": item_type, "issue_number": number, "issue_type": item_type, "repository": ( @@ -1888,6 +2009,9 @@ def backup_repositories(args, output_directory, repositories): if args.include_pulls or args.include_everything: backup_pulls(args, repo_cwd, repository, repos_template) + if args.include_discussions or args.include_everything: + backup_discussions(args, repo_cwd, repository) + if args.include_milestones or args.include_everything: backup_milestones(args, repo_cwd, repository, repos_template) @@ -1922,6 +2046,317 @@ def backup_repositories(args, output_directory, repositories): 
open(last_update_path, "w").write(last_update) +def _repository_owner_name(repository): + return repository["full_name"].split("/", 1) + + +def _connection_nodes(connection): + return [node for node in (connection or {}).get("nodes") or [] if node] + + +def retrieve_discussion_summaries(args, repository, since=None): + owner, name = _repository_owner_name(repository) + after = None + page = 1 + summaries = [] + newest_seen = None + discussions_enabled = None + total_count = 0 + + while True: + data = retrieve_graphql_data( + args, + DISCUSSION_LIST_QUERY, + { + "owner": owner, + "name": name, + "after": after, + "pageSize": DISCUSSION_PAGE_SIZE, + }, + log_context="discussion summaries {0} page {1}".format( + repository["full_name"], page + ), + ) + repository_data = data.get("repository") + if repository_data is None: + raise Exception( + "Repository {0} not found in GraphQL response".format( + repository["full_name"] + ) + ) + + discussions_enabled = repository_data.get("hasDiscussionsEnabled") + if not discussions_enabled: + return [], None, False, 0 + + discussions = repository_data.get("discussions") or {} + total_count = discussions.get("totalCount", total_count) + stop = False + + for discussion in _connection_nodes(discussions): + updated_at = discussion.get("updatedAt") + if updated_at and (newest_seen is None or updated_at > newest_seen): + newest_seen = updated_at + + if since and updated_at and updated_at < since: + stop = True + break + + summaries.append(discussion) + + page_info = discussions.get("pageInfo") or {} + if stop or not page_info.get("hasNextPage"): + break + + after = page_info.get("endCursor") + page += 1 + + return summaries, newest_seen, discussions_enabled, total_count + + +def retrieve_discussion_comment_replies(args, comment_id, after=None, log_context=None): + data = retrieve_graphql_data( + args, + DISCUSSION_REPLIES_QUERY, + { + "commentId": comment_id, + "repliesCursor": after, + "pageSize": DISCUSSION_PAGE_SIZE, + }, + 
log_context=log_context, + ) + node = data.get("node") or {} + return node.get("replies") or {} + + +def _discussion_comment_log_identifier(comment_node): + return ( + comment_node.get("databaseId") + or comment_node.get("url") + or comment_node.get("id") + ) + + +def _discussion_comment_with_replies( + args, comment_node, repository_full_name=None, discussion_number=None +): + replies_connection = comment_node.get("replies") or {} + replies = _connection_nodes(replies_connection) + reply_total_count = replies_connection.get("totalCount", len(replies)) + page_info = replies_connection.get("pageInfo") or {} + reply_page = 2 + + while page_info.get("hasNextPage"): + log_context = None + if repository_full_name and discussion_number is not None: + log_context = "discussion {0}#{1} comment {2} replies page {3}".format( + repository_full_name, + discussion_number, + _discussion_comment_log_identifier(comment_node), + reply_page, + ) + + replies_connection = retrieve_discussion_comment_replies( + args, + comment_node["id"], + page_info.get("endCursor"), + log_context=log_context, + ) + replies.extend(_connection_nodes(replies_connection)) + page_info = replies_connection.get("pageInfo") or {} + reply_page += 1 + + comment = {key: value for key, value in comment_node.items() if key != "replies"} + comment["reply_count"] = reply_total_count + comment["reply_data"] = replies + return comment + + +def retrieve_discussion(args, repository, number): + owner, name = _repository_owner_name(repository) + comments_cursor = None + comments_page = 1 + discussion_data = None + comments = [] + comment_total_count = 0 + + while True: + data = retrieve_graphql_data( + args, + DISCUSSION_DETAIL_QUERY, + { + "owner": owner, + "name": name, + "number": number, + "commentsCursor": comments_cursor, + "pageSize": DISCUSSION_PAGE_SIZE, + }, + log_context="discussion {0}#{1} details/comments page {2}".format( + repository["full_name"], number, comments_page + ), + ) + repository_data = 
data.get("repository") or {} + discussion = repository_data.get("discussion") + if discussion is None: + raise Exception( + "Discussion #{0} not found in {1}".format( + number, repository["full_name"] + ) + ) + + if discussion_data is None: + discussion_data = { + key: value for key, value in discussion.items() if key != "comments" + } + + comments_connection = discussion.get("comments") or {} + comment_total_count = comments_connection.get( + "totalCount", comment_total_count + ) + for comment_node in _connection_nodes(comments_connection): + comments.append( + _discussion_comment_with_replies( + args, comment_node, repository["full_name"], number + ) + ) + + page_info = comments_connection.get("pageInfo") or {} + if not page_info.get("hasNextPage"): + break + + comments_cursor = page_info.get("endCursor") + comments_page += 1 + + discussion_data["comment_count"] = comment_total_count + discussion_data["comment_data"] = comments + return discussion_data + + +def backup_discussions(args, repo_cwd, repository): + discussion_cwd = os.path.join(repo_cwd, "discussions") + if args.skip_existing and os.path.isdir(discussion_cwd): + return + + if not get_graphql_auth(args): + logger.info( + "Skipping {0} discussions since GitHub GraphQL API requires authentication".format( + repository["full_name"] + ) + ) + return + + discussions_since = None + discussion_last_update_path = os.path.join(discussion_cwd, "last_update") + if args.incremental and os.path.exists(discussion_last_update_path): + discussions_since = open(discussion_last_update_path).read().strip() + + logger.info("Retrieving {0} discussions".format(repository["full_name"])) + try: + ( + summaries, + newest_seen, + discussions_enabled, + total_count, + ) = retrieve_discussion_summaries(args, repository, since=discussions_since) + except Exception as e: + logger.warning( + "Unable to retrieve discussions for {0}, skipping: {1}".format( + repository["full_name"], e + ) + ) + return + + if not discussions_enabled: + 
logger.info( + "Discussions are not enabled for {0}, skipping".format( + repository["full_name"] + ) + ) + return + + mkdir_p(repo_cwd, discussion_cwd) + + if discussions_since: + logger.info( + "Saving {0} updated discussions to disk ({1} total)".format( + len(summaries), total_count + ) + ) + else: + logger.info("Saving {0} discussions to disk".format(len(summaries))) + + written_count = 0 + skipped_count = 0 + had_errors = False + for summary in summaries: + number = summary["number"] + discussion_file = os.path.join(discussion_cwd, "{0}.json".format(number)) + + if args.incremental_by_files and os.path.isfile(discussion_file): + modified = os.path.getmtime(discussion_file) + modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ") + if modified > summary["updatedAt"]: + logger.info( + "Skipping discussion {0} because it wasn't modified since last backup".format( + number + ) + ) + skipped_count += 1 + continue + + try: + discussion = retrieve_discussion(args, repository, number) + except Exception as e: + logger.warning( + "Unable to retrieve discussion {0}#{1}, skipping: {2}".format( + repository["full_name"], number, e + ) + ) + had_errors = True + continue + + if args.include_attachments: + download_attachments( + args, + discussion_cwd, + discussion, + number, + repository, + item_type="discussion", + ) + + if json_dump_if_changed(discussion, discussion_file): + written_count += 1 + + if ( + args.incremental + and not had_errors + and newest_seen + and (not discussions_since or newest_seen > discussions_since) + ): + open(discussion_last_update_path, "w").write(newest_seen) + + attempted_count = len(summaries) - skipped_count + if not summaries: + logger.info("No discussions to save") + elif attempted_count == 0: + logger.info("{0} discussions skipped".format(skipped_count)) + elif written_count == attempted_count: + logger.info("Saved {0} discussions to disk".format(written_count)) + elif written_count == 0: + logger.info( + "{0} 
discussions unchanged, skipped write".format(attempted_count) + ) + else: + logger.info( + "Saved {0} discussions to disk ({1} unchanged, {2} skipped)".format( + written_count, + attempted_count - written_count, + skipped_count, + ) + ) + + def backup_issues(args, repo_cwd, repository, repos_template): has_issues_dir = os.path.isdir("{0}/issues/.git".format(repo_cwd)) if args.skip_existing and has_issues_dir: diff --git a/github_backup/graphql_queries.py b/github_backup/graphql_queries.py new file mode 100644 index 0000000..96eb552 --- /dev/null +++ b/github_backup/graphql_queries.py @@ -0,0 +1,292 @@ +"""GraphQL query templates used by github-backup.""" + +DISCUSSION_PAGE_SIZE = 100 + +DISCUSSION_LIST_QUERY = """ +query($owner: String!, $name: String!, $after: String, $pageSize: Int!) { + repository(owner: $owner, name: $name) { + hasDiscussionsEnabled + discussions( + first: $pageSize, + after: $after, + orderBy: {field: UPDATED_AT, direction: DESC} + ) { + totalCount + nodes { + id + number + title + updatedAt + } + pageInfo { + hasNextPage + endCursor + } + } + } +} +""" + +DISCUSSION_DETAIL_QUERY = """ +query( + $owner: String!, + $name: String!, + $number: Int!, + $commentsCursor: String, + $pageSize: Int! 
+) { + repository(owner: $owner, name: $name) { + discussion(number: $number) { + activeLockReason + answer { + id + databaseId + url + } + answerChosenAt + answerChosenBy { + ...ActorFields + } + author { + ...ActorFields + } + authorAssociation + body + bodyHTML + bodyText + category { + createdAt + description + emoji + emojiHTML + id + isAnswerable + name + slug + updatedAt + } + closed + closedAt + createdAt + createdViaEmail + databaseId + editor { + ...ActorFields + } + id + includesCreatedEdit + isAnswered + labels(first: 100) { + totalCount + nodes { + id + name + color + description + } + } + lastEditedAt + locked + number + poll { + id + question + totalVoteCount + options(first: 100) { + totalCount + nodes { + id + option + totalVoteCount + } + } + } + publishedAt + reactionGroups { + ...ReactionGroupFields + } + resourcePath + stateReason + title + updatedAt + upvoteCount + url + comments(first: $pageSize, after: $commentsCursor) { + totalCount + nodes { + ...DiscussionCommentFields + replies(first: $pageSize) { + totalCount + nodes { + ...DiscussionReplyFields + } + pageInfo { + hasNextPage + endCursor + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + } +} + +fragment ActorFields on Actor { + avatarUrl + login + resourcePath + url +} + +fragment ReactionGroupFields on ReactionGroup { + content + reactors { + totalCount + } +} + +fragment DiscussionCommentFields on DiscussionComment { + author { + ...ActorFields + } + authorAssociation + body + bodyHTML + bodyText + createdAt + createdViaEmail + databaseId + deletedAt + editor { + ...ActorFields + } + id + includesCreatedEdit + isAnswer + isMinimized + lastEditedAt + minimizedReason + publishedAt + reactionGroups { + ...ReactionGroupFields + } + replyTo { + id + databaseId + url + } + resourcePath + updatedAt + upvoteCount + url +} + +fragment DiscussionReplyFields on DiscussionComment { + author { + ...ActorFields + } + authorAssociation + body + bodyHTML + bodyText + createdAt + 
createdViaEmail + databaseId + deletedAt + editor { + ...ActorFields + } + id + includesCreatedEdit + isAnswer + isMinimized + lastEditedAt + minimizedReason + publishedAt + reactionGroups { + ...ReactionGroupFields + } + replyTo { + id + databaseId + url + } + resourcePath + updatedAt + upvoteCount + url +} +""" + +DISCUSSION_REPLIES_QUERY = """ +query($commentId: ID!, $repliesCursor: String, $pageSize: Int!) { + node(id: $commentId) { + ... on DiscussionComment { + replies(first: $pageSize, after: $repliesCursor) { + totalCount + nodes { + ...DiscussionReplyFields + } + pageInfo { + hasNextPage + endCursor + } + } + } + } +} + +fragment ActorFields on Actor { + avatarUrl + login + resourcePath + url +} + +fragment ReactionGroupFields on ReactionGroup { + content + reactors { + totalCount + } +} + +fragment DiscussionReplyFields on DiscussionComment { + author { + ...ActorFields + } + authorAssociation + body + bodyHTML + bodyText + createdAt + createdViaEmail + databaseId + deletedAt + editor { + ...ActorFields + } + id + includesCreatedEdit + isAnswer + isMinimized + lastEditedAt + minimizedReason + publishedAt + reactionGroups { + ...ReactionGroupFields + } + replyTo { + id + databaseId + url + } + resourcePath + updatedAt + upvoteCount + url +} +""" diff --git a/tests/test_auth.py b/tests/test_auth.py index 504c822..0102878 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -56,6 +56,16 @@ def test_token_from_gh_is_cached(create_args): mock_check_output.assert_called_once() +def test_graphql_auth_strips_basic_auth_suffix_for_gh_cli_token(create_args): + args = create_args(token_from_gh=True) + + with patch( + "github_backup.github_backup.subprocess.check_output", + return_value=b"gho_graphql_token\n", + ): + assert github_backup.get_graphql_auth(args) == "gho_graphql_token" + + def test_token_from_gh_rejects_as_app(create_args): args = create_args(token_from_gh=True, as_app=True) diff --git a/tests/test_discussions.py b/tests/test_discussions.py new 
file mode 100644 index 0000000..89fd8dd --- /dev/null +++ b/tests/test_discussions.py @@ -0,0 +1,222 @@ +"""Tests for GitHub Discussions backup support.""" + +import json +import os +from unittest.mock import patch + +from github_backup import github_backup + + +def test_parse_args_discussions_flag(): + args = github_backup.parse_args(["--discussions", "testuser"]) + assert args.include_discussions is True + + +def test_retrieve_discussion_summaries_stops_at_incremental_since(create_args): + args = create_args() + repository = {"full_name": "owner/repo"} + + page = { + "repository": { + "hasDiscussionsEnabled": True, + "discussions": { + "totalCount": 3, + "nodes": [ + {"number": 3, "title": "new", "updatedAt": "2026-02-01T00:00:00Z"}, + {"number": 2, "title": "also new", "updatedAt": "2026-01-10T00:00:00Z"}, + {"number": 1, "title": "old", "updatedAt": "2025-12-01T00:00:00Z"}, + ], + "pageInfo": {"hasNextPage": True, "endCursor": "NEXT"}, + }, + } + } + + with patch( + "github_backup.github_backup.retrieve_graphql_data", return_value=page + ) as mock_retrieve: + summaries, newest, enabled, total = github_backup.retrieve_discussion_summaries( + args, repository, since="2026-01-01T00:00:00Z" + ) + + assert enabled is True + assert total == 3 + assert newest == "2026-02-01T00:00:00Z" + assert [item["number"] for item in summaries] == [3, 2] + # The old discussion stops pagination, so the next page is not requested. 
+ assert mock_retrieve.call_count == 1 + assert ( + mock_retrieve.call_args.kwargs["log_context"] + == "discussion summaries owner/repo page 1" + ) + + +def test_retrieve_discussion_summaries_disabled_discussions(create_args): + args = create_args() + repository = {"full_name": "owner/repo"} + + with patch( + "github_backup.github_backup.retrieve_graphql_data", + return_value={"repository": {"hasDiscussionsEnabled": False}}, + ): + summaries, newest, enabled, total = github_backup.retrieve_discussion_summaries( + args, repository + ) + + assert summaries == [] + assert newest is None + assert enabled is False + assert total == 0 + + +def _comment(comment_id, body, replies=None, replies_has_next=False): + replies = replies or [] + return { + "id": comment_id, + "body": body, + "replies": { + "totalCount": len(replies) + (1 if replies_has_next else 0), + "nodes": replies, + "pageInfo": { + "hasNextPage": replies_has_next, + "endCursor": "REPLIES2" if replies_has_next else None, + }, + }, + } + + +def _discussion_page(comment_nodes, has_next=False): + return { + "repository": { + "discussion": { + "number": 42, + "title": "Discussion title", + "updatedAt": "2026-02-01T00:00:00Z", + "comments": { + "totalCount": 2, + "nodes": comment_nodes, + "pageInfo": { + "hasNextPage": has_next, + "endCursor": "COMMENTS2" if has_next else None, + }, + }, + } + } + } + + +def test_retrieve_discussion_paginates_comments_and_replies(create_args): + args = create_args() + repository = {"full_name": "owner/repo"} + + reply_1 = {"id": "reply-1", "body": "first reply"} + reply_2 = {"id": "reply-2", "body": "second reply"} + comment_1 = _comment("comment-1", "first comment", [reply_1], replies_has_next=True) + comment_2 = _comment("comment-2", "second comment") + + responses = [ + _discussion_page([comment_1], has_next=True), + { + "node": { + "replies": { + "totalCount": 2, + "nodes": [reply_2], + "pageInfo": {"hasNextPage": False, "endCursor": None}, + } + } + }, + 
_discussion_page([comment_2], has_next=False), + ] + + with patch( + "github_backup.github_backup.retrieve_graphql_data", side_effect=responses + ) as mock_retrieve: + discussion = github_backup.retrieve_discussion(args, repository, 42) + + assert discussion["number"] == 42 + assert discussion["comment_count"] == 2 + assert len(discussion["comment_data"]) == 2 + assert discussion["comment_data"][0]["body"] == "first comment" + assert discussion["comment_data"][0]["reply_count"] == 2 + assert [r["body"] for r in discussion["comment_data"][0]["reply_data"]] == [ + "first reply", + "second reply", + ] + assert discussion["comment_data"][1]["body"] == "second comment" + assert mock_retrieve.call_count == 3 + assert [ + call.kwargs["log_context"] for call in mock_retrieve.call_args_list + ] == [ + "discussion owner/repo#42 details/comments page 1", + "discussion owner/repo#42 comment comment-1 replies page 2", + "discussion owner/repo#42 details/comments page 2", + ] + + +def test_backup_discussions_uses_incremental_checkpoint(create_args, tmp_path): + args = create_args(token_classic="fake_token", include_discussions=True, incremental=True) + repository = {"full_name": "owner/repo"} + discussions_dir = tmp_path / "discussions" + discussions_dir.mkdir() + (discussions_dir / "last_update").write_text("2026-01-01T00:00:00Z") + + def fake_summaries(passed_args, passed_repository, since=None): + assert passed_args is args + assert passed_repository == repository + assert since == "2026-01-01T00:00:00Z" + return ( + [{"number": 7, "title": "updated", "updatedAt": "2026-02-01T00:00:00Z"}], + "2026-02-01T00:00:00Z", + True, + 1, + ) + + with patch( + "github_backup.github_backup.retrieve_discussion_summaries", + side_effect=fake_summaries, + ), patch( + "github_backup.github_backup.retrieve_discussion", + return_value={"number": 7, "title": "updated"}, + ): + github_backup.backup_discussions(args, tmp_path, repository) + + with open(discussions_dir / "7.json", 
encoding="utf-8") as f: + assert json.load(f) == {"number": 7, "title": "updated"} + assert (discussions_dir / "last_update").read_text() == "2026-02-01T00:00:00Z" + + +def test_backup_discussions_does_not_advance_checkpoint_on_discussion_error( + create_args, tmp_path +): + args = create_args(token_classic="fake_token", include_discussions=True, incremental=True) + repository = {"full_name": "owner/repo"} + discussions_dir = tmp_path / "discussions" + discussions_dir.mkdir() + (discussions_dir / "last_update").write_text("2026-01-01T00:00:00Z") + + with patch( + "github_backup.github_backup.retrieve_discussion_summaries", + return_value=( + [{"number": 7, "title": "updated", "updatedAt": "2026-02-01T00:00:00Z"}], + "2026-02-01T00:00:00Z", + True, + 1, + ), + ), patch( + "github_backup.github_backup.retrieve_discussion", + side_effect=Exception("temporary GraphQL error"), + ): + github_backup.backup_discussions(args, tmp_path, repository) + + assert (discussions_dir / "last_update").read_text() == "2026-01-01T00:00:00Z" + assert not os.path.exists(discussions_dir / "7.json") + + +def test_backup_discussions_skips_without_auth(create_args, tmp_path): + args = create_args(include_discussions=True) + repository = {"full_name": "owner/repo"} + + with patch("github_backup.github_backup.retrieve_discussion_summaries") as mock_retrieve: + github_backup.backup_discussions(args, tmp_path, repository) + + assert not mock_retrieve.called + assert not os.path.exists(tmp_path / "discussions") diff --git a/tests/test_retrieve_data.py b/tests/test_retrieve_data.py index 014c309..51848ef 100644 --- a/tests/test_retrieve_data.py +++ b/tests/test_retrieve_data.py @@ -1,6 +1,7 @@ """Tests for retrieve_data function.""" import json +import logging import socket from unittest.mock import Mock, patch from urllib.error import HTTPError, URLError @@ -355,6 +356,33 @@ def mock_urlopen(*args, **kwargs): ) # 1 initial + 5 retries = 6 attempts +class TestRetrieveGraphqlDataLogging: + """Tests 
for GraphQL request logging.""" + + def test_logs_graphql_context(self, create_args, caplog): + args = create_args(token_classic="fake_token") + mock_response = Mock() + mock_response.getcode.return_value = 200 + mock_response.read.return_value = json.dumps({"data": {}}).encode("utf-8") + mock_response.headers = {"x-ratelimit-remaining": "5000"} + + caplog.set_level(logging.INFO, logger="github_backup.github_backup") + with patch( + "github_backup.github_backup.make_request_with_retry", + return_value=mock_response, + ): + github_backup.retrieve_graphql_data( + args, + "query { viewer { login } }", + log_context="discussion owner/repo#1", + ) + + assert ( + "Requesting https://api.github.com/graphql (discussion owner/repo#1)" + in caplog.text + ) + + class TestRetrieveDataThrottling: """Tests for throttling behavior in retrieve_data.""" From 24b3fdb4f34f85be090c335426e41403331e3ddf Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 26 Apr 2026 14:08:42 +0200 Subject: [PATCH 433/455] Add support for pull request reviews Closes #124 --- CHANGES.rst | 2 + README.rst | 16 ++- github_backup/github_backup.py | 148 ++++++++++++++++++-- tests/test_pull_reviews.py | 237 +++++++++++++++++++++++++++++++++ 4 files changed, 388 insertions(+), 15 deletions(-) create mode 100644 tests/test_pull_reviews.py diff --git a/CHANGES.rst b/CHANGES.rst index 50f8d54..b790ce1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,8 @@ Unreleased ---------- - Add GitHub Discussions backups via GraphQL, including comments, replies, optional attachment downloads, and per-repository incremental checkpoints. +- Add pull request review backups with ``--pull-reviews`` and one-time + incremental backfill for existing backups. - Add ``--token-from-gh`` to read authentication from ``gh auth token``. 
diff --git a/README.rst b/README.rst index 4135743..52d7222 100644 --- a/README.rst +++ b/README.rst @@ -42,7 +42,8 @@ CLI Help output:: [--starred] [--all-starred] [--starred-skip-size-over MB] [--watched] [--followers] [--following] [--all] [--issues] [--issue-comments] [--issue-events] [--pulls] - [--pull-comments] [--pull-commits] [--pull-details] + [--pull-comments] [--pull-reviews] [--pull-commits] + [--pull-details] [--labels] [--hooks] [--milestones] [--security-advisories] [--discussions] [--repositories] [--bare] [--no-prune] [--lfs] [--wikis] [--gists] [--starred-gists] @@ -97,6 +98,7 @@ CLI Help output:: --issue-events include issue events in backup --pulls include pull requests in backup --pull-comments include pull request review comments in backup + --pull-reviews include pull request reviews in backup --pull-commits include pull request commits in backup --pull-details include more pull request details in backup [*] --labels include labels in backup @@ -340,6 +342,14 @@ For finer control, avoid using ``--assets`` with starred repos, or use ``--skip- Alternatively, consider just storing links to starred repos in JSON format with ``--starred``. +About pull request reviews +-------------------------- + +Use ``--pull-reviews`` with ``--pulls`` to include GitHub pull request review metadata under each pull request's ``review_data`` key. Reviews are separate from review comments: ``--pull-comments`` backs up inline review comments via ``comment_data`` and regular PR conversation comments via ``comment_regular_data``, while ``--pull-reviews`` backs up review state, submitted time, commit ID, and the top-level review body. + +``--pull-reviews`` is included in ``--all``. Incremental backups use a per-repository checkpoint at ``repositories/{repo}/pulls/reviews_last_update``. 
If ``--pull-reviews`` is enabled on an existing incremental backup, the first run performs a one-time backfill for pull request reviews so older PRs are not skipped by the existing repository checkpoint. Existing ``comment_data``, ``comment_regular_data`` and ``commit_data`` fields are preserved when only review data is being added. + + Incremental Backup ------------------ @@ -431,14 +441,14 @@ Quietly and incrementally backup useful Github user data (public and private rep export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER - github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --security-advisories --discussions --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER + github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --security-advisories --discussions --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER Debug an error/block or incomplete backup into a temporary directory. Omit "incremental" to fill a previous incomplete backup. 
:: export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER - github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-commits --labels --milestones --discussions --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER + github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --discussions --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER Pipe a token from stdin to avoid storing it in environment variables or command history (Unix-like systems only):: diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index c1245bd..054d0c6 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -293,6 +293,12 @@ def parse_args(args=None): dest="include_pull_comments", help="include pull request review comments in backup", ) + parser.add_argument( + "--pull-reviews", + action="store_true", + dest="include_pull_reviews", + help="include pull request reviews in backup", + ) parser.add_argument( "--pull-commits", action="store_true", @@ -2427,6 +2433,57 @@ def backup_issues(args, repo_cwd, repository, repos_template): os.replace(issue_file + ".temp", issue_file) # Atomic write +PULL_OPTIONAL_DATA_KEYS = ( + "comment_regular_data", + "comment_data", + "commit_data", + "review_data", +) +PULL_REVIEWS_LAST_UPDATE_FILENAME = "reviews_last_update" + + +def read_json_file_if_exists(path): + if not os.path.isfile(path): + return None + + try: + with codecs.open(path, "r", encoding="utf-8") as f: + return json.load(f) + except (OSError, UnicodeDecodeError, json.decoder.JSONDecodeError) as e: + logger.debug("Error reading 
existing JSON file {0}: {1}".format(path, e)) + return None + + +def restore_existing_pull_optional_data(pull, existing_pull): + if not existing_pull: + return + + for key in PULL_OPTIONAL_DATA_KEYS: + if key not in pull and key in existing_pull: + pull[key] = existing_pull[key] + + +def get_pull_reviews_since(args, pulls_cwd): + args_since = getattr(args, "since", None) + if not args.incremental: + return args_since, None, None + + reviews_last_update_path = os.path.join( + pulls_cwd, PULL_REVIEWS_LAST_UPDATE_FILENAME + ) + if not os.path.exists(reviews_last_update_path): + # One-time backfill for existing incremental backups: if the user adds + # --pull-reviews after a repository checkpoint already exists, the + # repository-level checkpoint would otherwise skip old PRs forever. + return None, None, reviews_last_update_path + + reviews_since = open(reviews_last_update_path).read().strip() + if args_since and reviews_since: + return min(args_since, reviews_since), reviews_since, reviews_last_update_path + + return args_since or reviews_since, reviews_since, reviews_last_update_path + + def backup_pulls(args, repo_cwd, repository, repos_template): has_pulls_dir = os.path.isdir("{0}/pulls/.git".format(repo_cwd)) if args.skip_existing and has_pulls_dir: @@ -2436,7 +2493,20 @@ def backup_pulls(args, repo_cwd, repository, repos_template): pulls_cwd = os.path.join(repo_cwd, "pulls") mkdir_p(repo_cwd, pulls_cwd) + include_pull_reviews = args.include_pull_reviews or args.include_everything + repository_since = getattr(args, "since", None) + pulls_since = repository_since + pull_reviews_since = None + pull_reviews_last_update_path = None + if include_pull_reviews: + ( + pulls_since, + pull_reviews_since, + pull_reviews_last_update_path, + ) = get_pull_reviews_since(args, pulls_cwd) + pulls = {} + newest_pull_update = None _pulls_template = "{0}/{1}/pulls".format(repos_template, repository["full_name"]) _issue_template = "{0}/{1}/issues".format(repos_template, 
repository["full_name"]) query_args = { @@ -2446,27 +2516,43 @@ def backup_pulls(args, repo_cwd, repository, repos_template): "direction": "desc", } + def track_newest_pull_update(pull): + nonlocal newest_pull_update + updated_at = pull.get("updated_at") + if updated_at and ( + newest_pull_update is None or updated_at > newest_pull_update + ): + newest_pull_update = updated_at + + def pull_is_due_for_repository_checkpoint(pull): + return not repository_since or pull["updated_at"] >= repository_since + if not args.include_pull_details: pull_states = ["open", "closed"] for pull_state in pull_states: query_args["state"] = pull_state _pulls = retrieve_data(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull["updated_at"] < args.since: + track_newest_pull_update(pull) + if pulls_since and pull["updated_at"] < pulls_since: break - if not args.since or pull["updated_at"] >= args.since: + if not pulls_since or pull["updated_at"] >= pulls_since: pulls[pull["number"]] = pull else: _pulls = retrieve_data(args, _pulls_template, query_args=query_args) for pull in _pulls: - if args.since and pull["updated_at"] < args.since: + track_newest_pull_update(pull) + if pulls_since and pull["updated_at"] < pulls_since: break - if not args.since or pull["updated_at"] >= args.since: - pulls[pull["number"]] = retrieve_data( - args, - _pulls_template + "/{}".format(pull["number"]), - paginated=False, - )[0] + if not pulls_since or pull["updated_at"] >= pulls_since: + if pull_is_due_for_repository_checkpoint(pull): + pulls[pull["number"]] = retrieve_data( + args, + _pulls_template + "/{}".format(pull["number"]), + paginated=False, + )[0] + else: + pulls[pull["number"]] = pull logger.info("Saving {0} pull requests to disk".format(len(list(pulls.keys())))) # Comments from pulls API are only _review_ comments @@ -2476,24 +2562,50 @@ def backup_pulls(args, repo_cwd, repository, repos_template): comments_regular_template = _issue_template + "/{0}/comments" 
comments_template = _pulls_template + "/{0}/comments" commits_template = _pulls_template + "/{0}/commits" + reviews_template = _pulls_template + "/{0}/reviews" + pull_review_errors = False + for number, pull in list(pulls.items()): pull_file = "{0}/{1}.json".format(pulls_cwd, number) + existing_pull = read_json_file_if_exists(pull_file) + needs_review_backfill = ( + include_pull_reviews + and (not existing_pull or "review_data" not in existing_pull) + ) + if args.incremental_by_files and os.path.isfile(pull_file): modified = os.path.getmtime(pull_file) modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ") - if modified > pull["updated_at"]: + if modified > pull["updated_at"] and not needs_review_backfill: logger.info( "Skipping pull request {0} because it wasn't modified since last backup".format( number ) ) continue - if args.include_pull_comments or args.include_everything: + + should_fetch_non_review_data = pull_is_due_for_repository_checkpoint(pull) + if ( + args.include_pull_comments or args.include_everything + ) and should_fetch_non_review_data: template = comments_regular_template.format(number) pulls[number]["comment_regular_data"] = retrieve_data(args, template) template = comments_template.format(number) pulls[number]["comment_data"] = retrieve_data(args, template) - if args.include_pull_commits or args.include_everything: + if include_pull_reviews: + template = reviews_template.format(number) + try: + pulls[number]["review_data"] = retrieve_data(args, template) + except Exception as e: + pull_review_errors = True + logger.warning( + "Unable to retrieve reviews for pull request {0}#{1}, skipping reviews: {2}".format( + repository["full_name"], number, e + ) + ) + if ( + args.include_pull_commits or args.include_everything + ) and should_fetch_non_review_data: template = commits_template.format(number) pulls[number]["commit_data"] = retrieve_data(args, template) if args.include_attachments: @@ -2501,10 +2613,22 @@ def 
backup_pulls(args, repo_cwd, repository, repos_template): args, pulls_cwd, pulls[number], number, repository, item_type="pull" ) + restore_existing_pull_optional_data(pull, existing_pull) + with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f: json_dump(pull, f) os.replace(pull_file + ".temp", pull_file) # Atomic write + if ( + include_pull_reviews + and args.incremental + and pull_reviews_last_update_path + and newest_pull_update + and not pull_review_errors + and (not pull_reviews_since or newest_pull_update > pull_reviews_since) + ): + open(pull_reviews_last_update_path, "w").write(newest_pull_update) + def backup_milestones(args, repo_cwd, repository, repos_template): milestone_cwd = os.path.join(repo_cwd, "milestones") diff --git a/tests/test_pull_reviews.py b/tests/test_pull_reviews.py new file mode 100644 index 0000000..6130269 --- /dev/null +++ b/tests/test_pull_reviews.py @@ -0,0 +1,237 @@ +"""Tests for pull request review backups.""" + +import json +import os + +from github_backup import github_backup + + +def test_parse_args_pull_reviews_flag(): + args = github_backup.parse_args(["--pull-reviews", "testuser"]) + assert args.include_pull_reviews is True + + +def test_backup_pulls_includes_review_data(create_args, tmp_path, monkeypatch): + args = create_args(include_pulls=True, include_pull_reviews=True) + repository = {"full_name": "owner/repo"} + calls = [] + + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + calls.append((template, query_args)) + if template == "https://api.github.com/repos/owner/repo/pulls": + if query_args["state"] == "open": + return [ + { + "number": 1, + "updated_at": "2026-02-01T00:00:00Z", + "title": "Add feature", + } + ] + return [] + if template == "https://api.github.com/repos/owner/repo/pulls/1/reviews": + return [ + { + "id": 123, + "state": "APPROVED", + "body": "Looks good", + "submitted_at": "2026-02-01T00:00:00Z", + } + ] + raise AssertionError("Unexpected template: 
{0}".format(template)) + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + + with open(tmp_path / "pulls" / "1.json", encoding="utf-8") as f: + pull = json.load(f) + + assert pull["review_data"] == [ + { + "body": "Looks good", + "id": 123, + "state": "APPROVED", + "submitted_at": "2026-02-01T00:00:00Z", + } + ] + assert ( + "https://api.github.com/repos/owner/repo/pulls/1/reviews", + None, + ) in calls + + +def test_pull_reviews_backfill_ignores_repository_checkpoint( + create_args, tmp_path, monkeypatch +): + args = create_args( + include_pulls=True, + include_pull_reviews=True, + incremental=True, + ) + args.since = "2026-01-01T00:00:00Z" + repository = {"full_name": "owner/repo"} + + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + if template == "https://api.github.com/repos/owner/repo/pulls": + if query_args["state"] == "open": + return [ + { + "number": 1, + "updated_at": "2025-01-01T00:00:00Z", + "title": "Old pull request", + } + ] + return [] + if template == "https://api.github.com/repos/owner/repo/pulls/1/reviews": + return [{"id": 123, "state": "APPROVED"}] + raise AssertionError("Unexpected template: {0}".format(template)) + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + + with open(tmp_path / "pulls" / "1.json", encoding="utf-8") as f: + pull = json.load(f) + + assert pull["review_data"] == [{"id": 123, "state": "APPROVED"}] + assert (tmp_path / "pulls" / "reviews_last_update").read_text() == ( + "2025-01-01T00:00:00Z" + ) + + +def test_pull_reviews_uses_review_checkpoint_when_older_than_repository_checkpoint( + create_args, tmp_path, monkeypatch +): + args = create_args( + include_pulls=True, + include_pull_reviews=True, + incremental=True, + ) + args.since = 
"2026-01-01T00:00:00Z" + repository = {"full_name": "owner/repo"} + pulls_dir = tmp_path / "pulls" + pulls_dir.mkdir() + (pulls_dir / "reviews_last_update").write_text("2025-01-01T00:00:00Z") + + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + if template == "https://api.github.com/repos/owner/repo/pulls": + if query_args["state"] == "open": + return [ + { + "number": 1, + "updated_at": "2025-06-01T00:00:00Z", + "title": "Review changed while feature was disabled", + }, + { + "number": 2, + "updated_at": "2024-12-01T00:00:00Z", + "title": "Too old", + }, + ] + return [] + if template == "https://api.github.com/repos/owner/repo/pulls/1/reviews": + return [{"id": 123, "state": "COMMENTED"}] + raise AssertionError("Unexpected template: {0}".format(template)) + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + + assert os.path.exists(tmp_path / "pulls" / "1.json") + assert not os.path.exists(tmp_path / "pulls" / "2.json") + assert (tmp_path / "pulls" / "reviews_last_update").read_text() == ( + "2025-06-01T00:00:00Z" + ) + + +def test_pull_reviews_preserves_existing_optional_pull_data( + create_args, tmp_path, monkeypatch +): + args = create_args(include_pulls=True, include_pull_reviews=True) + repository = {"full_name": "owner/repo"} + pulls_dir = tmp_path / "pulls" + pulls_dir.mkdir() + with open(pulls_dir / "1.json", "w", encoding="utf-8") as f: + json.dump( + { + "number": 1, + "updated_at": "2026-01-01T00:00:00Z", + "comment_data": [{"id": 10, "body": "inline comment"}], + "comment_regular_data": [{"id": 11, "body": "regular comment"}], + "commit_data": [{"sha": "abc"}], + }, + f, + ) + + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + if template == "https://api.github.com/repos/owner/repo/pulls": + if query_args["state"] == "open": + return [ + { + "number": 1, + 
"updated_at": "2026-02-01T00:00:00Z", + "title": "Add reviews", + } + ] + return [] + if template == "https://api.github.com/repos/owner/repo/pulls/1/reviews": + return [{"id": 123, "state": "APPROVED"}] + raise AssertionError("Unexpected template: {0}".format(template)) + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + + with open(pulls_dir / "1.json", encoding="utf-8") as f: + pull = json.load(f) + + assert pull["review_data"] == [{"id": 123, "state": "APPROVED"}] + assert pull["comment_data"] == [{"id": 10, "body": "inline comment"}] + assert pull["comment_regular_data"] == [{"id": 11, "body": "regular comment"}] + assert pull["commit_data"] == [{"sha": "abc"}] + + +def test_pull_reviews_does_not_advance_checkpoint_on_review_error( + create_args, tmp_path, monkeypatch +): + args = create_args( + include_pulls=True, + include_pull_reviews=True, + incremental=True, + ) + args.since = "2026-01-01T00:00:00Z" + repository = {"full_name": "owner/repo"} + pulls_dir = tmp_path / "pulls" + pulls_dir.mkdir() + (pulls_dir / "reviews_last_update").write_text("2025-01-01T00:00:00Z") + + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + if template == "https://api.github.com/repos/owner/repo/pulls": + if query_args["state"] == "open": + return [ + { + "number": 1, + "updated_at": "2025-06-01T00:00:00Z", + "title": "Review retrieval fails", + } + ] + return [] + if template == "https://api.github.com/repos/owner/repo/pulls/1/reviews": + raise Exception("temporary API failure") + raise AssertionError("Unexpected template: {0}".format(template)) + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + + assert (pulls_dir / "reviews_last_update").read_text() == "2025-01-01T00:00:00Z" From 
b3a8241c9ab5930acfae2014d6a48a4feabe95ae Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 26 Apr 2026 15:03:48 +0200 Subject: [PATCH 434/455] Implement per-resource last_update timestamps Closes #62 --- CHANGES.rst | 5 + README.rst | 12 +- github_backup/github_backup.py | 167 +++++++++++++++++--- tests/test_incremental_per_repository.py | 189 +++++++++++++++++++++++ 4 files changed, 348 insertions(+), 25 deletions(-) create mode 100644 tests/test_incremental_per_repository.py diff --git a/CHANGES.rst b/CHANGES.rst index b790ce1..6cf9f17 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,6 +7,11 @@ Unreleased optional attachment downloads, and per-repository incremental checkpoints. - Add pull request review backups with ``--pull-reviews`` and one-time incremental backfill for existing backups. +- Store incremental ``last_update`` checkpoints per repository resource instead + of using one global checkpoint for the whole output directory. Existing + backups use the legacy global checkpoint as a migration fallback, and the + legacy file is removed once existing issue/pull backups have resource + checkpoints (#62). - Add ``--token-from-gh`` to read authentication from ``gh auth token``. diff --git a/README.rst b/README.rst index 52d7222..3a4be3b 100644 --- a/README.rst +++ b/README.rst @@ -347,15 +347,19 @@ About pull request reviews Use ``--pull-reviews`` with ``--pulls`` to include GitHub pull request review metadata under each pull request's ``review_data`` key. Reviews are separate from review comments: ``--pull-comments`` backs up inline review comments via ``comment_data`` and regular PR conversation comments via ``comment_regular_data``, while ``--pull-reviews`` backs up review state, submitted time, commit ID, and the top-level review body. -``--pull-reviews`` is included in ``--all``. Incremental backups use a per-repository checkpoint at ``repositories/{repo}/pulls/reviews_last_update``. 
If ``--pull-reviews`` is enabled on an existing incremental backup, the first run performs a one-time backfill for pull request reviews so older PRs are not skipped by the existing repository checkpoint. Existing ``comment_data``, ``comment_regular_data`` and ``commit_data`` fields are preserved when only review data is being added. +``--pull-reviews`` is included in ``--all``. Incremental backups use a per-repository checkpoint at ``repositories/{repo}/pulls/reviews_last_update``. If ``--pull-reviews`` is enabled on an existing incremental backup, the first run performs a one-time backfill for pull request reviews so older PRs are not skipped by the existing pull request checkpoint. Existing ``comment_data``, ``comment_regular_data`` and ``commit_data`` fields are preserved when only review data is being added. Incremental Backup ------------------ -Using (``-i, --incremental``) will only request new data from the API **since the last run (successful or not)**. e.g. only request issues from the API since the last run. +Using (``-i, --incremental``) will only request new data from the API **since the last successful resource backup**. e.g. only request issues from the API since the last issue backup for that repository. -This means any blocking errors on previous runs can cause a large amount of missing data in backups. +Incremental checkpoints for issue and pull request API backups are stored per resource in that repository's backup directory (for example ``repositories/{repo}/issues/last_update``, ``repositories/{repo}/pulls/last_update`` or ``starred/{owner}/{repo}/pulls/last_update``). Older versions stored a single global ``last_update`` file in the output directory root. During migration, the legacy global checkpoint is used as a fallback only for resource directories that already contain backup data but do not yet have their own checkpoint. 
New repositories or newly enabled resources with no existing data get a full backup instead of inheriting an unrelated global checkpoint. + +After all existing issue and pull request resource directories have per-resource checkpoints, the legacy global ``last_update`` file is removed automatically. + +This means any blocking errors on previous runs can cause missing data in backups for the affected repository resource. Using (``--incremental-by-files``) will request new data from the API **based on when the file was modified on filesystem**. e.g. if you modify the file yourself you may miss something. @@ -368,7 +372,7 @@ Known blocking errors Some errors will block the backup run by exiting the script. e.g. receiving a 403 Forbidden error from the Github API. -If the incremental argument is used, this will result in the next backup only requesting API data since the last blocked/failed run. Potentially causing unexpected large amounts of missing data. +If the incremental argument is used, per-resource checkpoints are only advanced after that resource's backup work completes. A blocking error can still abort the overall run, but repositories and resources that were not processed will keep their previous checkpoints. It's therefore recommended to only use the incremental argument if the output/result is being actively monitored, or complimented with periodic full non-incremental runs, to avoid unexpected missing data in a regular backup runs. 
diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 054d0c6..e56bb28 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1928,26 +1928,138 @@ def filter_repositories(args, unfiltered_repositories): return repositories +INCREMENTAL_LAST_UPDATE_FILENAME = "last_update" +INCREMENTAL_RESOURCE_DIRECTORIES = ("issues", "pulls") + + +def get_repository_checkpoint_time(repository): + timestamps = [ + timestamp + for timestamp in (repository.get("updated_at"), repository.get("pushed_at")) + if timestamp + ] + if timestamps: + return max(timestamps) + + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.localtime()) + + +def resource_backup_exists(resource_cwd): + if not os.path.isdir(resource_cwd): + return False + + ignored_names = { + INCREMENTAL_LAST_UPDATE_FILENAME, + PULL_REVIEWS_LAST_UPDATE_FILENAME, + } + for name in os.listdir(resource_cwd): + if name in ignored_names or name.endswith(".temp"): + continue + return True + + return False + + +def read_legacy_last_update(args, output_directory): + if not args.incremental: + return None, None + + last_update_path = os.path.join(output_directory, INCREMENTAL_LAST_UPDATE_FILENAME) + if os.path.exists(last_update_path): + return last_update_path, open(last_update_path).read().strip() + + return last_update_path, None + + +def read_resource_last_update(args, resource_cwd, legacy_last_update=None): + if not args.incremental: + return None + + last_update_path = os.path.join(resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME) + if os.path.exists(last_update_path): + return open(last_update_path).read().strip() + + if legacy_last_update and resource_backup_exists(resource_cwd): + return legacy_last_update + + return None + + +def write_resource_last_update(args, resource_cwd, repository): + if not args.incremental: + return + + mkdir_p(resource_cwd) + last_update_path = os.path.join(resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME) + open(last_update_path, 
"w").write(get_repository_checkpoint_time(repository)) + + +def iter_incremental_resource_dirs(output_directory): + repositories_dir = os.path.join(output_directory, "repositories") + if os.path.isdir(repositories_dir): + for repository_name in os.listdir(repositories_dir): + repo_cwd = os.path.join(repositories_dir, repository_name) + if not os.path.isdir(repo_cwd): + continue + for resource_name in INCREMENTAL_RESOURCE_DIRECTORIES: + yield os.path.join(repo_cwd, resource_name) + + starred_dir = os.path.join(output_directory, "starred") + if os.path.isdir(starred_dir): + for owner_name in os.listdir(starred_dir): + owner_cwd = os.path.join(starred_dir, owner_name) + if not os.path.isdir(owner_cwd): + continue + for repository_name in os.listdir(owner_cwd): + repo_cwd = os.path.join(owner_cwd, repository_name) + if not os.path.isdir(repo_cwd): + continue + for resource_name in INCREMENTAL_RESOURCE_DIRECTORIES: + yield os.path.join(repo_cwd, resource_name) + + +def has_unmigrated_incremental_resources(output_directory): + for resource_cwd in iter_incremental_resource_dirs(output_directory): + last_update_path = os.path.join( + resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME + ) + if resource_backup_exists(resource_cwd) and not os.path.exists( + last_update_path + ): + return True + + return False + + +def remove_legacy_last_update_if_migrated( + args, output_directory, legacy_last_update_path +): + if not args.incremental or not legacy_last_update_path: + return + if not os.path.exists(legacy_last_update_path): + return + if has_unmigrated_incremental_resources(output_directory): + logger.info( + "Keeping legacy global last_update until all existing issue/pull " + "backups have per-resource checkpoints" + ) + return + + os.remove(legacy_last_update_path) + logger.info( + "Removed legacy global last_update after migrating incremental checkpoints" + ) + + def backup_repositories(args, output_directory, repositories): logger.info("Backing up repositories") 
repos_template = "https://{0}/repos".format(get_github_api_host(args)) + legacy_last_update_path, legacy_last_update = read_legacy_last_update( + args, output_directory + ) + incremental_resource_work_attempted = False - if args.incremental: - last_update_path = os.path.join(output_directory, "last_update") - if os.path.exists(last_update_path): - args.since = open(last_update_path).read().strip() - else: - args.since = None - else: - args.since = None - - last_update = "0000-00-00T00:00:00Z" for repository in repositories: - if repository.get("updated_at") and repository["updated_at"] > last_update: - last_update = repository["updated_at"] - elif repository.get("pushed_at") and repository["pushed_at"] > last_update: - last_update = repository["pushed_at"] - if repository.get("is_gist"): repo_cwd = os.path.join(output_directory, "gists", repository["id"]) elif repository.get("is_starred"): @@ -2010,10 +2122,22 @@ def backup_repositories(args, output_directory, repositories): no_prune=args.no_prune, ) if args.include_issues or args.include_everything: + incremental_resource_work_attempted = True + issue_cwd = os.path.join(repo_cwd, "issues") + args.since = read_resource_last_update( + args, issue_cwd, legacy_last_update + ) backup_issues(args, repo_cwd, repository, repos_template) + write_resource_last_update(args, issue_cwd, repository) if args.include_pulls or args.include_everything: + incremental_resource_work_attempted = True + pulls_cwd = os.path.join(repo_cwd, "pulls") + args.since = read_resource_last_update( + args, pulls_cwd, legacy_last_update + ) backup_pulls(args, repo_cwd, repository, repos_template) + write_resource_last_update(args, pulls_cwd, repository) if args.include_discussions or args.include_everything: backup_discussions(args, repo_cwd, repository) @@ -2021,7 +2145,9 @@ def backup_repositories(args, output_directory, repositories): if args.include_milestones or args.include_everything: backup_milestones(args, repo_cwd, repository, 
repos_template) - if args.include_security_advisories or (args.include_everything and not repository.get("private", False)): + if args.include_security_advisories or ( + args.include_everything and not repository.get("private", False) + ): backup_security_advisories(args, repo_cwd, repository, repos_template) if args.include_labels or args.include_everything: @@ -2045,11 +2171,10 @@ def backup_repositories(args, output_directory, repositories): logger.info(f"Skipping remaining resources for {repository['full_name']}") continue - if args.incremental: - if last_update == "0000-00-00T00:00:00Z": - last_update = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.localtime()) - - open(last_update_path, "w").write(last_update) + if incremental_resource_work_attempted: + remove_legacy_last_update_if_migrated( + args, output_directory, legacy_last_update_path + ) def _repository_owner_name(repository): diff --git a/tests/test_incremental_per_repository.py b/tests/test_incremental_per_repository.py new file mode 100644 index 0000000..f1fd67a --- /dev/null +++ b/tests/test_incremental_per_repository.py @@ -0,0 +1,189 @@ +"""Tests for per-resource incremental checkpoints.""" + +import json +import os + +from github_backup import github_backup + + +def _repo(name, updated_at, pushed_at=None): + return { + "name": name, + "full_name": "owner/{0}".format(name), + "owner": {"login": "owner"}, + "clone_url": "https://github.com/owner/{0}.git".format(name), + "private": False, + "fork": False, + "has_wiki": False, + "updated_at": updated_at, + "pushed_at": pushed_at, + } + + +def test_incremental_uses_per_resource_last_update( + create_args, tmp_path, monkeypatch +): + args = create_args(incremental=True, include_issues=True) + repositories = [ + _repo("repo-one", "2026-02-01T00:00:00Z"), + _repo("repo-two", "2026-03-01T00:00:00Z"), + ] + repo_one_issues = tmp_path / "repositories" / "repo-one" / "issues" + repo_two_issues = tmp_path / "repositories" / "repo-two" / "issues" + 
repo_one_issues.mkdir(parents=True) + repo_two_issues.mkdir(parents=True) + (repo_one_issues / "last_update").write_text("2026-01-01T00:00:00Z") + (repo_two_issues / "last_update").write_text("2025-01-01T00:00:00Z") + + seen_since = [] + + def fake_backup_issues(passed_args, repo_cwd, repository, repos_template): + seen_since.append((repository["name"], passed_args.since)) + + monkeypatch.setattr(github_backup, "backup_issues", fake_backup_issues) + + github_backup.backup_repositories(args, tmp_path, repositories) + + assert seen_since == [ + ("repo-one", "2026-01-01T00:00:00Z"), + ("repo-two", "2025-01-01T00:00:00Z"), + ] + assert (repo_one_issues / "last_update").read_text() == "2026-02-01T00:00:00Z" + assert (repo_two_issues / "last_update").read_text() == "2026-03-01T00:00:00Z" + assert not os.path.exists(tmp_path / "last_update") + + +def test_incremental_uses_independent_issue_and_pull_checkpoints( + create_args, tmp_path, monkeypatch +): + args = create_args(incremental=True, include_issues=True, include_pulls=True) + repository = _repo("repo-one", "2026-02-01T00:00:00Z") + repo_dir = tmp_path / "repositories" / "repo-one" + issues_dir = repo_dir / "issues" + pulls_dir = repo_dir / "pulls" + issues_dir.mkdir(parents=True) + pulls_dir.mkdir(parents=True) + (issues_dir / "last_update").write_text("2026-01-01T00:00:00Z") + (pulls_dir / "last_update").write_text("2025-01-01T00:00:00Z") + + seen_since = [] + + def fake_backup_issues(passed_args, repo_cwd, repository, repos_template): + seen_since.append(("issues", passed_args.since)) + + def fake_backup_pulls(passed_args, repo_cwd, repository, repos_template): + seen_since.append(("pulls", passed_args.since)) + + monkeypatch.setattr(github_backup, "backup_issues", fake_backup_issues) + monkeypatch.setattr(github_backup, "backup_pulls", fake_backup_pulls) + + github_backup.backup_repositories(args, tmp_path, [repository]) + + assert seen_since == [ + ("issues", "2026-01-01T00:00:00Z"), + ("pulls", 
"2025-01-01T00:00:00Z"), + ] + assert (issues_dir / "last_update").read_text() == "2026-02-01T00:00:00Z" + assert (pulls_dir / "last_update").read_text() == "2026-02-01T00:00:00Z" + + +def test_incremental_uses_legacy_global_last_update_for_existing_resource_backup( + create_args, tmp_path, monkeypatch +): + args = create_args(incremental=True, include_issues=True) + repository = _repo("repo-one", "2026-02-01T00:00:00Z") + (tmp_path / "last_update").write_text("2026-01-01T00:00:00Z") + issues_dir = tmp_path / "repositories" / "repo-one" / "issues" + issues_dir.mkdir(parents=True) + with open(issues_dir / "1.json", "w", encoding="utf-8") as f: + json.dump({"number": 1}, f) + + seen_since = [] + + def fake_backup_issues(passed_args, repo_cwd, repository, repos_template): + seen_since.append(passed_args.since) + + monkeypatch.setattr(github_backup, "backup_issues", fake_backup_issues) + + github_backup.backup_repositories(args, tmp_path, [repository]) + + assert seen_since == ["2026-01-01T00:00:00Z"] + assert (issues_dir / "last_update").read_text() == "2026-02-01T00:00:00Z" + assert not os.path.exists(tmp_path / "last_update") + + +def test_incremental_does_not_use_legacy_global_last_update_for_new_resource_backup( + create_args, tmp_path, monkeypatch +): + args = create_args(incremental=True, include_issues=True) + repository = _repo("repo-one", "2026-02-01T00:00:00Z") + (tmp_path / "last_update").write_text("2099-01-01T00:00:00Z") + + seen_since = [] + + def fake_backup_issues(passed_args, repo_cwd, repository, repos_template): + seen_since.append(passed_args.since) + + monkeypatch.setattr(github_backup, "backup_issues", fake_backup_issues) + + github_backup.backup_repositories(args, tmp_path, [repository]) + + assert seen_since == [None] + assert ( + tmp_path / "repositories" / "repo-one" / "issues" / "last_update" + ).read_text() == "2026-02-01T00:00:00Z" + assert not os.path.exists(tmp_path / "last_update") + + +def 
test_incremental_keeps_legacy_global_last_update_until_all_existing_resources_migrated( + create_args, tmp_path, monkeypatch +): + args = create_args(incremental=True, include_issues=True) + repository = _repo("repo-one", "2026-02-01T00:00:00Z") + (tmp_path / "last_update").write_text("2026-01-01T00:00:00Z") + repo_one_issues = tmp_path / "repositories" / "repo-one" / "issues" + repo_two_issues = tmp_path / "repositories" / "repo-two" / "issues" + repo_one_issues.mkdir(parents=True) + repo_two_issues.mkdir(parents=True) + with open(repo_one_issues / "1.json", "w", encoding="utf-8") as f: + json.dump({"number": 1}, f) + with open(repo_two_issues / "2.json", "w", encoding="utf-8") as f: + json.dump({"number": 2}, f) + + def fake_backup_issues(passed_args, repo_cwd, repository, repos_template): + pass + + monkeypatch.setattr(github_backup, "backup_issues", fake_backup_issues) + + github_backup.backup_repositories(args, tmp_path, [repository]) + + assert (repo_one_issues / "last_update").read_text() == "2026-02-01T00:00:00Z" + assert not os.path.exists(repo_two_issues / "last_update") + assert (tmp_path / "last_update").read_text() == "2026-01-01T00:00:00Z" + + +def test_incremental_does_not_remove_legacy_checkpoint_without_resource_work( + create_args, tmp_path +): + args = create_args(incremental=True, include_repository=True) + repository = _repo("repo-one", "2026-02-01T00:00:00Z") + (tmp_path / "last_update").write_text("2026-01-01T00:00:00Z") + + github_backup.backup_repositories(args, tmp_path, [repository]) + + assert (tmp_path / "last_update").read_text() == "2026-01-01T00:00:00Z" + assert not os.path.exists( + tmp_path / "repositories" / "repo-one" / "issues" / "last_update" + ) + + +def test_repository_checkpoint_time_uses_newest_available_repo_timestamp(): + repository = _repo( + "repo-one", + updated_at="2026-02-01T00:00:00Z", + pushed_at="2026-03-01T00:00:00Z", + ) + + assert github_backup.get_repository_checkpoint_time(repository) == ( + 
"2026-03-01T00:00:00Z" + ) From 6cd0ab3633df812ab586968b5b2e448e0e1b3efc Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 26 Apr 2026 15:15:22 +0200 Subject: [PATCH 435/455] Reduce unnecessary pull requests with incremental fetching --- CHANGES.rst | 2 + github_backup/github_backup.py | 18 +++-- tests/test_pull_incremental_pagination.py | 85 +++++++++++++++++++++++ tests/test_pull_reviews.py | 10 +-- 4 files changed, 104 insertions(+), 11 deletions(-) create mode 100644 tests/test_pull_incremental_pagination.py diff --git a/CHANGES.rst b/CHANGES.rst index 6cf9f17..8b62d33 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,8 @@ Unreleased backups use the legacy global checkpoint as a migration fallback, and the legacy file is removed once existing issue/pull backups have resource checkpoints (#62). +- Stop paginating pull requests during incremental backups once the sorted + results are older than the active checkpoint. - Add ``--token-from-gh`` to read authentication from ``gh auth token``. diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index e56bb28..f83bdb3 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -717,11 +717,12 @@ def calculate_retry_delay(attempt, headers): return delay + random.uniform(0, delay * 0.1) -def retrieve_data(args, template, query_args=None, paginated=True): +def retrieve_data(args, template, query_args=None, paginated=True, lazy=False): """ Fetch the data from GitHub API. - Handle both single requests and pagination with yield of individual dicts. + Handle both single requests and pagination. Returns a list by default, or + a generator when lazy=True so callers can stop before fetching every page. Handles throttling, retries, read errors, and DMCA takedowns. 
""" query_args = query_args or {} @@ -851,6 +852,9 @@ def _extract_legal_url(response_body_bytes): ): break # No more data + if lazy: + return fetch_all() + return list(fetch_all()) @@ -2656,16 +2660,18 @@ def pull_is_due_for_repository_checkpoint(pull): pull_states = ["open", "closed"] for pull_state in pull_states: query_args["state"] = pull_state - _pulls = retrieve_data(args, _pulls_template, query_args=query_args) - for pull in _pulls: + for pull in retrieve_data( + args, _pulls_template, query_args=query_args, lazy=True + ): track_newest_pull_update(pull) if pulls_since and pull["updated_at"] < pulls_since: break if not pulls_since or pull["updated_at"] >= pulls_since: pulls[pull["number"]] = pull else: - _pulls = retrieve_data(args, _pulls_template, query_args=query_args) - for pull in _pulls: + for pull in retrieve_data( + args, _pulls_template, query_args=query_args, lazy=True + ): track_newest_pull_update(pull) if pulls_since and pull["updated_at"] < pulls_since: break diff --git a/tests/test_pull_incremental_pagination.py b/tests/test_pull_incremental_pagination.py new file mode 100644 index 0000000..11230b0 --- /dev/null +++ b/tests/test_pull_incremental_pagination.py @@ -0,0 +1,85 @@ +"""Tests for incremental pull request pagination.""" + +import json +import os +from unittest.mock import patch + +from github_backup import github_backup + + +class MockHTTPResponse: + def __init__(self, data, link_header=None): + self._content = json.dumps(data).encode("utf-8") + self._link_header = link_header + self._read = False + self.reason = "OK" + + def getcode(self): + return 200 + + def read(self): + if self._read: + return b"" + self._read = True + return self._content + + @property + def headers(self): + headers = {"x-ratelimit-remaining": "5000"} + if self._link_header: + headers["Link"] = self._link_header + return headers + + +def test_backup_pulls_incremental_stops_before_fetching_old_pages( + create_args, tmp_path +): + args = 
create_args(include_pulls=True, incremental=True) + args.since = "2026-04-26T08:13:46Z" + repository = {"full_name": "owner/repo"} + + responses = [ + MockHTTPResponse([]), + MockHTTPResponse( + [ + { + "number": 2, + "title": "new pull", + "updated_at": "2026-04-26T09:00:00Z", + }, + { + "number": 1, + "title": "old pull", + "updated_at": "2026-04-26T07:00:00Z", + }, + ], + link_header='; rel="next"', + ), + MockHTTPResponse( + [ + { + "number": 0, + "title": "older pull on page 2", + "updated_at": "2026-04-25T07:00:00Z", + } + ] + ), + ] + requests_made = [] + + def mock_urlopen(request, *args, **kwargs): + requests_made.append(request.get_full_url()) + return responses[len(requests_made) - 1] + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + + assert len(requests_made) == 2 + assert "state=open" in requests_made[0] + assert "state=closed" in requests_made[1] + assert all("page=2" not in url for url in requests_made) + assert os.path.exists(tmp_path / "pulls" / "2.json") + assert not os.path.exists(tmp_path / "pulls" / "1.json") + assert not os.path.exists(tmp_path / "pulls" / "0.json") diff --git a/tests/test_pull_reviews.py b/tests/test_pull_reviews.py index 6130269..2ce9ad1 100644 --- a/tests/test_pull_reviews.py +++ b/tests/test_pull_reviews.py @@ -16,7 +16,7 @@ def test_backup_pulls_includes_review_data(create_args, tmp_path, monkeypatch): repository = {"full_name": "owner/repo"} calls = [] - def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True, **kwargs): calls.append((template, query_args)) if template == "https://api.github.com/repos/owner/repo/pulls": if query_args["state"] == "open": @@ -73,7 +73,7 @@ def test_pull_reviews_backfill_ignores_repository_checkpoint( args.since = "2026-01-01T00:00:00Z" repository = 
{"full_name": "owner/repo"} - def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True, **kwargs): if template == "https://api.github.com/repos/owner/repo/pulls": if query_args["state"] == "open": return [ @@ -117,7 +117,7 @@ def test_pull_reviews_uses_review_checkpoint_when_older_than_repository_checkpoi pulls_dir.mkdir() (pulls_dir / "reviews_last_update").write_text("2025-01-01T00:00:00Z") - def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True, **kwargs): if template == "https://api.github.com/repos/owner/repo/pulls": if query_args["state"] == "open": return [ @@ -169,7 +169,7 @@ def test_pull_reviews_preserves_existing_optional_pull_data( f, ) - def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True, **kwargs): if template == "https://api.github.com/repos/owner/repo/pulls": if query_args["state"] == "open": return [ @@ -213,7 +213,7 @@ def test_pull_reviews_does_not_advance_checkpoint_on_review_error( pulls_dir.mkdir() (pulls_dir / "reviews_last_update").write_text("2025-01-01T00:00:00Z") - def fake_retrieve_data(passed_args, template, query_args=None, paginated=True): + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True, **kwargs): if template == "https://api.github.com/repos/owner/repo/pulls": if query_args["state"] == "open": return [ From 9d0cfdb61da1cea97b381c2177ccc4e52e9a6352 Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 26 Apr 2026 16:05:20 +0200 Subject: [PATCH 436/455] Avoid redundant release asset list requests --- CHANGES.rst | 2 + github_backup/github_backup.py | 7 ++- tests/test_releases.py | 95 ++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 
tests/test_releases.py diff --git a/CHANGES.rst b/CHANGES.rst index 8b62d33..3d2ceb0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,6 +14,8 @@ Unreleased checkpoints (#62). - Stop paginating pull requests during incremental backups once the sorted results are older than the active checkpoint. +- Avoid extra release asset list requests by using asset metadata already + included in GitHub's releases response. - Add ``--token-from-gh`` to read authentication from ``gh auth token``. diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index f83bdb3..6edfb05 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -2919,7 +2919,12 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F written_count += 1 if include_assets and not skip_assets: - assets = retrieve_data(args, release["assets_url"]) + # The releases list API already includes release asset metadata. Use + # it to avoid an extra /releases/{id}/assets request per release. + # Keep a fallback for older/enterprise responses that might omit it. 
+ assets = release.get("assets") + if assets is None: + assets = retrieve_data(args, release["assets_url"]) if len(assets) > 0: # give release asset files somewhere to live & download them (not including source archives) release_assets_cwd = os.path.join(release_cwd, release_name_safe) diff --git a/tests/test_releases.py b/tests/test_releases.py new file mode 100644 index 0000000..b8584f4 --- /dev/null +++ b/tests/test_releases.py @@ -0,0 +1,95 @@ +"""Tests for release backup behavior.""" + +from github_backup import github_backup + + +def test_backup_releases_uses_embedded_assets_without_extra_asset_list_request( + create_args, tmp_path, monkeypatch +): + args = create_args(include_releases=True, include_assets=True) + repository = {"full_name": "owner/repo", "name": "repo"} + calls = [] + downloads = [] + + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True, **kwargs): + calls.append(template) + if template == "https://api.github.com/repos/owner/repo/releases": + return [ + { + "tag_name": "v1.0.0", + "created_at": "2026-01-01T00:00:00Z", + "updated_at": "2026-01-01T00:00:00Z", + "prerelease": False, + "draft": False, + "assets_url": "https://api.github.com/repos/owner/repo/releases/1/assets", + "assets": [ + { + "name": "artifact.zip", + "url": "https://api.github.com/repos/owner/repo/releases/assets/1", + } + ], + } + ] + raise AssertionError("Unexpected API request: {0}".format(template)) + + def fake_download_file(url, path, auth, as_app=False, fine=False): + downloads.append((url, path)) + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + monkeypatch.setattr(github_backup, "download_file", fake_download_file) + + github_backup.backup_releases( + args, + tmp_path, + repository, + "https://api.github.com/repos", + include_assets=True, + ) + + assert calls == ["https://api.github.com/repos/owner/repo/releases"] + assert downloads == [ + ( + "https://api.github.com/repos/owner/repo/releases/assets/1", + 
str(tmp_path / "releases" / "v1.0.0" / "artifact.zip"), + ) + ] + + +def test_backup_releases_falls_back_to_assets_url_when_assets_missing( + create_args, tmp_path, monkeypatch +): + args = create_args(include_releases=True, include_assets=True) + repository = {"full_name": "owner/repo", "name": "repo"} + calls = [] + + def fake_retrieve_data(passed_args, template, query_args=None, paginated=True, **kwargs): + calls.append(template) + if template == "https://api.github.com/repos/owner/repo/releases": + return [ + { + "tag_name": "v1.0.0", + "created_at": "2026-01-01T00:00:00Z", + "updated_at": "2026-01-01T00:00:00Z", + "prerelease": False, + "draft": False, + "assets_url": "https://api.github.com/repos/owner/repo/releases/1/assets", + } + ] + if template == "https://api.github.com/repos/owner/repo/releases/1/assets": + return [] + raise AssertionError("Unexpected API request: {0}".format(template)) + + monkeypatch.setattr(github_backup, "retrieve_data", fake_retrieve_data) + + github_backup.backup_releases( + args, + tmp_path, + repository, + "https://api.github.com/repos", + include_assets=True, + ) + + assert calls == [ + "https://api.github.com/repos/owner/repo/releases", + "https://api.github.com/repos/owner/repo/releases/1/assets", + ] From 014eff395a999f82674547efd77a6470b038ce91 Mon Sep 17 00:00:00 2001 From: Duncan Ogilvie Date: Sun, 26 Apr 2026 16:09:42 +0200 Subject: [PATCH 437/455] Skip checkpoint-equal incremental items --- CHANGES.rst | 4 +- github_backup/github_backup.py | 12 +++--- tests/test_discussions.py | 35 +++++++++++++++++ tests/test_pull_incremental_pagination.py | 46 +++++++++++++++++++++++ 4 files changed, 90 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3d2ceb0..3d4cdce 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,7 +13,9 @@ Unreleased legacy file is removed once existing issue/pull backups have resource checkpoints (#62). 
- Stop paginating pull requests during incremental backups once the sorted - results are older than the active checkpoint. + results are at or older than the active checkpoint. +- Avoid re-fetching discussions and pull requests whose ``updated_at`` exactly + matches the active incremental checkpoint. - Avoid extra release asset list requests by using asset metadata already included in GitHub's releases response. - Add ``--token-from-gh`` to read authentication from ``gh auth token``. diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 6edfb05..ae4ef2e 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -2233,7 +2233,7 @@ def retrieve_discussion_summaries(args, repository, since=None): if updated_at and (newest_seen is None or updated_at > newest_seen): newest_seen = updated_at - if since and updated_at and updated_at < since: + if since and updated_at and updated_at <= since: stop = True break @@ -2654,7 +2654,7 @@ def track_newest_pull_update(pull): newest_pull_update = updated_at def pull_is_due_for_repository_checkpoint(pull): - return not repository_since or pull["updated_at"] >= repository_since + return not repository_since or pull["updated_at"] > repository_since if not args.include_pull_details: pull_states = ["open", "closed"] @@ -2664,18 +2664,18 @@ def pull_is_due_for_repository_checkpoint(pull): args, _pulls_template, query_args=query_args, lazy=True ): track_newest_pull_update(pull) - if pulls_since and pull["updated_at"] < pulls_since: + if pulls_since and pull["updated_at"] <= pulls_since: break - if not pulls_since or pull["updated_at"] >= pulls_since: + if not pulls_since or pull["updated_at"] > pulls_since: pulls[pull["number"]] = pull else: for pull in retrieve_data( args, _pulls_template, query_args=query_args, lazy=True ): track_newest_pull_update(pull) - if pulls_since and pull["updated_at"] < pulls_since: + if pulls_since and pull["updated_at"] <= pulls_since: break - if not 
pulls_since or pull["updated_at"] >= pulls_since: + if not pulls_since or pull["updated_at"] > pulls_since: if pull_is_due_for_repository_checkpoint(pull): pulls[pull["number"]] = retrieve_data( args, diff --git a/tests/test_discussions.py b/tests/test_discussions.py index 89fd8dd..2b5e3fb 100644 --- a/tests/test_discussions.py +++ b/tests/test_discussions.py @@ -50,6 +50,41 @@ def test_retrieve_discussion_summaries_stops_at_incremental_since(create_args): ) +def test_retrieve_discussion_summaries_excludes_checkpoint_timestamp(create_args): + args = create_args() + repository = {"full_name": "owner/repo"} + + page = { + "repository": { + "hasDiscussionsEnabled": True, + "discussions": { + "totalCount": 1, + "nodes": [ + { + "number": 1, + "title": "already backed up", + "updatedAt": "2026-01-01T00:00:00Z", + }, + ], + "pageInfo": {"hasNextPage": True, "endCursor": "NEXT"}, + }, + } + } + + with patch( + "github_backup.github_backup.retrieve_graphql_data", return_value=page + ) as mock_retrieve: + summaries, newest, enabled, total = github_backup.retrieve_discussion_summaries( + args, repository, since="2026-01-01T00:00:00Z" + ) + + assert enabled is True + assert total == 1 + assert newest == "2026-01-01T00:00:00Z" + assert summaries == [] + assert mock_retrieve.call_count == 1 + + def test_retrieve_discussion_summaries_disabled_discussions(create_args): args = create_args() repository = {"full_name": "owner/repo"} diff --git a/tests/test_pull_incremental_pagination.py b/tests/test_pull_incremental_pagination.py index 11230b0..ac0f83f 100644 --- a/tests/test_pull_incremental_pagination.py +++ b/tests/test_pull_incremental_pagination.py @@ -31,6 +31,52 @@ def headers(self): return headers +def test_backup_pulls_incremental_excludes_checkpoint_timestamp(create_args, tmp_path): + args = create_args(include_pulls=True, incremental=True) + args.since = "2026-04-26T08:13:46Z" + repository = {"full_name": "owner/repo"} + + responses = [ + MockHTTPResponse([]), + 
MockHTTPResponse( + [ + { + "number": 1, + "title": "already backed up", + "updated_at": "2026-04-26T08:13:46Z", + }, + ], + link_header='; rel="next"', + ), + MockHTTPResponse( + [ + { + "number": 0, + "title": "older pull on page 2", + "updated_at": "2026-04-25T07:00:00Z", + } + ] + ), + ] + requests_made = [] + + def mock_urlopen(request, *args, **kwargs): + requests_made.append(request.get_full_url()) + return responses[len(requests_made) - 1] + + with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen): + github_backup.backup_pulls( + args, tmp_path, repository, "https://api.github.com/repos" + ) + + assert len(requests_made) == 2 + assert "state=open" in requests_made[0] + assert "state=closed" in requests_made[1] + assert all("page=2" not in url for url in requests_made) + assert not os.path.exists(tmp_path / "pulls" / "1.json") + assert not os.path.exists(tmp_path / "pulls" / "0.json") + + def test_backup_pulls_incremental_stops_before_fetching_old_pages( create_args, tmp_path ): From f8cdf55050770bbcb1b5ba178d73b346988f0f89 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 29 Apr 2026 12:10:11 +0000 Subject: [PATCH 438/455] Release version 0.62.0 --- CHANGES.rst | 172 +++++++++++++++++++++++++++++++++----- github_backup/__init__.py | 2 +- 2 files changed, 154 insertions(+), 20 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3d4cdce..86bcb32 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,29 +1,163 @@ Changelog ========= -Unreleased ----------- -- Add GitHub Discussions backups via GraphQL, including comments, replies, - optional attachment downloads, and per-repository incremental checkpoints. -- Add pull request review backups with ``--pull-reviews`` and one-time - incremental backfill for existing backups. -- Store incremental ``last_update`` checkpoints per repository resource instead - of using one global checkpoint for the whole output directory. 
Existing - backups use the legacy global checkpoint as a migration fallback, and the - legacy file is removed once existing issue/pull backups have resource - checkpoints (#62). -- Stop paginating pull requests during incremental backups once the sorted - results are at or older than the active checkpoint. -- Avoid re-fetching discussions and pull requests whose ``updated_at`` exactly - matches the active incremental checkpoint. -- Avoid extra release asset list requests by using asset metadata already - included in GitHub's releases response. -- Add ``--token-from-gh`` to read authentication from ``gh auth token``. +0.62.0 (2026-04-29) +------------------- +------------------------ +- Skip checkpoint-equal incremental items. [Duncan Ogilvie] +- Avoid redundant release asset list requests. [Duncan Ogilvie] +- Reduce unnecessary pull requests with incremental fetching. [Duncan + Ogilvie] +- Implement per-resource last_update timestamps. [Duncan Ogilvie] + + Closes #62 +- Add support for pull request reviews. [Duncan Ogilvie] + + Closes #124 +- Add support for discussions. [Duncan Ogilvie] + + Closes #290 +- Add --token-from-gh authentication option. [Duncan Ogilvie] +- Chore(deps): bump pytest in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [pytest](https://github.com/pytest-dev/pytest). + + + Updates `pytest` from 9.0.2 to 9.0.3 + - [Release notes](https://github.com/pytest-dev/pytest/releases) + - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) + - [Commits](https://github.com/pytest-dev/pytest/compare/9.0.2...9.0.3) + + --- + updated-dependencies: + - dependency-name: pytest + dependency-version: 9.0.3 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump black in the python-packages group. + [dependabot[bot]] + + Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). 
+ + + Updates `black` from 26.3.0 to 26.3.1 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/26.3.0...26.3.1) + + --- + updated-dependencies: + - dependency-name: black + dependency-version: 26.3.1 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... +- Chore(deps): bump docker/login-action from 3 to 4. [dependabot[bot]] + + Bumps [docker/login-action](https://github.com/docker/login-action) from 3 to 4. + - [Release notes](https://github.com/docker/login-action/releases) + - [Commits](https://github.com/docker/login-action/compare/v3...v4) + + --- + updated-dependencies: + - dependency-name: docker/login-action + dependency-version: '4' + dependency-type: direct:production + update-type: version-update:semver-major + ... +- Chore(deps): bump docker/setup-qemu-action from 3 to 4. + [dependabot[bot]] + + Bumps [docker/setup-qemu-action](https://github.com/docker/setup-qemu-action) from 3 to 4. + - [Release notes](https://github.com/docker/setup-qemu-action/releases) + - [Commits](https://github.com/docker/setup-qemu-action/compare/v3...v4) + + --- + updated-dependencies: + - dependency-name: docker/setup-qemu-action + dependency-version: '4' + dependency-type: direct:production + update-type: version-update:semver-major + ... +- Chore(deps): bump docker/build-push-action from 6 to 7. + [dependabot[bot]] + + Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 6 to 7. + - [Release notes](https://github.com/docker/build-push-action/releases) + - [Commits](https://github.com/docker/build-push-action/compare/v6...v7) + + --- + updated-dependencies: + - dependency-name: docker/build-push-action + dependency-version: '7' + dependency-type: direct:production + update-type: version-update:semver-major + ... 
+- Chore(deps): bump docker/setup-buildx-action from 3 to 4. + [dependabot[bot]] + + Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 3 to 4. + - [Release notes](https://github.com/docker/setup-buildx-action/releases) + - [Commits](https://github.com/docker/setup-buildx-action/compare/v3...v4) + + --- + updated-dependencies: + - dependency-name: docker/setup-buildx-action + dependency-version: '4' + dependency-type: direct:production + update-type: version-update:semver-major + ... +- Chore(deps): bump docker/metadata-action from 5 to 6. + [dependabot[bot]] + + Bumps [docker/metadata-action](https://github.com/docker/metadata-action) from 5 to 6. + - [Release notes](https://github.com/docker/metadata-action/releases) + - [Commits](https://github.com/docker/metadata-action/compare/v5...v6) + + --- + updated-dependencies: + - dependency-name: docker/metadata-action + dependency-version: '6' + dependency-type: direct:production + update-type: version-update:semver-major + ... +- Chore(deps): bump the python-packages group with 2 updates. + [dependabot[bot]] + + Bumps the python-packages group with 2 updates: [black](https://github.com/psf/black) and [setuptools](https://github.com/pypa/setuptools). 
+ + + Updates `black` from 26.1.0 to 26.3.0 + - [Release notes](https://github.com/psf/black/releases) + - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) + - [Commits](https://github.com/psf/black/compare/26.1.0...26.3.0) + + Updates `setuptools` from 82.0.0 to 82.0.1 + - [Release notes](https://github.com/pypa/setuptools/releases) + - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) + - [Commits](https://github.com/pypa/setuptools/compare/v82.0.0...v82.0.1) + + --- + updated-dependencies: + - dependency-name: black + dependency-version: 26.3.0 + dependency-type: direct:production + update-type: version-update:semver-minor + dependency-group: python-packages + - dependency-name: setuptools + dependency-version: 82.0.1 + dependency-type: direct:production + update-type: version-update:semver-patch + dependency-group: python-packages + ... 0.61.5 (2026-02-18) ------------------- ------------------------- - Fix empty repository crash due to None timestamp comparison (#489) [Rodos] diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 294be4d..647040d 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.61.5" +__version__ = "0.62.0" From 0638666bc7ebc9c55134648d0c4f3cb21932a680 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 13:38:23 +0000 Subject: [PATCH 439/455] handle more network errors ```python-traceback Traceback (most recent call last): File ".local/bin/github-backup", line 6, in sys.exit(main()) ~~~~^^ File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/cli.py", line 83, in main backup_repositories(args, output_directory, repositories) ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 1845, in backup_repositories backup_pulls(args, repo_cwd, repository, repos_template) 
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 2019, in backup_pulls pulls[number]["commit_data"] = retrieve_data(args, template) ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^ File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 766, in retrieve_data return list(fetch_all()) File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 717, in fetch_all response = json.loads(http_response.read().decode("utf-8")) ~~~~~~~~~~~~~~~~~~^^ File "/usr/lib/python3.14/http/client.py", line 500, in read s = self._safe_read(self.length) File "/usr/lib/python3.14/http/client.py", line 648, in _safe_read data = self.fp.read(cursize) File "/usr/lib/python3.14/socket.py", line 725, in readinto return self._sock.recv_into(b) ~~~~~~~~~~~~~~~~~~~~^^^ File "/usr/lib/python3.14/ssl.py", line 1304, in recv_into return self.read(nbytes, buffer) ~~~~~~~~~^^^^^^^^^^^^^^^^ File "/usr/lib/python3.14/ssl.py", line 1138, in read return self._sslobj.read(len, buffer) ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^ ConnectionResetError: [Errno 104] Connection reset by peer ``` --- github_backup/github_backup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index ae4ef2e..73a8a75 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -806,6 +806,7 @@ def _extract_legal_url(response_body_bytes): response = json.loads(http_response.read().decode("utf-8")) break # Exit retry loop and handle the data returned except ( + ConnectionError, IncompleteRead, json.decoder.JSONDecodeError, TimeoutError, From ddf82f1115f7d635993aa44454fb58c034624272 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 15:25:05 +0000 Subject: [PATCH 440/455] suppress output of call to `git lfs version` --- 
github_backup/github_backup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index ae4ef2e..317a803 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1781,7 +1781,10 @@ def get_authenticated_user(args): def check_git_lfs_install(): - exit_code = subprocess.call(["git", "lfs", "version"]) + exit_code = subprocess.call( + ["git", "lfs", "version"], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + ) if exit_code != 0: raise Exception( "The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com." From ddf7f82e65e5e57f0d5c499ed6f56234cb686eb3 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 13:46:44 +0000 Subject: [PATCH 441/455] add missing `context` argument to `urlopen` call --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index ae4ef2e..6670d2d 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1297,7 +1297,7 @@ def get_jwt_signed_url_via_markdown_api(url, token, repo_context): request.add_header("Content-Type", "application/json") request.add_header("Accept", "application/vnd.github+json") - html = urlopen(request, timeout=30).read().decode("utf-8") + html = urlopen(request, context=https_ctx, timeout=30).read().decode("utf-8") # Parse JWT-signed URL from HTML response # Format: From 2f130ecd6692bf8bc6e51bade07b5f36e56181ff Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 13:54:13 +0000 Subject: [PATCH 442/455] remove bad invocation of the system shell --- github_backup/github_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 6670d2d..80689b8 100644 --- a/github_backup/github_backup.py +++ 
b/github_backup/github_backup.py @@ -2980,7 +2980,7 @@ def fetch_repository( masked_remote_url = mask_password(remote_url) initialized = subprocess.call( - "git ls-remote " + remote_url, stdout=FNULL, stderr=FNULL, shell=True + ["git", "ls-remote", remote_url], stdout=FNULL, stderr=FNULL ) if initialized == 128: if ".wiki.git" in remote_url: From b92aee6f114f98502fea616abeefbbe924229ff0 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 15:12:13 +0000 Subject: [PATCH 443/455] use `subprocess.DEVNULL` instead of emulating it --- github_backup/github_backup.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 8b96622..990993b 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -40,7 +40,6 @@ DISCUSSION_REPLIES_QUERY, ) -FNULL = open(os.devnull, "w") FILE_URI_PREFIX = "file://" logger = logging.getLogger(__name__) @@ -529,19 +528,18 @@ def get_auth(args, encode=True, for_git_cli=False): if platform.system() != "Darwin": raise Exception("Keychain arguments are only supported on Mac OSX") try: - with open(os.devnull, "w") as devnull: - token = subprocess.check_output( - [ - "security", - "find-generic-password", - "-s", - args.osx_keychain_item_name, - "-a", - args.osx_keychain_item_account, - "-w", - ], - stderr=devnull, - ).strip() + token = subprocess.check_output( + [ + "security", + "find-generic-password", + "-s", + args.osx_keychain_item_name, + "-a", + args.osx_keychain_item_account, + "-w", + ], + stderr=subprocess.DEVNULL, + ).strip() token = token.decode("utf-8") auth = token + ":" + "x-oauth-basic" except subprocess.SubprocessError: @@ -2984,7 +2982,8 @@ def fetch_repository( masked_remote_url = mask_password(remote_url) initialized = subprocess.call( - ["git", "ls-remote", remote_url], stdout=FNULL, stderr=FNULL + ["git", "ls-remote", remote_url], + stdout=subprocess.DEVNULL, 
stderr=subprocess.DEVNULL, ) if initialized == 128: if ".wiki.git" in remote_url: From f3eabf0bfe522b7749d693ceaa65c5de4f13d8bc Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 16:23:03 +0000 Subject: [PATCH 444/455] don't pass stdin when doing so can't do any good When the child process doesn't inherit stderr, it can't ask the user for input, so it shouldn't inherit stdin either. --- github_backup/github_backup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 990993b..b76322a 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -1781,7 +1781,7 @@ def get_authenticated_user(args): def check_git_lfs_install(): exit_code = subprocess.call( - ["git", "lfs", "version"], + ["git", "lfs", "version"], stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) if exit_code != 0: @@ -2982,7 +2982,7 @@ def fetch_repository( masked_remote_url = mask_password(remote_url) initialized = subprocess.call( - ["git", "ls-remote", remote_url], + ["git", "ls-remote", remote_url], stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) if initialized == 128: From ccc27b95f7203ec42bf695cc270317fdd73f4489 Mon Sep 17 00:00:00 2001 From: Changaco Date: Thu, 30 Apr 2026 10:46:46 +0000 Subject: [PATCH 445/455] remove legacy code in `mkdir_p` function --- github_backup/github_backup.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index b76322a..4c07808 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -6,7 +6,6 @@ import base64 import calendar import codecs -import errno import json import logging import os @@ -127,13 +126,7 @@ def check_io(): def mkdir_p(*args): for path in args: - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST and 
os.path.isdir(path): - pass - else: - raise + os.makedirs(path, exist_ok=True) def mask_password(url, secret="*****"): From f1fca0f9b7379e02c3d0903daee9d1954d7009eb Mon Sep 17 00:00:00 2001 From: Changaco Date: Thu, 30 Apr 2026 10:53:40 +0000 Subject: [PATCH 446/455] don't leave files open --- github_backup/github_backup.py | 41 ++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 4c07808..e567d3e 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -624,7 +624,8 @@ def get_github_host(args): def read_file_contents(file_uri): - return open(file_uri[len(FILE_URI_PREFIX) :], "rt").readline().strip() + with open(file_uri[len(FILE_URI_PREFIX) :], "rt") as f: + return f.readline().strip() def read_token_from_gh_cli(args): @@ -1964,10 +1965,11 @@ def read_legacy_last_update(args, output_directory): return None, None last_update_path = os.path.join(output_directory, INCREMENTAL_LAST_UPDATE_FILENAME) - if os.path.exists(last_update_path): - return last_update_path, open(last_update_path).read().strip() - - return last_update_path, None + try: + with open(last_update_path) as f: + return last_update_path, f.read().strip() + except FileNotFoundError: + return last_update_path, None def read_resource_last_update(args, resource_cwd, legacy_last_update=None): @@ -1975,13 +1977,13 @@ def read_resource_last_update(args, resource_cwd, legacy_last_update=None): return None last_update_path = os.path.join(resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME) - if os.path.exists(last_update_path): - return open(last_update_path).read().strip() - - if legacy_last_update and resource_backup_exists(resource_cwd): - return legacy_last_update - - return None + try: + with open(last_update_path) as f: + return f.read().strip() + except FileNotFoundError: + if legacy_last_update and resource_backup_exists(resource_cwd): + return legacy_last_update + 
return None def write_resource_last_update(args, resource_cwd, repository): @@ -1990,7 +1992,8 @@ def write_resource_last_update(args, resource_cwd, repository): mkdir_p(resource_cwd) last_update_path = os.path.join(resource_cwd, INCREMENTAL_LAST_UPDATE_FILENAME) - open(last_update_path, "w").write(get_repository_checkpoint_time(repository)) + with open(last_update_path, "w") as f: + f.write(get_repository_checkpoint_time(repository)) def iter_incremental_resource_dirs(output_directory): @@ -2378,7 +2381,8 @@ def backup_discussions(args, repo_cwd, repository): discussions_since = None discussion_last_update_path = os.path.join(discussion_cwd, "last_update") if args.incremental and os.path.exists(discussion_last_update_path): - discussions_since = open(discussion_last_update_path).read().strip() + with open(discussion_last_update_path) as f: + discussions_since = f.read().strip() logger.info("Retrieving {0} discussions".format(repository["full_name"])) try: @@ -2464,7 +2468,8 @@ def backup_discussions(args, repo_cwd, repository): and newest_seen and (not discussions_since or newest_seen > discussions_since) ): - open(discussion_last_update_path, "w").write(newest_seen) + with open(discussion_last_update_path, "w") as f: + f.write(newest_seen) attempted_count = len(summaries) - skipped_count if not summaries: @@ -2601,7 +2606,8 @@ def get_pull_reviews_since(args, pulls_cwd): # repository-level checkpoint would otherwise skip old PRs forever. 
return None, None, reviews_last_update_path - reviews_since = open(reviews_last_update_path).read().strip() + with open(reviews_last_update_path) as f: + reviews_since = f.read().strip() if args_since and reviews_since: return min(args_since, reviews_since), reviews_since, reviews_last_update_path @@ -2753,7 +2759,8 @@ def pull_is_due_for_repository_checkpoint(pull): and not pull_review_errors and (not pull_reviews_since or newest_pull_update > pull_reviews_since) ): - open(pull_reviews_last_update_path, "w").write(newest_pull_update) + with open(pull_reviews_last_update_path, "w") as f: + f.write(newest_pull_update) def backup_milestones(args, repo_cwd, repository, repos_template): From 17b79fcbef880e529ab376090fbd193f102300ac Mon Sep 17 00:00:00 2001 From: Changaco Date: Thu, 30 Apr 2026 10:58:08 +0000 Subject: [PATCH 447/455] rename a function to match what it actually does --- github_backup/github_backup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index e567d3e..f4a94b9 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -545,7 +545,7 @@ def get_auth(args, encode=True, for_git_cli=False): ) elif args.token_fine: if args.token_fine.startswith(FILE_URI_PREFIX): - args.token_fine = read_file_contents(args.token_fine) + args.token_fine = read_first_line(args.token_fine) if args.token_fine.startswith("github_pat_"): auth = args.token_fine @@ -561,7 +561,7 @@ def get_auth(args, encode=True, for_git_cli=False): ) args.token_classic = read_token_from_gh_cli(args) elif args.token_classic.startswith(FILE_URI_PREFIX): - args.token_classic = read_file_contents(args.token_classic) + args.token_classic = read_first_line(args.token_classic) if not args.as_app: auth = args.token_classic + ":" + "x-oauth-basic" @@ -623,7 +623,7 @@ def get_github_host(args): return host -def read_file_contents(file_uri): +def read_first_line(file_uri): with 
open(file_uri[len(FILE_URI_PREFIX) :], "rt") as f: return f.readline().strip() From 3cda5a01fdf094ea33de7d3c02aa7cc60d553e9b Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 20:32:16 +0000 Subject: [PATCH 448/455] document that `--all` doesn't imply `--attachments` --- README.rst | 2 +- github_backup/github_backup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 3a4be3b..ed037fd 100644 --- a/README.rst +++ b/README.rst @@ -325,7 +325,7 @@ Gotchas / Known-issues All is not everything --------------------- -The ``--all`` argument does not include: cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. +The ``--all`` argument does not include: downloading attachments from issue and pull request comments (``--attachments``), cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. 
Starred repository size ----------------------- diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py index 8b96622..dc872c7 100644 --- a/github_backup/github_backup.py +++ b/github_backup/github_backup.py @@ -488,7 +488,7 @@ def parse_args(args=None): "--attachments", action="store_true", dest="include_attachments", - help="download user-attachments from issues, pull requests, and discussions", + help="download user-attachments from issues, pull requests, and discussions [*]", ) parser.add_argument( "--throttle-limit", From 543d76f24bc4eb808618e7a8b5ccbabea80fa700 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 20:35:06 +0000 Subject: [PATCH 449/455] fix a typo in the README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index ed037fd..e5f0f14 100644 --- a/README.rst +++ b/README.rst @@ -363,7 +363,7 @@ This means any blocking errors on previous runs can cause missing data in backup Using (``--incremental-by-files``) will request new data from the API **based on when the file was modified on filesystem**. e.g. if you modify the file yourself you may miss something. -Still saver than the previous version. +Still safer than the previous version. Specifically, issues and pull requests are handled like this. From 9340aa3aaada4c2d41aa8f9c1b6164f9ee9ed082 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 20:35:47 +0000 Subject: [PATCH 450/455] try to clarify what `--incremental` actually does --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index e5f0f14..1bd3ff6 100644 --- a/README.rst +++ b/README.rst @@ -365,7 +365,7 @@ Using (``--incremental-by-files``) will request new data from the API **based on Still safer than the previous version. -Specifically, issues and pull requests are handled like this. +Incremental backup only changes how issue and pull request data is fetched. 
Known blocking errors --------------------- From a2391a550e45ff4882f006696599fcd408317781 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 20:37:05 +0000 Subject: [PATCH 451/455] remove pointless and unsafe `export`s in examples --- README.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 1bd3ff6..33a89fb 100644 --- a/README.rst +++ b/README.rst @@ -429,12 +429,12 @@ Github Backup Examples Backup all repositories, including private ones using a classic token:: - export ACCESS_TOKEN=SOME-GITHUB-TOKEN + ACCESS_TOKEN=SOME-GITHUB-TOKEN github-backup WhiteHouse --token $ACCESS_TOKEN --organization --output-directory /tmp/white-house --repositories --private Use a fine-grained access token to backup a single organization repository with everything else (wiki, pull requests, comments, issues etc):: - export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN + FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN ORGANIZATION=docker REPO=cli # e.g. git@github.com:docker/cli.git @@ -442,14 +442,14 @@ Use a fine-grained access token to backup a single organization repository with Quietly and incrementally backup useful Github user data (public and private repos with SSH) including; all issues, pulls, all public starred repos and gists (omitting "hooks", "releases" and therefore "assets" to prevent blocking). *Great for a cron job.* :: - export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN + FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --security-advisories --discussions --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER Debug an error/block or incomplete backup into a temporary directory. 
Omit "incremental" to fill a previous incomplete backup. :: - export FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN + FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN GH_USER=YOUR-GITHUB-USER github-backup -f $FINE_ACCESS_TOKEN -o /tmp/github-backup/ -l debug -P --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --discussions --repositories --wikis --releases --assets --pull-details --gists --starred-gists $GH_USER From d30d9bfe6034b174ae3839f7aa13f4ad2eff4dc3 Mon Sep 17 00:00:00 2001 From: Changaco Date: Fri, 10 Apr 2026 20:38:31 +0000 Subject: [PATCH 452/455] eliminate trailing spaces --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 33a89fb..c4d0fd0 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ Using PIP via PyPI:: Using PIP via Github (more likely the latest version):: pip install git+https://github.com/josegonzalez/python-github-backup.git#egg=github-backup - + *Install note for python newcomers:* Python scripts are unlikely to be included in your ``$PATH`` by default, this means it cannot be run directly in terminal with ``$ github-backup ...``, you can either add python's install path to your environments ``$PATH`` or call the script directly e.g. using ``$ ~/.local/bin/github-backup``.* @@ -249,7 +249,7 @@ Note: When you run github-backup, you will be asked whether you want to allow " Github Rate-limit and Throttling -------------------------------- -"github-backup" will automatically throttle itself based on feedback from the Github API. +"github-backup" will automatically throttle itself based on feedback from the Github API. Their API is usually rate-limited to 5000 calls per hour. The API will ask github-backup to pause until a specific time when the limit is reset again (at the start of the next hour). This continues until the backup is complete. 
@@ -446,7 +446,7 @@ Quietly and incrementally backup useful Github user data (public and private rep GH_USER=YOUR-GITHUB-USER github-backup -f $FINE_ACCESS_TOKEN --prefer-ssh -o ~/github-backup/ -l error -P -i --all-starred --starred --watched --followers --following --issues --issue-comments --issue-events --pulls --pull-comments --pull-reviews --pull-commits --labels --milestones --security-advisories --discussions --repositories --wikis --releases --assets --attachments --pull-details --gists --starred-gists $GH_USER - + Debug an error/block or incomplete backup into a temporary directory. Omit "incremental" to fill a previous incomplete backup. :: FINE_ACCESS_TOKEN=SOME-GITHUB-TOKEN From 8e76089565d7822bd94816433c2509daee40f26b Mon Sep 17 00:00:00 2001 From: Changaco Date: Sat, 25 Apr 2026 07:07:24 +0000 Subject: [PATCH 453/455] document that nothing is saved by default --- README.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.rst b/README.rst index c4d0fd0..c3d5d5d 100644 --- a/README.rst +++ b/README.rst @@ -327,6 +327,11 @@ All is not everything The ``--all`` argument does not include: downloading attachments from issue and pull request comments (``--attachments``), cloning private repos (``-P, --private``), cloning forks (``-F, --fork``), cloning starred repositories (``--all-starred``), ``--pull-details``, cloning LFS repositories (``--lfs``), cloning gists (``--gists``) or cloning starred gist repos (``--starred-gists``). See examples for more. +Saves nothing if no arguments are passed +---------------------------------------- + +At least one argument like ``--all`` or ``--repositories`` is needed for github-backup to actually save data. Without relevant arguments, github-backup fetches some data from GitHub but doesn't put any of it into files. 
+ Starred repository size ----------------------- From bd6eea02d5095a83d25f2d57202bb78c93be1cc2 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 30 Apr 2026 15:52:41 +0000 Subject: [PATCH 454/455] Release version 0.62.1 --- CHANGES.rst | 58 ++++++++++++++++++++++++++++++++++++++- github_backup/__init__.py | 2 +- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 86bcb32..20ac838 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,65 @@ Changelog ========= -0.62.0 (2026-04-29) +0.62.1 (2026-04-30) ------------------- ------------------------ +- Document that nothing is saved by default. [Changaco] +- Eliminate trailing spaces. [Changaco] +- Remove pointless and unsafe `export`s in examples. [Changaco] +- Try to clarify what `--incremental` actually does. [Changaco] +- Fix a typo in the README. [Changaco] +- Document that `--all` doesn't imply `--attachments` [Changaco] +- Rename a function to match what it actually does. [Changaco] +- Don't leave files open. [Changaco] +- Remove legacy code in `mkdir_p` function. [Changaco] +- Don't pass stdin when doing so can't do any good. [Changaco] + + When the child process doesn't inherit stderr, it can't ask the user for input, so it shouldn't inherit stdin either. +- Use `subprocess.DEVNULL` instead of emulating it. [Changaco] +- Remove bad invocation of the system shell. [Changaco] +- Add missing `context` argument to `urlopen` call. [Changaco] +- Suppress output of call to `git lfs version` [Changaco] +- Handle more network errors. 
[Changaco] + + ```python-traceback + Traceback (most recent call last): + File ".local/bin/github-backup", line 6, in <module> + sys.exit(main()) + ~~~~^^ + File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/cli.py", line 83, in main + backup_repositories(args, output_directory, repositories) + ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 1845, in backup_repositories + backup_pulls(args, repo_cwd, repository, repos_template) + ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 2019, in backup_pulls + pulls[number]["commit_data"] = retrieve_data(args, template) + ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^ + File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 766, in retrieve_data + return list(fetch_all()) + File ".local/share/pipx/venvs/github-backup/lib/python3.14/site-packages/github_backup/github_backup.py", line 717, in fetch_all + response = json.loads(http_response.read().decode("utf-8")) + ~~~~~~~~~~~~~~~~~~^^ + File "/usr/lib/python3.14/http/client.py", line 500, in read + s = self._safe_read(self.length) + File "/usr/lib/python3.14/http/client.py", line 648, in _safe_read + data = self.fp.read(cursize) + File "/usr/lib/python3.14/socket.py", line 725, in readinto + return self._sock.recv_into(b) + ~~~~~~~~~~~~~~~~~~~~^^^ + File "/usr/lib/python3.14/ssl.py", line 1304, in recv_into + return self.read(nbytes, buffer) + ~~~~~~~~~^^^^^^^^^^^^^^^^ + File "/usr/lib/python3.14/ssl.py", line 1138, in read + return self._sslobj.read(len, buffer) + ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^ + ConnectionResetError: [Errno 104] Connection reset by peer + ``` + + +0.62.0 (2026-04-29) +------------------- - Skip checkpoint-equal incremental items. 
[Duncan Ogilvie] - Avoid redundant release asset list requests. [Duncan Ogilvie] - Reduce unnecessary pull requests with incremental fetching. [Duncan diff --git a/github_backup/__init__.py b/github_backup/__init__.py index 647040d..b7b61f3 100644 --- a/github_backup/__init__.py +++ b/github_backup/__init__.py @@ -1 +1 @@ -__version__ = "0.62.0" +__version__ = "0.62.1" From 2cbce1425cbb2a2f00ba7996f795415d2ede6c37 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 May 2026 22:45:36 +0000 Subject: [PATCH 455/455] chore(deps): bump black in the python-packages group Bumps the python-packages group with 1 update: [black](https://github.com/psf/black). Updates `black` from 26.3.1 to 26.5.1 - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/26.3.1...26.5.1) --- updated-dependencies: - dependency-name: black dependency-version: 26.5.1 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- release-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-requirements.txt b/release-requirements.txt index ad8bc5c..117aeea 100644 --- a/release-requirements.txt +++ b/release-requirements.txt @@ -1,6 +1,6 @@ # Linting & Formatting autopep8==2.3.2 -black==26.3.1 +black==26.5.1 flake8==7.3.0 # Testing