From ed36f6c23e1269eb9068badb42c5336af8b686e3 Mon Sep 17 00:00:00 2001 From: Kaali Date: Mon, 15 Jul 2019 17:13:42 +0530 Subject: [PATCH 01/16] Authenticated user --- app/auth.py | 49 + app/backup_new.py | 90 ++ bin/github-backup => app/github-backup.py | 16 +- app/utils.py | 83 ++ bin/__init__.py | 0 bin/auth.py | 49 + bin/backup_new.py | 96 ++ bin/github-backup.py | 1123 +++++++++++++++++++++ bin/utils.py | 97 ++ 9 files changed, 1598 insertions(+), 5 deletions(-) create mode 100644 app/auth.py create mode 100755 app/backup_new.py rename bin/github-backup => app/github-backup.py (98%) create mode 100644 app/utils.py create mode 100644 bin/__init__.py create mode 100644 bin/auth.py create mode 100755 bin/backup_new.py create mode 100755 bin/github-backup.py create mode 100644 bin/utils.py diff --git a/app/auth.py b/app/auth.py new file mode 100644 index 0000000..9c4b284 --- /dev/null +++ b/app/auth.py @@ -0,0 +1,49 @@ + +from urllib.parse import urlparse +from urllib.parse import quote as urlquote +from urllib.parse import urlencode +import base64 + + +def mask_password(url, secret='*****'): + parsed = urlparse(url) + + if not parsed.password: + return url + elif parsed.password == 'x-oauth-basic': + return url.replace(parsed.username, secret) + + return url.replace(parsed.password, secret) + + + +def get_github_api_host(): + host = 'api.github.com' + return host + + + + + +def get_auth(username, password, encode=True): + """ + Based on the username and password for the github, This will generate + a basic_auth token for github authentication, which will be a base64 encoded + string of username and password. + """ + if username: + if not password: + raise Exception("Passsword is required for github") + password = urlquote(args.password) + auth = username + ':' + password + else: + log_error('You must specify a username for basic auth') + + if not auth: + return None + + if not encode: + return auth + + basic_auth = base64.b64encode(auth.encode('ascii')) + return basic_auth diff --git a/app/backup_new.py b/app/backup_new.py new file mode 100755 index 0000000..e6d2e06 --- /dev/null +++ b/app/backup_new.py @@ -0,0 +1,90 @@ +#!/usr/bin env python + +from auth import get_auth, get_github_api_host +from urllib.request import Request +from utils import construct_request, get_response, ensure_directory +from loguru import logger +import json + +def get_authenticated_user(username, password): + template = 'https://{0}/user'.format(get_github_api_host()) + logger.info (f'THis is the template from authenticated_user {template}') + data = retrieve_data(username, password, template, single_request=True) + return data[0] + +def retrieve_data(username, password, template, query_args=None, single_request=False): + return list(retrieve_data_gen(username, password, template, query_args, single_request)) + +def retrieve_data_gen(username, password, template, query_args=None, single_request=False): + auth = get_auth(username, password) + #query_args = get_query_args(query_args) + logger.info(f"The auth for the user is {auth}") + per_page = 100 + page = 0 + + while True: + page = page + 1 + request = construct_request(per_page, page, template, auth) # noqa + r, errors = get_response(request, auth, template) + + status_code = int(r.getcode()) + + retries = 0 + while retries < 3 and status_code == 502: + print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') + retries += 1 + time.sleep(5) + request = construct_request(per_page, page, query_args, template, auth) # noqa + r, errors = get_response(request, auth, template) + + status_code = int(r.getcode()) + + if status_code != 200: + template = 'API request returned HTTP {0}: {1}' + errors.append(template.format(status_code, r.reason)) + log_error(errors) + + response = json.loads(r.read().decode('utf-8')) + if len(errors) == 0: + if type(response) == list: + for resp in response: + yield resp + if len(response) < per_page: + break + elif type(response) == dict and single_request: + yield response + + if len(errors) > 0: + log_error(errors) + + if single_request: + break + + + + + + +def main(): + print ("Execution started") + output_directory = "." + username = "graphicaldot" + password = "mitthuparishweta" + + output_dirname = "/Users/kaali/Programs/github-backup/python-github-backup/app/account" + + # if args.lfs_clone: + # check_git_lfs_install() + ensure_directory(output_dirname) + logger.info('Backing up user {0} to {1}'.format(username, output_dirname)) + + authenticated_user = get_authenticated_user(username, password) + + print (authenticated_user) + #repositories = retrieve_repositories(args, authenticated_user) + # repositories = filter_repositories(args, repositories) + # backup_repositories(args, output_directory, repositories) + # backup_account(args, output_directory) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/bin/github-backup b/app/github-backup.py similarity index 98% rename from bin/github-backup rename to app/github-backup.py index 87fa565..09b89f6 100755 --- a/bin/github-backup +++ b/app/github-backup.py @@ -41,8 +41,9 @@ from urllib2 import HTTPRedirectHandler from urllib2 import build_opener -from github_backup import __version__ +#from github_backup import __version__ +__version__ = "3.9.9" FNULL = open(os.devnull, 'w') @@ -365,7 +366,9 @@ def get_auth(args, encode=True): if not encode: return auth - return base64.b64encode(auth.encode('ascii')) + basic_auth = base64.b64encode(auth.encode('ascii')) + print (basic_auth) + return basic_auth def get_github_api_host(args): @@ -582,6 +585,7 @@ def download_file(url, path, auth): def get_authenticated_user(args): template = 'https://{0}/user'.format(get_github_api_host(args)) + print (f'THis is the template from authenticated_user {template}') data = retrieve_data(args, template, single_request=True) return data[0] @@ -599,6 +603,7 @@ def retrieve_repositories(args, authenticated_user): # we must use the /user/repos API to be able to access private repos template = 'https://{0}/user/repos'.format( get_github_api_host(args)) + print (f"Template for retrieve_repos is {template}") else: if args.private and not args.organization: log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') @@ -1095,6 +1100,7 @@ def json_dump(data, output_file): def main(): args = parse_args() + print (args) output_directory = os.path.realpath(args.output_directory) if not os.path.isdir(output_directory): @@ -1108,9 +1114,9 @@ def main(): authenticated_user = get_authenticated_user(args) repositories = retrieve_repositories(args, authenticated_user) - repositories = filter_repositories(args, repositories) - backup_repositories(args, output_directory, repositories) - backup_account(args, output_directory) + # repositories = filter_repositories(args, repositories) + # backup_repositories(args, output_directory, repositories) + # backup_account(args, output_directory) if __name__ == '__main__': diff --git a/app/utils.py b/app/utils.py new file mode 100644 index 0000000..eb733cb --- /dev/null +++ b/app/utils.py @@ -0,0 +1,83 @@ + + +from urllib.parse import urlparse +from urllib.parse import quote as urlquote +from urllib.parse import urlencode +from urllib.request import Request +from urllib.error import HTTPError, URLError +from urllib.request import urlopen +from urllib.request import Request +from urllib.request import HTTPRedirectHandler +from urllib.request import build_opener +import socket +import os +from loguru import logger + + + +def mkdir_p(*args): + for path in args: + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +def ensure_directory(dirname): + output_directory = os.path.realpath(dirname) + if not os.path.isdir(output_directory): + logger.debug('Create output directory {0}'.format(dirname)) + + mkdir_p(output_directory) + return + # if args.lfs_clone: + # check_git_lfs_install() + + + +def construct_request(per_page, page, template, auth): + querystring = urlencode(dict(list({ + 'per_page': per_page, + 'page': page + }.items()) + #+ list(query_args.items()) + )) + + request = Request(template + '?' + querystring) + if auth is not None: + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + logger.info('Requesting {}?{}'.format(template, querystring)) + return request + + +def get_response(request, auth, template): + retry_timeout = 3 + errors = [] + # We'll make requests in a loop so we can + # delay and retry in the case of rate-limiting + while True: + should_continue = False + try: + r = urlopen(request) + except HTTPError as exc: + errors, should_continue = _request_http_error(exc, auth, errors) # noqa + r = exc + except URLError as e: + logger.warning(e.reason) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise + except socket.error as e: + logger.warning(e.strerror) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise + + if should_continue: + continue + + break + return r, errors \ No newline at end of file diff --git a/bin/__init__.py b/bin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bin/auth.py b/bin/auth.py new file mode 100644 index 0000000..74cfdf7 --- /dev/null +++ b/bin/auth.py @@ -0,0 +1,49 @@ + +from urllib.parse import urlparse +from urllib.parse import quote as urlquote +from urllib.parse import urlencode + + + +def mask_password(url, secret='*****'): + parsed = urlparse(url) + + if not parsed.password: + return url + elif parsed.password == 'x-oauth-basic': + return url.replace(parsed.username, secret) + + return url.replace(parsed.password, secret) + + + +def get_github_api_host(): + host = 'api.github.com' + return host + + + + + +def get_auth(username, password, encode=True): + """ + Based on the username and password for the github, This will generate + a basic_auth token for github authentication, which will be a base64 encoded + string of username and password. + """ + if username: + if not password: + raise Exception("Passsword is required for github") + password = urlquote(args.password) + auth = username + ':' + password + else: + log_error('You must specify a username for basic auth') + + if not auth: + return None + + if not encode: + return auth + + basic_auth = base64.b64encode(auth.encode('ascii')) + return basic_auth diff --git a/bin/backup_new.py b/bin/backup_new.py new file mode 100755 index 0000000..1aa8ff5 --- /dev/null +++ b/bin/backup_new.py @@ -0,0 +1,96 @@ +#!/usr/bin env python + +from auth import get_auth, get_github_api_host +from urllib.request import Request +from utils import construct_request, get_response + +def mkdir_p(*args): + for path in args: + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + +def get_authenticated_user(): + template = 'https://{0}/user'.format(get_github_api_host()) + print (f'THis is the template from authenticated_user {template}') + data = retrieve_data(template, single_request=True) + return data[0] + +def retrieve_data(args, template, query_args=None, single_request=False): + return list(retrieve_data_gen(args, template, query_args, single_request)) + +def retrieve_data_gen(username, password, template, query_args=None, single_request=False): + auth = get_auth(username, password) + #query_args = get_query_args(query_args) + per_page = 100 + page = 0 + + while True: + page = page + 1 + request = construct_request(per_page, page, query_args, template, auth) # noqa + r, errors = get_response(request, auth, template) + + status_code = int(r.getcode()) + + retries = 0 + while retries < 3 and status_code == 502: + print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') + retries += 1 + time.sleep(5) + request = construct_request(per_page, page, query_args, template, auth) # noqa + r, errors = get_response(request, auth, template) + + status_code = int(r.getcode()) + + if status_code != 200: + template = 'API request returned HTTP {0}: {1}' + errors.append(template.format(status_code, r.reason)) + log_error(errors) + + response = json.loads(r.read().decode('utf-8')) + if len(errors) == 0: + if type(response) == list: + for resp in response: + yield resp + if len(response) < per_page: + break + elif type(response) == dict and single_request: + yield response + + if len(errors) > 0: + log_error(errors) + + if single_request: + break + + + + + + +def main(): + print ("Execution started") + output_directory = "." + user_name = "graphicaldot" + password = "mitthuparishweta" + output_directory = os.path.realpath(output_directory) + if not os.path.isdir(output_directory): + log_info('Create output directory {0}'.format(output_directory)) + mkdir_p(output_directory) + + # if args.lfs_clone: + # check_git_lfs_install() + + log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) + + authenticated_user = get_authenticated_user(args) + + print (authnticated_user) + #repositories = retrieve_repositories(args, authenticated_user) + # repositories = filter_repositories(args, repositories) + # backup_repositories(args, output_directory, repositories) + # backup_account(args, output_directory) \ No newline at end of file diff --git a/bin/github-backup.py b/bin/github-backup.py new file mode 100755 index 0000000..09b89f6 --- /dev/null +++ b/bin/github-backup.py @@ -0,0 +1,1123 @@ +#!/usr/bin/env python + +from __future__ import print_function +import socket + +import argparse +import base64 +import calendar +import codecs +import errno +import getpass +import json +import logging +import os +import re +import select +import subprocess +import sys +import time +import platform +PY2 = False +try: + # python 3 + from urllib.parse import urlparse + from urllib.parse import quote as urlquote + from urllib.parse import urlencode + from urllib.error import HTTPError, URLError + from urllib.request import urlopen + from urllib.request import Request + from urllib.request import HTTPRedirectHandler + from urllib.request import build_opener +except ImportError: + # python 2 + PY2 = True + from urlparse import urlparse + from urllib import quote as urlquote + from urllib import urlencode + from urllib2 import HTTPError, URLError + from urllib2 import urlopen + from urllib2 import Request + from urllib2 import HTTPRedirectHandler + from urllib2 import build_opener + +#from github_backup import __version__ + +__version__ = "3.9.9" +FNULL = open(os.devnull, 'w') + + +def log_error(message): + """ + Log message (str) or messages (List[str]) to stderr and exit with status 1 + """ + log_warning(message) + sys.exit(1) + + +def log_info(message): + """ + Log message (str) or messages (List[str]) to stdout + """ + if type(message) == str: + message = [message] + + for msg in message: + sys.stdout.write("{0}\n".format(msg)) + + +def log_warning(message): + """ + Log message (str) or messages (List[str]) to stderr + """ + if type(message) == str: + message = [message] + + for msg in message: + sys.stderr.write("{0}\n".format(msg)) + + +def logging_subprocess(popenargs, + logger, + stdout_log_level=logging.DEBUG, + stderr_log_level=logging.ERROR, + **kwargs): + """ + Variant of subprocess.call that accepts a logger instead of stdout/stderr, + and logs stdout messages via logger.debug and stderr messages via + logger.error. + """ + child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, **kwargs) + if sys.platform == 'win32': + log_info("Windows operating system detected - no subprocess logging will be returned") + + log_level = {child.stdout: stdout_log_level, + child.stderr: stderr_log_level} + + def check_io(): + if sys.platform == 'win32': + return + ready_to_read = select.select([child.stdout, child.stderr], + [], + [], + 1000)[0] + for io in ready_to_read: + line = io.readline() + if not logger: + continue + if not (io == child.stderr and not line): + logger.log(log_level[io], line[:-1]) + + # keep checking stdout/stderr until the child exits + while child.poll() is None: + check_io() + + check_io() # check again to catch anything after the process exits + + rc = child.wait() + + if rc != 0: + print('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr) + print('\t', ' '.join(popenargs), file=sys.stderr) + + return rc + + +def mkdir_p(*args): + for path in args: + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +def mask_password(url, secret='*****'): + parsed = urlparse(url) + + if not parsed.password: + return url + elif parsed.password == 'x-oauth-basic': + return url.replace(parsed.username, secret) + + return url.replace(parsed.password, secret) + + +def parse_args(): + parser = argparse.ArgumentParser(description='Backup a github account') + parser.add_argument('user', + metavar='USER', + type=str, + help='github username') + parser.add_argument('-u', + '--username', + dest='username', + help='username for basic auth') + parser.add_argument('-p', + '--password', + dest='password', + help='password for basic auth. ' + 'If a username is given but not a password, the ' + 'password will be prompted for.') + parser.add_argument('-t', + '--token', + dest='token', + help='personal access or OAuth token, or path to token (file://...)') # noqa + parser.add_argument('-o', + '--output-directory', + default='.', + dest='output_directory', + help='directory at which to backup the repositories') + parser.add_argument('-i', + '--incremental', + action='store_true', + dest='incremental', + help='incremental backup') + parser.add_argument('--starred', + action='store_true', + dest='include_starred', + help='include JSON output of starred repositories in backup') + parser.add_argument('--all-starred', + action='store_true', + dest='all_starred', + help='include starred repositories in backup [*]') + parser.add_argument('--watched', + action='store_true', + dest='include_watched', + help='include JSON output of watched repositories in backup') + parser.add_argument('--followers', + action='store_true', + dest='include_followers', + help='include JSON output of followers in backup') + parser.add_argument('--following', + action='store_true', + dest='include_following', + help='include JSON output of following users in backup') + parser.add_argument('--all', + action='store_true', + dest='include_everything', + help='include everything in backup (not including [*])') + parser.add_argument('--issues', + action='store_true', + dest='include_issues', + help='include issues in backup') + parser.add_argument('--issue-comments', + action='store_true', + dest='include_issue_comments', + help='include issue comments in backup') + parser.add_argument('--issue-events', + action='store_true', + dest='include_issue_events', + help='include issue events in backup') + parser.add_argument('--pulls', + action='store_true', + dest='include_pulls', + help='include pull requests in backup') + parser.add_argument('--pull-comments', + action='store_true', + dest='include_pull_comments', + help='include pull request review comments in backup') + parser.add_argument('--pull-commits', + action='store_true', + dest='include_pull_commits', + help='include pull request commits in backup') + parser.add_argument('--pull-details', + action='store_true', + dest='include_pull_details', + help='include more pull request details in backup [*]') + parser.add_argument('--labels', + action='store_true', + dest='include_labels', + help='include labels in backup') + parser.add_argument('--hooks', + action='store_true', + dest='include_hooks', + help='include hooks in backup (works only when authenticated)') # noqa + parser.add_argument('--milestones', + action='store_true', + dest='include_milestones', + help='include milestones in backup') + parser.add_argument('--repositories', + action='store_true', + dest='include_repository', + help='include repository clone in backup') + parser.add_argument('--bare', + action='store_true', + dest='bare_clone', + help='clone bare repositories') + parser.add_argument('--lfs', + action='store_true', + dest='lfs_clone', + help='clone LFS repositories (requires Git LFS to be installed, https://git-lfs.github.com) [*]') + parser.add_argument('--wikis', + action='store_true', + dest='include_wiki', + help='include wiki clone in backup') + parser.add_argument('--gists', + action='store_true', + dest='include_gists', + help='include gists in backup [*]') + parser.add_argument('--starred-gists', + action='store_true', + dest='include_starred_gists', + help='include starred gists in backup [*]') + parser.add_argument('--skip-existing', + action='store_true', + dest='skip_existing', + help='skip project if a backup directory exists') + parser.add_argument('-L', + '--languages', + dest='languages', + help='only allow these languages', + nargs='*') + parser.add_argument('-N', + '--name-regex', + dest='name_regex', + help='python regex to match names against') + parser.add_argument('-H', + '--github-host', + dest='github_host', + help='GitHub Enterprise hostname') + parser.add_argument('-O', + '--organization', + action='store_true', + dest='organization', + help='whether or not this is an organization user') + parser.add_argument('-R', + '--repository', + dest='repository', + help='name of repository to limit backup to') + parser.add_argument('-P', '--private', + action='store_true', + dest='private', + help='include private repositories [*]') + parser.add_argument('-F', '--fork', + action='store_true', + dest='fork', + help='include forked repositories [*]') + parser.add_argument('--prefer-ssh', + action='store_true', + help='Clone repositories using SSH instead of HTTPS') + parser.add_argument('-v', '--version', + action='version', + version='%(prog)s ' + __version__) + parser.add_argument('--keychain-name', + dest='osx_keychain_item_name', + help='OSX ONLY: name field of password item in OSX keychain that holds the personal access or OAuth token') + parser.add_argument('--keychain-account', + dest='osx_keychain_item_account', + help='OSX ONLY: account field of password item in OSX keychain that holds the personal access or OAuth token') + parser.add_argument('--releases', + action='store_true', + dest='include_releases', + help='include release information, not including assets or binaries' + ) + parser.add_argument('--assets', + action='store_true', + dest='include_assets', + help='include assets alongside release information; only applies if including releases') + return parser.parse_args() + + +def get_auth(args, encode=True): + auth = None + + if args.osx_keychain_item_name: + if not args.osx_keychain_item_account: + log_error('You must specify both name and account fields for osx keychain password items') + else: + if platform.system() != 'Darwin': + log_error("Keychain arguments are only supported on Mac OSX") + try: + with open(os.devnull, 'w') as devnull: + token = (subprocess.check_output([ + 'security', 'find-generic-password', + '-s', args.osx_keychain_item_name, + '-a', args.osx_keychain_item_account, + '-w'], stderr=devnull).strip()) + auth = token + ':' + 'x-oauth-basic' + except: + log_error('No password item matching the provided name and account could be found in the osx keychain.') + elif args.osx_keychain_item_account: + log_error('You must specify both name and account fields for osx keychain password items') + elif args.token: + _path_specifier = 'file://' + if args.token.startswith(_path_specifier): + args.token = open(args.token[len(_path_specifier):], + 'rt').readline().strip() + auth = args.token + ':' + 'x-oauth-basic' + elif args.username: + if not args.password: + args.password = getpass.getpass() + if encode: + password = args.password + else: + password = urlquote(args.password) + auth = args.username + ':' + password + elif args.password: + log_error('You must specify a username for basic auth') + + if not auth: + return None + + if not encode: + return auth + + basic_auth = base64.b64encode(auth.encode('ascii')) + print (basic_auth) + return basic_auth + + +def get_github_api_host(args): + if args.github_host: + host = args.github_host + '/api/v3' + else: + host = 'api.github.com' + + return host + + +def get_github_host(args): + if args.github_host: + host = args.github_host + else: + host = 'github.com' + + return host + + +def get_github_repo_url(args, repository): + if args.prefer_ssh: + return repository['ssh_url'] + + if repository.get('is_gist'): + return repository['git_pull_url'] + + auth = get_auth(args, False) + if auth: + repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( + auth, + get_github_host(args), + repository['owner']['login'], + repository['name']) + else: + repo_url = repository['clone_url'] + + return repo_url + + +def retrieve_data_gen(args, template, query_args=None, single_request=False): + auth = get_auth(args) + query_args = get_query_args(query_args) + per_page = 100 + page = 0 + + while True: + page = page + 1 + request = _construct_request(per_page, page, query_args, template, auth) # noqa + r, errors = _get_response(request, auth, template) + + status_code = int(r.getcode()) + + retries = 0 + while retries < 3 and status_code == 502: + print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') + retries += 1 + time.sleep(5) + request = _construct_request(per_page, page, query_args, template, auth) # noqa + r, errors = _get_response(request, auth, template) + + status_code = int(r.getcode()) + + if status_code != 200: + template = 'API request returned HTTP {0}: {1}' + errors.append(template.format(status_code, r.reason)) + log_error(errors) + + response = json.loads(r.read().decode('utf-8')) + if len(errors) == 0: + if type(response) == list: + for resp in response: + yield resp + if len(response) < per_page: + break + elif type(response) == dict and single_request: + yield response + + if len(errors) > 0: + log_error(errors) + + if single_request: + break + +def retrieve_data(args, template, query_args=None, single_request=False): + return list(retrieve_data_gen(args, template, query_args, single_request)) + +def get_query_args(query_args=None): + if not query_args: + query_args = {} + return query_args + + +def _get_response(request, auth, template): + retry_timeout = 3 + errors = [] + # We'll make requests in a loop so we can + # delay and retry in the case of rate-limiting + while True: + should_continue = False + try: + r = urlopen(request) + except HTTPError as exc: + errors, should_continue = _request_http_error(exc, auth, errors) # noqa + r = exc + except URLError as e: + log_warning(e.reason) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise + except socket.error as e: + log_warning(e.strerror) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise + + if should_continue: + continue + + break + return r, errors + + +def _construct_request(per_page, page, query_args, template, auth): + querystring = urlencode(dict(list({ + 'per_page': per_page, + 'page': page + }.items()) + list(query_args.items()))) + + request = Request(template + '?' + querystring) + if auth is not None: + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + log_info('Requesting {}?{}'.format(template, querystring)) + return request + + +def _request_http_error(exc, auth, errors): + # HTTPError behaves like a Response so we can + # check the status code and headers to see exactly + # what failed. + + should_continue = False + headers = exc.headers + limit_remaining = int(headers.get('x-ratelimit-remaining', 0)) + + if exc.code == 403 and limit_remaining < 1: + # The X-RateLimit-Reset header includes a + # timestamp telling us when the limit will reset + # so we can calculate how long to wait rather + # than inefficiently polling: + gm_now = calendar.timegm(time.gmtime()) + reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now + # We'll never sleep for less than 10 seconds: + delta = max(10, reset - gm_now) + + limit = headers.get('x-ratelimit-limit') + print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa + file=sys.stderr) + + if auth is None: + print('Hint: Authenticate to raise your GitHub rate limit', + file=sys.stderr) + + time.sleep(delta) + should_continue = True + return errors, should_continue + + +def _request_url_error(template, retry_timeout): + # Incase of a connection timing out, we can retry a few time + # But we won't crash and not back-up the rest now + log_info('{} timed out'.format(template)) + retry_timeout -= 1 + + if retry_timeout >= 0: + return True + + log_error('{} timed out to much, skipping!') + return False + + +class S3HTTPRedirectHandler(HTTPRedirectHandler): + """ + A subclassed redirect handler for downloading Github assets from S3. + + urllib will add the Authorization header to the redirected request to S3, which will result in a 400, + so we should remove said header on redirect. + """ + def redirect_request(self, req, fp, code, msg, headers, newurl): + if PY2: + # HTTPRedirectHandler is an old style class + request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) + else: + request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) + del request.headers['Authorization'] + return request + + +def download_file(url, path, auth): + request = Request(url) + request.add_header('Accept', 'application/octet-stream') + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + opener = build_opener(S3HTTPRedirectHandler) + response = opener.open(request) + + chunk_size = 16 * 1024 + with open(path, 'wb') as f: + while True: + chunk = response.read(chunk_size) + if not chunk: + break + f.write(chunk) + + +def get_authenticated_user(args): + template = 'https://{0}/user'.format(get_github_api_host(args)) + print (f'THis is the template from authenticated_user {template}') + data = retrieve_data(args, template, single_request=True) + return data[0] + + +def check_git_lfs_install(): + exit_code = subprocess.call(['git', 'lfs', 'version']) + if exit_code != 0: + log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + + +def retrieve_repositories(args, authenticated_user): + log_info('Retrieving repositories') + single_request = False + if args.user == authenticated_user['login']: + # we must use the /user/repos API to be able to access private repos + template = 'https://{0}/user/repos'.format( + get_github_api_host(args)) + print (f"Template for retrieve_repos is {template}") + else: + if args.private and not args.organization: + log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') + template = 'https://{0}/users/{1}/repos'.format( + get_github_api_host(args), + args.user) + + if args.organization: + template = 'https://{0}/orgs/{1}/repos'.format( + get_github_api_host(args), + args.user) + + if args.repository: + single_request = True + template = 'https://{0}/repos/{1}/{2}'.format( + get_github_api_host(args), + args.user, + args.repository) + + repos = retrieve_data(args, template, single_request=single_request) + + if args.all_starred: + starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(args), args.user) + starred_repos = retrieve_data(args, starred_template, single_request=False) + # flag each repo as starred for downstream processing + for item in starred_repos: + item.update({'is_starred': True}) + repos.extend(starred_repos) + + if args.include_gists: + gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(args), args.user) + gists = retrieve_data(args, gists_template, single_request=False) + # flag each repo as a gist for downstream processing + for item in gists: + item.update({'is_gist': True}) + repos.extend(gists) + + if args.include_starred_gists: + starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host(args)) + starred_gists = retrieve_data(args, starred_gists_template, single_request=False) + # flag each repo as a starred gist for downstream processing + for item in starred_gists: + item.update({'is_gist': True, + 'is_starred': True}) + repos.extend(starred_gists) + + return repos + + +def filter_repositories(args, unfiltered_repositories): + log_info('Filtering repositories') + + repositories = [] + for r in unfiltered_repositories: + # gists can be anonymous, so need to safely check owner + if r.get('owner', {}).get('login') == args.user or r.get('is_starred'): + repositories.append(r) + + name_regex = None + if args.name_regex: + name_regex = re.compile(args.name_regex) + + languages = None + if args.languages: + languages = [x.lower() for x in args.languages] + + if not args.fork: + repositories = [r for r in repositories if not r.get('fork')] + if not args.private: + repositories = [r for r in repositories if not r.get('private') or r.get('public')] + if languages: + repositories = [r for r in repositories if r.get('language') and r.get('language').lower() in languages] # noqa + if name_regex: + repositories = [r for r in repositories if name_regex.match(r['name'])] + + return repositories + + +def backup_repositories(args, output_directory, repositories): + log_info('Backing up repositories') + repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) + + if args.incremental: + last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa + last_update_path = os.path.join(output_directory, 'last_update') + if os.path.exists(last_update_path): + args.since = open(last_update_path).read().strip() + else: + args.since = None + else: + args.since = None + + for repository in repositories: + if repository.get('is_gist'): + repo_cwd = os.path.join(output_directory, 'gists', repository['id']) + elif repository.get('is_starred'): + # put starred repos in -o/starred/${owner}/${repo} to prevent collision of + # any repositories with the same name + repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + else: + repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) + + repo_dir = os.path.join(repo_cwd, 'repository') + repo_url = get_github_repo_url(args, repository) + + include_gists = (args.include_gists or args.include_starred_gists) + if (args.include_repository or args.include_everything) \ + or (include_gists and repository.get('is_gist')): + repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') + fetch_repository(repo_name, + repo_url, + repo_dir, + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone) + + if repository.get('is_gist'): + # dump gist information to a file as well + output_file = '{0}/gist.json'.format(repo_cwd) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(repository, f) + + continue # don't try to back anything else for a gist; it doesn't exist + + download_wiki = (args.include_wiki or args.include_everything) + if repository['has_wiki'] and download_wiki: + fetch_repository(repository['name'], + repo_url.replace('.git', '.wiki.git'), + os.path.join(repo_cwd, 'wiki'), + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone) + + if args.include_issues or args.include_everything: + backup_issues(args, repo_cwd, repository, repos_template) + + if args.include_pulls or args.include_everything: + backup_pulls(args, repo_cwd, repository, repos_template) + + if args.include_milestones or args.include_everything: + backup_milestones(args, repo_cwd, repository, repos_template) + + if args.include_labels or args.include_everything: + backup_labels(args, repo_cwd, repository, repos_template) + + if args.include_hooks or args.include_everything: + backup_hooks(args, repo_cwd, repository, repos_template) + + if args.include_releases or args.include_everything: + backup_releases(args, repo_cwd, repository, repos_template, + include_assets=args.include_assets or args.include_everything) + + if args.incremental: + open(last_update_path, 'w').write(last_update) + + +def backup_issues(args, repo_cwd, repository, repos_template): + has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) + if args.skip_existing and has_issues_dir: + return + + log_info('Retrieving {0} issues'.format(repository['full_name'])) + issue_cwd = os.path.join(repo_cwd, 'issues') + mkdir_p(repo_cwd, issue_cwd) + + issues = {} + issues_skipped = 0 + issues_skipped_message = '' + _issue_template = '{0}/{1}/issues'.format(repos_template, + repository['full_name']) + + should_include_pulls = args.include_pulls or args.include_everything + issue_states = ['open', 'closed'] + for issue_state in issue_states: + query_args = { + 'filter': 'all', + 'state': issue_state + } + if args.since: + query_args['since'] = args.since + + _issues = retrieve_data(args, + _issue_template, + query_args=query_args) + for issue in _issues: + # skip pull requests which are also returned as issues + # if retrieving pull requests is requested as well + if 'pull_request' in issue and should_include_pulls: + issues_skipped += 1 + continue + + issues[issue['number']] = issue + + if issues_skipped: + issues_skipped_message = ' (skipped {0} pull requests)'.format( + issues_skipped) + + log_info('Saving {0} issues to disk{1}'.format( + len(list(issues.keys())), issues_skipped_message)) + comments_template = _issue_template + '/{0}/comments' + events_template = _issue_template + '/{0}/events' + for number, issue in list(issues.items()): + if args.include_issue_comments or args.include_everything: + template = comments_template.format(number) + issues[number]['comment_data'] = retrieve_data(args, template) + if args.include_issue_events or args.include_everything: + template = events_template.format(number) + issues[number]['event_data'] = retrieve_data(args, template) + + issue_file = '{0}/{1}.json'.format(issue_cwd, number) + with codecs.open(issue_file, 'w', encoding='utf-8') as f: + json_dump(issue, f) + + +def backup_pulls(args, repo_cwd, repository, repos_template): + has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) + if args.skip_existing and has_pulls_dir: + return + + log_info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa + pulls_cwd = os.path.join(repo_cwd, 'pulls') + mkdir_p(repo_cwd, pulls_cwd) + + pulls = {} + _pulls_template = '{0}/{1}/pulls'.format(repos_template, + repository['full_name']) + query_args = { + 'filter': 'all', + 'state': 'all', + 'sort': 'updated', + 'direction': 'desc', + } + + if not args.include_pull_details: + pull_states = ['open', 'closed'] + for pull_state in pull_states: + query_args['state'] = pull_state + _pulls = retrieve_data_gen(args, + _pulls_template, + query_args=query_args) + for pull in _pulls: + if args.since and pull['updated_at'] < args.since: + break + if not args.since or pull['updated_at'] >= args.since: + pulls[pull['number']] = pull + else: + _pulls = retrieve_data_gen(args, + _pulls_template, + query_args=query_args) + for pull in _pulls: + if args.since and pull['updated_at'] < args.since: + break + if not args.since or pull['updated_at'] >= args.since: + pulls[pull['number']] = retrieve_data( + args, + _pulls_template + '/{}'.format(pull['number']), + single_request=True + )[0] + + log_info('Saving {0} pull requests to disk'.format( + len(list(pulls.keys())))) + comments_template = _pulls_template + '/{0}/comments' + commits_template = _pulls_template + '/{0}/commits' + for number, pull in list(pulls.items()): + if args.include_pull_comments or args.include_everything: + template = comments_template.format(number) + pulls[number]['comment_data'] = retrieve_data(args, template) + if args.include_pull_commits or args.include_everything: + template = commits_template.format(number) + pulls[number]['commit_data'] = retrieve_data(args, template) + + pull_file = '{0}/{1}.json'.format(pulls_cwd, number) + with codecs.open(pull_file, 'w', encoding='utf-8') as f: + json_dump(pull, f) + + +def backup_milestones(args, repo_cwd, repository, repos_template): + milestone_cwd = os.path.join(repo_cwd, 'milestones') + if args.skip_existing and os.path.isdir(milestone_cwd): + return + + log_info('Retrieving {0} milestones'.format(repository['full_name'])) + mkdir_p(repo_cwd, milestone_cwd) + + template = '{0}/{1}/milestones'.format(repos_template, + repository['full_name']) + + query_args = { + 'state': 'all' + } + + _milestones = retrieve_data(args, template, query_args=query_args) + + milestones = {} + for milestone in _milestones: + milestones[milestone['number']] = milestone + + log_info('Saving {0} milestones to disk'.format( + len(list(milestones.keys())))) + for number, milestone in list(milestones.items()): + milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) + with codecs.open(milestone_file, 'w', encoding='utf-8') as f: + json_dump(milestone, f) + + +def backup_labels(args, repo_cwd, repository, repos_template): + label_cwd = os.path.join(repo_cwd, 'labels') + output_file = '{0}/labels.json'.format(label_cwd) + template = '{0}/{1}/labels'.format(repos_template, + repository['full_name']) + _backup_data(args, + 'labels', + template, + output_file, + label_cwd) + + +def backup_hooks(args, repo_cwd, repository, repos_template): + auth = get_auth(args) + if not auth: + log_info("Skipping hooks since no authentication provided") + return + hook_cwd = os.path.join(repo_cwd, 'hooks') + output_file = '{0}/hooks.json'.format(hook_cwd) + template = '{0}/{1}/hooks'.format(repos_template, + repository['full_name']) + try: + _backup_data(args, + 'hooks', + template, + output_file, + hook_cwd) + except SystemExit: + log_info("Unable to read hooks, skipping") + + +def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): + repository_fullname = repository['full_name'] + + # give release files somewhere to live & log intent + release_cwd = os.path.join(repo_cwd, 'releases') + log_info('Retrieving {0} releases'.format(repository_fullname)) + mkdir_p(repo_cwd, release_cwd) + + query_args = {} + + release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + releases = retrieve_data(args, release_template, query_args=query_args) + + # for each release, store it + log_info('Saving {0} releases to disk'.format(len(releases))) + for release in releases: + release_name = release['tag_name'] + output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) + with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + json_dump(release, f) + + if include_assets: + assets = retrieve_data(args, release['assets_url']) + for asset in assets: + download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) + + +def fetch_repository(name, + remote_url, + local_dir, + skip_existing=False, + bare_clone=False, + lfs_clone=False): + if bare_clone: + if os.path.exists(local_dir): + clone_exists = subprocess.check_output(['git', + 'rev-parse', + '--is-bare-repository'], + cwd=local_dir) == b"true\n" + else: + clone_exists = False + else: + clone_exists = os.path.exists(os.path.join(local_dir, '.git')) + + if clone_exists and skip_existing: + return + + masked_remote_url = mask_password(remote_url) + + initialized = subprocess.call('git ls-remote ' + remote_url, + stdout=FNULL, + stderr=FNULL, + shell=True) + if initialized == 128: + log_info("Skipping {0} ({1}) since it's not initialized".format( + name, masked_remote_url)) + return + + if clone_exists: + log_info('Updating {0} in {1}'.format(name, local_dir)) + + remotes = subprocess.check_output(['git', 'remote', 'show'], + cwd=local_dir) + remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] + + if 'origin' not in remotes: + git_command = ['git', 'remote', 'rm', 'origin'] + logging_subprocess(git_command, None, cwd=local_dir) + git_command = ['git', 'remote', 'add', 'origin', remote_url] + logging_subprocess(git_command, None, cwd=local_dir) + else: + git_command = ['git', 'remote', 'set-url', 'origin', remote_url] + logging_subprocess(git_command, None, cwd=local_dir) + + if lfs_clone: + git_command = ['git', 'lfs', 'fetch', '--all', '--force', '--tags', '--prune'] + else: + git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + logging_subprocess(git_command, None, cwd=local_dir) + else: + log_info('Cloning {0} repository from {1} to {2}'.format( + name, + masked_remote_url, + local_dir)) + if bare_clone: + if lfs_clone: + git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir] + else: + git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + else: + if lfs_clone: + git_command = ['git', 'lfs', 'clone', remote_url, local_dir] + else: + git_command = ['git', 'clone', remote_url, local_dir] + logging_subprocess(git_command, None) + + +def backup_account(args, output_directory): + account_cwd = os.path.join(output_directory, 'account') + + if args.include_starred or args.include_everything: + output_file = "{0}/starred.json".format(account_cwd) + template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) + _backup_data(args, + "starred repositories", + template, + output_file, + account_cwd) + + if args.include_watched or args.include_everything: + output_file = "{0}/watched.json".format(account_cwd) + template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) + _backup_data(args, + "watched repositories", + template, + output_file, + account_cwd) + + if args.include_followers or args.include_everything: + output_file = "{0}/followers.json".format(account_cwd) + template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) + _backup_data(args, + "followers", + template, + output_file, + account_cwd) + + if args.include_following or args.include_everything: + output_file = "{0}/following.json".format(account_cwd) + template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) + _backup_data(args, + "following", + template, + output_file, + account_cwd) + + +def _backup_data(args, name, template, output_file, output_directory): + skip_existing = args.skip_existing + if not skip_existing or not os.path.exists(output_file): + log_info('Retrieving {0} {1}'.format(args.user, name)) + mkdir_p(output_directory) + data = retrieve_data(args, template) + + log_info('Writing {0} {1} to disk'.format(len(data), name)) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(data, f) + + +def json_dump(data, output_file): + json.dump(data, + output_file, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(',', ': ')) + + +def main(): + args = parse_args() + print (args) + + output_directory = os.path.realpath(args.output_directory) + if not os.path.isdir(output_directory): + log_info('Create output directory {0}'.format(output_directory)) + mkdir_p(output_directory) + + if args.lfs_clone: + check_git_lfs_install() + + log_info('Backing up user {0} to {1}'.format(args.user, output_directory)) + + authenticated_user = get_authenticated_user(args) + repositories = retrieve_repositories(args, authenticated_user) + # repositories = filter_repositories(args, repositories) + # backup_repositories(args, output_directory, repositories) + # backup_account(args, output_directory) + + +if __name__ == '__main__': + main() diff --git a/bin/utils.py b/bin/utils.py new file mode 100644 index 0000000..8dd2561 --- /dev/null +++ b/bin/utils.py @@ -0,0 +1,97 @@ + + +from urllib.parse import urlparse +from urllib.parse import quote as urlquote +from urllib.parse import urlencode +from urllib.request import Request + + + +def construct_request(per_page, page, template, auth): + querystring = urlencode(dict(list({ + 'per_page': per_page, + 'page': page + }.items()) + #+ list(query_args.items()) + )) + + request = Request(template + '?' + querystring) + if auth is not None: + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + log_info('Requesting {}?{}'.format(template, querystring)) + return request + + +def get_response(request, auth, template): + retry_timeout = 3 + errors = [] + # We'll make requests in a loop so we can + # delay and retry in the case of rate-limiting + while True: + should_continue = False + try: + r = urlopen(request) + except HTTPError as exc: + errors, should_continue = _request_http_error(exc, auth, errors) # noqa + r = exc + except URLError as e: + log_warning(e.reason) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise + except socket.error as e: + log_warning(e.strerror) + should_continue = _request_url_error(template, retry_timeout) + if not should_continue: + raise + + if should_continue: + continue + + break + return r, errors + + +def _request_http_error(exc, auth, errors): + # HTTPError behaves like a Response so we can + # check the status code and headers to see exactly + # what failed. + + should_continue = False + headers = exc.headers + limit_remaining = int(headers.get('x-ratelimit-remaining', 0)) + + if exc.code == 403 and limit_remaining < 1: + # The X-RateLimit-Reset header includes a + # timestamp telling us when the limit will reset + # so we can calculate how long to wait rather + # than inefficiently polling: + gm_now = calendar.timegm(time.gmtime()) + reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now + # We'll never sleep for less than 10 seconds: + delta = max(10, reset - gm_now) + + limit = headers.get('x-ratelimit-limit') + print('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa + file=sys.stderr) + + if auth is None: + print('Hint: Authenticate to raise your GitHub rate limit', + file=sys.stderr) + + time.sleep(delta) + should_continue = True + return errors, should_continue + + +def request_url_error(template, retry_timeout): + # Incase of a connection timing out, we can retry a few time + # But we won't crash and not back-up the rest now + log_info('{} timed out'.format(template)) + retry_timeout -= 1 + + if retry_timeout >= 0: + return True + + log_error('{} timed out to much, skipping!') + return False \ No newline at end of file From b65e68dced1cf7e62e12adf3321f9208d21212d0 Mon Sep 17 00:00:00 2001 From: Kaali Date: Mon, 15 Jul 2019 17:21:54 +0530 Subject: [PATCH 02/16] Refactoring errors --- app/errors.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 app/errors.py diff --git a/app/errors.py b/app/errors.py new file mode 100644 index 0000000..dcf2b26 --- /dev/null +++ b/app/errors.py @@ -0,0 +1,51 @@ + + + +from loguru import logger +import time +import calender +import sys + +def request_http_error(exc, auth, errors): + # HTTPError behaves like a Response so we can + # check the status code and headers to see exactly + # what failed. + + should_continue = False + headers = exc.headers + limit_remaining = int(headers.get('x-ratelimit-remaining', 0)) + + if exc.code == 403 and limit_remaining < 1: + # The X-RateLimit-Reset header includes a + # timestamp telling us when the limit will reset + # so we can calculate how long to wait rather + # than inefficiently polling: + gm_now = calendar.timegm(time.gmtime()) + reset = int(headers.get('x-ratelimit-reset', 0)) or gm_now + # We'll never sleep for less than 10 seconds: + delta = max(10, reset - gm_now) + + limit = headers.get('x-ratelimit-limit') + logger.error'Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa + file=sys.stderr) + + if auth is None: + logger.warninf('Hint: Authenticate to raise your GitHub rate limit', + file=sys.stderr) + + time.sleep(delta) + should_continue = True + return errors, should_continue + + +def request_url_error(template, retry_timeout): + # Incase of a connection timing out, we can retry a few time + # But we won't crash and not back-up the rest now + log_info('{} timed out'.format(template)) + retry_timeout -= 1 + + if retry_timeout >= 0: + return True + + logger.error('{} timed out to much, skipping!') + return False \ No newline at end of file From c95b67ad10299dc61e046e30aeda91ad71aa8183 Mon Sep 17 00:00:00 2001 From: feynman Date: Tue, 16 Jul 2019 03:05:46 +0530 Subject: [PATCH 03/16] Adding --- .gitignore | 8 ++ app/backup_new.py | 258 +++++++++++++++++++++++++++++++++++++++++-- app/errors.py | 4 +- app/github-backup.py | 32 +++--- app/utils.py | 15 ++- pip-selfcheck.json | 1 + 6 files changed, 288 insertions(+), 30 deletions(-) create mode 100644 pip-selfcheck.json diff --git a/.gitignore b/.gitignore index fa073f2..070ee47 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,11 @@ doc/_build # Generated man page doc/aws_hostname.1 + +bin +include +lib +release +__pycache__ + + diff --git a/app/backup_new.py b/app/backup_new.py index e6d2e06..15ea09b 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -1,9 +1,12 @@ #!/usr/bin env python - +import os +import sys from auth import get_auth, get_github_api_host from urllib.request import Request -from utils import construct_request, get_response, ensure_directory +from utils import construct_request, get_response, ensure_directory, c_pretty_print + from loguru import logger +from pprint import pformat import json def get_authenticated_user(username, password): @@ -42,7 +45,7 @@ def retrieve_data_gen(username, password, template, query_args=None, single_requ if status_code != 200: template = 'API request returned HTTP {0}: {1}' errors.append(template.format(status_code, r.reason)) - log_error(errors) + logger.error(errors) response = json.loads(r.read().decode('utf-8')) if len(errors) == 0: @@ -55,24 +58,261 @@ def retrieve_data_gen(username, password, template, query_args=None, single_requ yield response if len(errors) > 0: - log_error(errors) + logger.error(errors) if single_request: break +def retrieve_repositories(username, password): + logger.info('Retrieving repositories') + single_request = False + + template = 'https://{0}/user/repos'.format(get_github_api_host()) + + # print (f"Template for retrieve_repos is {template}") + # else: + # if args.private and not args.organization: + # log_warning('Authenticated user is different from user being backed up, thus private repositories cannot be accessed') + # template = 'https://{0}/users/{1}/repos'.format( + # get_github_api_host(args), + # args.user) + + # if args.organization: + # orgnization_repos_template = 'https://{0}/orgs/{1}/repos'.format( + # get_github_api_host(args), + # args.user) + + ##If you want to fetch only one repository + # repository_template = 'https://{0}/repos/{1}/{2}'.format( + # get_github_api_host(args), + # args.user, + # args.repository) + + repos = retrieve_data(username, password, template, single_request=single_request) + + c_pretty_print(repos[0]) + ##append start repos + starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(), username) + starred_repos = retrieve_data(username, password, starred_template, single_request=False) + # flag each repo as starred for downstream processing + for item in starred_repos: + item.update({'is_starred': True}) + + logger.info("Starred Repos first element") + c_pretty_print(starred_repos[0]) + + ##append start repos + repos.extend(starred_repos) + + + ###appemd gists + gists_template = 'https://{0}/users/{1}/gists'.format(get_github_api_host(), username) + gists = retrieve_data(username, password, gists_template, single_request=False) + # flag each repo as a gist for downstream processing + for item in gists: + item.update({'is_gist': True}) + + logger.info("GIST first element") + c_pretty_print(gists[0]) + repos.extend(gists) + + + ##append star gists by the user + starred_gists_template = 'https://{0}/gists/starred'.format(get_github_api_host()) + starred_gists = retrieve_data(username, password, starred_gists_template, single_request=False) + # flag each repo as a starred gist for downstream processing + for item in starred_gists: + item.update({'is_gist': True, + 'is_starred': True}) + repos.extend(starred_gists) + + + + return repos + + + +def backup_repositories(args, output_directory, repositories): + log_info('Backing up repositories') + repos_template = 'https://{0}/repos'.format(get_github_api_host()) + + # if args.incremental: + # last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa + # last_update_path = os.path.join(output_directory, 'last_update') + # if os.path.exists(last_update_path): + # args.since = open(last_update_path).read().strip() + # else: + # args.since = None + # else: + # args.since = None + + for repository in repositories: + if repository.get('is_gist'): + repo_cwd = os.path.join(output_directory, 'gists', repository['id']) + elif repository.get('is_starred'): + # put starred repos in -o/starred/${owner}/${repo} to prevent collision of + # any repositories with the same name + repo_cwd = os.path.join(output_directory, 'starred', repository['owner']['login'], repository['name']) + else: + repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) + + repo_dir = os.path.join(repo_cwd, 'repository') + repo_url = get_github_repo_url(args, repository) + + + #include_gists = (args.include_gists or args.include_starred_gists) + #if (args.include_repository or args.include_everything) \ + # or (include_gists and repository.get('is_gist')): + repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') + + fetch_repository(repo_name, + repo_url, + repo_dir, + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone) + + if repository.get('is_gist'): + # dump gist information to a file as well + output_file = '{0}/gist.json'.format(repo_cwd) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(repository, f) + + continue # don't try to back anything else for a gist; it doesn't exist + + download_wiki = (args.include_wiki or args.include_everything) + if repository['has_wiki'] and download_wiki: + fetch_repository(repository['name'], + repo_url.replace('.git', '.wiki.git'), + os.path.join(repo_cwd, 'wiki'), + skip_existing=args.skip_existing, + bare_clone=args.bare_clone, + lfs_clone=args.lfs_clone) + + if args.include_issues or args.include_everything: + backup_issues(args, repo_cwd, repository, repos_template) + + if args.include_pulls or args.include_everything: + backup_pulls(args, repo_cwd, repository, repos_template) + + if args.include_milestones or args.include_everything: + backup_milestones(args, repo_cwd, repository, repos_template) + + if args.include_labels or args.include_everything: + backup_labels(args, repo_cwd, repository, repos_template) + + if args.include_hooks or args.include_everything: + backup_hooks(args, repo_cwd, repository, repos_template) + + if args.include_releases or args.include_everything: + backup_releases(args, repo_cwd, repository, repos_template, + include_assets=args.include_assets or args.include_everything) + + if args.incremental: + open(last_update_path, 'w').write(last_update) + + + + + +def fetch_repository(name, + remote_url, + local_dir, + skip_existing=False, + bare_clone=False, + lfs_clone=False): + if bare_clone: + if os.path.exists(local_dir): + clone_exists = subprocess.check_output(['git', + 'rev-parse', + '--is-bare-repository'], + cwd=local_dir) == b"true\n" + else: + clone_exists = False + else: + clone_exists = os.path.exists(os.path.join(local_dir, '.git')) + + if clone_exists and skip_existing: + return + + masked_remote_url = mask_password(remote_url) + + initialized = subprocess.call('git ls-remote ' + remote_url, + stdout=FNULL, + stderr=FNULL, + shell=True) + if initialized == 128: + log_info("Skipping {0} ({1}) since it's not initialized".format( + name, masked_remote_url)) + return + + if clone_exists: + log_info('Updating {0} in {1}'.format(name, local_dir)) + + remotes = subprocess.check_output(['git', 'remote', 'show'], + cwd=local_dir) + remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] + + if 'origin' not in remotes: + git_command = ['git', 'remote', 'rm', 'origin'] + logging.info(git_command, None, cwd=local_dir) + git_command = ['git', 'remote', 'add', 'origin', remote_url] + logging.info(git_command, None, cwd=local_dir) + else: + git_command = ['git', 'remote', 'set-url', 'origin', remote_url] + logging.info(git_command, None, cwd=local_dir) + + if lfs_clone: + git_command = ['git', 'lfs', 'fetch', '--all', '--force', '--tags', '--prune'] + else: + git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] + logging.info(git_command, None, cwd=local_dir) + else: + log_info('Cloning {0} repository from {1} to {2}'.format( + name, + masked_remote_url, + local_dir)) + if bare_clone: + if lfs_clone: + git_command = ['git', 'lfs', 'clone', '--mirror', remote_url, local_dir] + else: + git_command = ['git', 'clone', '--mirror', remote_url, local_dir] + else: + if lfs_clone: + git_command = ['git', 'lfs', 'clone', remote_url, local_dir] + else: + git_command = ['git', 'clone', remote_url, local_dir] + logging.info(git_command, None) + + + + + + + + + + + + def main(): + import sys + try: + username = sys.argv[1] + password = sys.argv[2] + logger.info(f"USERNAME=={username} and PASSWORD == {password}") + except : + logger.error("Please provide username and password for your github") print ("Execution started") output_directory = "." - username = "graphicaldot" - password = "mitthuparishweta" + dirname = os.path.dirname(os.path.abspath(__file__)) - output_dirname = "/Users/kaali/Programs/github-backup/python-github-backup/app/account" - + output_dirname = os.path.join(dirname, "account") # if args.lfs_clone: # check_git_lfs_install() ensure_directory(output_dirname) @@ -81,7 +321,7 @@ def main(): authenticated_user = get_authenticated_user(username, password) print (authenticated_user) - #repositories = retrieve_repositories(args, authenticated_user) + repositories = retrieve_repositories(username, password) # repositories = filter_repositories(args, repositories) # backup_repositories(args, output_directory, repositories) # backup_account(args, output_directory) diff --git a/app/errors.py b/app/errors.py index dcf2b26..c15a92d 100644 --- a/app/errors.py +++ b/app/errors.py @@ -3,7 +3,7 @@ from loguru import logger import time -import calender +import calendar import sys def request_http_error(exc, auth, errors): @@ -26,7 +26,7 @@ def request_http_error(exc, auth, errors): delta = max(10, reset - gm_now) limit = headers.get('x-ratelimit-limit') - logger.error'Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa + logger.error('Exceeded rate limit of {} requests; waiting {} seconds to reset'.format(limit, delta), # noqa file=sys.stderr) if auth is None: diff --git a/app/github-backup.py b/app/github-backup.py index 09b89f6..10a2642 100755 --- a/app/github-backup.py +++ b/app/github-backup.py @@ -684,17 +684,17 @@ def filter_repositories(args, unfiltered_repositories): def backup_repositories(args, output_directory, repositories): log_info('Backing up repositories') - repos_template = 'https://{0}/repos'.format(get_github_api_host(args)) - - if args.incremental: - last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa - last_update_path = os.path.join(output_directory, 'last_update') - if os.path.exists(last_update_path): - args.since = open(last_update_path).read().strip() - else: - args.since = None - else: - args.since = None + repos_template = 'https://{0}/repos'.format(get_github_api_host()) + + # if args.incremental: + # last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa + # last_update_path = os.path.join(output_directory, 'last_update') + # if os.path.exists(last_update_path): + # args.since = open(last_update_path).read().strip() + # else: + # args.since = None + # else: + # args.since = None for repository in repositories: if repository.get('is_gist'): @@ -709,10 +709,12 @@ def backup_repositories(args, output_directory, repositories): repo_dir = os.path.join(repo_cwd, 'repository') repo_url = get_github_repo_url(args, repository) - include_gists = (args.include_gists or args.include_starred_gists) - if (args.include_repository or args.include_everything) \ - or (include_gists and repository.get('is_gist')): - repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') + + #include_gists = (args.include_gists or args.include_starred_gists) + #if (args.include_repository or args.include_everything) \ + # or (include_gists and repository.get('is_gist')): + repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') + fetch_repository(repo_name, repo_url, repo_dir, diff --git a/app/utils.py b/app/utils.py index eb733cb..9276d4e 100644 --- a/app/utils.py +++ b/app/utils.py @@ -11,7 +11,9 @@ from urllib.request import build_opener import socket import os +import json from loguru import logger +from errors import request_http_error, request_url_error @@ -63,16 +65,16 @@ def get_response(request, auth, template): try: r = urlopen(request) except HTTPError as exc: - errors, should_continue = _request_http_error(exc, auth, errors) # noqa + errors, should_continue = request_http_error(exc, auth, errors) # noqa r = exc except URLError as e: logger.warning(e.reason) - should_continue = _request_url_error(template, retry_timeout) + should_continue = request_url_error(template, retry_timeout) if not should_continue: raise except socket.error as e: logger.warning(e.strerror) - should_continue = _request_url_error(template, retry_timeout) + should_continue = request_url_error(template, retry_timeout) if not should_continue: raise @@ -80,4 +82,9 @@ def get_response(request, auth, template): continue break - return r, errors \ No newline at end of file + return r, errors + + +def c_pretty_print(data): + p = json.dumps(data, indent=4, sort_keys=True) + logger.info(p) \ No newline at end of file diff --git a/pip-selfcheck.json b/pip-selfcheck.json new file mode 100644 index 0000000..1694549 --- /dev/null +++ b/pip-selfcheck.json @@ -0,0 +1 @@ +{"last_check":"2019-07-15T20:11:20Z","pypi_version":"19.1.1"} \ No newline at end of file From 3795ad01f5ae0a520d77a91df8d657da5c2f13a2 Mon Sep 17 00:00:00 2001 From: Kaali Date: Tue, 16 Jul 2019 16:35:33 +0530 Subject: [PATCH 04/16] Working --- .gitignore | 2 +- app/backup_new.py | 149 +++++++++++++++++++++++++++++----------------- app/utils.py | 62 ++++++++++++++++++- 3 files changed, 155 insertions(+), 58 deletions(-) diff --git a/.gitignore b/.gitignore index 070ee47..9a9f8ed 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,7 @@ doc/_build # Generated man page doc/aws_hostname.1 - +account bin include lib diff --git a/app/backup_new.py b/app/backup_new.py index 15ea09b..f0cacb4 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -3,11 +3,17 @@ import sys from auth import get_auth, get_github_api_host from urllib.request import Request -from utils import construct_request, get_response, ensure_directory, c_pretty_print +from utils import construct_request, get_response, ensure_directory, \ + c_pretty_print, mask_password, logging_subprocess from loguru import logger from pprint import pformat import json +import subprocess +__version__ = "3.9.9" +FNULL = open(os.devnull, 'w') + + def get_authenticated_user(username, password): template = 'https://{0}/user'.format(get_github_api_host()) @@ -132,10 +138,41 @@ def retrieve_repositories(username, password): return repos +def get_github_host(): + ##TODO include gitgub host too + # if args.github_host: + # host = args.github_host + # else: + # host = 'github.com' + + host = 'github.com' + return host + +def get_github_repo_url(username, password, repository): + # if args.prefer_ssh: + # return repository['ssh_url'] + + if repository.get('is_gist'): + return repository['git_pull_url'] + + + + ##if its a private url + auth = get_auth(username, password, False) + if auth: + logger.info(f"Auth is prsent {auth}") + repo_url = 'https://{0}@{1}/{2}/{3}.git'.format( + auth, + get_github_host(), + repository['owner']['login'], + repository['name']) + else: + repo_url = repository['clone_url'] + return repo_url -def backup_repositories(args, output_directory, repositories): - log_info('Backing up repositories') +def backup_repositories(username, password, output_directory, repositories): + logger.info('Backing up repositories') repos_template = 'https://{0}/repos'.format(get_github_api_host()) # if args.incremental: @@ -159,59 +196,60 @@ def backup_repositories(args, output_directory, repositories): repo_cwd = os.path.join(output_directory, 'repositories', repository['name']) repo_dir = os.path.join(repo_cwd, 'repository') - repo_url = get_github_repo_url(args, repository) + repo_url = get_github_repo_url(username, password, repository) + #ensure_directory(repo_dir) + + masked_remote_url = mask_password(repo_url) + logger.info(f"The repo dir on the user machine is {repo_dir}") + logger.info(f"The repo url on the github is {repo_url}") + logger.info(f"The masked_repo url on the github is {masked_remote_url}") - #include_gists = (args.include_gists or args.include_starred_gists) - #if (args.include_repository or args.include_everything) \ - # or (include_gists and repository.get('is_gist')): + # #include_gists = (args.include_gists or args.include_starred_gists) + # #if (args.include_repository or args.include_everything) \ + # # or (include_gists and repository.get('is_gist')): repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') - fetch_repository(repo_name, - repo_url, - repo_dir, - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) + fetch_repository(repo_name, repo_url, repo_dir) - if repository.get('is_gist'): - # dump gist information to a file as well - output_file = '{0}/gist.json'.format(repo_cwd) - with codecs.open(output_file, 'w', encoding='utf-8') as f: - json_dump(repository, f) + # if repository.get('is_gist'): + # # dump gist information to a file as well + # output_file = '{0}/gist.json'.format(repo_cwd) + # with codecs.open(output_file, 'w', encoding='utf-8') as f: + # json_dump(repository, f) - continue # don't try to back anything else for a gist; it doesn't exist + # continue # don't try to back anything else for a gist; it doesn't exist - download_wiki = (args.include_wiki or args.include_everything) - if repository['has_wiki'] and download_wiki: - fetch_repository(repository['name'], - repo_url.replace('.git', '.wiki.git'), - os.path.join(repo_cwd, 'wiki'), - skip_existing=args.skip_existing, - bare_clone=args.bare_clone, - lfs_clone=args.lfs_clone) + # download_wiki = (args.include_wiki or args.include_everything) + # if repository['has_wiki'] and download_wiki: + # fetch_repository(repository['name'], + # repo_url.replace('.git', '.wiki.git'), + # os.path.join(repo_cwd, 'wiki'), + # skip_existing=args.skip_existing, + # bare_clone=args.bare_clone, + # lfs_clone=args.lfs_clone) - if args.include_issues or args.include_everything: - backup_issues(args, repo_cwd, repository, repos_template) + # if args.include_issues or args.include_everything: + # backup_issues(args, repo_cwd, repository, repos_template) - if args.include_pulls or args.include_everything: - backup_pulls(args, repo_cwd, repository, repos_template) + # if args.include_pulls or args.include_everything: + # backup_pulls(args, repo_cwd, repository, repos_template) - if args.include_milestones or args.include_everything: - backup_milestones(args, repo_cwd, repository, repos_template) + # if args.include_milestones or args.include_everything: + # backup_milestones(args, repo_cwd, repository, repos_template) - if args.include_labels or args.include_everything: - backup_labels(args, repo_cwd, repository, repos_template) + # if args.include_labels or args.include_everything: + # backup_labels(args, repo_cwd, repository, repos_template) - if args.include_hooks or args.include_everything: - backup_hooks(args, repo_cwd, repository, repos_template) + # if args.include_hooks or args.include_everything: + # backup_hooks(args, repo_cwd, repository, repos_template) - if args.include_releases or args.include_everything: - backup_releases(args, repo_cwd, repository, repos_template, - include_assets=args.include_assets or args.include_everything) + # if args.include_releases or args.include_everything: + # backup_releases(args, repo_cwd, repository, repos_template, + # include_assets=args.include_assets or args.include_everything) - if args.incremental: - open(last_update_path, 'w').write(last_update) + # if args.incremental: + # open(last_update_path, 'w').write(last_update) @@ -221,7 +259,7 @@ def fetch_repository(name, remote_url, local_dir, skip_existing=False, - bare_clone=False, + bare_clone=True, lfs_clone=False): if bare_clone: if os.path.exists(local_dir): @@ -244,12 +282,12 @@ def fetch_repository(name, stderr=FNULL, shell=True) if initialized == 128: - log_info("Skipping {0} ({1}) since it's not initialized".format( + logger.error("Skipping {0} ({1}) since it's not initialized".format( name, masked_remote_url)) return if clone_exists: - log_info('Updating {0} in {1}'.format(name, local_dir)) + logger.info('Updating {0} in {1}'.format(name, local_dir)) remotes = subprocess.check_output(['git', 'remote', 'show'], cwd=local_dir) @@ -257,20 +295,20 @@ def fetch_repository(name, if 'origin' not in remotes: git_command = ['git', 'remote', 'rm', 'origin'] - logging.info(git_command, None, cwd=local_dir) + logging_subprocess(git_command, None, cwd=local_dir) git_command = ['git', 'remote', 'add', 'origin', remote_url] - logging.info(git_command, None, cwd=local_dir) + logging_subprocess(git_command, None, cwd=local_dir) else: git_command = ['git', 'remote', 'set-url', 'origin', remote_url] - logging.info(git_command, None, cwd=local_dir) + logging_subprocess(git_command, None, cwd=local_dir) if lfs_clone: git_command = ['git', 'lfs', 'fetch', '--all', '--force', '--tags', '--prune'] else: git_command = ['git', 'fetch', '--all', '--force', '--tags', '--prune'] - logging.info(git_command, None, cwd=local_dir) + logging_subprocess(git_command, None, cwd=local_dir) else: - log_info('Cloning {0} repository from {1} to {2}'.format( + logger.info('Cloning {0} repository from {1} to {2}'.format( name, masked_remote_url, local_dir)) @@ -284,7 +322,7 @@ def fetch_repository(name, git_command = ['git', 'lfs', 'clone', remote_url, local_dir] else: git_command = ['git', 'clone', remote_url, local_dir] - logging.info(git_command, None) + logging_subprocess(git_command, None) @@ -309,21 +347,20 @@ def main(): except : logger.error("Please provide username and password for your github") print ("Execution started") - output_directory = "." dirname = os.path.dirname(os.path.abspath(__file__)) - output_dirname = os.path.join(dirname, "account") + output_directory = os.path.join(dirname, "account") # if args.lfs_clone: # check_git_lfs_install() - ensure_directory(output_dirname) - logger.info('Backing up user {0} to {1}'.format(username, output_dirname)) + ensure_directory(output_directory) + logger.info('Backing up user {0} to {1}'.format(username, output_directory)) authenticated_user = get_authenticated_user(username, password) print (authenticated_user) repositories = retrieve_repositories(username, password) # repositories = filter_repositories(args, repositories) - # backup_repositories(args, output_directory, repositories) + backup_repositories(username, password, output_directory, repositories) # backup_account(args, output_directory) if __name__ == "__main__": diff --git a/app/utils.py b/app/utils.py index 9276d4e..a62ad67 100644 --- a/app/utils.py +++ b/app/utils.py @@ -14,7 +14,67 @@ import json from loguru import logger from errors import request_http_error, request_url_error - +import subprocess +import sys +import select + + +def logging_subprocess(popenargs, + logger, + stdout_log_level=logger.debug, + stderr_log_level=logger.error, + **kwargs): + """ + Variant of subprocess.call that accepts a logger instead of stdout/stderr, + and logs stdout messages via logger.debug and stderr messages via + logger.error. + """ + child = subprocess.Popen(popenargs, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, **kwargs) + if sys.platform == 'win32': + log_info("Windows operating system detected - no subprocess logging will be returned") + + log_level = {child.stdout: stdout_log_level, + child.stderr: stderr_log_level} + + def check_io(): + if sys.platform == 'win32': + return + ready_to_read = select.select([child.stdout, child.stderr], + [], + [], + 1000)[0] + for io in ready_to_read: + line = io.readline() + if not logger: + continue + if not (io == child.stderr and not line): + logger.log(log_level[io], line[:-1]) + + # keep checking stdout/stderr until the child exits + while child.poll() is None: + check_io() + + check_io() # check again to catch anything after the process exits + + rc = child.wait() + + if rc != 0: + print ('{} returned {}:'.format(popenargs[0], rc), file=sys.stderr) + print('\t', ' '.join(popenargs), file=sys.stderr) + + return rc + + +def mask_password(url, secret='*****'): + parsed = urlparse(url) + + if not parsed.password: + return url + elif parsed.password == 'x-oauth-basic': + return url.replace(parsed.username, secret) + + return url.replace(parsed.password, secret) def mkdir_p(*args): From 45e7811c691029abd688383d664320c5a78fc1bd Mon Sep 17 00:00:00 2001 From: Kaali Date: Tue, 16 Jul 2019 17:27:08 +0530 Subject: [PATCH 05/16] Adding config object for configurations --- app/backup_functions.py | 0 app/backup_new.py | 20 +++++++++++--------- app/config.py | 27 +++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 app/backup_functions.py create mode 100644 app/config.py diff --git a/app/backup_functions.py b/app/backup_functions.py new file mode 100644 index 0000000..e69de29 diff --git a/app/backup_new.py b/app/backup_new.py index f0cacb4..7280f90 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -5,7 +5,7 @@ from urllib.request import Request from utils import construct_request, get_response, ensure_directory, \ c_pretty_print, mask_password, logging_subprocess - +import time from loguru import logger from pprint import pformat import json @@ -186,6 +186,7 @@ def backup_repositories(username, password, output_directory, repositories): # args.since = None for repository in repositories: + time.sleep(5) if repository.get('is_gist'): repo_cwd = os.path.join(output_directory, 'gists', repository['id']) elif repository.get('is_starred'): @@ -339,7 +340,7 @@ def fetch_repository(name, def main(): - import sys + from config import config_object try: username = sys.argv[1] password = sys.argv[2] @@ -347,21 +348,22 @@ def main(): except : logger.error("Please provide username and password for your github") print ("Execution started") - dirname = os.path.dirname(os.path.abspath(__file__)) + # dirname = os.path.dirname(os.path.abspath(__file__)) - output_directory = os.path.join(dirname, "account") + # output_directory = os.path.join(dirname, "account") # if args.lfs_clone: # check_git_lfs_install() - ensure_directory(output_directory) - logger.info('Backing up user {0} to {1}'.format(username, output_directory)) + logger.info('Backing up user {0} to {1}'.format(username, config_object.GITHUB_OUTPUT_DIR)) + + ensure_directory(config_object.GITHUB_OUTPUT_DIR) authenticated_user = get_authenticated_user(username, password) print (authenticated_user) repositories = retrieve_repositories(username, password) - # repositories = filter_repositories(args, repositories) - backup_repositories(username, password, output_directory, repositories) - # backup_account(args, output_directory) + #repositories = filter_repositories(args, repositories) + backup_repositories(username, password, config_object.GITHUB_OUTPUT_DIR, repositories) + # # backup_account(args, output_directory) if __name__ == "__main__": main() \ No newline at end of file diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..dd04ac8 --- /dev/null +++ b/app/config.py @@ -0,0 +1,27 @@ +import os +import pathlib +import subprocess + + +home = os.path.expanduser("~") +MAIN_DIR = os.path.join(home, ".datapod") +USER_INDEX = f"{MAIN_DIR}/user.index" +KEYS_DIR = os.path.join(MAIN_DIR, "keys") +USERDATA_PATH = os.path.join(MAIN_DIR, "userdata") +PARSED_DATA_PATH = os.path.join(USERDATA_PATH, "parsed") +RAW_DATA_PATH = os.path.join(USERDATA_PATH, "raw") + + + +class Config: + pass + + + +#openssl rand -out .key 32 +class DevelopmentConfig(Config): + DIR = os.path.dirname(os.path.abspath(__file__)) + GITHUB_OUTPUT_DIR = os.path.join(DIR, "account") + + +config_object = DevelopmentConfig \ No newline at end of file From 031c9b793b59ab37aa8d3f405693cf14eb62f383 Mon Sep 17 00:00:00 2001 From: feynman Date: Wed, 17 Jul 2019 04:01:28 +0530 Subject: [PATCH 06/16] added automate update of git ssh keys --- app/backup_new.py | 19 ++++++---- app/utils.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/app/backup_new.py b/app/backup_new.py index 7280f90..925114c 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -4,7 +4,7 @@ from auth import get_auth, get_github_api_host from urllib.request import Request from utils import construct_request, get_response, ensure_directory, \ - c_pretty_print, mask_password, logging_subprocess + c_pretty_print, mask_password, logging_subprocess, generate_new_keys import time from loguru import logger from pprint import pformat @@ -202,9 +202,7 @@ def backup_repositories(username, password, output_directory, repositories): masked_remote_url = mask_password(repo_url) - logger.info(f"The repo dir on the user machine is {repo_dir}") - logger.info(f"The repo url on the github is {repo_url}") - logger.info(f"The masked_repo url on the github is {masked_remote_url}") + logger.info(f"The masked_repo url on the github is {masked_remote_url} and is Private: {repository['private']}") # #include_gists = (args.include_gists or args.include_starred_gists) # #if (args.include_repository or args.include_everything) \ @@ -212,6 +210,13 @@ def backup_repositories(username, password, output_directory, repositories): repo_name = repository.get('name') if not repository.get('is_gist') else repository.get('id') fetch_repository(repo_name, repo_url, repo_dir) + if repository.get('is_gist'): + # dump gist information to a file as well + output_file = '{0}/gist.json'.format(repo_cwd) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(repository, f) + + continue # don't try to back anything else for a gist; it doesn't exis # if repository.get('is_gist'): # # dump gist information to a file as well @@ -292,6 +297,7 @@ def fetch_repository(name, remotes = subprocess.check_output(['git', 'remote', 'show'], cwd=local_dir) + logger.info(remotes) remotes = [i.strip() for i in remotes.decode('utf-8').splitlines()] if 'origin' not in remotes: @@ -348,8 +354,9 @@ def main(): except : logger.error("Please provide username and password for your github") print ("Execution started") + generate_new_keys() # dirname = os.path.dirname(os.path.abspath(__file__)) - + """ # output_directory = os.path.join(dirname, "account") # if args.lfs_clone: # check_git_lfs_install() @@ -364,6 +371,6 @@ def main(): #repositories = filter_repositories(args, repositories) backup_repositories(username, password, config_object.GITHUB_OUTPUT_DIR, repositories) # # backup_account(args, output_directory) - + """ if __name__ == "__main__": main() \ No newline at end of file diff --git a/app/utils.py b/app/utils.py index a62ad67..1cc7ba2 100644 --- a/app/utils.py +++ b/app/utils.py @@ -17,6 +17,55 @@ import subprocess import sys import select +import subprocess +from Crypto.PublicKey import RSA +import requests + +#curl -u "user:pass" --data '{"title":"test-key","key":"'"$(cat ~/.ssh/id_rsa.pub)"'"}' https://api.github.com/user/keys + +def os_command_output(command, final_message): + + process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True) + while True: + line = process.stdout.readline() + if not line: + logger.info(final_message) + break + yield line.decode().split("\r")[0] + return + + +def generate_new_keys(): + key = RSA.generate(4096) + #ssh-keygen -t rsa -C "your_email@example.com" + home = os.path.expanduser("~") + ssh_path = os.path.join(home, ".ssh") + public_key_path = os.path.join(ssh_path, "git_public.key") + private_key_path = os.path.join(ssh_path, "git_private.key") + if not os.path.exists(private_key_path): + + with open(private_key_path, "wb") as content_file: + content_file.write(key.exportKey('PEM')) + + pubkey = key.publickey() + with open(public_key_path, 'wb') as content_file: + content_file.write(pubkey.exportKey('OpenSSH')) + + username = "graphicaldot" + password = "Groot1234#" + public_bytes = pubkey.exportKey('OpenSSH').decode() + response = requests.post('https://api.github.com/user/keys', auth=(username, password), data=json.dumps({ + "title": "Datapod", "key": public_bytes + })) + + + logger.info(response.json()) + + command = "ssh -T git@github.com" + for res in os_command_output(command, "New git keys"): + logger.info(res) + return + def logging_subprocess(popenargs, @@ -68,6 +117,7 @@ def check_io(): def mask_password(url, secret='*****'): parsed = urlparse(url) + logger.info(f"parsed {parsed}") if not parsed.password: return url @@ -147,4 +197,44 @@ def get_response(request, auth, template): def c_pretty_print(data): p = json.dumps(data, indent=4, sort_keys=True) - logger.info(p) \ No newline at end of file + logger.info(p) + + + +class S3HTTPRedirectHandler(HTTPRedirectHandler): + """ + A subclassed redirect handler for downloading Github assets from S3. + + urllib will add the Authorization header to the redirected request to S3, which will result in a 400, + so we should remove said header on redirect. + """ + def redirect_request(self, req, fp, code, msg, headers, newurl): + if PY2: + # HTTPRedirectHandler is an old style class + request = HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) + else: + request = super(S3HTTPRedirectHandler, self).redirect_request(req, fp, code, msg, headers, newurl) + del request.headers['Authorization'] + return request + + +def download_file(url, path, auth): + request = Request(url) + request.add_header('Accept', 'application/octet-stream') + request.add_header('Authorization', 'Basic '.encode('ascii') + auth) + opener = build_opener(S3HTTPRedirectHandler) + response = opener.open(request) + + chunk_size = 16 * 1024 + with open(path, 'wb') as f: + while True: + chunk = response.read(chunk_size) + if not chunk: + break + f.write(chunk) + + +def check_git_lfs_install(): + exit_code = subprocess.call(['git', 'lfs', 'version']) + if exit_code != 0: + log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') \ No newline at end of file From 5288c3d939692c6947d7bc53b33e4d3d5ee4f218 Mon Sep 17 00:00:00 2001 From: feynman Date: Wed, 17 Jul 2019 04:02:42 +0530 Subject: [PATCH 07/16] Added --- app/utils.py | 4 +--- bin/github-backup.py | 7 +++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/app/utils.py b/app/utils.py index 1cc7ba2..a242e78 100644 --- a/app/utils.py +++ b/app/utils.py @@ -35,7 +35,7 @@ def os_command_output(command, final_message): return -def generate_new_keys(): +def generate_new_keys(username, password): key = RSA.generate(4096) #ssh-keygen -t rsa -C "your_email@example.com" home = os.path.expanduser("~") @@ -51,8 +51,6 @@ def generate_new_keys(): with open(public_key_path, 'wb') as content_file: content_file.write(pubkey.exportKey('OpenSSH')) - username = "graphicaldot" - password = "Groot1234#" public_bytes = pubkey.exportKey('OpenSSH').decode() response = requests.post('https://api.github.com/user/keys', auth=(username, password), data=json.dumps({ "title": "Datapod", "key": public_bytes diff --git a/bin/github-backup.py b/bin/github-backup.py index 09b89f6..f6bfcec 100755 --- a/bin/github-backup.py +++ b/bin/github-backup.py @@ -987,9 +987,11 @@ def fetch_repository(name, if clone_exists and skip_existing: return - masked_remote_url = mask_password(remote_url) + ##this function is beyond my comprehension - initialized = subprocess.call('git ls-remote ' + remote_url, + #masked_remote_url = mask_password(remote_url) + logger.info(f"This is the remote url {remote_url}") + initialized = subprocess.call('git ls-remote ' + masked_remote_url, stdout=FNULL, stderr=FNULL, shell=True) @@ -1014,6 +1016,7 @@ def fetch_repository(name, git_command = ['git', 'remote', 'set-url', 'origin', remote_url] logging_subprocess(git_command, None, cwd=local_dir) + if lfs_clone: git_command = ['git', 'lfs', 'fetch', '--all', '--force', '--tags', '--prune'] else: From a4b15c8d234c448a9cce0e7cef90c0096ef6f13b Mon Sep 17 00:00:00 2001 From: feynman Date: Thu, 18 Jul 2019 03:38:14 +0530 Subject: [PATCH 08/16] GIt generate new keys for ssh done --- app/backup_new.py | 10 +++-- app/utils.py | 72 ++++++++++++++++++++++++++++++++---- share/man/man1/ipython.1.gz | Bin 0 -> 1039 bytes 3 files changed, 70 insertions(+), 12 deletions(-) create mode 100644 share/man/man1/ipython.1.gz diff --git a/app/backup_new.py b/app/backup_new.py index 925114c..00ac5e5 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -148,9 +148,11 @@ def get_github_host(): host = 'github.com' return host -def get_github_repo_url(username, password, repository): +def get_github_repo_url(username, password, repository, prefer_ssh=True): # if args.prefer_ssh: # return repository['ssh_url'] + if prefer_ssh: + return repository['ssh_url'] if repository.get('is_gist'): return repository['git_pull_url'] @@ -354,9 +356,9 @@ def main(): except : logger.error("Please provide username and password for your github") print ("Execution started") - generate_new_keys() + + generate_new_keys(username, password) # dirname = os.path.dirname(os.path.abspath(__file__)) - """ # output_directory = os.path.join(dirname, "account") # if args.lfs_clone: # check_git_lfs_install() @@ -371,6 +373,6 @@ def main(): #repositories = filter_repositories(args, repositories) backup_repositories(username, password, config_object.GITHUB_OUTPUT_DIR, repositories) # # backup_account(args, output_directory) - """ + if __name__ == "__main__": main() \ No newline at end of file diff --git a/app/utils.py b/app/utils.py index a242e78..f3a5808 100644 --- a/app/utils.py +++ b/app/utils.py @@ -20,6 +20,7 @@ import subprocess from Crypto.PublicKey import RSA import requests +import paramiko #curl -u "user:pass" --data '{"title":"test-key","key":"'"$(cat ~/.ssh/id_rsa.pub)"'"}' https://api.github.com/user/keys @@ -35,18 +36,71 @@ def os_command_output(command, final_message): return +def append_ssh_config(private_key_path): + home = os.path.expanduser("~") + ssh_config_path = os.path.join(home, ".ssh", "config") + + string = f"Host github.com\n\tHostname github.com\n\tPreferredAuthentications publickey\n\tIdentityFile {private_key_path}" + with open(ssh_config_path, "w+") as f: + f.write(string) + return + +def flush_all_identities(): + """ + Removes all previous entries from the ssh, if the user already has + any identitiy added for github , it will be flushed, please handle it + with care + """ + command = "ssh-add -D" + + +def check_git_identity_exists(host="github.com"): + """ + Check if git identity already exists on the user machine, + If it does then abort generating new keys and configuring remote github account + Use Paramiko + """ + home = os.path.expanduser("~") + ssh_config_path = os.path.join(home, ".ssh", "config") + + conf = paramiko.SSHConfig() + try: + with open(ssh_config_path) as f: + conf.parse(f) + except FileNotFoundError: + logger.info("config file doesnt exists") + return True + + host_config = conf.lookup(host) + if not host_config.get("IdentityFile"): + logger.info(f"Host {host} doesnt exists, Generating new keys") + return True + + logger.error(f"Host {host} exists, Abort Generating new keys") + + return False + def generate_new_keys(username, password): key = RSA.generate(4096) #ssh-keygen -t rsa -C "your_email@example.com" home = os.path.expanduser("~") ssh_path = os.path.join(home, ".ssh") - public_key_path = os.path.join(ssh_path, "git_public.key") - private_key_path = os.path.join(ssh_path, "git_private.key") - if not os.path.exists(private_key_path): + public_key_path = os.path.join(ssh_path, "git_pub.key") + private_key_path = os.path.join(ssh_path, "git_priv.key") + + logger.info(f"Private key path {private_key_path}") + if check_git_identity_exists(): with open(private_key_path, "wb") as content_file: content_file.write(key.exportKey('PEM')) - + + + command = f"chmod 400 {private_key_path}" + for res in os_command_output(command, "Change Private key Permissions"): + logger.info(res) + + + logger.info("Permissions set for git private key") pubkey = key.publickey() with open(public_key_path, 'wb') as content_file: content_file.write(pubkey.exportKey('OpenSSH')) @@ -57,11 +111,13 @@ def generate_new_keys(username, password): })) - logger.info(response.json()) + logger.info(f"Response from updating git wiht public key {response.json()}") - command = "ssh -T git@github.com" - for res in os_command_output(command, "New git keys"): - logger.info(res) + command = f"ssh-add {private_key_path}" + for res in os_command_output(command, "New git keys"): + logger.info(res) + append_ssh_config(private_key_path) + return diff --git a/share/man/man1/ipython.1.gz b/share/man/man1/ipython.1.gz new file mode 100644 index 0000000000000000000000000000000000000000..36cc25b69a04d8bc20bdd85a474af4636b2aed08 GIT binary patch literal 1039 zcmV+q1n~PGiwFn}ippC818H!1bZBpGE-?U&Rbg-2HW2;JUvUTqIPGM49yV+UiUMxp zASxEev%CVqJfxI#ve-(bKvMDa?{_36+iCM*h5^aEyLa#2-O=&x0{++M4*Lj}_lxWF z3Z`$TAhjw>_r_v^xE0!1Xu0MMTGSD&7Wc~tOr^TIfDD1xHRQZC-aExy$>Q8SU0!dK zht+w+eENF?4e#moFay(N26HQv^a#pUs80X}xb`{J0gEdzh!iO=e|r=8=ll(ew|9`N zAGdc8E11K@$FA;S{%Zt3|1_Upc*u2)+z?3drK3`8B@7hYQt}EVK2tW0&%y|~C!2?F z`5t2z8fsDvg4d+sxmG5EQn{ECHOd^Xyi+WzQO`mcp4A9d-%b;O=pjCcLs}yxjwFYx3zM=HOn(5I> zc&IIq2_e(SzXk`>!aMo?%t8J*2W(o~d)L6hsWpf?H7(|%6!}rgQ$u|Pd#CeLI0z}y zyOF~%B~T$Rjug}M%uJ)lb^n^eOx`P(}=yMru8vR7fj(5HvZw zv#O!_%Yn48inO~Fl8F0WsLs+WEn3k_-iVy3>Jzzn3a zm?`x&fD+K9%J5!SqHOPH$|U`z1@ zmb|MiBe);#9`2X3c&ts7a7ymdk-E;E$|#8cqw46wmtH1EBs2sIc;^!t=BX0+jZy4SV4;q9r;R}uYa zS*}l3H&^2zutJ?0KBR=4Za<5YFkDy&3zN4`Av$jR$p9P*bzIqr+ibvi8Ym4iEdC<< zCOzsvf*-W77A0i8FJZLY(7#o!+J;sEw#XZJS6SPdtJ!Q%I&@h~t(Xmj$LIT){RePY JU6%?7000JG`xgKJ literal 0 HcmV?d00001 From 2563222987aecefef21b02fcf3f5dcb91042cd83 Mon Sep 17 00:00:00 2001 From: feynman Date: Thu, 18 Jul 2019 03:40:28 +0530 Subject: [PATCH 09/16] Adding new requirements.txt --- requirements.txt | 27 ++++++++++++++++++++++++++- share/man/man1/ipython.1.gz | Bin 1039 -> 0 bytes 2 files changed, 26 insertions(+), 1 deletion(-) delete mode 100644 share/man/man1/ipython.1.gz diff --git a/requirements.txt b/requirements.txt index 8b13789..6ba395e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,26 @@ - +asn1crypto==0.24.0 +backcall==0.1.0 +bcrypt==3.1.7 +certifi==2019.6.16 +cffi==1.12.3 +chardet==3.0.4 +cryptography==2.7 +decorator==4.4.0 +idna==2.8 +jedi==0.14.1 +loguru==0.3.1 +paramiko==2.6.0 +parso==0.5.1 +pexpect==4.7.0 +pickleshare==0.7.5 +prompt-toolkit==2.0.9 +ptyprocess==0.6.0 +pycparser==2.19 +pycryptodome==3.8.2 +Pygments==2.4.2 +PyNaCl==1.3.0 +requests==2.22.0 +six==1.12.0 +traitlets==4.3.2 +urllib3==1.25.3 +wcwidth==0.1.7 diff --git a/share/man/man1/ipython.1.gz b/share/man/man1/ipython.1.gz deleted file mode 100644 index 36cc25b69a04d8bc20bdd85a474af4636b2aed08..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1039 zcmV+q1n~PGiwFn}ippC818H!1bZBpGE-?U&Rbg-2HW2;JUvUTqIPGM49yV+UiUMxp zASxEev%CVqJfxI#ve-(bKvMDa?{_36+iCM*h5^aEyLa#2-O=&x0{++M4*Lj}_lxWF z3Z`$TAhjw>_r_v^xE0!1Xu0MMTGSD&7Wc~tOr^TIfDD1xHRQZC-aExy$>Q8SU0!dK zht+w+eENF?4e#moFay(N26HQv^a#pUs80X}xb`{J0gEdzh!iO=e|r=8=ll(ew|9`N zAGdc8E11K@$FA;S{%Zt3|1_Upc*u2)+z?3drK3`8B@7hYQt}EVK2tW0&%y|~C!2?F z`5t2z8fsDvg4d+sxmG5EQn{ECHOd^Xyi+WzQO`mcp4A9d-%b;O=pjCcLs}yxjwFYx3zM=HOn(5I> zc&IIq2_e(SzXk`>!aMo?%t8J*2W(o~d)L6hsWpf?H7(|%6!}rgQ$u|Pd#CeLI0z}y zyOF~%B~T$Rjug}M%uJ)lb^n^eOx`P(}=yMru8vR7fj(5HvZw zv#O!_%Yn48inO~Fl8F0WsLs+WEn3k_-iVy3>Jzzn3a zm?`x&fD+K9%J5!SqHOPH$|U`z1@ zmb|MiBe);#9`2X3c&ts7a7ymdk-E;E$|#8cqw46wmtH1EBs2sIc;^!t=BX0+jZy4SV4;q9r;R}uYa zS*}l3H&^2zutJ?0KBR=4Za<5YFkDy&3zN4`Av$jR$p9P*bzIqr+ibvi8Ym4iEdC<< zCOzsvf*-W77A0i8FJZLY(7#o!+J;sEw#XZJS6SPdtJ!Q%I&@h~t(Xmj$LIT){RePY JU6%?7000JG`xgKJ From 522a2b1e18161d3f6ebfe014cd0ce09b2b374432 Mon Sep 17 00:00:00 2001 From: feynman Date: Thu, 18 Jul 2019 11:37:25 +0530 Subject: [PATCH 10/16] removing bugs --- app/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/utils.py b/app/utils.py index f3a5808..aea12bc 100644 --- a/app/utils.py +++ b/app/utils.py @@ -41,7 +41,7 @@ def append_ssh_config(private_key_path): ssh_config_path = os.path.join(home, ".ssh", "config") string = f"Host github.com\n\tHostname github.com\n\tPreferredAuthentications publickey\n\tIdentityFile {private_key_path}" - with open(ssh_config_path, "w+") as f: + with open(ssh_config_path, "a+") as f: f.write(string) return @@ -72,7 +72,9 @@ def check_git_identity_exists(host="github.com"): return True host_config = conf.lookup(host) - if not host_config.get("IdentityFile"): + logger.info(host_config) + + if not host_config.get("identityfile"): logger.info(f"Host {host} doesnt exists, Generating new keys") return True From 0f3dbc032510558cd36de22b7893e8ac5b9337a4 Mon Sep 17 00:00:00 2001 From: Kaali Date: Thu, 18 Jul 2019 14:37:40 +0530 Subject: [PATCH 11/16] New instance for github identity file not tested --- app/backup_functions.py | 264 ++++++++++++++++++++++++++++++++++++++++ app/backup_new.py | 7 +- app/utils.py | 251 +++++++++++++++++++++++++++----------- 3 files changed, 446 insertions(+), 76 deletions(-) diff --git a/app/backup_functions.py b/app/backup_functions.py index e69de29..4eac88e 100644 --- a/app/backup_functions.py +++ b/app/backup_functions.py @@ -0,0 +1,264 @@ + + +import os + + + +def backup_issues(args, repo_cwd, repository, repos_template): + has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) + if args.skip_existing and has_issues_dir: + return + + log_info('Retrieving {0} issues'.format(repository['full_name'])) + issue_cwd = os.path.join(repo_cwd, 'issues') + mkdir_p(repo_cwd, issue_cwd) + + issues = {} + issues_skipped = 0 + issues_skipped_message = '' + _issue_template = '{0}/{1}/issues'.format(repos_template, + repository['full_name']) + + should_include_pulls = args.include_pulls or args.include_everything + issue_states = ['open', 'closed'] + for issue_state in issue_states: + query_args = { + 'filter': 'all', + 'state': issue_state + } + if args.since: + query_args['since'] = args.since + + _issues = retrieve_data(args, + _issue_template, + query_args=query_args) + for issue in _issues: + # skip pull requests which are also returned as issues + # if retrieving pull requests is requested as well + if 'pull_request' in issue and should_include_pulls: + issues_skipped += 1 + continue + + issues[issue['number']] = issue + + if issues_skipped: + issues_skipped_message = ' (skipped {0} pull requests)'.format( + issues_skipped) + + log_info('Saving {0} issues to disk{1}'.format( + len(list(issues.keys())), issues_skipped_message)) + comments_template = _issue_template + '/{0}/comments' + events_template = _issue_template + '/{0}/events' + for number, issue in list(issues.items()): + if args.include_issue_comments or args.include_everything: + template = comments_template.format(number) + issues[number]['comment_data'] = retrieve_data(args, template) + if args.include_issue_events or args.include_everything: + template = events_template.format(number) + issues[number]['event_data'] = retrieve_data(args, template) + + issue_file = '{0}/{1}.json'.format(issue_cwd, number) + with codecs.open(issue_file, 'w', encoding='utf-8') as f: + json_dump(issue, f) + + +def backup_pulls(args, repo_cwd, repository, repos_template): + has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) + if args.skip_existing and has_pulls_dir: + return + + log_info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa + pulls_cwd = os.path.join(repo_cwd, 'pulls') + mkdir_p(repo_cwd, pulls_cwd) + + pulls = {} + _pulls_template = '{0}/{1}/pulls'.format(repos_template, + repository['full_name']) + query_args = { + 'filter': 'all', + 'state': 'all', + 'sort': 'updated', + 'direction': 'desc', + } + + if not args.include_pull_details: + pull_states = ['open', 'closed'] + for pull_state in pull_states: + query_args['state'] = pull_state + _pulls = retrieve_data_gen(args, + _pulls_template, + query_args=query_args) + for pull in _pulls: + if args.since and pull['updated_at'] < args.since: + break + if not args.since or pull['updated_at'] >= args.since: + pulls[pull['number']] = pull + else: + _pulls = retrieve_data_gen(args, + _pulls_template, + query_args=query_args) + for pull in _pulls: + if args.since and pull['updated_at'] < args.since: + break + if not args.since or pull['updated_at'] >= args.since: + pulls[pull['number']] = retrieve_data( + args, + _pulls_template + '/{}'.format(pull['number']), + single_request=True + )[0] + + log_info('Saving {0} pull requests to disk'.format( + len(list(pulls.keys())))) + comments_template = _pulls_template + '/{0}/comments' + commits_template = _pulls_template + '/{0}/commits' + for number, pull in list(pulls.items()): + if args.include_pull_comments or args.include_everything: + template = comments_template.format(number) + pulls[number]['comment_data'] = retrieve_data(args, template) + if args.include_pull_commits or args.include_everything: + template = commits_template.format(number) + pulls[number]['commit_data'] = retrieve_data(args, template) + + pull_file = '{0}/{1}.json'.format(pulls_cwd, number) + with codecs.open(pull_file, 'w', encoding='utf-8') as f: + json_dump(pull, f) + + +def backup_milestones(args, repo_cwd, repository, repos_template): + milestone_cwd = os.path.join(repo_cwd, 'milestones') + if args.skip_existing and os.path.isdir(milestone_cwd): + return + + log_info('Retrieving {0} milestones'.format(repository['full_name'])) + mkdir_p(repo_cwd, milestone_cwd) + + template = '{0}/{1}/milestones'.format(repos_template, + repository['full_name']) + + query_args = { + 'state': 'all' + } + + _milestones = retrieve_data(args, template, query_args=query_args) + + milestones = {} + for milestone in _milestones: + milestones[milestone['number']] = milestone + + log_info('Saving {0} milestones to disk'.format( + len(list(milestones.keys())))) + for number, milestone in list(milestones.items()): + milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) + with codecs.open(milestone_file, 'w', encoding='utf-8') as f: + json_dump(milestone, f) + + +def backup_labels(args, repo_cwd, repository, repos_template): + label_cwd = os.path.join(repo_cwd, 'labels') + output_file = '{0}/labels.json'.format(label_cwd) + template = '{0}/{1}/labels'.format(repos_template, + repository['full_name']) + _backup_data(args, + 'labels', + template, + output_file, + label_cwd) + + +def backup_hooks(args, repo_cwd, repository, repos_template): + auth = get_auth(args) + if not auth: + log_info("Skipping hooks since no authentication provided") + return + hook_cwd = os.path.join(repo_cwd, 'hooks') + output_file = '{0}/hooks.json'.format(hook_cwd) + template = '{0}/{1}/hooks'.format(repos_template, + repository['full_name']) + try: + _backup_data(args, + 'hooks', + template, + output_file, + hook_cwd) + except SystemExit: + log_info("Unable to read hooks, skipping") + + +def backup_releases(args, repo_cwd, repository, repos_template, include_assets=False): + repository_fullname = repository['full_name'] + + # give release files somewhere to live & log intent + release_cwd = os.path.join(repo_cwd, 'releases') + log_info('Retrieving {0} releases'.format(repository_fullname)) + mkdir_p(repo_cwd, release_cwd) + + query_args = {} + + release_template = '{0}/{1}/releases'.format(repos_template, repository_fullname) + releases = retrieve_data(args, release_template, query_args=query_args) + + # for each release, store it + log_info('Saving {0} releases to disk'.format(len(releases))) + for release in releases: + release_name = release['tag_name'] + output_filepath = os.path.join(release_cwd, '{0}.json'.format(release_name)) + with codecs.open(output_filepath, 'w+', encoding='utf-8') as f: + json_dump(release, f) + + if include_assets: + assets = retrieve_data(args, release['assets_url']) + for asset in assets: + download_file(asset['url'], os.path.join(release_cwd, asset['name']), get_auth(args)) + + + +def backup_account(args, output_directory): + account_cwd = os.path.join(output_directory, 'account') + + if args.include_starred or args.include_everything: + output_file = "{0}/starred.json".format(account_cwd) + template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) + _backup_data(args, + "starred repositories", + template, + output_file, + account_cwd) + + if args.include_watched or args.include_everything: + output_file = "{0}/watched.json".format(account_cwd) + template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) + _backup_data(args, + "watched repositories", + template, + output_file, + account_cwd) + + if args.include_followers or args.include_everything: + output_file = "{0}/followers.json".format(account_cwd) + template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) + _backup_data(args, + "followers", + template, + output_file, + account_cwd) + + if args.include_following or args.include_everything: + output_file = "{0}/following.json".format(account_cwd) + template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) + _backup_data(args, + "following", + template, + output_file, + account_cwd) + + +def _backup_data(args, name, template, output_file, output_directory): + skip_existing = args.skip_existing + if not skip_existing or not os.path.exists(output_file): + log_info('Retrieving {0} {1}'.format(args.user, name)) + mkdir_p(output_directory) + data = retrieve_data(args, template) + + log_info('Writing {0} {1} to disk'.format(len(data), name)) + with codecs.open(output_file, 'w', encoding='utf-8') as f: + json_dump(data, f) \ No newline at end of file diff --git a/app/backup_new.py b/app/backup_new.py index 00ac5e5..be5dab1 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -4,7 +4,7 @@ from auth import get_auth, get_github_api_host from urllib.request import Request from utils import construct_request, get_response, ensure_directory, \ - c_pretty_print, mask_password, logging_subprocess, generate_new_keys + c_pretty_print, mask_password, logging_subprocess, generate_new_keys, GithubIdentity import time from loguru import logger from pprint import pformat @@ -357,7 +357,10 @@ def main(): logger.error("Please provide username and password for your github") print ("Execution started") - generate_new_keys(username, password) + inst = GithubIdentity("github.com", "Macpod", username, password, ssh_dir=None) + ins.add(username, password) + + #generate_new_keys(username, password) # dirname = os.path.dirname(os.path.abspath(__file__)) # output_directory = os.path.join(dirname, "account") # if args.lfs_clone: diff --git a/app/utils.py b/app/utils.py index aea12bc..4508ab4 100644 --- a/app/utils.py +++ b/app/utils.py @@ -1,5 +1,5 @@ - +#-*- coding: utf-8 -*- from urllib.parse import urlparse from urllib.parse import quote as urlquote from urllib.parse import urlencode @@ -21,106 +21,209 @@ from Crypto.PublicKey import RSA import requests import paramiko +import platform #curl -u "user:pass" --data '{"title":"test-key","key":"'"$(cat ~/.ssh/id_rsa.pub)"'"}' https://api.github.com/user/keys -def os_command_output(command, final_message): - - process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True) - while True: - line = process.stdout.readline() - if not line: - logger.info(final_message) - break - yield line.decode().split("\r")[0] - return +class GithubIdentity(object): + """ + Add new ssh keys to login into the github account on ssh + protocol, if the host is already exists the whole process will + abort, The public key will be added automatically to the github + account and the config file in .ssh directory will be updated + with the new private key, The keys generated is rsa 2048 + """ + def __init__(self, hostname, key_name, ssh_dir=None): + """ + ssh_dir directory for the .ssh configuration and + keypairs + key_name: This will be reflected in your github account + username: username for the host + password: password for the host + """ + self.hostname = hostname + self.key_name = key_name + if not ssh_dir: + home = os.path.expanduser("~") + self.ssh_dir = os.path.join(home, ".ssh") + else: + self.ssh_dir = ssh_dir -def append_ssh_config(private_key_path): - home = os.path.expanduser("~") - ssh_config_path = os.path.join(home, ".ssh", "config") + ##check if identity for the host already exists or not + if self.identity_exists(): + raise Exception(f"Identity for {self.hostname} already exists") - string = f"Host github.com\n\tHostname github.com\n\tPreferredAuthentications publickey\n\tIdentityFile {private_key_path}" - with open(ssh_config_path, "a+") as f: - f.write(string) - return + self.public_key = os.path.join(ssh_path, "git_pub.key") + self.private_key = os.path.join(ssh_path, "git_priv.key") -def flush_all_identities(): - """ - Removes all previous entries from the ssh, if the user already has - any identitiy added for github , it will be flushed, please handle it - with care - """ - command = "ssh-add -D" + return + + def identity_exist(self): + """ + Check if git identity already exists on the user machine, + If it does then abort generating new keys and configuring remote github account + Use Paramiko + """ + + conf = paramiko.SSHConfig() + try: + with open(os.path.join(self.ssh_dir, "config")) as f: + conf.parse(f) + except FileNotFoundError: + return False -def check_git_identity_exists(host="github.com"): - """ - Check if git identity already exists on the user machine, - If it does then abort generating new keys and configuring remote github account - Use Paramiko - """ - home = os.path.expanduser("~") - ssh_config_path = os.path.join(home, ".ssh", "config") - - conf = paramiko.SSHConfig() - try: - with open(ssh_config_path) as f: - conf.parse(f) - except FileNotFoundError: - logger.info("config file doesnt exists") - return True + host_config = conf.lookup(self.hostname) + logger.info(host_config) - host_config = conf.lookup(host) - logger.info(host_config) + if not host_config.get("identityfile"): + return False - if not host_config.get("identityfile"): - logger.info(f"Host {host} doesnt exists, Generating new keys") return True - logger.error(f"Host {host} exists, Abort Generating new keys") + def add(self, username, password): + logging.info("Generating new ") + privkey, pubkey = self.generate_new_keys() - return False + ##uploading the keys to the host + self.github_upload_keys(pubkey, username, password) + + ##writing keys to the local ssh configuration files + with open(self.private_key_path, "wb") as content_file: + content_file.write(privkey) -def generate_new_keys(username, password): - key = RSA.generate(4096) - #ssh-keygen -t rsa -C "your_email@example.com" - home = os.path.expanduser("~") - ssh_path = os.path.join(home, ".ssh") - public_key_path = os.path.join(ssh_path, "git_pub.key") - private_key_path = os.path.join(ssh_path, "git_priv.key") - - logger.info(f"Private key path {private_key_path}") - if check_git_identity_exists(): - - with open(private_key_path, "wb") as content_file: - content_file.write(key.exportKey('PEM')) + command = f"chmod 400 {self.private_key_path}" + for res in os_command_output(command, "Setting up permissions for {self.hostname} private key"): + logger.info(res) + + with open(self.public_key_path, 'wb') as content_file: + content_file.write(pubkey) - command = f"chmod 400 {private_key_path}" - for res in os_command_output(command, "Change Private key Permissions"): + command = f"ssh-add {private_key_path}" + for res in os_command_output(command, "New git keys"): logger.info(res) + self.append_ssh_config() + seff.add_identity() + return + + - logger.info("Permissions set for git private key") - pubkey = key.publickey() - with open(public_key_path, 'wb') as content_file: - content_file.write(pubkey.exportKey('OpenSSH')) + def generate_new_keys(self): + """ + Change this if you want to use any other cryptographic + algorithm to generate Asymmetric Key pairs + """ + logger.info("Generating RSA keys") - public_bytes = pubkey.exportKey('OpenSSH').decode() + key = RSA.generate(4096) + privkey = key.exportKey('PEM') + pubkey = key.publickey().exportKey('OpenSSH') + return privkey, pubkey + + def github_upload_keys(self, pubkey, username, password): + """ + Upload the generated public key on the github + """ response = requests.post('https://api.github.com/user/keys', auth=(username, password), data=json.dumps({ - "title": "Datapod", "key": public_bytes + "title": "Datapod", "key": pubkey.decode() })) - - logger.info(f"Response from updating git wiht public key {response.json()}") + res = response.json() + if response.status_code == 401: + raise Exception(res.get("message")) + if response.status_code == 422: + raise Exception(res.get("message")) - command = f"ssh-add {private_key_path}" + logger.success(res) + return + + + def os_command_output(command, final_message): + + process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True) + while True: + line = process.stdout.readline() + if not line: + logger.info(final_message) + break + yield line.decode().split("\r")[0] + return + + + def append_ssh_config(self): + + if platform.system() == "Darwin": + conf_string = f"Host *\n\tAddKeysToAgent yes\n\UseKeychain yes\n\tIdentityFile {private_key_path}" + else: + conf_string = f"Host github.com\n\tHostname github.com\n\tPreferredAuthentications publickey\n\tIdentityFile {private_key_path}" + logger.info("String which will be appended to the config file is {string}") + with open(os.path.join(self.ssh_dir, "config"), "a+") as f: + f.write(conf_string) + return + + def flush_all_identities(self): + """ + Removes all previous entries from the ssh, if the user already has + any identitiy added for github , it will be flushed, please handle it + with care + """ + command = "ssh-add -D" for res in os_command_output(command, "New git keys"): logger.info(res) - append_ssh_config(private_key_path) - - return + + def add_identity(self): + """ + """ + if platform.system() == "Darwin": + command = "ssh-add -K {self.private_key}" + else: + command = "ssh-add {self.private_key}" + for res in os_command_output(command, "Adding new keys to ssh"): + logger.info(res) + return + + + # def generate_new_keys(username, password): + # #ssh-keygen -t rsa -C "your_email@example.com" + # home = os.path.expanduser("~") + # ssh_path = os.path.join(home, ".ssh") + # public_key_path = os.path.join(ssh_path, "git_pub.key") + # private_key_path = os.path.join(ssh_path, "git_priv.key") + + # logger.info(f"Private key path {private_key_path}") + # if check_git_identity_exists(): + + # with open(private_key_path, "wb") as content_file: + # content_file.write(key.exportKey('PEM')) + + + # command = f"chmod 400 {private_key_path}" + # for res in os_command_output(command, "Change Private key Permissions"): + # logger.info(res) + + + # logger.info("Permissions set for git private key") + # pubkey = key.publickey() + # with open(public_key_path, 'wb') as content_file: + # content_file.write(pubkey.exportKey('OpenSSH')) + + # public_bytes = pubkey.exportKey('OpenSSH').decode() + # response = requests.post('https://api.github.com/user/keys', auth=(username, password), data=json.dumps({ + # "title": "Datapod", "key": public_bytes + # })) + + + # logger.info(f"Response from updating git wiht public key {response.json()}") + + # command = f"ssh-add {private_key_path}" + # for res in os_command_output(command, "New git keys"): + # logger.info(res) + # append_ssh_config(private_key_path) + + # return From 04c7265826f40aebf314ee3ba6bd1cdc748912b5 Mon Sep 17 00:00:00 2001 From: Kaali Date: Thu, 18 Jul 2019 15:31:28 +0530 Subject: [PATCH 12/16] Backup github identity class working fine --- app/backup_new.py | 11 ++++++++--- app/utils.py | 35 +++++++++++++++-------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/app/backup_new.py b/app/backup_new.py index be5dab1..2bd87e6 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -4,7 +4,7 @@ from auth import get_auth, get_github_api_host from urllib.request import Request from utils import construct_request, get_response, ensure_directory, \ - c_pretty_print, mask_password, logging_subprocess, generate_new_keys, GithubIdentity + c_pretty_print, mask_password, logging_subprocess, GithubIdentity import time from loguru import logger from pprint import pformat @@ -357,9 +357,14 @@ def main(): logger.error("Please provide username and password for your github") print ("Execution started") - inst = GithubIdentity("github.com", "Macpod", username, password, ssh_dir=None) - ins.add(username, password) + #try: + inst = GithubIdentity("github.com", "Macpod") + inst.add(username, password) + #except Exception as e: + # logger.error(e) + + #generate_new_keys(username, password) # dirname = os.path.dirname(os.path.abspath(__file__)) # output_directory = os.path.join(dirname, "account") diff --git a/app/utils.py b/app/utils.py index 4508ab4..7cd89f4 100644 --- a/app/utils.py +++ b/app/utils.py @@ -51,11 +51,11 @@ def __init__(self, hostname, key_name, ssh_dir=None): self.ssh_dir = ssh_dir ##check if identity for the host already exists or not - if self.identity_exists(): + if self.identity_exist(): raise Exception(f"Identity for {self.hostname} already exists") - - self.public_key = os.path.join(ssh_path, "git_pub.key") - self.private_key = os.path.join(ssh_path, "git_priv.key") + logger.info(f"Identity for {hostname} doesnt exists, Please run add method to generte a new identity") + self.public_key = os.path.join(self.ssh_dir, "git_pub.key") + self.private_key = os.path.join(self.ssh_dir, "git_priv.key") return @@ -83,30 +83,25 @@ def identity_exist(self): return True def add(self, username, password): - logging.info("Generating new ") privkey, pubkey = self.generate_new_keys() ##uploading the keys to the host self.github_upload_keys(pubkey, username, password) ##writing keys to the local ssh configuration files - with open(self.private_key_path, "wb") as content_file: + with open(self.private_key, "wb") as content_file: content_file.write(privkey) - command = f"chmod 400 {self.private_key_path}" - for res in os_command_output(command, "Setting up permissions for {self.hostname} private key"): + command = f"chmod 400 {self.private_key}" + for res in self.os_command_output(command, "Setting up permissions for {self.hostname} private key"): logger.info(res) - with open(self.public_key_path, 'wb') as content_file: + with open(self.public_key, 'wb') as content_file: content_file.write(pubkey) - command = f"ssh-add {private_key_path}" - for res in os_command_output(command, "New git keys"): - logger.info(res) - self.append_ssh_config() - seff.add_identity() + self.add_identity() return @@ -128,7 +123,7 @@ def github_upload_keys(self, pubkey, username, password): Upload the generated public key on the github """ response = requests.post('https://api.github.com/user/keys', auth=(username, password), data=json.dumps({ - "title": "Datapod", "key": pubkey.decode() + "title": self.key_name, "key": pubkey.decode() })) res = response.json() @@ -141,7 +136,7 @@ def github_upload_keys(self, pubkey, username, password): return - def os_command_output(command, final_message): + def os_command_output(self, command, final_message): process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True) while True: @@ -156,9 +151,9 @@ def os_command_output(command, final_message): def append_ssh_config(self): if platform.system() == "Darwin": - conf_string = f"Host *\n\tAddKeysToAgent yes\n\UseKeychain yes\n\tIdentityFile {private_key_path}" + conf_string = f"Host *\n\tAddKeysToAgent yes\n\tUseKeychain yes\n\tIdentityFile {self.private_key}" else: - conf_string = f"Host github.com\n\tHostname github.com\n\tPreferredAuthentications publickey\n\tIdentityFile {private_key_path}" + conf_string = f"Host github.com\n\tHostname github.com\n\tPreferredAuthentications publickey\n\tIdentityFile {self.private_key}" logger.info("String which will be appended to the config file is {string}") with open(os.path.join(self.ssh_dir, "config"), "a+") as f: f.write(conf_string) @@ -171,7 +166,7 @@ def flush_all_identities(self): with care """ command = "ssh-add -D" - for res in os_command_output(command, "New git keys"): + for res in self.os_command_output(command, "New git keys"): logger.info(res) def add_identity(self): @@ -181,7 +176,7 @@ def add_identity(self): command = "ssh-add -K {self.private_key}" else: command = "ssh-add {self.private_key}" - for res in os_command_output(command, "Adding new keys to ssh"): + for res in self.os_command_output(command, "Adding new keys to ssh"): logger.info(res) return From 474c5f3037ed9b56d1aa051a778bc12a0f7cc67c Mon Sep 17 00:00:00 2001 From: Kaali Date: Thu, 18 Jul 2019 16:43:20 +0530 Subject: [PATCH 13/16] Backup issues not wokring correctly --- app/backup_functions.py | 41 +++++------ app/backup_new.py | 148 ++++++++++++++-------------------------- app/utils.py | 107 +++++++++++++++++------------ 3 files changed, 137 insertions(+), 159 deletions(-) diff --git a/app/backup_functions.py b/app/backup_functions.py index 4eac88e..06c8dc5 100644 --- a/app/backup_functions.py +++ b/app/backup_functions.py @@ -1,15 +1,17 @@ import os +from utils import mkdir_p +from loguru import logger +from utils import retrieve_data, retrieve_data_gen, json_dump +import codecs +def backup_issues(username, passwod, repo_cwd, repository, repos_template, since=None): + #has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) + # if args.skip_existing and has_issues_dir: + # return - -def backup_issues(args, repo_cwd, repository, repos_template): - has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) - if args.skip_existing and has_issues_dir: - return - - log_info('Retrieving {0} issues'.format(repository['full_name'])) + logger.info('Retrieving {0} issues'.format(repository['full_name'])) issue_cwd = os.path.join(repo_cwd, 'issues') mkdir_p(repo_cwd, issue_cwd) @@ -19,23 +21,24 @@ def backup_issues(args, repo_cwd, repository, repos_template): _issue_template = '{0}/{1}/issues'.format(repos_template, repository['full_name']) - should_include_pulls = args.include_pulls or args.include_everything + should_include_pulls = True issue_states = ['open', 'closed'] for issue_state in issue_states: query_args = { 'filter': 'all', 'state': issue_state } - if args.since: - query_args['since'] = args.since + ##since os the time stamp after which everything shall be scraped + if since: + query_args['since'] = since - _issues = retrieve_data(args, + _issues = retrieve_data(username, password, _issue_template, query_args=query_args) for issue in _issues: # skip pull requests which are also returned as issues # if retrieving pull requests is requested as well - if 'pull_request' in issue and should_include_pulls: + if 'pull_request' in issue: issues_skipped += 1 continue @@ -50,17 +53,17 @@ def backup_issues(args, repo_cwd, repository, repos_template): comments_template = _issue_template + '/{0}/comments' events_template = _issue_template + '/{0}/events' for number, issue in list(issues.items()): - if args.include_issue_comments or args.include_everything: - template = comments_template.format(number) - issues[number]['comment_data'] = retrieve_data(args, template) - if args.include_issue_events or args.include_everything: - template = events_template.format(number) - issues[number]['event_data'] = retrieve_data(args, template) + #if args.include_issue_comments or args.include_everything: + template = comments_template.format(number) + issues[number]['comment_data'] = retrieve_data(args, template) + #if args.include_issue_events or args.include_everything: + template = events_template.format(number) + issues[number]['event_data'] = retrieve_data(args, template) issue_file = '{0}/{1}.json'.format(issue_cwd, number) with codecs.open(issue_file, 'w', encoding='utf-8') as f: json_dump(issue, f) - + return def backup_pulls(args, repo_cwd, repository, repos_template): has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) diff --git a/app/backup_new.py b/app/backup_new.py index 2bd87e6..a2a02bc 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -4,7 +4,11 @@ from auth import get_auth, get_github_api_host from urllib.request import Request from utils import construct_request, get_response, ensure_directory, \ - c_pretty_print, mask_password, logging_subprocess, GithubIdentity + c_pretty_print, mask_password, logging_subprocess, GithubIdentity,\ + retrieve_data, retrieve_data_gen + +from backup_functions import backup_issues + import time from loguru import logger from pprint import pformat @@ -21,58 +25,11 @@ def get_authenticated_user(username, password): data = retrieve_data(username, password, template, single_request=True) return data[0] -def retrieve_data(username, password, template, query_args=None, single_request=False): - return list(retrieve_data_gen(username, password, template, query_args, single_request)) - -def retrieve_data_gen(username, password, template, query_args=None, single_request=False): - auth = get_auth(username, password) - #query_args = get_query_args(query_args) - logger.info(f"The auth for the user is {auth}") - per_page = 100 - page = 0 - - while True: - page = page + 1 - request = construct_request(per_page, page, template, auth) # noqa - r, errors = get_response(request, auth, template) - - status_code = int(r.getcode()) - - retries = 0 - while retries < 3 and status_code == 502: - print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') - retries += 1 - time.sleep(5) - request = construct_request(per_page, page, query_args, template, auth) # noqa - r, errors = get_response(request, auth, template) - status_code = int(r.getcode()) - - if status_code != 200: - template = 'API request returned HTTP {0}: {1}' - errors.append(template.format(status_code, r.reason)) - logger.error(errors) - - response = json.loads(r.read().decode('utf-8')) - if len(errors) == 0: - if type(response) == list: - for resp in response: - yield resp - if len(response) < per_page: - break - elif type(response) == dict and single_request: - yield response - - if len(errors) > 0: - logger.error(errors) - - if single_request: - break def retrieve_repositories(username, password): - logger.info('Retrieving repositories') single_request = False template = 'https://{0}/user/repos'.format(get_github_api_host()) @@ -98,7 +55,7 @@ def retrieve_repositories(username, password): repos = retrieve_data(username, password, template, single_request=single_request) - c_pretty_print(repos[0]) + #c_pretty_print(repos[0]) ##append start repos starred_template = 'https://{0}/users/{1}/starred'.format(get_github_api_host(), username) starred_repos = retrieve_data(username, password, starred_template, single_request=False) @@ -107,7 +64,7 @@ def retrieve_repositories(username, password): item.update({'is_starred': True}) logger.info("Starred Repos first element") - c_pretty_print(starred_repos[0]) + #c_pretty_print(starred_repos[0]) ##append start repos repos.extend(starred_repos) @@ -121,7 +78,7 @@ def retrieve_repositories(username, password): item.update({'is_gist': True}) logger.info("GIST first element") - c_pretty_print(gists[0]) + #c_pretty_print(gists[0]) repos.extend(gists) @@ -151,11 +108,12 @@ def get_github_host(): def get_github_repo_url(username, password, repository, prefer_ssh=True): # if args.prefer_ssh: # return repository['ssh_url'] + if repository.get('is_gist'): + return repository['git_pull_url'] + if prefer_ssh: return repository['ssh_url'] - if repository.get('is_gist'): - return repository['git_pull_url'] @@ -177,15 +135,18 @@ def backup_repositories(username, password, output_directory, repositories): logger.info('Backing up repositories') repos_template = 'https://{0}/repos'.format(get_github_api_host()) - # if args.incremental: - # last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa - # last_update_path = os.path.join(output_directory, 'last_update') - # if os.path.exists(last_update_path): - # args.since = open(last_update_path).read().strip() - # else: - # args.since = None - # else: - # args.since = None + #if args.incremental: + last_update = max(list(repository['updated_at'] for repository in repositories) or [time.strftime('%Y-%m-%dT%H:%M:%SZ', time.localtime())]) # noqa + last_update_path = os.path.join(output_directory, 'last_update') + if os.path.exists(last_update_path): + since = open(last_update_path).read().strip() + else: + since = None + + logger.info(last_update) + + logger.info(f"Since value is {last_update}") + logger.info(f"Lat update path is {last_update_path}") for repository in repositories: time.sleep(5) @@ -220,44 +181,35 @@ def backup_repositories(username, password, output_directory, repositories): continue # don't try to back anything else for a gist; it doesn't exis - # if repository.get('is_gist'): - # # dump gist information to a file as well - # output_file = '{0}/gist.json'.format(repo_cwd) - # with codecs.open(output_file, 'w', encoding='utf-8') as f: - # json_dump(repository, f) - # continue # don't try to back anything else for a gist; it doesn't exist + #download_wiki = (args.include_wiki or args.include_everything) + if repository['has_wiki']: + fetch_repository(repository['name'], + repo_url.replace('.git', '.wiki.git'), + os.path.join(repo_cwd, 'wiki'), + ) - # download_wiki = (args.include_wiki or args.include_everything) - # if repository['has_wiki'] and download_wiki: - # fetch_repository(repository['name'], - # repo_url.replace('.git', '.wiki.git'), - # os.path.join(repo_cwd, 'wiki'), - # skip_existing=args.skip_existing, - # bare_clone=args.bare_clone, - # lfs_clone=args.lfs_clone) + #if args.include_issues or args.include_everything: + backup_issues(repo_cwd, repository, repos_template) - # if args.include_issues or args.include_everything: - # backup_issues(args, repo_cwd, repository, repos_template) + # if args.include_pulls or args.include_everything: + # backup_pulls(args, repo_cwd, repository, repos_template) - # if args.include_pulls or args.include_everything: - # backup_pulls(args, repo_cwd, repository, repos_template) + # if args.include_milestones or args.include_everything: + # backup_milestones(args, repo_cwd, repository, repos_template) - # if args.include_milestones or args.include_everything: - # backup_milestones(args, repo_cwd, repository, repos_template) + # if args.include_labels or args.include_everything: + # backup_labels(args, repo_cwd, repository, repos_template) - # if args.include_labels or args.include_everything: - # backup_labels(args, repo_cwd, repository, repos_template) + # if args.include_hooks or args.include_everything: + # backup_hooks(args, repo_cwd, repository, repos_template) - # if args.include_hooks or args.include_everything: - # backup_hooks(args, repo_cwd, repository, repos_template) + # if args.include_releases or args.include_everything: + # backup_releases(args, repo_cwd, repository, repos_template, + # include_assets=args.include_assets or args.include_everything) - # if args.include_releases or args.include_everything: - # backup_releases(args, repo_cwd, repository, repos_template, - # include_assets=args.include_assets or args.include_everything) - - # if args.incremental: - # open(last_update_path, 'w').write(last_update) + if args.incremental: + open(last_update_path, 'w').write(last_update) @@ -357,12 +309,12 @@ def main(): logger.error("Please provide username and password for your github") print ("Execution started") - #try: - inst = GithubIdentity("github.com", "Macpod") - inst.add(username, password) + try: + inst = GithubIdentity("github.com", "Macpod") + inst.add(username, password) - #except Exception as e: - # logger.error(e) + except Exception as e: + logger.error(e) #generate_new_keys(username, password) @@ -376,7 +328,7 @@ def main(): authenticated_user = get_authenticated_user(username, password) - print (authenticated_user) + logger.info(f"The user for which the backup will happend {authenticated_user['login']}") repositories = retrieve_repositories(username, password) #repositories = filter_repositories(args, repositories) backup_repositories(username, password, config_object.GITHUB_OUTPUT_DIR, repositories) diff --git a/app/utils.py b/app/utils.py index 7cd89f4..0ab5c20 100644 --- a/app/utils.py +++ b/app/utils.py @@ -22,7 +22,7 @@ import requests import paramiko import platform - +from auth import get_auth #curl -u "user:pass" --data '{"title":"test-key","key":"'"$(cat ~/.ssh/id_rsa.pub)"'"}' https://api.github.com/user/keys class GithubIdentity(object): @@ -93,7 +93,7 @@ def add(self, username, password): content_file.write(privkey) command = f"chmod 400 {self.private_key}" - for res in self.os_command_output(command, "Setting up permissions for {self.hostname} private key"): + for res in self.os_command_output(command, f"Setting up permissions for {self.hostname} private key"): logger.info(res) with open(self.public_key, 'wb') as content_file: @@ -154,7 +154,7 @@ def append_ssh_config(self): conf_string = f"Host *\n\tAddKeysToAgent yes\n\tUseKeychain yes\n\tIdentityFile {self.private_key}" else: conf_string = f"Host github.com\n\tHostname github.com\n\tPreferredAuthentications publickey\n\tIdentityFile {self.private_key}" - logger.info("String which will be appended to the config file is {string}") + logger.info(f"String which will be appended to the config file is {string}") with open(os.path.join(self.ssh_dir, "config"), "a+") as f: f.write(conf_string) return @@ -181,44 +181,6 @@ def add_identity(self): return - # def generate_new_keys(username, password): - # #ssh-keygen -t rsa -C "your_email@example.com" - # home = os.path.expanduser("~") - # ssh_path = os.path.join(home, ".ssh") - # public_key_path = os.path.join(ssh_path, "git_pub.key") - # private_key_path = os.path.join(ssh_path, "git_priv.key") - - # logger.info(f"Private key path {private_key_path}") - # if check_git_identity_exists(): - - # with open(private_key_path, "wb") as content_file: - # content_file.write(key.exportKey('PEM')) - - - # command = f"chmod 400 {private_key_path}" - # for res in os_command_output(command, "Change Private key Permissions"): - # logger.info(res) - - - # logger.info("Permissions set for git private key") - # pubkey = key.publickey() - # with open(public_key_path, 'wb') as content_file: - # content_file.write(pubkey.exportKey('OpenSSH')) - - # public_bytes = pubkey.exportKey('OpenSSH').decode() - # response = requests.post('https://api.github.com/user/keys', auth=(username, password), data=json.dumps({ - # "title": "Datapod", "key": public_bytes - # })) - - - # logger.info(f"Response from updating git wiht public key {response.json()}") - - # command = f"ssh-add {private_key_path}" - # for res in os_command_output(command, "New git keys"): - # logger.info(res) - # append_ssh_config(private_key_path) - - # return @@ -302,7 +264,59 @@ def ensure_directory(dirname): # if args.lfs_clone: # check_git_lfs_install() +def retrieve_data(username, password, template, query_args=None, single_request=False): + return list(retrieve_data_gen(username, password, template, query_args, single_request)) + +def get_query_args(query_args=None): + if not query_args: + query_args = {} + return query_args + +def retrieve_data_gen(username, password, template, query_args=None, single_request=False): + auth = get_auth(username, password) + query_args = get_query_args(query_args) + + logger.info(f"The auth for the user is {auth}") + per_page = 100 + page = 0 + while True: + page = page + 1 + request = construct_request(per_page, page, template, auth) # noqa + r, errors = get_response(request, auth, template) + + status_code = int(r.getcode()) + + retries = 0 + while retries < 3 and status_code == 502: + print('API request returned HTTP 502: Bad Gateway. Retrying in 5 seconds') + retries += 1 + time.sleep(5) + request = construct_request(per_page, page, query_args, template, auth) # noqa + r, errors = get_response(request, auth, template) + + status_code = int(r.getcode()) + + if status_code != 200: + template = 'API request returned HTTP {0}: {1}' + errors.append(template.format(status_code, r.reason)) + logger.error(errors) + + response = json.loads(r.read().decode('utf-8')) + if len(errors) == 0: + if type(response) == list: + for resp in response: + yield resp + if len(response) < per_page: + break + elif type(response) == dict and single_request: + yield response + + if len(errors) > 0: + logger.error(errors) + + if single_request: + break def construct_request(per_page, page, template, auth): querystring = urlencode(dict(list({ @@ -391,4 +405,13 @@ def download_file(url, path, auth): def check_git_lfs_install(): exit_code = subprocess.call(['git', 'lfs', 'version']) if exit_code != 0: - log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') \ No newline at end of file + log_error('The argument --lfs requires you to have Git LFS installed.\nYou can get it from https://git-lfs.github.com.') + + +def json_dump(data, output_file): + json.dump(data, + output_file, + ensure_ascii=False, + sort_keys=True, + indent=4, + separators=(',', ': ')) \ No newline at end of file From 0a5898c8f1fb67d863ccf21181a45ca69c79dccc Mon Sep 17 00:00:00 2001 From: feynman Date: Fri, 19 Jul 2019 02:44:46 +0530 Subject: [PATCH 14/16] Github issue backups working --- app/backup_functions.py | 99 ++++++++++++++++++++++------------------- app/backup_new.py | 12 +++-- app/utils.py | 5 +-- 3 files changed, 62 insertions(+), 54 deletions(-) diff --git a/app/backup_functions.py b/app/backup_functions.py index 06c8dc5..a4e0fe0 100644 --- a/app/backup_functions.py +++ b/app/backup_functions.py @@ -6,7 +6,7 @@ from utils import retrieve_data, retrieve_data_gen, json_dump import codecs -def backup_issues(username, passwod, repo_cwd, repository, repos_template, since=None): +def backup_issues(username, password, repo_cwd, repository, repos_template, since=None): #has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd)) # if args.skip_existing and has_issues_dir: # return @@ -48,35 +48,37 @@ def backup_issues(username, passwod, repo_cwd, repository, repos_template, since issues_skipped_message = ' (skipped {0} pull requests)'.format( issues_skipped) - log_info('Saving {0} issues to disk{1}'.format( + logger.info('Saving {0} issues to disk{1}'.format( len(list(issues.keys())), issues_skipped_message)) comments_template = _issue_template + '/{0}/comments' events_template = _issue_template + '/{0}/events' for number, issue in list(issues.items()): #if args.include_issue_comments or args.include_everything: template = comments_template.format(number) - issues[number]['comment_data'] = retrieve_data(args, template) + issues[number]['comment_data'] = retrieve_data(username, password, template) #if args.include_issue_events or args.include_everything: template = events_template.format(number) - issues[number]['event_data'] = retrieve_data(args, template) + issues[number]['event_data'] = retrieve_data(username, password, template) issue_file = '{0}/{1}.json'.format(issue_cwd, number) with codecs.open(issue_file, 'w', encoding='utf-8') as f: json_dump(issue, f) return -def backup_pulls(args, repo_cwd, repository, repos_template): - has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) - if args.skip_existing and has_pulls_dir: - return +def backup_pulls(username, password, repo_cwd, repository, repos_template): + + #has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd)) + # if args.skip_existing and has_pulls_dir: + # return - log_info('Retrieving {0} pull requests'.format(repository['full_name'])) # noqa + logger.info(f"Retrieving {repository['full_name']} pull requests") # noqa pulls_cwd = os.path.join(repo_cwd, 'pulls') mkdir_p(repo_cwd, pulls_cwd) pulls = {} - _pulls_template = '{0}/{1}/pulls'.format(repos_template, - repository['full_name']) + pulls_template = f"{repos_template}/{repository['full_name']}/pulls" + + logger.info(f"Pull template is {pulls_template}") query_args = { 'filter': 'all', 'state': 'all', @@ -84,47 +86,50 @@ def backup_pulls(args, repo_cwd, repository, repos_template): 'direction': 'desc', } - if not args.include_pull_details: - pull_states = ['open', 'closed'] - for pull_state in pull_states: - query_args['state'] = pull_state - _pulls = retrieve_data_gen(args, - _pulls_template, - query_args=query_args) - for pull in _pulls: - if args.since and pull['updated_at'] < args.since: - break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = pull - else: - _pulls = retrieve_data_gen(args, - _pulls_template, - query_args=query_args) - for pull in _pulls: - if args.since and pull['updated_at'] < args.since: - break - if not args.since or pull['updated_at'] >= args.since: - pulls[pull['number']] = retrieve_data( - args, - _pulls_template + '/{}'.format(pull['number']), - single_request=True - )[0] - - log_info('Saving {0} pull requests to disk'.format( + # if not args.include_pull_details: + # pull_states = ['open', 'closed'] + # for pull_state in pull_states: + # query_args['state'] = pull_state + # _pulls = retrieve_data_gen(args, + # _pulls_template, + # query_args=query_args) + # for pull in _pulls: + # if args.since and pull['updated_at'] < args.since: + # break + # if not args.since or pull['updated_at'] >= args.since: + # pulls[pull['number']] = pull + # else: + _pulls = retrieve_data_gen(username, password, + pulls_template, + query_args=query_args) + + for pull in _pulls: + # if args.since and pull['updated_at'] < args.since: + # break + # if not args.since or pull['updated_at'] >= args.since: + pulls[pull['number']] = retrieve_data( + username, password, + pulls_template + '/{}'.format(pull['number']), + single_request=True + )[0] + + logger.info('Saving {0} pull requests to disk'.format( len(list(pulls.keys())))) - comments_template = _pulls_template + '/{0}/comments' - commits_template = _pulls_template + '/{0}/commits' + + comments_template = pulls_template + '/{0}/comments' + commits_template = pulls_template + '/{0}/commits' + for number, pull in list(pulls.items()): - if args.include_pull_comments or args.include_everything: + # if args.include_pull_comments or args.include_everything: template = comments_template.format(number) - pulls[number]['comment_data'] = retrieve_data(args, template) - if args.include_pull_commits or args.include_everything: + pulls[number]['comment_data'] = retrieve_data(username, password, template) + #if args.include_pull_commits or args.include_everything: template = commits_template.format(number) - pulls[number]['commit_data'] = retrieve_data(args, template) + pulls[number]['commit_data'] = retrieve_data(username, password, template) - pull_file = '{0}/{1}.json'.format(pulls_cwd, number) - with codecs.open(pull_file, 'w', encoding='utf-8') as f: - json_dump(pull, f) + pull_file = '{0}/{1}.json'.format(pulls_cwd, number) + with codecs.open(pull_file, 'w', encoding='utf-8') as f: + json_dump(pull, f) def backup_milestones(args, repo_cwd, repository, repos_template): diff --git a/app/backup_new.py b/app/backup_new.py index a2a02bc..fc26874 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -7,7 +7,7 @@ c_pretty_print, mask_password, logging_subprocess, GithubIdentity,\ retrieve_data, retrieve_data_gen -from backup_functions import backup_issues +from backup_functions import backup_issues, backup_pulls import time from loguru import logger @@ -183,17 +183,21 @@ def backup_repositories(username, password, output_directory, repositories): #download_wiki = (args.include_wiki or args.include_everything) + if repository['has_wiki']: + wiki_url = repo_url.replace('.git', '.wiki.git') + logger.info(f"Trying to download wiki for {repository['name']} at {wiki_url}") + fetch_repository(repository['name'], - repo_url.replace('.git', '.wiki.git'), + wiki_url, os.path.join(repo_cwd, 'wiki'), ) #if args.include_issues or args.include_everything: - backup_issues(repo_cwd, repository, repos_template) + backup_issues(username, password, repo_cwd, repository, repos_template) # if args.include_pulls or args.include_everything: - # backup_pulls(args, repo_cwd, repository, repos_template) + backup_pulls(username, password, repo_cwd, repository, repos_template) # if args.include_milestones or args.include_everything: # backup_milestones(args, repo_cwd, repository, repos_template) diff --git a/app/utils.py b/app/utils.py index 0ab5c20..84e0c4e 100644 --- a/app/utils.py +++ b/app/utils.py @@ -154,7 +154,7 @@ def append_ssh_config(self): conf_string = f"Host *\n\tAddKeysToAgent yes\n\tUseKeychain yes\n\tIdentityFile {self.private_key}" else: conf_string = f"Host github.com\n\tHostname github.com\n\tPreferredAuthentications publickey\n\tIdentityFile {self.private_key}" - logger.info(f"String which will be appended to the config file is {string}") + logger.info(f"String which will be appended to the config file is {conf_string}") with open(os.path.join(self.ssh_dir, "config"), "a+") as f: f.write(conf_string) return @@ -233,7 +233,6 @@ def check_io(): def mask_password(url, secret='*****'): parsed = urlparse(url) - logger.info(f"parsed {parsed}") if not parsed.password: return url @@ -248,7 +247,7 @@ def mkdir_p(*args): try: os.makedirs(path) except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST and os.path.isdir(path): + if os.path.isdir(path): pass else: raise From 73c1cab83ce530c5330b0d865d911329078bd197 Mon Sep 17 00:00:00 2001 From: feynman Date: Fri, 19 Jul 2019 03:35:13 +0530 Subject: [PATCH 15/16] intermediate --- app/backup_functions.py | 77 +++++++++++++++++++++-------------------- app/backup_new.py | 3 ++ app/utils.py | 1 + 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/app/backup_functions.py b/app/backup_functions.py index a4e0fe0..77e0fbf 100644 --- a/app/backup_functions.py +++ b/app/backup_functions.py @@ -132,36 +132,38 @@ def backup_pulls(username, password, repo_cwd, repository, repos_template): json_dump(pull, f) -def backup_milestones(args, repo_cwd, repository, repos_template): +def backup_milestones(username, password, repo_cwd, repository, repos_template): milestone_cwd = os.path.join(repo_cwd, 'milestones') - if args.skip_existing and os.path.isdir(milestone_cwd): - return + # if args.skip_existing and os.path.isdir(milestone_cwd): + # return + + logger.info(f"Retrieving {repository['full_name']} milestones") - log_info('Retrieving {0} milestones'.format(repository['full_name'])) mkdir_p(repo_cwd, milestone_cwd) - template = '{0}/{1}/milestones'.format(repos_template, - repository['full_name']) + template = "{repos_template}/{repository['full_name']}/milestones" + ) query_args = { 'state': 'all' } - _milestones = retrieve_data(args, template, query_args=query_args) + _milestones = retrieve_data(username, password, template, query_args=query_args) milestones = {} for milestone in _milestones: milestones[milestone['number']] = milestone - log_info('Saving {0} milestones to disk'.format( - len(list(milestones.keys())))) + log_info('Saving {len(list(milestones.keys()))} milestones to disk') + for number, milestone in list(milestones.items()): - milestone_file = '{0}/{1}.json'.format(milestone_cwd, number) + milestone_file = f'{milestone}/{number}.json' with codecs.open(milestone_file, 'w', encoding='utf-8') as f: json_dump(milestone, f) + return -def backup_labels(args, repo_cwd, repository, repos_template): +def backup_labels(username, password, repo_cwd, repository, repos_template): label_cwd = os.path.join(repo_cwd, 'labels') output_file = '{0}/labels.json'.format(label_cwd) template = '{0}/{1}/labels'.format(repos_template, @@ -220,53 +222,54 @@ def backup_releases(args, repo_cwd, repository, repos_template, include_assets=F -def backup_account(args, output_directory): +def backup_account(username, password, output_directory): account_cwd = os.path.join(output_directory, 'account') - if args.include_starred or args.include_everything: - output_file = "{0}/starred.json".format(account_cwd) - template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) - _backup_data(args, - "starred repositories", - template, - output_file, - account_cwd) - - if args.include_watched or args.include_everything: - output_file = "{0}/watched.json".format(account_cwd) + # if args.include_starred or args.include_everything: + output_file = f"{account_cwd}/starred.json" + template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) + _backup_data(username, password, + "starred repositories", + template, + output_file, + account_cwd) + + # if args.include_watched or args.include_everything: + output_file = f"{account_cwd}/watched.json" template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) - _backup_data(args, + _backup_data(username, password, "watched repositories", template, output_file, account_cwd) - if args.include_followers or args.include_everything: - output_file = "{0}/followers.json".format(account_cwd) + # if args.include_followers or args.include_everything: + output_file = f"{account_cwd}/followers.json" template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) - _backup_data(args, + _backup_data(username, password, "followers", template, output_file, account_cwd) - if args.include_following or args.include_everything: - output_file = "{0}/following.json".format(account_cwd) - template = "https://{0}/users/{1}/following".format(get_github_api_host(args), args.user) - _backup_data(args, + # if args.include_following or args.include_everything: + output_file = f"{account_cwd}/following.json". + template = f"https://{get_github_api_host(args)}/users/{username}/following" + _backup_d'ata(username, password, "following", template, output_file, account_cwd) -def _backup_data(args, name, template, output_file, output_directory): - skip_existing = args.skip_existing - if not skip_existing or not os.path.exists(output_file): - log_info('Retrieving {0} {1}'.format(args.user, name)) +def _backup_data(username, password, name, template, output_file, output_directory, overwrite=True): + # skip_existing = args.skip_existing + + if overwrite: + logger.info(f'Retrieving {username} {name}') mkdir_p(output_directory) - data = retrieve_data(args, template) + data = retrieve_data(username, password, template) - log_info('Writing {0} {1} to disk'.format(len(data), name)) + logger.info(f'Writing {len(data)} {name} to disk') with codecs.open(output_file, 'w', encoding='utf-8') as f: json_dump(data, f) \ No newline at end of file diff --git a/app/backup_new.py b/app/backup_new.py index fc26874..b72061a 100755 --- a/app/backup_new.py +++ b/app/backup_new.py @@ -193,6 +193,9 @@ def backup_repositories(username, password, output_directory, repositories): os.path.join(repo_cwd, 'wiki'), ) + + if repository.get('is_starred'): + continue #if args.include_issues or args.include_everything: backup_issues(username, password, repo_cwd, repository, repos_template) diff --git a/app/utils.py b/app/utils.py index 84e0c4e..e7b9c6a 100644 --- a/app/utils.py +++ b/app/utils.py @@ -23,6 +23,7 @@ import paramiko import platform from auth import get_auth +import time #curl -u "user:pass" --data '{"title":"test-key","key":"'"$(cat ~/.ssh/id_rsa.pub)"'"}' https://api.github.com/user/keys class GithubIdentity(object): From f0d0e99a2199739ca5d37736bc0ab453554318fd Mon Sep 17 00:00:00 2001 From: Kaali Date: Fri, 19 Jul 2019 15:07:06 +0530 Subject: [PATCH 16/16] Removing bugs --- app/backup_functions.py | 52 ++++++++++++++++++++--------------------- app/utils.py | 4 ++-- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/app/backup_functions.py b/app/backup_functions.py index 77e0fbf..5f9a388 100644 --- a/app/backup_functions.py +++ b/app/backup_functions.py @@ -141,8 +141,7 @@ def backup_milestones(username, password, repo_cwd, repository, repos_template): mkdir_p(repo_cwd, milestone_cwd) - template = "{repos_template}/{repository['full_name']}/milestones" - ) + template = f"{repos_template}/{repository['full_name']}/milestones" query_args = { 'state': 'all' @@ -226,8 +225,9 @@ def backup_account(username, password, output_directory): account_cwd = os.path.join(output_directory, 'account') # if args.include_starred or args.include_everything: + host= get_github_api_host() output_file = f"{account_cwd}/starred.json" - template = "https://{0}/users/{1}/starred".format(get_github_api_host(args), args.user) + template = f"https://{host}/users/{username}/starred" _backup_data(username, password, "starred repositories", template, @@ -235,31 +235,31 @@ def backup_account(username, password, output_directory): account_cwd) # if args.include_watched or args.include_everything: - output_file = f"{account_cwd}/watched.json" - template = "https://{0}/users/{1}/subscriptions".format(get_github_api_host(args), args.user) - _backup_data(username, password, - "watched repositories", - template, - output_file, - account_cwd) + output_file = f'{account_cwd}/watched.json' + template = "https://{host}/users/{username}/subscriptions" + _backup_data(username, password, + "watched repositories", + template, + output_file, + account_cwd) - # if args.include_followers or args.include_everything: - output_file = f"{account_cwd}/followers.json" - template = "https://{0}/users/{1}/followers".format(get_github_api_host(args), args.user) - _backup_data(username, password, - "followers", - template, - output_file, - account_cwd) +# if args.include_followers or args.include_everything: + output_file = f"{account_cwd}/followers.json" + template = "https://{host}/users/{usernamec}/followers" + _backup_data(username, password, + "followers", + template, + output_file, + account_cwd) - # if args.include_following or args.include_everything: - output_file = f"{account_cwd}/following.json". - template = f"https://{get_github_api_host(args)}/users/{username}/following" - _backup_d'ata(username, password, - "following", - template, - output_file, - account_cwd) +# if args.include_following or args.include_everything: + output_file = f"{account_cwd}/following.json" + template = "https://{host}/users/{usernamec}/following" + _backup_data(username, password, + "following", + template, + output_file, + account_cwd) def _backup_data(username, password, name, template, output_file, output_directory, overwrite=True): diff --git a/app/utils.py b/app/utils.py index e7b9c6a..4d694a8 100644 --- a/app/utils.py +++ b/app/utils.py @@ -174,9 +174,9 @@ def add_identity(self): """ """ if platform.system() == "Darwin": - command = "ssh-add -K {self.private_key}" + command = f"ssh-add -K {self.private_key}" else: - command = "ssh-add {self.private_key}" + command = f"ssh-add {self.private_key}" for res in self.os_command_output(command, "Adding new keys to ssh"): logger.info(res) return