From: Magnus Hagander Date: Sat, 16 Nov 2019 13:21:56 +0000 (+0100) Subject: Further hacks to deal with changes on the CM structure X-Git-Url: http://git.postgresql.org/gitweb/edit?a=commitdiff_plain;h=4671d148bb3268ca0a0d9d1d9eb40663f654e72d;p=pgeu-web.git Further hacks to deal with changes on the CM structure It seems they now mix redirects with non-redirects when hitting the same URL. But it also seems to work regardless, so just ignore the page if we get a non-redirect. --- diff --git a/code/pgeusite/cmutuel/management/commands/cmscrape.py b/code/pgeusite/cmutuel/management/commands/cmscrape.py index 54ac1b6..e4a28ec 100755 --- a/code/pgeusite/cmutuel/management/commands/cmscrape.py +++ b/code/pgeusite/cmutuel/management/commands/cmscrape.py @@ -52,13 +52,15 @@ class SessionWrapper(object): def post(self, url, postdict): return self.session.post(url, data=postdict, allow_redirects=False) - def expect_redirect(self, fetchpage, redirectto, postdata=None): + def expect_redirect(self, fetchpage, redirectto, postdata=None, allow_200=True): if postdata: r = self.post(fetchpage, postdata) else: r = self.get(fetchpage) if not r.is_redirect: + if allow_200 and r.status_code == 200: + return "" raise CommandError("Supposed to receive redirect for %s, got %s" % (fetchpage, r.status_code)) if not isinstance(redirectto, list): redirrectto = [redirectto, ] @@ -99,9 +101,13 @@ class Command(BaseCommand): 'flag': 'password', }) + if verbose: + self.stdout.write("Following a redirect chain for cookies") + # Follow a redirect chain to collect more cookies sess.expect_redirect('https://www.creditmutuel.fr/en/banque/pageaccueil.html', - 'https://www.creditmutuel.fr/en/banque/paci_engine/engine.aspx') + 'https://www.creditmutuel.fr/en/banque/paci_engine/engine.aspx', + allow_200=True) # Download the form if verbose: