Switch recaptcha, search, spiders and varnish to requests
author	Magnus Hagander <magnus@hagander.net>
Mon, 21 Jan 2019 11:21:46 +0000 (12:21 +0100)
committer	Magnus Hagander <magnus@hagander.net>
Sat, 26 Jan 2019 15:19:26 +0000 (16:19 +0100)
Easier to switch to the requests package than to figure out how to deal
with some of the encoding changes manually. And as a bonus, the requests
package is much nicer to work with going forward.
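
The pattern adopted at each call site is roughly the following (a minimal
sketch; the URL and payload are placeholders for illustration, not taken
from the actual code):

    import requests

    try:
        # requests encodes the form payload and decodes the response body for us
        r = requests.post('https://example.org/api', {'key': 'value'}, timeout=5)
    except requests.exceptions.RequestException as e:
        raise RuntimeError('API call failed: %s' % e)
    if r.status_code != 200:
        raise RuntimeError('Unexpected status %s' % r.status_code)
    data = r.json()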

pgweb/account/recaptcha.py
pgweb/search/views.py
tools/ftp/spider_ftp.py
tools/ftp/spider_yum.py
tools/varnishqueue/varnish_queue.py

diff --git a/pgweb/account/recaptcha.py b/pgweb/account/recaptcha.py
index a3688c564417851117b2374ec1e6341cc50c41ce..1a10f60680a2da52443bc1c251168ad6714d0d18 100644 (file)
@@ -7,9 +7,7 @@ from django.forms import ValidationError
 from django.utils.safestring import mark_safe
 from django.conf import settings
 
-import httplib
-import urllib
-import json
+import requests
 
 import logging
 log = logging.getLogger(__name__)
@@ -45,38 +43,28 @@ class ReCaptchaField(forms.CharField):
         super(ReCaptchaField, self).clean(value)
 
         # Validate the recaptcha
-        c = httplib.HTTPSConnection('www.google.com', strict=True, timeout=5)
         param = {
             'secret': settings.RECAPTCHA_SECRET_KEY,
             'response': value,
         }
-
-        # Temporarily don't include remoteip, because it only shows our ssl terminating
-        # frontends.
-#        if self.remoteip:
-#            param['remoteip'] = self.remoteip
-
         try:
-            c.request('POST', '/recaptcha/api/siteverify', urllib.urlencode(param), {
-                'Content-type': 'application/x-www-form-urlencoded',
-            })
-            c.sock.settimeout(10)
-        except Exception as e:
-            # Error to connect at TCP level
+            r = requests.post(
+                "https://www.google.com/recaptcha/api/siteverify", param,
+                headers={
+                    'Content-type': 'application/x-www-form-urlencoded',
+                },
+                timeout=5,
+            )
+        except requests.exceptions.RequestException as e:
             log.error('Failed to connect to google recaptcha API: %s' % e)
             raise ValidationError('Failed in API call to google recaptcha')
 
-        try:
-            r = c.getresponse()
-        except:
-            log.error('Failed in API call to google recaptcha')
-            raise ValidationError('Failed in API call to google recaptcha')
-        if r.status != 200:
+        if r.status_code != 200:
             log.error('Invalid response code from google recaptcha')
             raise ValidationError('Invalid response code from google recaptcha')
 
         try:
-            j = json.loads(r.read())
+            j = r.json()
         except:
             log.error('Invalid response structure from google recaptcha')
             raise ValidationError('Invalid response structure from google recaptcha')
diff --git a/pgweb/search/views.py b/pgweb/search/views.py
index c1b0e11c438d1f494cbef31520350f151fe6ee69..677f05e243af914f7345f8d3afafc8ef2695af64 100644 (file)
@@ -5,12 +5,9 @@ from django.conf import settings
 
 from pgweb.util.decorators import cache
 
-import httplib
-import urllib
+import urllib.parse
+import requests
 import psycopg2
-import json
-import socket
-import ssl
 
 from pgweb.lists.models import MailingList
 
@@ -177,24 +174,29 @@ def search(request):
                 memc = None
         if not hits:
             # No hits found - so try to get them from the search server
-            if settings.ARCHIVES_SEARCH_PLAINTEXT:
-                c = httplib.HTTPConnection(settings.ARCHIVES_SEARCH_SERVER, strict=True, timeout=5)
-            else:
-                c = httplib.HTTPSConnection(settings.ARCHIVES_SEARCH_SERVER, strict=True, timeout=5)
-            c.request('POST', '/archives-search/', urlstr, {'Content-type': 'application/x-www-form-urlencoded; charset=utf-8'})
-            c.sock.settimeout(20)  # Set a 20 second timeout
             try:
-                r = c.getresponse()
-            except (socket.timeout, ssl.SSLError):
+                r = requests.post(
+                    "{}://{}/archives-search/".format(settings.ARCHIVES_SEARCH_PLAINTEXT and 'http' or 'https', settings.ARCHIVES_SEARCH_SERVER),
+                    urlstr,
+                    headers={
+                        'Content-type': 'application/x-www-form-urlencoded; charset=utf-8',
+                    },
+                    timeout=5,
+                )
+            except requests.exceptions.Timeout:
                 return render(request, 'search/listsearch.html', {
                     'search_error': 'Timeout when talking to search server. Please try your search again later, or with a more restrictive search terms.',
                 })
-            if r.status != 200:
+            except:
+                return render(request, 'search/listsearch.html', {
+                    'search_error': 'General error when talking to search server.',
+                })
+            if r.status_code != 200:
                 memc = None
                 return render(request, 'search/listsearch.html', {
                     'search_error': 'Error talking to search server: %s' % r.reason,
                 })
-            hits = json.loads(r.read())
+            hits = r.json()
             if has_memcached and memc:
                 # Store them in memcached too! But only for 10 minutes...
                 # And always compress it, just because we can
diff --git a/tools/ftp/spider_ftp.py b/tools/ftp/spider_ftp.py
index 4582bc2a137774892da4e414c3991821f2cb3023..68361299264e8bf4cbdf6a92a9e74987613ce0d9 100755 (executable)
@@ -11,7 +11,7 @@ import os
 from datetime import datetime
 import pickle as pickle
 import codecs
-import urllib2
+import requests
 
 # Directories, specified from the root of the ftp tree and down, that
 # will be recursively excluded from the pickle.
@@ -80,14 +80,17 @@ if len(sys.argv) != 3:
 parse_directory(sys.argv[1], len(sys.argv[1]))
 
 if sys.argv[2].startswith("http://") or sys.argv[2].startswith("https://"):
-    o = urllib2.build_opener(urllib2.HTTPHandler)
-    r = urllib2.Request(sys.argv[2], data=pickle.dumps(allnodes))
-    r.add_header('Content-type', 'application/octet-stream')
-    r.add_header('Host', 'www.postgresql.org')
-    r.get_method = lambda: 'PUT'
-    u = o.open(r)
-    x = u.read()
-    if x != "NOT CHANGED" and x != "OK":
+    r = requests.put(
+        sys.argv[2],
+        data=pickle.dumps(allnodes),
+        headers={
+            'Content-type': 'application/octet-stream',
+        },
+    )
+    if r.status_code != 200:
+        print("Failed to upload, code: %s" % r.status_code)
+        sys.exit(1)
+    elif r.text != "NOT CHANGED" and r.text != "OK":
         print("Failed to upload: %s" % x)
         sys.exit(1)
 else:
diff --git a/tools/ftp/spider_yum.py b/tools/ftp/spider_yum.py
index 79ab45bd15e3807369f529a81ebd18fbb1cebde0..912d1d68db0b7ece5114a8c9dd8070f840d2e14b 100755 (executable)
@@ -4,7 +4,7 @@ import sys
 import os
 import re
 import json
-import urllib2
+import requests
 from decimal import Decimal
 from tempfile import NamedTemporaryFile
 
@@ -96,14 +96,18 @@ if __name__ == "__main__":
     j = json.dumps({'platforms': platforms, 'reporpms': reporpms})
 
     if args.target.startswith('http://') or args.target.startswith('https://'):
-        o = urllib.request.build_opener(urllib.request.HTTPHandler)
-        r = urllib.request.Request(sys.argv[2], data=j)
-        r.add_header('Content-type', 'application/json')
-        r.add_header('Host', 'www.postgresql.org')
-        r.get_method = lambda: 'PUT'
-        u = o.open(r)
-        x = u.read()
-        if x != "NOT CHANGED" and x != "OK":
+        r = requests.put(
+            args.target,
+            data=j,
+            headers={
+                'Content-type': 'application/json',
+            },
+        )
+        if r.status_code != 200:
+            print("Failed to upload, code: %s" % r.status_code)
+            sys.exit(1)
+
+        if r.text != "NOT CHANGED" and r.text != "OK":
             print("Failed to upload: %s" % x)
             sys.exit(1)
     else:
diff --git a/tools/varnishqueue/varnish_queue.py b/tools/varnishqueue/varnish_queue.py
index 64f55f0f1b7ee48eba118231b608982cacc5306a..3f73818b818ecdfeb318be414a3769b53efee0d0 100755 (executable)
@@ -9,7 +9,7 @@
 import time
 import sys
 import select
-import httplib
+import requests
 import multiprocessing
 import logging
 import psycopg2
@@ -18,13 +18,12 @@ from setproctitle import setproctitle
 
 def do_purge(consumername, headers):
     try:
-        conn = httplib.HTTPSConnection('%s.postgresql.org' % consumername)
-        conn.request("GET", "/varnish-purge-url", '', headers)
-        resp = conn.getresponse()
-        conn.close()
-        if resp.status == 200:
+        r = requests.get("https://{}.postgresql.org/varnish-purge-url".format(consumername),
+                         headers=headers,
+                         timeout=10)
+        if r.status_code == 200:
             return True
-        logging.warning("Varnish purge on %s returned status %s (%s)" % (consumername, resp.status, resp.reason))
+        logging.warning("Varnish purge on %s returned status %s (%s)" % (consumername, r.status_code, r.reason))
         return False
     except Exception as ex:
         logging.error("Exception purging on %s: %s" % (consumername, ex))