#
import sys
-import urllib
-import httplib
+import requests
import re
-import HTMLParser
-
-BOUNDARY = "-=--=foobar-=--="
-
-
-def encode_multipart_formdata(fields, files):
- L = []
- for (key, value) in fields:
- L.append('--' + BOUNDARY)
- L.append('Content-Disposition: form-data; name="%s"' % key)
- L.append('')
- L.append(value)
- for (key, filename, value) in files:
- L.append('--' + BOUNDARY)
- L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
- L.append('Content-Type: text/html')
- L.append('')
- L.append(value)
- L.append('--' + BOUNDARY + '--')
- L.append('')
- body = "\r\n".join(L)
- return body
+import html.parser
if __name__ == "__main__":
print("Usage: localhtmlvalidate.py <local url>")
sys.exit(1)
- contents = urllib.urlopen(sys.argv[1]).read()
+ r = requests.get(sys.argv[1])
+ contents = r.text
# Try to figure out where the actual contents start :)
try:
firstline = 0
# Generate a form body
- body = encode_multipart_formdata(
- [
- ('charset', 'utf-8'),
- ('doctype', 'inline'),
- ('group', '0'),
- ('verbose', '1'),
- ],
- [('uploaded_file', 'test.html', contents)]
- )
+ data = {
+ 'doctype': 'Inline',
+ 'group': '0',
+ 'verbose': '1',
+ 'prefill': '1',
+ 'prefill_doctype': 'html401',
+ 'fragment': contents,
+ }
# Now submit it to the w3c validator
- h = httplib.HTTP("validator.w3.org")
- h.putrequest("POST", "/check")
- h.putheader("User-Agent: localcheck-tester/0.0")
- h.putheader("content-type", "multipart/form-data; boundary=%s" % BOUNDARY)
- h.putheader("content-length", str(len(body)))
- h.endheaders()
- h.send(body)
- errcode, errmsg, headers = h.getreply()
- rbody = h.getfile().read()
- if headers['x-w3c-validator-status'] == 'Valid':
+ resp = requests.post(
+ 'https://validator.w3.org/check',
+ data=data,
+ headers={
+ "User-Agent": "localcheck-tester/0.0",
+ },
+ timeout=20,
+ )
+ if resp.headers['x-w3c-validator-status'] == 'Valid':
print("Page validates!")
sys.exit(0)
- elif headers['x-w3c-validator-status'] == 'Invalid':
+ elif resp.headers['x-w3c-validator-status'] == 'Invalid':
print("Invalid!")
- print("Errors: %s" % headers['x-w3c-validator-errors'])
- print("Warnings: %s" % headers['x-w3c-validator-warnings'])
- hp = HTMLParser.HTMLParser()
- for m in re.findall('<li class="msg_err">.*?</li>', rbody, re.DOTALL):
- r = re.search('<em>Line (\d+).*<span class="msg">(.*?)</span>', m, re.DOTALL)
- print("Line %s (should be around %s): %s" % (r.group(1), int(r.group(1)) - firstline, hp.unescape(r.group(2))))
-
- r2 = re.search('<code class="input">(.*?)<strong title=".*?">(.*?)</strong>(.*?)</code>', unicode(m, 'utf8'), re.DOTALL)
+ print("Errors: %s" % resp.headers['x-w3c-validator-errors'])
+ print("Warnings: %s" % resp.headers['x-w3c-validator-warnings'])
+ # HTMLParser.unescape() was removed in Python 3.9; entities are decoded with html.unescape() instead.
+ for m in re.findall('<li class="msg_err">.*?</li>', resp.text, re.DOTALL):
+ r = re.search(r'<em>Line <a href="[^"]+">(\d+)</a>.*<span class="msg">(.*?)</span>', m, re.DOTALL)
+ if r:
+ print("Line %s (should be around %s): %s" % (r.group(1), int(r.group(1)) - firstline, html.unescape(r.group(2))))
+ r2 = re.search('<code class="input">(.*?)<strong title=".*?">(.*?)</strong>(.*?)</code>', m, re.DOTALL)
if r2:
s = "%s%s%s" % r2.groups()
- print("Source: %s" % hp.unescape(s).encode('utf-8'))
+ print("Source: %s" % html.unescape(s))
print("")
else:
- print("Unknown status: %s" % headers['x-w3c-validator-status'])
+ # The old 'headers' variable is gone after the switch to requests; read the status from resp.headers.
+ print("Unknown status: %s" % resp.headers['x-w3c-validator-status'])