Sunday, May 24, 2009

Python 3.0 SSL over Proxy

With the IT world the way it is now, I'm certain the majority of enterprises put their users behind proxy servers, and I'm also certain that a lot of users behind those proxies need to access SSL-secured sites programmatically. With Python touting itself as an enterprise-worthy product, and given the batteries-included philosophy of the core language libraries, I'm surprised that there is no built-in support for accessing SSL-secured sites over a proxy. Admittedly there is a patch in the root bug report for this feature; however, it's been around for years and it's only just getting to the implementation stage. Hopefully it's going to make it into 3.1.

The Python HOWTO documentation points to a cookbook recipe that handles this issue, but it's still only available for Python 2.x. Since I had to implement this for a little code snippet, here it is: SSL over proxy, the Python 3.0 version.

import urllib, urllib.parse, ssl, http.client, socket
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

class ProxyHTTPConnection(http.client.HTTPConnection):
    """HTTP connection that tunnels through a proxy using ``CONNECT``.

    The socket is opened to the host/port given to the constructor (the
    proxy).  ``request`` must be called with an absolute URL; the host and
    port parsed from it become the target of the ``CONNECT`` request that
    ``connect`` sends to the proxy.
    """

    # Default ports used when the request URL does not name one explicitly.
    _ports = {'http': 80, 'https': 443}

    def request(self, method, url, body=None, headers=None):
        """Record the real target from *url*, then send the request.

        Args:
            method: HTTP method name, e.g. ``"GET"``.
            url: absolute URL of the resource (scheme and host required).
            body: optional request body, passed through unchanged.
            headers: optional mapping of request headers.

        Raises:
            ValueError: if *url* has no scheme, no host, an invalid port,
                or a scheme with no known default port.
        """
        # request() is called before connect(), so the URL can be
        # interpreted here to get the real host/port for the CONNECT
        # request sent to the proxy.
        parts = urllib.parse.urlsplit(url)
        if not parts.scheme:
            raise ValueError("unknown URL type: %s" % url)
        host = parts.hostname
        if not host:
            raise ValueError("no host given in URL: %s" % url)
        # urlsplit strips the brackets from IPv6 literals; CONNECT needs them.
        if ':' in host:
            host = '[%s]' % host
        port = parts.port
        # If the port is not given, fall back to the scheme's default.
        if port is None:
            try:
                port = self._ports[parts.scheme]
            except KeyError:
                raise ValueError("unknown protocol for: %s" % url) from None
        self._real_host = host
        self._real_port = port
        if headers is None:
            headers = {}
        http.client.HTTPConnection.request(self, method, url, body, headers)

    def connect(self):
        """Connect to the proxy and establish the ``CONNECT`` tunnel.

        Raises:
            socket.error: if the proxy answers with a status other than 200.
        """
        http.client.HTTPConnection.connect(self)
        # Send the proxy CONNECT request.
        connect_string = "CONNECT {0}:{1} HTTP/1.0\r\n\r\n".format(
            self._real_host, self._real_port)
        self.send(connect_string.encode('utf-8'))
        # Expect an "HTTP/1.0 200 Connection established" status line.
        response = self.response_class(self.sock, method=self._method)
        try:
            (version, code, message) = response._read_status()
            # Proxy authentication challenges could be handled here.
            if code != 200:
                # Proxy returned an error: abort the connection and raise.
                self.close()
                raise socket.error(
                    "Proxy connection failed: %d %s" % (code, message.strip()))
            # Discard the proxy's header block.  An empty read means the
            # proxy closed the connection, so stop instead of spinning.
            while True:
                line = response.fp.readline()
                if line in (b'\r\n', b'\n', b''):
                    break
        finally:
            # Release the response's file object; self.sock stays open.
            response.close()


class ProxyHTTPSConnection(ProxyHTTPConnection):
    """Proxy-tunnelled connection that speaks TLS to the real host.

    After the ``CONNECT`` tunnel is established by the parent class, the
    socket is wrapped with TLS so the remaining traffic is encrypted
    end-to-end with the target server.
    """

    default_port = 443

    def __init__(self, host, timeout=10, port=None, key_file=None,
                 cert_file=None, strict=None):
        """Create the connection to the proxy.

        Args:
            host: proxy host, optionally with ``:port``.
            timeout: socket timeout in seconds.
            port: proxy port, if not part of *host*.
            key_file: optional path to the client private key.
            cert_file: optional path to the client certificate.
            strict: accepted for backward compatibility and ignored.
        """
        # Forward the timeout; otherwise the default above has no effect.
        ProxyHTTPConnection.__init__(self, host, port, timeout=timeout)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        """Open the proxy tunnel, then wrap the socket with TLS."""
        ProxyHTTPConnection.connect(self)
        # Make the socket TLS-aware.  A default context verifies the server
        # certificate and hostname (module-level ssl.wrap_socket did neither
        # and no longer exists in current Python releases).
        context = ssl.create_default_context()
        if self.cert_file:
            context.load_cert_chain(self.cert_file, self.key_file)
        # The stored host may be a bracketed IPv6 literal; TLS wants it bare.
        server_name = self._real_host.strip('[]')
        self.sock = context.wrap_socket(self.sock, server_hostname=server_name)


class ConnectHTTPHandler(urllib.request.HTTPHandler):
    """HTTP handler that always opens requests with ProxyHTTPConnection."""

    def do_open(self, http_class, req):
        """Open *req*, substituting ProxyHTTPConnection for *http_class*."""
        # The connection class chosen by the caller is deliberately replaced.
        return super().do_open(ProxyHTTPConnection, req)

class ConnectHTTPSHandler(urllib.request.HTTPSHandler):
    """HTTPS handler that always opens requests with ProxyHTTPSConnection."""

    def do_open(self, http_class, req, **http_conn_args):
        """Open *req*, substituting ProxyHTTPSConnection for *http_class*.

        Args:
            http_class: connection class chosen by the caller; replaced.
            req: the ``urllib.request.Request`` being opened.
            **http_conn_args: extra connection keywords.  Newer versions of
                ``HTTPSHandler.https_open`` pass ``context=...`` here, which
                previously raised ``TypeError``.  They are accepted but not
                forwarded, because ProxyHTTPSConnection's constructor does
                not take them.
        """
        return urllib.request.HTTPSHandler.do_open(self, ProxyHTTPSConnection, req)


if __name__ == '__main__':
    # Example: fetch an HTTPS page through a proxy with basic authentication.

    # Build the proxy handler.
    proxies = {'http': 'http://aproxyserver:8080/', 'https': 'http://aproxyserver:8080/'}
    proxy_handler = urllib.request.ProxyHandler(proxies)
    # Build the basic authentication handler.
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, 'https://www.securedurl.com/', 'username', 'password')
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
    # Create the "opener" (OpenerDirector instance) and install it globally.
    opener = urllib.request.build_opener(ConnectHTTPHandler, ConnectHTTPSHandler, proxy_handler, auth_handler)
    urllib.request.install_opener(opener)
    request_url = "https://www.securedurl.com/default.html"
    req = Request(request_url)
    try:
        response = urlopen(req)
        print(response.read())
    except HTTPError as e:
        # The server answered with an error status; headers and code exist.
        print(e.headers)
        print(e.code)
    except URLError as e:
        # No HTTP response at all (DNS failure, refused connection, ...);
        # a plain URLError carries only a reason.
        print(e.reason)

3 comments:

  1. I have been trying to get this to work for a couple days now with no luck. It seems to have trouble parsing my url, causing the error on line 12 of the module. I am using Python 3.1.
    Sample code:
    import urllib
    from ProxyHTTPConnection import ConnectHTTPSHandler
    opener = urllib.request.build_opener(ConnectHTTPSHandler)
    urllib.request.install_opener(opener)
    req = urllib.request.Request(url='https://www.____.com')
    req.set_proxy('proxy._______.com:___', 'https')
    response = urllib.request.urlopen(req).read().strip()
    print(response)

    I'm a newB in Python, transitioning from Perl. Of course the first project I work on is the least trivial :/

    ReplyDelete
  2. Found a solution here. Everything works now :).
    http://bugs.python.org/issue1424152

    ReplyDelete
  3. HTTPSConnection.set_tunnel() is now the official solution (available in python 2.7 or later)

    ReplyDelete