Requests 本身不提供代理池,然而爬数据又要用,所以只能自己搞。其实还挺简单的。我也不知道为什么这么有用的 feature 一直没有被加入。

import requests


class Client:
    """Wrapper around a ``requests.Session`` with an optional proxy pool.

    While a pool is installed, the session's ``get`` and ``post`` are
    replaced by wrappers that pick a random proxy for every call.
    """

    def __init__(self):
        self._session = requests.Session()
        # List of per-request ``proxies`` dicts, or None when no pool is set.
        self.proxies = None

    def set_proxy_pool(self, proxies, auth=None, https=True):
        """Randomly choose a proxy for every GET/POST request.

        Calling this while a pool is already installed replaces the old
        pool instead of stacking a second layer of wrappers.

        :param proxies: list of proxies, like ["ip1:port1", "ip2:port2"]
        :param auth: optional auth object; when truthy it is passed as the
            ``auth`` keyword of every wrapped request
        :param https: default is True, pass False if you don't need https proxy
        """
        from random import choice

        # Restore the genuine methods first.  Without this, a second call
        # would save the previous wrapper as ``original_get`` and that
        # wrapper would end up calling itself forever.
        self.remove_proxy_pool()

        if https:
            self.proxies = [{'http': 'http://' + p, 'https': 'https://' + p}
                            for p in proxies]
        else:
            self.proxies = [{'http': 'http://' + p} for p in proxies]

        def with_random_proxy(send):
            # ``send`` is captured by the closure, so later changes to the
            # session's attributes cannot redirect the wrapper onto itself.
            def wrapper(url, *args, **kwargs):
                kwargs['proxies'] = choice(self.proxies)
                if auth:
                    kwargs['auth'] = auth
                return send(url, *args, **kwargs)
            return wrapper

        session = self._session
        # The originals stay reachable on the session so that
        # remove_proxy_pool() can put them back.
        session.original_get = session.get
        session.original_post = session.post
        session.get = with_random_proxy(session.original_get)
        session.post = with_random_proxy(session.original_post)

    def remove_proxy_pool(self):
        """Uninstall the proxy pool and restore the session's own methods.

        Does nothing (apart from clearing ``self.proxies``) when no pool is
        currently installed.
        """
        self.proxies = None
        session = self._session
        for method in ('get', 'post'):
            backup = 'original_' + method
            if hasattr(session, backup):
                setattr(session, method, getattr(session, backup))
                delattr(session, backup)

    # You can define whatever operations using self._session

替换掉 Session 原本的 get、post 方法就行了,不会有什么副作用。class Client 并不必需,直接操作 Session 是一样的。

可以用httpbin来做验证

def test_proxy():
    """Check against httpbin that requests leave through the proxy pool.

    Runs five GET/POST rounds with the default pool, then reinstalls the
    pool with ``https=False`` and runs five more GET rounds.  Needs network
    access and working proxies.
    """
    # visit http://cn-proxy.com/ to get available proxies if test failed
    proxy_ips = ['112.25.41.136', '180.97.29.57']
    ip_url = 'http://httpbin.org/ip'
    post_url = 'http://httpbin.org/post'
    rounds = 5

    client = Client()
    session = client._session

    client.set_proxy_pool(proxy_ips)
    for _ in range(rounds):
        seen = session.get(ip_url).json()
        assert seen['origin'] in proxy_ips

        echoed = session.post(post_url, data={'m': '1'}).json()
        assert echoed['form'] == {'m': '1'}
        print(echoed['origin'])
        assert echoed['origin'] in proxy_ips

    # Reinstall the pool without the https mapping and check GET again.
    client.remove_proxy_pool()
    client.set_proxy_pool(proxy_ips, https=False)
    for _ in range(rounds):
        seen = session.get(ip_url).json()
        print(seen['origin'])
        assert seen['origin'] in proxy_ips

转载自 :https://laike9m.com/blog/requests-dai-li-chi,92/
优质内容筛选与推荐>>
1、pycharm 快捷键
2、《深入浅出Google Android》即将隆重上市!
3、分页类
4、图的遍历
5、CAFFE学习笔记(四)将自己的jpg数据转成lmdb格式


长按二维码向我转账

受苹果公司新规定影响,微信 iOS 版的赞赏功能被关闭,可通过二维码转账支持公众号。

    阅读
    好看
    已推荐到看一看
    你的朋友可以在“发现”-“看一看”看到你认为好看的文章。
    已取消,“好看”想法已同步删除
    已推荐到看一看 和朋友分享想法
    最多200字,当前共 发送

    已发送

    朋友将在看一看看到

    确定
    分享你的想法...
    取消

    分享想法到看一看

    确定
    最多200字,当前共

    发送中

    网络异常,请稍后重试

    微信扫一扫
    关注该公众号