Python请求和持久会话

2024-04-23 23:15:54 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在使用requests模块(Python 2.5的0.10.0版)。 我已经知道如何将数据提交到网站上的登录表单并检索会话密钥,但是我看不到在后续请求中使用此会话密钥的明显方法。 有人可以在下面的代码中填写省略号或建议其他方法吗?

>>> import requests
>>> login_data =  {'formPosted':'1', 'login_email':'me@example.com', 'password':'pw'}
>>> r = requests.post('https://localhost/login.py', login_data)
>>> 
>>> r.text
u'You are being redirected <a href="profilePage?_ck=1349394964">here</a>'
>>> r.cookies
{'session_id_myapp': '127-0-0-1-825ff22a-6ed1-453b-aebc-5d3cf2987065'}
>>> 
>>> r2 = requests.get('https://localhost/profile_data.json', ...)

Tags: 模块数据方法代码httpsimportlocalhost表单
3条回答

看看我在类似问题中的回答:

python: urllib2 how to send cookie with urlopen request

import urllib2
import urllib
from cookielib import CookieJar

cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# input-type values from the html form
formdata = { "username" : username, "password": password, "form-id" : "1234" }
data_encoded = urllib.urlencode(formdata)
response = opener.open("https://page.com/login.php", data_encoded)
content = response.read()

编辑:

我知道我的答案得到了一些反对票,但没有解释性的评论。我猜是因为我指的是urllib库而不是requests。我这样做是因为操作人员需要有关requests的帮助,或者有人建议另一种方法。

可以使用以下方法轻松创建持久会话:

s = requests.Session()

在此之后,继续您的请求,因为您将:

s.post('https://localhost/login.py', login_data)
#logged in! cookies saved for future requests.
r2 = s.get('https://localhost/profile_data.json', ...)
#cookies sent automatically!
#do whatever, s will keep your cookies intact :)

有关会话的详细信息:https://requests.kennethreitz.org/en/master/user/advanced/#session-objects

其他的答案有助于理解如何维护这样的会话。此外,我还想提供一个类,该类在脚本的不同运行期间(使用缓存文件)保持会话的维护。这意味着只有在需要时才执行正确的“登录”(缓存中不存在超时或会话)。此外,它还支持对“get”或“post”的后续调用进行代理设置。

它是用Python3测试的。

使用它作为自己代码的基础。以下代码片段随GPLv3一起发布

import pickle
import datetime
import os
from urllib.parse import urlparse
import requests    

class MyLoginSession:
    """
    a class which handles and saves login sessions. It also keeps track of proxy settings.
    It does also maintine a cache-file for restoring session data from earlier
    script executions.
    """
    def __init__(self,
                 loginUrl,
                 loginData,
                 loginTestUrl,
                 loginTestString,
                 sessionFileAppendix = '_session.dat',
                 maxSessionTimeSeconds = 30 * 60,
                 proxies = None,
                 userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
                 debug = True,
                 forceLogin = False,
                 **kwargs):
        """
        save some information needed to login the session

        you'll have to provide 'loginTestString' which will be looked for in the
        responses html to make sure, you've properly been logged in

        'proxies' is of format { 'https' : 'https://user:pass@server:port', 'http' : ...
        'loginData' will be sent as post data (dictionary of id : value).
        'maxSessionTimeSeconds' will be used to determine when to re-login.
        """
        urlData = urlparse(loginUrl)

        self.proxies = proxies
        self.loginData = loginData
        self.loginUrl = loginUrl
        self.loginTestUrl = loginTestUrl
        self.maxSessionTime = maxSessionTimeSeconds
        self.sessionFile = urlData.netloc + sessionFileAppendix
        self.userAgent = userAgent
        self.loginTestString = loginTestString
        self.debug = debug

        self.login(forceLogin, **kwargs)

    def modification_date(self, filename):
        """
        return last file modification date as datetime object
        """
        t = os.path.getmtime(filename)
        return datetime.datetime.fromtimestamp(t)

    def login(self, forceLogin = False, **kwargs):
        """
        login to a session. Try to read last saved session from cache file. If this fails
        do proper login. If the last cache access was too old, also perform a proper login.
        Always updates session cache file.
        """
        wasReadFromCache = False
        if self.debug:
            print('loading or generating session...')
        if os.path.exists(self.sessionFile) and not forceLogin:
            time = self.modification_date(self.sessionFile)         

            # only load if file less than 30 minutes old
            lastModification = (datetime.datetime.now() - time).seconds
            if lastModification < self.maxSessionTime:
                with open(self.sessionFile, "rb") as f:
                    self.session = pickle.load(f)
                    wasReadFromCache = True
                    if self.debug:
                        print("loaded session from cache (last access %ds ago) "
                              % lastModification)
        if not wasReadFromCache:
            self.session = requests.Session()
            self.session.headers.update({'user-agent' : self.userAgent})
            res = self.session.post(self.loginUrl, data = self.loginData, 
                                    proxies = self.proxies, **kwargs)

            if self.debug:
                print('created new session with login' )
            self.saveSessionToCache()

        # test login
        res = self.session.get(self.loginTestUrl)
        if res.text.lower().find(self.loginTestString.lower()) < 0:
            raise Exception("could not log into provided site '%s'"
                            " (did not find successful login string)"
                            % self.loginUrl)

    def saveSessionToCache(self):
        """
        save session to a cache file
        """
        # always save (to update timeout)
        with open(self.sessionFile, "wb") as f:
            pickle.dump(self.session, f)
            if self.debug:
                print('updated session cache-file %s' % self.sessionFile)

    def retrieveContent(self, url, method = "get", postData = None, **kwargs):
        """
        return the content of the url with respect to the session.

        If 'method' is not 'get', the url will be called with 'postData'
        as a post request.
        """
        if method == 'get':
            res = self.session.get(url , proxies = self.proxies, **kwargs)
        else:
            res = self.session.post(url , data = postData, proxies = self.proxies, **kwargs)

        # the session has been updated on the server, so also update in cache
        self.saveSessionToCache()            

        return res

使用上述类的代码段可能如下所示:

if __name__ == "__main__":
    # proxies = {'https' : 'https://user:pass@server:port',
    #           'http' : 'http://user:pass@server:port'}

    loginData = {'user' : 'usr',
                 'password' :  'pwd'}

    loginUrl = 'https://...'
    loginTestUrl = 'https://...'
    successStr = 'Hello Tom'
    s = MyLoginSession(loginUrl, loginData, loginTestUrl, successStr, 
                       #proxies = proxies
                       )

    res = s.retrieveContent('https://....')
    print(res.text)

    # if, for instance, login via JSON values required try this:
    s = MyLoginSession(loginUrl, None, loginTestUrl, successStr, 
                       #proxies = proxies,
                       json = loginData)

相关问题 更多 >