如何将200多条下载的推文附加到dataframe？

import pandas as pd import numpy as np import tweepy from datetime import timedelta def get_tweets(handle): batch_count_for_tweet_downloads = 200 try: alltweets = [] tweets = api_twitter.user_timeline(screen_name=handle, count=batch_count_for_tweet_downloads, exclude_replies=True, include_rts=False, lang="en", tweet_mode="extended") # ---GET MORE THAN 200 TWEETS alltweets.extend(tweets) oldest = alltweets[-1].id - 1 oldest_datetime = pd.to_datetime(str(pd.to_datetime(oldest))[:-10]).strftime("%Y-%m-%d %H:%M:%S") print(f"Getting Tweets For " + handle + ", After: " + oldest_datetime) while len(tweets) > 0: tweets = api_twitter.user_timeline(screen_name=handle, count=batch_count_for_tweet_downloads, max_id=oldest) alltweets.extend(tweets) oldest = alltweets[-1].id - 1 print("Count: " + f"...{len(alltweets)} " + handle + " Tweets Downloaded") #--- df = pd.DataFrame(data=[tweets.user.screen_name for tweets in alltweets], columns=['Handle']) df['Tweets'] = np.array([tweets.full_text for tweets in alltweets]) df['Date'] = np.array([tweets.created_at - timedelta(hours=4) for tweets in alltweets]) df['Len'] = np.array([len(tweets.full_text) for tweets in alltweets]) df['Like_count'] = np.array([tweets.favorite_count for tweets in alltweets]) df['RT_count'] = np.array([tweets.retweet_count for tweets in alltweets]) total_tweets.extend(alltweets) print(handle + " Total Tweets Extracted: {}".format(len(alltweets))) except: pass return df

import tweepy import pandas as pd import numpy as np from datetime import timedelta handles = ['@MrML16419203', '@d00tn00t'] consumerKey = 'x' consumerSecret = 'x' accessToken = 'x' accessTokenSecret = 'x' authenticate = tweepy.OAuthHandler(consumerKey, consumerSecret) authenticate.set_access_token(accessToken, accessTokenSecret) api_twitter = tweepy.API(authenticate, wait_on_rate_limit=True) total_tweets = [] def get_tweets(handle): batch_count_for_tweet_downloads = 200 try: alltweets = [] tweets = api_twitter.user_timeline(screen_name=handle, count=batch_count_for_tweet_downloads, exclude_replies=True, include_rts=False, lang="en", tweet_mode="extended") alltweets.extend(tweets) oldest = alltweets[-1].id - 1 oldest_datetime = pd.to_datetime(str(pd.to_datetime(oldest))[:-10]).strftime("%Y-%m-%d %H:%M:%S") print(f"Getting Tweets For " + handle + ", After: " + oldest_datetime) while len(tweets) > 0: tweets = api_twitter.user_timeline(screen_name=handle, count=batch_count_for_tweet_downloads, max_id=oldest) alltweets.extend(tweets) if len(alltweets) > 0: oldest = alltweets[-1].id - 1 else: pass print("Count: " + f"...{len(alltweets)} " + handle + " Tweets Downloaded") print('---Total Downloaded: ' + str(len(alltweets)) + ' for ' + handle + '---') df = pd.DataFrame(data=[tweets.user.screen_name for tweets in alltweets], columns=['Handle']) df['Tweets'] = np.array([tweets.full_text for tweets in alltweets]) df['Date'] = np.array([tweets.created_at - timedelta(hours=4) for tweets in alltweets]) df['Len'] = np.array([len(tweets.full_text) for tweets in alltweets]) df['Like_count'] = np.array([tweets.favorite_count for tweets in alltweets]) df['RT_count'] = np.array([tweets.retweet_count for tweets in alltweets]) print([tweets.favorite_count for tweets in alltweets]) print(np.array([tweets.favorite_count for tweets in alltweets])) total_tweets.extend(alltweets) print("----------Total Tweets Extracted: {}".format(df.shape[0]) + "----------") except: pass return df df = pd.DataFrame() for handle in handles: df_new = get_tweets(handle) df = pd.concat((df, df_new)) print(df)

Getting Tweets For @MrML16419203, After: 2011-03-19 07:03:53 Count: ...136 @MrML16419203 Tweets Downloaded ---Total Downloaded: 136 for @MrML16419203--- [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ----------Total Tweets Extracted: 136---------- Getting Tweets For @d00tn00t, After: 2009-11-27 19:18:58 Count: ...338 @d00tn00t Tweets Downloaded Count: ...530 @d00tn00t Tweets Downloaded Count: ...546 @d00tn00t Tweets Downloaded Count: ...546 @d00tn00t Tweets Downloaded ---Total Downloaded: 546 for @d00tn00t--- Handle Tweets Date Len Like_count RT_count 0 MrML16419203 132716 2020-09-02 02:18:28 6.0 0.0 0.0 1 MrML16419203 432881 2020-09-02 02:04:23 6.0 0.0 0.0 2 MrML16419203 973625 2020-09-02 02:04:09 6.0 0.0 0.0 3 MrML16419203 1234567 2020-09-02 01:55:10 7.0 0.0 0.0 4 MrML16419203 225865 2020-09-02 01:27:11 6.0 0.0 0.0 .. ... ... ... ... ... ... 541 d00tn00t NaN NaT NaN NaN NaN 542 d00tn00t NaN NaT NaN NaN NaN 543 d00tn00t NaN NaT NaN NaN NaN 544 d00tn00t NaN NaT NaN NaN NaN 545 d00tn00t NaN NaT NaN NaN NaN [682 rows x 6 columns]

1条回答

网友

1楼 · 发布于 2024-05-23 19:28:47

对于任何一个偶然发现这一点的人，我让它发挥作用：

def get_tweets(screen_name):
batch_count_for_tweet_downloads = 200
try:
    alltweets = []
    tweets = api_twitter.user_timeline(screen_name=screen_name,
                                       count=batch_count_for_tweet_downloads,
                                       exclude_replies=True,
                                       include_rts=False,
                                       lang="en")
    alltweets.extend(tweets)
    oldest = alltweets[-1].id - 1
    oldest_datetime = pd.to_datetime(str(pd.to_datetime(oldest))[:-10]).strftime("%Y-%m-%d %H:%M:%S")
    print(f"Getting Tweets For " + handle + ", After: " + oldest_datetime)
    while len(tweets) > 0:
        tweets = api_twitter.user_timeline(screen_name=screen_name, count=batch_count_for_tweet_downloads,
                                           max_id=oldest)
        alltweets.extend(tweets)
        if len(alltweets) > 0:
            oldest = alltweets[-1].id - 1
        else:
            pass
        print("Count: " + f"...{len(alltweets)} " + handle + " Tweets Downloaded")
    outtweets = [
        [tweet.user.screen_name, tweet.text, tweet.created_at, len(tweet.text),
         tweet.favorite_count, tweet.retweet_count] for tweet in alltweets]
    df_tweet_function = pd.DataFrame(outtweets,
                                     columns=['Handle', 'Tweets', 'Date', 'Len', 'Like_count', 'RT_count'])
    print('     Total Downloaded: ' + str(len(alltweets)) + ' for ' + handle + '     ')
except tweepy.error.TweepError:
    pass
return df_tweet_function

df=pd.DataFrame（）如果name='main'：对于句柄中的句柄：获取推文（句柄） df=df.append（获取推文（句柄））打印（“-提取的推文总数：{}”。格式（df.shape[0]）+“-”）

相关问题更多 >

编程相关推荐

热门问题

热门文章