整数超出范围 - Django
我正在用Django和Tweepy处理Twitter的API。
我用一个辅助函数来保存推文,这个函数大约有50%的时间能正常工作,其余时间会抛出一个“DataError: integer out of range”的错误——目前这个错误在下面的代码中被捕获,并且出错的推文会被记录下来(以便后续处理!),但是现在是时候解决这个问题了。
def read_tweet(tweet_data, current_user):
    """Save a tweet to the database, or return the copy already stored.

    Args:
        tweet_data: Tweet object received from the Twitter API (presumably a
            Tweepy status -- confirm against the caller). Its attributes are
            copied onto the ``Tweet`` model.
        current_user: Object stored as the tweet's ``author``.

    Returns:
        A ``(tweet, created)`` tuple. ``created`` is False when a row with
        this id already existed and True when a new row was inserted. If the
        tweet cannot be saved even with the reduced field set,
        ``(None, None)`` is returned and the raw payload is logged so the
        tweet can be processed later.
    """
    import logging
    from coreapp.models import Tweet
    from django.core.exceptions import ObjectDoesNotExist
    from django.db import DataError

    logger = logging.getLogger('django')

    # We might get weird results where the user has changed their details,
    # so an existing tweet is identified by its id alone.
    try:
        current_tweet = Tweet.objects.get(id=tweet_data.id)
    except ObjectDoesNotExist:
        pass
    else:
        # Return the stored tweet itself (not the user) so that every path
        # of this function yields the same (tweet, created) shape.
        return current_tweet, False

    # First attempt: store every field we know about. Only the primary key
    # is used for the lookup; the remaining values go through ``defaults``
    # so they are used for the INSERT but not as SELECT filters.
    try:
        current_tweet, created = Tweet.objects.get_or_create(
            id=tweet_data.id,
            defaults={
                'truncated': tweet_data.truncated,
                'text': tweet_data.text,
                'in_reply_to_status_id': tweet_data.in_reply_to_status_id,
                'favorite_count': tweet_data.favorite_count,
                'author': current_user,
                '_json': tweet_data._json,
                'source': tweet_data.source,
                'retweeted': tweet_data.retweeted,
                'coordinates': tweet_data.coordinates,
                'entities': tweet_data.entities,
                'in_reply_to_screen_name': tweet_data.in_reply_to_screen_name,
                'id_str': tweet_data.id_str,
                'retweet_count': tweet_data.retweet_count,
                'in_reply_to_user_id': tweet_data.in_reply_to_user_id,
                'favorited': tweet_data.favorited,
                'user': tweet_data.user,
                'geo': tweet_data.geo,
                'in_reply_to_user_id_str': tweet_data.in_reply_to_user_id_str,
                'lang': tweet_data.lang,
                'created_at': tweet_data.created_at,
                'place': tweet_data.place,
            },
        )
        return current_tweet, created
    except DataError:
        # Fall through to the reduced field set below.
        pass

    # Second attempt: store only the essential fields.
    try:
        current_tweet, created = Tweet.objects.get_or_create(
            id=tweet_data.id,
            defaults={
                'text': tweet_data.text,
                'author': current_user,
                '_json': tweet_data._json,
                'user': tweet_data.user,
                'created_at': tweet_data.created_at,
            },
        )
        return current_tweet, created
    except DataError:
        # Record the offending payload so the tweet can be handled later.
        errormessage = {"errortweet": tweet_data._json}
        print(errormessage)
        logger.error(str(errormessage))
        return None, None
Models.py
class Tweet(models.Model):
    """Stored tweet.

    Field names follow the attributes that read_tweet copies from the API
    tweet object. Numeric ids use BigIntegerField; structured values are
    kept in text columns.
    """

    truncated = models.BooleanField(default=False)
    text = models.TextField(max_length=140)
    in_reply_to_status_id = models.BigIntegerField(blank=True, null=True)
    # The tweet's own id is used as the primary key.
    id = models.BigIntegerField(primary_key=True)
    favorite_count = models.BigIntegerField(default=0)
    author = models.ForeignKey(User)
    _json = models.TextField()
    source = models.CharField(max_length=25)
    retweeted = models.BooleanField(default=False)
    coordinates = models.TextField(blank=True, null=True)
    entities = models.TextField(blank=True, null=True)
    in_reply_to_screen_name = models.CharField(
        max_length=25, blank=True, null=True)
    id_str = models.CharField(max_length=25)
    retweet_count = models.BigIntegerField(default=0)
    in_reply_to_user_id = models.BigIntegerField(blank=True, null=True)
    favorited = models.BooleanField(default=False)
    retweeted_status = models.TextField()
    # User is a dictionary in the response; here we take a serialised version.
    user = models.TextField(blank=True, null=True)
    geo = models.TextField(blank=True, null=True)
    in_reply_to_user_id_str = models.CharField(
        max_length=25, blank=True, null=True)
    possibly_sensitive = models.BooleanField(default=False)
    lang = models.CharField(max_length=5)
    created_at = models.DateTimeField()
    in_reply_to_status_id_str = models.CharField(
        max_length=25, blank=True, null=True)
    place = models.TextField(blank=True, null=True)
如果我禁用最后的try/except,这样就会在Django的调试模式下出错,我得到以下的错误追踪信息:
Environment:
Request Method: GET
Request URL: http://127.0.0.1:8001/cursorsearch/surveillance/
Django Version: 1.6
Python Version: 2.7.5
Installed Applications:
('django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'coreapp',
'silk',
'south')
Installed Middleware:
('silk.middleware.SilkyMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware')
Traceback:
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/core/handlers/base.py" in get_response
114. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/Users/dev/als/coreapp/views.py" in cursor_search
92. current_tweet, created = read_tweet(tweet, current_user)
File "/Users/dev/als/coreapp/tools.py" in read_tweet
173. created_at = tweet_data.created_at,
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/manager.py" in get_or_create
154. return self.get_queryset().get_or_create(**kwargs)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/query.py" in get_or_create
388. six.reraise(*exc_info)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/query.py" in get_or_create
380. obj.save(force_insert=True, using=self.db)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/base.py" in save
545. force_update=force_update, update_fields=update_fields)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/base.py" in save_base
573. updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/base.py" in _save_table
654. result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/base.py" in _do_insert
687. using=using, raw=raw)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/manager.py" in _insert
232. return insert_query(self.model, objs, fields, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/query.py" in insert_query
1511. return query.get_compiler(using=using).execute_sql(return_id)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/sql/compiler.py" in execute_sql
898. cursor.execute(sql, params)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/backends/util.py" in execute
69. return super(CursorDebugWrapper, self).execute(sql, params)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/backends/util.py" in execute
53. return self.cursor.execute(sql, params)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/utils.py" in __exit__
99. six.reraise(dj_exc_type, dj_exc_value, traceback)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/backends/util.py" in execute
53. return self.cursor.execute(sql, params)
Exception Type: DataError at /cursorsearch/surveillance/
Exception Value: integer out of range
正如你所看到的,出问题的字段是BigInts,所以应该能处理更大的Twitter ID,因为Twitter的文档说明它们需要64位的有符号整数,所以这方面应该没问题。我还在命令行中尝试过明确转换为Long类型,但没有任何效果。我也尝试了用不同的用户模型来保存推文,但没有区别(这个用户模型之前保存得很好,我可以通过命令行或查询正常检索到它们)。
我使用Silk来检查SQL,成功保存的推文和保存失败的推文之间,我能看到的主要区别是:成功的SQL明确地将NULL赋给了整数字段(比如'in_reply_to_user_id'等)——但这应该不是问题吧?如果我在模型中指定了null=True,而保存时没有给该字段赋值,那应该没问题。
我已经花了一整天来尝试修复这个问题。:)
编辑:我刚刚尝试给这些字段添加default=None,以防万一,但也没有用,还是同样的错误。
第二次编辑:一个成功使用的ID示例是524539416799617024,而一个失败的ID是524539525209808896,所以这应该没问题。我实际上刚刚尝试用一个之前保存的tweet.id从命令行执行,结果抛出了同样的异常,所以显然是其他原因——但我不知道是什么!
更多示例:成功保存的推文ID - 524822288437633024, 524822389821939714;失败的推文ID:524822248499060736, 524823331368091648。
我在命令行中测试这些,通过赋值
u = User.object.get_or_create(id=id_from_tweet)
t = Tweet(id = 524823331368091648, user = current_user) 然后执行t.save() - 还是同样的错误。
我还简单地检查了这些整数
a = -9223372036854775808, b = id, c = 9223372036854775808,结果是a < b < c为True。
我还在命令行中明确将整数强制为Long类型再赋值,但也没有任何区别。
最后编辑:我没有得到具体的答案。看起来这可能与JSON/Snowflake问题有关——在解析过程中可能出现了问题。由于这是一个间歇性故障,我不太清楚为什么有些推文可以正常保存而有些不行,我也不能完全确定这就是问题的原因,但看起来可能性很大。
1 个回答
你有没有检查过BigIntegerField这个类能接受什么值?确保它能接受在初始化时传入的所有值。
根据文档:
class BigIntegerField([**options])
这是一个64位的整数,跟IntegerField很像,但它保证可以容纳的数字范围是从-9223372036854775808到9223372036854775807。
看起来NULL或者None可能是不被接受的……我注意到你在某些情况下尝试把它们设置为默认的零,但并不是所有的情况都有这样做。