使用executemany将CSV数据插入Django模型
我有一个包含大约90万行和13列的csv文件,前28445行一切正常,但之后就出现了错误。
ProgrammingError(编程错误)
异常信息:not enough arguments for format string(格式字符串的参数不够)
我尝试通过打印行来检查列是否有问题,但看起来没有什么异常。
['印度', '5', '第一次当妈妈', '帕特纳', 'A2', '品牌驱动因素', '', '', '很难找到', '', '', '1', '0到12个月']
def upload(request):
    """Bulk-insert rows from an uploaded CSV file into johnson_jnjusage.

    Reads the 'csvData' upload, batches rows, and inserts them with
    cursor.executemany in chunks of 5000 to bound memory use.

    Fixes over the original:
    - Rows without exactly 13 columns are skipped; a short/long row made
      executemany raise "not enough arguments for format string".
    - The final partial batch (< 5000 rows) is now inserted; the original
      silently dropped it.
    - `docfile` is bound before use; the original referenced it undefined
      in the response line.
    """
    if request.method == 'POST':
        docfile = request.FILES['csvData']
        cursor = connection.cursor()
        query = ''' INSERT INTO johnson_jnjusage (country,no_of_people_house,nursing_cnt,city,sec,bucket,category1,category2, final_category, responders, usageFrequency, base, child_age_group)
                    VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) '''
        batch = []
        reader = csv.reader(docfile, delimiter=',')
        for row in reader:
            # The INSERT has 13 placeholders; any other width would crash
            # executemany, so malformed rows are skipped outright.
            if len(row) != 13:
                continue
            batch.append(tuple(row))
            if len(batch) >= 5000:
                cursor.executemany(query, batch)
                transaction.commit()
                batch = []
        # Flush whatever is left over after the loop.
        if batch:
            cursor.executemany(query, batch)
            transaction.commit()
        cursor.close()
        return HttpResponse(docfile.name + "'s data inserted into database successfully")
如果你们能帮我解决这个问题,我将非常感激。
3 个回答
0
我明白了。我只是检查了一下那一行的大小,然后对那一行进行了处理。同时,我发现有一些字符,比如“\x00”,所以我用正则表达式把它们去掉了。
def upload(request):
    """Load the uploaded CSV into JnJUsage via bulk_create in 10k batches.

    Skips rows that do not have exactly 13 columns and strips control
    characters (e.g. '\\x00') out of the final_category column, which is
    what originally broke the insert around row 28445.

    Fixes over the original:
    - Removed the `reader.next()` call after a malformed row: `continue`
      already advances the loop, so that extra next() silently threw away
      the *following* valid row as well.
    - Batches flush on batch length instead of `i % 10000 == 0`, which
      fired on the very first row (i == 0) with a single-element batch.
    - Dropped the unused cursor and x locals.
    """
    start_time = time.time()
    print(start_time)  # crude timing trace kept from the original
    if request.method == 'POST':
        docfile = request.FILES['csvData']
        reader = csv.reader(docfile, delimiter=',')
        to_create = []
        for row in reader:
            # Malformed rows (wrong column count) are skipped entirely.
            if len(row) != 13:
                continue
            j = JnJUsage()
            j.country = row[0]
            j.no_of_people_house = row[1]
            j.nursing_cnt = row[2]
            j.city = row[3]
            j.sec = row[4]
            j.bucket = row[5]
            j.category1 = row[6]
            j.category2 = row[7]
            # Strip control/high-bit bytes such as '\x00' that poisoned
            # the DB driver's parameter formatting.
            j.final_category = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]', '', row[8])
            j.responders = row[9]
            j.usageFrequency = row[10]
            j.base = row[11]
            j.child_age_group = row[12]
            to_create.append(j)
            # 900k rows is too much to hold at once, so flush in batches.
            if len(to_create) >= 10000:
                JnJUsage.objects.bulk_create(to_create)
                to_create = []
        # Persist the final partial batch.
        if to_create:
            JnJUsage.objects.bulk_create(to_create)
        return HttpResponse(docfile.name + "'s data inserted into database successfully")
感谢Dmitry Mikhaylov和Nathaniel的帮助。
0
当然,你可以使用批量创建(bulk_create)。它的写法大概是这样的:
def upload(request):
    """Answer sketch: build model instances per CSV row and bulk_create them.

    Fixes over the original sketch:
    - `obj = new MyObject()` is Java syntax and a SyntaxError in Python;
      instantiation is `MyObject()`.
    - The final partial batch (< 5000 objects) is now saved; the original
      left it behind when the loop ended.
    - `docfile` is bound before it is used in the response.
    """
    if request.method == 'POST':
        docfile = request.FILES['csvData']
        reader = csv.reader(docfile, delimiter=',')
        pending = []
        for row in reader:
            obj = MyObject()
            obj.country = row[0]
            obj.city = row[3]
            # ... assign the remaining columns the same way ...
            pending.append(obj)
            # Flush in batches of 5000 to bound memory use.
            if len(pending) >= 5000:
                MyObject.objects.bulk_create(pending)
                pending = []
        # Save whatever is left after the loop.
        if pending:
            MyObject.objects.bulk_create(pending)
        return HttpResponse(docfile.name + "'s data inserted into database successfully")
你可以在文档中找到更多关于 bulk_create 的信息。
1
你可以这样做。
# models.py
class JNJUsage(models.Model):
    # 13 fields matching the CSV columns; bodies elided by the author.
    ...

# views.py (wherever def upload is)
# NOTE(review): row['country']-style access requires csv.DictReader with a
# header row; the question's plain csv.reader yields positional lists —
# confirm which reader is in use before copying this.
to_create = []
for i, row in enumerate(reader):
    j = JNJUsage()
    j.country = row['country']
    j.no_of_people_house = row['no_of_people_house']
    j.nursing_cnt = row['nursing_cnt']
    j.city = row['city']
    j.sec = row['sec']
    j.bucket = row['bucket']
    j.category1 = row['category1']
    j.category2 = row['category2']
    j.final_category = row['final_category']
    j.responders = row['responders']
    j.usageFrequency = row['usageFrequency']
    j.base = row['base']
    j.child_age_group = row['child_age_group']
    to_create.append(j)
    # If 900k is too much then flush in 10k batches. Checking the batch
    # length avoids the original's off-by-one, where `i % 10000 == 0`
    # triggered a single-row bulk_create on the very first row (i == 0).
    if len(to_create) >= 10000:
        JNJUsage.objects.bulk_create(to_create)
        to_create = []
# Clean up the rest
if to_create:
    JNJUsage.objects.bulk_create(to_create)