如何将.csv文件中的数据转移到我的Django SQLite数据库中?
这是我的 models.py 文件
from django.db import models
class School(models.Model):
school = models.CharField(max_length=300)
def __unicode__(self):
return self.school
class Lawyer(models.Model):
firm_url = models.URLField('Bio', max_length=200)
firm_name = models.CharField('Firm', max_length=100)
first = models.CharField('First Name', max_length=50)
last = models.CharField('Last Name', max_length=50)
year_graduated = models.IntegerField('Year graduated')
school = models.CharField(max_length=300)
school = models.ForeignKey(School)
class Meta:
ordering = ('?',)
def __unicode__(self):
return self.first
还有从 csv 文件中提取的两行示例数据:
"http://www.graychase.com/aabbas,Gray & Chase LLP, Amr A ,Abbas,The George Washington University Law School, 2005"
"http://www.graychase.com/kadam,Gray & Chase LLP, Karin ,Adam,Ernst Moritz Arndt University Greifswald, 2004"
谢谢你。
编辑
你能多给点关于这个脚本的详细信息吗?我有几个问题:
我的应用在
C:.../Documents/PROJECTS/Django/sw2/wkw2.
这个路径已经在 PYTHONPATH 里了,我还需要这一行吗?如果需要,我该怎么写呢?
sys.path.append('C:\\sw2')
os.environ
是干什么的?我试着看文档,但没搞懂。
os.environ['DJANGO_SETTINGS_MODULE'] = 'sw2.settings'
from django.core.management import setup_environ
from sw2 import settings
from sw2.wkw2.models import *
import csv
dataReader = csv.reader(open('csvtest1.csv'), delimiter=',', quotechar='"')
for row in dataReader:
lawyer=Lawyer()
lawyer.firm_url=row[0]
lawyer.firm_name=row[1]
lawyer.first=row[2]
lawyer.last=row[3]
lawyer.school=row[4]
lawyer.year_graduated=row[5]
lawyer.save()
谢谢!
针对 celopes 的回答的编辑:
celopes:
我看到你的回答有点晚。我一直在尝试用
>>> p1 = Lawyer(school = "The George Washington University Law School", last = "Babbas", firm_url = "http://www.graychase.com/babbas", year_graduated = 2005, firm_name= "Gray & Chase", first= "Amr A")
在命令行更新数据库,但总是出现整数错误。最后我意识到学校需要是 school_id
,也就是说,在表单中,
>>> p1 = Lawyer(school_id = 1, last = "Babbas", firm_url = "http://www.graychase.com/babbas", year_graduated = 2005, firm_name= "Gray & Chase", first= "Amr A")
从这我明白了,我需要知道每个学校的 ID 才能更新律师表。因为这不太可能,我决定去掉 ForeignKey
,因为我不知道怎么解决这个问题。(我对 Python 和 Django 都很陌生。)
今天早上我看到了你的回答。现在我在开发服务器上修改了我的模型,只有一个表:律师。我觉得这就是我将要使用的。再次为没早点看到你的回答而道歉。
编辑 12/14/09:
celopes:
再次感谢这个脚本。它解决了我的问题。很高兴我不需要在保存到数据库之前把 csv 转换成 json 或其他格式。我做了一些修改。首先,正如之前提到的,我把模型改成了只有律师。此外,你通过使用“名称”解决了重复学校的问题。但我在 admin.py
中有 list_display
、list_filter
和 search_fields
,更改字段名称导致了太多错误。
class LawyerAdmin(admin.ModelAdmin):
fieldsets = [
('Name', {'fields': ['last', 'first', 'firm_name', 'firm_url', 'school', 'year_graduated']}),
#('School', {'fields': ['school', 'year_graduated']}),
]
list_display = ('last', 'first', 'firm_name', 'firm_url', 'school', 'year_graduated')
list_filter = ['school', 'year_graduated']
search_fields = ['last', 'school', 'firm_name']
#search_fields = ['school__school']
#search_fields = ['school__lawyer__last']
使用新的 models.py
,简化的脚本运行得很好。这是我正在使用的:
csv_filepathname="C:/Users/A/Documents/Projects/Django/sw2/wkw2/fixtures/data1.csv"
your_djangoproject_home="C:/Users/A/Documents/PROJECTS/Django/"
import sys,os
sys.path.append(your_djangoproject_home)
os.environ['DJANGO_SETTINGS_MODULE'] ='sw2.settings'
from sw2.wkw2.models import Lawyer
import csv
dataReader = csv.reader(open(csv_filepathname), delimiter=',', quotechar='"')
for row in dataReader:
lawyer=Lawyer()
lawyer.firm_url=row[0]
lawyer.firm_name=row[1]
lawyer.first=row[2]
lawyer.last=row[3]
lawyer.school=row[4]
lawyer.year_graduated=row[5]
lawyer.save()
我还去掉了每行的引号。我注意到如果我把年份放在引号里,就会出现整数错误,而不加引号就没问题。你是怎么让它在有引号的情况下也能工作的?
再次感谢,这对我帮助很大。现在我得让它在生产服务器上也能工作。
1 个回答
我用这些数据做了一个完整的脚本测试:
"http://www.graychase.com/aabbas","Gray & Chase LLP","Amr A","Abbas","The George Washington University Law School","2005"
"http://www.graychase.com/kadam","Gray & Chase LLP","Karin","Adam","Ernst Moritz Arndt University Greifswald","2004"
请注意,你的CSV文件如上所示是错误的。因为整个行都被引号包裹,所以CSV文件读取器会把整行当作一个条目。你要么去掉每行开头和结尾的引号,要么像我一样,把每个不同的值都用引号包起来。
下面是与你的脚本兼容的模型:
from django.db import models
class School(models.Model):
name = models.CharField(max_length=300, unique=True)
def __unicode__(self):
return self.name
class Lawyer(models.Model):
firm_url = models.URLField('Bio', max_length=200, unique=True)
firm_name = models.CharField('Firm', max_length=100)
first = models.CharField('First Name', max_length=50)
last = models.CharField('Last Name', max_length=50)
year_graduated = models.IntegerField('Year graduated')
school = models.ForeignKey(School)
def __unicode__(self):
return self.first
这是一个可以读取你的CSV文件的脚本(如果我搞错了你的项目名sw2
和应用名wkw2
,请修正这些引用):
############ All you need to modify is below ############
# Full path and name to your csv file
csv_filepathname="C:/Users/A/Documents/Projects/Django/sw2/wkw2/fixtures/data.csv"
# Full path to the directory immediately above your django project directory
your_djangoproject_home="C:.../Documents/PROJECTS/Django/"
############ All you need to modify is above ############
import sys,os
sys.path.append(your_djangoproject_home)
os.environ['DJANGO_SETTINGS_MODULE'] ='sw2.settings'
from sw2.wkw2.models import School, Lawyer
import csv
dataReader = csv.reader(open(csv_filepathname), delimiter=',', quotechar='"')
old_school = None
for row in dataReader:
if old_school != row[4]:
old_school = row[4]
school = School()
school.name = old_school
school.save()
dataReader = csv.reader(open(csv_filepathname), delimiter=',', quotechar='"')
for row in dataReader:
lawyer=Lawyer()
lawyer.firm_url=row[0]
lawyer.firm_name=row[1]
lawyer.first=row[2]
lawyer.last=row[3]
lawyer_school=School.objects.get(name=row[4])
lawyer.school=lawyer_school
lawyer.year_graduated=row[5]
lawyer.save()
这个脚本首先会从CSV文件中创建所有可能的学校。然后它会再次读取CSV,创建每一个律师。
我用测试数据运行了这个脚本。它运行得很好,成功加载了所有的CSV数据。