我有一系列的文件是80 GB压缩的.bz文件。我必须把这些文件,制作成JSON对象,压缩并导出到焦油.gz压缩文件被分割成500 MB的块。你知道吗
import os
import json
from datetime import datetime, timedelta
import bz2
import time
start_time = time.time()
os.system("kinit svc-cypressadm@1DC.COM -k -t /hadoop/svc-cypressadm/svc-cypressadm.keytab")
now = datetime.utcnow() - timedelta(days=2)
day = now.strftime("%d")
month = now.strftime("%m")
year = now.strftime("%Y")
os.system("hadoop fs -copyToLocal /data/SOUTH.bz2 south.bz2")
bz_file = bz2.BZ2File("south.bz2")
print "Test Point 1A"
os.system("mkfifo gzip_pipe")
print "Test Point 1B"
os.system("gzip -9 -c gzip_pipe >> fast_south.gz")
print "Test Point 1C"
os.system("exec 3>> gzip_pipe")
print "Test Point 1D"
dataJSON = {}
writingFile = open("south.json", "w")
i = 0
print "Test Point 1E"
for line in bz_file:
#dataJSON = {}
line = unicode(line, "ISO-8859-1")
dataJSON["authu_rec_type"] = line[0:0]
dataJSON["authu_avs_code"] = line[1:1]
dataJSON["authu_acct_numb"] = line[2:18]
dataJSON["authu_date"] = line[19:23]
... (More fields here)
json_data = json.dumps(dataJSON)
json_data = json_data + "\n"
#json_data = "\"" + json_data + "\""
i = i + 1
#writingFile = open("south.json", "w")
writingFile.write(json_data)
if i == 1000:
print "Test Point 2A"
commandLineString = "cat south.json >> 3"
#writingFile.close()
os.system(commandLineString)
print "Test Point 2B"
os.system("rm south_credit_debit_auth_20100731.json")
print str(i) + ": " + commandLineString
writingFile = open("south.json", "w")
i = 0
#commandString = "tar -cvzf south.tar.gz south.json"
#system.os(commandString)
print("--- %s seconds ---" % (time.time() - start_time))
我需要在记忆中这样做,因为我需要尽快这样做。非常感谢您的帮助。你知道吗
目前没有回答
相关问题 更多 >
编程相关推荐