<p>我编写了下面的函数来转换一个约 450 万行的矩阵,同时处理了无效数据类型引发的异常。虽然还可以通过并行化进一步改进,但它已经能很好地满足我的需求;仅供参考,我把它贴在这里。</p>
<pre><code>def cnvt_data(mat):
from datetime import datetime
_date = lambda x: datetime.strptime(x, "%Y/%m/%d-%H:%M:%S")
# only necessary because '' should be treated as 0
_int = lambda x: int('0' + x)
# specify the type parsers for each column
parsers = 8 * [_int] + [_date, _int, str] + 6 * [_int]
def try_parse(parse, value, _def):
try:
return parse(value), True
except ValueError:
return _def, False
matrix = [];
for idx in range(len(mat)):
try:
row = mat[idx]
matrix.append(np.asarray([parse(input) for parse, input in zip(parsers, row)]))
except ValueError:
l = [];
matrix.append([])
for _idx, args in enumerate(zip(parsers, row)):
val, pres = try_parse(args[0], args[1], 0)
matrix[-1].append(val)
if(not pres): l.append(_idx);
print "\r[Error] value error @row %d @indices(%s): replaced with 0" %(idx, ', '.join(str(x) for x in l))
print "\r[.] %d%% converted" %(idx * 100/len(mat)),
print "\r[+] 100% converted."
return matrix
</code></pre>