擅长:Python、MySQL、Java
<p>在测试了其他建议之后,我发现我原来的方法要快得多。我使用下面的函数生成一个迭代器,并将其传递给 <code>pd.DataFrame</code>:</p>
<pre><code>def row_factory(index_data, row_len):
"""
Make a generator for iterating for index_data
Parameters:
index_data (dict): a dict mapping the a value to a dict of index mapped to values. All indexes not in
second dict are assumed to be None.
row_len (int): length of row
Example:
index_data = {0: {0:2, 2:1}, 1: {1:1}} would yield [0, 2, None, 1] then [1, None, 1, None]
"""
for key, data in index_data.items():
# Initialize row with the key starting, then None for each value
row = [key] + [None] * (row_len - 1)
for index, value in data.items():
# Only replace indexes that have a value
row[index] = value
yield row
# row_len is a parameter of row_factory, not of pd.DataFrame
df = pd.DataFrame(row_factory(d, 5))
</code></pre>