如何对数据分箱(划分区间)并制作直方图
我需要创建 200 个等宽的区间(bin),并把我的数据放到这些区间里,这样我就可以用这些数据制作直方图。有没有人能帮我写一个脚本,生成这 200 个区间并把数据放进去?
这是我现在的代码:
#!/usr/bin/python
import operator
import matplotlib.pyplot as plt
import numpy as np
# One list of floats per non-blank data line; the first column of each
# line is dropped before conversion.
rows = []
with open("testdata") as source:
    first_line = next(source)
    next(source)  # skip headers
    nat = int(first_line.split()[0])
    print(nat)
    for record in source:
        if not record.strip():
            continue
        rows.append([float(field) for field in record.split()[1:]])

# Print the Euclidean distance between every pair of rows, using the
# first three values of each row.  The bounds are hard-coded: indices
# 0..53 are paired, so at least 54 data rows must have been read.
for b in range(53):
    origin = np.array((rows[b][0], rows[b][1], rows[b][2]))
    for a in range(b + 1, 54):
        target = np.array((rows[a][0], rows[a][1], rows[a][2]))
        delta = list(origin - target)
        squared = sum(component * component for component in delta)
        dp = squared ** .5
        print(dp)

# Disabled plotting code.  `dp` holds only the most recent distance (a
# single scalar), which is what `data` would receive here.
#data = dp
#num_bins = 200 # <- number of bins for the histogram
#plt.hist(data, num_bins)
#plt.show()
取消注释脚本末尾的绘图代码后,出现如下错误信息:
/usr/lib64/python2.6/site-packages/matplotlib/backends/backend_gtk.py:621: DeprecationWarning: Use the new widget gtk.Tooltip
self.tooltips = gtk.Tooltips()
Traceback (most recent call last):
File "vector_final", line 42, in <module>
plt.hist(data, num_bins)
File "/usr/lib64/python2.6/site-packages/matplotlib/pyplot.py", line 2008, in hist
ret = ax.hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, **kwargs)
File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 7098, in hist
w = [None]*len(x)
TypeError: len() of unsized object
1 个回答
1
你已经很接近了。你的代码每次循环只得到一个标量距离,而 plt.hist 需要的是一个序列,所以才会报 "len() of unsized object"。你唯一缺少的就是把每次算出的距离保存到一个列表里,再把这个列表传递给直方图函数。
#!/usr/bin/python
import operator
import matplotlib.pyplot as plt
import numpy as np
def read_vectors(path):
    """Read the leading integer and the numeric rows from a data file.

    The first line must start with an integer (returned as ``nat``), the
    second line is skipped as a header, and every following non-blank
    line contributes one row: its first whitespace-separated field is
    dropped and the remaining fields are converted to float.

    Returns a ``(nat, vectors)`` tuple where ``vectors`` is a list of
    lists of floats.

    Raises ValueError if the file has fewer than two lines, or if a field
    cannot be converted to a number.
    """
    vectors = []
    with open(path) as f:
        first_line = next(f, None)
        # A default for next() avoids a bare StopIteration on short files.
        if first_line is None or next(f, None) is None:  # skip headers
            raise ValueError("%s: expected a count line and a header line"
                             % path)
        nat = int(first_line.split()[0])
        for line in f:
            # only store the line if there is data on it
            cleaned = line.strip()
            if cleaned:
                # drop the first field and convert the rest to float
                vectors.append([float(field)
                                for field in cleaned.split()[1:]])
    return nat, vectors


def pairwise_distances(vectors):
    """Return the Euclidean distance between every pair of rows.

    Only the first three values of each row are used. Pairs are visited
    in the order (0, 1), (0, 2), ..., (1, 2), ...; fewer than two rows
    yield an empty list.

    Raises ValueError if a row has fewer than three values.
    """
    points = []
    for row in vectors:
        if len(row) < 3:
            raise ValueError("expected at least 3 values per row, got %d"
                             % len(row))
        points.append(np.array(row[:3], dtype=float))

    # collect every distance so the whole list can be handed to the
    # histogram function, which needs a sequence rather than a scalar
    distances = []
    for index, first in enumerate(points[:-1]):
        for second in points[index + 1:]:
            distances.append(float(np.linalg.norm(first - second)))
    return distances


def main():
    """Read "testdata", then plot a 200-bin histogram of all distances."""
    nat, vectors = read_vectors("testdata")
    print(nat)
    distances = pairwise_distances(vectors)
    # plot histogram
    num_bins = 200  # <- number of bins for the histogram
    # store useful data returned by the histogram function
    (n, bins, patches) = plt.hist(distances, num_bins)
    plt.show()


if __name__ == "__main__":
    main()