Numpy中的数组堆叠：UNIX和Windows的不同行为

#!/usr/bin/env python Usage = """ plate_calc.py - version 1.0 Convert a series of plate fluorescence readings to total DNA mass per sample and print them to a tab-delimited output file. This program can take multiple files as inputs (separated by a space) and generates a new output file for each input file. NOTE: 1) Input(s) must be an exported .txt file. 2) Standards must be in columns 1 and 2, or 11 and 12. 3) The program assumes equal volumes across wells. Usage: plate_calc.py input.txt input2.txt input3.txt """ import sys import numpy as np if len(sys.argv)<2: print Usage else: #First, we want to extract the values of interest into a Numpy array Filelist = sys.argv[1:] input_DNA_vol = raw_input("Volume of sample used for AccuClear reading (uL): ") remainder_vol = raw_input("Remaining volume per sample (uL): ") orientation = raw_input("Are the standards on the LEFT (col. 1 & 2), or on the RIGHT (col. 11 and 12)? ") orientation = orientation.lower() for InfileName in Filelist: with open(InfileName) as Infile: fluor_list = [] Linenumber = 1 for line in Infile: #this will extract the relevant information and store as a list of lists if Linenumber == 5: line = line.strip('\n').strip('\r').strip('\t').split('\t') fluor_list.append(line[1:]) elif Linenumber > 5 and Linenumber < 13: line = line.strip('\n').strip('\r').strip('\t').split('\t') fluor_list.append(line) Linenumber += 1 fluor_list = [map(float, x) for x in fluor_list] #converts list items from strings to floats fluor_array = np.asarray(fluor_list) #this takes our list of lists and converts it to a numpy array

#Create conditional statement, depending on where the standards are, to split the array if orientation == "right": #Next, we want to average the 11th and 12th values of each of the 8 rows in our numpy array stds = fluor_array[:,[10,11]] #Creates a sub-array with the standard values (last two columns, (8x2)) data = np.delete(fluor_array,(10,11),axis=1) #Creates a sub-array with the data (first 10 columns, (8x10)) elif orientation == "left": #Next, we want to average the 1st and 2nd values of each of the 8 rows in our numpy array stds = fluor_array[:,[0,1]] #Creates a sub-array with the standard values (first two columns, (8x2)) data = np.delete(fluor_array,(0,1),axis=1) #Creates a sub-array with the data (last 10 columns, (8x10)) else: print "Error: answer must be 'LEFT' or 'RIGHT'" std_av = np.mean(stds, axis=1) #creates an array of our averaged std values #Then, we want to subtract the average value from row 1 (the BLANK) from each of the 8 averages (above) std_av_st = std_av - std_av[0] #Run a linear regression on the points in std_av_st against known concentration values (these data = y axis, need x axis) x = np.array([0.00, 0.03, 0.10, 0.30, 1.00, 3.00, 10.00, 25.00])*10 #ng/uL*10 = ng/well xi = np.vstack([x, np.zeros(len(x))]).T #creates new array of (x, 0) values (for the regression only); also ensures a zero-intercept (when we use (x, 1) values, the y-intercept is not forced to be zero, and the slope is slightly inflated) m, c = np.linalg.lstsq(xi, std_av_st)[0] # m = slope for future calculations #Now we want to subtract the average value from row 1 of std_av (the average BLANK value) from all data points in "data" data_minus_blank = data - std_av[0] #Now we want to divide each number in our "data" array by the value "m" derived above (to get total ng/well for each sample; y/m = x) ng_per_well = data_minus_blank/m #Now we need to account for the volume of sample put in to the AccuClear reading to calculate ng/uL ng_per_microliter = ng_per_well/float(input_DNA_vol) #Next, we multiply those values by the volume of DNA sample (variable "ng") ng_total = ng_per_microliter*float(remainder_vol) #Set number of decimal places to 1 ng_per_microliter = np.around(ng_per_microliter, decimals=1) ng_total = np.around(ng_total, decimals=1)

#Create a row array (values A-H), and a filler array ('-') to add to existing arrays col = [i for i in range(1,13)] row = np.asarray(['A','B','C','D','E','F','G','H']) filler = np.array(['-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',]).reshape((8,2))

#Create output Outfile = open('Total_DNA_{0}'.format(InfileName),"w") Outfile.write("DNA concentration (ng/uL):\n\n") Outfile.write("\t"+"\t".join([str(n) for n in col])+"\n") if orientation == "left": #Add filler to left, then add row to the left of filler ng_per_microliter = np.c_[filler,ng_per_microliter] ng_per_microliter = np.c_[row,ng_per_microliter] Outfile.write("\n".join(["\t".join([n for n in item]) for item in ng_per_microliter.tolist()])+"\n\n") elif orientation == "right": #Add rows to the left, and filler to the right ng_per_microliter = np.c_[row,ng_per_microliter] ng_per_microliter = np.c_[ng_per_microliter,filler] Outfile.write("\n".join(["\t".join([n for n in item]) for item in ng_per_microliter.tolist()])+"\n\n") Outfile.write("Total mass of DNA per sample (ng):\n\n") Outfile.write("\t"+"\t".join([str(n) for n in col])+"\n") if orientation == "left": ng_total = np.c_[filler,ng_total] ng_total = np.c_[row,ng_total] Outfile.write("\n".join(["\t".join([n for n in item]) for item in ng_total.tolist()])) elif orientation == "right": ng_total = np.c_[row,ng_total] ng_total = np.c_[ng_total,filler] Outfile.write("\n".join(["\t".join([n for n in item]) for item in ng_total.tolist()])) Outfile.close

2条回答

网友
1楼 · 编辑于 2024-04-19 09:27:36

在用户hpaulj的帮助下，我发现这不是操作系统和环境之间行为不同的问题。这很可能是因为用户拥有不同版本的numpy。你知道吗
数组的串联自动将“float64”数据类型转换为“S1”（以匹配“filler”数组（'-'）和“row”数组（'A'、'B'等））。你知道吗
numpy的较新版本（特别是v1.12.X）似乎允许在没有这种自动转换的情况下串联数组。你知道吗
我仍然不确定在旧版本的numpy中如何解决这个问题，但是建议人们升级他们的版本以获得完整的性能应该是一件简单的事情。：）

网友
2楼 · 编辑于 2024-04-19 09:27:36

Using a simple print command, I found that the stacking command numpy.c_[ ] is the culprit (NOT the array writing command).
So it appears that numpy.c_[ ] does not truncate these numbers in Windows, but will limit those numbers to the first character in a UNIX environment.
用简单的例子说明这些陈述。np.c_[]不应该做任何不同的事情。你知道吗
在Py3中，默认的字符串类型是unicode。和numpy 1.12
In [149]: col = [i for i in range(1,13)] ...: row = np.asarray(['A','B','C','D','E','F','G','H']) ...: filler = np.array(['-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',]).reshape((8,2)) ...: In [150]: col Out[150]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] In [151]: "\t"+"\t".join([str(n) for n in col])+"\n" Out[151]: '\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\n' In [152]: filler Out[152]: array([['-', '-'], ... ['-', '-'], ['-', '-']], dtype='<U1') In [153]: row Out[153]: array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'], dtype='<U1') In [154]: row.shape Out[154]: (8,) In [155]: filler.shape Out[155]: (8, 2) In [159]: ng_per_microliter=np.arange(8.)+1.23 In [160]: np.c_[filler,ng_per_microliter] Out[160]: array([['-', '-', '1.23'], ['-', '-', '2.23'], ['-', '-', '3.23'], ... ['-', '-', '8.23']], dtype='<U32') In [161]: np.c_[row,ng_per_microliter] Out[161]: array([['A', '1.23'], ['B', '2.23'], ['C', '3.23'], .... ['H', '8.23']], dtype='<U32')
在早期的numpy版本中，U1（或Py2中的S1）数组与数值的串联可能会将数据类型保留在U1。在我的例子中，它们已经扩展到U32。你知道吗
因此，如果您怀疑np.c_，则显示这些结果（如果需要，使用repr）
print(repr(np.c_[row,ng_per_microliter]))
跟踪dtype。你知道吗
对于v1.12发行说明（可能更早）
The astype method now returns an error if the string dtype to cast to is not long enough in “safe” casting mode to hold the max value of integer/float array that is being casted. Previously the casting was allowed even if the result was truncated.
这可能在执行连接时起作用。你知道吗

相关问题更多 >

编程相关推荐

热门问题

热门文章