openpyxl：在某些（关键用途）情况下，公式被读作空白

dl = imp.load_source('downloader', os.getcwd() + '/Project/downloader.py') gdu = imp.load_source('googledriveutils', os.getcwd() + '/Project/googledriveutils.py') remove_file = gdu.remove_file find_folderid = gdu.find_folderid get_file_list = gdu.get_file_list # Define constants COL_LABEL = '\nProbe - ' # TODO: ORP PROBE: REVISE THIS DATE when orp probe is added IGNORE_BEFORE = pd.to_datetime('5.24.2016') PROBE_DICT = {'DO (mg/L)': 'DO mg/L', 'pH': 'pH', 'NH4+ (mgN/L)': 'Ammonium', 'ORP (mV)': 'ORP mV'} TS = '\nTimestamps' def save_to_workbook(newval, date, header, rows_to_skip=12, wbname='temp.xlsx', sheet_name='Reactor Data'): wb = load_workbook(wbname) ws = wb[sheet_name] for cell in ws[rows_to_skip+1]: # TODO: Error if header isn't found if cell.value == header: colno = cell.col_idx break for row in ws.iter_rows(min_row=rows_to_skip+1, min_col=1, max_col=1): for cell in row: # TODO: Error if date isn't found if cell.value == date: rowno = cell.row break ws.cell(row=rowno, column=colno).value = newval wb.save(wbname) return df def find_r1masterfile(): # Navigate through the directories wlab_fid = find_folderid('Winkler Lab', 'root') kp_fid = find_folderid('KathrynsProjects', wlab_fid) amxrct_fid = find_folderid('Anammox Reactor', kp_fid) trials_fid = find_folderid('Reactor Trials', amxrct_fid) # List files in directory file_list = get_file_list(trials_fid) for afile in file_list: if afile['title'] == 'AMX RCT.xlsx': # Return the file we asked for return afile # TODO: error if there was no file with that name def save_r1masterfile(csv, rows_to_skip=12, filename='temp.xlsx', sheet_name='Reactor Data'): # Get the file we want master_file = find_r1masterfile() try: master_file.GetContentFile(filename) except Exception, e: print "Warning: Something wrong with file R1 Master File." print str(e) # TODO: add an email alarm to responsible user if csv: return master_file else: # convert to dataframe wb = load_workbook(filename, data_only=True) ws = wb[sheet_name] print ws["B14"].value data = ws.values data = list(data)[rows_to_skip:] cols = list(data[0]) del cols[0] del data[0] idx = [r[0] for r in data] data = (islice(r, 1, None) for r in data) df = pd.DataFrame(data, index=idx, columns=cols) print df.dropna(how='all') remove_file(filename) return df def upload_r1masterfile(filename='temp.xlsx'): # Get the file we want master_file = find_r1masterfile() try: master_file.SetContentFile(filename) master_file.Upload() except Exception, e: print "Warning: Something wrong with file R1 Master File." print str(e) # TODO: add an email alarm to responsible user def populate_r1masterfile(rows_to_skip=12, filename='temp.xlsx'): # Get the R1 master file as a file save_r1masterfile(True) # Convert the juicy stuff to a dataframe masterdf = pd.read_excel(filename, sheetname='Reactor Data', encoding="utf-16", skiprows=rows_to_skip, sep='\t', index_col='Date', keep_default_na=False, na_values=['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '','N/A', '#NA', 'NA' 'NULL', 'NaN', '-NaN', 'nan', '-nan']) # Find what we will populate with probe data # Find timestamps ts_columns = [col for col in masterdf.columns if TS in col] tsdf = masterdf[ts_columns] # Find probes, ignore before given date probe_columns = [col for col in masterdf.columns if COL_LABEL in col] probedf = masterdf[probe_columns] probedf = probedf[masterdf.index > IGNORE_BEFORE] # Find Indices and column labels of blank values stackdf = probedf.stack(dropna=False) empty = stackdf[stackdf.isnull()].index.tolist() # For each blank look for the probe, time & date of cycle, and return val for each in empty: probe, time = each[1].split(COL_LABEL) time = tsdf.loc[each[0], time+TS] ts = each[0]+pd.DateOffset(hour=time.hour, minute=time.minute) val = dl.get_val_from(1, ts, PROBE_DICT.get(probe)) probedf.set_value(each[0], each[1], val) # Save that value to the workbook save_to_workbook(val, each[0], each[1]) upload_r1masterfile() print 'Master file updated. ' + str(datetime.datetime.now()) remove_file('temp.xlsx') return probedf

2条回答

网友

1楼 · 编辑于 2024-05-15 00:41:16

查理的回答来自mailing list：

So, if you want to keep the formulae then you must not use data only mode.
As previously stated, openpyxl never evaluates formulae so if you want to
know value of A3 you must pass the file to an application such as Excel
or OpenOffice — you can run OpenOffice headless for this kind of thing or
use xlwings for Excel — that does do formula evaluation. You could then
read this file in data only mode to see the result of the calculation.
Alternatively you could try using something like PyCel to do the
evaluation for you. But, basically if you want to do calculations: do them
in Python.

根据他的建议，我的解决办法是按excel文件中的方法逐列重做所有的计算。一、 E.对于这样的excel文件：

        col1 col2   col3    col4
row1    1    3      =A1+B1  =1+3
row2    2    4      =A2+B2  =2+4

我将其作为数据帧导入，如下所示（以字符串形式维护方程）：

^{pr2}$

然后执行以下操作：

parse_excel = lambda x: eval(str(x)[1:]) if isinstance(x, str) else x
for col in df.columns:
    try:
        df[col] = df[col].map(parse_excel)
    except:
        pass
df['col3'] = df['col2']+df['col1']

我敢肯定这也许是最笨拙的方法，但它目前有效。在

网友

2楼 · 编辑于 2024-05-15 00:41:16

使用OpenPYXL2.4，您可以在一次操作中完成您需要的操作。我已经完成了你的第一个功能并进行了调整。在

from itertools import islice
from pandas import DataFrame

def save_to_workbook(newval,
                     date,
                     header,
                     rows_to_skip=12,
                     wbname='temp.xlsx',
                     sheet_name='Reactor Data'):
    wb = load_workbook(wbname)
    ws = wb[sheet_name]
    rowno = None
    colno = None
    for cell in ws[1]:
        # TODO: Error if header isn't found
        if cell.value == header:
            colno = col

    for row in ws.iter_rows(min_row=rows_to_skip+1, min_col=1, max_col=1):
        for cell in row:
        # TODO: Error if date isn't found
            if cell.value == date:
                rowno = row
                break

    # TODO: Fix this
    ws.cell(row=rowno, column=colno).value = newval

    # convert to dataframe
    data = ws.values
    cols = next(data)[1:]
    data = list(data)
    idx = [r[0] for r in data]
    data = (islice(r, 1, None) for r in data)
    df = DataFrame(data, index=idx, columns=cols)

    return df

这可能不是你想做的一切，但希望能让你开始。它还避免了保存和解析整个工作簿，这会使它更快一些。在

要使用openpyxl2.4，您需要执行pip install -U pre openpyxl或使用签出。在

有关同时使用openpyxl和pandas的更多信息，请参见documentation。在

更新

相关问题更多 >

编程相关推荐

热门问题

热门文章