我想问两个简单的问题:
1. 如何把 txt(或任何其他类型的 file)转换为 csv 或 excel?我在 C:\Users\mahmoud\PycharmProjects\text_files 下有一个特定(certain)的路径,该路径下的文件夹包含我要转换的所有 text 文件。因此我想写一个类似于 for 的循环,遍历该路径,并将所有这类文件转换为 csv 或 excel。
2. 如何精简下面的代码?这段代码与第一个问题有关:
import pandas as pd
# Relative paths of the GSM input files; they are passed to pd.read_csv(..., sep=';')
# by the methods of the push_all_to_csv class.
dataf_gsm_car_txt = 'gsmcarrier_mnm.txt'
dataf_gsm_rel_txt = 'gsmrelation_mnm.txt'
class push_all_to_csv(object):
    """Convert the semicolon-delimited ``*_mnm.txt`` exports to spreadsheet files.

    Each ``push_*`` method reads one input file, re-aligns its header with its
    data columns and writes the result into the current working directory.
    """

    @staticmethod
    def _load_realigned(txt_path):
        """Read *txt_path* with ``sep=';'`` and re-apply the header in order.

        The index produced by ``read_csv`` is moved into a new first column, the
        last column is dropped, and the original header names are then assigned
        positionally to the remaining columns.

        NOTE(review): presumably every data row ends with an extra ';', which
        makes pandas treat the first field as the index - confirm against the
        real input files.

        Returns:
            The re-aligned ``DataFrame``.
        """
        frame = pd.read_csv(txt_path, sep=';')
        header = list(frame.columns)
        frame = frame.reset_index()
        frame = frame.drop(columns=frame.columns[-1])
        # zip() pairs the shifted column labels with the original header names.
        return frame.rename(columns=dict(zip(frame.columns, header)))

    def push_gsmcarrier(self):
        """Write ``gsmcarrier_mnm.xlsx`` from the file named by ``dataf_gsm_car_txt``."""
        frame = self._load_realigned(dataf_gsm_car_txt)
        frame.to_excel('gsmcarrier_mnm.xlsx', sheet_name='Sheet1', index=False)

    def push_gsmrelation(self):
        """Write ``gsmrelation_mnm.xlsx`` from the file named by ``dataf_gsm_rel_txt``."""
        frame = self._load_realigned(dataf_gsm_rel_txt)
        frame.to_excel('gsmrelation_mnm.xlsx', sheet_name='Sheet1', index=False)

    def push_umtscarrier(self):
        """Write ``umtscarrier_mnm.xlsx`` from ``umtscarrier_mnm.txt``."""
        frame = self._load_realigned('umtscarrier_mnm.txt')
        frame.to_excel('umtscarrier_mnm.xlsx', sheet_name='Sheet1', index=False)

    def push_umtsrelation(self):
        """Write ``umtsrelation_mnm.csv`` from ``umtsrelation_mnm.txt``.

        This method used to be a second ``push_gsmrelation`` definition, which
        silently replaced the GSM one; it now has its own name.
        """
        frame = self._load_realigned('umtsrelation_mnm.txt')
        # index=False: after reset_index() the index is only a row counter, and
        # the sibling methods do not write it either.
        frame.to_csv('umtsrelation_mnm.csv', index=False)
def get_push_all_to_csv():
    """Build and return a new ``push_all_to_csv`` instance."""
    converter = push_all_to_csv()
    return converter
我只需要使用 pandas、to_csv、where、isin 等函数来构造类似这样的代码。作为对代码结构设计的支持,我认为目录结构应该如下所示:
database_connection/
│
├── .connection.py
to_sql/
│
├── .file_tosql_1.py
├── .file1_tosql_.py
to_csv/
├── .file1_tosql_.py
├── .file1_tosql_.py
assets/
Main_App.py
我发现了一种通过以下代码减少methods
调用的方法:
def call_all(obj, *args, **kwargs):
    """Invoke every bound method of *obj* with the same arguments.

    Attribute names come from ``dir(obj)``, so methods run in alphabetical
    order. Dunder methods (``__init__``, ``__eq__``, ...) are skipped so the
    object is not re-initialised or handed arguments its protocol methods do
    not accept.

    Args:
        obj: The object whose methods are called.
        *args: Positional arguments forwarded to every method.
        **kwargs: Keyword arguments forwarded to every method.
    """
    # Imported here so the function works without a module-level import.
    from inspect import ismethod

    for name in dir(obj):
        if name.startswith("__") and name.endswith("__"):
            continue
        attribute = getattr(obj, name)
        if ismethod(attribute):
            attribute(*args, **kwargs)
有什么建议吗
def clean(path):
    """Re-align one semicolon-delimited text file and save it as CSV.

    The file is read with ``sep=';'``. The index produced by ``read_csv`` is
    moved into a new first column, the last column is dropped, and the original
    header names are re-applied in order.

    The result is written next to the input with the extension replaced by
    ``.csv``; the input file itself is left untouched.

    Args:
        path: Path of the text file to convert.

    Returns:
        The path of the CSV file that was written.
    """
    import os

    df = pd.read_csv(path, sep=';')
    cols = df.columns.tolist()
    last_col = df.columns[-1]
    df = df.reset_index().drop(columns=last_col)
    df = df.rename(columns=dict(zip(df.columns, cols)))
    # splitext only strips the final extension, so dots elsewhere in the path
    # (directories, "name.v1.txt") are preserved.
    new_file_path = os.path.splitext(path)[0] + ".csv"
    df.to_csv(new_file_path, index=False)
    return new_file_path
def main(path=r"C:\Users\haroo501\PycharmProjects\ToolUpdated\data_feed", pattern="*.txt"):
    """Run ``clean`` on every file in *path* whose name matches *pattern*.

    Args:
        path: Directory to scan. Defaults to the original hard-coded folder.
        pattern: Glob pattern for the file names. Defaults to ``"*.txt"``.

    Returns:
        The list of file paths that were processed, in sorted order.
    """
    import glob
    import os

    processed = []
    # sorted() makes the processing order deterministic; glob gives no ordering guarantee.
    for file_path in sorted(glob.glob(os.path.join(path, pattern))):
        print(file_path)
        clean(file_path)
        processed.append(file_path)
    return processed


if __name__ == "__main__":
    main()
# Snippet that produces the traceback shown below: it parses every *.txt file in the folder.
# NOTE(review): in a raw string each "\\" stays as two backslash characters.
path = r"C:\\Users\\haroo501\\PycharmProjects\\ToolUpdated\\data_feed"
# Glob pattern matching the .txt files directly inside `path`.
pathPat = os.path.join(path,"*.txt")
all_file_name = glob.glob(pathPat)
for file_path in all_file_name:
with open(file_path) as currentFile:
# Parsed as tab-delimited text; the resulting DataFrame is discarded.
# The traceback below shows pandas raising EmptyDataError on this call.
pd.read_csv(currentFile, delimiter = "\t")
print(file_path)
错误:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Program Files\JetBrains\PyCharm Community Edition 2019.3.2\plugins\python-ce\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition 2019.3.2\plugins\python-ce\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/haroo501/PycharmProjects/ToolUpdated/txt_to_csv/convert_to_csv.py", line 64, in <module>
pd.read_csv(currentFile, delimiter = "\t")
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 676, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 448, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 880, in __init__
self._make_engine(self.engine)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 1114, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 1891, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 532, in pandas._libs.parsers.TextReader.__cinit__
pandas.errors.EmptyDataError: No columns to parse from file
您可以使用 glob 库访问这些文件(如上面 trigonom 所述),并用单个函数迭代所有文件,再把结果保存回去。
更新:我原先使用 path.split(".")[:-1] 从原始名称创建 .xlsx 文件名,但这会返回字符串列表,因此导致错误。您可以改用 str.replace 来替换文件的扩展名。
问题1
相关问题 更多 >
编程相关推荐