为了使代码运行得更快，我应该对此代码应用哪些更改？

import pandas as pd import geopandas from rasterio.windows import Window import rasterio import numpy as np from rasterstats import zonal_stats from shapely.geometry import box import os import ntpath import time def path_leaf(path): head, tail = ntpath.split(path) return tail or ntpath.basename(head) def getListOfFiles(dirName): listOfFile = os.listdir(dirName) allFiles = list() for entry in listOfFile: # Create full path fullPath = os.path.join(dirName, entry) # If entry is a directory then get the list of files in this directory if os.path.isdir(fullPath): allFiles = allFiles + getListOfFiles(fullPath) else: allFiles.append(fullPath) return allFiles #The address of the folder containing the ROI's SHP_address = 'F:\\nota_tecnica_2\\vetores\\amostras_interesse_final\\ROI_wgs84_9_final_dissolvido.shp' #The address of the satellite images a = getListOfFiles('C:\\dados_nota_tecnica\\MODIS\\MODIS_quality_mosaic_rec') #The address of the masks (binary images where 0 is a electable pixel and 1 is a not electable pixel) b = 'C:\\dados_nota_tecnica\\VIIRS\\final\\' #The folder where I store my vectors at the end c = 'F:\\nota_tecnica_2\\vetores\\amostras_controle\\' #A variable defining which images from the "a" list should be analyzed _from = 0 _to = 90 #The function that will search for the windows def janela_movel(MODIS_list,local_viirs,saida): start = time.time() df_vazio = pd.DataFrame( {'mean_b1': [], 'mean_b2': [], 'mean_b4': [], 'euc_dist': [], 'left': [], 'right': [], 'bottom': [], 'top': [], 'id_alvo': []}) for file in (range(_from,_to)): #len(MODIS_list) MODIS_address = MODIS_list[file] #Searches for a maks image that matches the name of the satellite image viirs_address = local_viirs+path_leaf(MODIS_address) #Open the matched mask image raster_viirs = rasterio.open(viirs_address) #Open the raster image raster_MODIS = rasterio.open(MODIS_address) #Caculates the average digital value in a ROI for 3 bands of the image (RED, GREEN and NEAR-INFRARED) (actually surface reflectance at NADIR); media_zonal_b1 = zonal_stats(SHP_address, MODIS_address, stats="mean", nodata=0, all_touched=False, categorical=False, geojson_out=True, band=1) media_zonal_b2 = zonal_stats(SHP_address, MODIS_address, stats="mean", nodata=0, all_touched=False, categorical=False, geojson_out=True, band=3) media_zonal_b4 = zonal_stats(SHP_address, MODIS_address, stats="mean count", nodata=0, all_touched=False, categorical=False, geojson_out=True, band=5) #Now the code will access the ROI. For each one it will extract not only the already computated mean value, but also the coordinates #of the ROI and its ID (identificator) for x in range(len(media_zonal_b1)): mean_band1_alvo = media_zonal_b1[x]['properties']['mean'] mean_band2_alvo = media_zonal_b2[x]['properties']['mean'] mean_band4_alvo = media_zonal_b4[x]['properties']['mean'] id_alvo = x array_vazio = [] #Here I set the size of the window/kernel i = 3 j = 3 #Now it will access the satellite image and move this 3x3 window through all pixels for i_r in range(raster_viirs.height): for j_r in range(raster_viirs.width): row_start = i_r row_stop = i_r + i col_start = j_r col_stop = j_r + j Win = Window.from_slices(slice(row_start, row_stop), slice(col_start, col_stop)) croped = raster_viirs.read(window=Win) #This is some code to prevent NAN values and not electable pixels if (-999.0 not in croped) and (1 not in croped) and (np.isnan(croped).any() != True): bounds = raster_viirs.window_bounds(Win) croped2 = raster_MODIS.read(window=Win) #aplicando a janela extraída ao dado e reflectancia if ((np.isnan(croped2).any()) != True) and (croped2.size != 0): mean_band1 = np.mean(croped2[0]) mean_band2 = np.mean(croped2[2]) mean_band4 = np.mean(croped2[4]) if (mean_band1_alvo or mean_band2_alvo or mean_band4_alvo) is None: mean_band1_alvo = -999 mean_band2_alvo = -999 mean_band4_alvo = -999 dist = -999 #Calculates the euclidian distance between the bands of the pixels within the window and the ROI else: dist = (((mean_band1 - mean_band1_alvo) ** 2) + ((mean_band2 - mean_band2_alvo) ** 2) + ( (mean_band4 - mean_band4_alvo) ** 2)) ** 0.5 # Creates a dataframe with all the electable kernels array_dados = [dist,mean_band1,mean_band2,mean_band4,bounds[0],bounds[1],bounds[2],bounds[3]] #aggreagte the kernels in a single array array_vazio = array_vazio+array_dados #Transforms in a 2d array and order it to find the most alike kernel (smallest euclidian distance) array_dados_2d = np.reshape(array_vazio, (-1,8)) array_dados_2d = array_dados_2d[np.argsort(array_dados_2d[:, 0])] #Number of windows per ROI numero_de_amostras = 1 array_dados_filtered = array_dados_2d[0:numero_de_amostras, ] #Accumulates the windows of all the ROI in a single vector (.shp file) df_dados = pd.DataFrame( {'euc_dist': array_dados_filtered[:, 0], 'mean_b1': array_dados_filtered[:, 1], 'mean_b2': array_dados_filtered[:, 2], 'mean_b4': array_dados_filtered[:, 3], 'left': array_dados_filtered[:, 4], 'bottom': array_dados_filtered[:, 5], 'right': array_dados_filtered[:, 6], 'top': array_dados_filtered[:, 7], 'id_alvo': id_alvo}) df_vazio = df_vazio.append(df_dados, ignore_index = True) #Some geocoding for the output vector file. bbox = df_vazio.apply(lambda row: box(row.left, row.bottom, row.right, row.top), axis=1) geo_dataframe = geopandas.GeoDataFrame(df_vazio, geometry=bbox) geo_dataframe.crs = {'init': 'epsg:4326'} local = saida geo_dataframe.to_file(driver = 'ESRI Shapefile', filename= (local+path_leaf(MODIS_address)+'.shp')) df_vazio = pd.DataFrame( {'mean_b1': [], 'mean_b2': [], 'mean_b4': [], 'euc_dist': [], 'left': [], 'right': [], 'bottom': [], 'top': [], 'id_alvo': []}) end = time.time() print(end - start) #finally appling the function janela_movel(a,b,c)```

1条回答

网友

1楼 · 发布于 2024-05-23 16:58:45

主要问题可能是对raster_MODIS.read的调用量非常大，这可能会执行缓慢的I/O操作。此外，由于窗口的大小，每个单元格实际上被读取了9次（平均）

要解决这个问题，您可以将整个光栅读入内存中的2D numpy阵列，然后直接对其进行操作。如果光栅太大，则可以将其拆分为线带或二维平铺（尽管这有点复杂）

然后，如果这还不够，您可以将numpy计算部分（2个嵌套的内部循环）放入一个函数中，并使用numba@njit对其进行加速（请注意，如果array_vazio是一个直接分配的2D numpy数组，具有良好的大小，或者甚至具有高估的大小，因为numba不喜欢Python列表，那么它会更好/更快）.

最后，如果这还不够，您可以尝试使用多处理模块并行计算每个光栅（在单独的过程中，并在最后合并过程数据）

生成的代码应该比快几个数量级

相关问题更多 >

编程相关推荐

热门问题

热门文章