我尝试在我的Raspberry Pi 4(4GB)上以不同的线程运行两个对象检测模型
现在,每个模型预测需要1.1秒到1.5秒,这对我来说似乎非常慢,因为单个模型大约需要0.4秒
当我同时运行这两个模型时,CPU负载上升到85%左右,但只使用了约600 MB的内存
有没有办法通过优化Pi的资源使用来获得更好的模型性能?
Main.py
# Two frame sources; Camera and otherCamera are defined elsewhere in the project.
c = Camera()
tc = otherCamera()
# One detector instance per submitted task, so the two tasks never share an
# interpreter. The keyword names match FaceDetection.__init__(modelDir, modeltype);
# the previous `model=` / `modelType=` keywords raised TypeError.
faceDetection1 = FaceDetection(modelDir="TFlite", modeltype="tflite")
faceDetection2 = FaceDetection(modelDir="TFlite", modeltype="tflite")
# Handles of the in-flight detection tasks; None means "nothing submitted".
detectionThread1, detectionThread2 = None, None
def detectionThread(inputDict, detector):
    """Run one detection pass for a single frame and time it.

    Args:
        inputDict: Mapping that must contain the keys ``"Image"`` and
            ``"Scale"``; both are forwarded to ``detector.prepareImage``.
        detector: Object exposing ``prepareImage(image, scale)`` and
            ``detectFace()``.

    Returns:
        The same ``inputDict`` object, with the detector's result stored
        under the key ``"Faces"``.
    """
    started = time.time()
    image, scale = inputDict["Image"], inputDict["Scale"]
    detector.prepareImage(image, scale)
    faces = detector.detectFace()
    inputDict["Faces"] = faces
    # Wall-clock duration of prepare + detect, logged at DEBUG level.
    elapsed = time.time() - started
    logging.debug(f"Runtime DetectionThread: {elapsed} ")
    return inputDict
# Capture/dispatch loop: frames are read from both cameras on every pass,
# but a new pair of detection tasks is only submitted when no pair is
# currently in flight. Press 'q' to leave the loop.
with ThreadPoolExecutor(max_workers=4) as executor:
    while True:
        liveFrame = c.takeImage()
        # Crop image from PiCam2
        liveFrame, size = c.cropImage(liveFrame)
        secFrame, data = tc.takeImage()

        if detectionThread1 is None and detectionThread2 is None:
            # Nothing pending: hand the freshly captured frames to the pool.
            piCamJob = {"Image": liveFrame, "Size": size, "Scale": 1}
            otherCamJob = {"Image": secFrame, "Data": data, "Scale": 1}
            detectionThread1 = executor.submit(detectionThread, piCamJob, faceDetection1)
            detectionThread2 = executor.submit(detectionThread, otherCamJob, faceDetection2)

        if detectionThread1.done() and detectionThread2.done():
            # Both tasks finished: collect the results as a pair and clear
            # the handles so the next pass submits again.
            frameDict = {
                "PICam": detectionThread1.result(),
                "OtherCam": detectionThread2.result(),
            }
            detectionThread1, detectionThread2 = None, None

        pressedKey = cv2.waitKey(10) & 0xFF
        if pressedKey == ord('q'):
            break
class FaceDetection():
    """Object detector that wraps a TensorFlow Lite interpreter.

    The interpreter is created once in ``__init__`` and reused for every
    call to ``detectFace``.
    """

    # (width, height) the frame is resized to before inference; matches the
    # 300x300 input the model is fed in detectFace.
    INPUT_SIZE = (300, 300)

    def __init__(self, modelDir, modeltype="tflite"):
        """Load the first ``.tflite`` file found directly inside *modelDir*.

        Args:
            modelDir: Directory searched (non-recursively) for a file whose
                name ends in ``.tflite``.
            modeltype: Accepted for interface compatibility; not used here.

        Raises:
            FileNotFoundError: If *modelDir* contains no ``.tflite`` file.
                (A subclass of ``Exception``, so existing broad handlers
                still catch it.)
        """
        model = next(
            (
                os.path.join(modelDir, f)
                for f in os.listdir(modelDir)
                if f.endswith('.tflite')
            ),
            '',
        )
        if model == '':
            raise FileNotFoundError(f'No TFLite model found in {modelDir}')

        # Load the model and cache the tensor metadata used on every call.
        self.interpreter = tf.lite.Interpreter(model_path=model)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def detectFace(self, img=None, normalized=True):
        """Run one inference on *img* and return the raw output tensors.

        Args:
            img: Image array; it is resized to ``INPUT_SIZE`` and must have
                three dimensions afterwards (height, width, channels).
            normalized: Accepted for interface compatibility; not used here.

        Returns:
            dict with the keys ``detection_boxes``, ``detection_classes``
            and ``detection_scores`` (output tensors 0, 1 and 2) and
            ``num_detections`` (output tensor 3 converted to ``int``).

        Raises:
            ValueError: If *img* is ``None``.
        """
        if img is None:
            raise ValueError('detectFace() requires an image, got None')

        # Prepare the image: resize, add a leading batch axis, cast to float32.
        # NOTE(review): pixel values are only cast, not rescaled, and the
        # `normalized` flag is not consulted -- confirm the model expects
        # this value range.
        img = cv2.resize(img, self.INPUT_SIZE)
        img = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])
        img = img.astype(np.float32)

        # Set the input tensor and run the model.
        self.interpreter.set_tensor(self.input_details[0]['index'], img)
        self.interpreter.invoke()

        # Read the output tensors.
        get_tensor = self.interpreter.get_tensor
        outputs = self.output_details
        num_detections = get_tensor(outputs[3]['index'])
        return {
            'detection_boxes': get_tensor(outputs[0]['index']),
            'detection_classes': get_tensor(outputs[1]['index']),
            'detection_scores': get_tensor(outputs[2]['index']),
            # .item() extracts the single element without relying on the
            # deprecated implicit conversion of a 1-D array to a scalar.
            'num_detections': int(num_detections.item()),
        }
我的基础模型是 ssd_mobilenet_v2_coco_2018_03_29,我对它进行了重新训练,然后用以下命令将其转换为 TFLite 格式:
#Convert to TFlite
python /Tensorflow/models/research/object_detection/export_tflite_ssd_graph.py --pipeline_config_path model/ssd_mobilenet_v2_coco_2018_03_29/custom_pipeline.config --trained_checkpoint_prefix model/ssd_mobilenet_v2_coco_2018_03_29/model.ckpt-10000 --output_directory model/TFlite
tflite_convert --graph_def_file model/TFlite/tflite_graph.pb --output_file=model/TFlite/detect.tflite --output_format=TFLITE --input_shapes=1,300,300,3 --input_arrays=normalized_input_image_tensor --output_arrays='TFLite_Detection_PostProcess','TFLite_Detection_PostProcess:1','TFLite_Detection_PostProcess:2','TFLite_Detection_PostProcess:3' --allow_custom_ops
使用的Tensorflow版本:1.15
目前没有回答
相关问题 更多 >
编程相关推荐