在树莓派(Raspberry Pi)上使用TensorFlow Lite进行预测时速度很慢

2024-03-28 18:17:54 发布

您现在位置:Python中文网/ 问答频道 /正文

我尝试在我的Raspberry Pi 4(4GB)上以不同的线程运行两个对象检测模型

现在,每个模型每次预测需要1.1秒到1.5秒,这对我来说似乎非常慢,因为单独运行一个模型时每次预测大约只需要0.4秒

当我同时运行这两个模型时,CPU负载上升到85%左右,但只使用了约600MB的RAM

有没有办法通过优化Pi的资源(resource)使用来获得更好的模型性能?

Main.py

# Camera sources: `c` is the Pi camera, `tc` is the second camera.
c = Camera()
tc = otherCamera()
# One detector per worker thread; each FaceDetection instance builds its own
# TFLite interpreter, so the two threads never share interpreter state.
# The keyword names must match FaceDetection.__init__(modelDir, modeltype);
# any other spelling (e.g. `model=` / `modelType=`) raises TypeError.
faceDetection1 = FaceDetection(modelDir="TFlite", modeltype="tflite")
faceDetection2 = FaceDetection(modelDir="TFlite", modeltype="tflite")
# Futures of the two in-flight detection jobs; None means nothing is pending.
detectionThread1, detectionThread2 = None, None

def detectionThread(inputDict, detector):
    """Run one face-detection pass; intended to be submitted to an executor.

    Args:
        inputDict: Job description. Must provide the keys ``"Image"`` and
            ``"Scale"``. It is mutated in place: the detector's result is
            stored under ``"Faces"``.
        detector: Object exposing ``prepareImage(image, scale)`` and
            ``detectFace()``.

    Returns:
        The same ``inputDict`` object, now carrying the ``"Faces"`` entry.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    detector.prepareImage(inputDict["Image"], inputDict["Scale"])
    inputDict["Faces"] = detector.detectFace()
    # Lazy %-formatting: the message is only built when DEBUG is enabled.
    logging.debug("Runtime DetectionThread:  %s ", time.perf_counter() - started)
    return inputDict

# Main loop: keep grabbing frames from both cameras and run the two detectors
# as a pair of jobs on the thread pool. A new pair is only submitted once the
# previous pair has been collected.
with ThreadPoolExecutor(max_workers=4) as executor:
    while True:
        # Grab a frame from the Pi camera and crop it (PiCam2).
        liveFrame, size = c.cropImage(c.takeImage())

        # Grab a frame and its accompanying data from the second camera.
        secFrame, data = tc.takeImage()

        # Nothing in flight: hand the newest frame of each camera to a worker.
        if detectionThread1 is None and detectionThread2 is None:
            detectionThread1 = executor.submit(
                detectionThread,
                {"Image": liveFrame, "Size": size, "Scale": 1},
                faceDetection1,
            )
            detectionThread2 = executor.submit(
                detectionThread,
                {"Image": secFrame, "Data": data, "Scale": 1},
                faceDetection2,
            )

        # Collect the results only when BOTH jobs are finished, then clear the
        # futures so that the next iteration submits a fresh pair.
        if detectionThread1.done() and detectionThread2.done():
            frameDict = {
                "PICam": detectionThread1.result(),
                "OtherCam": detectionThread2.result(),
            }
            detectionThread1, detectionThread2 = None, None

        # Leave the loop when 'q' is pressed.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
class FaceDetection:
    """Object detector backed by a TensorFlow Lite interpreter.

    Each instance loads the first ``.tflite`` file found in a directory and
    owns its own ``tf.lite.Interpreter``.

    NOTE(review): callers in this file also invoke ``prepareImage()`` and call
    ``detectFace()`` without an image; neither is supported by the code shown
    here - confirm against the full class.
    """

    # (width, height) handed to cv2.resize; the model in this file was
    # converted with --input_shapes=1,300,300,3.
    _INPUT_SIZE = (300, 300)

    def __init__(self, modelDir, modeltype="tflite"):
        """Load the model and allocate the interpreter's tensors.

        Args:
            modelDir: Directory that is searched for a ``.tflite`` file.
            modeltype: Accepted for interface compatibility; not used here.

        Raises:
            FileNotFoundError: If ``modelDir`` contains no ``.tflite`` file.
        """
        # First matching file in os.listdir() order, or '' when none exists.
        model = next(
            (
                os.path.join(modelDir, name)
                for name in os.listdir(modelDir)
                if name.endswith('.tflite')
            ),
            '',
        )
        if model == '':
            raise FileNotFoundError(f'No TFLite model found in {modelDir}')

        # Load the model and cache the tensor descriptions used on every call.
        self.interpreter = tf.lite.Interpreter(model_path=model)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def detectFace(self, img=None, normalized=True):
        """Run the model on one image and return the raw detection tensors.

        Args:
            img: Image array with three dimensions (height, width, channels).
            normalized: Accepted for interface compatibility; not used here.

        Returns:
            dict with the keys ``detection_boxes``, ``detection_classes``,
            ``detection_scores`` (interpreter output tensors 0-2) and
            ``num_detections`` (output tensor 3 converted to ``int``).

        Raises:
            ValueError: If ``img`` is None.
        """
        if img is None:
            raise ValueError("detectFace() requires an image, got None")

        # Resize to the model input size and add the leading batch axis:
        # (H, W, C) -> (1, 300, 300, C), as float32.
        # NOTE(review): pixel values are passed through unscaled; the model
        # input is named `normalized_input_image_tensor`, so it presumably
        # expects normalized values - confirm.
        resized = cv2.resize(img, self._INPUT_SIZE)
        batch = np.expand_dims(resized, axis=0).astype(np.float32)

        # Feed the input tensor and run inference.
        self.interpreter.set_tensor(self.input_details[0]['index'], batch)
        self.interpreter.invoke()

        def read_output(position):
            # Fetch the output tensor registered at `position`.
            return self.interpreter.get_tensor(
                self.output_details[position]['index']
            )

        return {
            'detection_boxes': read_output(0),
            'detection_classes': read_output(1),
            'detection_scores': read_output(2),
            # .item() extracts the single value explicitly; calling int() on
            # an array with ndim > 0 is deprecated in recent NumPy releases.
            'num_detections': int(read_output(3).item()),
        }

我的基础模型是ssd_mobilenet_v2_coco_2018_03_29,我对它进行了重新训练,然后用以下命令将其转换为TFLite格式

# Convert to TFLite

# Step 1: export the retrained checkpoint as a graph (tflite_graph.pb) into
# model/TFlite.
python /Tensorflow/models/research/object_detection/export_tflite_ssd_graph.py \
    --pipeline_config_path model/ssd_mobilenet_v2_coco_2018_03_29/custom_pipeline.config \
    --trained_checkpoint_prefix model/ssd_mobilenet_v2_coco_2018_03_29/model.ckpt-10000 \
    --output_directory model/TFlite

# Step 2: convert that graph into model/TFlite/detect.tflite.
tflite_convert \
    --graph_def_file model/TFlite/tflite_graph.pb \
    --output_file=model/TFlite/detect.tflite \
    --output_format=TFLITE \
    --input_shapes=1,300,300,3 \
    --input_arrays=normalized_input_image_tensor \
    --output_arrays='TFLite_Detection_PostProcess','TFLite_Detection_PostProcess:1','TFLite_Detection_PostProcess:2','TFLite_Detection_PostProcess:3' \
    --allow_custom_ops

使用的Tensorflow版本:1.15


Tags: self, none, img, input, output, get, model, details