1+ import  cv2 
2+ import  argparse 
3+ import  numpy  as  np 
4+ 
class yolov5():
    """YOLOv5 object detector that runs an ONNX model through ``cv2.dnn``.

    The class loads class names from ``coco.names`` in the current working
    directory, loads ``<yolo_type>.onnx``, decodes the raw network outputs
    into boxes (``detect``) and filters/draws them (``postprocess``).
    """

    def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5):
        """Load class names and the ONNX network.

        Args:
            yolo_type: Model file stem; ``yolo_type + '.onnx'`` is loaded.
            confThreshold: Minimum class score for a detection to be kept.
            nmsThreshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.
            objThreshold: Minimum objectness score for a detection to be kept.
        """
        with open('coco.names', 'rt') as f:
            # One class name per line.  Splitting on a bare newline (not
            # newline + space) is required, otherwise the whole file ends up
            # as a single "class" and ``self.no`` is wrong.
            self.classes = f.read().rstrip().splitlines()
        self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))]
        num_classes = len(self.classes)
        # Anchor (w, h) pairs, one row per detection layer.
        anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(anchors)              # number of detection layers
        self.na = len(anchors[0]) // 2      # anchors per layer
        self.no = num_classes + 5           # values per anchor: x, y, w, h, obj, classes
        # Placeholder grids; replaced lazily in detect() once feature-map
        # sizes are known.
        self.grid = [np.zeros(1)] * self.nl
        self.stride = np.array([8., 16., 32.])
        # Shape (nl, 1, na, 1, 1, 2) so anchor_grid[i] broadcasts against a
        # (bs, na, ny, nx, 2) slice.
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, 1, -1, 1, 1, 2)

        self.net = cv2.dnn.readNet(yolo_type + '.onnx')
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold

    def _make_grid(self, nx=20, ny=20):
        """Return a float32 grid of cell coordinates.

        The result has shape ``(1, 1, ny, nx, 2)`` and
        ``grid[0, 0, y, x] == (x, y)``.
        """
        # np.meshgrid defaults to 'xy' indexing: passing (x-range, y-range)
        # yields arrays of shape (ny, nx).  Passing them the other way round
        # only works by accident when nx == ny.
        xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))
        return np.stack((xv, yv), 2).reshape((1, 1, ny, nx, 2)).astype(np.float32)

    def postprocess(self, frame, outs):
        """Filter decoded detections, run NMS and draw the survivors.

        Args:
            frame: Image the detections are drawn on; its height and width
                are used to rescale boxes from the 640x640 network input.
            outs: Decoded detections as returned by ``detect`` (iterated as
                batches of rows ``[cx, cy, w, h, obj, class scores...]``).

        Returns:
            The frame with boxes and labels drawn on it.
        """
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        # detect() resizes the image to 640x640 without cropping, so boxes
        # are scaled back independently per axis.
        ratioh, ratiow = frameHeight / 640, frameWidth / 640

        # Keep only detections whose objectness and best class score both
        # exceed their thresholds; the label is the best-scoring class.
        classIds = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                # Cheap objectness test first; skips the argmax for the
                # vast majority of candidates.
                if not detection[4] > self.objThreshold:
                    continue
                scores = detection[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if not confidence > self.confThreshold:
                    continue
                center_x = int(detection[0] * ratiow)
                center_y = int(detection[1] * ratioh)
                width = int(detection[2] * ratiow)
                height = int(detection[3] * ratioh)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

        # Non-maximum suppression removes overlapping lower-confidence boxes.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        # Depending on the OpenCV version the result is an (N, 1) array, a
        # flat array, or an empty tuple; flatten handles all three.
        for i in np.array(indices).flatten():
            i = int(i)
            left, top, width, height = boxes[i]
            frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
        return frame

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        """Draw one bounding box with a ``<class>:<score>`` label.

        Returns the frame that was drawn on.
        """
        # Bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)

        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)

        # Label just above the top edge of the box, clamped so that it does
        # not start above the image.
        # NOTE(review): the size is measured at font scale 0.5 / thickness 1
        # while the text is drawn at scale 1 / thickness 2 -- confirm whether
        # that mismatch is intended.
        labelSize, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
        return frame

    def detect(self, srcimg):
        """Run the network on an image and decode its raw outputs.

        Args:
            srcimg: Input image passed to ``cv2.dnn.blobFromImage``.

        Returns:
            Array of shape ``(bs, total_anchors, self.no)`` with rows
            ``[cx, cy, w, h, obj, class scores...]``; box values are in
            pixels of the 640x640 network input.
        """
        blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (640, 640), [0, 0, 0], swapRB=True, crop=False)
        # Set the network input.
        self.net.setInput(blob)

        # Forward pass through all unconnected output layers.
        # NOTE(review): assumes the outputs come back ordered to match
        # self.stride / the anchor rows -- confirm for the exported model.
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())

        z = []  # decoded output of every detection layer
        for i in range(self.nl):
            bs, _, ny, nx = outs[i].shape
            # (bs, na * no, ny, nx) -> (bs, na, ny, nx, no).  A local is used
            # instead of assigning into ``outs`` because forward() may return
            # an immutable tuple.
            out = outs[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
            if self.grid[i].shape[2:4] != out.shape[2:4]:
                self.grid[i] = self._make_grid(nx, ny)

            # Sigmoid over every value.  Strictly only x, y, w, h need it,
            # and because sigmoid is monotonic it does not change the ranking
            # of class scores; however the raw class scores were observed to
            # be negative, so squashing them into (0, 1) is needed for the
            # thresholds used later.
            y = 1 / (1 + np.exp(-out))
            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * int(self.stride[i])  # xy
            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            z.append(y.reshape(bs, -1, self.no))
        return np.concatenate(z, axis=1)
102+ 
if __name__ == "__main__":
    # Command-line entry point: run detection on one image and show it.
    parser = argparse.ArgumentParser()
    parser.add_argument("--imgpath", type=str, default='bus.jpg', help="image path")
    parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x'])
    args = parser.parse_args()

    yolonet = yolov5(args.net_type)
    srcimg = cv2.imread(args.imgpath)
    # cv2.imread signals failure by returning None instead of raising;
    # report it here rather than failing later inside the detector.
    if srcimg is None:
        parser.error('could not read image: %s' % args.imgpath)
    dets = yolonet.detect(srcimg)
    srcimg = yolonet.postprocess(srcimg, dets)

    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, 0)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
0 commit comments