Skip to content
This repository was archived by the owner on Nov 19, 2024. It is now read-only.

Commit a7d93e6

Browse files
committed
tests added
1 parent 9a24f21 commit a7d93e6

16 files changed

Lines changed: 9716 additions & 5 deletions

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
*cache*
2+
.ipynb_checkpoints
13
build/*
24
!build/opencv
35
build/opencv/*
@@ -27,3 +29,5 @@ venv
2729
*.swp
2830
*.whl
2931
TODO.txt
32+
*.bin
33+
*.weights

TODO.md

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22

33
**TESTS**:
44

5-
+ Video open
6-
+ Webcam open
7-
+ dnn module network loading (YOLO or something)
8-
+ IE model loading (e.g. PixelLink)
5+
+ Automate model weights downloading
6+
+ Webcam open?
97

10-
Check <https://hackmd.io/@banderlog/H1nXBmsYB> and organize them in a separate folder.

tests/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
The [rateme](https://github.com/heyml/rateme) model is actually YOLOv3.
2+
3+
These are Intel's models: [text-detection-0004](https://github.com/opencv/open_model_zoo/blob/master/models/intel/text-detection-0004/description/text-detection-0004.md) and [text-recognition-0012](https://github.com/opencv/open_model_zoo/blob/master/models/intel/text-recognition-0012/description/text-recognition-0012.md).
4+
5+
Video from here (free): <https://www.pexels.com/video/a-cattails-fluff-floats-in-air-2156021/>
6+
7+
8+
**MODEL WEIGHTS SHOULD BE DOWNLOADED SEPARATELY (for now)**

tests/dislike.jpg

98.4 KB
Loading

tests/helloworld.png

12.7 KB
Loading

tests/pixellink.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
""" Wrapper class for Intel's PixelLink realisation (text segmentation NN)
2+
text-detection-00[34]
3+
4+
For text-detection-0002 you'll need to uncomment the corresponding line in detect()
5+
"""
6+
import cv2
7+
import numpy as np
8+
from scipy.special import softmax
9+
from skimage.morphology import label
10+
from skimage.measure import regionprops
11+
from typing import List, Tuple
12+
from skimage.measure._regionprops import RegionProperties
13+
14+
15+
class PixelLinkDetector():
    """ Wrapper class for Intel's version of PixelLink (text-detection models)

    The output order unpacked in :meth:`detect` is the one used for
    text-detection-00[34]; for text-detection-0002 switch to the
    commented-out line in :meth:`detect`.

    :param xml_model_path: path to XML file
    :param txt_threshold: confidence, defaults to ``0.5``

    **Example:**

    .. code-block:: python

        detector = PixelLinkDetector('text-detection-0004.xml')
        img = cv2.imread('tmp.jpg')
        # ~250ms on i7-6700K
        detector.detect(img)
        # ~2ms
        bboxes = detector.decode()
    """
    # network input size passed to ``cv2.dnn.blobFromImage`` as (width, height)
    _INPUT_SIZE = (1280, 768)

    def __init__(self, xml_model_path: str, txt_threshold=0.5):
        """
        The weights file is expected next to the XML file, with the same
        name and a ``.bin`` extension.

        :param xml_model_path: path to model's XML file
        :param txt_threshold: confidence, defaults to ``0.5``
        """
        # local import: the module header does not import ``os``
        import os

        model_path = os.fspath(xml_model_path)
        # swap the extension instead of slicing off the last three characters,
        # so paths with an unexpected suffix do not yield a garbled file name
        weights_path = os.path.splitext(model_path)[0] + '.bin'
        self._net = cv2.dnn.readNet(model_path, weights_path)
        self._txt_threshold = txt_threshold

    def detect(self, img: np.ndarray) -> None:
        """ Run the network and store its raw outputs

        The outputs are kept in ``self.links`` and ``self.pixels`` and the
        input image shape is remembered for :meth:`decode`.

        :param img: image as ``numpy.ndarray``
        """
        self._img_shape = img.shape
        blob = cv2.dnn.blobFromImage(img, 1, self._INPUT_SIZE)
        self._net.setInput(blob)
        out_layer_names = self._net.getUnconnectedOutLayersNames()
        # for text-detection-0002
        # self.pixels, self.links = self._net.forward(out_layer_names)
        # for text-detection-00[34]
        self.links, self.pixels = self._net.forward(out_layer_names)

    def get_mask(self) -> np.ndarray:
        """ Get binary mask of detected text pixels

        :return: ``uint8`` array, ``1`` where the text score reaches the
            threshold and ``0`` elsewhere
        :raises RuntimeError: if :meth:`detect` has not been called yet
        """
        pixel_mask = self._get_pixel_scores() >= self._txt_threshold
        return pixel_mask.astype(np.uint8)

    def _get_pixel_scores(self) -> np.ndarray:
        """ Get softmaxed, properly shaped pixel scores

        :return: 2D array with the softmax score of class index ``1``
            for the first item of the batch
        :raises RuntimeError: if :meth:`detect` has not been called yet
        """
        pixels = getattr(self, 'pixels', None)
        if pixels is None:
            raise RuntimeError('detect() must be called before the outputs can be decoded')
        # move the class axis last: (N, C, H, W) -> (N, H, W, C)
        channels_last = np.transpose(pixels, (0, 2, 3, 1))
        return softmax(channels_last, axis=-1)[0, :, :, 1]

    def _get_txt_regions(self, pixel_mask: np.ndarray) -> "List[RegionProperties]":
        """ Label connected text regions on a mask of the original image size

        :param pixel_mask: mask as returned by :meth:`get_mask`
        :return: region properties of every labelled region
        """
        img_h, img_w = self._img_shape[:2]
        _, mask = cv2.threshold(pixel_mask, 0, 1, cv2.THRESH_BINARY)
        # transmutations
        # kernel size should be image size dependent (default (21,21))
        # on small image it will connect separate words
        txt_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, txt_kernel)
        # label regions on mask of original img size
        mask = cv2.resize(mask, (img_w, img_h), interpolation=cv2.INTER_NEAREST)
        mask = label(mask, background=0, connectivity=2)
        return regionprops(mask)

    def _get_txt_bboxes(self, txt_regions: "List[RegionProperties]",
                        min_area: int = 0,
                        min_height: int = 4) -> List[Tuple[int, int, int, int]]:
        """ Filter text regions by area and height

        :param txt_regions: regions exposing ``area`` and ``bbox``
        :param min_area: regions must have an area strictly greater than this
        :param min_height: boxes must be strictly taller than this
        :return: ``[(ymin, xmin, ymax, xmax)]``
        """
        boxes = []
        for region in txt_regions:
            if not region.area > min_area:
                continue
            bbox = region.bbox
            if (bbox[2] - bbox[0]) > min_height:
                boxes.append(bbox)
        return boxes

    def decode(self) -> List[Tuple[int, int, int, int]]:
        """ Decode PixelLink's output

        :return: bounding_boxes
        :raises RuntimeError: if :meth:`detect` has not been called yet

        .. note::
            bounding_boxes format: [ymin, xmin, ymax, xmax]

        """
        mask = self.get_mask()
        return self._get_txt_bboxes(self._get_txt_regions(mask))

tests/rateme/__init__.py

Whitespace-only changes.

tests/rateme/rateme.cfg

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
[net]
2+
# Testing
3+
#batch=1
4+
#subdivisions=1
5+
# Training
6+
batch=64
7+
subdivisions=32
8+
width=416
9+
height=416
10+
channels=3
11+
momentum=0.9
12+
decay=0.0005
13+
angle=0
14+
saturation = 1.5
15+
exposure = 1.5
16+
hue=.1
17+
18+
learning_rate=0.001
19+
burn_in=1000
20+
max_batches = 4000
21+
policy=steps
22+
steps=3200,3600
23+
scales=.1,.1
24+
25+
[convolutional]
26+
batch_normalize=1
27+
filters=16
28+
size=3
29+
stride=1
30+
pad=1
31+
activation=leaky
32+
33+
[maxpool]
34+
size=2
35+
stride=2
36+
37+
[convolutional]
38+
batch_normalize=1
39+
filters=32
40+
size=3
41+
stride=1
42+
pad=1
43+
activation=leaky
44+
45+
[maxpool]
46+
size=2
47+
stride=2
48+
49+
[convolutional]
50+
batch_normalize=1
51+
filters=64
52+
size=3
53+
stride=1
54+
pad=1
55+
activation=leaky
56+
57+
[maxpool]
58+
size=2
59+
stride=2
60+
61+
[convolutional]
62+
batch_normalize=1
63+
filters=128
64+
size=3
65+
stride=1
66+
pad=1
67+
activation=leaky
68+
69+
[maxpool]
70+
size=2
71+
stride=2
72+
73+
[convolutional]
74+
batch_normalize=1
75+
filters=256
76+
size=3
77+
stride=1
78+
pad=1
79+
activation=leaky
80+
81+
[maxpool]
82+
size=2
83+
stride=2
84+
85+
[convolutional]
86+
batch_normalize=1
87+
filters=512
88+
size=3
89+
stride=1
90+
pad=1
91+
activation=leaky
92+
93+
[maxpool]
94+
size=2
95+
stride=1
96+
97+
[convolutional]
98+
batch_normalize=1
99+
filters=1024
100+
size=3
101+
stride=1
102+
pad=1
103+
activation=leaky
104+
105+
###########
106+
107+
[convolutional]
108+
batch_normalize=1
109+
filters=256
110+
size=1
111+
stride=1
112+
pad=1
113+
activation=leaky
114+
115+
[convolutional]
116+
batch_normalize=1
117+
filters=512
118+
size=3
119+
stride=1
120+
pad=1
121+
activation=leaky
122+
123+
[convolutional]
124+
size=1
125+
stride=1
126+
pad=1
127+
filters=21
128+
activation=linear
129+
130+
131+
132+
[yolo]
133+
mask = 3,4,5
134+
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
135+
classes=2
136+
num=6
137+
jitter=.3
138+
ignore_thresh = .7
139+
truth_thresh = 1
140+
random=1
141+
142+
[route]
143+
layers = -4
144+
145+
[convolutional]
146+
batch_normalize=1
147+
filters=128
148+
size=1
149+
stride=1
150+
pad=1
151+
activation=leaky
152+
153+
[upsample]
154+
stride=2
155+
156+
[route]
157+
layers = -1, 8
158+
159+
[convolutional]
160+
batch_normalize=1
161+
filters=256
162+
size=3
163+
stride=1
164+
pad=1
165+
activation=leaky
166+
167+
[convolutional]
168+
size=1
169+
stride=1
170+
pad=1
171+
filters=21
172+
activation=linear
173+
174+
[yolo]
175+
mask = 0,1,2
176+
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
177+
classes=2
178+
num=6
179+
jitter=.3
180+
ignore_thresh = .7
181+
truth_thresh = 1
182+
random=1

0 commit comments

Comments
 (0)