Separating Overlapping Bounding Boxes into Non-overlapping Polygon Masks
Published:
Introduction
In this project, I decided to play with the challenge of identifying objects in images using bounding boxes, which often overlap. While some models, such as Mask R-CNN, can handle overlapping bounding boxes without issue, others like U-Net, a semantic segmentation model, may experience difficulties when these bounding boxes are converted to masks. In these cases, separating individual objects becomes problematic.
I aim to effectively separate overlapping bounding boxes and establish a margin between them. I’ll be using data from the Global Wheat Detection Dataset to accomplish this goal.
import numpy as np
import pandas as pd
from fastai.vision import *
import geopandas as gpd
import shapely
from shapely.geometry import Polygon, LineString
from tqdm import tqdm
# Location of the Global Wheat Detection data on disk.
path = Path('/global-wheat-detection/')
# Annotation table; the listing printed below shows one bounding box per row.
df = pd.read_csv(path.joinpath('train.csv'))
image_id | width | height | bbox | source | |
---|---|---|---|---|---|
0 | b6ab77fd7 | 1024 | 1024 | [834.0, 222.0, 56.0, 36.0] | usask_1 |
1 | b6ab77fd7 | 1024 | 1024 | [226.0, 548.0, 130.0, 58.0] | usask_1 |
2 | b6ab77fd7 | 1024 | 1024 | [377.0, 504.0, 74.0, 160.0] | usask_1 |
3 | b6ab77fd7 | 1024 | 1024 | [834.0, 95.0, 109.0, 107.0] | usask_1 |
4 | b6ab77fd7 | 1024 | 1024 | [26.0, 144.0, 124.0, 117.0] | usask_1 |
... | ... | ... | ... | ... | ... |
147788 | 5e0747034 | 1024 | 1024 | [64.0, 619.0, 84.0, 95.0] | arvalis_2 |
147789 | 5e0747034 | 1024 | 1024 | [292.0, 549.0, 107.0, 82.0] | arvalis_2 |
147790 | 5e0747034 | 1024 | 1024 | [134.0, 228.0, 141.0, 71.0] | arvalis_2 |
147791 | 5e0747034 | 1024 | 1024 | [430.0, 13.0, 184.0, 79.0] | arvalis_2 |
147792 | 5e0747034 | 1024 | 1024 | [875.0, 740.0, 94.0, 61.0] | arvalis_2 |
147793 rows × 5 columns
def bbox2mask(x, shape=(1024, 1024)):
    """Rasterize bounding boxes into a single binary mask.

    Args:
        x: Sequence of boxes, each ``[x_min, y_min, width, height]`` in
            pixels. An empty sequence yields an all-zero mask.
        shape: ``(height, width)`` of the returned mask. Defaults to the
            1024x1024 size that used to be hard-coded.

    Returns:
        A ``torch`` tensor of the requested shape that is 1 inside every box
        and 0 elsewhere; pixels shared by several boxes are simply 1.
    """
    mask = torch.zeros(tuple(shape))
    for box in np.array(x):
        left, top, width, height = (int(v) for v in box[:4])
        # Rows are indexed by y and columns by x.
        mask[top:top + height, left:left + width] = 1
    return mask
def bbox_center(x, shape=(1024, 1024)):
    """Mark the centre pixel of every bounding box in a binary mask.

    Args:
        x: Sequence of boxes, each ``[x_min, y_min, width, height]`` in
            pixels. An empty sequence yields an all-zero mask.
        shape: ``(height, width)`` of the returned mask. Defaults to the
            1024x1024 size that used to be hard-coded.

    Returns:
        A ``torch`` tensor of the requested shape that is 1 at each box
        centre (integer floor of the midpoint) and 0 elsewhere.
    """
    mask = torch.zeros(tuple(shape))
    for box in np.array(x):
        left, top, width, height = (int(v) for v in box[:4])
        # (2 * start + extent) // 2 is the floored midpoint along each axis.
        center_row = (2 * top + height) // 2
        center_col = (2 * left + width) // 2
        mask[center_row, center_col] = 1
    return mask
def box2polygon(x):
    """Convert an ``[x_min, y_min, width, height]`` box to a shapely Polygon.

    The four corners are listed starting at ``(x_min, y_min)``, then along the
    ``y_min`` edge, then the opposite edge back to ``x_min``.
    """
    left, top = x[0], x[1]
    right = x[0] + x[2]
    bottom = x[1] + x[3]
    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    return Polygon(corners)
# Collect the bbox strings of every image into one list per image_id.
boxes = df.groupby('image_id').agg({'bbox': list})

# Use the third image as the running example.
box = boxes.iloc[2]
file = f"{path / 'train' / box.name}.jpg"
# presumably reorders channels-first data to channels-last for imshow — confirm
img = open_image(file).data.numpy().transpose(1, 2, 0)

# NOTE(review): eval() executes whatever text is stored in the CSV column;
# only safe while train.csv is trusted. Consider ast.literal_eval.
bbox = np.array([eval(raw) for raw in box.bbox]).astype(int).tolist()
mask = bbox2mask(bbox)

# One rectangle polygon per bounding box.
gdf = gpd.GeoDataFrame({'geometry': [box2polygon(rect) for rect in bbox]})
gdf.head()
geometry | |
---|---|
0 | POLYGON ((437.000 988.000, 535.000 988.000, 53... |
1 | POLYGON ((309.000 527.000, 419.000 527.000, 41... |
2 | POLYGON ((414.000 595.000, 499.000 595.000, 49... |
3 | POLYGON ((238.000 949.000, 350.000 949.000, 35... |
4 | POLYGON ((442.000 56.000, 570.000 56.000, 570.... |
The following function takes two bounding boxes as input, both of which are shapely Polygons, and returns the sliced region for box A, along with the cut-off remainder and the splitting line.
def slice_box(box_A: Polygon, box_B: Polygon, margin=10, line_mult=10):
    """Cut ``box_A`` with a line perpendicular to the A->B centroid axis.

    The cut passes through the point that lies ``margin`` units before the
    midpoint between the two centroids (measured from A towards B), so that
    slicing both boxes against each other leaves a gap of about
    ``2 * margin`` between them.

    Args:
        box_A: Polygon to be sliced.
        box_B: Neighbouring polygon that determines the cut direction.
        margin: Distance the cut is pulled back from the centroid midpoint
            towards A.
        line_mult: Half-length of the cutting line, expressed as a multiple
            of the centroid distance. If the line is too short to cross
            ``box_A`` completely, nothing is cut.

    Returns:
        A tuple ``(kept, removed, line)``:

        * ``kept`` - the piece of ``box_A`` containing its centroid, or the
          single/first piece when the box was not cut or the centroid lies
          exactly on the cut.
        * ``removed`` - the other piece, or ``None`` when there is none.
        * ``line`` - the cutting ``LineString``, or ``None`` when the two
          centroids coincide and no cut direction exists.
    """
    # Plain coordinate arrays: shapely Points do not support arithmetic.
    center_A = np.array([box_A.centroid.x, box_A.centroid.y])
    center_B = np.array([box_B.centroid.x, box_B.centroid.y])
    vec_AB = center_B - center_A
    vec_AB_norm = np.linalg.norm(vec_AB)
    if vec_AB_norm == 0:
        # Coincident centroids: no direction to cut along (and the unit
        # vector below would divide by zero), so leave the box untouched.
        return box_A, None, None

    # vec_AB rotated by 90 degrees gives the direction of the cutting line.
    vec_ABp = np.array([-vec_AB[1], vec_AB[0]])
    # NOTE(review): when margin exceeds half the centroid distance the cut
    # falls behind A's centroid, so the kept piece is the one facing B.
    split_point = center_A + vec_AB / 2 - (vec_AB / vec_AB_norm) * margin
    line = LineString([split_point - line_mult * vec_ABp,
                       split_point + line_mult * vec_ABp])

    # ``.geoms`` works on both Shapely 1.x and 2.x; len()/indexing on the
    # collection itself was removed in 2.0.
    pieces = list(shapely.ops.split(box_A, line).geoms)
    if len(pieces) == 1:
        return pieces[0], None, line

    is_center = [piece.contains(box_A.centroid) for piece in pieces]
    if not any(is_center):
        # The centroid lies on the cut itself; fall back to the first piece.
        return pieces[0], None, line

    split_box_center = pieces[is_center.index(True)]
    split_box_out = next(
        (piece for piece, has_center in zip(pieces, is_center) if not has_center),
        None,
    )
    return split_box_center, split_box_out, line
# Take the boxes touching box number 20 and demonstrate the cut on the first
# two of them, slicing each one against the other.
inter = gdf.loc[gdf.intersects(gdf.iloc[20].geometry)]
box_A, box_B = (inter.iloc[row].values[0] for row in range(2))
polyA, _, lineA = slice_box(box_A, box_B, margin=10, line_mult=1.2)
polyB, _, lineB = slice_box(box_B, box_A, margin=10, line_mult=1.2)

# Wrap everything in GeoDataFrames so it can be drawn with .plot().
boxes = gpd.GeoDataFrame({'geometry': [box_A, box_B]})
centroids = gpd.GeoDataFrame(
    {'geometry': [shape.centroid for shape in (box_A, box_B)]}
)
splited_boxes = gpd.GeoDataFrame({'geometry': [polyA, polyB]})
lines = gpd.GeoDataFrame({'geometry': [lineA, lineB]})

# Figure 1: the two original boxes and their centroids.
fig, ax = plt.subplots(dpi=120)
boxes.plot(ax=ax, facecolor='gray', edgecolor='k', alpha=0.5)
centroids.plot(ax=ax, c='k')
ax.axis('off')

# Figure 2: faint originals, the sliced boxes, centroids and the cut lines.
fig, ax = plt.subplots(dpi=120)
boxes.plot(ax=ax, facecolor='gray', edgecolor='k', alpha=0.1)
splited_boxes.plot(ax=ax, facecolor='olive', edgecolor='k')
centroids.plot(ax=ax, c='k')
lines.plot(ax=ax, color='k')
ax.axis('off')
def intersection_list(polylist):
    """Return the common intersection of every geometry in ``polylist``.

    ``polylist`` must be a non-empty, indexable sequence whose items provide
    an ``intersection`` method; indexing an empty sequence raises IndexError.
    """
    # Seed with the first item; the loop then intersects it with itself
    # first, followed by each remaining item in order.
    common = polylist[0]
    for shape in polylist:
        common = common.intersection(shape)
    return common
def slice_one(gdf, index):
    """Slice one box against every other box that intersects it.

    Args:
        gdf: GeoDataFrame whose geometry column holds the bounding-box
            polygons.
        index: Position (not label) of the box to slice. Purely positional
            look-ups keep this correct for frames without a default
            RangeIndex.

    Returns:
        The box at ``index`` unchanged when no other box intersects it;
        otherwise the intersection of the pieces obtained by slicing it
        against each intersecting neighbour with ``slice_box``.
    """
    geoms = gdf.geometry
    box_A = geoms.iloc[index]

    # Boolean mask of intersecting boxes; np.array() copies, so the mask can
    # be edited to exclude the box itself.
    overlaps = np.array(geoms.intersects(box_A), dtype=bool)
    overlaps[index] = False
    if not overlaps.any():
        return box_A

    polys = [slice_box(box_A, box_B)[0] for box_B in geoms[overlaps]]
    return intersection_list(polys)
def slice_all(gdf):
    """Apply ``slice_one`` to every row position of ``gdf``.

    Returns a new GeoDataFrame with a single ``geometry`` column holding the
    sliced shapes in the same order as the input rows.
    """
    sliced = [slice_one(gdf, position) for position in range(len(gdf))]
    return gpd.GeoDataFrame({'geometry': sliced})
# Slice every box of the example image, then compare before and after.
res_df = slice_all(gdf)

fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 5), dpi=120)

# Left panel: the boxes as annotated.
gdf.plot(ax=ax1, alpha=0.5, color='gray')
ax1.axis('equal')
ax1.set_title('Original bounding boxes')

# Right panel: the boxes after slicing.
res_df.plot(ax=ax2, alpha=0.5, color='olive')
ax2.axis('equal')
ax2.set_title('Splited bounding boxes')

fig.tight_layout()
Rasterize polygons
import rasterio.features
# Burn the sliced polygons into a 1024x1024 array.
raster = rasterio.features.rasterize(
    res_df.geometry,
    out_shape=(1024, 1024),
    merge_alg=rasterio.enums.MergeAlg.replace,
)

# Side by side: the image under the original box mask (left) and under the
# mask built from the sliced polygons (right).
fig, axes = plt.subplots(ncols=2, dpi=120)
axes[0].imshow(img)
axes[1].imshow(img)
axes[0].imshow(mask, alpha=0.4)
axes[1].imshow(raster, alpha=0.4)
<matplotlib.image.AxesImage at 0x7f3300c2a850>
Saving new masks
import PIL
import zipfile
import cv2
# PNG-encode the example raster from the previous section (0/1 -> 0/255).
mask = cv2.imencode('.png', (raster * 255).astype(np.uint8))[1]

# Build one mask per image and store them all in a single zip archive.
boxes = df.groupby('image_id').agg({'bbox': lambda x: list(x)})
with zipfile.ZipFile('split_masks.zip', 'w') as mask_out:
    for i in progress_bar(range(len(boxes))):
        box = boxes.iloc[i]
        # The training image itself is not needed to build the mask, so it
        # is no longer opened and decoded on every iteration.

        # NOTE(review): eval() executes whatever text is stored in the CSV
        # column; only safe while train.csv is trusted. Consider
        # ast.literal_eval.
        bbox = np.array([eval(l) for l in box.bbox]).astype(int).tolist()
        gdf = gpd.GeoDataFrame({'geometry': [box2polygon(b) for b in bbox]})
        res_df = slice_all(gdf)
        raster = rasterio.features.rasterize(
            res_df.geometry,
            out_shape=(1024, 1024),
            merge_alg=rasterio.enums.MergeAlg.replace,
        )
        encoded_ok, mask = cv2.imencode('.png', (raster * 255).astype(np.uint8))
        if not encoded_ok:
            raise RuntimeError(f'PNG encoding failed for image {box.name}')
        # writestr is documented for bytes/str, so hand it the raw bytes
        # rather than the numpy buffer.
        mask_out.writestr(f'{box.name}.png', mask.tobytes())