pytorch实现图像算子层

倦鸟已归时 / 2023-08-15 / 原文

一、Sobel边缘提取算子的实现：需要注意的是，torch.nn 的网络层处理图像通常都要求4维 tensor (batch, channel, height, width)，如果维度不够需要扩展，作为图像 plot 的时候还需把维度压缩回去。

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn

def get_sobel(in_chan, out_chan):
    """Build a pair of fixed (non-trainable) 3x3 Sobel convolution layers.

    Every (output channel, input channel) pair is given the same 3x3 kernel,
    so each output channel is the Sobel response summed over all input
    channels.

    Args:
        in_chan: number of channels of the tensor the layers will receive.
        out_chan: number of channels each layer produces.

    Returns:
        Tuple ``(sobel_x, sobel_y)`` of ``nn.Sequential`` modules, each
        wrapping one bias-free ``nn.Conv2d`` (kernel 3, stride 1, padding 1,
        so height and width are preserved) whose float32 weights have
        ``requires_grad=False``.
    """
    def _frozen_conv(kernel_3x3):
        # Conv2d weights are laid out (out_chan, in_chan, 3, 3): replicate the
        # 2-D kernel along both leading axes.
        weights = np.tile(
            np.asarray(kernel_3x3, dtype=np.float32),
            (out_chan, in_chan, 1, 1),
        )
        conv = nn.Conv2d(in_chan, out_chan, kernel_size=3, stride=1, padding=1, bias=False)
        # Replace the randomly initialised weights with the fixed kernel.
        conv.weight = nn.Parameter(torch.from_numpy(weights), requires_grad=False)
        # A BatchNorm2d(out_chan) could be appended to this Sequential if
        # normalised responses are wanted.
        return nn.Sequential(conv)

    # Kernel differencing along the width axis.
    sobel_x = _frozen_conv([
        [1, 0, -1],
        [2, 0, -2],
        [1, 0, -1],
    ])
    # Kernel differencing along the height axis.
    sobel_y = _frozen_conv([
        [1, 2, 1],
        [0, 0, 0],
        [-1, -2, -1],
    ])
    return sobel_x, sobel_y

def run_sobel(conv_x, conv_y, data):
    """Weight ``data`` by the sigmoid of its Sobel gradient magnitude.

    Args:
        conv_x: callable applied to ``data`` giving the first gradient response.
        conv_y: callable applied to ``data`` giving the second gradient response.
        data: input tensor; the responses must be broadcastable against it
            because the result is an element-wise product with ``data``.

    Returns:
        ``sigmoid(sqrt(g_x**2 + g_y**2)) * data``.
    """
    grad_x = conv_x(data)
    grad_y = conv_y(data)
    # Combine the two directional responses into one gradient magnitude.
    magnitude = (grad_x ** 2 + grad_y ** 2).sqrt()
    # Squash the magnitude to (0, 1) and use it as a per-element gate.
    gate = magnitude.sigmoid()
    return gate * data

读取图像，执行算子程序，并且绘图。cv2（opencv-python）通过 cv2.imread() 读取的图像的色彩空间为 BGR，而 PIL 的 Image.open() 读取的图像为 RGB，相比之下更便于使用，计算出的灰度图像也更直观；BGR 编码通常还需要先转换为 RGB，否则可视化出来的色彩会失真，肉眼难以看出灰度层次。另外，cv2.imread() 默认读取为3通道（丢弃透明通道），而 Image.open() 读取带透明通道的 PNG 时为4通道（RGBA）。这些都需要在编程中留意。

from PIL import Image
# ---- show the original picture
# Alternative loader (OpenCV); the author quotes shape (511, 509, 3) for it:
# img_c = cv2.imread("./cecilia.png")  # (511, 509, 3)
# NOTE(review): the width quoted below (409) disagrees with the 509 quoted for
# cv2 above — one of the two is probably a typo; confirm against the real file.
img_c = Image.open('./cecilia.png')  # (511, 409, 4)
# Draw the unmodified picture in figure 0.
plt.figure(0)
plt.imshow(img_c)

def apply_sobel_iouint8(imgo):
    """Apply the Sobel edge operator to an image, uint8 in and uint8 out.

    The image is converted to a float32 tensor for the convolution and
    converted back to a uint8 array afterwards.

    Args:
        imgo: a PIL image or array-like laid out (height, width, channels).
            A 2-D (height, width) grayscale image is also accepted.

    Returns:
        np.ndarray of dtype uint8 with the same layout as the input:
        (height, width, channels), or (height, width) for 2-D input.
    """
    pixels = np.asarray(imgo)
    grayscale = pixels.ndim == 2
    if grayscale:
        # Give grayscale input an explicit channel axis so it follows the
        # same path as colour images.
        pixels = pixels[:, :, np.newaxis]

    # Convolution layers expect (channels, height, width) ...
    chw = pixels.transpose(2, 0, 1)
    n_chan = chw.shape[0]
    # ... plus a leading batch axis: (1, channels, height, width).
    batch = torch.tensor(chw[np.newaxis].astype(np.float32))

    # Build the operators from the image's own channel count (PIL yields 4
    # channels for RGBA, cv2.imread yields 3). run_sobel multiplies the
    # response with the input, so in/out channel counts must match.
    s1, s2 = get_sobel(n_chan, n_chan)
    with torch.no_grad():
        edges = run_sobel(s1, s2, batch)

    # Drop only the batch axis; a bare squeeze() would also remove a
    # single-channel axis and break the transpose below.
    edges = edges.squeeze(0).numpy()
    # Back to image layout (height, width, channels).
    edges = edges.transpose(1, 2, 0).astype(np.uint8)
    return edges[:, :, 0] if grayscale else edges


# ---- show the edge picture
# Run the Sobel pipeline on the image loaded above.
img_e = apply_sobel_iouint8(img_c)
plt.figure(1)
# Variant without an explicit colormap:
# plt.imshow(img_e)
# NOTE(review): matplotlib documents cmap as ignored for RGB(A) input, so the
# gray colormap presumably only matters for single-channel results — confirm.
plt.imshow(img_e, cmap=plt.cm.gray)
# Hide the axes.
plt.axis('off')

二、上采样下采样的实现（MaxPool2d and functional.interpolate）

# Reload the picture and turn it into a float tensor laid out
# (channel, height, width).
img_c = Image.open('./cecilia.png')
# torch.from_numpy shares memory with the array and keeps its dtype (uint8
# here); max_pool2d is not implemented for 'Byte' tensors, hence .float().
img_d = torch.from_numpy(np.asarray(img_c).transpose(2, 0, 1)).float()
print("shape before MaxPool: ", img_d.shape)

# Down-sampling: 2x2 max pooling halves height and width.
layer_maxp = nn.MaxPool2d(kernel_size=2)
img_p = layer_maxp(img_d)
print("shape after  MaxPool: ", img_p.shape)

# Up-sampling: nn.functional.interpolate over two spatial axes needs a
# (batch, channel, height, width) tensor, so add the batch axis first.
img_p = img_p.unsqueeze(0)
img_i = nn.functional.interpolate(img_p, scale_factor=(2, 2), mode='bilinear')
print("shape after interpolate: ", img_i.shape)