pytorch实现图像算子层
一、Sobel边缘提取算子的实现:需要注意的是,torch.nn 的网络层处理图像通常都要求4维 tensor (batch, channel, height, width),如果维度不够需要扩展,作为图像 plot 的时候还需把维度压缩回去。
import cv2 import matplotlib.pyplot as plt import numpy as np import torch import torch.nn as nn def get_sobel(in_chan, out_chan): # 纵向算子和横向算子 filter_x = np.array([ [1, 0, -1], [2, 0, -2], [1, 0, -1], ]).astype(np.float32) filter_y = np.array([ [1, 2, 1], [0, 0, 0], [-1, -2, -1], ]).astype(np.float32) # torch.nn 进行卷积需要4维tensor,在 batch 和 channel 需要扩展维度 filter_x = filter_x.reshape((1, 1, 3, 3)) # 然后在扩展的维度上复制参数 filter_x = np.repeat(filter_x, in_chan, axis=1) filter_x = np.repeat(filter_x, out_chan, axis=0) filter_y = filter_y.reshape((1, 1, 3, 3)) filter_y = np.repeat(filter_y, in_chan, axis=1) filter_y = np.repeat(filter_y, out_chan, axis=0) # 通过构造的 np.array 构造 torch.tensor filter_x = torch.from_numpy(filter_x) filter_y = torch.from_numpy(filter_y) filter_x = nn.Parameter(filter_x, requires_grad=False) filter_y = nn.Parameter(filter_y, requires_grad=False) conv_x = nn.Conv2d(in_chan, out_chan, kernel_size=3, stride=1, padding=1, bias=False) conv_x.weight = filter_x conv_y = nn.Conv2d(in_chan, out_chan, kernel_size=3, stride=1, padding=1, bias=False) conv_y.weight = filter_y # sobel_x = nn.Sequential(conv_x, nn.BatchNorm2d(out_chan)) # sobel_y = nn.Sequential(conv_y, nn.BatchNorm2d(out_chan)) sobel_x = nn.Sequential(conv_x) sobel_y = nn.Sequential(conv_y) return sobel_x, sobel_y def run_sobel(conv_x, conv_y, data): # 两个方向的sobel算子计算结果 g_x = conv_x(data) g_y = conv_y(data) g = torch.sqrt(torch.pow(g_x, 2) + torch.pow(g_y, 2)) return torch.sigmoid(g) * data
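读取图像,执行算子程序,并绘图。cv2(opencv-python)通过 cv2.imread() 读取的图像色彩空间为 BGR,而 PIL 通过 Image.open() 读取的图像为 RGB(A),相比之下更便于直接使用,计算出的灰度图像也更直观;BGR 图像通常还需要先转换为 RGB(例如 cv2.cvtColor(img, cv2.COLOR_BGR2RGB)),否则用 matplotlib 可视化时颜色会失真,肉眼难以正确判断灰度。另外,cv2.imread() 默认会丢弃 alpha 通道,得到 3 通道图像;而 Image.open() 保留文件本身的模式,对带透明通道的 PNG(如本例)得到的是 4 通道(RGBA)图像。这些差异都需要在编程中留意。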
from PIL import Image

# ---- show the original picture
# Alternative loader: cv2.imread("./cecilia.png") returns a 3-channel BGR
# array, whereas PIL keeps the file's own mode (4 channels for an RGBA PNG).
img_c = Image.open('./cecilia.png')
plt.figure(0)
plt.imshow(img_c)


def apply_sobel_iouint8(imgo):
    """Run the Sobel edge operator on an image, uint8 in and uint8 out.

    Args:
        imgo: anything ``np.asarray`` can turn into an array laid out as
            (height, width, channels) or, for grayscale, (height, width).

    Returns:
        ``np.uint8`` array with the same layout as the input, holding
        ``run_sobel``'s result.
    """
    pixels = np.asarray(imgo)  # (height, width, channels)

    # Grayscale images have no channel axis; add one so the rest is uniform.
    is_gray = pixels.ndim == 2
    if is_gray:
        pixels = pixels[:, :, np.newaxis]

    # Convolution expects channel-first data: (channels, height, width).
    pixels = pixels.transpose(2, 0, 1)
    # Add the batch axis: (1, channels, height, width).
    pixels = pixels[np.newaxis, :, :, :]
    pixels = pixels.astype(np.float32)
    batch = torch.tensor(pixels)

    # Size the operator from the data: PIL gives 4 channels for RGBA files,
    # cv2.imread gives 3, so a hard-coded count only fits one loader.
    n_chan = batch.shape[1]
    s1, s2 = get_sobel(n_chan, n_chan)

    # Run the Sobel operators.
    edges = run_sobel(s1, s2, batch)

    # ---- convert back to uint8
    # Remove only the batch axis; a bare squeeze() would also drop a
    # size-1 channel, height or width axis.
    edges = np.asarray(edges.squeeze(0))
    # Back to image layout: (height, width, channels).
    edges = edges.transpose(1, 2, 0)
    # Clip first so out-of-range values cannot wrap around in the cast.
    edges = np.clip(edges, 0, 255).astype(np.uint8)
    return edges[:, :, 0] if is_gray else edges


# ---- show the edge picture
img_e = apply_sobel_iouint8(img_c)
plt.figure(1)
# NOTE: cmap only takes effect for 2-D (single-channel) data; RGB(A) arrays
# are drawn with their own colours.
plt.imshow(img_e, cmap=plt.cm.gray)
plt.axis('off')
二、下采样与上采样的实现(nn.MaxPool2d 与 nn.functional.interpolate)
# Downsample the picture with max pooling, then upsample it back with
# bilinear interpolation, printing the tensor shape after every step.
img_c = Image.open('./cecilia.png')
# np.array copies the pixels, so torch.from_numpy below receives a writable
# buffer (np.asarray on a PIL image yields a read-only view, which makes
# torch.from_numpy emit a warning).
img_d = np.array(img_c)  # (height, width, channel)
img_d = img_d.transpose(2, 0, 1)  # (channel, height, width)

# torch.from_numpy shares memory with the array and keeps its dtype, so the
# tensor would be uint8 here; max_pool2d is not implemented for 'Byte'
# tensors, hence the conversion to float.
img_d = torch.from_numpy(img_d).float()
print("shape before MaxPool: ", img_d.shape)

layer_maxp = nn.MaxPool2d(kernel_size=2)  # 2x pooling halves height and width
img_p = layer_maxp(img_d)
print("shape after MaxPool: ", img_p.shape)

# Interpolating over two spatial axes needs a 4-D tensor:
# (batch, channel, height, width).
img_p = img_p[np.newaxis, :, :, :]
# align_corners=False is the default behaviour; stating it explicitly
# documents the choice and avoids the warning older PyTorch versions raise.
img_i = nn.functional.interpolate(img_p, scale_factor=(2, 2), mode='bilinear',
                                  align_corners=False)
print("shape after interpolate: ", img_i.shape)