从乐团总谱中手工拆出各乐器的分谱太麻烦,用过西贝柳斯(Sibelius)打谱软件,效果不满意。最近尝试了视频字幕的提取识别,参考:
https://blog.csdn.net/XnCSD/article/details/89376477
想到类似的方法可以用来做乐谱的提取。
进行边缘检测
step one 腐蚀膨胀
直接设定二值化阈值的方法对乐谱并不管用,因此采用了腐蚀膨胀算法:先让线条覆盖一定的范围,再二值化提取轮廓
https://www.cnblogs.com/denny402/p/5166258.html
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt5
## Step one: erode/dilate a sample page so that thin lines spread into solid bands.
## Test image (Windows path, backslashes escaped).
pic = 'F:\\OCR_puzi\\sample\\0002.jpg'

## a. Load the page exactly as stored on disk; no thresholding is applied here.
src = cv2.imread(pic, cv2.IMREAD_UNCHANGED)
if src is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # instead of raising, which would otherwise surface later as an obscure
    # error inside cv2.erode.
    raise FileNotFoundError(pic)

## b. 30x30 all-ones structuring element.
kernel = np.ones((30, 30), np.uint8)

## c. Erosion with the default number of iterations.
# NOTE: the third positional parameter of cv2.erode is `dst`, not
# `iterations`, so the stray numbers previously passed there (10 and 60) never
# controlled the iteration count. They are dropped; pass `iterations=N`
# explicitly if more than one pass is wanted.
erosion = cv2.erode(src, kernel)

## Dilation, followed by a second erode/dilate round.
dst = cv2.dilate(erosion, kernel)
erosion1 = cv2.erode(dst, kernel)
dst1 = cv2.dilate(erosion1, kernel)

## Show the result of the first erosion and wait for a key press.
cv2.imshow('origin', erosion)
cv2.waitKey()
cv2.destroyAllWindows()
step two 灰度图统计行均值
#转灰度
import numpy as np
def rgb2gray(rgb):
    """Collapse a 3-channel image array into a single weighted-sum plane.

    Channel 0 is weighted 0.2989, channel 1 is weighted 0.5870 and channel 2
    is weighted 0.1140 (i.e. the input is treated as R, G, B in that order).

    Args:
        rgb: array indexable as ``rgb[:, :, c]`` for ``c`` in 0..2.

    Returns:
        A 2-D float array with the same height and width as ``rgb``.
    """
    red = rgb[:, :, 0]
    green = rgb[:, :, 1]
    blue = rgb[:, :, 2]
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue
# Gray-level version of the eroded page.
erosiongray = rgb2gray(erosion)
# Mean gray level of every pixel row (one value per row).
y = erosiongray.mean(axis=1)
# Print "<1-based row number> <row mean>" for inspection.
for count, i in enumerate(y, start=1):
    print(count, i)
取边缘划分阈值为100
# Indices of the rows whose mean gray level is below the threshold of 100
# ("dark" rows).
index_b = np.flatnonzero(y < 100).tolist()
if not index_b:
    # Without any dark row there is nothing to anchor the cut lines on;
    # fail with a clear message instead of a bare IndexError below.
    raise ValueError("no row with a mean gray level below 100; cannot place cut lines")

## Determine the cut rows.
# First cut: 30 rows above the first dark row, clamped at 0 because a negative
# value would wrap around to the end of the image when used as a slice start.
listcut = [max(index_b[0] - 30, 0)]
# One cut in the middle of every gap between two runs of dark rows. Integer
# division keeps the cut positions usable as row indices.
for upper, lower in zip(index_b, index_b[1:]):
    if lower - upper >= 2:
        listcut.append((upper + lower) // 2)
# Last cut: 30 rows below the last dark row, clamped to the number of rows.
listcut.append(min(index_b[-1] + 30, len(y)))
listcut
# Cut one band out of the original (un-eroded) image and display it.
# Local import: Image is used here before the file's own PIL import is reached.
from PIL import Image

# Work on the first channel only.
im = src[:, :, 0]
# Row range of the band to extract. These indices are hard-coded for this
# sample page and have to be adjusted for other pages.
im = im[3048:3213, :]
img = Image.fromarray(im)
img.show()
遍历页面组合图像
整合:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt5
from PIL import Image
import scipy.misc
import os
def eachFile(filepath):
    """Return the paths of all entries directly inside a directory.

    Args:
        filepath: directory to list, with or without a trailing path
            separator.

    Returns:
        A list of ``filepath`` joined with each entry name, sorted by entry
        name.
    """
    # os.listdir makes no ordering promise; sorting keeps the pages in
    # file-name order so that later stitching follows the page sequence.
    # os.path.join inserts the separator only when it is missing, so both
    # 'dir' and 'dir\\' produce valid paths.
    return [os.path.join(filepath, entry) for entry in sorted(os.listdir(filepath))]
# Paths of all entries in the sample-page directory; iterated by the page loop below.
dirlist=eachFile('F:\\OCR_puzi\\sample\\')
def rgb2gray(rgb):
    """Return the weighted sum of the first three channels of ``rgb``.

    The weights are 0.2989 for channel 0, 0.5870 for channel 1 and 0.1140 for
    channel 2, i.e. the channels are treated as red, green, blue in that
    order. The result is a 2-D float array of the same height and width.
    """
    weighted_red = 0.2989 * rgb[:, :, 0]
    weighted_green = 0.5870 * rgb[:, :, 1]
    weighted_blue = 0.1140 * rgb[:, :, 2]
    return weighted_red + weighted_green + weighted_blue
############### Walk over all pages and collect their bands in a list
# all_content[p][k] is the k-th horizontal band cut out of the p-th usable page.
all_content = []
# 30x30 all-ones structuring element; identical for every page, so built once.
kernel = np.ones((30, 30), np.uint8)
for imm in dirlist:
    src = cv2.imread(imm, cv2.IMREAD_UNCHANGED)
    if src is None:
        # cv2.imread returns None for entries it cannot decode (sub-folders,
        # thumbnails, ...); skip them instead of crashing inside cv2.erode.
        continue
    # Erosion with the default number of iterations.
    erosion = cv2.erode(src, kernel)
    erosiongray = rgb2gray(erosion)
    # Mean gray level of every pixel row.
    y = np.mean(erosiongray, 1)
    # Rows darker than the threshold of 100.
    index_b = list(np.where(y < 100)[0])
    if not index_b:
        # No dark row on this page, so there is nothing to cut; skip it
        # rather than fail with an IndexError.
        continue
    # Cut rows: 30 rows above the first dark row, the middle of every gap
    # between runs of dark rows, and 30 rows below the last dark row.
    listcut = [index_b[0] - 30]
    for upper, lower in zip(index_b, index_b[1:]):
        if lower - upper >= 2:
            listcut.append((upper + lower) // 2)
    listcut.append(index_b[-1] + 30)
    #########################################################
    # Bands are cut from the first channel of the original (un-eroded) page.
    channel = src[:, :, 0]
    page_content = []
    for top, bottom in zip(listcut, listcut[1:]):
        # 10 extra rows of margin on both sides. The start is clamped at 0
        # because a negative start would wrap around to the end of the image.
        start = max(int(top) - 10, 0)
        page_content.append(channel[start:int(bottom) + 10, :])
    all_content.append(page_content)
####################### Take bands out of the list and stitch them; one instrument as an example ########
# Band 0 of every page starting at index 1 (index 0 is left out, as before —
# presumably a title/cover page; confirm for your own score).
bands = [page[0] for page in all_content[1:]]
if not bands:
    raise ValueError("need at least two pages in all_content to stitch a part")
# A single vstack copies each band once; stacking pairwise inside a loop
# re-copies the growing image on every page.
a = np.vstack(bands)
img = Image.fromarray(a)
img.show()
欢迎大家交流想法~