Skip to content

Commit

Permalink
Updated to V1.7
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhengjcVP authored Apr 30, 2024
1 parent f374bfd commit c92ea86
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 60 deletions.
128 changes: 83 additions & 45 deletions LoadScanPDFMain.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,41 @@
start_time = time.time()

base_path = "C:\\Users\\Zhengjc\\Pictures\\Saved Pictures\\"
# file_path=base_path+"Chopin Score pages\\7. Chopin Prelude Paderewski\\Chopin_prelude_paderewski.pdf"
file_path= base_path+"Other Composer\\Brahms\\Sonata HN\\Unused\\SB_CPBA_BS1HNSupp.pdf"
#file_path=base_path+"Chopin Score pages\\7. Chopin Prelude Paderewski\\Chopin_prelude_paderewski.pdf"
file_path= base_path+"Beethoven sonata\\Op120 Diabelli\\Beethoven_op_120_diabelli.pdf"
if (not os.path.isfile(file_path)):
print("======================")
print("\tInput PDF does not exist.")
print("\t Please check the file:\n\t"+file_path)
print("======================")
raise ValueError('Input PDF does not exist.')
reader = PdfReader(file_path)
file_name = "SB CPBA BS1HN Supp"
file_name = "Op120 Diabelli"

page = reader.pages[0]
# 0 = Default rotation
# 1 = Auto
# 2 = Text rotation (slower)
# -1 = No rotation
# 0 = Default rotation
# 1 = Auto
# 2 = Text rotation (slower)
RotationMode = 0

start_page = 0
page_num = 8
start_page = 34
page_num = 64
page_num = min(page_num, len(reader.pages))

Use_start_end = True

page_range = []
if (Use_start_end):
page_range = range(start_page, page_num)
else:
# Customize page number here.
page_range = [1, 23, 24]

# Change start and end, for detecting input file
start_page = page_range[0]
page_num = page_range[-1]

print("File Name:"+file_name)
print("\tProcessing from Page", str(start_page), "to", str(page_num-1))
print("\t", (page_num-start_page), "Pages Total.")
Expand Down Expand Up @@ -111,48 +131,52 @@ def readPDF():
SCL = 0.77*DPIRatio
Filter_Thresh = 160
outDPI = 600.00
dspBlackAmt = 12
dspWhiteAmt = 18
dspBlackAmt = 2 #18
dspWhiteAmt = 16 #10
elif (Default_3to4):
Tgt_W = 4500
Tgt_H = 6000
DPIRatio = 600/inDPI
SCL = DPIRatio*0.8
Filter_Thresh = 160
Filter_Thresh = 180
outDPI = 600.00
dspBlackAmt = 18
dspWhiteAmt = 12
dspWhiteAmt = 9
elif (HiRes_3to4):
Tgt_W = 6750
Tgt_H = 9000
DPIRatio = 600/inDPI
SCL = DPIRatio*1.2
Filter_Thresh = 160
Filter_Thresh = 170
outDPI = 900.00
dspBlackAmt = 32
dspWhiteAmt = 20
dspBlackAmt = 20
dspWhiteAmt = 10
else:
Tgt_W = 8000
Tgt_H = 6000
Tgt_W = 6750
Tgt_H = 9000
DPIRatio = 600/inDPI
SCL = DPIRatio*0.8
Filter_Thresh = 110
SCL = DPIRatio*1.0
Filter_Thresh = 160
outDPI = 600.00
dspBlackAmt = 0
dspWhiteAmt = 0
# Flip or rotate page
Odd_page_flip = False
Even_page_flip = False

# Crop this amount (pixels) before using the CropWhite() command
# set to 0 for no cropping
PreCrop_Amt = 140
PreCrop_Amt = 0
DoCrop = True

# Customize pre-cropping for
Custom_PreCrop = False
Custom_PreCrop = True
# If true, even page will have precrop LR reversed.
Even_page_reverse_precrop = True
PreCrop_T = 200
PreCrop_L = 50
PreCrop_B = 240
PreCrop_R = 320
PreCrop_T = 180
PreCrop_L = 240
PreCrop_B = 80
PreCrop_R = 15

# Step size for the crop line. A smaller step size may capture more unwanted dust,
# while a larger step size may miss actual content
Expand All @@ -162,7 +186,9 @@ def readPDF():
# Then combine with the original image with bitwise and
# Can provide limited support to partially blurry scans
# Not recommended for clean scans
UseStrongEnhance = True
UseStrongEnhance = False
Use2LvFilter = False
SE_Thresh = 230
def printParams():
print("Parameters:")
print("\tDefault_A4:", Default_A4)
Expand All @@ -184,7 +210,7 @@ def printParams():
printParams()
#file_name1="mozart_sonata_wiener_smph_book1_Page_"
#file_name2="_Image_0001.jpg"
for i in range(start_page, page_num):
for i in page_range:

# Alternative read file
# if(i<=9):
Expand All @@ -205,6 +231,19 @@ def printParams():
img_W = img.shape[:2][0]
img_H = img.shape[:2][1]

# Flip horizontal then vertical. Equivalent to 180deg rotate.
if ((Odd_page_flip and i % 2 == 1) or (Even_page_flip and i % 2 == 0)):
if (Verbose):
print("\tRotated 180 degrees")
img = cv.flip(img, 0)
img = cv.flip(img, 1)

# Rotate
if(RotationMode != -1):
(img[:], rot_angle) = RotateByStraightLine(img, 2, 1440, RotationMode)
if (Verbose):
print("\tAngle:", rot_angle)
#
# Crop before proceeding
if(Custom_PreCrop):
# Even page will have reflected precrop from odd page
Expand All @@ -222,22 +261,18 @@ def printParams():
# Re-calculate height and width after pre cropping.
img_W = img.shape[:2][0]
img_H = img.shape[:2][1]
# Flip horizontal then vertical. Equivalent to 180deg rotate.
if ((Odd_page_flip and i % 2 == 1) or (Even_page_flip and i % 2 == 0)):
if (Verbose):
print("\tRotated 180 degrees")
img = cv.flip(img, 0)
img = cv.flip(img, 1)

# Rotate
(img[:], rot_angle) = RotateByStraightLine(img, 2, 1440, RotationMode)
if (Verbose):
print("\tAngle:", rot_angle)

# Crop
(img, x0, x1, y0, y1) = CropWhite(img, Tgt_H, Tgt_W, 200, 120, 10)
if (Verbose):
print("\tCrop Edge:", x0, x1, y0, y1)
if(DoCrop):
(img, x0, x1, y0, y1) = CropWhite(img, Tgt_H, Tgt_W, 200, 120, 10)
if (Verbose):
print("\tCrop Edge:", x0, x1, y0, y1)
# x0=0
# y0=0
# x1=min(6999, img_W)
# y1=min(5399, img_H)
# img = img[0:x1, 0:y1]


# Resize
New_H = int(img.shape[1]*SCL)
Expand All @@ -247,14 +282,17 @@ def printParams():
# Thresholding. 160 by default
# Strong Enhance can only provide limited support to partially blurry scans
if (UseStrongEnhance):
img = StrongEnhance(img, Filter_Thresh, 175, 80)
img = StrongEnhance(img, Filter_Thresh, SE_Thresh, 80)
elif (Use2LvFilter):
img = TwoLvFilter(img, Filter_Thresh, SE_Thresh, dspBlackAmt, dspWhiteAmt, True)
else:
img[:] = cv.threshold(img, Filter_Thresh, 255, cv.THRESH_BINARY)[1]

# Despeckle.
(img[:], W_Count, B_Count) = DespecklePatch(img, dspWhiteAmt, dspBlackAmt)
if (Verbose):
print("\tBlack/White Patches:", B_Count, W_Count)
if(not Use2LvFilter):
(img[:], W_Count, B_Count) = DespecklePatch(img, dspWhiteAmt, dspBlackAmt)
if (Verbose):
print("\tBlack/White Patches:", B_Count, W_Count)

# Fit the image to the given canvas size.
# May pad or crop edges. Avoid cropping black parts.
Expand Down
68 changes: 53 additions & 15 deletions MusicScoreProc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import cv2 as cv
import numpy as np
import math
import random

# Rotate according to angle of horizontal line.
# Should not exceed +- 5 degrees
Expand Down Expand Up @@ -149,14 +150,14 @@ def DespecklePatch(img, Despeckle_White_Size=5, Despeckle_Black_Size=10):
width = stats[:, cv.CC_STAT_WIDTH]
height = stats[:, cv.CC_STAT_HEIGHT]
# small_label=np.where(areas<= Despeckle_White_Size)[0]+1

for j in range(1, nlabels):
if areas[j] <= Despeckle_White_Size:
White_Counter += 1
for x in range(left[j], left[j] + width[j]):
for y in range(top[j], top[j] + height[j]):
if labels[y, x] == j:
img[y, x] = 0
if(Despeckle_White_Size!=0):
for j in range(1, nlabels):
if areas[j] <= Despeckle_White_Size:
White_Counter += 1
for x in range(left[j], left[j] + width[j]):
for y in range(top[j], top[j] + height[j]):
if labels[y, x] == j:
img[y, x] = 0

# Now invert color and despeckle black
img[:] = 255-img
Expand All @@ -168,13 +169,14 @@ def DespecklePatch(img, Despeckle_White_Size=5, Despeckle_Black_Size=10):
width = stats[:, cv.CC_STAT_WIDTH]
height = stats[:, cv.CC_STAT_HEIGHT]

for j in range(1, nlabels):
if areas[j] <= Despeckle_Black_Size:
Black_Counter += 1
for x in range(left[j], left[j] + width[j]):
for y in range(top[j], top[j] + height[j]):
if labels[y, x] == j:
img[y, x] = 0
if(Despeckle_Black_Size!=0):
for j in range(1, nlabels):
if areas[j] <= Despeckle_Black_Size:
Black_Counter += 1
for x in range(left[j], left[j] + width[j]):
for y in range(top[j], top[j] + height[j]):
if labels[y, x] == j:
img[y, x] = 0

# Invert color back to original
return (255-img, White_Counter, Black_Counter)
Expand All @@ -194,6 +196,42 @@ def StrongEnhance(img, Normal_Thresh=160, Strong_Thresh=200, Despeckle_Black_Siz

return cv.bitwise_and(img, img_strong)

# Two-level filter, in the same spirit as StrongEnhance():
# the image is binarized at two thresholds and the two layers are merged.
# The merge thins the high-threshold layer with RemoveCheckBoard() rather
# than with a stronger despeckle pass.
# Despeckling is already done in here, so do not despeckle the result again.
# The optional convolution only feeds the high-threshold layer.
def TwoLvFilter(img, Normal_Thresh=160, Strong_Thresh=200,
                Despeckle_Black_Size = 16, Despeckle_White_Size = 8, Convolve = False):
    """Binarize *img* at two thresholds and AND the two layers together.

    Parameters:
        img: image to filter; passed directly to cv.threshold / cv.filter2D.
            (assumes a single-channel grayscale array, since the
            RemoveCheckBoard() in this file unpacks a 2-D shape - TODO confirm)
        Normal_Thresh: cv.THRESH_BINARY threshold for the normal layer.
        Strong_Thresh: cv.THRESH_BINARY threshold for the strong layer.
        Despeckle_Black_Size: black patch size limit forwarded to
            DespecklePatch() for both layers.
        Despeckle_White_Size: white patch size limit forwarded to
            DespecklePatch() for both layers.
        Convolve: when truthy, the strong layer is thresholded from a
            neighbour-averaged copy of *img* instead of *img* itself.

    Returns:
        cv.bitwise_and of the processed strong layer and the normal layer.
    """
    if Convolve:
        # 3x3 weights over the eight neighbours only (centre weight is 0);
        # the weights add up to 1.0. Outside the image, BORDER_CONSTANT is used.
        neighbour_weights = np.array([[0.1, 0.15, 0.1],
                                      [0.15, 0, 0.15],
                                      [0.1, 0.15, 0.1]])
        strong_source = cv.filter2D(img, -1, neighbour_weights,
                                    borderType=cv.BORDER_CONSTANT)
    else:
        strong_source = img

    # cv.threshold returns (retval, dst); only the binarized image is needed.
    _, normal_layer = cv.threshold(img, Normal_Thresh, 255, cv.THRESH_BINARY)
    _, strong_layer = cv.threshold(strong_source, Strong_Thresh, 255, cv.THRESH_BINARY)

    # Despeckle each layer on its own; element 0 of the result is the image.
    normal_layer = DespecklePatch(normal_layer,
                                  Despeckle_White_Size, Despeckle_Black_Size)[0]
    strong_layer = DespecklePatch(strong_layer,
                                  Despeckle_White_Size, Despeckle_Black_Size)[0]

    # Thin out the strong layer (default RemoveCheckBoard mode) before merging.
    strong_layer = RemoveCheckBoard(strong_layer)

    return cv.bitwise_and(strong_layer, normal_layer)

# Remove half of the pixels:
# mode = 0: remove interlaced pixels like a chessboard
# mode = 1: remove every pixel randomly if a random number is less than randparam.
def RemoveCheckBoard(img, mode = 0, randparam = 0.5):
    """Overwrite a subset of the pixels of *img* in place with the value 1.

    Parameters:
        img: NumPy image array, modified in place. The first two axes are
            treated as the pixel grid.
        mode: 0 overwrites every pixel whose row index plus column index is
            odd (a true chessboard, whatever the image width); 1 overwrites
            each pixel independently when random.random() < randparam.
            Any other value leaves the image untouched.
        randparam: probability threshold used by mode 1 only.

    Returns:
        The same array object that was passed in.
    """
    # NOTE(review): "removed" pixels are written as 1, not 255. On a 0/255
    # uint8 image that is almost black - confirm this is the intended value.
    if mode == 0:
        # Keying on (row + col) parity keeps the pattern a chessboard even
        # when the number of columns is even; a toggle carried across row
        # boundaries would produce vertical stripes in that case.
        img[0::2, 1::2] = 1
        img[1::2, 0::2] = 1
    elif mode == 1:
        rows, cols = img.shape[:2]
        total = rows * cols
        # One random.random() draw per pixel, consumed in row-major order.
        draws = np.fromiter((random.random() for _ in range(total)),
                            dtype=float, count=total)
        img[draws.reshape(rows, cols) < randparam] = 1

    return img

# Fit the image to the given canvas size.
# May pad or crop edges.
# If cropping must be done, avoid cropping black parts
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# MusicScoreScanHelper
🌏 **[中文版](README_zh.md)**
## Changelog
### v1.7 alpha
Added a two-level mode, ```TwoLvFilter()```, in ```MusicScoreProc.py```, which handles grayscale prints better. It works on the same principle as ```StrongEnhance()```.<br />
Added the corresponding option to the main program ```LoadScanPDFMain.py```.<br />
### v1.6 alpha
Added ```reset_dpi.py``` program, which can despeckle and reset dpi 0/1 bitmap PNG files in batch.<br />
The following changes are made regarding ```LoadScanPDFMain.py```and```MusicScoreProc.py```: <br />
Expand Down
3 changes: 3 additions & 0 deletions README_zh.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# MusicScoreScanHelper
🌏 **[English](README.md)**
## 更新日志
### v1.7 alpha
```MusicScoreProc.py```中增加2层处理模式```TwoLvFilter()```,可以更好适应多层印刷图片。原理类似```StrongEnhance()```<br />
在主程序 ```LoadScanPDFMain.py```加入对应功能<br />
### v1.6 alpha
增加了 ```reset_dpi.py```程序。可以批量对01位图PNG文件重置DPI以及除尘。<br />
以下为 ```LoadScanPDFMain.py``````MusicScoreProc.py```的变化: <br />
Expand Down

0 comments on commit c92ea86

Please sign in to comment.