-
Notifications
You must be signed in to change notification settings - Fork 6
/
image-downloader2.py
79 lines (67 loc) · 2.6 KB
/
image-downloader2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from imutils import paths
import argparse
import requests
import cv2
import os
import numpy as np
# Build the command-line interface. The arguments themselves are parsed
# later, inside the ``__main__`` block at the bottom of the script.
ap = argparse.ArgumentParser()
ap.add_argument("-u", "--urls", required=True,
                help="path to file containing image URLs")
ap.add_argument("-o", "--output", required=True,
                help="path to output directory of images")
# type=int lets argparse reject a non-numeric value with a usage error
# instead of passing a raw string on to the rest of the script.
ap.add_argument("-s", "--start", required=False, type=int,
                help="First number to start at",
                default=0)
ap.add_argument("-v", "--verbose", required=False,
                help="Print information as we go",
                action="store_true", default=False)
# grab the list of URLs from the input file
def read_url_list(filename):
    """Return the URLs listed in *filename*, one per line.

    Each line is stripped of surrounding whitespace and blank lines are
    dropped, so an empty file yields an empty list rather than a list
    containing one empty string.

    Args:
        filename: Path of a text file holding one URL per line.

    Returns:
        A list of non-empty URL strings in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename) as url_file:
        stripped = (line.strip() for line in url_file)
        return [url for url in stripped if url]
# generator that downloads each URL and hands back its raw body
def read_urls(urls, is_verbose=False):
    """Lazily fetch every URL and yield ``(url, content)`` pairs.

    A URL whose request raises ``requests.exceptions.RequestException``
    is reported on stdout and skipped; the remaining URLs are still
    processed.

    Args:
        urls: Iterable of URL strings to fetch.
        is_verbose: When true, print each URL before it is requested.

    Yields:
        Tuples of ``(url, response.content)`` for each request that
        completed without a ``RequestException``.
    """
    for address in urls:
        if is_verbose:
            print("Downloading {}".format(address))
        try:
            # 60 second timeout, same for every request
            payload = requests.get(address, timeout=60).content
        except requests.exceptions.RequestException as exc:
            print("Error downloading {} : {}".format(address, exc))
            continue
        yield (address, payload)
# keep only the downloads that OpenCV can decode as images
def return_images(file_contents, is_verbose=False):
    """Decode downloaded payloads and yield the ones that are images.

    Args:
        file_contents: Iterable of ``(url, data)`` pairs, where ``data``
            is a bytes-like object holding the downloaded payload.
        is_verbose: When true, print a message for every payload that is
            skipped, either because ``cv2.imdecode`` returned ``None`` or
            because decoding raised an exception.

    Yields:
        Tuples of ``(url, image)`` where ``image`` is the non-``None``
        result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)``.
    """
    for url, data in file_contents:
        try:
            # np.frombuffer replaces the deprecated binary mode of
            # np.fromstring; it views the bytes as uint8 without copying.
            raw = np.frombuffer(data, dtype=np.uint8)
            # attempt to decode data as image
            img_cv = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        except Exception as err:
            # Best effort: one undecodable payload must not abort the run.
            if is_verbose:
                print("Error parsing {} : {}".format(url, err))
            continue
        if img_cv is None:
            if is_verbose:
                print("Skipping {} - not an image".format(url))
            continue
        yield (url, img_cv)
def write_images(images, output_dir, start=0, is_verbose=False):
    """Write decoded images to *output_dir* as numbered JPEG files.

    Files are named with an eight-digit, zero-padded counter followed by
    ``.jpg`` (for example ``00000000.jpg``). The counter begins at
    *start* and advances once per image that was written successfully.

    Args:
        images: Iterable of ``(url, image)`` pairs; ``image`` is passed
            straight to ``cv2.imwrite``.
        output_dir: Directory that receives the files. It is created,
            including missing parents, if it does not exist yet.
        start: Number used for the first file name.
        is_verbose: When true, print ``"<path> <- <url>"`` for every file
            written.

    Raises:
        OSError: If *output_dir* cannot be created.
    """
    # cv2.imwrite's boolean result is checked below; without the directory
    # every write would fail, so make sure it exists up front.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    file_num = start
    for url, image in images:
        p = os.path.join(output_dir, "{}.jpg".format(str(file_num).zfill(8)))
        if not cv2.imwrite(p, image):
            # Report the failure and reuse the number for the next image.
            print("Error writing {} <- {}".format(p, url))
            continue
        file_num += 1
        if is_verbose:
            print("{} <- {}".format(p, url))
if __name__ == "__main__":
    # Parse the command line into a plain dict of option values.
    options = vars(ap.parse_args())
    destination = options["output"]
    verbose = options["verbose"]
    first_number = int(options["start"])

    # Chain the stages: URL list -> downloads -> decoded images -> files.
    # read_urls and return_images are generators, so the work is driven
    # by write_images as it consumes them.
    downloads = read_urls(read_url_list(options["urls"]), verbose)
    write_images(return_images(downloads, verbose),
                 destination, first_number, verbose)