c4software · Bash- · Dec 13, 2018 · Dec 13, 2018 · Dec 13, 2018 · Garrett-R
diff --git a/README.md b/README.md
@@ -14,6 +14,10 @@ Read a config file to set parameters:
 ***You can overide (or add for list) any parameters define in the config.json***
 
 	>>> python main.py --config config/config.json
+More configuration options can be found in config.py:
+ - Set custom xml tags for the sitemap
+ - Set a user agent
+ - Configure the crawling rate
 
 #### Enable debug:
 

diff --git a/config.py b/config.py
@@ -9,3 +9,6 @@
 xml_footer = "</urlset>"
 
 crawler_user_agent = 'Sitemap crawler'
+
+number_calls = 1  # number of requests per call period
+call_period = 15  # length of one call period, in seconds
diff --git a/crawler.py b/crawler.py
@@ -13,6 +13,7 @@
 
 import mimetypes
 import os
+from ratelimit import limits, sleep_and_retry
 
 class IllegalArgumentError(ValueError):
 	pass
@@ -24,7 +25,6 @@ class Crawler:
 	output 	= None
 	report 	= False
 
-	config 	= None
 	domain	= ""
 
 	exclude = []
@@ -144,8 +144,8 @@ async def crawl_all_pending_urls(self, executor):
 		logging.debug('all crawl tasks have completed nicely')
 		return
 
-
-
+	@sleep_and_retry
+	@limits(calls=config.number_calls, period=config.call_period)
 	def __crawl(self, current_url):
 		url = urlparse(current_url)
 		logging.info("Crawling #{}: {}".format(self.num_crawled, url.geturl()))