# #
# # NOT TO BE USED NOW!!
# # This file can run several of your spiders one by one.
#
#
# import os
#
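# # Point Scrapy at this project's settings before importing anything that reads them.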
# os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'angelco.settings')
#
# from twisted.internet import reactor
#
# from scrapy import log, signals
# from scrapy.crawler import Crawler
# from scrapy.settings import CrawlerSettings
# from scrapy.xlib.pydispatch import dispatcher
#
# # from testspiders.spiders.followall import FollowAllSpider
# from angelco.spiders.spiders import MySpider
# from angelco.spiders.genericSpider import FollowAllSpider
#
# from sys import argv
#
#
# def stop_reactor():
#     log.msg("Crawler Stopped!", level=log.DEBUG)
#     reactor.stop()
#
#
# if __name__ == "__main__":
#
#     if len(argv) == 1:
#         print("Usage: python run_crawler.py <domain_name>")
#         exit(0)
#
#     else:
#         dispatcher.connect(stop_reactor, signal=signals.spider_closed)  # stop the reactor once the spider closes
#
#         spider = MySpider(src_json=argv[1])
#
#         # settings = CrawlerSettings()
#         # settings.overrides['ITEM_PIPELINES'] = {
#         #     'angelco.pipelines.JsonExportPipeline': 500,
#         # }
#
#         # crawler = Crawler(settings)
#         # crawler.install()
#         # crawler.configure()
#
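#         # Old (pre-1.0) Scrapy bootstrap: install() registers this crawler
#         # globally and configure() loads its extensions and middlewares.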
#         crawler = Crawler(CrawlerSettings())
#         crawler.install()
#         crawler.configure()
#
#         crawler.crawl(spider)
#
#         crawler.start()
#         log.start(loglevel=log.DEBUG)
#
#         log.msg('Running reactor...')
#         reactor.run()  # the script will block here until the spider is closed
#         log.msg('Reactor stopped.')
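#
#
# # The Crawler/CrawlerSettings/log API above predates Scrapy 1.0. Below is a
# # minimal modern sketch of the same one-spider run, assuming Scrapy >= 1.0
# # and that MySpider still accepts a src_json argument; CrawlerProcess starts
# # and stops the Twisted reactor itself, so no dispatcher/stop_reactor
# # plumbing is needed. Kept commented out, like the rest of this file.
#
# import sys
#
# from scrapy.crawler import CrawlerProcess
# from scrapy.utils.project import get_project_settings
#
# from angelco.spiders.spiders import MySpider
#
#
# if __name__ == "__main__":
#     if len(sys.argv) == 1:
#         print("Usage: python run_crawler.py <domain_name>")
#         sys.exit(0)
#
#     # get_project_settings() picks up SCRAPY_SETTINGS_MODULE (angelco.settings)
#     process = CrawlerProcess(get_project_settings())
#     process.crawl(MySpider, src_json=sys.argv[1])  # kwargs are passed to the spider
#     process.start()  # blocks until the crawl finishes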