-
Notifications
You must be signed in to change notification settings - Fork 19
/
file2mongo.py
36 lines (30 loc) · 1.46 KB
/
file2mongo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import json
import argparse
from wpscraper.connector import MongoDBConnector
from wpscraper.document import RawDocument
def _build_parser():
    """Create the command-line parser for this script.

    Registers seven required positional arguments, in this order:
    filepath, db_host, db_port, db_database, db_collection, username,
    password. Only ``db_port`` is converted to ``int``; the rest are kept
    as strings.
    """
    positionals = (
        ("filepath", str),
        ("db_host", str),
        ("db_port", int),
        ("db_database", str),
        ("db_collection", str),
        ("username", str),
        ("password", str),
    )
    cli = argparse.ArgumentParser()
    # Registration order defines the order expected on the command line.
    for arg_name, arg_type in positionals:
        cli.add_argument(arg_name, type=arg_type)
    return cli


# Module-level parser, used by the ``__main__`` entry point of this script.
parser = _build_parser()
def _list_json_files(filepath: str) -> list:
    """Return the sorted paths of the regular ``*.json`` files directly inside *filepath*.

    The extension match is case-insensitive. Sub-directories are skipped even
    if their name ends in ``.json``, and an extension-less entry that is
    literally named ``json`` is not treated as a JSON file.
    """
    paths = []
    # os.listdir returns entries in arbitrary order; sort for a reproducible run.
    for name in sorted(os.listdir(filepath)):
        if not name.lower().endswith(".json"):
            continue
        path = os.path.join(filepath, name)
        # A directory called "something.json" would make open() fail later.
        if os.path.isfile(path):
            paths.append(path)
    return paths


def files_to_mongodb(filepath: str, db_host: str, db_port: int, db_database: str, db_collection: str,
                     username: str, password: str, **kwargs):
    """Load every JSON file in a directory and hand it to a ``MongoDBConnector``.

    Each file is parsed with ``json.load``, wrapped in a ``RawDocument`` and
    passed to ``MongoDBConnector.process_document`` together with the
    document's ``'resource_type'`` entry. A "<path> uploaded." line is
    printed after each file and "done." once all files were processed.

    Args:
        filepath: Directory that is scanned (non-recursively) for ``*.json`` files.
        db_host: Forwarded to ``MongoDBConnector`` as ``db_host``.
        db_port: Forwarded to ``MongoDBConnector`` as ``db_port``.
        db_database: Forwarded to ``MongoDBConnector`` as ``db_database``.
        db_collection: Forwarded to ``MongoDBConnector`` as ``db_collection``.
        username: Forwarded to ``MongoDBConnector`` as ``username``.
        password: Forwarded to ``MongoDBConnector`` as ``password``.
        **kwargs: Any extra keyword arguments, forwarded unchanged to
            ``MongoDBConnector``.

    Raises:
        OSError: If *filepath* cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    # The directory is listed before the connector is built, so a bad path
    # fails before any connector is created.
    files = _list_json_files(filepath)
    c = MongoDBConnector(db_host=db_host, db_port=db_port, db_database=db_database, db_collection=db_collection,
                         username=username, password=password, **kwargs)
    for path in files:
        # JSON text is UTF-8 by specification; do not depend on the locale default.
        with open(path, 'r', encoding='utf-8') as f:
            json_obj = json.load(f)
        doc = RawDocument(raw_data=json_obj)
        # NOTE(review): assumes doc.data supports item access and always
        # carries 'resource_type' - confirm against RawDocument.
        resource = doc.data['resource_type']
        c.process_document(doc, resource)
        print("{} uploaded.".format(path))
    print("done.")
if __name__ == "__main__":
    # Script entry point: parse the command line and forward every parsed
    # value to files_to_mongodb under the keyword of the same name.
    cli_args = parser.parse_args()
    option_names = (
        "filepath",
        "db_host",
        "db_port",
        "db_database",
        "db_collection",
        "username",
        "password",
    )
    call_kwargs = {name: getattr(cli_args, name) for name in option_names}
    files_to_mongodb(**call_kwargs)