From 3485a05add72ea120613f1b49b767a8795b0a19a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20=C5=A0milauer?= Date: Thu, 21 Nov 2024 11:12:33 +0100 Subject: [PATCH] Simulate parent attribute with GridFS instead of getParent/setParent; update scripts accordingly --- mupifDB/api/main.py | 17 +++++++---------- mupifDB/models.py | 25 +++++++++++++++---------- mupifDB/schedulerMonitor.py | 2 +- tools/database-add-parents.py | 15 +++++++-------- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/mupifDB/api/main.py b/mupifDB/api/main.py index a2d5f31..619d6dd 100644 --- a/mupifDB/api/main.py +++ b/mupifDB/api/main.py @@ -401,17 +401,14 @@ class M_ModifyExecutionOntoBaseObjectID(BaseModel): name: str value: str - @app.patch("/executions/{uid}/set_onto_base_object_id/", tags=["Executions"]) def modify_execution_id(uid: str, data: M_ModifyExecutionOntoBaseObjectID): db.WorkflowExecutions.update_one({'_id': bson.objectid.ObjectId(uid), "EDMMapping.Name": data.name}, {"$set": {"EDMMapping.$.id": data.value}}) return get_execution(uid) - class M_ModifyExecutionOntoBaseObjectIDMultiple(BaseModel): data: list[dict] - @app.patch("/executions/{uid}/set_onto_base_object_id_multiple/", tags=["Executions"]) def modify_execution_id_multiple(uid: str, data: List[M_ModifyExecutionOntoBaseObjectID]): for d in data: @@ -423,7 +420,6 @@ class M_ModifyExecutionOntoBaseObjectIDs(BaseModel): name: str value: list[str] - @app.patch("/executions/{uid}/set_onto_base_object_ids/", tags=["Executions"]) def modify_execution_ids(uid: str, data: M_ModifyExecutionOntoBaseObjectIDs): db.WorkflowExecutions.update_one({'_id': bson.objectid.ObjectId(uid), "EDMMapping.Name": data.name}, {"$set": {"EDMMapping.$.ids": data.value}}) @@ -434,7 +430,6 @@ class M_ModifyExecution(BaseModel): key: str value: str - @app.patch("/executions/{uid}", tags=["Executions"]) def modify_execution(uid: str, data: M_ModifyExecution): db.WorkflowExecutions.update_one({'_id': bson.objectid.ObjectId(uid)}, {"$set": {data.key: data.value}}) @@ -456,12 +451,12 @@ def schedule_execution(uid: str): # -------------------------------------------------- @app.get("/iodata/{uid}", tags=["IOData"]) -def get_execution_iodata(uid: str): +def get_execution_iodata(uid: str) -> models.IODataRecord_Model: res = db.IOData.find_one({'_id': bson.objectid.ObjectId(uid)}) - return fix_id(res) - # return res.get('DataSet', None) - + if res is None: raise NotFoundError(f'Database reports no iodata with uid={uid}.') + return models.IODataRecord_Model.model_validate(res) +# TODO: pass and store parent data as well @app.post("/iodata/", tags=["IOData"]) def insert_execution_iodata(data: models.IODataRecord_Model): res = db.IOData.insert_one(data.model_dump_db()) @@ -494,7 +489,7 @@ def get_file(uid: str, tdir=Depends(get_temp_dir)): fn = foundfile.filename return StreamingResponse(wfile, headers={"Content-Disposition": "attachment; filename=" + fn}) - +# TODO: store parent as metadata, validate the fs.files record as well @app.post("/file/", tags=["Files"]) def upload_file(file: UploadFile): if file: @@ -506,6 +501,7 @@ def upload_file(file: UploadFile): @app.get("/property_array_data/{fid}/{i_start}/{i_count}/", tags=["Additional"]) def get_property_array_data(fid: str, i_start: int, i_count: int): + # XXX: make a direct function call, no need to go through REST API again (or is that for granta?) pfile, fn = mupifDB.restApiControl.getBinaryFileByID(fid) with tempfile.TemporaryDirectory(dir="/tmp", prefix='mupifDB') as tempDir: full_path = tempDir + "/file.h5" @@ -526,6 +522,7 @@ def get_property_array_data(fid: str, i_start: int, i_count: int): @app.get("/field_as_vtu/{fid}", tags=["Additional"]) def get_field_as_vtu(fid: str, tdir=Depends(get_temp_dir)): + # XXX: make a direct function call, no need to go through REST API again (or is that for granta?) pfile, fn = mupifDB.restApiControl.getBinaryFileByID(fid) full_path = tdir + "/file.h5" f = open(full_path, 'wb') diff --git a/mupifDB/models.py b/mupifDB/models.py index 5a74748..921f538 100644 --- a/mupifDB/models.py +++ b/mupifDB/models.py @@ -58,10 +58,6 @@ def TEMP_getLookupChildren(self) -> List[TEMP_DbLookup_Model]: return [] class ObjectWithParent_Mixin(abc.ABC): - @abc.abstractmethod - def getParent(self) -> Optional[DbRef_Model]: pass - @abc.abstractmethod - def TEMP_setParent(self, parent: DbRef_Model) -> None: pass @abc.abstractmethod def TEMP_mongoParentQuerySet(self) -> Tuple[Dict,Dict]: pass @@ -71,23 +67,32 @@ class GridFSFile_Model(MongoObjBase_Model,ObjectWithParent_Mixin): chunkSize: int uploadDate: datetime.datetime metadata: Dict[str,Any]={} - def getParent(self) -> Optional[DbRef_Model]: + # enable to make parent mandatory when validating the model + if 0: + @pydantic.field_validator('metadata') + @classmethod + def validate_metadata_parent(cls, md: Dict[str,Any]): + assert 'parent' in md + DbRef_Model.model_validate(md['parent']) + return md + # emulate parent as property, dynamically get/set via metadata (API in alignment with MongoObj_Model which store it in the db directly) + @property + def parent(self): if parent:=self.metadata.get('parent',None): return DbRef_Model.model_validate(parent) return None - def TEMP_setParent(self,parent: DbRef_Model) -> None: + @parent.setter + def parent(self,parent): if 'parent' in self.metadata: raise RuntimeError(f'Parent is already defined: {self.metadata["parent"]=}.') self.metadata['parent']=parent.model_dump(mode='json') def TEMP_mongoParentQuerySet(self) -> Tuple[Dict,Dict]: assert 'parent' in self.metadata + DbRef_Model.model_validate(self.metadata['parent']) return {'_id':bson.objectid.ObjectId(self.dbID)},{'$set':{'metadata':self.metadata}} class MongoObj_Model(MongoObjBase_Model,ObjectWithParent_Mixin): + # remove Optional to make parent mandatory when validating the model parent: Optional[DbRef_Model]=None - def getParent(self) -> Optional[DbRef_Model]: return self.parent - def TEMP_setParent(self, parent: DbRef_Model) -> None: - if self.parent is not None: raise RuntimeError(f'Parent is already defined: {self.parent=}.') - self.parent=parent def TEMP_mongoParentQuerySet(self) -> Tuple[Dict,Dict]: assert self.parent is not None return {'_id':bson.objectid.ObjectId(self.dbID)},{'$set':{'parent':self.parent.model_dump(mode='json')}} diff --git a/mupifDB/schedulerMonitor.py b/mupifDB/schedulerMonitor.py index b506b04..10505c9 100644 --- a/mupifDB/schedulerMonitor.py +++ b/mupifDB/schedulerMonitor.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import sys -import Pyro5 +import Pyro5.api from builtins import str diff --git a/tools/database-add-parents.py b/tools/database-add-parents.py index 64693ba..5a3bf1b 100644 --- a/tools/database-add-parents.py +++ b/tools/database-add-parents.py @@ -78,16 +78,15 @@ def resolve_DbRef(db, ref: models.DbRef_Model): return coll2model[ref.where].model_validate(rec) def set_parent_db(dbColl, child, parent: models.DbRef_Model): - if parentPrev:=child.getParent(): - if parentPrev!=parent: raise RuntimeError(f'Old and new parents differ: {parentPrev=} {parent=}') - return - child.TEMP_setParent(parent) + if child.parent and child.parent!=parent: + raise RuntimeError(f'Old and new parents differ: {child.parent=} {parent=}') + child.parent=parent querySet=child.TEMP_mongoParentQuerySet() child2a=dbColl.find_one_and_update(*querySet,return_document=pymongo.ReturnDocument.AFTER) # pprint(child2a) if child2a is None: raise RuntimeError('Object to be updated not found?') #child2=type(child).model_validate(child2a) - #assert child.getParent()==parent + #assert child.parent==parent # TODO: this will set dangling reference in the parent object to null def set_attr_null(dbColl,obj,attr): @@ -132,7 +131,7 @@ def set_attr_null(dbColl,obj,attr): log.error(f'Unresolvable child {cref=} (setting to null not yet implemented)') print_mongo(rec) continue - if child.getParent() is None: + if child.parent is None: parentsAdded+=1 set_parent_db(db.get_collection(cref.where),child,thisRef) for clook in obj.TEMP_getLookupChildren(): @@ -142,7 +141,7 @@ def set_attr_null(dbColl,obj,attr): log.error(f'Unresolvable child {cref=} {clook=} (setting to null not yet implemented)') print_mongo(rec) continue - if child.getParent() is None: + if child.parent is None: parentsAdded+=1 set_parent_db(db.get_collection(cref.where),child,thisRef) progress.update(chi_task,visible=False) @@ -178,7 +177,7 @@ def set_attr_null(dbColl,obj,attr): progress.start_task(doc_task) for irec,rec in enumerate(cursor): obj=Model.model_validate(rec) - if parent:=obj.getParent() is None: noParent.append((coll,obj.dbID)) + if obj.parent is None: noParent.append((coll,obj.dbID)) # print(coll,obj.dbID) progress.advance(doc_task) # if irec>1000: break