Skip to content

Commit

Permalink
run input4MIPsFuncs through pep8
Browse files Browse the repository at this point in the history
  • Loading branch information
durack1 committed May 12, 2023
1 parent 1ef46c3 commit 8e85d5b
Showing 1 changed file with 124 additions and 93 deletions.
217 changes: 124 additions & 93 deletions src/input4MIPsFuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,163 +37,183 @@
PJD 23 Jul 2019 - Update os.chmod calls to take octal arguments, prefix '0o' rather than py2 '0'
PJD 23 Jul 2019 - Updated os.chmod file calls from 664 (-rw-rw-r--) to 644 (-rw-r--r--)
PJD 21 Nov 2019 - Updated to deal with lack of climatew defined on detect (corrected back)
PJD 12 May 2023 - Run through pep8
- TODO: Need to add test for uid and gid existence before running any code (use grp)
- TODO: Note other Synda sensitive entries are "priority" and "type"
@author: durack1
"""
import datetime,errno,json,os,pytz,re,sys
import datetime
import errno
import json
import os
import pytz
import re
import sys

# %% Create master vars for validation
# Activity (MIP) identifiers accepted when validating a jsonId prefix or a
# target MIP elsewhere in this module
MIPList = [
    'AerChemMIP',
    'C4MIP',
    'CDRMIP',
    'CFMIP',
    'CMIP',
    'CORDEX',
    'DAMIP',
    'DCPP',
    'DynVarMIP',
    'FAFMIP',
    'GMMIP',
    'GeoMIP',
    'HighResMIP',
    'ISMIP6',
    'LS3MIP',
    'LUMIP',
    'OMIP',
    'PAMIP',
    'PMIP',
    'RFMIP',
    'SIMIP',
    'ScenarioMIP',
    'VIACSAB',
    'VolMIP',
]

# %% Create uuid and gid entries for durack1
# (cdatpy3N190723) > id durack1
# uid=40336(durack1) gid=1026(climate) groups=1026(climate),3669(cmipXmls),4669(cmipXmlsAdmin),2669(climatew),6669(xclimw)
# Numeric owner and group passed to os.chown for files written by this module
uid = 40336  # durack1
gid = 2669  # climatew
# gid = 1026  # climate (alternative group, see id output above)

#%% Test tracking_id for valid form
def checkTrackingId(trackingId):
    """Check that a tracking_id has the form ``hdl:21.14100/<uuid>``.

    Example of a valid value:
    ``hdl:21.14100/0499b180-3af7-43e8-9bfe-228722d64100`` - the prefix
    ``hdl:21.14100/`` followed by a lower-case UUID of the 8-4-4-4-12 form.
    Spec indicates no upper-case, see
    https://stackoverflow.com/questions/8258480/type-of-character-generated-by-uuid

    Parameters
    ----------
    trackingId : str
        The tracking_id string to validate.

    Returns
    -------
    bool
        True when both the prefix and the UUID part are valid, otherwise
        False. A diagnostic line is printed in every case.
    """
    # NOTE(review): the character class accepts any a-z0-9, which is broader
    # than hexadecimal - kept as-is, confirm whether it should be [0-9a-f]
    reTrackIdTest = re.compile(
        r'[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}')
    # Split on the first '/' only: a missing '/' yields an empty UUID part
    # (instead of an IndexError) and any extra '/segments' stay attached to
    # the UUID part so that they are rejected below
    prefix, _, uuidPart = trackingId.partition('/')
    if prefix != 'hdl:21.14100':
        print('checkTrackingId: invalid prefix != hdl:21.14100 = ', prefix)
        return False
    # fullmatch, not match: trailing characters after the UUID are invalid
    if not reTrackIdTest.fullmatch(uuidPart):
        print('checkTrackingId: invalid UUID4 = ', uuidPart)
        return False
    print('checkTrackingId: valid prefix/UUID4')
    return True

#%% Generate publication files
def _writePubList(listPath, entries):
    """Write one entry per line to listPath, then set mode 644 and owner."""
    with open(listPath, 'w') as f:
        f.writelines('%s\n' % item for item in entries)
    # Wash perms of file - 0o644 is an octal literal (-rw-r--r--)
    # https://stackoverflow.com/questions/15607903/python-module-os-chmodfile-664-does-not-change-the-permission-to-rw-rw-r-bu
    # https://stackoverflow.com/questions/1627198/python-mkdir-giving-me-wrong-permissions
    os.chmod(listPath, 0o644)
    os.chown(listPath, uid, gid)


def createPubFiles(destPath, jsonId, jsonFilePaths, variableFilePaths):
    """Write the json-list and file-list publication files for a dataset.

    Two text files named ``<yymmdd_HHMMSS>_input4MIPs-<jsonId>_jsonList.txt``
    and ``..._fileList.txt`` are written to the publish-queue directory, one
    de-duplicated path per line, with mode 644 and the module-level uid/gid
    as owner.

    Parameters
    ----------
    destPath : str
        Directory that is made the working directory before validation.
    jsonId : str
        Identifier of the form ``<activity_id>-<user identifier>``; the
        activity_id must be an entry of MIPList.
    jsonFilePaths : list
        json file paths to record; duplicates are removed.
    variableFilePaths : list
        Data file paths to record; duplicates are removed.

    Raises
    ------
    SystemExit
        With exit status 1 when jsonId does not have the expected form.

    Notes
    -----
    The process working directory is changed (first to destPath, then to the
    publish-queue directory) and is not restored.
    """
    os.chdir(destPath)
    print('createPubFiles: os.cwd() =', os.getcwd())
    print('createPubFiles: jsonId =', jsonId)
    # Check jsonId format - every failure exits non-zero so that calling
    # scripts can detect it
    testStr = jsonId.split('-')
    if len(testStr) != 2:
        print('teststr:', testStr)
        print('createPubFiles: jsonId format invalid, exiting ..')
        sys.exit(1)
    if testStr[0] not in MIPList:
        print('createPubFiles: jsonId format issue - activity_id invalid, exiting ..')
        sys.exit(1)
    if testStr[1] == '':
        print('createPubFiles: jsonId format issue - User identifier invalid, exiting ..')
        sys.exit(1)
    key = '-'.join(['input4MIPs', jsonId])
    # Create output files for publication
    # pytz zones must be attached with localize(); replace(tzinfo=...) would
    # attach the zone's historical LMT offset. The wall-clock fields used in
    # the stamp are those of the system clock either way
    local = pytz.timezone('America/Los_Angeles')
    localTimeNow = local.localize(datetime.datetime.now())
    dateStamp = localTimeNow.strftime('%y%m%d_%H%M%S')
    # Change to target directory
    pubFileDir = '/p/user_pub/publish-queue/input4MIPs-list-todo/'
    os.chdir(pubFileDir)
    # Trim each list for unique entries, then write and wash perms
    jsonFilePath = '_'.join([dateStamp, key, 'jsonList.txt'])
    jsonFilePaths = removeDuplicates(jsonFilePaths)
    _writePubList(jsonFilePath, jsonFilePaths)
    fileListPaths = '_'.join([dateStamp, key, 'fileList.txt'])
    variableFilePaths = removeDuplicates(variableFilePaths)
    _writePubList(fileListPaths, variableFilePaths)
    print('createPubFiles: Publication files successfully written to:', pubFileDir)

# %% Generate json file for publication step

#%% Generate json file for publication step
def jsonWriteFile(conventions,activityId,contact,creationDate,datasetCategory,datasetVersionNumber,
frequency,furtherInfoUrl,gridLabel,institution,institutionId,mipEra,
nominalResolution,realm,source,sourceId,targetMip,targetMipJson,title,
variableId,fileList,trackingIdList,deprecated,dataVersion,destPath,
jsonId,overWriteFile=None):

def jsonWriteFile(conventions, activityId, contact, creationDate, datasetCategory, datasetVersionNumber,
frequency, furtherInfoUrl, gridLabel, institution, institutionId, mipEra,
nominalResolution, realm, source, sourceId, targetMip, targetMipJson, title,
variableId, fileList, trackingIdList, deprecated, dataVersion, destPath,
jsonId, overWriteFile=None):
if overWriteFile == None:
overWriteFile = False
esgfPubDict = {}
# Check jsonId format
testStr = jsonId.split('-')
print('testStr:',testStr)
print('len(testStr):',len(testStr))
print('testStr:', testStr)
print('len(testStr):', len(testStr))
if len(testStr) != 2:
print('teststr 0:',testStr[0])
print('teststr 0:', testStr[0])
print('jsonWriteFile: format invalid, exiting..')
# if testStr[0] != 'input4MIPs':
# #print 'teststr 0:',testStr[0]
# print 'jsonWriteFile: format invalid, exiting..'
# sys.exit()
if testStr[0] not in MIPList:
#print 'teststr 1:',testStr[1]
# print 'teststr 1:',testStr[1]
print('jsonWriteFile: format issue: activity_id invalid, exiting ..')
sys.exit()
if testStr[1] == '':
#print 'teststr 2:',testStr[2]
# print 'teststr 2:',testStr[2]
print('jsonWriteFile: format issue: User identifier invalid, exiting ..')
sys.exit()

# Test targetMip
print('jsonWriteFile: targetMip =',targetMip)
print('jsonWriteFile: targetMip =', targetMip)

#key = '-'.join(['input4MIPs',jsonId,targetMip])
key = '-'.join(['input4MIPs',jsonId])
print('key:',key)
# key = '-'.join(['input4MIPs',jsonId,targetMip])
key = '-'.join(['input4MIPs', jsonId])
print('key:', key)
esgfPubDict[key] = {}
esgfPubDict[key]['Conventions'] = ' '.join(conventions.split())
esgfPubDict[key]['activity_id'] = 'input4MIPs' ; #' '.join(activityId.split())
# ' '.join(activityId.split())
esgfPubDict[key]['activity_id'] = 'input4MIPs'
esgfPubDict[key]['contact'] = ' '.join(contact.split())
esgfPubDict[key]['creation_date'] = creationDate ; #'.join(creationDate.split())
# '.join(creationDate.split())
esgfPubDict[key]['creation_date'] = creationDate
esgfPubDict[key]['dataset_category'] = ' '.join(datasetCategory.split())
esgfPubDict[key]['source_version'] = ' '.join(datasetVersionNumber.split()) ; # dataset_version_number
esgfPubDict[key]['source_version'] = ' '.join(
datasetVersionNumber.split()) # dataset_version_number
esgfPubDict[key]['frequency'] = ' '.join(frequency.split())
esgfPubDict[key]['further_info_url'] = ' '.join(furtherInfoUrl.split())
esgfPubDict[key]['grid_label'] = ' '.join(gridLabel.split())
esgfPubDict[key]['institution'] = ' '.join(institution.split())
esgfPubDict[key]['institution_id'] = ' '.join(institutionId.split())
esgfPubDict[key]['mip_era'] = ' '.join(mipEra.split())
esgfPubDict[key]['nominal_resolution'] = ' '.join(nominalResolution.split())
esgfPubDict[key]['nominal_resolution'] = ' '.join(
nominalResolution.split())
esgfPubDict[key]['realm'] = ' '.join(realm.split())
esgfPubDict[key]['realm_drs'] = ' '.join(realm.split())
esgfPubDict[key]['source'] = ' '.join(source.split())
esgfPubDict[key]['source_id'] = sourceId ; #' '.join(sourceId.split())
esgfPubDict[key]['target_mip'] = targetMip ; # First entry only
esgfPubDict[key]['source_id'] = sourceId # ' '.join(sourceId.split())
esgfPubDict[key]['target_mip'] = targetMip # First entry only
# test target_mip
if targetMip not in MIPList:
print('jsonWriteFile: MIP invalid, exiting..')
sys.exit()
esgfPubDict[key]['target_mip_list'] = targetMipJson ; #list([' '.join(targetMip.split())])
esgfPubDict[key]['title'] = ' '.join(title.split()) # Comes from file
# list([' '.join(targetMip.split())])
esgfPubDict[key]['target_mip_list'] = targetMipJson
esgfPubDict[key]['title'] = ' '.join(title.split()) # Comes from file
esgfPubDict[key]['variable_id'] = ' '.join(variableId.split())
# Single files
#esgfPubDict[key]['file_list'] = list([os.path.join(mipEra,'input4MIPs',outPath,fileName)])
#esgfPubDict[key]['tracking_id_list'] = list([trackingId]) # Comes from file
# esgfPubDict[key]['file_list'] = list([os.path.join(mipEra,'input4MIPs',outPath,fileName)])
# esgfPubDict[key]['tracking_id_list'] = list([trackingId]) # Comes from file
# Case of multiple files
if type(fileList) == list:
print('jsonWriteFile: fileList test pass')
esgfPubDict[key]['file_list'] = fileList ; # Ensure list type
esgfPubDict[key]['file_list'] = fileList # Ensure list type
else:
print('jsonWriteFile: fileList test fail')
esgfPubDict[key]['file_list'] = list([fileList])
Expand All @@ -203,46 +223,52 @@ def jsonWriteFile(conventions,activityId,contact,creationDate,datasetCategory,da
esgfPubDict[key]['product'] = 'forcing_dataset'
esgfPubDict[key]['project'] = 'input4MIPs'
# Conditional on sourceId
esgfPubDict[key]['deprecated'] = deprecated ; # Add for republication
retracted = False ; # hard coded for all as no retracted data will be republished
esgfPubDict[key]['retracted'] = retracted ; # Add for republication
esgfPubDict[key]['deprecated'] = deprecated # Add for republication
retracted = False # hard coded for all as no retracted data will be republished
esgfPubDict[key]['retracted'] = retracted # Add for republication
# Boolean logic
if not deprecated and not retracted:
# case false and false
esgfPubDict[key]['dataset_status'] = 'latest' ; # Add for republication
esgfPubDict[key]['dataset_status'] = 'latest' # Add for republication
elif deprecated:
# case true for deprecated
esgfPubDict[key]['dataset_status'] = 'deprecated'
elif retracted:
# case true for retracted
esgfPubDict[key]['status'] = 'retracted'
esgfPubDict[key]['version'] = dataVersion
utcNow = datetime.datetime.utcnow();
utcNow = utcNow.replace(tzinfo=pytz.utc)
timeFormat = utcNow.strftime("%Y-%m-%dT%H:%M:%SZ")
utcNow = datetime.datetime.utcnow()
utcNow = utcNow.replace(tzinfo=pytz.utc)
timeFormat = utcNow.strftime("%Y-%m-%dT%H:%M:%SZ")
esgfPubDict[key]['timestamp'] = timeFormat
# Write to json file
outFile = os.path.join(destPath,activityId,mipEra,targetMip,institutionId,''.join(['_'.join([institutionId,frequency,sourceId,variableId,gridLabel,dataVersion]),'.json']))
print('jsonWriteFile: json filename - ',outFile)
#outFile = os.path.join(userPath,'tmp',''.join(['_'.join([institutionId,frequency,sourceId,variableId]),'.json']))
outFile = os.path.join(destPath, activityId, mipEra, targetMip, institutionId, ''.join(
['_'.join([institutionId, frequency, sourceId, variableId, gridLabel, dataVersion]), '.json']))
print('jsonWriteFile: json filename - ', outFile)
# outFile = os.path.join(userPath,'tmp',''.join(['_'.join([institutionId,frequency,sourceId,variableId]),'.json']))
# Validate path - test for valid targetMip
input4MIPsInd = outFile.split('/').index('input4MIPs') ; # Correct no matter what destPath is set
# Correct no matter what destPath is set
input4MIPsInd = outFile.split('/').index('input4MIPs')
if outFile.split('/')[input4MIPsInd+2] not in MIPList:
print('jsonWriteFile: targetMip ',outFile.split('/')[6],' invalid MIP, exiting..')
print('jsonWriteFile: targetMip ', outFile.split(
'/')[6], ' invalid MIP, exiting..')
sys.exit()
# Validate if json file exists - DO NOT DELETE/OVERWRITE
if os.path.exists(outFile):
if overWriteFile:
print('jsonWriteFile: File existing, purging - ',outFile)
print('jsonWriteFile: File existing, purging - ', outFile)
os.remove(outFile)
elif overWriteFile: # Condition must be True
elif overWriteFile: # Condition must be True
print('jsonWriteFile: File exists, exiting..')
sys.exit()
fH = open(outFile,'w',encoding='utf8') ; # Update from straight open call
json.dump(esgfPubDict,fH,ensure_ascii=True,sort_keys=True,indent=4,separators=(',',':'))
fH = open(outFile, 'w', encoding='utf8') # Update from straight open call
json.dump(esgfPubDict, fH, ensure_ascii=True,
sort_keys=True, indent=4, separators=(',', ':'))
fH.close()

#%% Remove duplicate elements from list
# %% Remove duplicate elements from list


def removeDuplicates(listofElements):
# Create an empty list to store unique elements
uniqueList = []
Expand All @@ -255,29 +281,34 @@ def removeDuplicates(listofElements):
# Return the list of unique elements
return uniqueList

#%% Wash permissions
def washPerms(destPath, activityId, mipEra, targetMip, institutionId, sourceId, realm, frequency, gridLabel, dataVersion):
    """Reset permissions below ``destPath/activityId/mipEra/targetMip``.

    Every directory found below that path is set to 775 and every file to
    644. Directories whose name contains ``-retracted`` are left untouched
    and are not descended into. The top directory itself is not modified.

    Parameters
    ----------
    destPath : str
        Root directory; it is made the working directory and not restored.
    activityId, mipEra, targetMip : str
        Path components joined (relative to destPath) to form the tree that
        is walked.
    institutionId, sourceId, realm, frequency, gridLabel, dataVersion
        Accepted for interface compatibility; not used by this function.

    Raises
    ------
    OSError
        When chmod of a file fails. A failing chmod of a directory is only
        reported and the walk continues.
    """
    os.chdir(destPath)
    print('washPerms: os.cwd() = ', os.getcwd())
    # [durack1@oceanonly input4MIPs]$ chmod 755 -R FAFMIP/
    # sourceId is left out of the path as cases of multiple exist
    pathX = os.path.join(activityId, mipEra, targetMip)
    print('washPerms: pathX = ', pathX)
    for root, dirs, files in os.walk(pathX, topdown=True):
        # Prune dirs in place so os.walk skips retracted dirs entirely
        dirs[:] = [d for d in dirs if '-retracted' not in d]
        for d in dirs:
            print('washPerms: dir =', d)
            try:
                os.chmod(os.path.join(root, d), 0o775)
                # https://stackoverflow.com/questions/15607903/python-module-os-chmodfile-664-does-not-change-the-permission-to-rw-rw-r-bu
            except OSError as e:
                print('e:', e)
                # OSError is not subscriptable in Python 3; use e.errno
                if e.errno == errno.EPERM:
                    print('Permissions to complete task not assigned, skipping')
                    sys.stderr.write(
                        'Permissions to complete task not assigned, skipping\n')
                continue

        for f in files:
            print('washPerms: file =', f)
            os.chmod(os.path.join(root, f), 0o644)

0 comments on commit 8e85d5b

Please sign in to comment.