Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored validate-refs to use new OpenSearch Serverless and registry-common library #906

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 40 additions & 8 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -367,19 +367,51 @@
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.opensearch.client</groupId>
<artifactId>opensearch-rest-client</artifactId>
<version>2.16.0</version>
<groupId>org.mp4parser</groupId>
<artifactId>isoparser</artifactId>
<version>1.9.56</version>
</dependency>
<!-- four artifacts for complete configuration parsing and reference integrity (RI) calls -->
<dependency>
<groupId>gov.nasa.pds</groupId>
<artifactId>registry-common</artifactId>
<version>2.1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>jakarta.xml.bind</groupId>
<artifactId>jakarta.xml.bind-api</artifactId>
<version>4.0.1</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>4.0.4</version>
</dependency>
<dependency>
<groupId>jakarta.activation</groupId>
<artifactId>jakarta.activation-api</artifactId>
<version>2.1.2</version>
</dependency>
<!-- four artifacts for opensearch serverless -->
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.opensearch.client</groupId>
<artifactId>opensearch-rest-high-level-client</artifactId>
<version>2.5.0</version>
<artifactId>opensearch-java</artifactId>
<version>2.13.0</version>
</dependency>
<dependency>
<groupId>org.mp4parser</groupId>
<artifactId>isoparser</artifactId>
<version>1.9.56</version>
<groupId>software.amazon.awssdk</groupId>
<artifactId>opensearch</artifactId>
<version>2.25.31</version>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>apache-client</artifactId>
<version>2.25.31</version>
</dependency>
</dependencies>

Expand Down
125 changes: 32 additions & 93 deletions src/main/java/gov/nasa/pds/validate/ri/AuthInformation.java
Original file line number Diff line number Diff line change
@@ -1,102 +1,41 @@
package gov.nasa.pds.validate.ri;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Properties;
import java.util.Scanner;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.lang.NotImplementedException;
import gov.nasa.pds.registry.common.ConnectionFactory;
import gov.nasa.pds.registry.common.EstablishConnectionFactory;

public class AuthInformation {
final public static AuthInformation NO_AUTH = new AuthInformation(false, "", "", "");
final private boolean trustSelfSigned;
final private String password;
final private String url;
final private String username;

private AuthInformation(boolean tss, String pwd, String un, String url) {
this.password = pwd;
this.trustSelfSigned = tss;
this.url = url;
this.username = un;
final private String apiAuthFile;
final private String osAuthFile;
final private String regConn;
private transient ConnectionFactory factory = null;
private AuthInformation(String a, String A, String r) {
this.apiAuthFile = A;
this.osAuthFile = a;
this.regConn = r;
}

public static AuthInformation buildFrom(String filename)
throws IOException, ParserConfigurationException, SAXException {
boolean tss;
File file = new File(filename);
Scanner textReader;
String line = null, pwd, un, url;

if (filename == null || filename.length() == 0)
return NO_AUTH;
if (!file.exists())
throw new IOException("Filename '" + filename + "' does not exist");

// Get the first non-comment line
textReader = new Scanner(file, Charset.defaultCharset().name());
while (textReader.hasNext() && line == null) {
line = textReader.nextLine().strip();
if (line.charAt(0) == '#')
line = null;
}
textReader.close();

// Determine which file processing to use
if (line.startsWith("<?xml ") && line.endsWith("?>")) { // XML
// <registry url="http://localhost:9200" index="registry" auth="/path/to/auth.cfg" />
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document document = builder.parse(file);
NodeList registries = document.getElementsByTagName("registry");

if (registries.getLength() != 1)
throw new SAXException(
"There should be one and only registry tag in the harvest config file but found "
+ registries.getLength());
if (registries.item(0).getAttributes().getNamedItem("auth") == null)
throw new SAXException("Requires an authorization file or 'auth' attribute on <registry>.");

filename = registries.item(0).getAttributes().getNamedItem("auth").getNodeValue();
url = registries.item(0).getAttributes().getNamedItem("url").getNodeValue();
} else { // java property
FileInputStream input = new FileInputStream(file);
Properties properties = new Properties();
properties.load(input);
url = properties.getProperty("url");
filename = properties.getProperty("credentials");
input.close();
}

// Get credentials
FileInputStream input = new FileInputStream(filename);
Properties properties = new Properties();
properties.load(input);
pwd = properties.getProperty("password");
tss = Boolean.valueOf(properties.getProperty("trust.self-signed", "false"));
un = properties.getProperty("user");
input.close();
return new AuthInformation(tss, pwd, un, url);
}

public String getPassword() {
return password;
}

public boolean getTrustSelfSigned() {
return trustSelfSigned;
public static AuthInformation buildFrom(CommandLine cl) {
return new AuthInformation(
cl.getOptionValue("a",""),
cl.getOptionValue("A",""),
cl.getOptionValue("r",""));
}

public String getUsername() {
return username;
public synchronized ConnectionFactory getConnectionFactory() throws Exception {
if (this.factory == null) {
if (!this.apiAuthFile.isBlank()) {
throw new NotImplementedException();
}
if (!this.osAuthFile.isBlank()) {
this.factory = EstablishConnectionFactory.from(this.regConn, this.osAuthFile);
}
if (this.factory == null) {
throw new IllegalArgumentException("did not supply necessary arguments on the CLI");
}
}
return this.factory;
}

public String getUrl() {
return url;
public String getURL() {
return factory != null ? this.factory.toString() : "uninitialized connection factory";
}
}
73 changes: 28 additions & 45 deletions src/main/java/gov/nasa/pds/validate/ri/CommandLineInterface.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@ public CommandLineInterface() {
"file with the URL and credential content to have full (all product states) read-only access to the Registry Search API")
.hasArg(true).longOpt("auth-api").numberOfArgs(1).optionalArg(true).build()); */
this.opts.addOption(Option.builder("a").argName("auth-file").desc(
"file with the URL and credential content to have full, direct read-only access to the Registry OpenSearch DB")
"file with the credential content to have full, direct read-only access to the Registry OpenSearch DB")
.hasArg(true).longOpt("auth-opensearch").numberOfArgs(1).optionalArg(true).build());
this.opts.addOption(Option.builder("h").desc("show this text and exit").hasArg(false)
.longOpt("help").optionalArg(true).build());
this.opts.addOption(Option.builder("r").argName("registry-connection").desc(
"URL point to the registry connection information usually of the form app://connection/direct/localhost.xml")
.hasArg(true).longOpt("registry-connection").numberOfArgs(1).optionalArg(true).build());
this.opts.addOption(Option.builder("t").argName("count").desc(
"process the lidvids in parallel (multiple threads) with this argument being the maximum number of threads")
.hasArg(true).longOpt("threads").optionalArg(true).build());
Expand All @@ -55,16 +58,11 @@ public void help() {
"Multiple arguments may be given in any order, for example:\n" +
" > validate-refs urn:nasa:pds:foo::1.0 label.xml urn:nasa:pds:bar::2.0 manifest.txt\n\n",
opts,
"\nAn auth-file is either a text file of the Java property format " +
"with two variables, 'url' and 'credentials': \n\n" +
" - The 'url' property is the complete base URL to the Registry OpenSearch endpoint or Search API\n" +
" * 'https://my-registry.es.amazonaws.com/_search'\n\n" +
" - The 'credentials' is the path to:\n" +
" * Harvest config file containing the necessary Registry OpenSearch authorization\n" +
" <registry url=\"http://localhost:9200\" index=\"registry\" auth=\"/path/to/auth.cfg\" />\n" +
" * Java Properties file with a 'user' and 'password' specified, for example: \n" +
" user=janedoe\n" +
" password=mypassword\n\n",
"\nAn auth-file is a text file of the Java property format " +
"with two variables, 'user' and 'password' for example: \n" +
" user=janedoe\n" +
" password=mypassword\n\n" +
"Both -a and -r are required.\n\n",
true);
}

Expand All @@ -89,17 +87,16 @@ public int process(String[] args)
loggerConfig.setLevel(Level.INFO);
ctx.updateLoggers();

if (!cl.hasOption("a")) {
throw new ParseException("Not yet implemented. Must provide OpenSearch Registry authorization information.");
} else if (!cl.hasOption("A")) {
log.warn("Using Registry OpenSearch Database to check references.");
if (cl.hasOption("A")) {
throw new ParseException("Not yet implemented. Must provide OpenSearch Registry authorization information through -a and -r.");
} else {
/* not true statement until registry handles authentication
* throw new ParseException("Must supply authorization file for access to either OpenSearch Database (auth-opensearch) or OpenSearch Registry (auth-api).");
*/
throw new ParseException("Must define authorization file for access to OpenSearch Database (auth-opensearch).");
boolean both = cl.hasOption("a") && cl.hasOption("r");
if (!both) {
throw new ParseException("Both -a and -r must be given.");
} else {
log.warn("Using Registry OpenSearch Database to check references.");
}
}

if (cl.getArgList().size() < 1)
throw new ParseException("Must provide at least one LIDVID, Label file path, or manifest file path as a starting point.");

Expand All @@ -115,31 +112,17 @@ public int process(String[] args)
} else
this.log.info("lidvids will be sequentially processed.");

try {
DuplicateFileAreaFilenames scanner = new DuplicateFileAreaFilenames(
AuthInformation.buildFrom(cl.getOptionValue("auth-api", "")),
AuthInformation.buildFrom(cl.getOptionValue("auth-opensearch", "")));
Engine engine = new Engine(cylinders, UserInput.toLidvids (cl.getArgList()),
AuthInformation.buildFrom(cl.getOptionValue("auth-api", "")),
AuthInformation.buildFrom(cl.getOptionValue("auth-opensearch", "")));
this.log.info("Starting the duplicate filename in FileArea checks.");
scanner.findDuplicatesInBackground();
this.log.info("Starting the reference integrity checks.");
engine.processQueueUntilEmpty();
scanner.waitTillDone();
this.broken = engine.getBroken();
this.duplicates = scanner.getResults();
this.total = engine.getTotal();
} catch (IOException e) {
this.log.fatal("Cannot process request because of IO problem.", e);
throw e;
} catch (ParserConfigurationException e) {
this.log.fatal("Could not parse the harvest configuration file.", e);
throw e;
} catch (SAXException e) {
this.log.fatal("Mal-formed harvest configuration file.", e);
throw e;
}
DuplicateFileAreaFilenames scanner = new DuplicateFileAreaFilenames(AuthInformation.buildFrom(cl));
Engine engine = new Engine(cylinders, UserInput.toLidvids (cl.getArgList()), AuthInformation.buildFrom(cl));
this.log.info("Starting the duplicate filename in FileArea checks.");
scanner.findDuplicatesInBackground();
this.log.info("Starting the reference integrity checks.");
engine.processQueueUntilEmpty();
scanner.waitTillDone();
this.broken = engine.getBroken();
this.duplicates = scanner.getResults();
this.total = engine.getTotal();

if (-1 < this.total) {
this.log.info("Reference Summary:");
this.log.info(" " + this.total + " products processed");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package gov.nasa.pds.validate.ri;

import java.io.Serializable;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.core.Appender;
import org.apache.logging.log4j.core.ErrorHandler;
Expand All @@ -11,7 +12,7 @@ class CountingAppender implements Appender {
private int err = 0, fatal = 0, warn = 0;
private ErrorHandler handler = null;
private Filter filter = null;
private Layout layout = null;
private Layout<Serializable> layout = null;
private String name = "";

public void addFilter(Filter newFilter) {
Expand Down Expand Up @@ -54,7 +55,7 @@ public ErrorHandler getHandler() {
}

@Override
public Layout getLayout() {
public Layout<Serializable> getLayout() {
return this.layout;
}

Expand Down
11 changes: 4 additions & 7 deletions src/main/java/gov/nasa/pds/validate/ri/Cylinder.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,16 @@
import org.apache.logging.log4j.Logger;

public class Cylinder implements Runnable {
final private AuthInformation registry;
final private AuthInformation search;
final private CamShaft cam;
final private Logger log = LogManager.getLogger(Cylinder.class);
final public Logger reporter = LogManager.getLogger("Reference Integrity");
final private String lidvid;
private long broken = 0;

public Cylinder(String lidvid, AuthInformation registry, AuthInformation search, CamShaft cam) {
public Cylinder(String lidvid, AuthInformation search, CamShaft cam) {
this.cam = cam;
this.lidvid = lidvid;
this.registry = registry;
this.search = search;
}

Expand All @@ -37,8 +35,7 @@ public long getBroken() {
public void run() {
try {
ArrayList<String> referenced_valid_lidvids = new ArrayList<String>();
DocumentInfo search = AuthInformation.NO_AUTH.equals (this.registry) ? new OpensearchDocument(this.search) : new RegistryDocument(this.registry);;
String magicWord = AuthInformation.NO_AUTH.equals (this.registry) ? "database." : "registry.";
DocumentInfo search = new OpensearchDocument(this.search);

if (search.exists(this.lidvid)) {
this.log.info(
Expand All @@ -49,11 +46,11 @@ public void run() {
else {
this.broken++;
this.reporter.error("In the search the lidvid '" + this.lidvid + "' references '"
+ reference + "' that is missing in the " + magicWord);
+ reference + "' that is missing in the database.");
}
}
} else
this.reporter.error("The given lidvid '" + this.lidvid + "' is missing from the " + magicWord);
this.reporter.error("The given lidvid '" + this.lidvid + "' is missing from the database.");

if (this.has_children(search))
this.cam.addAll(referenced_valid_lidvids);
Expand Down
Loading
Loading