-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Chris Li
committed
Aug 12, 2021
1 parent
4a14042
commit b5dc5c0
Showing
8 changed files
with
511 additions
and
2 deletions.
There are no files selected for viewing
188 changes: 188 additions & 0 deletions
188
cdi-core/src/main/java/com/linkedin/cdi/connection/SftpConnection.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
// Copyright 2021 LinkedIn Corporation. All rights reserved. | ||
// Licensed under the BSD-2 Clause license. | ||
// See LICENSE in the project root for license information. | ||
|
||
package com.linkedin.cdi.connection; | ||
|
||
import com.linkedin.cdi.configuration.MultistageProperties; | ||
import com.linkedin.cdi.exception.RetriableAuthenticationException; | ||
import com.linkedin.cdi.factory.ConnectionClientFactory; | ||
import com.linkedin.cdi.factory.DefaultConnectionClientFactory; | ||
import com.linkedin.cdi.factory.sftp.SftpClient; | ||
import com.linkedin.cdi.keys.ExtractorKeys; | ||
import com.linkedin.cdi.keys.HttpKeys; | ||
import com.linkedin.cdi.keys.JobKeys; | ||
import com.linkedin.cdi.keys.SftpKeys; | ||
import com.linkedin.cdi.util.InputStreamUtils; | ||
import com.linkedin.cdi.util.WorkUnitStatus; | ||
import java.io.File; | ||
import java.io.IOException; | ||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.apache.commons.lang.StringUtils; | ||
import org.apache.gobblin.configuration.State; | ||
import org.apache.gobblin.source.extractor.filebased.FileBasedHelperException; | ||
import org.apache.gobblin.source.extractor.filebased.TimestampAwareFileBasedHelper; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
|
||
@Slf4j | ||
public class SftpConnection extends MultistageConnection { | ||
private static final Logger LOG = LoggerFactory.getLogger(SftpConnection.class); | ||
|
||
final private SftpKeys sftpSourceKeys; | ||
SftpClient fsClient; | ||
|
||
public SftpConnection(State state, JobKeys jobKeys, ExtractorKeys extractorKeys) { | ||
super(state, jobKeys, extractorKeys); | ||
assert jobKeys instanceof SftpKeys; | ||
sftpSourceKeys = (SftpKeys) jobKeys; | ||
} | ||
|
||
@Override | ||
public WorkUnitStatus execute(WorkUnitStatus status) { | ||
return null; | ||
} | ||
|
||
@Override | ||
public boolean closeAll(String message) { | ||
if (this.fsClient != null) { | ||
log.info("Shutting down FileSystem connection"); | ||
this.fsClient.close(); | ||
fsClient = null; | ||
} | ||
return true; | ||
} | ||
|
||
/** | ||
This method is the main method to list files based on source base directory and source entity | ||
ms.source.file.pattern | ||
if Is not blank: | ||
List the files and output as CSV | ||
if is blank: | ||
ms.extract.target.file.name? | ||
if is blank: | ||
List the files and output as CSV | ||
if is not blank | ||
if file size is 1 | ||
Dump the file | ||
if files size is >1 | ||
Dump only the file which matches the pattern | ||
*/ | ||
@Override | ||
public WorkUnitStatus executeFirst(WorkUnitStatus workUnitStatus) throws RetriableAuthenticationException { | ||
WorkUnitStatus status = super.executeFirst(workUnitStatus); | ||
String path = getPath(); | ||
String finalPrefix = getWorkUnitSpecificString(path, getExtractorKeys().getDynamicParameters()); | ||
log.info("File path found is: " + finalPrefix); | ||
try { | ||
if (getFsClient() == null) { | ||
log.error("Error initializing SFTP connection"); | ||
return null; | ||
} | ||
} catch (Exception e) { | ||
log.error("Error initializing SFTP connection", e); | ||
return null; | ||
} | ||
|
||
//get List of files matching the pattern | ||
List<String> files; | ||
try { | ||
files = getFiles(finalPrefix); | ||
} catch (Exception e) { | ||
log.error("Error reading file list", e); | ||
return null; | ||
} | ||
|
||
boolean isFilewithPrefixExist = files.stream().anyMatch(file -> file.equals(finalPrefix)); | ||
log.info("No Of Files to be processed matching the pattern: {}", files.size()); | ||
if (StringUtils.isNotBlank(sftpSourceKeys.getFilesPattern())) { | ||
status.setBuffer(InputStreamUtils.convertListToInputStream(getFilteredFiles(files))); | ||
} else { | ||
if (StringUtils.isBlank(sftpSourceKeys.getTargetFilePattern())) { | ||
status.setBuffer(InputStreamUtils.convertListToInputStream(files)); | ||
} else { | ||
String fileToDownload = ""; | ||
if (files.size() == 1) { | ||
fileToDownload = files.get(0); | ||
} else if (isFilewithPrefixExist) { | ||
fileToDownload = finalPrefix; | ||
} | ||
if (StringUtils.isNotBlank(fileToDownload)) { | ||
log.info("Downloading file: {}", files.get(0)); | ||
try { | ||
status.setBuffer(this.fsClient.getFileStream(fileToDownload)); | ||
} catch (FileBasedHelperException e) { | ||
log.error("Error downloading file {}", fileToDownload, e); | ||
return null; | ||
} | ||
} else { | ||
log.warn("Invalid set of parameters. Please make sure to set source directory, entity and file pattern"); | ||
} | ||
} | ||
} | ||
return status; | ||
} | ||
|
||
private SftpClient getFsClient() { | ||
if (this.fsClient == null) { | ||
try { | ||
Class<?> factoryClass = Class.forName(MultistageProperties.MSTAGE_CONNECTION_CLIENT_FACTORY.getValidNonblankWithDefault(this.getState())); | ||
ConnectionClientFactory factory = (ConnectionClientFactory) factoryClass.getDeclaredConstructor().newInstance(); | ||
this.fsClient = factory.getSftpChannelClient(this.getState()); | ||
} catch (Exception e) { | ||
LOG.error("Error initiating SFTP client", e); | ||
} | ||
} | ||
return this.fsClient; | ||
} | ||
|
||
/** | ||
* //TODO: List files based on pattern on parent nodes as well. | ||
* The current version supports pattern only on leaf node. | ||
* Ex: file path supported "/a/b/*c*" | ||
* file path not supported "/a/*b/*c* | ||
* Get files list based on pattern | ||
* @param filesPattern | ||
* @return | ||
*/ | ||
private List<String> getFiles(String filesPattern) { | ||
List<String> files = new ArrayList<>(); | ||
log.info("Files to be processed from input " + filesPattern); | ||
try { | ||
files = fsClient.ls(filesPattern); | ||
int i = 0; | ||
for (String file : files) { | ||
URI uri = new URI(file); | ||
String filepath = uri.toString(); | ||
if (!uri.isAbsolute()) { | ||
File f = new File(getBaseDir(filesPattern), filepath); | ||
filepath = f.getAbsolutePath(); | ||
} | ||
files.set(i, filepath); | ||
i++; | ||
} | ||
} catch (FileBasedHelperException | URISyntaxException e) { | ||
log.error("Unable to list files " + e.getMessage()); | ||
} | ||
return files; | ||
} | ||
private String getPath() { | ||
return sftpSourceKeys.getFilesPath(); | ||
} | ||
|
||
private List<String> getFilteredFiles(List<String> files) { | ||
return files.stream().filter(file -> file.matches(sftpSourceKeys.getFilesPattern())).collect(Collectors.toList()); | ||
} | ||
|
||
private String getBaseDir(String uri) { | ||
File file = new File(uri); | ||
return file.getParentFile().getAbsolutePath() + sftpSourceKeys.getPathSeparator(); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
cdi-core/src/main/java/com/linkedin/cdi/factory/sftp/SftpChannelFileInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Copyright 2021 LinkedIn Corporation. All rights reserved. | ||
// Licensed under the BSD-2 Clause license. | ||
// See LICENSE in the project root for license information. | ||
|
||
package com.linkedin.cdi.factory.sftp; | ||
|
||
import com.jcraft.jsch.Channel; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import org.apache.gobblin.util.io.SeekableFSInputStream; | ||
|
||
/** | ||
* A {@link SeekableFSInputStream} that holds a handle on the Sftp {@link Channel} used to open the | ||
* {@link InputStream}. The {@link Channel} is disconnected when {@link InputStream#close()} is called. | ||
*/ | ||
public class SftpChannelFileInputStream extends SeekableFSInputStream { | ||
private final Channel channel; | ||
|
||
public SftpChannelFileInputStream(InputStream in, Channel channel) { | ||
super(in); | ||
this.channel = channel; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
super.close(); | ||
this.channel.disconnect(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.