From 0106646775f12eb1f5b6e1b7a506e0478c74c66e Mon Sep 17 00:00:00 2001 From: Julia Damerow Date: Thu, 7 Jul 2022 16:57:53 -0400 Subject: [PATCH 01/35] [CITE-177] started on processor to import from crossref --- citesphere-importer/pom.xml | 10 ++- .../kafka/impl/ReferenceImportListener.java | 27 +++++++- .../core/service/AbstractImportProcessor.java | 68 +++++++++++++++++++ ...ortProcessor.java => ImportProcessor.java} | 2 +- .../CrossrefReferenceImportProcessor.java | 50 ++++++++++++++ ...rocessor.java => FileImportProcessor.java} | 54 ++------------- .../importer/core/service/impl/JobInfo.java | 9 +++ 7 files changed, 166 insertions(+), 54 deletions(-) create mode 100644 citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java rename citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/{IImportProcessor.java => ImportProcessor.java} (82%) create mode 100644 citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java rename citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/{ImportProcessor.java => FileImportProcessor.java} (76%) diff --git a/citesphere-importer/pom.xml b/citesphere-importer/pom.xml index 4d1ee8d..4659def 100644 --- a/citesphere-importer/pom.xml +++ b/citesphere-importer/pom.xml @@ -15,7 +15,8 @@ Lovelace-RELEASE 3.0.11.RELEASE 2.2.6.RELEASE - 0.2 + 0.5 + 0.2 $2a$04$oQo44vqcDIFRoYKiAXoNheurzkwX9dcNmowvTX/hsWuBMwijqn44i @@ -69,6 +70,11 @@ citesphere-messages ${citesphere.messages.version} + + edu.asu.diging + crossref-connect + ${crossref-connect-version} + @@ -328,7 +334,7 @@ org.apache.maven.plugins maven-war-plugin - 2.6 + 3.3.1 diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java index 6b4f845..dff58bd 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java @@ -5,11 +5,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.kafka.annotation.KafkaListener; import com.fasterxml.jackson.databind.ObjectMapper; -import edu.asu.diging.citesphere.importer.core.service.IImportProcessor; +import edu.asu.diging.citesphere.importer.core.service.ImportProcessor; import edu.asu.diging.citesphere.messages.KafkaTopics; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; @@ -18,7 +19,12 @@ public class ReferenceImportListener { private final Logger logger = LoggerFactory.getLogger(getClass()); @Autowired - private IImportProcessor processor; + @Qualifier("fileImportProcessor") + private ImportProcessor fileProcessor; + + @Autowired + @Qualifier("crossrefReferenceImportProcessor") + private ImportProcessor crossrefProcessor; @KafkaListener(topics = KafkaTopics.REFERENCES_IMPORT_TOPIC) public void receiveMessage(String message) { @@ -32,6 +38,21 @@ public void receiveMessage(String message) { return; } - processor.process(msg); + fileProcessor.process(msg); + } + + @KafkaListener(topics = KafkaTopics.REFERENCES_IMPORT_CROSSREF_TOPIC) + public void receiveCrossrefImportMessage(String message) { + ObjectMapper mapper = new ObjectMapper(); + KafkaJobMessage msg = null; + try { + msg = mapper.readValue(message, KafkaJobMessage.class); + } catch (IOException e) { + logger.error("Could not unmarshall message.", e); + // FIXME: handle this case + return; + } + + crossrefProcessor.process(msg); 
} } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java new file mode 100644 index 0000000..c5423c9 --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java @@ -0,0 +1,68 @@ +package edu.asu.diging.citesphere.importer.core.service; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; + +import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; +import edu.asu.diging.citesphere.importer.core.exception.MessageCreationException; +import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; +import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.messages.KafkaTopics; +import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; +import edu.asu.diging.citesphere.messages.model.KafkaImportReturnMessage; +import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; +import edu.asu.diging.citesphere.messages.model.ResponseCode; +import edu.asu.diging.citesphere.messages.model.Status; + +public abstract class AbstractImportProcessor implements ImportProcessor { + + protected final Logger logger = LoggerFactory.getLogger(getClass()); + + @Autowired + private KafkaRequestProducer requestProducer; + + @Autowired + private ICitesphereConnector connector; + + + @Override + public void process(KafkaJobMessage message) { + JobInfo info = getJobInfo(message); + if (info == null) { + sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X10); + return; + } + startImport(message, info); + } + + protected abstract void startImport(KafkaJobMessage message, JobInfo info); + + private JobInfo getJobInfo(KafkaJobMessage message) { + JobInfo 
info = null; + try { + info = connector.getJobInfo(message.getId()); + } catch (CitesphereCommunicationException e) { + // FIXME this needs to be handled better + logger.error("Could not get Zotero info.", e); + return null; + } + return info; + } + + protected void sendMessage(ItemCreationResponse message, String jobId, Status status, ResponseCode code) { + KafkaImportReturnMessage returnMessage = new KafkaImportReturnMessage(message, jobId); + returnMessage.setStatus(status); + returnMessage.setCode(code); + try { + requestProducer.sendRequest(returnMessage, KafkaTopics.REFERENCES_IMPORT_DONE_TOPIC); + } catch (MessageCreationException e) { + // FIXME handle this case + logger.error("Exception sending message.", e); + } + } + + public ICitesphereConnector getCitesphereConnector() { + return connector; + } +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/ImportProcessor.java similarity index 82% rename from citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IImportProcessor.java rename to citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/ImportProcessor.java index 5ff6d2b..1a3c34f 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/ImportProcessor.java @@ -2,7 +2,7 @@ import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; -public interface IImportProcessor { +public interface ImportProcessor { void process(KafkaJobMessage message); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java new file mode 100644 index 0000000..3da0888 --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -0,0 +1,50 @@ +package edu.asu.diging.citesphere.importer.core.service.impl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import javax.annotation.PostConstruct; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; + +import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; +import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; +import edu.asu.diging.crossref.exception.RequestFailedException; +import edu.asu.diging.crossref.model.Item; +import edu.asu.diging.crossref.service.CrossrefConfiguration; +import edu.asu.diging.crossref.service.CrossrefWorksService; +import edu.asu.diging.crossref.service.impl.CrossrefWorksServiceImpl; + +@Service +public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private CrossrefWorksService crossrefService; + + @PostConstruct + public void init() { + crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); + } + + public void startImport(KafkaJobMessage message, JobInfo info) { + logger.info("Starting import for " + info.getDois()); + + List items = new ArrayList<>(); + for (String doi : info.getDois()) { + try { + items.add(crossrefService.get(doi)); + } catch (RequestFailedException | IOException e) { + logger.error("Couuld not retrieve work for doi: "+ doi, e); + // for now we just log the exceptions + // we might want to devise a way to decide if the + // service might be down and we should stop sending requests. 
+ } + } + + } +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/ImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java similarity index 76% rename from citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/ImportProcessor.java rename to citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java index b4c5a9b..5fc1a54 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/ImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java @@ -6,8 +6,6 @@ import javax.annotation.PostConstruct; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -19,20 +17,16 @@ import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; -import edu.asu.diging.citesphere.importer.core.exception.MessageCreationException; -import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; import edu.asu.diging.citesphere.importer.core.model.impl.Publication; +import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.ICitesphereConnector; -import edu.asu.diging.citesphere.importer.core.service.IImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; import 
edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; -import edu.asu.diging.citesphere.messages.KafkaTopics; import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; -import edu.asu.diging.citesphere.messages.model.KafkaImportReturnMessage; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; import edu.asu.diging.citesphere.messages.model.ResponseCode; import edu.asu.diging.citesphere.messages.model.Status; @@ -46,12 +40,7 @@ * */ @Service -public class ImportProcessor implements IImportProcessor { - - private final Logger logger = LoggerFactory.getLogger(getClass()); - - @Autowired - private ICitesphereConnector connector; +public class FileImportProcessor extends AbstractImportProcessor { @Autowired private IHandlerRegistry handlerRegistry; @@ -62,9 +51,7 @@ public class ImportProcessor implements IImportProcessor { @Autowired private IJsonGenerationService generationService; - @Autowired - private KafkaRequestProducer requestProducer; - + /** * Map that maps internal bibliographical formats (contants of {@link Publication} * class) to Zotero item types ({@link ItemType} enum). 
@@ -91,13 +78,7 @@ public void init() { * (edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaJobMessage) */ @Override - public void process(KafkaJobMessage message) { - JobInfo info = getJobInfo(message); - if (info == null) { - sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X10); - return; - } - + public void startImport(KafkaJobMessage message, JobInfo info) { String filePath = downloadFile(message); if (filePath == null) { sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X20); @@ -153,18 +134,6 @@ public void process(KafkaJobMessage message) { sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } - private void sendMessage(ItemCreationResponse message, String jobId, Status status, ResponseCode code) { - KafkaImportReturnMessage returnMessage = new KafkaImportReturnMessage(message, jobId); - returnMessage.setStatus(status); - returnMessage.setCode(code); - try { - requestProducer.sendRequest(returnMessage, KafkaTopics.REFERENCES_IMPORT_DONE_TOPIC); - } catch (MessageCreationException e) { - // FIXME handle this case - logger.error("Exception sending message.", e); - } - } - private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { ObjectMapper mapper = new ObjectMapper(); try { @@ -186,22 +155,10 @@ private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { return null; } - private JobInfo getJobInfo(KafkaJobMessage message) { - JobInfo info = null; - try { - info = connector.getJobInfo(message.getId()); - } catch (CitesphereCommunicationException e) { - // FIXME this needs to be handled better - logger.error("Could not get Zotero info.", e); - return null; - } - return info; - } - private String downloadFile(KafkaJobMessage message) { String file = null; try { - file = connector.getUploadeFile(message.getId()); + file = getCitesphereConnector().getUploadeFile(message.getId()); } catch (CitesphereCommunicationException e) { // FIXME this needs to be handled better 
logger.error("Could not get Zotero info.", e); @@ -209,4 +166,5 @@ private String downloadFile(KafkaJobMessage message) { } return file; } + } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java index 8eb8026..2731d51 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java @@ -1,5 +1,7 @@ package edu.asu.diging.citesphere.importer.core.service.impl; +import java.util.List; + import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties(ignoreUnknown=true) @@ -8,6 +10,7 @@ public class JobInfo { private String zotero; private String zoteroId; private String groupId; + private List dois; public String getZotero() { return zotero; @@ -27,5 +30,11 @@ public String getGroupId() { public void setGroupId(String groupId) { this.groupId = groupId; } + public List getDois() { + return dois; + } + public void setDois(List dois) { + this.dois = dois; + } } From 656fab70dad971ee9bdfbfb47656556d410f2a51 Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Mon, 6 Mar 2023 17:02:27 -0700 Subject: [PATCH 02/35] [CITE-177] Trying to add functionality for starting the import --- .../CrossrefReferenceImportProcessor.java | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 3da0888..1d35eb9 100644 --- 
a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -11,8 +11,18 @@ import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + +import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.ItemType; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; +import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; +import edu.asu.diging.citesphere.messages.model.ResponseCode; +import edu.asu.diging.citesphere.messages.model.Status; import edu.asu.diging.crossref.exception.RequestFailedException; import edu.asu.diging.crossref.model.Item; import edu.asu.diging.crossref.service.CrossrefConfiguration; @@ -38,6 +48,42 @@ public void startImport(KafkaJobMessage message, JobInfo info) { for (String doi : info.getDois()) { try { items.add(crossrefService.get(doi)); + + // + ObjectMapper mapper = new ObjectMapper(); + ArrayNode root = mapper.createArrayNode(); + int entryCounter = 0; + while (items.hasNext()) { + BibEntry entry = bibIterator.next(); + if (entry.getArticleType() == null) { + // something is wrong with this entry, let's ignore it + continue; + } +// ItemType type = itemTypeMapping.get(entry.getArticleType()); +// JsonNode template = zoteroConnector.getTemplate(type); +// ObjectNode bibNode = generationService.generateJson(template, entry); + + root.add(bibNode); + entryCounter++; + + // we can submit max 50 entries to Zotoro + if (entryCounter >= 50) { + 
submitEntries(root, info); + entryCounter = 0; + root = mapper.createArrayNode(); + } + + } + + bibIterator.close(); + + ItemCreationResponse response = null; + if (entryCounter > 0) { + response = submitEntries(root, info); + } + + response = response != null ? response : new ItemCreationResponse(); + sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } catch (RequestFailedException | IOException e) { logger.error("Couuld not retrieve work for doi: "+ doi, e); // for now we just log the exceptions From a82d99608b29026875b57c9768eada3a1a64608e Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Tue, 7 Mar 2023 17:05:36 -0700 Subject: [PATCH 03/35] [CITE-177] Trying to add functionality for picking the job. --- .../CrossrefReferenceImportProcessor.java | 78 ++++++++++--------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 1d35eb9..8101616 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -2,7 +2,9 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import javax.annotation.PostConstruct; @@ -36,6 +38,8 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { private CrossrefWorksService crossrefService; + private Map itemTypeMapping = new HashMap<>(); + @PostConstruct public void init() { crossrefService = new 
CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); @@ -45,45 +49,16 @@ public void startImport(KafkaJobMessage message, JobInfo info) { logger.info("Starting import for " + info.getDois()); List items = new ArrayList<>(); + + // + ObjectMapper mapper = new ObjectMapper(); + ArrayNode root = mapper.createArrayNode(); + int entryCounter = 0; + // + for (String doi : info.getDois()) { try { items.add(crossrefService.get(doi)); - - // - ObjectMapper mapper = new ObjectMapper(); - ArrayNode root = mapper.createArrayNode(); - int entryCounter = 0; - while (items.hasNext()) { - BibEntry entry = bibIterator.next(); - if (entry.getArticleType() == null) { - // something is wrong with this entry, let's ignore it - continue; - } -// ItemType type = itemTypeMapping.get(entry.getArticleType()); -// JsonNode template = zoteroConnector.getTemplate(type); -// ObjectNode bibNode = generationService.generateJson(template, entry); - - root.add(bibNode); - entryCounter++; - - // we can submit max 50 entries to Zotoro - if (entryCounter >= 50) { - submitEntries(root, info); - entryCounter = 0; - root = mapper.createArrayNode(); - } - - } - - bibIterator.close(); - - ItemCreationResponse response = null; - if (entryCounter > 0) { - response = submitEntries(root, info); - } - - response = response != null ? response : new ItemCreationResponse(); - sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } catch (RequestFailedException | IOException e) { logger.error("Couuld not retrieve work for doi: "+ doi, e); // for now we just log the exceptions @@ -91,6 +66,37 @@ public void startImport(KafkaJobMessage message, JobInfo info) { // service might be down and we should stop sending requests. 
} } + + // + + items.forEach((item) -> { + if (item.getDoi() == null) { + // something is wrong with this entry, let's ignore it + continue; + } + ItemType type = itemTypeMapping.get(item.getDoi()); + JsonNode template = zoteroConnector.getTemplate(type); + ObjectNode bibNode = generationService.generateJson(template, entry); + + root.add(item); + entryCounter++; + + // we can submit max 50 entries to Zotoro + if (entryCounter >= 50) { + submitEntries(root, info); + entryCounter = 0; + root = mapper.createArrayNode(); + } + + }); + + ItemCreationResponse response = null; + if (entryCounter > 0) { + response = submitEntries(root, info); + } + + response = response != null ? response : new ItemCreationResponse(); + sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } } From d3aa18fa41654bd511239a09426f46bd457fe230 Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Wed, 8 Mar 2023 16:28:29 -0700 Subject: [PATCH 04/35] [CITE-177] Adding function for starting the import --- .../CrossrefReferenceImportProcessor.java | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 8101616..a3c382b 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -1,6 +1,7 @@ package edu.asu.diging.citesphere.importer.core.service.impl; import java.io.IOException; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -11,8 +12,10 @@ import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; @@ -21,6 +24,8 @@ import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; +import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; +import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; import edu.asu.diging.citesphere.messages.model.ResponseCode; @@ -40,6 +45,12 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { private Map itemTypeMapping = new HashMap<>(); + @Autowired + private IZoteroConnector zoteroConnector; + + @Autowired + private IJsonGenerationService generationService; + @PostConstruct public void init() { crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); @@ -76,7 +87,7 @@ public void startImport(KafkaJobMessage message, JobInfo info) { } ItemType type = itemTypeMapping.get(item.getDoi()); JsonNode template = zoteroConnector.getTemplate(type); - ObjectNode bibNode = generationService.generateJson(template, entry); + ObjectNode bibNode = generationService.generateJson(template, item); root.add(item); entryCounter++; @@ -99,4 +110,26 @@ public void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } + + private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { + ObjectMapper mapper = new ObjectMapper(); + try { + String 
msg = mapper.writeValueAsString(entries); + logger.info("Submitting " + msg); + ItemCreationResponse response = zoteroConnector.addEntries(info, entries); + if (response != null) { + logger.info(response.getSuccessful() + ""); + logger.error(response.getFailed() + ""); + } else { + logger.error("Item creation failed."); + } + return response; + } catch (URISyntaxException e) { + logger.error("Could not store new entry.", e); + } catch (JsonProcessingException e) { + logger.error("Could not write JSON."); + } + return null; + } + } From 1710f069148a098f88a55ce5cbc26f1c185e3a28 Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Thu, 9 Mar 2023 16:53:04 -0700 Subject: [PATCH 05/35] [CITE-177] Trying to add crossref import functionality --- .../CrossrefReferenceImportProcessor.java | 80 +++++++----- .../service/parse/impl/CrossRefHandler.java | 88 +++++++++++++ .../parse/iterators/CrossRefIterator.java | 121 ++++++++++++++++++ 3 files changed, 260 insertions(+), 29 deletions(-) create mode 100644 citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java create mode 100644 citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index a3c382b..379dde0 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -57,6 +57,11 @@ public void init() { } public void startImport(KafkaJobMessage message, JobInfo info) { + // message = 
jobToken: eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJKT0IxOTciLCJleHAiOjE2NzgzNDU3NDF9.5Xqh_AoMHcdlatULkCLFtny9pOF_uJ-SRARw0gCybY3h3qHL2mkIIQlk-qTA0Pn0VlhOLuW4FwACHmIdwZVmoA + // info = dois: [10.2307/j.ctvcm4h07.67, 10.1515/9780691242507] +// null +// zotero: byRZjIk2y4e3kay1cnwy3KpB +// zoteroId: 9154965 logger.info("Starting import for " + info.getDois()); List items = new ArrayList<>(); @@ -69,7 +74,24 @@ public void startImport(KafkaJobMessage message, JobInfo info) { for (String doi : info.getDois()) { try { - items.add(crossrefService.get(doi)); + Item item = crossrefService.get(doi); + + if (item.getType() == null) { + // something is wrong with this entry, let's ignore it + continue; + } + + ItemType type = itemTypeMapping.get(item.getType()); + JsonNode template = zoteroConnector.getTemplate(type); +// ObjectNode crossRefNode = generationService.generateJson(template, item); + +// items.add(item); + +// root.add(crossRefNode); + entryCounter++; + + + } catch (RequestFailedException | IOException e) { logger.error("Couuld not retrieve work for doi: "+ doi, e); // for now we just log the exceptions @@ -80,34 +102,34 @@ public void startImport(KafkaJobMessage message, JobInfo info) { // - items.forEach((item) -> { - if (item.getDoi() == null) { - // something is wrong with this entry, let's ignore it - continue; - } - ItemType type = itemTypeMapping.get(item.getDoi()); - JsonNode template = zoteroConnector.getTemplate(type); - ObjectNode bibNode = generationService.generateJson(template, item); - - root.add(item); - entryCounter++; - - // we can submit max 50 entries to Zotoro - if (entryCounter >= 50) { - submitEntries(root, info); - entryCounter = 0; - root = mapper.createArrayNode(); - } - - }); - - ItemCreationResponse response = null; - if (entryCounter > 0) { - response = submitEntries(root, info); - } - - response = response != null ? 
response : new ItemCreationResponse(); - sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); +// items.forEach((item) -> { +// if (item.getDoi() == null) { +// // something is wrong with this entry, let's ignore it +// continue; +// } +// ItemType type = itemTypeMapping.get(item.getDoi()); +// JsonNode template = zoteroConnector.getTemplate(type); +// ObjectNode bibNode = generationService.generateJson(template, item); +// +// root.add(item); +// entryCounter++; +// +// // we can submit max 50 entries to Zotoro +// if (entryCounter >= 50) { +// submitEntries(root, info); +// entryCounter = 0; +// root = mapper.createArrayNode(); +// } +// +// }); +// +// ItemCreationResponse response = null; +// if (entryCounter > 0) { +// response = submitEntries(root, info); +// } +// +// response = response != null ? response : new ItemCreationResponse(); +// sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java new file mode 100644 index 0000000..c0edb91 --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java @@ -0,0 +1,88 @@ +package edu.asu.diging.citesphere.importer.core.service.parse.impl; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import org.apache.commons.io.ByteOrderMark; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.LineIterator; +import org.apache.commons.io.input.BOMInputStream; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import 
org.springframework.context.annotation.PropertySource; +import org.springframework.stereotype.Service; + +import edu.asu.diging.citesphere.importer.core.exception.HandlerTestException; +import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; +import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.FileHandler; +import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; +import edu.asu.diging.citesphere.importer.core.service.parse.iterators.WoSTaggedFieldsIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser; + +@Service +public class CrossRefHandler implements FileHandler { + + @Autowired + private IArticleWoSTagParser parserRegistry; + + @Value("${_citesphere_download_path}") + private String downloadPath; + + @Override + public boolean canHandle(String path) throws HandlerTestException { +// File file = new File(path); +// try { +// BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(file), false, +// ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, +// ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE); +// if (inputStream.hasBOM()) { +// BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); +// File fileCopy = new File(file.getParent() + File.separator + "CopyOf" + file.getName()); +// while(reader.ready()) { +// FileUtils.write(fileCopy, reader.readLine(), inputStream.getBOMCharsetName(), true); +// } +// reader.close(); +// Files.copy(fileCopy.toPath(), file.toPath(), StandardCopyOption.REPLACE_EXISTING); +// Files.deleteIfExists(fileCopy.toPath()); +// } +// } catch (IOException e1) { +// throw new HandlerTestException("Unsupported file format.", e1); +// } + +// if (path.toLowerCase().endsWith(".txt") && !file.getName().startsWith(".")) { + if 
(path == null) { +// try (LineIterator it = FileUtils.lineIterator(file)) { +// int linesToRead = 10; +// int linesRead = 0; +// +// // we check the first 10 lines if they start with two capitals letters +// // followed by a space; if they all match, we assume it's WoS' data format. +// while (it.hasNext() && linesRead < linesToRead) { +// String line = it.nextLine(); +// line = line.replaceAll("\\p{C}", ""); +// if (!line.matches("([A-Z0-9]{2}| {2})( .*$|$)") && !line.trim().isEmpty()) { +// return false; +// } +// linesRead++; +// } + + return true; +// } catch (IOException e) { +// throw new HandlerTestException("Could not read lines.", e); +// } + } + return false; + } + + @Override + public BibEntryIterator getIterator(String path, IHandlerRegistry callback, JobInfo info) + throws IteratorCreationException { + return new WoSTaggedFieldsIterator(path, parserRegistry); + } + +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java new file mode 100644 index 0000000..9cbdd1a --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -0,0 +1,121 @@ +package edu.asu.diging.citesphere.importer.core.service.parse.iterators; + +import java.io.File; +import java.io.IOException; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.LineIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import 
edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser; + +public class CrossRefIterator implements BibEntryIterator { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private IArticleWoSTagParser tagParserRegistry; + private String filePath; + private LineIterator lineIterator; + private String currentLine = null; + + public CrossRefIterator(String filePath, IArticleWoSTagParser parserRegistry) { + this.filePath = filePath; + this.tagParserRegistry = parserRegistry; + init(); + } + + private void init() { + try { + lineIterator = FileUtils.lineIterator(new File(filePath)); + if (lineIterator.hasNext()) { + // we're at the beginning, so we'll signal that with and empty string + currentLine = ""; + } + } catch (IOException e) { + logger.error("Could not create line iterator.", e); + } + + } + + @Override + public BibEntry next() { + ArticleMeta articleMeta = new ArticleMeta(); + ContainerMeta containerMeta = new ContainerMeta(); + + BibEntry entry = new Publication(); + entry.setArticleMeta(articleMeta); + entry.setJournalMeta(containerMeta); + + String previousField = null; + int fieldIdx = 0; + + while (lineIterator.hasNext()) { + String line = lineIterator.nextLine(); + // this means we are at the end of an entry + if (line.trim().isEmpty()) { + break; + } + + // not a valid line or not filled field + if (line.length() < 2) { + continue; + } + String field = line.substring(0, 2); + String value = ""; + + if (line.length() > 2) { + value = line.substring(3); + } + + if (field.trim().isEmpty()) { + field = previousField; + fieldIdx++; + } else { + fieldIdx = 0; + } + tagParserRegistry.parseMetaTag(field, value, previousField, fieldIdx, entry, false); + + previousField = field; + } + + // in case there are several empty lines between entries + // let's skip them + advanceToNext(); + + return entry; + } + + private void advanceToNext() { + if (lineIterator.hasNext()) { + currentLine = lineIterator.next(); + if 
(currentLine.trim().isEmpty()) { + advanceToNext(); + } + } else { + currentLine = null; + } + } + + @Override + public boolean hasNext() { + return currentLine != null; + } + + @Override + public void close() { + if (lineIterator != null) { + try { + lineIterator.close(); + } catch (IOException e) { + logger.error("Couldn't close line iterator.", e); + } + } + } + +} From 10d837208904cfd95a9a5096bdc1b4046b3e360b Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Fri, 10 Mar 2023 16:45:58 -0700 Subject: [PATCH 06/35] [CITE-177] Trying to create Iterator for Crossref --- .../service/parse/impl/CrossRefHandler.java | 57 +-------- .../parse/iterators/CrossRefIterator.java | 109 ++++++------------ 2 files changed, 39 insertions(+), 127 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java index c0edb91..0a6c4a4 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java @@ -1,18 +1,7 @@ package edu.asu.diging.citesphere.importer.core.service.parse.impl; -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; -import org.apache.commons.io.ByteOrderMark; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.LineIterator; -import org.apache.commons.io.input.BOMInputStream; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.annotation.PropertySource; import 
org.springframework.stereotype.Service; import edu.asu.diging.citesphere.importer.core.exception.HandlerTestException; @@ -21,60 +10,22 @@ import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.FileHandler; import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; -import edu.asu.diging.citesphere.importer.core.service.parse.iterators.WoSTaggedFieldsIterator; -import edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser; +import edu.asu.diging.citesphere.importer.core.service.parse.iterators.CrossRefIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.jstor.xml.IArticleTagParser; @Service public class CrossRefHandler implements FileHandler { @Autowired - private IArticleWoSTagParser parserRegistry; + private IArticleTagParser parserRegistry; @Value("${_citesphere_download_path}") private String downloadPath; @Override public boolean canHandle(String path) throws HandlerTestException { -// File file = new File(path); -// try { -// BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(file), false, -// ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, -// ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE); -// if (inputStream.hasBOM()) { -// BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); -// File fileCopy = new File(file.getParent() + File.separator + "CopyOf" + file.getName()); -// while(reader.ready()) { -// FileUtils.write(fileCopy, reader.readLine(), inputStream.getBOMCharsetName(), true); -// } -// reader.close(); -// Files.copy(fileCopy.toPath(), file.toPath(), StandardCopyOption.REPLACE_EXISTING); -// Files.deleteIfExists(fileCopy.toPath()); -// } -// } catch (IOException e1) { -// throw new HandlerTestException("Unsupported file format.", e1); -// } - -// if (path.toLowerCase().endsWith(".txt") && !file.getName().startsWith(".")) { if 
(path == null) { -// try (LineIterator it = FileUtils.lineIterator(file)) { -// int linesToRead = 10; -// int linesRead = 0; -// -// // we check the first 10 lines if they start with two capitals letters -// // followed by a space; if they all match, we assume it's WoS' data format. -// while (it.hasNext() && linesRead < linesToRead) { -// String line = it.nextLine(); -// line = line.replaceAll("\\p{C}", ""); -// if (!line.matches("([A-Z0-9]{2}| {2})( .*$|$)") && !line.trim().isEmpty()) { -// return false; -// } -// linesRead++; -// } - return true; -// } catch (IOException e) { -// throw new HandlerTestException("Could not read lines.", e); -// } } return false; } @@ -82,7 +33,7 @@ public boolean canHandle(String path) throws HandlerTestException { @Override public BibEntryIterator getIterator(String path, IHandlerRegistry callback, JobInfo info) throws IteratorCreationException { - return new WoSTaggedFieldsIterator(path, parserRegistry); + return new CrossRefIterator(path, parserRegistry); } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 9cbdd1a..4323450 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -2,6 +2,8 @@ import java.io.File; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.io.FileUtils; import org.apache.commons.io.LineIterator; @@ -13,109 +15,68 @@ import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import 
edu.asu.diging.citesphere.importer.core.service.parse.jstor.xml.IArticleTagParser; import edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser; public class CrossRefIterator implements BibEntryIterator { private final Logger logger = LoggerFactory.getLogger(getClass()); - private IArticleWoSTagParser tagParserRegistry; + private IArticleTagParser tagParserRegistry; private String filePath; - private LineIterator lineIterator; - private String currentLine = null; + private BibEntry article; + + private boolean iteratorDone = false; + private Map typeMap; - public CrossRefIterator(String filePath, IArticleWoSTagParser parserRegistry) { + public CrossRefIterator(String filePath, IArticleTagParser parserRegistry) { this.filePath = filePath; this.tagParserRegistry = parserRegistry; init(); } private void init() { - try { - lineIterator = FileUtils.lineIterator(new File(filePath)); - if (lineIterator.hasNext()) { - // we're at the beginning, so we'll signal that with and empty string - currentLine = ""; - } - } catch (IOException e) { - logger.error("Could not create line iterator.", e); - } + typeMap = new HashMap(); + typeMap.put("research-article", Publication.ARTICLE); + typeMap.put("book-review", Publication.REVIEW); +// parseDocument(); } @Override public BibEntry next() { - ArticleMeta articleMeta = new ArticleMeta(); - ContainerMeta containerMeta = new ContainerMeta(); - - BibEntry entry = new Publication(); - entry.setArticleMeta(articleMeta); - entry.setJournalMeta(containerMeta); - - String previousField = null; - int fieldIdx = 0; - - while (lineIterator.hasNext()) { - String line = lineIterator.nextLine(); - // this means we are at the end of an entry - if (line.trim().isEmpty()) { - break; - } - - // not a valid line or not filled field - if (line.length() < 2) { - continue; - } - String field = line.substring(0, 2); - String value = ""; - - if (line.length() > 2) { - value = line.substring(3); - } - - if 
(field.trim().isEmpty()) { - field = previousField; - fieldIdx++; - } else { - fieldIdx = 0; - } - tagParserRegistry.parseMetaTag(field, value, previousField, fieldIdx, entry, false); - - previousField = field; + if (iteratorDone) { + return null; } - - // in case there are several empty lines between entries - // let's skip them - advanceToNext(); - - return entry; + iteratorDone = true; + return article; } - private void advanceToNext() { - if (lineIterator.hasNext()) { - currentLine = lineIterator.next(); - if (currentLine.trim().isEmpty()) { - advanceToNext(); - } - } else { - currentLine = null; - } - } +// private void advanceToNext() { +// if (lineIterator.hasNext()) { +// currentLine = lineIterator.next(); +// if (currentLine.trim().isEmpty()) { +// advanceToNext(); +// } +// } else { +// currentLine = null; +// } +// } @Override public boolean hasNext() { - return currentLine != null; + return !iteratorDone; } @Override public void close() { - if (lineIterator != null) { - try { - lineIterator.close(); - } catch (IOException e) { - logger.error("Couldn't close line iterator.", e); - } - } +// if (lineIterator != null) { +// try { +// lineIterator.close(); +// } catch (IOException e) { +// logger.error("Couldn't close line iterator.", e); +// } +// } } } From 27ee0911fd59eb162f037c669e0e4530e019e1cb Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Mon, 13 Mar 2023 17:09:46 -0700 Subject: [PATCH 07/35] [CITE-177] Iterator changes. 
--- .../parse/iterators/CrossRefIterator.java | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 4323450..58d8a69 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -5,10 +5,16 @@ import java.util.HashMap; import java.util.Map; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerFactoryConfigurationError; + import org.apache.commons.io.FileUtils; import org.apache.commons.io.LineIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; @@ -51,6 +57,52 @@ public BibEntry next() { iteratorDone = true; return article; } + + private ContainerMeta parseJournalMeta(Element element) { + NodeList journalMetaList = element.getElementsByTagName("journal-meta"); + if (journalMetaList.getLength() == 0) { + return null; + } + + ContainerMeta meta = new ContainerMeta(); + // there should only be one + Node journalMetaNode = journalMetaList.item(0); + + NodeList children = journalMetaNode.getChildNodes(); + for (int i = 0; i Date: Thu, 16 Mar 2023 16:13:53 -0700 Subject: [PATCH 08/35] [CITE-177] Updating the Iterator and ImportProcessor --- .../CrossrefReferenceImportProcessor.java | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git 
a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 379dde0..8c70cc4 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -21,9 +21,12 @@ import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; @@ -51,6 +54,9 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { @Autowired private IJsonGenerationService generationService; + @Autowired + private IHandlerRegistry handlerRegistry; + @PostConstruct public void init() { crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); @@ -71,7 +77,19 @@ public void startImport(KafkaJobMessage message, JobInfo info) { ArrayNode root = mapper.createArrayNode(); int entryCounter = 0; // - + sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); + BibEntryIterator bibIterator = null; + try { + bibIterator = 
handlerRegistry.handleFile(info, ); + } catch (IteratorCreationException e1) { + logger.error("Could not create iterator.", e1); + } + + if (bibIterator == null) { + sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); + return; + } + for (String doi : info.getDois()) { try { Item item = crossrefService.get(doi); @@ -83,11 +101,11 @@ public void startImport(KafkaJobMessage message, JobInfo info) { ItemType type = itemTypeMapping.get(item.getType()); JsonNode template = zoteroConnector.getTemplate(type); -// ObjectNode crossRefNode = generationService.generateJson(template, item); + ObjectNode crossRefNode = generationService.generateJson(template, item); -// items.add(item); + items.add(item); -// root.add(crossRefNode); + root.add(crossRefNode); entryCounter++; From 58f195cb71e3113a1378f4d240e97fdb818dcbbc Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Mon, 27 Mar 2023 16:04:43 -0700 Subject: [PATCH 09/35] [CITE-177] Adding iterator --- .../CrossrefReferenceImportProcessor.java | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 8c70cc4..e75aed4 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -80,7 +80,7 @@ public void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); BibEntryIterator bibIterator = null; try { - bibIterator = handlerRegistry.handleFile(info, ); + bibIterator = handlerRegistry.handleFile(info, 
null); } catch (IteratorCreationException e1) { logger.error("Could not create iterator.", e1); } @@ -120,34 +120,34 @@ public void startImport(KafkaJobMessage message, JobInfo info) { // -// items.forEach((item) -> { -// if (item.getDoi() == null) { -// // something is wrong with this entry, let's ignore it -// continue; -// } -// ItemType type = itemTypeMapping.get(item.getDoi()); -// JsonNode template = zoteroConnector.getTemplate(type); -// ObjectNode bibNode = generationService.generateJson(template, item); -// -// root.add(item); -// entryCounter++; -// -// // we can submit max 50 entries to Zotoro -// if (entryCounter >= 50) { -// submitEntries(root, info); -// entryCounter = 0; -// root = mapper.createArrayNode(); -// } -// -// }); -// -// ItemCreationResponse response = null; -// if (entryCounter > 0) { -// response = submitEntries(root, info); -// } -// -// response = response != null ? response : new ItemCreationResponse(); -// sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); + items.forEach((item) -> { + if (item.getDoi() == null) { + // something is wrong with this entry, let's ignore it + continue; + } + ItemType type = itemTypeMapping.get(item.getDoi()); + JsonNode template = zoteroConnector.getTemplate(type); + ObjectNode bibNode = generationService.generateJson(template, item); + + root.add(item); + entryCounter++; + + // we can submit max 50 entries to Zotoro + if (entryCounter >= 50) { + submitEntries(root, info); + entryCounter = 0; + root = mapper.createArrayNode(); + } + + }); + + ItemCreationResponse response = null; + if (entryCounter > 0) { + response = submitEntries(root, info); + } + + response = response != null ? 
response : new ItemCreationResponse(); + sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } From a2d4979989b0d6e6b21aa7743f20d686d0b2a1c7 Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Thu, 30 Mar 2023 17:14:16 -0700 Subject: [PATCH 10/35] Adding iterator. --- .../CrossrefReferenceImportProcessor.java | 62 ++++++++++++++----- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index e75aed4..cb9e44c 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -24,6 +24,7 @@ import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; @@ -59,7 +60,14 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { @PostConstruct public void init() { - crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); + crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); + itemTypeMapping.put(Publication.ARTICLE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK, ItemType.BOOK); + 
itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.LETTER, ItemType.LETTER); + itemTypeMapping.put(Publication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); + itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.DOCUMENT, ItemType.DOCUMENT); } public void startImport(KafkaJobMessage message, JobInfo info) { @@ -118,18 +126,17 @@ public void startImport(KafkaJobMessage message, JobInfo info) { } } - // - - items.forEach((item) -> { - if (item.getDoi() == null) { + while (bibIterator.hasNext()) { + BibEntry entry = bibIterator.next(); + if (entry.getArticleType() == null) { // something is wrong with this entry, let's ignore it continue; } - ItemType type = itemTypeMapping.get(item.getDoi()); + ItemType type = itemTypeMapping.get(entry.getArticleType()); JsonNode template = zoteroConnector.getTemplate(type); - ObjectNode bibNode = generationService.generateJson(template, item); + ObjectNode bibNode = generationService.generateJson(template, entry); - root.add(item); + root.add(bibNode); entryCounter++; // we can submit max 50 entries to Zotoro @@ -139,15 +146,38 @@ public void startImport(KafkaJobMessage message, JobInfo info) { root = mapper.createArrayNode(); } - }); - - ItemCreationResponse response = null; - if (entryCounter > 0) { - response = submitEntries(root, info); } - - response = response != null ? 
response : new ItemCreationResponse(); - sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); + + // + +// items.forEach((item) -> { +// if (item.getDoi() == null) { +// // something is wrong with this entry, let's ignore it +// continue; +// } +// ItemType type = itemTypeMapping.get(item.getDoi()); +// JsonNode template = zoteroConnector.getTemplate(type); +// ObjectNode bibNode = generationService.generateJson(template, item); +// +// root.add(item); +// entryCounter++; +// +// // we can submit max 50 entries to Zotoro +// if (entryCounter >= 50) { +// submitEntries(root, info); +// entryCounter = 0; +// root = mapper.createArrayNode(); +// } +// +// }); +// +// ItemCreationResponse response = null; +// if (entryCounter > 0) { +// response = submitEntries(root, info); +// } +// +// response = response != null ? response : new ItemCreationResponse(); +// sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } From 9616319c093be966e51d7a44da2b568f0a8f3afa Mon Sep 17 00:00:00 2001 From: Pratik Prakash Giri <32401726+PratikGiri@users.noreply.github.com> Date: Fri, 31 Mar 2023 16:55:55 -0700 Subject: [PATCH 11/35] [CITE-177] Correcting the iterator --- .../service/parse/impl/CrossRefHandler.java | 2 +- .../parse/iterators/CrossRefIterator.java | 19 +++---------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java index 0a6c4a4..12fdcc3 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java @@ -33,7 +33,7 @@ public boolean canHandle(String path) throws HandlerTestException { @Override public 
BibEntryIterator getIterator(String path, IHandlerRegistry callback, JobInfo info) throws IteratorCreationException { - return new CrossRefIterator(path, parserRegistry); + return new CrossRefIterator(); } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 58d8a69..be428a8 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -35,9 +35,9 @@ public class CrossRefIterator implements BibEntryIterator { private boolean iteratorDone = false; private Map typeMap; - public CrossRefIterator(String filePath, IArticleTagParser parserRegistry) { - this.filePath = filePath; - this.tagParserRegistry = parserRegistry; + public CrossRefIterator() { +// this.filePath = filePath; +// this.tagParserRegistry = parserRegistry; init(); } @@ -90,19 +90,6 @@ private ArticleMeta parseArticleMeta(Element element) { return meta; } - private void parseBack(Element element, ArticleMeta meta) throws TransformerConfigurationException, TransformerFactoryConfigurationError { - NodeList backList = element.getElementsByTagName("back"); - if (backList.getLength() == 0) { - return; - } - Node backNode = backList.item(0); - NodeList children = backNode.getChildNodes(); - - for (int i = 0; i Date: Mon, 3 Apr 2023 17:35:28 -0700 Subject: [PATCH 12/35] [CITE-177] Updating the Crossref Iterator --- .../parse/iterators/CrossRefIterator.java | 43 ------------------- 1 file changed, 43 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index be428a8..85bdd0b 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -58,49 +58,6 @@ public BibEntry next() { return article; } - private ContainerMeta parseJournalMeta(Element element) { - NodeList journalMetaList = element.getElementsByTagName("journal-meta"); - if (journalMetaList.getLength() == 0) { - return null; - } - - ContainerMeta meta = new ContainerMeta(); - // there should only be one - Node journalMetaNode = journalMetaList.item(0); - - NodeList children = journalMetaNode.getChildNodes(); - for (int i = 0; i Date: Mon, 10 Apr 2023 16:41:25 -0700 Subject: [PATCH 13/35] [CITE-177] CrossrefIterator and identifier --- .../parse/iterators/CrossRefIterator.java | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 85bdd0b..142e719 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -5,6 +5,9 @@ import java.util.HashMap; import java.util.Map; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerFactoryConfigurationError; @@ -12,9 +15,11 @@ import 
org.apache.commons.io.LineIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; @@ -34,6 +39,63 @@ public class CrossRefIterator implements BibEntryIterator { private boolean iteratorDone = false; private Map typeMap; + + private void parseDocument() { + DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); + dbFactory.setNamespaceAware(true); + DocumentBuilder dBuilder; + Document doc; + try { + dBuilder = dbFactory.newDocumentBuilder(); + doc = dBuilder.parse(filePath); + } catch (ParserConfigurationException | SAXException | IOException e) { + logger.error("Could not parse XML.", e); + return; + } + + article = new Publication(); + article.setArticleType(typeMap.get(doc.getDocumentElement().getAttribute("article-type"))); + article.setJournalMeta(parseJournalMeta(doc.getDocumentElement())); + article.setArticleMeta(parseArticleMeta(doc.getDocumentElement())); + try { + parseBack(doc.getDocumentElement(), article.getArticleMeta()); + } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) { + logger.error("Could not parse back.", e); + } + + } + + private ContainerMeta parseJournalMeta(Element element) { + NodeList journalMetaList = element.getElementsByTagName("journal-meta"); + if (journalMetaList.getLength() == 0) { + return null; + } + + ContainerMeta meta = new ContainerMeta(); + // there should only be one + Node journalMetaNode = journalMetaList.item(0); + + NodeList children = journalMetaNode.getChildNodes(); + for (int i = 0; i Date: Wed, 17 Apr 2024 13:59:14 -0700 Subject: [PATCH 14/35] [CITE-177] Resolved error CrossrefReferenceImportProcessor class --- .../core/service/impl/CrossrefReferenceImportProcessor.java | 5 
++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index cb9e44c..e3d6e22 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -107,9 +107,12 @@ public void startImport(KafkaJobMessage message, JobInfo info) { continue; } + BibEntry entry = new Publication(); + entry.setArticleType(item.getType()); + ItemType type = itemTypeMapping.get(item.getType()); JsonNode template = zoteroConnector.getTemplate(type); - ObjectNode crossRefNode = generationService.generateJson(template, item); + ObjectNode crossRefNode = generationService.generateJson(template, entry); items.add(item); From 74bcd5fb8cda478c1bdc94d88bc7527cc6837712 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Fri, 19 Apr 2024 16:29:44 -0700 Subject: [PATCH 15/35] [CITE-177] Added CrossRef types to CrossRefPublication, and updated CrossrefReferenceImportProcessor accordingly --- .../core/model/impl/CrossRefPublication.java | 70 +++++++++++++++++++ .../CrossrefReferenceImportProcessor.java | 20 +++--- 2 files changed, 81 insertions(+), 9 deletions(-) create mode 100644 citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java new file mode 100644 index 0000000..0ea9d88 --- /dev/null +++ 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java @@ -0,0 +1,70 @@ +package edu.asu.diging.citesphere.importer.core.model.impl; + +import edu.asu.diging.citesphere.importer.core.model.BibEntry; + +public class CrossRefPublication implements BibEntry { + public final static String ARTICLE = "journal-article"; + public final static String BOOK = "book"; + public final static String BOOK_CHAPTER = "book-chapter"; +// public final static String REVIEW = "review"; +// public final static String LETTER = "letter"; +// public final static String NEWS_ITEM = "newspaperArticle"; +// public final static String PROCEEDINGS_PAPER = "conferencePaper"; +// public final static String DOCUMENT = "document"; + public final static String MONOGRAPH = "monograph"; + public final static String JOURNAL_ISSUE = "journal-issue"; + public final static String REFERNCE_ENTRY = "reference-entry"; + public final static String POSTED_CONTENT = "posted-content"; + public final static String COMPONENT = "component"; + public final static String EDITED_BOOK = "edited-book"; + public final static String PROCEEDINGS_ARTICLE = "proceedings-article"; + public final static String DISSERTATION = "dissertation"; + public final static String BOOK_SECTION = "book-section"; + public final static String REPORT_COMPONENT = "report-component"; + public final static String REPORT = "report"; + public final static String PEER_REVIEW = "peer-review"; + public final static String BOOK_TRACK = "book-track"; + public final static String BOOK_PART = "book-part"; + public final static String OTHER = "other"; + public final static String JORUNAL_VOLUME = "journal-volume"; + public final static String BOOK_SET = "book-set"; + public final static String JOURNAL = "journal"; + public final static String PROCEEDINGS_SERIES = "proceedings-series"; + public final static String REPORT_SERIES = "report-series"; + public final static String PROCEEDINGS = 
"proceedings"; + public final static String DATABASE = "database"; + public final static String STANDARD = "standard"; + public final static String REFERENCE_BOOK = "reference-book"; + public final static String GRANT = "grant"; + public final static String DATASET = "dataset"; + public final static String BOOK_SERIES = "book-series"; + + private String articleType; + private ContainerMeta containerMeta; + private ArticleMeta articleMeta; + + @Override + public String getArticleType() { + return articleType; + } + @Override + public void setArticleType(String articleType) { + this.articleType = articleType; + } + @Override + public ContainerMeta getContainerMeta() { + return containerMeta; + } + @Override + public void setJournalMeta(ContainerMeta journalMeta) { + this.containerMeta = journalMeta; + } + @Override + public ArticleMeta getArticleMeta() { + return articleMeta; + } + @Override + public void setArticleMeta(ArticleMeta articleMeta) { + this.articleMeta = articleMeta; + } +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index e3d6e22..242d229 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -24,6 +24,7 @@ import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; +import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import 
edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; @@ -61,13 +62,13 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { @PostConstruct public void init() { crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); - itemTypeMapping.put(Publication.ARTICLE, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(Publication.BOOK, ItemType.BOOK); - itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION); - itemTypeMapping.put(Publication.LETTER, ItemType.LETTER); - itemTypeMapping.put(Publication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); - itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(Publication.DOCUMENT, ItemType.DOCUMENT); + itemTypeMapping.put(CrossRefPublication.ARTICLE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(CrossRefPublication.BOOK, ItemType.BOOK); + itemTypeMapping.put(CrossRefPublication.BOOK_CHAPTER, ItemType.BOOK_SECTION); +// itemTypeMapping.put(CrossRefPublication.LETTER, ItemType.LETTER); +// itemTypeMapping.put(CrossRefPublication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); +// itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); +// itemTypeMapping.put(CrossRefPublication.DOCUMENT, ItemType.DOCUMENT); } public void startImport(KafkaJobMessage message, JobInfo info) { @@ -88,6 +89,7 @@ public void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); BibEntryIterator bibIterator = null; try { + //TODO: Change the handleFile method. returns null currrently for crossref. 
bibIterator = handlerRegistry.handleFile(info, null); } catch (IteratorCreationException e1) { logger.error("Could not create iterator.", e1); @@ -106,8 +108,8 @@ public void startImport(KafkaJobMessage message, JobInfo info) { // something is wrong with this entry, let's ignore it continue; } - - BibEntry entry = new Publication(); + + BibEntry entry = new CrossRefPublication(); entry.setArticleType(item.getType()); ItemType type = itemTypeMapping.get(item.getType()); From 7ecdfd08ed0226988968e9302555494364a5f7c1 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Tue, 23 Apr 2024 14:59:03 -0700 Subject: [PATCH 16/35] [CITE-177] Updated CrossRefIterator --- .../core/model/impl/CrossRefPublication.java | 5 - .../CrossrefReferenceImportProcessor.java | 51 +--- .../service/parse/impl/CrossRefHandler.java | 2 +- .../parse/iterators/CrossRefIterator.java | 246 ++++++++++++------ 4 files changed, 165 insertions(+), 139 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java index 0ea9d88..3932144 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java @@ -6,11 +6,6 @@ public class CrossRefPublication implements BibEntry { public final static String ARTICLE = "journal-article"; public final static String BOOK = "book"; public final static String BOOK_CHAPTER = "book-chapter"; -// public final static String REVIEW = "review"; -// public final static String LETTER = "letter"; -// public final static String NEWS_ITEM = "newspaperArticle"; -// public final static String PROCEEDINGS_PAPER = "conferencePaper"; -// public final static String DOCUMENT = "document"; public final static String MONOGRAPH = 
"monograph"; public final static String JOURNAL_ISSUE = "journal-issue"; public final static String REFERNCE_ENTRY = "reference-entry"; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 242d229..d34bd24 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -1,12 +1,8 @@ package edu.asu.diging.citesphere.importer.core.service.impl; -import java.io.IOException; import java.net.URISyntaxException; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import javax.annotation.PostConstruct; @@ -25,7 +21,6 @@ import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; -import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; @@ -35,19 +30,12 @@ import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; import edu.asu.diging.citesphere.messages.model.ResponseCode; import edu.asu.diging.citesphere.messages.model.Status; -import edu.asu.diging.crossref.exception.RequestFailedException; -import edu.asu.diging.crossref.model.Item; -import edu.asu.diging.crossref.service.CrossrefConfiguration; -import edu.asu.diging.crossref.service.CrossrefWorksService; -import 
edu.asu.diging.crossref.service.impl.CrossrefWorksServiceImpl; @Service public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { private final Logger logger = LoggerFactory.getLogger(getClass()); - - private CrossrefWorksService crossrefService; - + private Map itemTypeMapping = new HashMap<>(); @Autowired @@ -60,8 +48,7 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { private IHandlerRegistry handlerRegistry; @PostConstruct - public void init() { - crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); + public void init() { itemTypeMapping.put(CrossRefPublication.ARTICLE, ItemType.JOURNAL_ARTICLE); itemTypeMapping.put(CrossRefPublication.BOOK, ItemType.BOOK); itemTypeMapping.put(CrossRefPublication.BOOK_CHAPTER, ItemType.BOOK_SECTION); @@ -79,13 +66,10 @@ public void startImport(KafkaJobMessage message, JobInfo info) { // zoteroId: 9154965 logger.info("Starting import for " + info.getDois()); - List items = new ArrayList<>(); - - // ObjectMapper mapper = new ObjectMapper(); ArrayNode root = mapper.createArrayNode(); int entryCounter = 0; - // + sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); BibEntryIterator bibIterator = null; try { @@ -100,36 +84,7 @@ public void startImport(KafkaJobMessage message, JobInfo info) { return; } - for (String doi : info.getDois()) { - try { - Item item = crossrefService.get(doi); - - if (item.getType() == null) { - // something is wrong with this entry, let's ignore it - continue; - } - BibEntry entry = new CrossRefPublication(); - entry.setArticleType(item.getType()); - - ItemType type = itemTypeMapping.get(item.getType()); - JsonNode template = zoteroConnector.getTemplate(type); - ObjectNode crossRefNode = generationService.generateJson(template, entry); - - items.add(item); - - root.add(crossRefNode); - entryCounter++; - - - - } catch (RequestFailedException | IOException e) { - logger.error("Couuld not retrieve 
work for doi: "+ doi, e); - // for now we just log the exceptions - // we might want to devise a way to decide if the - // service might be down and we should stop sending requests. - } - } while (bibIterator.hasNext()) { BibEntry entry = bibIterator.next(); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java index 12fdcc3..6e017d5 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java @@ -33,7 +33,7 @@ public boolean canHandle(String path) throws HandlerTestException { @Override public BibEntryIterator getIterator(String path, IHandlerRegistry callback, JobInfo info) throws IteratorCreationException { - return new CrossRefIterator(); + return new CrossRefIterator(info); } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 142e719..a9cf344 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -1,116 +1,192 @@ package edu.asu.diging.citesphere.importer.core.service.parse.iterators; -import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; 
-import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerFactoryConfigurationError; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.LineIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.xml.sax.SAXException; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ArticlePublicationDate; import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; -import edu.asu.diging.citesphere.importer.core.model.impl.Publication; +import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; +import edu.asu.diging.citesphere.importer.core.model.impl.Issn; +import edu.asu.diging.citesphere.importer.core.model.impl.Reference; +import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; -import edu.asu.diging.citesphere.importer.core.service.parse.jstor.xml.IArticleTagParser; -import edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser; +import edu.asu.diging.crossref.exception.RequestFailedException; +import edu.asu.diging.crossref.model.IssnType; +import edu.asu.diging.crossref.model.Item; +import edu.asu.diging.crossref.service.CrossrefConfiguration; +import edu.asu.diging.crossref.service.CrossrefWorksService; +import edu.asu.diging.crossref.service.impl.CrossrefWorksServiceImpl; public class CrossRefIterator implements BibEntryIterator { private final Logger logger = LoggerFactory.getLogger(getClass()); - private IArticleTagParser tagParserRegistry; - private String filePath; + private JobInfo info; private BibEntry article; - + private boolean iteratorDone = false; private Map 
typeMap; - private void parseDocument() { - DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); - dbFactory.setNamespaceAware(true); - DocumentBuilder dBuilder; - Document doc; - try { - dBuilder = dbFactory.newDocumentBuilder(); - doc = dBuilder.parse(filePath); - } catch (ParserConfigurationException | SAXException | IOException e) { - logger.error("Could not parse XML.", e); - return; - } + private CrossrefWorksService crossrefService; + + + + public CrossRefIterator(JobInfo info) { + this.info = info; + System.out.println(info.toString()); + init(); + } + + private void init() { + crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); + typeMap = new HashMap(); + typeMap.put("journal-article", CrossRefPublication.ARTICLE); + typeMap.put("book", CrossRefPublication.BOOK); + parseCrossRef(); + + } + + private void parseCrossRef() { - article = new Publication(); - article.setArticleType(typeMap.get(doc.getDocumentElement().getAttribute("article-type"))); - article.setJournalMeta(parseJournalMeta(doc.getDocumentElement())); - article.setArticleMeta(parseArticleMeta(doc.getDocumentElement())); - try { - parseBack(doc.getDocumentElement(), article.getArticleMeta()); - } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) { - logger.error("Could not parse back.", e); + List items = new ArrayList<>(); + for (String doi : info.getDois()) { + try { + Item item = crossrefService.get(doi); + + BibEntry article = new CrossRefPublication(); + article.setArticleType(typeMap.get(item.getType())); + article.setJournalMeta(parseJournalMeta(item)); + article.setArticleMeta(parseArticleMeta(item)); +// +// ItemType type = typeMap.get(entry.getArticleType()); +// System.out.println("type ================================= " + type.toString() + " " + type.getZoteroKey()); +// JsonNode template = zoteroConnector.getTemplate(type); +// ObjectNode crossRefNode = 
generationService.generateJson(template, entry); + + items.add(item); + +// root.add(crossRefNode); +// entryCounter++; + + + + } catch (RequestFailedException | IOException e) { + logger.error("Couuld not retrieve work for doi: "+ doi, e); + // for now we just log the exceptions + // we might want to devise a way to decide if the + // service might be down and we should stop sending requests. + } } - } - private ContainerMeta parseJournalMeta(Element element) { - NodeList journalMetaList = element.getElementsByTagName("journal-meta"); - if (journalMetaList.getLength() == 0) { - return null; - } - + private ContainerMeta parseJournalMeta(Item item) { ContainerMeta meta = new ContainerMeta(); - // there should only be one - Node journalMetaNode = journalMetaList.item(0); - - NodeList children = journalMetaNode.getChildNodes(); - for (int i = 0; i issnList = new ArrayList(); + for(IssnType issnType : item.getIssnType()) { + Issn issn = new Issn(); + issn.setIssn(issnType.getValue()); + issn.setPubType(issnType.getType()); + issnList.add(issn); } + meta.setIssns(issnList); + meta.setContributors(null); return meta; } - private ArticleMeta parseArticleMeta(Element element) { - NodeList articlelMetaList = element.getElementsByTagName("article-meta"); - if (articlelMetaList.getLength() == 0) { - return null; - } - + private ArticleMeta parseArticleMeta(Item item) { ArticleMeta meta = new ArticleMeta(); - Node articleMetaNode = articlelMetaList.item(0); - NodeList children = articleMetaNode.getChildNodes(); - for (int i = 0; i dateParts = item.getPublished().getIndexedDateParts(); + publicationDate.setPublicationDate(dateParts.get(2).toString()); + publicationDate.setPublicationMonth(dateParts.get(1).toString()); + publicationDate.setPublicationYear(dateParts.get(0).toString()); + meta.setPublicationDate(publicationDate); + meta.setVolume(item.getVolume()); + meta.setIssue(item.getIssue()); + meta.setIssueId(null); + meta.setSpecialIssue(null); + 
meta.setPartNumber(item.getPartNumber()); + meta.setSupplement(null); + meta.setFirstPage(item.getPage()); + meta.setLastPage(null); + meta.setPageCount(null); + meta.setChapterCount(null); + meta.setCopyrightStatement(null); + meta.setCopyrightYear(null); + meta.setCopyrightHolder(null); + meta.setSelfUri(item.getUrl()); + meta.setArticleAbstract(item.getAbstractText()); + meta.setLanguage(item.getLanguage()); + meta.setReviewInfo(null); // check if data can be added + meta.setDocumentType(null); // might be type of article + meta.setConferenceTitle(null); + meta.setConferenceDate(null); + meta.setConferenceLocation(null); + meta.setConferenceSponsor(null); + meta.setConferenceHost(null); + meta.setKeywords(null); + meta.setReprintAddress(null); + meta.setAdditionalData(null); + meta.setUnassignedIds(null); + meta.setFundingInfo(null); + meta.setFundingText(null); + List references = new ArrayList<>(); + for(edu.asu.diging.crossref.model.Reference itemRef: item.getReference()) { + Reference ref = new Reference(); + ref.setAuthorString(itemRef.getAuthor()); + ref.setContributors(null); + ref.setTitle(itemRef.getArticleTitle()); + ref.setYear(itemRef.getYear()); + if(itemRef.getDoi()!=null && !itemRef.getDoi().isBlank()) { + ref.setIdentifier(itemRef.getDoi()); + ref.setIdentifierType("DOI"); + ref.setSource(itemRef.getDoiAssertedBy()); + } else if (itemRef.getIssn()!=null && !itemRef.getIssn().isBlank()) { + ref.setIdentifier(itemRef.getIssn()); + ref.setIdentifierType("ISSN"); + } else if (itemRef.getIsbn()!=null && !itemRef.getIsbn().isBlank()) { + ref.setIdentifier(itemRef.getIsbn()); + ref.setIdentifierType("ISBN"); + } + ref.setFirstPage(itemRef.getFirstPage()); + ref.setEndPage(null); + ref.setVolume(itemRef.getVolume()); + ref.setReferenceId(itemRef.getKey()); + ref.setReferenceLabel(null); + ref.setPublicationType(null); + ref.setCitationId(null); + ref.setReferenceString(itemRef.getUnstructured()); + ref.setReferenceStringRaw(itemRef.getUnstructured()); + 
references.add(ref); } + meta.setReferences(references); + meta.setReferenceCount(item.getReferenceCount().toString()); + meta.setRetrievalDate(null); + return meta; } - public CrossRefIterator() { -// this.filePath = filePath; -// this.tagParserRegistry = parserRegistry; - init(); - } - - private void init() { - typeMap = new HashMap(); - typeMap.put("research-article", Publication.ARTICLE); - typeMap.put("book-review", Publication.REVIEW); -// parseDocument(); - - } - @Override public BibEntry next() { if (iteratorDone) { @@ -119,7 +195,7 @@ public BibEntry next() { iteratorDone = true; return article; } - + @Override public boolean hasNext() { @@ -128,13 +204,13 @@ public boolean hasNext() { @Override public void close() { -// if (lineIterator != null) { -// try { -// lineIterator.close(); -// } catch (IOException e) { -// logger.error("Couldn't close line iterator.", e); -// } -// } + // if (lineIterator != null) { + // try { + // lineIterator.close(); + // } catch (IOException e) { + // logger.error("Couldn't close line iterator.", e); + // } + // } } } From 41fed60e843be36ce53af3556ebe0e86ade90d5e Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Thu, 25 Apr 2024 14:07:59 -0700 Subject: [PATCH 17/35] [CITE-177] Updated CrossRefIterator --- .../core/model/impl/ContributionType.java | 2 + .../CrossrefReferenceImportProcessor.java | 14 +- .../parse/iterators/CrossRefIterator.java | 243 +++++++++++------- 3 files changed, 171 insertions(+), 88 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ContributionType.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ContributionType.java index 5d8c724..e2e5e84 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ContributionType.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ContributionType.java @@ -4,4 +4,6 @@ public interface 
ContributionType { public final static String AUTHOR = "author"; public final static String EDITOR = "editor"; + public final static String TRANSLATOR = "translator"; + public final static String CHAIR = "chair"; } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index d34bd24..d4db701 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -92,6 +92,7 @@ public void startImport(KafkaJobMessage message, JobInfo info) { // something is wrong with this entry, let's ignore it continue; } + System.out.println("======================================" + entry.getArticleType()); ItemType type = itemTypeMapping.get(entry.getArticleType()); JsonNode template = zoteroConnector.getTemplate(type); ObjectNode bibNode = generationService.generateJson(template, entry); @@ -108,7 +109,16 @@ public void startImport(KafkaJobMessage message, JobInfo info) { } - // + bibIterator.close(); + + ItemCreationResponse response = null; + if (entryCounter > 0) { + response = submitEntries(root, info); + } + + response = response != null ? response : new ItemCreationResponse(); + sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); // giving error 500 as response code mentioned + } // items.forEach((item) -> { // if (item.getDoi() == null) { @@ -139,7 +149,7 @@ public void startImport(KafkaJobMessage message, JobInfo info) { // response = response != null ? 
response : new ItemCreationResponse(); // sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); - } +// } private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { ObjectMapper mapper = new ObjectMapper(); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index a9cf344..0a812b2 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -10,17 +11,25 @@ import org.slf4j.LoggerFactory; import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.impl.Affiliation; import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; import edu.asu.diging.citesphere.importer.core.model.impl.ArticlePublicationDate; import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ContributionType; +import edu.asu.diging.citesphere.importer.core.model.impl.Contributor; +import edu.asu.diging.citesphere.importer.core.model.impl.ContributorId; import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; import edu.asu.diging.citesphere.importer.core.model.impl.Issn; import edu.asu.diging.citesphere.importer.core.model.impl.Reference; +import edu.asu.diging.citesphere.importer.core.model.impl.ReviewInfo; import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; import 
edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.crossref.exception.RequestFailedException; +import edu.asu.diging.crossref.model.Institution; import edu.asu.diging.crossref.model.IssnType; import edu.asu.diging.crossref.model.Item; +import edu.asu.diging.crossref.model.Person; +import edu.asu.diging.crossref.model.Review; import edu.asu.diging.crossref.service.CrossrefConfiguration; import edu.asu.diging.crossref.service.CrossrefWorksService; import edu.asu.diging.crossref.service.impl.CrossrefWorksServiceImpl; @@ -34,9 +43,9 @@ public class CrossRefIterator implements BibEntryIterator { private boolean iteratorDone = false; private Map typeMap; - + private CrossrefWorksService crossrefService; - + public CrossRefIterator(JobInfo info) { @@ -53,31 +62,29 @@ private void init() { parseCrossRef(); } - + private void parseCrossRef() { - - List items = new ArrayList<>(); + + List items = new ArrayList<>(); for (String doi : info.getDois()) { try { Item item = crossrefService.get(doi); - - BibEntry article = new CrossRefPublication(); + + article = new CrossRefPublication(); article.setArticleType(typeMap.get(item.getType())); article.setJournalMeta(parseJournalMeta(item)); article.setArticleMeta(parseArticleMeta(item)); -// -// ItemType type = typeMap.get(entry.getArticleType()); -// System.out.println("type ================================= " + type.toString() + " " + type.getZoteroKey()); -// JsonNode template = zoteroConnector.getTemplate(type); -// ObjectNode crossRefNode = generationService.generateJson(template, entry); - - items.add(item); - -// root.add(crossRefNode); -// entryCounter++; - - - + // + // ItemType type = typeMap.get(entry.getArticleType()); + // System.out.println("type ================================= " + type.toString() + " " + type.getZoteroKey()); + // JsonNode template = zoteroConnector.getTemplate(type); + // ObjectNode crossRefNode = generationService.generateJson(template, entry); + + 
items.add(article); + + // root.add(crossRefNode); + // entryCounter++; + } catch (RequestFailedException | IOException e) { logger.error("Couuld not retrieve work for doi: "+ doi, e); // for now we just log the exceptions @@ -86,17 +93,12 @@ private void parseCrossRef() { } } } - + private ContainerMeta parseJournalMeta(Item item) { ContainerMeta meta = new ContainerMeta(); - meta.setJournalIds(null); meta.setContainerTitle(String.join(", ", item.getContainerTitle())); - meta.setJournalAbbreviations(null); meta.setPublisherName(item.getPublisher()); meta.setPublisherLocation(item.getPublisherLocation()); - meta.setPublisherAddress(null); - meta.setSeriesTitle(null); - meta.setSeriesSubTitle(null); List issnList = new ArrayList(); for(IssnType issnType : item.getIssnType()) { Issn issn = new Issn(); @@ -105,85 +107,154 @@ private ContainerMeta parseJournalMeta(Item item) { issnList.add(issn); } meta.setIssns(issnList); - meta.setContributors(null); + + List contributors = new ArrayList<>(); + if(item.getChair() != null) { + Person itemChair = item.getChair(); + Contributor chair = new Contributor(); + chair.setContributionType(ContributionType.CHAIR); + chair.setGivenName(itemChair.getGiven()); + chair.setSurname(itemChair.getFamily()); + chair.setFullName(itemChair.getName()); + List affiliations = new ArrayList<>(); + for(Institution institute: itemChair.getAffiliation()) { + Affiliation affiliation = new Affiliation(); + affiliation.setName(institute.getName()); + affiliations.add(affiliation); + } + chair.setAffiliations(affiliations); + ContributorId contributorID = new ContributorId(); + contributorID.setId(itemChair.getOrcid()); + contributorID.setIdSystem("ORCID"); + chair.setIds(Arrays.asList(contributorID)); + contributors.add(chair); + } + // added Editors & translators to article meta. 
+ return meta; } - + private ArticleMeta parseArticleMeta(Item item) { ArticleMeta meta = new ArticleMeta(); - meta.setArticleIds(null); meta.setArticleTitle(String.join(", ", item.getTitle())); - meta.setCategories(null); - meta.setContributors(null); + List contributors = new ArrayList<>(); + // List of authors + if(item.getAuthor() != null) { + for(Person itemAuthor: item.getAuthor()) { + Contributor author = new Contributor(); + author.setContributionType(ContributionType.AUTHOR); + author.setGivenName(itemAuthor.getGiven()); + author.setSurname(itemAuthor.getFamily()); + author.setFullName(itemAuthor.getName()); + List affiliations = new ArrayList<>(); + for(Institution institute: itemAuthor.getAffiliation()) { + Affiliation affiliation = new Affiliation(); + affiliation.setName(institute.getName()); + affiliations.add(affiliation); + } + author.setAffiliations(affiliations); + ContributorId contributorID = new ContributorId(); + contributorID.setId(itemAuthor.getOrcid()); + contributorID.setIdSystem("ORCID"); + author.setIds(Arrays.asList(contributorID)); + contributors.add(author); + } + } + // List of editors + if(item.getEditor() != null) { + for(Person itemEditor: item.getEditor()) { + Contributor editor = new Contributor(); + editor.setContributionType(ContributionType.EDITOR); + editor.setGivenName(itemEditor.getGiven()); + editor.setSurname(itemEditor.getFamily()); + editor.setFullName(itemEditor.getName()); + List affiliations = new ArrayList<>(); + for(Institution institute: itemEditor.getAffiliation()) { + Affiliation affiliation = new Affiliation(); + affiliation.setName(institute.getName()); + affiliations.add(affiliation); + } + editor.setAffiliations(affiliations); + ContributorId contributorID = new ContributorId(); + contributorID.setId(itemEditor.getOrcid()); + contributorID.setIdSystem("ORCID"); + editor.setIds(Arrays.asList(contributorID)); + contributors.add(editor); + } + } + // List of translators + if(item.getTranslator() != null) { + 
for(Person itemTranslator: item.getTranslator()) { + Contributor translator = new Contributor(); + translator.setContributionType(ContributionType.EDITOR); + translator.setGivenName(itemTranslator.getGiven()); + translator.setSurname(itemTranslator.getFamily()); + translator.setFullName(itemTranslator.getName()); + List affiliations = new ArrayList<>(); + for(Institution institute: itemTranslator.getAffiliation()) { + Affiliation affiliation = new Affiliation(); + affiliation.setName(institute.getName()); + affiliations.add(affiliation); + } + translator.setAffiliations(affiliations); + ContributorId contributorID = new ContributorId(); + contributorID.setId(itemTranslator.getOrcid()); + contributorID.setIdSystem("ORCID"); + translator.setIds(Arrays.asList(contributorID)); + contributors.add(translator); + } + } + meta.setContributors(contributors); meta.setAuthorNotesCorrespondence(null); ArticlePublicationDate publicationDate = new ArticlePublicationDate(); List dateParts = item.getPublished().getIndexedDateParts(); - publicationDate.setPublicationDate(dateParts.get(2).toString()); - publicationDate.setPublicationMonth(dateParts.get(1).toString()); - publicationDate.setPublicationYear(dateParts.get(0).toString()); + if(dateParts != null) { + publicationDate.setPublicationDate(dateParts.get(2).toString()); + publicationDate.setPublicationMonth(dateParts.get(1).toString()); + publicationDate.setPublicationYear(dateParts.get(0).toString()); + } meta.setPublicationDate(publicationDate); meta.setVolume(item.getVolume()); meta.setIssue(item.getIssue()); - meta.setIssueId(null); - meta.setSpecialIssue(null); meta.setPartNumber(item.getPartNumber()); - meta.setSupplement(null); meta.setFirstPage(item.getPage()); - meta.setLastPage(null); - meta.setPageCount(null); - meta.setChapterCount(null); - meta.setCopyrightStatement(null); - meta.setCopyrightYear(null); - meta.setCopyrightHolder(null); meta.setSelfUri(item.getUrl()); meta.setArticleAbstract(item.getAbstractText()); 
meta.setLanguage(item.getLanguage()); - meta.setReviewInfo(null); // check if data can be added - meta.setDocumentType(null); // might be type of article - meta.setConferenceTitle(null); - meta.setConferenceDate(null); - meta.setConferenceLocation(null); - meta.setConferenceSponsor(null); - meta.setConferenceHost(null); - meta.setKeywords(null); - meta.setReprintAddress(null); - meta.setAdditionalData(null); - meta.setUnassignedIds(null); - meta.setFundingInfo(null); - meta.setFundingText(null); + Review itemReview = item.getReview(); + ReviewInfo review = new ReviewInfo(); +// review.setFullDescription(itemReview.getCompetingInterestStatement()); //TODO: giving null pointer error + meta.setReviewInfo(review); + meta.setDocumentType(item.getType()); + List references = new ArrayList<>(); - for(edu.asu.diging.crossref.model.Reference itemRef: item.getReference()) { - Reference ref = new Reference(); - ref.setAuthorString(itemRef.getAuthor()); - ref.setContributors(null); - ref.setTitle(itemRef.getArticleTitle()); - ref.setYear(itemRef.getYear()); - if(itemRef.getDoi()!=null && !itemRef.getDoi().isBlank()) { - ref.setIdentifier(itemRef.getDoi()); - ref.setIdentifierType("DOI"); - ref.setSource(itemRef.getDoiAssertedBy()); - } else if (itemRef.getIssn()!=null && !itemRef.getIssn().isBlank()) { - ref.setIdentifier(itemRef.getIssn()); - ref.setIdentifierType("ISSN"); - } else if (itemRef.getIsbn()!=null && !itemRef.getIsbn().isBlank()) { - ref.setIdentifier(itemRef.getIsbn()); - ref.setIdentifierType("ISBN"); - } - ref.setFirstPage(itemRef.getFirstPage()); - ref.setEndPage(null); - ref.setVolume(itemRef.getVolume()); - ref.setReferenceId(itemRef.getKey()); - ref.setReferenceLabel(null); - ref.setPublicationType(null); - ref.setCitationId(null); - ref.setReferenceString(itemRef.getUnstructured()); - ref.setReferenceStringRaw(itemRef.getUnstructured()); - references.add(ref); - } +// for(edu.asu.diging.crossref.model.Reference itemRef: item.getReference()) { // TODO: 
giving null pointer error +// Reference ref = new Reference(); +// ref.setAuthorString(itemRef.getAuthor()); +// ref.setContributors(null); +// ref.setTitle(itemRef.getArticleTitle()); +// ref.setYear(itemRef.getYear()); +// if(itemRef.getDoi()!=null && !itemRef.getDoi().isBlank()) { +// ref.setIdentifier(itemRef.getDoi()); +// ref.setIdentifierType("DOI"); +// ref.setSource(itemRef.getDoiAssertedBy()); +// } else if (itemRef.getIssn()!=null && !itemRef.getIssn().isBlank()) { +// ref.setIdentifier(itemRef.getIssn()); +// ref.setIdentifierType("ISSN"); +// } else if (itemRef.getIsbn()!=null && !itemRef.getIsbn().isBlank()) { +// ref.setIdentifier(itemRef.getIsbn()); +// ref.setIdentifierType("ISBN"); +// } +// ref.setFirstPage(itemRef.getFirstPage()); +// ref.setVolume(itemRef.getVolume()); +// ref.setReferenceId(itemRef.getKey()); +// ref.setReferenceString(itemRef.getUnstructured()); +// ref.setReferenceStringRaw(itemRef.getUnstructured()); +// references.add(ref); +// } meta.setReferences(references); meta.setReferenceCount(item.getReferenceCount().toString()); - meta.setRetrievalDate(null); - + return meta; } From 7f097df3f594ce5b4d25e9fa458f4c3fdea67221 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Fri, 26 Apr 2024 16:38:15 -0700 Subject: [PATCH 18/35] [CITE-177] updated generateJson method of JsonGenerationService --- .../service/impl/CitesphereConnector.java | 3 +- .../CrossrefReferenceImportProcessor.java | 34 ---- .../parse/iterators/CrossRefIterator.java | 148 +++++++----------- .../zotero/template/ItemJsonGenerator.java | 2 +- .../template/impl/CrossRefGenerator.java | 15 ++ .../template/impl/JsonGenerationService.java | 6 +- 6 files changed, 79 insertions(+), 129 deletions(-) create mode 100644 citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/CrossRefGenerator.java diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CitesphereConnector.java 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CitesphereConnector.java index 93430ac..e2646bd 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CitesphereConnector.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CitesphereConnector.java @@ -109,7 +109,8 @@ public JobInfo getJobInfo(String apiToken) throws CitesphereCommunicationExcepti JobInfo info = null; if (status == HttpStatus.OK) { - String responseBody = response.getBody(); + String responseBody = response.getBody(); //TODO: Not gettting group ID here. + System.out.println("==================== response - " + responseBody); ObjectMapper mapper = new ObjectMapper(); try { info = mapper.readValue(responseBody, JobInfo.class); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index d4db701..7f6a569 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -73,7 +73,6 @@ public void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); BibEntryIterator bibIterator = null; try { - //TODO: Change the handleFile method. returns null currrently for crossref. 
bibIterator = handlerRegistry.handleFile(info, null); } catch (IteratorCreationException e1) { logger.error("Could not create iterator.", e1); @@ -84,8 +83,6 @@ public void startImport(KafkaJobMessage message, JobInfo info) { return; } - - while (bibIterator.hasNext()) { BibEntry entry = bibIterator.next(); if (entry.getArticleType() == null) { @@ -119,37 +116,6 @@ public void startImport(KafkaJobMessage message, JobInfo info) { response = response != null ? response : new ItemCreationResponse(); sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); // giving error 500 as response code mentioned } - -// items.forEach((item) -> { -// if (item.getDoi() == null) { -// // something is wrong with this entry, let's ignore it -// continue; -// } -// ItemType type = itemTypeMapping.get(item.getDoi()); -// JsonNode template = zoteroConnector.getTemplate(type); -// ObjectNode bibNode = generationService.generateJson(template, item); -// -// root.add(item); -// entryCounter++; -// -// // we can submit max 50 entries to Zotoro -// if (entryCounter >= 50) { -// submitEntries(root, info); -// entryCounter = 0; -// root = mapper.createArrayNode(); -// } -// -// }); -// -// ItemCreationResponse response = null; -// if (entryCounter > 0) { -// response = submitEntries(root, info); -// } -// -// response = response != null ? 
response : new ItemCreationResponse(); -// sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); - -// } private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { ObjectMapper mapper = new ObjectMapper(); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 0a812b2..ac40936 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -74,17 +74,8 @@ private void parseCrossRef() { article.setArticleType(typeMap.get(item.getType())); article.setJournalMeta(parseJournalMeta(item)); article.setArticleMeta(parseArticleMeta(item)); - // - // ItemType type = typeMap.get(entry.getArticleType()); - // System.out.println("type ================================= " + type.toString() + " " + type.getZoteroKey()); - // JsonNode template = zoteroConnector.getTemplate(type); - // ObjectNode crossRefNode = generationService.generateJson(template, entry); items.add(article); - - // root.add(crossRefNode); - // entryCounter++; - } catch (RequestFailedException | IOException e) { logger.error("Couuld not retrieve work for doi: "+ doi, e); // for now we just log the exceptions @@ -140,69 +131,15 @@ private ArticleMeta parseArticleMeta(Item item) { List contributors = new ArrayList<>(); // List of authors if(item.getAuthor() != null) { - for(Person itemAuthor: item.getAuthor()) { - Contributor author = new Contributor(); - author.setContributionType(ContributionType.AUTHOR); - author.setGivenName(itemAuthor.getGiven()); - author.setSurname(itemAuthor.getFamily()); - author.setFullName(itemAuthor.getName()); - List affiliations = new ArrayList<>(); - 
for(Institution institute: itemAuthor.getAffiliation()) { - Affiliation affiliation = new Affiliation(); - affiliation.setName(institute.getName()); - affiliations.add(affiliation); - } - author.setAffiliations(affiliations); - ContributorId contributorID = new ContributorId(); - contributorID.setId(itemAuthor.getOrcid()); - contributorID.setIdSystem("ORCID"); - author.setIds(Arrays.asList(contributorID)); - contributors.add(author); - } + contributors.addAll(mapPersonToContributor(item.getAuthor())); } // List of editors if(item.getEditor() != null) { - for(Person itemEditor: item.getEditor()) { - Contributor editor = new Contributor(); - editor.setContributionType(ContributionType.EDITOR); - editor.setGivenName(itemEditor.getGiven()); - editor.setSurname(itemEditor.getFamily()); - editor.setFullName(itemEditor.getName()); - List affiliations = new ArrayList<>(); - for(Institution institute: itemEditor.getAffiliation()) { - Affiliation affiliation = new Affiliation(); - affiliation.setName(institute.getName()); - affiliations.add(affiliation); - } - editor.setAffiliations(affiliations); - ContributorId contributorID = new ContributorId(); - contributorID.setId(itemEditor.getOrcid()); - contributorID.setIdSystem("ORCID"); - editor.setIds(Arrays.asList(contributorID)); - contributors.add(editor); - } + contributors.addAll(mapPersonToContributor(item.getEditor())); } // List of translators if(item.getTranslator() != null) { - for(Person itemTranslator: item.getTranslator()) { - Contributor translator = new Contributor(); - translator.setContributionType(ContributionType.EDITOR); - translator.setGivenName(itemTranslator.getGiven()); - translator.setSurname(itemTranslator.getFamily()); - translator.setFullName(itemTranslator.getName()); - List affiliations = new ArrayList<>(); - for(Institution institute: itemTranslator.getAffiliation()) { - Affiliation affiliation = new Affiliation(); - affiliation.setName(institute.getName()); - affiliations.add(affiliation); - } - 
translator.setAffiliations(affiliations); - ContributorId contributorID = new ContributorId(); - contributorID.setId(itemTranslator.getOrcid()); - contributorID.setIdSystem("ORCID"); - translator.setIds(Arrays.asList(contributorID)); - contributors.add(translator); - } + contributors.addAll(mapPersonToContributor(item.getTranslator())); } meta.setContributors(contributors); meta.setAuthorNotesCorrespondence(null); @@ -221,43 +158,70 @@ private ArticleMeta parseArticleMeta(Item item) { meta.setSelfUri(item.getUrl()); meta.setArticleAbstract(item.getAbstractText()); meta.setLanguage(item.getLanguage()); - Review itemReview = item.getReview(); ReviewInfo review = new ReviewInfo(); -// review.setFullDescription(itemReview.getCompetingInterestStatement()); //TODO: giving null pointer error + if (item.getReview() != null) { + review.setFullDescription(item.getReview().getCompetingInterestStatement()); //TODO: giving null pointer error + } meta.setReviewInfo(review); meta.setDocumentType(item.getType()); List references = new ArrayList<>(); -// for(edu.asu.diging.crossref.model.Reference itemRef: item.getReference()) { // TODO: giving null pointer error -// Reference ref = new Reference(); -// ref.setAuthorString(itemRef.getAuthor()); -// ref.setContributors(null); -// ref.setTitle(itemRef.getArticleTitle()); -// ref.setYear(itemRef.getYear()); -// if(itemRef.getDoi()!=null && !itemRef.getDoi().isBlank()) { -// ref.setIdentifier(itemRef.getDoi()); -// ref.setIdentifierType("DOI"); -// ref.setSource(itemRef.getDoiAssertedBy()); -// } else if (itemRef.getIssn()!=null && !itemRef.getIssn().isBlank()) { -// ref.setIdentifier(itemRef.getIssn()); -// ref.setIdentifierType("ISSN"); -// } else if (itemRef.getIsbn()!=null && !itemRef.getIsbn().isBlank()) { -// ref.setIdentifier(itemRef.getIsbn()); -// ref.setIdentifierType("ISBN"); -// } -// ref.setFirstPage(itemRef.getFirstPage()); -// ref.setVolume(itemRef.getVolume()); -// ref.setReferenceId(itemRef.getKey()); -// 
ref.setReferenceString(itemRef.getUnstructured()); -// ref.setReferenceStringRaw(itemRef.getUnstructured()); -// references.add(ref); -// } + if(item.getReference() != null) { + for(edu.asu.diging.crossref.model.Reference itemRef: item.getReference()) { // TODO: giving null pointer error + Reference ref = new Reference(); + ref.setAuthorString(itemRef.getAuthor()); + ref.setContributors(null); + ref.setTitle(itemRef.getArticleTitle()); + ref.setYear(itemRef.getYear()); + if(itemRef.getDoi()!=null && !itemRef.getDoi().isBlank()) { + ref.setIdentifier(itemRef.getDoi()); + ref.setIdentifierType("DOI"); + ref.setSource(itemRef.getDoiAssertedBy()); + } else if (itemRef.getIssn()!=null && !itemRef.getIssn().isBlank()) { + ref.setIdentifier(itemRef.getIssn()); + ref.setIdentifierType("ISSN"); + } else if (itemRef.getIsbn()!=null && !itemRef.getIsbn().isBlank()) { + ref.setIdentifier(itemRef.getIsbn()); + ref.setIdentifierType("ISBN"); + } + ref.setFirstPage(itemRef.getFirstPage()); + ref.setVolume(itemRef.getVolume()); + ref.setReferenceId(itemRef.getKey()); + ref.setReferenceString(itemRef.getUnstructured()); + ref.setReferenceStringRaw(itemRef.getUnstructured()); + references.add(ref); + } + } meta.setReferences(references); meta.setReferenceCount(item.getReferenceCount().toString()); return meta; } + public List mapPersonToContributor(List personList) { + List contributors = new ArrayList(); + for(Person person: personList) { + Contributor contributor = new Contributor(); + contributor.setContributionType(ContributionType.EDITOR); + contributor.setGivenName(person.getGiven()); + contributor.setSurname(person.getFamily()); + contributor.setFullName(person.getName()); + List affiliations = new ArrayList<>(); + for(Institution institute: person.getAffiliation()) { + Affiliation affiliation = new Affiliation(); + affiliation.setName(institute.getName()); + affiliations.add(affiliation); + } + contributor.setAffiliations(affiliations); + ContributorId contributorID = new 
ContributorId(); + contributorID.setId(person.getOrcid()); + contributorID.setIdSystem("ORCID"); + contributor.setIds(Arrays.asList(contributorID)); + contributors.add(contributor); + } + return contributors; + } + @Override public BibEntry next() { if (iteratorDone) { diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java index 2d9240a..d2c16d6 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java @@ -162,7 +162,7 @@ public ArrayNode processCreators(JsonNode node, BibEntry article) { creators.add(contributorNode); } - if (article.getArticleMeta().getReviewInfo() != null) { + if (article.getArticleMeta().getReviewInfo() != null && article.getArticleMeta().getReviewInfo().getContributors() != null) { for (Contributor reviewedAuthor : article.getArticleMeta().getReviewInfo().getContributors()) { ObjectNode contributorNode = getObjectMapper().createObjectNode(); contributorNode.put("creatorType", ZoteroCreatorTypes.REVIEWED_AUTHOR); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/CrossRefGenerator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/CrossRefGenerator.java new file mode 100644 index 0000000..3b88aa1 --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/CrossRefGenerator.java @@ -0,0 +1,15 @@ +package edu.asu.diging.citesphere.importer.core.zotero.template.impl; + +import org.springframework.stereotype.Service; + +import edu.asu.diging.citesphere.importer.core.zotero.template.ItemJsonGenerator; + +@Service +public 
class CrossRefGenerator extends ItemJsonGenerator { + + @Override + public String responsibleFor() { + return "CrossRef"; + } + +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java index 371d029..551ca37 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.importer.core.zotero.template.ItemJsonGenerator; @@ -41,8 +42,11 @@ public ObjectNode generateJson(JsonNode template, BibEntry entry) { ItemJsonGenerator generator = generators.get(entry.getArticleType()); if (generator != null) { return generator.generate(template, entry); + } else if (entry instanceof CrossRefPublication){ + generator = generators.get("CrossRef"); + return generator.generate(template, entry); } - + return null; } } From 46c290425c19d35d762d12516ca917c1fa657fa6 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Tue, 30 Apr 2024 15:09:33 -0700 Subject: [PATCH 19/35] [CITE-177] Updated itemTypeMapping in crossref import processer --- .../importer/core/model/ItemType.java | 4 ++- .../service/impl/CitesphereConnector.java | 3 +- .../CrossrefReferenceImportProcessor.java | 31 ++++++++++++++++--- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java index 78d38be..380a24b 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java @@ -45,7 +45,9 @@ public enum ItemType { THESIS("thesis"), TV_BROADCAST("tvBroadcast"), VIDEO_RECORDIG("videoRecording"), - WEBPAGE("webpage"); + WEBPAGE("webpage"), + MONOGRAPH("monograph"), + JOURNAL_ISSUE("journal-issue"); final private String zoteroKey; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CitesphereConnector.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CitesphereConnector.java index e2646bd..93430ac 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CitesphereConnector.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CitesphereConnector.java @@ -109,8 +109,7 @@ public JobInfo getJobInfo(String apiToken) throws CitesphereCommunicationExcepti JobInfo info = null; if (status == HttpStatus.OK) { - String responseBody = response.getBody(); //TODO: Not gettting group ID here. 
- System.out.println("==================== response - " + responseBody); + String responseBody = response.getBody(); ObjectMapper mapper = new ObjectMapper(); try { info = mapper.readValue(responseBody, JobInfo.class); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 7f6a569..d0e9ae3 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -52,10 +52,33 @@ public void init() { itemTypeMapping.put(CrossRefPublication.ARTICLE, ItemType.JOURNAL_ARTICLE); itemTypeMapping.put(CrossRefPublication.BOOK, ItemType.BOOK); itemTypeMapping.put(CrossRefPublication.BOOK_CHAPTER, ItemType.BOOK_SECTION); -// itemTypeMapping.put(CrossRefPublication.LETTER, ItemType.LETTER); -// itemTypeMapping.put(CrossRefPublication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); -// itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); -// itemTypeMapping.put(CrossRefPublication.DOCUMENT, ItemType.DOCUMENT); + itemTypeMapping.put(CrossRefPublication.MONOGRAPH, ItemType.MONOGRAPH); + itemTypeMapping.put(CrossRefPublication.JOURNAL_ISSUE, ItemType.JOURNAL_ISSUE); + itemTypeMapping.put(CrossRefPublication.REFERNCE_ENTRY, ItemType.REFERNCE_ENTRY); + itemTypeMapping.put(CrossRefPublication.POSTED_CONTENT, ItemType.POSTED_CONTENT); + itemTypeMapping.put(CrossRefPublication.COMPONENT, ItemType.COMPONENT); + itemTypeMapping.put(CrossRefPublication.EDITED_BOOK, ItemType.BOOK); + itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_ARTICLE, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(CrossRefPublication.DISSERTATION, ItemType.DISSERTATION); + 
itemTypeMapping.put(CrossRefPublication.BOOK_SECTION, ItemType.BOOK_SECTION); + itemTypeMapping.put(CrossRefPublication.REPORT_COMPONENT, ItemType.REPORT_COMPONENT); + itemTypeMapping.put(CrossRefPublication.REPORT, ItemType.REPORT); + itemTypeMapping.put(CrossRefPublication.PEER_REVIEW, ItemType.PEER_REVIEW); + itemTypeMapping.put(CrossRefPublication.BOOK_TRACK, ItemType.BOOK_TRACK); + itemTypeMapping.put(CrossRefPublication.BOOK_PART, ItemType.BOOK_PART); + itemTypeMapping.put(CrossRefPublication.OTHER, ItemType.OTHER); + itemTypeMapping.put(CrossRefPublication.JORUNAL_VOLUME, ItemType.JORUNAL_VOLUME); + itemTypeMapping.put(CrossRefPublication.BOOK_SET, ItemType.BOOK_SET); + itemTypeMapping.put(CrossRefPublication.JOURNAL, ItemType.JOURNAL); + itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_SERIES, ItemType.PROCEEDINGS_SERIES); + itemTypeMapping.put(CrossRefPublication.REPORT_SERIES, ItemType.REPORT_SERIES); + itemTypeMapping.put(CrossRefPublication.PROCEEDINGS, ItemType.PROCEEDINGS); + itemTypeMapping.put(CrossRefPublication.DATABASE, ItemType.DATABASE); + itemTypeMapping.put(CrossRefPublication.STANDARD, ItemType.STANDARD); + itemTypeMapping.put(CrossRefPublication.REFERENCE_BOOK, ItemType.REFERENCE_BOOK); + itemTypeMapping.put(CrossRefPublication.GRANT, ItemType.GRANT); + itemTypeMapping.put(CrossRefPublication.DATASET, ItemType.DATASET); + itemTypeMapping.put(CrossRefPublication.BOOK_SERIES, ItemType.BOOK_SERIES); } public void startImport(KafkaJobMessage message, JobInfo info) { From e36ff3d62c10f618bb052c411ff7f540e238801a Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Wed, 1 May 2024 14:10:09 -0700 Subject: [PATCH 20/35] [CITE-177] Added more mapping in CrossRefInportProcessor --- .../importer/core/model/ItemType.java | 4 +- .../CrossrefReferenceImportProcessor.java | 44 +++++++++---------- .../parse/iterators/CrossRefIterator.java | 1 + 3 files changed, 25 insertions(+), 24 deletions(-) diff --git 
a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java index 380a24b..4fef7d0 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java @@ -46,8 +46,8 @@ public enum ItemType { TV_BROADCAST("tvBroadcast"), VIDEO_RECORDIG("videoRecording"), WEBPAGE("webpage"), - MONOGRAPH("monograph"), - JOURNAL_ISSUE("journal-issue"); + DATABASE("database"); + final private String zoteroKey; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index d0e9ae3..4971311 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -52,33 +52,33 @@ public void init() { itemTypeMapping.put(CrossRefPublication.ARTICLE, ItemType.JOURNAL_ARTICLE); itemTypeMapping.put(CrossRefPublication.BOOK, ItemType.BOOK); itemTypeMapping.put(CrossRefPublication.BOOK_CHAPTER, ItemType.BOOK_SECTION); - itemTypeMapping.put(CrossRefPublication.MONOGRAPH, ItemType.MONOGRAPH); - itemTypeMapping.put(CrossRefPublication.JOURNAL_ISSUE, ItemType.JOURNAL_ISSUE); - itemTypeMapping.put(CrossRefPublication.REFERNCE_ENTRY, ItemType.REFERNCE_ENTRY); - itemTypeMapping.put(CrossRefPublication.POSTED_CONTENT, ItemType.POSTED_CONTENT); - itemTypeMapping.put(CrossRefPublication.COMPONENT, ItemType.COMPONENT); + itemTypeMapping.put(CrossRefPublication.MONOGRAPH, ItemType.BOOK); + 
itemTypeMapping.put(CrossRefPublication.JOURNAL_ISSUE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(CrossRefPublication.REFERNCE_ENTRY, ItemType.DICTIONARY_ENTRY); + itemTypeMapping.put(CrossRefPublication.POSTED_CONTENT, ItemType.WEBPAGE); + itemTypeMapping.put(CrossRefPublication.COMPONENT, ItemType.ATTACHMENT); itemTypeMapping.put(CrossRefPublication.EDITED_BOOK, ItemType.BOOK); itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_ARTICLE, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(CrossRefPublication.DISSERTATION, ItemType.DISSERTATION); + itemTypeMapping.put(CrossRefPublication.DISSERTATION, ItemType.THESIS); itemTypeMapping.put(CrossRefPublication.BOOK_SECTION, ItemType.BOOK_SECTION); - itemTypeMapping.put(CrossRefPublication.REPORT_COMPONENT, ItemType.REPORT_COMPONENT); + itemTypeMapping.put(CrossRefPublication.REPORT_COMPONENT, ItemType.REPORT); itemTypeMapping.put(CrossRefPublication.REPORT, ItemType.REPORT); - itemTypeMapping.put(CrossRefPublication.PEER_REVIEW, ItemType.PEER_REVIEW); - itemTypeMapping.put(CrossRefPublication.BOOK_TRACK, ItemType.BOOK_TRACK); - itemTypeMapping.put(CrossRefPublication.BOOK_PART, ItemType.BOOK_PART); - itemTypeMapping.put(CrossRefPublication.OTHER, ItemType.OTHER); - itemTypeMapping.put(CrossRefPublication.JORUNAL_VOLUME, ItemType.JORUNAL_VOLUME); - itemTypeMapping.put(CrossRefPublication.BOOK_SET, ItemType.BOOK_SET); - itemTypeMapping.put(CrossRefPublication.JOURNAL, ItemType.JOURNAL); - itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_SERIES, ItemType.PROCEEDINGS_SERIES); - itemTypeMapping.put(CrossRefPublication.REPORT_SERIES, ItemType.REPORT_SERIES); - itemTypeMapping.put(CrossRefPublication.PROCEEDINGS, ItemType.PROCEEDINGS); + itemTypeMapping.put(CrossRefPublication.PEER_REVIEW, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(CrossRefPublication.BOOK_TRACK, ItemType.BOOK); + itemTypeMapping.put(CrossRefPublication.BOOK_PART, ItemType.BOOK_SECTION); + itemTypeMapping.put(CrossRefPublication.OTHER, 
ItemType.DOCUMENT); + itemTypeMapping.put(CrossRefPublication.JORUNAL_VOLUME, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(CrossRefPublication.BOOK_SET, ItemType.BOOK); + itemTypeMapping.put(CrossRefPublication.JOURNAL, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_SERIES, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(CrossRefPublication.REPORT_SERIES, ItemType.REPORT); + itemTypeMapping.put(CrossRefPublication.PROCEEDINGS, ItemType.CONFERENCE_PAPER); itemTypeMapping.put(CrossRefPublication.DATABASE, ItemType.DATABASE); - itemTypeMapping.put(CrossRefPublication.STANDARD, ItemType.STANDARD); - itemTypeMapping.put(CrossRefPublication.REFERENCE_BOOK, ItemType.REFERENCE_BOOK); - itemTypeMapping.put(CrossRefPublication.GRANT, ItemType.GRANT); - itemTypeMapping.put(CrossRefPublication.DATASET, ItemType.DATASET); - itemTypeMapping.put(CrossRefPublication.BOOK_SERIES, ItemType.BOOK_SERIES); + itemTypeMapping.put(CrossRefPublication.STANDARD, ItemType.STATUTE); + itemTypeMapping.put(CrossRefPublication.REFERENCE_BOOK, ItemType.DICTIONARY_ENTRY); + itemTypeMapping.put(CrossRefPublication.GRANT, ItemType.DOCUMENT); + itemTypeMapping.put(CrossRefPublication.DATASET, ItemType.DATABASE); + itemTypeMapping.put(CrossRefPublication.BOOK_SERIES, ItemType.BOOK); } public void startImport(KafkaJobMessage message, JobInfo info) { diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index ac40936..ca2b07b 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -59,6 +59,7 @@ private void init() { typeMap = new HashMap(); 
typeMap.put("journal-article", CrossRefPublication.ARTICLE); typeMap.put("book", CrossRefPublication.BOOK); + //TODO: Add more to the list parseCrossRef(); } From 1c94f97b555e462b1f2e7627cccb509a4966f47d Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Fri, 3 May 2024 16:47:18 -0700 Subject: [PATCH 21/35] [CITE-177] Udpated CrossRefIterator for typeMap and iterator logic --- .../parse/iterators/CrossRefIterator.java | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index ca2b07b..d181e01 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -39,7 +39,9 @@ public class CrossRefIterator implements BibEntryIterator { private final Logger logger = LoggerFactory.getLogger(getClass()); private JobInfo info; + private List articles; private BibEntry article; + private int currentIndex; private boolean iteratorDone = false; private Map typeMap; @@ -50,7 +52,7 @@ public class CrossRefIterator implements BibEntryIterator { public CrossRefIterator(JobInfo info) { this.info = info; - System.out.println(info.toString()); + currentIndex = 0; init(); } @@ -59,24 +61,50 @@ private void init() { typeMap = new HashMap(); typeMap.put("journal-article", CrossRefPublication.ARTICLE); typeMap.put("book", CrossRefPublication.BOOK); - //TODO: Add more to the list + typeMap.put("book-chapter", CrossRefPublication.BOOK_CHAPTER); + typeMap.put("monograph", CrossRefPublication.MONOGRAPH); + typeMap.put("journal-issue", CrossRefPublication.JOURNAL_ISSUE); + typeMap.put("reference-entry", 
CrossRefPublication.REFERNCE_ENTRY); + typeMap.put("posted-content", CrossRefPublication.POSTED_CONTENT); + typeMap.put("component", CrossRefPublication.COMPONENT); + typeMap.put("edited-book", CrossRefPublication.EDITED_BOOK); + typeMap.put("proceedings-article", CrossRefPublication.PROCEEDINGS_ARTICLE); + typeMap.put("dissertation", CrossRefPublication.DISSERTATION); + typeMap.put("book-section", CrossRefPublication.BOOK_SECTION); + typeMap.put("report-component", CrossRefPublication.REPORT_COMPONENT); + typeMap.put("report", CrossRefPublication.REPORT); + typeMap.put("peer-review", CrossRefPublication.PEER_REVIEW); + typeMap.put("book-track", CrossRefPublication.BOOK_TRACK); + typeMap.put("book-part", CrossRefPublication.BOOK_PART); + typeMap.put("other", CrossRefPublication.OTHER); + typeMap.put("journal-volume", CrossRefPublication.JORUNAL_VOLUME); + typeMap.put("book-set", CrossRefPublication.BOOK_SET); + typeMap.put("journal", CrossRefPublication.JOURNAL); + typeMap.put("proceedings-series", CrossRefPublication.PROCEEDINGS_SERIES); + typeMap.put("report-series", CrossRefPublication.REPORT_SERIES); + typeMap.put("proceedings", CrossRefPublication.PROCEEDINGS); + typeMap.put("database", CrossRefPublication.DATABASE); + typeMap.put("standard", CrossRefPublication.STANDARD); + typeMap.put("reference-book", CrossRefPublication.REFERENCE_BOOK); + typeMap.put("grant", CrossRefPublication.GRANT); + typeMap.put("dataset", CrossRefPublication.DATASET); + typeMap.put("book-series", CrossRefPublication.BOOK_SERIES); parseCrossRef(); } private void parseCrossRef() { - List items = new ArrayList<>(); + articles = new ArrayList<>(); for (String doi : info.getDois()) { try { Item item = crossrefService.get(doi); - article = new CrossRefPublication(); article.setArticleType(typeMap.get(item.getType())); article.setJournalMeta(parseJournalMeta(item)); article.setArticleMeta(parseArticleMeta(item)); - items.add(article); + articles.add(article); } catch (RequestFailedException 
| IOException e) { logger.error("Couuld not retrieve work for doi: "+ doi, e); // for now we just log the exceptions @@ -161,14 +189,14 @@ private ArticleMeta parseArticleMeta(Item item) { meta.setLanguage(item.getLanguage()); ReviewInfo review = new ReviewInfo(); if (item.getReview() != null) { - review.setFullDescription(item.getReview().getCompetingInterestStatement()); //TODO: giving null pointer error + review.setFullDescription(item.getReview().getCompetingInterestStatement()); } meta.setReviewInfo(review); meta.setDocumentType(item.getType()); List references = new ArrayList<>(); if(item.getReference() != null) { - for(edu.asu.diging.crossref.model.Reference itemRef: item.getReference()) { // TODO: giving null pointer error + for(edu.asu.diging.crossref.model.Reference itemRef: item.getReference()) { Reference ref = new Reference(); ref.setAuthorString(itemRef.getAuthor()); ref.setContributors(null); @@ -228,13 +256,18 @@ public BibEntry next() { if (iteratorDone) { return null; } - iteratorDone = true; - return article; +// iteratorDone = true; + BibEntry nextEntry = articles.get(currentIndex); + currentIndex++; + return nextEntry; } @Override public boolean hasNext() { + if (currentIndex >= articles.size()) { + iteratorDone = true; + } return !iteratorDone; } From bed4968e7a9b2673d7cb4ff548f8e1a3bdb8d4f2 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Thu, 13 Jun 2024 11:14:28 -0700 Subject: [PATCH 22/35] [CITE-177] Added test cases for CrossrefReferenceImportProcessor --- .../CrossrefReferenceImportProcessor.java | 7 +- .../parse/iterators/CrossRefIterator.java | 9 +- .../CrossrefReferenceImportProcessorTest.java | 112 ++++++++++++++++++ 3 files changed, 116 insertions(+), 12 deletions(-) create mode 100644 citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java diff --git 
a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 4971311..478d2a8 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -84,9 +84,9 @@ public void init() { public void startImport(KafkaJobMessage message, JobInfo info) { // message = jobToken: eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJKT0IxOTciLCJleHAiOjE2NzgzNDU3NDF9.5Xqh_AoMHcdlatULkCLFtny9pOF_uJ-SRARw0gCybY3h3qHL2mkIIQlk-qTA0Pn0VlhOLuW4FwACHmIdwZVmoA // info = dois: [10.2307/j.ctvcm4h07.67, 10.1515/9780691242507] -// null -// zotero: byRZjIk2y4e3kay1cnwy3KpB -// zoteroId: 9154965 + // null + // zotero: byRZjIk2y4e3kay1cnwy3KpB + // zoteroId: 9154965 logger.info("Starting import for " + info.getDois()); ObjectMapper mapper = new ObjectMapper(); @@ -112,7 +112,6 @@ public void startImport(KafkaJobMessage message, JobInfo info) { // something is wrong with this entry, let's ignore it continue; } - System.out.println("======================================" + entry.getArticleType()); ItemType type = itemTypeMapping.get(entry.getArticleType()); JsonNode template = zoteroConnector.getTemplate(type); ObjectNode bibNode = generationService.generateJson(template, entry); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index d181e01..6b22743 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -256,7 +256,6 @@ public BibEntry next() { if (iteratorDone) { return null; } -// iteratorDone = true; BibEntry nextEntry = articles.get(currentIndex); currentIndex++; return nextEntry; @@ -273,13 +272,7 @@ public boolean hasNext() { @Override public void close() { - // if (lineIterator != null) { - // try { - // lineIterator.close(); - // } catch (IOException e) { - // logger.error("Couldn't close line iterator.", e); - // } - // } + // do nothing } } diff --git a/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java b/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java new file mode 100644 index 0000000..378ecf0 --- /dev/null +++ b/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java @@ -0,0 +1,112 @@ +package edu.asu.diging.citesphere.importer.core.service; + +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.Arrays; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.mockito.Spy; +import org.mockito.runners.MockitoJUnitRunner; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + +import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; 
+import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; +import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.ItemType; +import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; +import edu.asu.diging.citesphere.importer.core.service.impl.CrossrefReferenceImportProcessor; +import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; +import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; +import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; +import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; +import edu.asu.diging.citesphere.messages.model.ResponseCode; +import edu.asu.diging.citesphere.messages.model.Status; + +@RunWith(MockitoJUnitRunner.class) +public class CrossrefReferenceImportProcessorTest { + + @Spy + @InjectMocks + private CrossrefReferenceImportProcessor processor; + + @Mock + private IZoteroConnector zoteroConnector; + + @Mock + private IJsonGenerationService generationService; + + @Mock + private IHandlerRegistry handlerRegistry; + + @Mock + private BibEntryIterator bibIterator; + + @Mock + private KafkaJobMessage message; + + @Mock + private JobInfo info; + + @Mock + private KafkaRequestProducer requestProducer; + + private ObjectMapper mapper; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + processor.init(); + mapper = new ObjectMapper(); + when(message.getId()).thenReturn("testMessageId"); + doNothing().when(processor).sendMessage(any(), anyString(), any(Status.class), any(ResponseCode.class)); + } + + @Test + public void testStartImport_successful() throws Exception { + when(info.getDois()).thenReturn(Arrays.asList("10.1234/example1", "10.5678/example2")); + 
when(handlerRegistry.handleFile(info, null)).thenReturn(bibIterator); + + BibEntry entry = mock(BibEntry.class); + when(bibIterator.hasNext()).thenReturn(true, false); + when(bibIterator.next()).thenReturn(entry); + when(entry.getArticleType()).thenReturn(CrossRefPublication.ARTICLE); + + JsonNode template = mock(ObjectNode.class); + when(zoteroConnector.getTemplate(ItemType.JOURNAL_ARTICLE)).thenReturn(template); + ObjectNode bibNode = mapper.createObjectNode(); + when(generationService.generateJson(template, entry)).thenReturn(bibNode); + + processor.startImport(message, info); + + verify(zoteroConnector).addEntries(eq(info), any(ArrayNode.class)); + verify(processor).sendMessage(any(), eq("testMessageId"), eq(Status.DONE), eq(ResponseCode.S00)); + } + + @Test + public void testStartImport_iteratorCreationException() throws Exception { + when(info.getDois()).thenReturn(Arrays.asList("10.1234/example1", "10.5678/example2")); + when(handlerRegistry.handleFile(info, null)).thenThrow(new IteratorCreationException("error")); + + processor.startImport(message, info); + + verify(processor).sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); + } + +} From e48be13b16137c3ebfae102604eddc0c0b3e180e Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Thu, 13 Jun 2024 11:38:12 -0700 Subject: [PATCH 23/35] [CITE-177] Refactoring code to fix issues. 
--- .../importer/core/kafka/impl/ReferenceImportListener.java | 1 - .../importer/core/service/AbstractImportProcessor.java | 2 -- .../importer/core/service/impl/FileImportProcessor.java | 1 - 3 files changed, 4 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java index dff58bd..82f744d 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java @@ -49,7 +49,6 @@ public void receiveCrossrefImportMessage(String message) { msg = mapper.readValue(message, KafkaJobMessage.class); } catch (IOException e) { logger.error("Could not unmarshall message.", e); - // FIXME: handle this case return; } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java index c5423c9..3877326 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java @@ -43,7 +43,6 @@ private JobInfo getJobInfo(KafkaJobMessage message) { try { info = connector.getJobInfo(message.getId()); } catch (CitesphereCommunicationException e) { - // FIXME this needs to be handled better logger.error("Could not get Zotero info.", e); return null; } @@ -57,7 +56,6 @@ protected void sendMessage(ItemCreationResponse message, String jobId, Status st try { requestProducer.sendRequest(returnMessage, KafkaTopics.REFERENCES_IMPORT_DONE_TOPIC); } catch (MessageCreationException e) { - // 
FIXME handle this case logger.error("Exception sending message.", e); } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java index 5fc1a54..0ffd022 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java @@ -160,7 +160,6 @@ private String downloadFile(KafkaJobMessage message) { try { file = getCitesphereConnector().getUploadeFile(message.getId()); } catch (CitesphereCommunicationException e) { - // FIXME this needs to be handled better logger.error("Could not get Zotero info.", e); return null; } From c086f44c21ff36c6853aad614c54001ea8855d73 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Thu, 13 Jun 2024 11:51:57 -0700 Subject: [PATCH 24/35] [CITE-177] Renamed file to remove unwanted file commit --- .../importer/core/kafka/impl/ReferenceImportListener.java | 6 +++--- .../importer/core/service/AbstractImportProcessor.java | 2 +- .../service/{ImportProcessor.java => IImportProcessor.java} | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/{ImportProcessor.java => IImportProcessor.java} (82%) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java index 82f744d..ba6b319 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java @@ -10,7 
+10,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; -import edu.asu.diging.citesphere.importer.core.service.ImportProcessor; +import edu.asu.diging.citesphere.importer.core.service.IImportProcessor; import edu.asu.diging.citesphere.messages.KafkaTopics; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; @@ -20,11 +20,11 @@ public class ReferenceImportListener { @Autowired @Qualifier("fileImportProcessor") - private ImportProcessor fileProcessor; + private IImportProcessor fileProcessor; @Autowired @Qualifier("crossrefReferenceImportProcessor") - private ImportProcessor crossrefProcessor; + private IImportProcessor crossrefProcessor; @KafkaListener(topics = KafkaTopics.REFERENCES_IMPORT_TOPIC) public void receiveMessage(String message) { diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java index 3877326..1d7d0b2 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java @@ -15,7 +15,7 @@ import edu.asu.diging.citesphere.messages.model.ResponseCode; import edu.asu.diging.citesphere.messages.model.Status; -public abstract class AbstractImportProcessor implements ImportProcessor { +public abstract class AbstractImportProcessor implements IImportProcessor { protected final Logger logger = LoggerFactory.getLogger(getClass()); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/ImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IImportProcessor.java similarity index 82% rename from citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/ImportProcessor.java rename to 
citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IImportProcessor.java index 1a3c34f..5ff6d2b 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/ImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/IImportProcessor.java @@ -2,7 +2,7 @@ import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; -public interface ImportProcessor { +public interface IImportProcessor { void process(KafkaJobMessage message); From 287ffed9c75328ec49c345505e8fdb5c5796e12a Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Thu, 13 Jun 2024 15:35:49 -0700 Subject: [PATCH 25/35] [CITE-177] Updated CrossRefIterator.java --- .../service/parse/iterators/CrossRefIterator.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 6b22743..abdbfb6 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -120,11 +120,13 @@ private ContainerMeta parseJournalMeta(Item item) { meta.setPublisherName(item.getPublisher()); meta.setPublisherLocation(item.getPublisherLocation()); List issnList = new ArrayList(); - for(IssnType issnType : item.getIssnType()) { - Issn issn = new Issn(); - issn.setIssn(issnType.getValue()); - issn.setPubType(issnType.getType()); - issnList.add(issn); + if(item.getIssnType() != null) { + for(IssnType issnType : item.getIssnType()) { + Issn issn = new Issn(); + issn.setIssn(issnType.getValue()); + issn.setPubType(issnType.getType()); + issnList.add(issn); + } } 
meta.setIssns(issnList); From 5fd853e9df3a00ccff6f2502ade62a95d62225d6 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Tue, 25 Jun 2024 16:38:37 -0700 Subject: [PATCH 26/35] [CITE-177] Addressed PR comments --- .../core/service/AbstractImportProcessor.java | 2 +- .../impl/CrossrefReferenceImportProcessor.java | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java index 1d7d0b2..c56bcc5 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java @@ -60,7 +60,7 @@ protected void sendMessage(ItemCreationResponse message, String jobId, Status st } } - public ICitesphereConnector getCitesphereConnector() { + protected ICitesphereConnector getCitesphereConnector() { return connector; } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 478d2a8..6344358 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -6,8 +6,6 @@ import javax.annotation.PostConstruct; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -33,9 +31,7 @@ @Service public class CrossrefReferenceImportProcessor extends 
AbstractImportProcessor { - - private final Logger logger = LoggerFactory.getLogger(getClass()); - + private Map itemTypeMapping = new HashMap<>(); @Autowired @@ -82,11 +78,6 @@ public void init() { } public void startImport(KafkaJobMessage message, JobInfo info) { - // message = jobToken: eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJKT0IxOTciLCJleHAiOjE2NzgzNDU3NDF9.5Xqh_AoMHcdlatULkCLFtny9pOF_uJ-SRARw0gCybY3h3qHL2mkIIQlk-qTA0Pn0VlhOLuW4FwACHmIdwZVmoA - // info = dois: [10.2307/j.ctvcm4h07.67, 10.1515/9780691242507] - // null - // zotero: byRZjIk2y4e3kay1cnwy3KpB - // zoteroId: 9154965 logger.info("Starting import for " + info.getDois()); ObjectMapper mapper = new ObjectMapper(); From c4c4f1375b55efc8b26bbe249e95e5d138d9f20c Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Thu, 27 Jun 2024 16:34:20 -0700 Subject: [PATCH 27/35] [CITE-177] Addressing PR comments --- .../impl/CrossrefReferenceImportProcessor.java | 12 ++++++++---- .../zotero/template/impl/CrossRefGenerator.java | 15 --------------- 2 files changed, 8 insertions(+), 19 deletions(-) delete mode 100644 citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/CrossRefGenerator.java diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 6344358..e4f54c1 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -21,7 +21,7 @@ import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; 
-import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; +import edu.asu.diging.citesphere.importer.core.service.parse.impl.CrossRefHandler; import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; @@ -40,8 +40,11 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { @Autowired private IJsonGenerationService generationService; +// @Autowired +// private IHandlerRegistry handlerRegistry; + @Autowired - private IHandlerRegistry handlerRegistry; + private CrossRefHandler crossRefHandler; @PostConstruct public void init() { @@ -87,7 +90,8 @@ public void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); BibEntryIterator bibIterator = null; try { - bibIterator = handlerRegistry.handleFile(info, null); +// bibIterator = handlerRegistry.handleFile(info, null); + bibIterator = crossRefHandler.getIterator(null, null, info); } catch (IteratorCreationException e1) { logger.error("Could not create iterator.", e1); } @@ -127,7 +131,7 @@ public void startImport(KafkaJobMessage message, JobInfo info) { } response = response != null ? 
response : new ItemCreationResponse(); - sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); // giving error 500 as response code mentioned + sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/CrossRefGenerator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/CrossRefGenerator.java deleted file mode 100644 index 3b88aa1..0000000 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/CrossRefGenerator.java +++ /dev/null @@ -1,15 +0,0 @@ -package edu.asu.diging.citesphere.importer.core.zotero.template.impl; - -import org.springframework.stereotype.Service; - -import edu.asu.diging.citesphere.importer.core.zotero.template.ItemJsonGenerator; - -@Service -public class CrossRefGenerator extends ItemJsonGenerator { - - @Override - public String responsibleFor() { - return "CrossRef"; - } - -} From eb6a3a174e8359b4a3fd95bac041a95d44b8a120 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Mon, 1 Jul 2024 16:36:15 -0700 Subject: [PATCH 28/35] [CITE-177] Addressed PR comments --- .../core/model/impl/CrossRefPublication.java | 65 ----------- .../importer/core/model/impl/Publication.java | 28 +++++ .../core/service/AbstractImportProcessor.java | 30 ++++++ .../CrossrefReferenceImportProcessor.java | 101 ++++++------------ .../service/impl/FileImportProcessor.java | 24 ----- .../service/parse/impl/CrossRefHandler.java | 39 ------- .../parse/iterators/CrossRefIterator.java | 65 ++++++----- .../template/impl/JsonGenerationService.java | 4 +- 8 files changed, 126 insertions(+), 230 deletions(-) delete mode 100644 citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java delete mode 100644 
citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java deleted file mode 100644 index 3932144..0000000 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/CrossRefPublication.java +++ /dev/null @@ -1,65 +0,0 @@ -package edu.asu.diging.citesphere.importer.core.model.impl; - -import edu.asu.diging.citesphere.importer.core.model.BibEntry; - -public class CrossRefPublication implements BibEntry { - public final static String ARTICLE = "journal-article"; - public final static String BOOK = "book"; - public final static String BOOK_CHAPTER = "book-chapter"; - public final static String MONOGRAPH = "monograph"; - public final static String JOURNAL_ISSUE = "journal-issue"; - public final static String REFERNCE_ENTRY = "reference-entry"; - public final static String POSTED_CONTENT = "posted-content"; - public final static String COMPONENT = "component"; - public final static String EDITED_BOOK = "edited-book"; - public final static String PROCEEDINGS_ARTICLE = "proceedings-article"; - public final static String DISSERTATION = "dissertation"; - public final static String BOOK_SECTION = "book-section"; - public final static String REPORT_COMPONENT = "report-component"; - public final static String REPORT = "report"; - public final static String PEER_REVIEW = "peer-review"; - public final static String BOOK_TRACK = "book-track"; - public final static String BOOK_PART = "book-part"; - public final static String OTHER = "other"; - public final static String JORUNAL_VOLUME = "journal-volume"; - public final static String BOOK_SET = "book-set"; - public final static String JOURNAL = "journal"; - public final static String PROCEEDINGS_SERIES = 
"proceedings-series"; - public final static String REPORT_SERIES = "report-series"; - public final static String PROCEEDINGS = "proceedings"; - public final static String DATABASE = "database"; - public final static String STANDARD = "standard"; - public final static String REFERENCE_BOOK = "reference-book"; - public final static String GRANT = "grant"; - public final static String DATASET = "dataset"; - public final static String BOOK_SERIES = "book-series"; - - private String articleType; - private ContainerMeta containerMeta; - private ArticleMeta articleMeta; - - @Override - public String getArticleType() { - return articleType; - } - @Override - public void setArticleType(String articleType) { - this.articleType = articleType; - } - @Override - public ContainerMeta getContainerMeta() { - return containerMeta; - } - @Override - public void setJournalMeta(ContainerMeta journalMeta) { - this.containerMeta = journalMeta; - } - @Override - public ArticleMeta getArticleMeta() { - return articleMeta; - } - @Override - public void setArticleMeta(ArticleMeta articleMeta) { - this.articleMeta = articleMeta; - } -} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java index 6963fdc..81fb1af 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java @@ -12,6 +12,34 @@ public class Publication implements BibEntry { public final static String NEWS_ITEM = "newspaperArticle"; public final static String PROCEEDINGS_PAPER = "conferencePaper"; public final static String DOCUMENT = "document"; + // publication types in CrossRef + public final static String MONOGRAPH = "monograph"; + public final static String JOURNAL_ISSUE = "journal-issue"; + public final 
static String REFERNCE_ENTRY = "reference-entry"; + public final static String POSTED_CONTENT = "posted-content"; + public final static String COMPONENT = "component"; + public final static String EDITED_BOOK = "edited-book"; + public final static String PROCEEDINGS_ARTICLE = "proceedings-article"; + public final static String DISSERTATION = "dissertation"; + public final static String BOOK_SECTION = "book-section"; + public final static String REPORT_COMPONENT = "report-component"; + public final static String REPORT = "report"; + public final static String PEER_REVIEW = "peer-review"; + public final static String BOOK_TRACK = "book-track"; + public final static String BOOK_PART = "book-part"; + public final static String OTHER = "other"; + public final static String JORUNAL_VOLUME = "journal-volume"; + public final static String BOOK_SET = "book-set"; + public final static String JOURNAL = "journal"; + public final static String PROCEEDINGS_SERIES = "proceedings-series"; + public final static String REPORT_SERIES = "report-series"; + public final static String PROCEEDINGS = "proceedings"; + public final static String DATABASE = "database"; + public final static String STANDARD = "standard"; + public final static String REFERENCE_BOOK = "reference-book"; + public final static String GRANT = "grant"; + public final static String DATASET = "dataset"; + public final static String BOOK_SERIES = "book-series"; private String articleType; private ContainerMeta containerMeta; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java index c56bcc5..48a37f6 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java @@ 
-1,13 +1,20 @@ package edu.asu.diging.citesphere.importer.core.service; +import java.net.URISyntaxException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; + import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; import edu.asu.diging.citesphere.importer.core.exception.MessageCreationException; import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; import edu.asu.diging.citesphere.messages.KafkaTopics; import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; import edu.asu.diging.citesphere.messages.model.KafkaImportReturnMessage; @@ -25,6 +32,8 @@ public abstract class AbstractImportProcessor implements IImportProcessor { @Autowired private ICitesphereConnector connector; + @Autowired + private IZoteroConnector zoteroConnector; @Override public void process(KafkaJobMessage message) { @@ -63,4 +72,25 @@ protected void sendMessage(ItemCreationResponse message, String jobId, Status st protected ICitesphereConnector getCitesphereConnector() { return connector; } + + protected ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { + ObjectMapper mapper = new ObjectMapper(); + try { + String msg = mapper.writeValueAsString(entries); + logger.info("Submitting " + msg); + ItemCreationResponse response = zoteroConnector.addEntries(info, entries); + if (response != null) { + logger.info(response.getSuccessful() + ""); + logger.error(response.getFailed() + ""); + } else { + logger.error("Item creation failed."); + } + return response; + } catch (URISyntaxException e) { + logger.error("Could not store new 
entry.", e); + } catch (JsonProcessingException e) { + logger.error("Could not write JSON."); + } + return null; + } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index e4f54c1..43debd7 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -1,6 +1,5 @@ package edu.asu.diging.citesphere.importer.core.service.impl; -import java.net.URISyntaxException; import java.util.HashMap; import java.util.Map; @@ -9,19 +8,17 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; -import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; -import edu.asu.diging.citesphere.importer.core.service.parse.impl.CrossRefHandler; +import edu.asu.diging.citesphere.importer.core.service.parse.iterators.CrossRefIterator; import 
edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; @@ -40,44 +37,38 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { @Autowired private IJsonGenerationService generationService; -// @Autowired -// private IHandlerRegistry handlerRegistry; - - @Autowired - private CrossRefHandler crossRefHandler; - @PostConstruct public void init() { - itemTypeMapping.put(CrossRefPublication.ARTICLE, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(CrossRefPublication.BOOK, ItemType.BOOK); - itemTypeMapping.put(CrossRefPublication.BOOK_CHAPTER, ItemType.BOOK_SECTION); - itemTypeMapping.put(CrossRefPublication.MONOGRAPH, ItemType.BOOK); - itemTypeMapping.put(CrossRefPublication.JOURNAL_ISSUE, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(CrossRefPublication.REFERNCE_ENTRY, ItemType.DICTIONARY_ENTRY); - itemTypeMapping.put(CrossRefPublication.POSTED_CONTENT, ItemType.WEBPAGE); - itemTypeMapping.put(CrossRefPublication.COMPONENT, ItemType.ATTACHMENT); - itemTypeMapping.put(CrossRefPublication.EDITED_BOOK, ItemType.BOOK); - itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_ARTICLE, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(CrossRefPublication.DISSERTATION, ItemType.THESIS); - itemTypeMapping.put(CrossRefPublication.BOOK_SECTION, ItemType.BOOK_SECTION); - itemTypeMapping.put(CrossRefPublication.REPORT_COMPONENT, ItemType.REPORT); - itemTypeMapping.put(CrossRefPublication.REPORT, ItemType.REPORT); - itemTypeMapping.put(CrossRefPublication.PEER_REVIEW, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(CrossRefPublication.BOOK_TRACK, ItemType.BOOK); - itemTypeMapping.put(CrossRefPublication.BOOK_PART, ItemType.BOOK_SECTION); - itemTypeMapping.put(CrossRefPublication.OTHER, ItemType.DOCUMENT); - itemTypeMapping.put(CrossRefPublication.JORUNAL_VOLUME, ItemType.JOURNAL_ARTICLE); - 
itemTypeMapping.put(CrossRefPublication.BOOK_SET, ItemType.BOOK); - itemTypeMapping.put(CrossRefPublication.JOURNAL, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(CrossRefPublication.PROCEEDINGS_SERIES, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(CrossRefPublication.REPORT_SERIES, ItemType.REPORT); - itemTypeMapping.put(CrossRefPublication.PROCEEDINGS, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(CrossRefPublication.DATABASE, ItemType.DATABASE); - itemTypeMapping.put(CrossRefPublication.STANDARD, ItemType.STATUTE); - itemTypeMapping.put(CrossRefPublication.REFERENCE_BOOK, ItemType.DICTIONARY_ENTRY); - itemTypeMapping.put(CrossRefPublication.GRANT, ItemType.DOCUMENT); - itemTypeMapping.put(CrossRefPublication.DATASET, ItemType.DATABASE); - itemTypeMapping.put(CrossRefPublication.BOOK_SERIES, ItemType.BOOK); + itemTypeMapping.put(Publication.ARTICLE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK, ItemType.BOOK); + itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.MONOGRAPH, ItemType.BOOK); + itemTypeMapping.put(Publication.JOURNAL_ISSUE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.REFERNCE_ENTRY, ItemType.DICTIONARY_ENTRY); + itemTypeMapping.put(Publication.POSTED_CONTENT, ItemType.WEBPAGE); + itemTypeMapping.put(Publication.COMPONENT, ItemType.ATTACHMENT); + itemTypeMapping.put(Publication.EDITED_BOOK, ItemType.BOOK); + itemTypeMapping.put(Publication.PROCEEDINGS_ARTICLE, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.DISSERTATION, ItemType.THESIS); + itemTypeMapping.put(Publication.BOOK_SECTION, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.REPORT_COMPONENT, ItemType.REPORT); + itemTypeMapping.put(Publication.REPORT, ItemType.REPORT); + itemTypeMapping.put(Publication.PEER_REVIEW, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK_TRACK, ItemType.BOOK); + itemTypeMapping.put(Publication.BOOK_PART, ItemType.BOOK_SECTION); 
+ itemTypeMapping.put(Publication.OTHER, ItemType.DOCUMENT); + itemTypeMapping.put(Publication.JORUNAL_VOLUME, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK_SET, ItemType.BOOK); + itemTypeMapping.put(Publication.JOURNAL, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.PROCEEDINGS_SERIES, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.REPORT_SERIES, ItemType.REPORT); + itemTypeMapping.put(Publication.PROCEEDINGS, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.DATABASE, ItemType.DATABASE); + itemTypeMapping.put(Publication.STANDARD, ItemType.STATUTE); + itemTypeMapping.put(Publication.REFERENCE_BOOK, ItemType.DICTIONARY_ENTRY); + itemTypeMapping.put(Publication.GRANT, ItemType.DOCUMENT); + itemTypeMapping.put(Publication.DATASET, ItemType.DATABASE); + itemTypeMapping.put(Publication.BOOK_SERIES, ItemType.BOOK); } public void startImport(KafkaJobMessage message, JobInfo info) { @@ -89,12 +80,7 @@ public void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); BibEntryIterator bibIterator = null; - try { -// bibIterator = handlerRegistry.handleFile(info, null); - bibIterator = crossRefHandler.getIterator(null, null, info); - } catch (IteratorCreationException e1) { - logger.error("Could not create iterator.", e1); - } + bibIterator = new CrossRefIterator(info); if (bibIterator == null) { sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); @@ -134,25 +120,6 @@ public void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } - private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { - ObjectMapper mapper = new ObjectMapper(); - try { - String msg = mapper.writeValueAsString(entries); - logger.info("Submitting " + msg); - ItemCreationResponse response = zoteroConnector.addEntries(info, entries); - if (response != null) { - 
logger.info(response.getSuccessful() + ""); - logger.error(response.getFailed() + ""); - } else { - logger.error("Item creation failed."); - } - return response; - } catch (URISyntaxException e) { - logger.error("Could not store new entry.", e); - } catch (JsonProcessingException e) { - logger.error("Could not write JSON."); - } - return null; - } + } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java index 0ffd022..ff8b72f 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java @@ -1,6 +1,5 @@ package edu.asu.diging.citesphere.importer.core.service.impl; -import java.net.URISyntaxException; import java.util.HashMap; import java.util.Map; @@ -9,7 +8,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; @@ -21,7 +19,6 @@ import edu.asu.diging.citesphere.importer.core.model.ItemType; import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; -import edu.asu.diging.citesphere.importer.core.service.ICitesphereConnector; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; @@ -133,27 +130,6 @@ public void startImport(KafkaJobMessage message, JobInfo info) { response = 
response != null ? response : new ItemCreationResponse(); sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } - - private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { - ObjectMapper mapper = new ObjectMapper(); - try { - String msg = mapper.writeValueAsString(entries); - logger.info("Submitting " + msg); - ItemCreationResponse response = zoteroConnector.addEntries(info, entries); - if (response != null) { - logger.info(response.getSuccessful() + ""); - logger.error(response.getFailed() + ""); - } else { - logger.error("Item creation failed."); - } - return response; - } catch (URISyntaxException e) { - logger.error("Could not store new entry.", e); - } catch (JsonProcessingException e) { - logger.error("Could not write JSON."); - } - return null; - } private String downloadFile(KafkaJobMessage message) { String file = null; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java deleted file mode 100644 index 6e017d5..0000000 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefHandler.java +++ /dev/null @@ -1,39 +0,0 @@ -package edu.asu.diging.citesphere.importer.core.service.parse.impl; - -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Service; - -import edu.asu.diging.citesphere.importer.core.exception.HandlerTestException; -import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; -import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; -import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; -import edu.asu.diging.citesphere.importer.core.service.parse.FileHandler; -import 
edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; -import edu.asu.diging.citesphere.importer.core.service.parse.iterators.CrossRefIterator; -import edu.asu.diging.citesphere.importer.core.service.parse.jstor.xml.IArticleTagParser; - -@Service -public class CrossRefHandler implements FileHandler { - - @Autowired - private IArticleTagParser parserRegistry; - - @Value("${_citesphere_download_path}") - private String downloadPath; - - @Override - public boolean canHandle(String path) throws HandlerTestException { - if (path == null) { - return true; - } - return false; - } - - @Override - public BibEntryIterator getIterator(String path, IHandlerRegistry callback, JobInfo info) - throws IteratorCreationException { - return new CrossRefIterator(info); - } - -} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index abdbfb6..1208e09 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -18,8 +18,8 @@ import edu.asu.diging.citesphere.importer.core.model.impl.ContributionType; import edu.asu.diging.citesphere.importer.core.model.impl.Contributor; import edu.asu.diging.citesphere.importer.core.model.impl.ContributorId; -import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; import edu.asu.diging.citesphere.importer.core.model.impl.Issn; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.model.impl.Reference; import edu.asu.diging.citesphere.importer.core.model.impl.ReviewInfo; import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; @@ -29,7 
+29,6 @@ import edu.asu.diging.crossref.model.IssnType; import edu.asu.diging.crossref.model.Item; import edu.asu.diging.crossref.model.Person; -import edu.asu.diging.crossref.model.Review; import edu.asu.diging.crossref.service.CrossrefConfiguration; import edu.asu.diging.crossref.service.CrossrefWorksService; import edu.asu.diging.crossref.service.impl.CrossrefWorksServiceImpl; @@ -59,36 +58,36 @@ public CrossRefIterator(JobInfo info) { private void init() { crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); typeMap = new HashMap(); - typeMap.put("journal-article", CrossRefPublication.ARTICLE); - typeMap.put("book", CrossRefPublication.BOOK); - typeMap.put("book-chapter", CrossRefPublication.BOOK_CHAPTER); - typeMap.put("monograph", CrossRefPublication.MONOGRAPH); - typeMap.put("journal-issue", CrossRefPublication.JOURNAL_ISSUE); - typeMap.put("reference-entry", CrossRefPublication.REFERNCE_ENTRY); - typeMap.put("posted-content", CrossRefPublication.POSTED_CONTENT); - typeMap.put("component", CrossRefPublication.COMPONENT); - typeMap.put("edited-book", CrossRefPublication.EDITED_BOOK); - typeMap.put("proceedings-article", CrossRefPublication.PROCEEDINGS_ARTICLE); - typeMap.put("dissertation", CrossRefPublication.DISSERTATION); - typeMap.put("book-section", CrossRefPublication.BOOK_SECTION); - typeMap.put("report-component", CrossRefPublication.REPORT_COMPONENT); - typeMap.put("report", CrossRefPublication.REPORT); - typeMap.put("peer-review", CrossRefPublication.PEER_REVIEW); - typeMap.put("book-track", CrossRefPublication.BOOK_TRACK); - typeMap.put("book-part", CrossRefPublication.BOOK_PART); - typeMap.put("other", CrossRefPublication.OTHER); - typeMap.put("journal-volume", CrossRefPublication.JORUNAL_VOLUME); - typeMap.put("book-set", CrossRefPublication.BOOK_SET); - typeMap.put("journal", CrossRefPublication.JOURNAL); - typeMap.put("proceedings-series", CrossRefPublication.PROCEEDINGS_SERIES); - 
typeMap.put("report-series", CrossRefPublication.REPORT_SERIES); - typeMap.put("proceedings", CrossRefPublication.PROCEEDINGS); - typeMap.put("database", CrossRefPublication.DATABASE); - typeMap.put("standard", CrossRefPublication.STANDARD); - typeMap.put("reference-book", CrossRefPublication.REFERENCE_BOOK); - typeMap.put("grant", CrossRefPublication.GRANT); - typeMap.put("dataset", CrossRefPublication.DATASET); - typeMap.put("book-series", CrossRefPublication.BOOK_SERIES); + typeMap.put("journal-article", Publication.ARTICLE); + typeMap.put("book", Publication.BOOK); + typeMap.put("book-chapter", Publication.BOOK_CHAPTER); + typeMap.put("monograph", Publication.MONOGRAPH); + typeMap.put("journal-issue", Publication.JOURNAL_ISSUE); + typeMap.put("reference-entry", Publication.REFERNCE_ENTRY); + typeMap.put("posted-content", Publication.POSTED_CONTENT); + typeMap.put("component", Publication.COMPONENT); + typeMap.put("edited-book", Publication.EDITED_BOOK); + typeMap.put("proceedings-article", Publication.PROCEEDINGS_ARTICLE); + typeMap.put("dissertation", Publication.DISSERTATION); + typeMap.put("book-section", Publication.BOOK_SECTION); + typeMap.put("report-component", Publication.REPORT_COMPONENT); + typeMap.put("report", Publication.REPORT); + typeMap.put("peer-review", Publication.PEER_REVIEW); + typeMap.put("book-track", Publication.BOOK_TRACK); + typeMap.put("book-part", Publication.BOOK_PART); + typeMap.put("other", Publication.OTHER); + typeMap.put("journal-volume", Publication.JORUNAL_VOLUME); + typeMap.put("book-set", Publication.BOOK_SET); + typeMap.put("journal", Publication.JOURNAL); + typeMap.put("proceedings-series", Publication.PROCEEDINGS_SERIES); + typeMap.put("report-series", Publication.REPORT_SERIES); + typeMap.put("proceedings", Publication.PROCEEDINGS); + typeMap.put("database", Publication.DATABASE); + typeMap.put("standard", Publication.STANDARD); + typeMap.put("reference-book", Publication.REFERENCE_BOOK); + typeMap.put("grant", 
Publication.GRANT); + typeMap.put("dataset", Publication.DATASET); + typeMap.put("book-series", Publication.BOOK_SERIES); parseCrossRef(); } @@ -99,7 +98,7 @@ private void parseCrossRef() { for (String doi : info.getDois()) { try { Item item = crossrefService.get(doi); - article = new CrossRefPublication(); + article = new Publication(); article.setArticleType(typeMap.get(item.getType())); article.setJournalMeta(parseJournalMeta(item)); article.setArticleMeta(parseArticleMeta(item)); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java index 551ca37..88e947d 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java @@ -13,7 +13,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import edu.asu.diging.citesphere.importer.core.model.BibEntry; -import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.importer.core.zotero.template.ItemJsonGenerator; @@ -42,7 +42,7 @@ public ObjectNode generateJson(JsonNode template, BibEntry entry) { ItemJsonGenerator generator = generators.get(entry.getArticleType()); if (generator != null) { return generator.generate(template, entry); - } else if (entry instanceof CrossRefPublication){ + } else if (entry instanceof Publication){ generator = generators.get("CrossRef"); return generator.generate(template, entry); } From a06f8af55f13bf298781815d7d3b2161a7657b03 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Tue, 2 
Jul 2024 15:52:59 -0700 Subject: [PATCH 29/35] [CITE-177] Addressed PR comments --- .../core/service/CrossrefReferenceImportProcessorTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java b/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java index 378ecf0..205e1e3 100644 --- a/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java +++ b/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java @@ -28,7 +28,7 @@ import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; -import edu.asu.diging.citesphere.importer.core.model.impl.CrossRefPublication; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.impl.CrossrefReferenceImportProcessor; import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; @@ -86,7 +86,7 @@ public void testStartImport_successful() throws Exception { BibEntry entry = mock(BibEntry.class); when(bibIterator.hasNext()).thenReturn(true, false); when(bibIterator.next()).thenReturn(entry); - when(entry.getArticleType()).thenReturn(CrossRefPublication.ARTICLE); + when(entry.getArticleType()).thenReturn(Publication.ARTICLE); JsonNode template = mock(ObjectNode.class); when(zoteroConnector.getTemplate(ItemType.JOURNAL_ARTICLE)).thenReturn(template); From 57675561d3c368c133d1434c7be8c803c451d970 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Tue, 2 Jul 2024 16:01:55 -0700 Subject: [PATCH 30/35] 
[CITE-177] Changed crossref-connect-version in pom.xml --- citesphere-importer/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/citesphere-importer/pom.xml b/citesphere-importer/pom.xml index 4659def..437ca6c 100644 --- a/citesphere-importer/pom.xml +++ b/citesphere-importer/pom.xml @@ -16,7 +16,7 @@ 3.0.11.RELEASE 2.2.6.RELEASE 0.5 - 0.2 + 0.3-SNAPSHOT $2a$04$oQo44vqcDIFRoYKiAXoNheurzkwX9dcNmowvTX/hsWuBMwijqn44i From bfcf53f0eeee690b25ba208f47f6dbacfdadd0ae Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Wed, 10 Jul 2024 16:42:43 -0700 Subject: [PATCH 31/35] [CITE-177] Addressed PR comments --- .../core/service/AbstractImportProcessor.java | 113 ++++++++++++++++- .../CrossrefReferenceImportProcessor.java | 114 +----------------- .../service/impl/FileImportProcessor.java | 112 ++--------------- 3 files changed, 124 insertions(+), 215 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java index 48a37f6..4ca3f6d 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java @@ -1,20 +1,31 @@ package edu.asu.diging.citesphere.importer.core.service; import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.Map; + +import javax.annotation.PostConstruct; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import 
edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; import edu.asu.diging.citesphere.importer.core.exception.MessageCreationException; import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; +import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.ItemType; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; +import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.messages.KafkaTopics; import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; import edu.asu.diging.citesphere.messages.model.KafkaImportReturnMessage; @@ -34,6 +45,54 @@ public abstract class AbstractImportProcessor implements IImportProcessor { @Autowired private IZoteroConnector zoteroConnector; + + @Autowired + private IJsonGenerationService generationService; + + /** + * Map that maps internal bibliographical formats (constants of {@link Publication} + * class) to Zotero item types ({@link ItemType} enum). 
+ */ + private Map itemTypeMapping = new HashMap<>(); + + @PostConstruct + public void init() { + // this needs to be changed and improved, but for now it works + itemTypeMapping.put(Publication.ARTICLE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK, ItemType.BOOK); + itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.LETTER, ItemType.LETTER); + itemTypeMapping.put(Publication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); + itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.DOCUMENT, ItemType.DOCUMENT); + itemTypeMapping.put(Publication.MONOGRAPH, ItemType.BOOK); + itemTypeMapping.put(Publication.JOURNAL_ISSUE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.REFERNCE_ENTRY, ItemType.DICTIONARY_ENTRY); + itemTypeMapping.put(Publication.POSTED_CONTENT, ItemType.WEBPAGE); + itemTypeMapping.put(Publication.COMPONENT, ItemType.ATTACHMENT); + itemTypeMapping.put(Publication.EDITED_BOOK, ItemType.BOOK); + itemTypeMapping.put(Publication.PROCEEDINGS_ARTICLE, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.DISSERTATION, ItemType.THESIS); + itemTypeMapping.put(Publication.BOOK_SECTION, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.REPORT_COMPONENT, ItemType.REPORT); + itemTypeMapping.put(Publication.REPORT, ItemType.REPORT); + itemTypeMapping.put(Publication.PEER_REVIEW, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK_TRACK, ItemType.BOOK); + itemTypeMapping.put(Publication.BOOK_PART, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.OTHER, ItemType.DOCUMENT); + itemTypeMapping.put(Publication.JORUNAL_VOLUME, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK_SET, ItemType.BOOK); + itemTypeMapping.put(Publication.JOURNAL, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.PROCEEDINGS_SERIES, ItemType.CONFERENCE_PAPER); + 
itemTypeMapping.put(Publication.REPORT_SERIES, ItemType.REPORT); + itemTypeMapping.put(Publication.PROCEEDINGS, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.DATABASE, ItemType.DATABASE); + itemTypeMapping.put(Publication.STANDARD, ItemType.STATUTE); + itemTypeMapping.put(Publication.REFERENCE_BOOK, ItemType.DICTIONARY_ENTRY); + itemTypeMapping.put(Publication.GRANT, ItemType.DOCUMENT); + itemTypeMapping.put(Publication.DATASET, ItemType.DATABASE); + itemTypeMapping.put(Publication.BOOK_SERIES, ItemType.BOOK); + } @Override public void process(KafkaJobMessage message) { @@ -44,9 +103,7 @@ public void process(KafkaJobMessage message) { } startImport(message, info); } - - protected abstract void startImport(KafkaJobMessage message, JobInfo info); - + private JobInfo getJobInfo(KafkaJobMessage message) { JobInfo info = null; try { @@ -73,7 +130,7 @@ protected ICitesphereConnector getCitesphereConnector() { return connector; } - protected ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { + private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { ObjectMapper mapper = new ObjectMapper(); try { String msg = mapper.writeValueAsString(entries); @@ -93,4 +150,52 @@ protected ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { } return null; } + + private void startImport(KafkaJobMessage message, JobInfo info) { + ObjectMapper mapper = new ObjectMapper(); + ArrayNode root = mapper.createArrayNode(); + int entryCounter = 0; + + sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); + + BibEntryIterator bibIterator = getbibIterator(message, info); + if (bibIterator == null) { + sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); + return; + } + + while (bibIterator.hasNext()) { + BibEntry entry = bibIterator.next(); + if (entry.getArticleType() == null) { + // something is wrong with this entry, let's ignore it + continue; + } + ItemType type = 
itemTypeMapping.get(entry.getArticleType()); + JsonNode template = zoteroConnector.getTemplate(type); + ObjectNode bibNode = generationService.generateJson(template, entry); + + root.add(bibNode); + entryCounter++; + + // we can submit max 50 entries to Zotoro + if (entryCounter >= 50) { + submitEntries(root, info); + entryCounter = 0; + root = mapper.createArrayNode(); + } + + } + + bibIterator.close(); + + ItemCreationResponse response = null; + if (entryCounter > 0) { + response = submitEntries(root, info); + } + + response = response != null ? response : new ItemCreationResponse(); + sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); + } + + protected abstract BibEntryIterator getbibIterator(KafkaJobMessage message, JobInfo info); } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 43debd7..3719e2e 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -1,125 +1,17 @@ package edu.asu.diging.citesphere.importer.core.service.impl; -import java.util.HashMap; -import java.util.Map; - -import javax.annotation.PostConstruct; - -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ArrayNode; -import com.fasterxml.jackson.databind.node.ObjectNode; - -import edu.asu.diging.citesphere.importer.core.model.BibEntry; -import edu.asu.diging.citesphere.importer.core.model.ItemType; -import 
edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.iterators.CrossRefIterator; -import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; -import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; -import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; -import edu.asu.diging.citesphere.messages.model.ResponseCode; -import edu.asu.diging.citesphere.messages.model.Status; @Service public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { - private Map itemTypeMapping = new HashMap<>(); - - @Autowired - private IZoteroConnector zoteroConnector; - - @Autowired - private IJsonGenerationService generationService; - - @PostConstruct - public void init() { - itemTypeMapping.put(Publication.ARTICLE, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(Publication.BOOK, ItemType.BOOK); - itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION); - itemTypeMapping.put(Publication.MONOGRAPH, ItemType.BOOK); - itemTypeMapping.put(Publication.JOURNAL_ISSUE, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(Publication.REFERNCE_ENTRY, ItemType.DICTIONARY_ENTRY); - itemTypeMapping.put(Publication.POSTED_CONTENT, ItemType.WEBPAGE); - itemTypeMapping.put(Publication.COMPONENT, ItemType.ATTACHMENT); - itemTypeMapping.put(Publication.EDITED_BOOK, ItemType.BOOK); - itemTypeMapping.put(Publication.PROCEEDINGS_ARTICLE, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(Publication.DISSERTATION, ItemType.THESIS); - itemTypeMapping.put(Publication.BOOK_SECTION, ItemType.BOOK_SECTION); - itemTypeMapping.put(Publication.REPORT_COMPONENT, ItemType.REPORT); - itemTypeMapping.put(Publication.REPORT, ItemType.REPORT); - 
itemTypeMapping.put(Publication.PEER_REVIEW, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(Publication.BOOK_TRACK, ItemType.BOOK); - itemTypeMapping.put(Publication.BOOK_PART, ItemType.BOOK_SECTION); - itemTypeMapping.put(Publication.OTHER, ItemType.DOCUMENT); - itemTypeMapping.put(Publication.JORUNAL_VOLUME, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(Publication.BOOK_SET, ItemType.BOOK); - itemTypeMapping.put(Publication.JOURNAL, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(Publication.PROCEEDINGS_SERIES, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(Publication.REPORT_SERIES, ItemType.REPORT); - itemTypeMapping.put(Publication.PROCEEDINGS, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(Publication.DATABASE, ItemType.DATABASE); - itemTypeMapping.put(Publication.STANDARD, ItemType.STATUTE); - itemTypeMapping.put(Publication.REFERENCE_BOOK, ItemType.DICTIONARY_ENTRY); - itemTypeMapping.put(Publication.GRANT, ItemType.DOCUMENT); - itemTypeMapping.put(Publication.DATASET, ItemType.DATABASE); - itemTypeMapping.put(Publication.BOOK_SERIES, ItemType.BOOK); + @Override + protected BibEntryIterator getbibIterator(KafkaJobMessage message, JobInfo info) { + return new CrossRefIterator(info); } - - public void startImport(KafkaJobMessage message, JobInfo info) { - logger.info("Starting import for " + info.getDois()); - - ObjectMapper mapper = new ObjectMapper(); - ArrayNode root = mapper.createArrayNode(); - int entryCounter = 0; - - sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); - BibEntryIterator bibIterator = null; - bibIterator = new CrossRefIterator(info); - - if (bibIterator == null) { - sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); - return; - } - - while (bibIterator.hasNext()) { - BibEntry entry = bibIterator.next(); - if (entry.getArticleType() == null) { - // something is wrong with this entry, let's ignore it - continue; - } - ItemType type = itemTypeMapping.get(entry.getArticleType()); - 
JsonNode template = zoteroConnector.getTemplate(type); - ObjectNode bibNode = generationService.generateJson(template, entry); - - root.add(bibNode); - entryCounter++; - - // we can submit max 50 entries to Zotoro - if (entryCounter >= 50) { - submitEntries(root, info); - entryCounter = 0; - root = mapper.createArrayNode(); - } - - } - - bibIterator.close(); - - ItemCreationResponse response = null; - if (entryCounter > 0) { - response = submitEntries(root, info); - } - - response = response != null ? response : new ItemCreationResponse(); - sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); - } - - - } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java index ff8b72f..8b5024d 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java @@ -1,29 +1,13 @@ package edu.asu.diging.citesphere.importer.core.service.impl; -import java.util.HashMap; -import java.util.Map; - -import javax.annotation.PostConstruct; - import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ArrayNode; -import com.fasterxml.jackson.databind.node.ObjectNode; - import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; -import edu.asu.diging.citesphere.importer.core.model.BibEntry; -import edu.asu.diging.citesphere.importer.core.model.ItemType; -import 
edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; -import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; -import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; -import edu.asu.diging.citesphere.messages.model.ItemCreationResponse; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; import edu.asu.diging.citesphere.messages.model.ResponseCode; import edu.asu.diging.citesphere.messages.model.Status; @@ -42,47 +26,25 @@ public class FileImportProcessor extends AbstractImportProcessor { @Autowired private IHandlerRegistry handlerRegistry; - @Autowired - private IZoteroConnector zoteroConnector; - - @Autowired - private IJsonGenerationService generationService; - - - /** - * Map that maps internal bibliographical formats (contants of {@link Publication} - * class) to Zotero item types ({@link ItemType} enum). 
- */ - private Map itemTypeMapping = new HashMap<>(); - - @PostConstruct - public void init() { - // this needs to be changed and improved, but for now it works - itemTypeMapping.put(Publication.ARTICLE, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(Publication.BOOK, ItemType.BOOK); - itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION); - itemTypeMapping.put(Publication.LETTER, ItemType.LETTER); - itemTypeMapping.put(Publication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); - itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(Publication.DOCUMENT, ItemType.DOCUMENT); + private String downloadFile(KafkaJobMessage message) { + String file = null; + try { + file = getCitesphereConnector().getUploadeFile(message.getId()); + } catch (CitesphereCommunicationException e) { + logger.error("Could not get Zotero info.", e); + return null; + } + return file; } - /* - * (non-Javadoc) - * - * @see - * edu.asu.diging.citesphere.importer.core.service.impl.IImportProcessor#process - * (edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaJobMessage) - */ @Override - public void startImport(KafkaJobMessage message, JobInfo info) { + protected BibEntryIterator getbibIterator(KafkaJobMessage message, JobInfo info) { String filePath = downloadFile(message); if (filePath == null) { sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X20); - return; + return null; } - sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); BibEntryIterator bibIterator = null; try { bibIterator = handlerRegistry.handleFile(info, filePath); @@ -90,56 +52,6 @@ public void startImport(KafkaJobMessage message, JobInfo info) { logger.error("Could not create iterator.", e1); } - if (bibIterator == null) { - sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); - return; - } - - ObjectMapper mapper = new ObjectMapper(); - ArrayNode root = mapper.createArrayNode(); - int entryCounter = 0; - while 
(bibIterator.hasNext()) { - BibEntry entry = bibIterator.next(); - if (entry.getArticleType() == null) { - // something is wrong with this entry, let's ignore it - continue; - } - ItemType type = itemTypeMapping.get(entry.getArticleType()); - JsonNode template = zoteroConnector.getTemplate(type); - ObjectNode bibNode = generationService.generateJson(template, entry); - - root.add(bibNode); - entryCounter++; - - // we can submit max 50 entries to Zotoro - if (entryCounter >= 50) { - submitEntries(root, info); - entryCounter = 0; - root = mapper.createArrayNode(); - } - - } - - bibIterator.close(); - - ItemCreationResponse response = null; - if (entryCounter > 0) { - response = submitEntries(root, info); - } - - response = response != null ? response : new ItemCreationResponse(); - sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); + return bibIterator; } - - private String downloadFile(KafkaJobMessage message) { - String file = null; - try { - file = getCitesphereConnector().getUploadeFile(message.getId()); - } catch (CitesphereCommunicationException e) { - logger.error("Could not get Zotero info.", e); - return null; - } - return file; - } - } From 6670084a87a68df77c8259d04aab7508a48f1bb2 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Thu, 11 Jul 2024 16:46:16 -0700 Subject: [PATCH 32/35] [CITE-177] Addressed PR comments --- citesphere-importer/pom.xml | 2 +- .../parse/iterators/CrossRefIterator.java | 65 +++++----- .../template/impl/JsonGenerationService.java | 6 +- .../CrossrefReferenceImportProcessorTest.java | 112 ------------------ 4 files changed, 30 insertions(+), 155 deletions(-) delete mode 100644 citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java diff --git a/citesphere-importer/pom.xml b/citesphere-importer/pom.xml index 437ca6c..4659def 100644 --- a/citesphere-importer/pom.xml +++ b/citesphere-importer/pom.xml @@ -16,7 +16,7 @@ 3.0.11.RELEASE 2.2.6.RELEASE 0.5 
- 0.3-SNAPSHOT + 0.2 $2a$04$oQo44vqcDIFRoYKiAXoNheurzkwX9dcNmowvTX/hsWuBMwijqn44i diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 1208e09..47187aa 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -38,8 +38,6 @@ public class CrossRefIterator implements BibEntryIterator { private final Logger logger = LoggerFactory.getLogger(getClass()); private JobInfo info; - private List articles; - private BibEntry article; private int currentIndex; private boolean iteratorDone = false; @@ -88,29 +86,6 @@ private void init() { typeMap.put("grant", Publication.GRANT); typeMap.put("dataset", Publication.DATASET); typeMap.put("book-series", Publication.BOOK_SERIES); - parseCrossRef(); - - } - - private void parseCrossRef() { - - articles = new ArrayList<>(); - for (String doi : info.getDois()) { - try { - Item item = crossrefService.get(doi); - article = new Publication(); - article.setArticleType(typeMap.get(item.getType())); - article.setJournalMeta(parseJournalMeta(item)); - article.setArticleMeta(parseArticleMeta(item)); - - articles.add(article); - } catch (RequestFailedException | IOException e) { - logger.error("Couuld not retrieve work for doi: "+ doi, e); - // for now we just log the exceptions - // we might want to devise a way to decide if the - // service might be down and we should stop sending requests. 
- } - } } private ContainerMeta parseJournalMeta(Item item) { @@ -150,8 +125,6 @@ private ContainerMeta parseJournalMeta(Item item) { chair.setIds(Arrays.asList(contributorID)); contributors.add(chair); } - // added Editors & translators to article meta. - return meta; } @@ -193,11 +166,19 @@ private ArticleMeta parseArticleMeta(Item item) { review.setFullDescription(item.getReview().getCompetingInterestStatement()); } meta.setReviewInfo(review); - meta.setDocumentType(item.getType()); + meta.setDocumentType(item.getType()); + if(item.getReference() != null) { + meta.setReferences(mapReferences(item.getReference())); + } + meta.setReferenceCount(item.getReferenceCount().toString()); + return meta; + } + + private List mapReferences(List itemReferences) { List references = new ArrayList<>(); - if(item.getReference() != null) { - for(edu.asu.diging.crossref.model.Reference itemRef: item.getReference()) { + if(itemReferences != null) { + for(edu.asu.diging.crossref.model.Reference itemRef: itemReferences) { Reference ref = new Reference(); ref.setAuthorString(itemRef.getAuthor()); ref.setContributors(null); @@ -222,13 +203,10 @@ private ArticleMeta parseArticleMeta(Item item) { references.add(ref); } } - meta.setReferences(references); - meta.setReferenceCount(item.getReferenceCount().toString()); - - return meta; + return references; } - public List mapPersonToContributor(List personList) { + private List mapPersonToContributor(List personList) { List contributors = new ArrayList(); for(Person person: personList) { Contributor contributor = new Contributor(); @@ -257,7 +235,20 @@ public BibEntry next() { if (iteratorDone) { return null; } - BibEntry nextEntry = articles.get(currentIndex); + BibEntry nextEntry = new Publication();; + + try { + Item item = crossrefService.get(info.getDois().get(currentIndex)); + nextEntry.setArticleType(typeMap.get(item.getType())); + nextEntry.setJournalMeta(parseJournalMeta(item)); + nextEntry.setArticleMeta(parseArticleMeta(item)); 
+ } catch (RequestFailedException | IOException e) { + logger.error("Could not retrieve work for doi: "+ info.getDois().get(currentIndex), e); + // for now we just log the exceptions + // we might want to devise a way to decide if the + // service might be down and we should stop sending requests. + } + currentIndex++; return nextEntry; } @@ -265,7 +256,7 @@ public BibEntry next() { @Override public boolean hasNext() { - if (currentIndex >= articles.size()) { + if (currentIndex >= info.getDois().size()) { iteratorDone = true; } return !iteratorDone; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java index 88e947d..d1a2675 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java @@ -42,11 +42,7 @@ public ObjectNode generateJson(JsonNode template, BibEntry entry) { ItemJsonGenerator generator = generators.get(entry.getArticleType()); if (generator != null) { return generator.generate(template, entry); - } else if (entry instanceof Publication){ - generator = generators.get("CrossRef"); - return generator.generate(template, entry); - } - + } return null; } } diff --git a/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java b/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java deleted file mode 100644 index 205e1e3..0000000 --- a/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/CrossrefReferenceImportProcessorTest.java +++ /dev/null @@ -1,112 +0,0 @@ -package 
edu.asu.diging.citesphere.importer.core.service; - -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; -import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.util.Arrays; - -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.InjectMocks; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; -import org.mockito.Spy; -import org.mockito.runners.MockitoJUnitRunner; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ArrayNode; -import com.fasterxml.jackson.databind.node.ObjectNode; - -import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; -import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; -import edu.asu.diging.citesphere.importer.core.model.BibEntry; -import edu.asu.diging.citesphere.importer.core.model.ItemType; -import edu.asu.diging.citesphere.importer.core.model.impl.Publication; -import edu.asu.diging.citesphere.importer.core.service.impl.CrossrefReferenceImportProcessor; -import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; -import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; -import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; -import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; -import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; -import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; -import edu.asu.diging.citesphere.messages.model.ResponseCode; -import edu.asu.diging.citesphere.messages.model.Status; - -@RunWith(MockitoJUnitRunner.class) -public class CrossrefReferenceImportProcessorTest { - - @Spy - 
@InjectMocks - private CrossrefReferenceImportProcessor processor; - - @Mock - private IZoteroConnector zoteroConnector; - - @Mock - private IJsonGenerationService generationService; - - @Mock - private IHandlerRegistry handlerRegistry; - - @Mock - private BibEntryIterator bibIterator; - - @Mock - private KafkaJobMessage message; - - @Mock - private JobInfo info; - - @Mock - private KafkaRequestProducer requestProducer; - - private ObjectMapper mapper; - - @Before - public void setUp() { - MockitoAnnotations.initMocks(this); - processor.init(); - mapper = new ObjectMapper(); - when(message.getId()).thenReturn("testMessageId"); - doNothing().when(processor).sendMessage(any(), anyString(), any(Status.class), any(ResponseCode.class)); - } - - @Test - public void testStartImport_successful() throws Exception { - when(info.getDois()).thenReturn(Arrays.asList("10.1234/example1", "10.5678/example2")); - when(handlerRegistry.handleFile(info, null)).thenReturn(bibIterator); - - BibEntry entry = mock(BibEntry.class); - when(bibIterator.hasNext()).thenReturn(true, false); - when(bibIterator.next()).thenReturn(entry); - when(entry.getArticleType()).thenReturn(Publication.ARTICLE); - - JsonNode template = mock(ObjectNode.class); - when(zoteroConnector.getTemplate(ItemType.JOURNAL_ARTICLE)).thenReturn(template); - ObjectNode bibNode = mapper.createObjectNode(); - when(generationService.generateJson(template, entry)).thenReturn(bibNode); - - processor.startImport(message, info); - - verify(zoteroConnector).addEntries(eq(info), any(ArrayNode.class)); - verify(processor).sendMessage(any(), eq("testMessageId"), eq(Status.DONE), eq(ResponseCode.S00)); - } - - @Test - public void testStartImport_iteratorCreationException() throws Exception { - when(info.getDois()).thenReturn(Arrays.asList("10.1234/example1", "10.5678/example2")); - when(handlerRegistry.handleFile(info, null)).thenThrow(new IteratorCreationException("error")); - - processor.startImport(message, info); - - 
verify(processor).sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); - } - -} From 0023dd2efa8584ca4eeff470d70b277138d61bc0 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Fri, 2 Aug 2024 16:32:55 -0700 Subject: [PATCH 33/35] [CITE-177] Addressing PR comments --- .../importer/core/model/impl/Publication.java | 3 - .../core/service/AbstractImportProcessor.java | 18 ++---- .../CrossrefReferenceImportProcessor.java | 2 +- .../service/impl/FileImportProcessor.java | 2 +- .../parse/iterators/CrossRefIterator.java | 62 ++++++------------- .../template/impl/JsonGenerationService.java | 1 - 6 files changed, 28 insertions(+), 60 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java index 81fb1af..31c177f 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java @@ -13,15 +13,12 @@ public class Publication implements BibEntry { public final static String PROCEEDINGS_PAPER = "conferencePaper"; public final static String DOCUMENT = "document"; // publication types in CrossRef - public final static String MONOGRAPH = "monograph"; public final static String JOURNAL_ISSUE = "journal-issue"; public final static String REFERNCE_ENTRY = "reference-entry"; public final static String POSTED_CONTENT = "posted-content"; public final static String COMPONENT = "component"; public final static String EDITED_BOOK = "edited-book"; - public final static String PROCEEDINGS_ARTICLE = "proceedings-article"; public final static String DISSERTATION = "dissertation"; - public final static String BOOK_SECTION = "book-section"; public final static String REPORT_COMPONENT = "report-component"; public final static String REPORT = "report"; public 
final static String PEER_REVIEW = "peer-review"; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java index 4ca3f6d..293772f 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java @@ -65,33 +65,27 @@ public void init() { itemTypeMapping.put(Publication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); itemTypeMapping.put(Publication.DOCUMENT, ItemType.DOCUMENT); - itemTypeMapping.put(Publication.MONOGRAPH, ItemType.BOOK); - itemTypeMapping.put(Publication.JOURNAL_ISSUE, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK, ItemType.BOOK); itemTypeMapping.put(Publication.REFERNCE_ENTRY, ItemType.DICTIONARY_ENTRY); itemTypeMapping.put(Publication.POSTED_CONTENT, ItemType.WEBPAGE); itemTypeMapping.put(Publication.COMPONENT, ItemType.ATTACHMENT); itemTypeMapping.put(Publication.EDITED_BOOK, ItemType.BOOK); - itemTypeMapping.put(Publication.PROCEEDINGS_ARTICLE, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); itemTypeMapping.put(Publication.DISSERTATION, ItemType.THESIS); - itemTypeMapping.put(Publication.BOOK_SECTION, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION); itemTypeMapping.put(Publication.REPORT_COMPONENT, ItemType.REPORT); itemTypeMapping.put(Publication.REPORT, ItemType.REPORT); itemTypeMapping.put(Publication.PEER_REVIEW, ItemType.JOURNAL_ARTICLE); itemTypeMapping.put(Publication.BOOK_TRACK, ItemType.BOOK); itemTypeMapping.put(Publication.BOOK_PART, ItemType.BOOK_SECTION); 
itemTypeMapping.put(Publication.OTHER, ItemType.DOCUMENT); - itemTypeMapping.put(Publication.JORUNAL_VOLUME, ItemType.JOURNAL_ARTICLE); itemTypeMapping.put(Publication.BOOK_SET, ItemType.BOOK); - itemTypeMapping.put(Publication.JOURNAL, ItemType.JOURNAL_ARTICLE); - itemTypeMapping.put(Publication.PROCEEDINGS_SERIES, ItemType.CONFERENCE_PAPER); - itemTypeMapping.put(Publication.REPORT_SERIES, ItemType.REPORT); itemTypeMapping.put(Publication.PROCEEDINGS, ItemType.CONFERENCE_PAPER); itemTypeMapping.put(Publication.DATABASE, ItemType.DATABASE); itemTypeMapping.put(Publication.STANDARD, ItemType.STATUTE); - itemTypeMapping.put(Publication.REFERENCE_BOOK, ItemType.DICTIONARY_ENTRY); + itemTypeMapping.put(Publication.REFERENCE_BOOK, ItemType.BOOK); itemTypeMapping.put(Publication.GRANT, ItemType.DOCUMENT); itemTypeMapping.put(Publication.DATASET, ItemType.DATABASE); - itemTypeMapping.put(Publication.BOOK_SERIES, ItemType.BOOK); } @Override @@ -158,7 +152,7 @@ private void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); - BibEntryIterator bibIterator = getbibIterator(message, info); + BibEntryIterator bibIterator = getBibEntryIterator(message, info); if (bibIterator == null) { sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); return; @@ -197,5 +191,5 @@ private void startImport(KafkaJobMessage message, JobInfo info) { sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } - protected abstract BibEntryIterator getbibIterator(KafkaJobMessage message, JobInfo info); + protected abstract BibEntryIterator getBibEntryIterator(KafkaJobMessage message, JobInfo info); } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 3719e2e..7506a42 100644 --- 
a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -11,7 +11,7 @@ public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { @Override - protected BibEntryIterator getbibIterator(KafkaJobMessage message, JobInfo info) { + protected BibEntryIterator getBibEntryIterator(KafkaJobMessage message, JobInfo info) { return new CrossRefIterator(info); } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java index 8b5024d..88debc7 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java @@ -38,7 +38,7 @@ private String downloadFile(KafkaJobMessage message) { } @Override - protected BibEntryIterator getbibIterator(KafkaJobMessage message, JobInfo info) { + protected BibEntryIterator getBibEntryIterator(KafkaJobMessage message, JobInfo info) { String filePath = downloadFile(message); if (filePath == null) { sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X20); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 47187aa..8f2bec4 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -38,18 +39,16 @@ public class CrossRefIterator implements BibEntryIterator { private final Logger logger = LoggerFactory.getLogger(getClass()); private JobInfo info; - private int currentIndex; - private boolean iteratorDone = false; private Map typeMap; private CrossrefWorksService crossrefService; - + private Iterator doisIterator; public CrossRefIterator(JobInfo info) { this.info = info; - currentIndex = 0; + doisIterator = info.getDois().iterator(); init(); } @@ -59,15 +58,15 @@ private void init() { typeMap.put("journal-article", Publication.ARTICLE); typeMap.put("book", Publication.BOOK); typeMap.put("book-chapter", Publication.BOOK_CHAPTER); - typeMap.put("monograph", Publication.MONOGRAPH); + typeMap.put("monograph", Publication.BOOK); typeMap.put("journal-issue", Publication.JOURNAL_ISSUE); typeMap.put("reference-entry", Publication.REFERNCE_ENTRY); typeMap.put("posted-content", Publication.POSTED_CONTENT); typeMap.put("component", Publication.COMPONENT); typeMap.put("edited-book", Publication.EDITED_BOOK); - typeMap.put("proceedings-article", Publication.PROCEEDINGS_ARTICLE); + typeMap.put("proceedings-article", Publication.PROCEEDINGS_PAPER); typeMap.put("dissertation", Publication.DISSERTATION); - typeMap.put("book-section", Publication.BOOK_SECTION); + typeMap.put("book-section", Publication.BOOK_CHAPTER); typeMap.put("report-component", Publication.REPORT_COMPONENT); typeMap.put("report", Publication.REPORT); typeMap.put("peer-review", Publication.PEER_REVIEW); @@ -103,28 +102,6 @@ private ContainerMeta parseJournalMeta(Item item) { } } meta.setIssns(issnList); - - List contributors = new ArrayList<>(); - if(item.getChair() != null) { - Person itemChair = 
item.getChair(); - Contributor chair = new Contributor(); - chair.setContributionType(ContributionType.CHAIR); - chair.setGivenName(itemChair.getGiven()); - chair.setSurname(itemChair.getFamily()); - chair.setFullName(itemChair.getName()); - List affiliations = new ArrayList<>(); - for(Institution institute: itemChair.getAffiliation()) { - Affiliation affiliation = new Affiliation(); - affiliation.setName(institute.getName()); - affiliations.add(affiliation); - } - chair.setAffiliations(affiliations); - ContributorId contributorID = new ContributorId(); - contributorID.setId(itemChair.getOrcid()); - contributorID.setIdSystem("ORCID"); - chair.setIds(Arrays.asList(contributorID)); - contributors.add(chair); - } return meta; } @@ -134,17 +111,22 @@ private ArticleMeta parseArticleMeta(Item item) { List contributors = new ArrayList<>(); // List of authors if(item.getAuthor() != null) { - contributors.addAll(mapPersonToContributor(item.getAuthor())); + contributors.addAll(mapPersonToContributor(item.getAuthor(), ContributionType.AUTHOR)); } // List of editors if(item.getEditor() != null) { - contributors.addAll(mapPersonToContributor(item.getEditor())); + contributors.addAll(mapPersonToContributor(item.getEditor(), ContributionType.EDITOR)); } // List of translators if(item.getTranslator() != null) { - contributors.addAll(mapPersonToContributor(item.getTranslator())); + contributors.addAll(mapPersonToContributor(item.getTranslator(), ContributionType.TRANSLATOR)); + } + // List of chair + if(item.getChair() != null) { + contributors.addAll(mapPersonToContributor(Arrays.asList(item.getChair()), ContributionType.CHAIR)); } meta.setContributors(contributors); + meta.setAuthorNotesCorrespondence(null); ArticlePublicationDate publicationDate = new ArticlePublicationDate(); List dateParts = item.getPublished().getIndexedDateParts(); @@ -206,11 +188,11 @@ private List mapReferences(List mapPersonToContributor(List personList) { + private List mapPersonToContributor(List 
personList, String contributionType) { List contributors = new ArrayList(); for(Person person: personList) { Contributor contributor = new Contributor(); - contributor.setContributionType(ContributionType.EDITOR); + contributor.setContributionType(contributionType); contributor.setGivenName(person.getGiven()); contributor.setSurname(person.getFamily()); contributor.setFullName(person.getName()); @@ -232,34 +214,30 @@ private List mapPersonToContributor(List personList) { @Override public BibEntry next() { - if (iteratorDone) { + if (!doisIterator.hasNext()) { return null; } BibEntry nextEntry = new Publication();; try { - Item item = crossrefService.get(info.getDois().get(currentIndex)); + Item item = crossrefService.get(doisIterator.next()); nextEntry.setArticleType(typeMap.get(item.getType())); nextEntry.setJournalMeta(parseJournalMeta(item)); nextEntry.setArticleMeta(parseArticleMeta(item)); } catch (RequestFailedException | IOException e) { - logger.error("Could not retrieve work for doi: "+ info.getDois().get(currentIndex), e); + logger.error("Could not retrieve work for doi: "+ doisIterator.next(), e); // for now we just log the exceptions // we might want to devise a way to decide if the // service might be down and we should stop sending requests. 
} - currentIndex++; return nextEntry; } @Override public boolean hasNext() { - if (currentIndex >= info.getDois().size()) { - iteratorDone = true; - } - return !iteratorDone; + return doisIterator.hasNext(); } @Override diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java index d1a2675..27fa4d8 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java @@ -13,7 +13,6 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import edu.asu.diging.citesphere.importer.core.model.BibEntry; -import edu.asu.diging.citesphere.importer.core.model.impl.Publication; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.importer.core.zotero.template.ItemJsonGenerator; From d954231220e1b011b75050aa8547ccd4c677497f Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Thu, 5 Sep 2024 12:04:52 -0700 Subject: [PATCH 34/35] [CITE-177] Addressing PR comments --- .../core/service/{ => impl}/AbstractImportProcessor.java | 5 +++-- .../core/service/impl/CrossrefReferenceImportProcessor.java | 1 - .../importer/core/service/impl/FileImportProcessor.java | 1 - .../core/service/parse/iterators/CrossRefIterator.java | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) rename citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/{ => impl}/AbstractImportProcessor.java (97%) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/AbstractImportProcessor.java similarity index 97% rename from citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java rename to citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/AbstractImportProcessor.java index 293772f..0a20d02 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/AbstractImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/AbstractImportProcessor.java @@ -1,4 +1,4 @@ -package edu.asu.diging.citesphere.importer.core.service; +package edu.asu.diging.citesphere.importer.core.service.impl; import java.net.URISyntaxException; import java.util.HashMap; @@ -22,7 +22,8 @@ import edu.asu.diging.citesphere.importer.core.model.BibEntry; import edu.asu.diging.citesphere.importer.core.model.ItemType; import edu.asu.diging.citesphere.importer.core.model.impl.Publication; -import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.importer.core.service.ICitesphereConnector; +import edu.asu.diging.citesphere.importer.core.service.IImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java index 7506a42..6197279 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java +++ 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -2,7 +2,6 @@ import org.springframework.stereotype.Service; -import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.iterators.CrossRefIterator; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java index 88debc7..ce80e41 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java @@ -5,7 +5,6 @@ import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; -import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 8f2bec4..890b31d 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ 
b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -89,7 +89,7 @@ private void init() { private ContainerMeta parseJournalMeta(Item item) { ContainerMeta meta = new ContainerMeta(); - meta.setContainerTitle(String.join(", ", item.getContainerTitle())); + meta.setContainerTitle(item.getContainerTitle().get(0)); meta.setPublisherName(item.getPublisher()); meta.setPublisherLocation(item.getPublisherLocation()); List issnList = new ArrayList(); From b7ded348004857cee49559a782d605fac79c3014 Mon Sep 17 00:00:00 2001 From: PradnyaC11 Date: Fri, 20 Sep 2024 16:13:49 -0700 Subject: [PATCH 35/35] [CITE-177] Addressed PR comments --- .../importer/core/model/impl/ArticleMeta.java | 7 +++ .../parse/iterators/CrossRefIterator.java | 60 ++++++++++--------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java index 1b403f5..2596a37 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java @@ -7,6 +7,7 @@ public class ArticleMeta { private List articleIds; private String articleTitle; + private String shortTitle; private List categoryGroups = new ArrayList(); private List contributors; private String authorNotesCorrespondence; @@ -56,6 +57,12 @@ public String getArticleTitle() { public void setArticleTitle(String articleTitle) { this.articleTitle = articleTitle; } + public String getShortTitle() { + return shortTitle; + } + public void setShortTitle(String shortTitle) { + this.shortTitle = shortTitle; + } public List getCategories() { return categoryGroups; } diff --git 
a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java index 890b31d..eec0eb1 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -107,7 +107,8 @@ private ContainerMeta parseJournalMeta(Item item) { private ArticleMeta parseArticleMeta(Item item) { ArticleMeta meta = new ArticleMeta(); - meta.setArticleTitle(String.join(", ", item.getTitle())); + meta.setArticleTitle(item.getTitle().get(0)); + meta.setShortTitle(String.join(", ", item.getTitle().subList(1, item.getTitle().size()))); List contributors = new ArrayList<>(); // List of authors if(item.getAuthor() != null) { @@ -126,7 +127,7 @@ private ArticleMeta parseArticleMeta(Item item) { contributors.addAll(mapPersonToContributor(Arrays.asList(item.getChair()), ContributionType.CHAIR)); } meta.setContributors(contributors); - + meta.setAuthorNotesCorrespondence(null); ArticlePublicationDate publicationDate = new ArticlePublicationDate(); List dateParts = item.getPublished().getIndexedDateParts(); @@ -156,38 +157,43 @@ private ArticleMeta parseArticleMeta(Item item) { return meta; } - + private List mapReferences(List itemReferences) { List references = new ArrayList<>(); if(itemReferences != null) { for(edu.asu.diging.crossref.model.Reference itemRef: itemReferences) { - Reference ref = new Reference(); - ref.setAuthorString(itemRef.getAuthor()); - ref.setContributors(null); - ref.setTitle(itemRef.getArticleTitle()); - ref.setYear(itemRef.getYear()); - if(itemRef.getDoi()!=null && !itemRef.getDoi().isBlank()) { - ref.setIdentifier(itemRef.getDoi()); - ref.setIdentifierType("DOI"); - 
ref.setSource(itemRef.getDoiAssertedBy()); - } else if (itemRef.getIssn()!=null && !itemRef.getIssn().isBlank()) { - ref.setIdentifier(itemRef.getIssn()); - ref.setIdentifierType("ISSN"); - } else if (itemRef.getIsbn()!=null && !itemRef.getIsbn().isBlank()) { - ref.setIdentifier(itemRef.getIsbn()); - ref.setIdentifierType("ISBN"); - } - ref.setFirstPage(itemRef.getFirstPage()); - ref.setVolume(itemRef.getVolume()); - ref.setReferenceId(itemRef.getKey()); - ref.setReferenceString(itemRef.getUnstructured()); - ref.setReferenceStringRaw(itemRef.getUnstructured()); - references.add(ref); + references.add(mapSingleReference(itemRef)); } } return references; } + private Reference mapSingleReference(edu.asu.diging.crossref.model.Reference itemRef) { + Reference ref = new Reference(); + ref.setAuthorString(itemRef.getAuthor()); + ref.setContributors(null); + ref.setTitle(itemRef.getArticleTitle()); + ref.setYear(itemRef.getYear()); + if(itemRef.getDoi()!=null && !itemRef.getDoi().isBlank()) { + ref.setIdentifier(itemRef.getDoi()); + ref.setIdentifierType("DOI"); + ref.setSource(itemRef.getDoiAssertedBy()); + } else if (itemRef.getIssn()!=null && !itemRef.getIssn().isBlank()) { + ref.setIdentifier(itemRef.getIssn()); + ref.setIdentifierType("ISSN"); + } else if (itemRef.getIsbn()!=null && !itemRef.getIsbn().isBlank()) { + ref.setIdentifier(itemRef.getIsbn()); + ref.setIdentifierType("ISBN"); + } + ref.setFirstPage(itemRef.getFirstPage()); + ref.setVolume(itemRef.getVolume()); + ref.setReferenceId(itemRef.getKey()); + ref.setReferenceString(itemRef.getUnstructured()); + ref.setReferenceStringRaw(itemRef.getUnstructured()); + + return ref; + } + private List mapPersonToContributor(List personList, String contributionType) { List contributors = new ArrayList(); for(Person person: personList) { @@ -218,7 +224,7 @@ public BibEntry next() { return null; } BibEntry nextEntry = new Publication();; - + try { Item item = crossrefService.get(doisIterator.next()); 
nextEntry.setArticleType(typeMap.get(item.getType())); @@ -230,7 +236,7 @@ public BibEntry next() { // we might want to devise a way to decide if the // service might be down and we should stop sending requests. } - + return nextEntry; }