Story/cite-177 There needs to be an import from Crossref #22

Open · wants to merge 35 commits into base: develop

Changes from 33 commits (35 commits total)

Commits
0106646 [CITE-177] started on processor to import from crossref (jdamerow, Jul 7, 2022)
656fab7 [CITE-177] Trying to add functionality for starting the import (PratikGiri, Mar 7, 2023)
a82d996 [CITE-177] Trying to add functionality for picking the job. (PratikGiri, Mar 8, 2023)
d3aa18f [CITE-177] Adding function for starting the import (PratikGiri, Mar 8, 2023)
1710f06 [CITE-177] Trying to add crossref import functionality (PratikGiri, Mar 9, 2023)
10d8372 [CITE-177] Trying to create Iterator for Crossref (PratikGiri, Mar 10, 2023)
27ee091 [CITE-177] Iterator changes. (PratikGiri, Mar 14, 2023)
f0c4b61 [CITE-177] Updating the Iterator and ImportProcessor (PratikGiri, Mar 16, 2023)
58f195c [CITE-177] Adding iterator (PratikGiri, Mar 27, 2023)
a2d4979 Adding iterator. (PratikGiri, Mar 31, 2023)
9616319 [CITE-177] Correcting the iterator (PratikGiri, Mar 31, 2023)
e3856ea [CITE-177] Updating the Crossref Iterator (PratikGiri, Apr 4, 2023)
8c4545b [CITE-177] CrossrefIterator and identifier (PratikGiri, Apr 10, 2023)
c2bfbd0 [CITE-177] Resolved error CrossrefReferenceImportProcessor class (PradnyaC11, Apr 17, 2024)
74bcd5f [CITE-177] Added CrossRef types to CrossRefPublication, and updated C… (PradnyaC11, Apr 19, 2024)
7ecdfd0 [CITE-177] Updated CrossRefIterator (PradnyaC11, Apr 23, 2024)
41fed60 [CITE-177] Updated CrossRefIterator (PradnyaC11, Apr 25, 2024)
7f097df [CITE-177] updated generateJson method of JsonGenerationService (PradnyaC11, Apr 26, 2024)
46c2904 [CITE-177] Updated itemTypeMapping in crossref import processer (PradnyaC11, Apr 30, 2024)
e36ff3d [CITE-177] Added more mapping in CrossRefInportProcessor (PradnyaC11, May 1, 2024)
1c94f97 [CITE-177] Udpated CrossRefIterator for typeMap and iterator logic (PradnyaC11, May 3, 2024)
bed4968 [CITE-177] Added test cases for CrossrefReferenceImportProcessor (PradnyaC11, Jun 13, 2024)
e48be13 [CITE-177] Refactoring code to fix issues. (PradnyaC11, Jun 13, 2024)
c086f44 [CITE-177] Renamed file to remove unwanted file commit (PradnyaC11, Jun 13, 2024)
287ffed [CITE-177] Updated CrossRefIterator.java (PradnyaC11, Jun 13, 2024)
5fd853e [CITE-177] Addressed PR comments (PradnyaC11, Jun 25, 2024)
c4c4f13 [CITE-177] Addressing PR comments (PradnyaC11, Jun 27, 2024)
eb6a3a1 [CITE-177] Addressed PR comments (PradnyaC11, Jul 1, 2024)
a06f8af [CITE-177] Addressed PR comments (PradnyaC11, Jul 2, 2024)
5767556 [CITE-177] Changed crossref-connect-version in pom.xml (PradnyaC11, Jul 2, 2024)
bfcf53f [CITE-177] Addressed PR comments (PradnyaC11, Jul 10, 2024)
6670084 [CITE-177] Addressed PR comments (PradnyaC11, Jul 11, 2024)
0023dd2 [CITE-177] Addressing PR comments (PradnyaC11, Aug 2, 2024)
d954231 [CITE-177] Addressing PR comments (PradnyaC11, Sep 5, 2024)
b7ded34 [CITE-177] Addressed PR comments (PradnyaC11, Sep 20, 2024)
10 changes: 8 additions & 2 deletions citesphere-importer/pom.xml
@@ -15,7 +15,8 @@
     <spring-data.version>Lovelace-RELEASE</spring-data.version>
     <thymeleaf.version>3.0.11.RELEASE</thymeleaf.version>
     <spring.kafka.version>2.2.6.RELEASE</spring.kafka.version>
-    <citesphere.messages.version>0.2</citesphere.messages.version>
+    <citesphere.messages.version>0.5</citesphere.messages.version>
+    <crossref-connect-version>0.2</crossref-connect-version>
 
     <admin.password>$2a$04$oQo44vqcDIFRoYKiAXoNheurzkwX9dcNmowvTX/hsWuBMwijqn44i</admin.password>
 
@@ -69,6 +70,11 @@
         <artifactId>citesphere-messages</artifactId>
         <version>${citesphere.messages.version}</version>
     </dependency>
+    <dependency>
+        <groupId>edu.asu.diging</groupId>
+        <artifactId>crossref-connect</artifactId>
+        <version>${crossref-connect-version}</version>
+    </dependency>
 
     <!-- Spring -->
     <dependency>
@@ -328,7 +334,7 @@
     <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-war-plugin</artifactId>
-        <version>2.6</version>
+        <version>3.3.1</version>
         <configuration>
             <webResources>
                 <resource>
ReferenceImportListener.java

@@ -5,6 +5,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
 import org.springframework.kafka.annotation.KafkaListener;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -18,7 +19,12 @@ public class ReferenceImportListener {
     private final Logger logger = LoggerFactory.getLogger(getClass());
 
     @Autowired
-    private IImportProcessor processor;
+    @Qualifier("fileImportProcessor")
+    private IImportProcessor fileProcessor;
+
+    @Autowired
+    @Qualifier("crossrefReferenceImportProcessor")
+    private IImportProcessor crossrefProcessor;
 
     @KafkaListener(topics = KafkaTopics.REFERENCES_IMPORT_TOPIC)
     public void receiveMessage(String message) {
@@ -32,6 +38,20 @@ public void receiveMessage(String message) {
             return;
         }
 
-        processor.process(msg);
+        fileProcessor.process(msg);
     }
+
+    @KafkaListener(topics = KafkaTopics.REFERENCES_IMPORT_CROSSREF_TOPIC)
+    public void receiveCrossrefImportMessage(String message) {
+        ObjectMapper mapper = new ObjectMapper();
+        KafkaJobMessage msg = null;
+        try {
+            msg = mapper.readValue(message, KafkaJobMessage.class);
+        } catch (IOException e) {
+            logger.error("Could not unmarshall message.", e);
+            return;
+        }
+
+        crossrefProcessor.process(msg);
+    }
 }
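For a sense of the flow: a message only has to land on the new topic with a KafkaJobMessage-shaped JSON body for receiveCrossrefImportMessage to pick it up. A minimal, hypothetical producer sketch follows; the topic constant comes from citesphere-messages as used in the diff above, but the class, the KafkaTemplate wiring, and the assumption that a payload with just an "id" field is sufficient are all illustrative, not code from this PR.

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Service;

import edu.asu.diging.citesphere.messages.KafkaTopics;

// Hypothetical sketch: publish a job id to the new Crossref import topic.
// The listener above deserializes the payload into a KafkaJobMessage.
@Service
public class CrossrefImportRequester {

    @Autowired
    private KafkaTemplate<String, String> kafkaTemplate;

    public void requestImport(String jobId) {
        // Assumption: KafkaJobMessage maps a JSON "id" field to getId(),
        // which is the only accessor the processors here rely on.
        kafkaTemplate.send(KafkaTopics.REFERENCES_IMPORT_CROSSREF_TOPIC,
                "{\"id\": \"" + jobId + "\"}");
    }
}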
ItemType.java

@@ -45,7 +45,9 @@ public enum ItemType {
     THESIS("thesis"),
     TV_BROADCAST("tvBroadcast"),
     VIDEO_RECORDIG("videoRecording"),
-    WEBPAGE("webpage");
+    WEBPAGE("webpage"),
+    DATABASE("database");
+
 
     final private String zoteroKey;
 
ContributionType.java

@@ -4,4 +4,6 @@ public interface ContributionType {
 
     public final static String AUTHOR = "author";
     public final static String EDITOR = "editor";
+    public final static String TRANSLATOR = "translator";
+    public final static String CHAIR = "chair";
 }
Publication.java

@@ -12,6 +12,31 @@ public class Publication implements BibEntry {
     public final static String NEWS_ITEM = "newspaperArticle";
     public final static String PROCEEDINGS_PAPER = "conferencePaper";
     public final static String DOCUMENT = "document";
+    // publication types in CrossRef
+    public final static String JOURNAL_ISSUE = "journal-issue";
+    public final static String REFERNCE_ENTRY = "reference-entry";
+    public final static String POSTED_CONTENT = "posted-content";
+    public final static String COMPONENT = "component";
+    public final static String EDITED_BOOK = "edited-book";
+    public final static String DISSERTATION = "dissertation";
+    public final static String REPORT_COMPONENT = "report-component";
+    public final static String REPORT = "report";
+    public final static String PEER_REVIEW = "peer-review";
+    public final static String BOOK_TRACK = "book-track";
+    public final static String BOOK_PART = "book-part";
+    public final static String OTHER = "other";
+    public final static String JORUNAL_VOLUME = "journal-volume";
+    public final static String BOOK_SET = "book-set";
+    public final static String JOURNAL = "journal";
+    public final static String PROCEEDINGS_SERIES = "proceedings-series";
+    public final static String REPORT_SERIES = "report-series";
+    public final static String PROCEEDINGS = "proceedings";
+    public final static String DATABASE = "database";
+    public final static String STANDARD = "standard";
+    public final static String REFERENCE_BOOK = "reference-book";
+    public final static String GRANT = "grant";
+    public final static String DATASET = "dataset";
+    public final static String BOOK_SERIES = "book-series";
 
     private String articleType;
     private ContainerMeta containerMeta;
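These string values mirror the type field the Crossref REST API returns for a work record (for example from https://api.crossref.org/works/{doi}). A minimal sketch of pulling that field out of a response with Jackson follows; the helper class and method are hypothetical, not part of this PR.

import java.io.IOException;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import edu.asu.diging.citesphere.importer.core.model.impl.Publication;

// Hypothetical helper, not part of this PR: reads the "type" of a Crossref
// work, e.g. "journal-issue", "reference-book", or "dataset", which are the
// values the new Publication constants cover.
public class CrossrefTypeExample {

    public static String readType(String responseBody) throws IOException {
        ObjectMapper mapper = new ObjectMapper();
        // Crossref wraps the work record in a "message" object.
        JsonNode type = mapper.readTree(responseBody).path("message").path("type");
        // Fall back to "other" when the type field is absent.
        return type.isMissingNode() ? Publication.OTHER : type.asText();
    }
}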
Member commented:

I think the abstract import processor should still stay in the impl package; it's part of the implementation, after all.

ImportProcessor.java → AbstractImportProcessor.java

@@ -1,4 +1,4 @@
-package edu.asu.diging.citesphere.importer.core.service.impl;
+package edu.asu.diging.citesphere.importer.core.service;
 
 import java.net.URISyntaxException;
 import java.util.HashMap;
@@ -9,7 +9,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Service;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -18,16 +17,13 @@
 import com.fasterxml.jackson.databind.node.ObjectNode;
 
 import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException;
-import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException;
 import edu.asu.diging.citesphere.importer.core.exception.MessageCreationException;
 import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer;
 import edu.asu.diging.citesphere.importer.core.model.BibEntry;
 import edu.asu.diging.citesphere.importer.core.model.ItemType;
 import edu.asu.diging.citesphere.importer.core.model.impl.Publication;
-import edu.asu.diging.citesphere.importer.core.service.ICitesphereConnector;
-import edu.asu.diging.citesphere.importer.core.service.IImportProcessor;
+import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo;
 import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator;
-import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry;
 import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector;
 import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService;
 import edu.asu.diging.citesphere.messages.KafkaTopics;
@@ -37,40 +33,28 @@
 import edu.asu.diging.citesphere.messages.model.ResponseCode;
 import edu.asu.diging.citesphere.messages.model.Status;
 
-/**
- * This class coordinates the import process. It connects with Citesphere and
- * downloads the files to be imported. It then starts the transformation process from
- * import format to internal bibliographical format and then turns the internal
- * bibliographical format to Json that can be submitted to Zotero.
- * @author jdamerow
- *
- */
-@Service
-public class ImportProcessor implements IImportProcessor {
-
-    private final Logger logger = LoggerFactory.getLogger(getClass());
+public abstract class AbstractImportProcessor implements IImportProcessor {
+
+    protected final Logger logger = LoggerFactory.getLogger(getClass());
 
     @Autowired
-    private ICitesphereConnector connector;
+    private KafkaRequestProducer requestProducer;
 
     @Autowired
-    private IHandlerRegistry handlerRegistry;
+    private ICitesphereConnector connector;
 
     @Autowired
     private IZoteroConnector zoteroConnector;
 
     @Autowired
     private IJsonGenerationService generationService;
 
-    @Autowired
-    private KafkaRequestProducer requestProducer;
-
     /**
     * Map that maps internal bibliographical formats (contants of {@link Publication}
     * class) to Zotero item types ({@link ItemType} enum).
     */
     private Map<String, ItemType> itemTypeMapping = new HashMap<>();
 
     @PostConstruct
     public void init() {
         // this needs to be changed and improved, but for now it works
@@ -81,45 +65,99 @@ public void init() {
         itemTypeMapping.put(Publication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE);
         itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER);
         itemTypeMapping.put(Publication.DOCUMENT, ItemType.DOCUMENT);
+        itemTypeMapping.put(Publication.BOOK, ItemType.BOOK);
+        itemTypeMapping.put(Publication.REFERNCE_ENTRY, ItemType.DICTIONARY_ENTRY);
+        itemTypeMapping.put(Publication.POSTED_CONTENT, ItemType.WEBPAGE);
+        itemTypeMapping.put(Publication.COMPONENT, ItemType.ATTACHMENT);
+        itemTypeMapping.put(Publication.EDITED_BOOK, ItemType.BOOK);
+        itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER);
+        itemTypeMapping.put(Publication.DISSERTATION, ItemType.THESIS);
+        itemTypeMapping.put(Publication.BOOK_CHAPTER, ItemType.BOOK_SECTION);
+        itemTypeMapping.put(Publication.REPORT_COMPONENT, ItemType.REPORT);
+        itemTypeMapping.put(Publication.REPORT, ItemType.REPORT);
+        itemTypeMapping.put(Publication.PEER_REVIEW, ItemType.JOURNAL_ARTICLE);
+        itemTypeMapping.put(Publication.BOOK_TRACK, ItemType.BOOK);
+        itemTypeMapping.put(Publication.BOOK_PART, ItemType.BOOK_SECTION);
+        itemTypeMapping.put(Publication.OTHER, ItemType.DOCUMENT);
+        itemTypeMapping.put(Publication.BOOK_SET, ItemType.BOOK);
+        itemTypeMapping.put(Publication.PROCEEDINGS, ItemType.CONFERENCE_PAPER);
+        itemTypeMapping.put(Publication.DATABASE, ItemType.DATABASE);
+        itemTypeMapping.put(Publication.STANDARD, ItemType.STATUTE);
+        itemTypeMapping.put(Publication.REFERENCE_BOOK, ItemType.BOOK);
+        itemTypeMapping.put(Publication.GRANT, ItemType.DOCUMENT);
+        itemTypeMapping.put(Publication.DATASET, ItemType.DATABASE);
     }
 
-    /*
-     * (non-Javadoc)
-     *
-     * @see
-     * edu.asu.diging.citesphere.importer.core.service.impl.IImportProcessor#process
-     * (edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaJobMessage)
-     */
 
     @Override
     public void process(KafkaJobMessage message) {
         JobInfo info = getJobInfo(message);
         if (info == null) {
             sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X10);
             return;
         }
 
-        String filePath = downloadFile(message);
-        if (filePath == null) {
-            sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X20);
-            return;
+        startImport(message, info);
     }
 
+    private JobInfo getJobInfo(KafkaJobMessage message) {
+        JobInfo info = null;
+        try {
+            info = connector.getJobInfo(message.getId());
+        } catch (CitesphereCommunicationException e) {
+            logger.error("Could not get Zotero info.", e);
+            return null;
+        }
 
-        sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00);
-        BibEntryIterator bibIterator = null;
+        return info;
+    }
 
+    protected void sendMessage(ItemCreationResponse message, String jobId, Status status, ResponseCode code) {
+        KafkaImportReturnMessage returnMessage = new KafkaImportReturnMessage(message, jobId);
+        returnMessage.setStatus(status);
+        returnMessage.setCode(code);
         try {
-            bibIterator = handlerRegistry.handleFile(info, filePath);
-        } catch (IteratorCreationException e1) {
-            logger.error("Could not create iterator.", e1);
+            requestProducer.sendRequest(returnMessage, KafkaTopics.REFERENCES_IMPORT_DONE_TOPIC);
+        } catch (MessageCreationException e) {
+            logger.error("Exception sending message.", e);
         }
     }
 
+    protected ICitesphereConnector getCitesphereConnector() {
+        return connector;
+    }
+
+    private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) {
+        ObjectMapper mapper = new ObjectMapper();
+        try {
+            String msg = mapper.writeValueAsString(entries);
+            logger.info("Submitting " + msg);
+            ItemCreationResponse response = zoteroConnector.addEntries(info, entries);
+            if (response != null) {
+                logger.info(response.getSuccessful() + "");
+                logger.error(response.getFailed() + "");
+            } else {
+                logger.error("Item creation failed.");
+            }
+            return response;
+        } catch (URISyntaxException e) {
+            logger.error("Could not store new entry.", e);
+        } catch (JsonProcessingException e) {
+            logger.error("Could not write JSON.");
+        }
+        return null;
+    }
+
+    private void startImport(KafkaJobMessage message, JobInfo info) {
+        ObjectMapper mapper = new ObjectMapper();
+        ArrayNode root = mapper.createArrayNode();
+        int entryCounter = 0;
+
+        sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00);
+
+        BibEntryIterator bibIterator = getBibEntryIterator(message, info);
         if (bibIterator == null) {
             sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30);
             return;
         }
 
-        ObjectMapper mapper = new ObjectMapper();
-        ArrayNode root = mapper.createArrayNode();
-        int entryCounter = 0;
-
         while (bibIterator.hasNext()) {
             BibEntry entry = bibIterator.next();
             if (entry.getArticleType() == null) {
@@ -153,60 +191,5 @@ public void process(KafkaJobMessage message) {
         sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00);
     }
 
-    private void sendMessage(ItemCreationResponse message, String jobId, Status status, ResponseCode code) {
-        KafkaImportReturnMessage returnMessage = new KafkaImportReturnMessage(message, jobId);
-        returnMessage.setStatus(status);
-        returnMessage.setCode(code);
-        try {
-            requestProducer.sendRequest(returnMessage, KafkaTopics.REFERENCES_IMPORT_DONE_TOPIC);
-        } catch (MessageCreationException e) {
-            // FIXME handle this case
-            logger.error("Exception sending message.", e);
-        }
-    }
-
-    private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) {
-        ObjectMapper mapper = new ObjectMapper();
-        try {
-            String msg = mapper.writeValueAsString(entries);
-            logger.info("Submitting " + msg);
-            ItemCreationResponse response = zoteroConnector.addEntries(info, entries);
-            if (response != null) {
-                logger.info(response.getSuccessful() + "");
-                logger.error(response.getFailed() + "");
-            } else {
-                logger.error("Item creation failed.");
-            }
-            return response;
-        } catch (URISyntaxException e) {
-            logger.error("Could not store new entry.", e);
-        } catch (JsonProcessingException e) {
-            logger.error("Could not write JSON.");
-        }
-        return null;
-    }
-
-    private JobInfo getJobInfo(KafkaJobMessage message) {
-        JobInfo info = null;
-        try {
-            info = connector.getJobInfo(message.getId());
-        } catch (CitesphereCommunicationException e) {
-            // FIXME this needs to be handled better
-            logger.error("Could not get Zotero info.", e);
-            return null;
-        }
-        return info;
-    }
-
-    private String downloadFile(KafkaJobMessage message) {
-        String file = null;
-        try {
-            file = connector.getUploadeFile(message.getId());
-        } catch (CitesphereCommunicationException e) {
-            // FIXME this needs to be handled better
-            logger.error("Could not get Zotero info.", e);
-            return null;
-        }
-        return file;
-    }
+    protected abstract BibEntryIterator getBibEntryIterator(KafkaJobMessage message, JobInfo info);
 }
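With this refactor the base class is a template method: process() and startImport() drive the pipeline, and each import source supplies only getBibEntryIterator(). Under that assumption, a Crossref-side subclass could look roughly like the following sketch. The class name matches the listener's qualifier, but the body, the import paths, and the CrossRefIterator constructor are guesses; that class's code is not shown in this diff.

package edu.asu.diging.citesphere.importer.core.service.impl;

import org.springframework.stereotype.Service;

import edu.asu.diging.citesphere.importer.core.service.AbstractImportProcessor;
import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator;
import edu.asu.diging.citesphere.messages.model.KafkaJobMessage;

// Hypothetical sketch of the Crossref-side processor, registered under the
// bean name that @Qualifier("crossrefReferenceImportProcessor") expects.
// The PR's real CrossrefReferenceImportProcessor may differ.
@Service
public class CrossrefReferenceImportProcessor extends AbstractImportProcessor {

    @Override
    protected BibEntryIterator getBibEntryIterator(KafkaJobMessage message, JobInfo info) {
        // Assumed: the job info identifies the works to import, and the
        // CrossRefIterator from this PR's commit history walks the matching
        // Crossref records one BibEntry at a time.
        return new CrossRefIterator(info);
    }
}

The same hook keeps the file-based path working: a file-import subclass would download the uploaded file via getCitesphereConnector() and hand it to the handler registry, which is exactly the logic that process() used to do inline before the refactor.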