Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/js 2477 x rechnung m1 #43

Merged
merged 5 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/continuous-delivery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ jobs:
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git checkout master
git checkout maintenance-1
git reset --hard HEAD

sed -ri "s,<version>.*</version>,<version>${{ steps.semanticversion.outputs.new_version }}</version>," README.md
Expand All @@ -115,7 +115,7 @@ jobs:
- name: Push changes
uses: ad-m/github-push-action@master
with:
branch: master
branch: maintenance-1
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: Create release
Expand Down
11 changes: 8 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
<dependency>
<groupId>org.codehaus.mojo</groupId>
<artifactId>extra-enforcer-rules</artifactId>
<version>1.7.0</version>
<version>1.8.0</version>
</dependency>
</dependencies>
<executions>
Expand Down Expand Up @@ -181,7 +181,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>3.0.1</version>
<version>3.1.1</version>
<configuration>
<username>git</username>
<password/>
Expand Down Expand Up @@ -224,13 +224,18 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>3.1.1</version>
<version>3.1.2</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.6.3</version>
</plugin>
<plugin>
<groupId>org.sonarsource.scanner.maven</groupId>
<artifactId>sonar-maven-plugin</artifactId>
<version>3.9.1.2184</version>
</plugin>
</plugins>
</pluginManagement>
</build>
Expand Down
30 changes: 29 additions & 1 deletion src/main/java/org/jadice/filetype/matchers/PDFMatcher.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package org.jadice.filetype.matchers;

import static org.jadice.filetype.matchers.XMLMatcher.X_RECHNUNG_KEY;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.Map.Entry;

Expand Down Expand Up @@ -32,6 +35,7 @@
import org.apache.pdfbox.text.PDFTextStripper;
import org.jadice.filetype.Context;
import org.jadice.filetype.database.MimeTypeAction;
import org.jadice.filetype.io.MemoryInputStream;
import org.jadice.filetype.io.SeekableInputStream;
import org.jadice.filetype.pdfutil.PDFBoxSignatureUtil;
import org.slf4j.Logger;
Expand Down Expand Up @@ -85,7 +89,7 @@ public boolean matches(final Context context) {
try (PDDocument document = PDDocument.load(sis)) {
context.setProperty(MimeTypeAction.KEY, PDF_MIME_TYPE);

Map<String, Object> pdfDetails = new HashMap<String, Object>();
Map<String, Object> pdfDetails = new HashMap<>();
context.setProperty(DETAILS_KEY, pdfDetails);

pdfDetails.put(NUMBER_OF_PAGES_KEY, Integer.valueOf(document.getNumberOfPages()));
Expand All @@ -101,6 +105,7 @@ public boolean matches(final Context context) {
PDMetadata meta = catalog.getMetadata();
if (null != meta) {
provideXMPMetadata(pdfDetails, meta);
checkIfXRechnung(pdfDetails);
}

PDEncryption encryption = document.getEncryption();
Expand Down Expand Up @@ -273,6 +278,29 @@ private static void addTextInfo(final Map<String, Object> pdfDetails, final PDDo
}
}

/**
 * Marks the PDF as an electronic invoice (X-Rechnung) based on its metadata XML.
 * <p>
 * The metadata string stored under {@code METADATA_KEY} is handed to an {@link XMLMatcher}; when
 * the matcher reports a match, {@code X_RECHNUNG_KEY} is set to {@code true} in the given map.
 * Nothing happens when no metadata string is present. An {@link IOException} raised while
 * matching is logged and otherwise ignored.
 * <p>
 * NOTE(review): {@code XMLMatcher.matches} appears to return {@code true} for every XML document
 * that parses successfully, not only for X-Rechnung documents. If so, this method flags every PDF
 * with parseable metadata; confirm and, if needed, inspect the matcher's X-Rechnung detail
 * instead of its return value.
 *
 * @param pdfDetails the map of PDF details with the metadata XML
 */
private static void checkIfXRechnung(final Map<String, Object> pdfDetails) {
  final Object xmp = pdfDetails.get(METADATA_KEY);
  if (!(xmp instanceof String)) {
    // no textual metadata available, nothing to inspect
    return;
  }

  try {
    final XMLMatcher matcher = new XMLMatcher();
    final byte[] xmpBytes = ((String) xmp).getBytes(StandardCharsets.UTF_8);
    final Context metadataContext = new Context(
        new MemoryInputStream(xmpBytes), new HashMap<>(), null, Locale.ENGLISH, "");
    if (matcher.matches(metadataContext)) {
      pdfDetails.put(X_RECHNUNG_KEY, Boolean.TRUE);
    }
  } catch (IOException e) {
    LOGGER.error("Failed to parse metadata XML", e);
  }
}

/**
* Reads the whole stream to determine the length of it.
*
Expand Down
30 changes: 29 additions & 1 deletion src/main/java/org/jadice/filetype/matchers/XMLMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ public class XMLMatcher extends Matcher {

public static final String DOCUMENT_XML_VERSION_KEY = "document_xml_version";

public static final String X_RECHNUNG_KEY = "x_rechnung";

public static final int DEFAULT_MAX_ENTITY_EXPANSIONS = 20;
private static final String JAXP_ENTITY_EXPANSION_LIMIT_KEY = "jdk.xml.entityExpansionLimit";
private static volatile int MAX_ENTITY_EXPANSIONS = determineMaxEntityExpansions();
Expand Down Expand Up @@ -103,6 +105,13 @@ public class XMLMatcher extends Matcher {

private static SoftReference<SAXParserFactory> saxFactoryReference = new SoftReference<>(null);

private static final Map<String, String> X_RECHNUNG_ROOT_ELEMENT_XMLNS_PAIRS;
static {
X_RECHNUNG_ROOT_ELEMENT_XMLNS_PAIRS = new HashMap<>();
X_RECHNUNG_ROOT_ELEMENT_XMLNS_PAIRS.put("Invoice", "urn:oasis:names:specification:ubl:schema:xsd:Invoice-2");
X_RECHNUNG_ROOT_ELEMENT_XMLNS_PAIRS.put("CrossIndustryInvoice", "urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100");
}

@Override
public boolean matches(final Context context) throws IOException {
try {
Expand All @@ -121,6 +130,10 @@ public boolean matches(final Context context) throws IOException {
if (handler.getEncoding() != null && !handler.getEncoding().isEmpty()) {
mimeType += ";charset=" + handler.getEncoding();
}
final boolean isXRechnung = matchesXRechnung(handler.getRootElementName(), handler.getNamespaceURI());
if (isXRechnung) {
mimeType += ";x-rechnung=true";
}

context.setProperty(MimeTypeAction.KEY, mimeType);
context.setProperty(ExtensionAction.KEY, "xml");
Expand All @@ -133,7 +146,10 @@ public boolean matches(final Context context) throws IOException {
// xml version: see
// http://sax.sourceforge.net/apidoc/org/xml/sax/package-summary.html#package_description
putIfPresent(DOCUMENT_XML_VERSION_KEY, handler.getXmlVersion(), xmlDetails);

if (isXRechnung) {
xmlDetails.put(X_RECHNUNG_KEY, true);
}

// Parser would have thrown a SAXException if this is no proper XML
return true;
} catch (ParserConfigurationException | SAXException e) {
Expand Down Expand Up @@ -373,4 +389,16 @@ private static int determineMaxEntityExpansions() {
}
return DEFAULT_MAX_ENTITY_EXPANSIONS;
}

/**
 * Returns <code>true</code> if the XML root element and namespace match those of an X-Rechnung
 * standard.
 * <p>
 * A root element that is <code>null</code> or not one of the known X-Rechnung root elements
 * never matches, regardless of the namespace. A known root element only matches when the given
 * namespace equals the namespace registered for it, so a <code>null</code> or empty namespace
 * never matches either.
 *
 * @param rootElement an XML root element, may be <code>null</code>
 * @param namespaceURI an XML namespace, may be <code>null</code> or empty
 * @return <code>true</code> if input matches X-Rechnung standard
 */
public static boolean matchesXRechnung(String rootElement, String namespaceURI) {
  // Deliberately no "" fallback for unknown root elements: an arbitrary element that is in no
  // namespace (namespace URI "") would compare equal to that fallback and be reported as
  // X-Rechnung.
  final String expectedNamespace = X_RECHNUNG_ROOT_ELEMENT_XMLNS_PAIRS.get(rootElement);
  return expectedNamespace != null && expectedNamespace.equals(namespaceURI);
}
}
176 changes: 118 additions & 58 deletions src/test/java/TestVariousTypes.java
Original file line number Diff line number Diff line change
@@ -1,58 +1,118 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.io.File;
import java.io.IOException;
import java.util.Map;

import org.jadice.filetype.Analyzer;
import org.jadice.filetype.AnalyzerException;
import org.jadice.filetype.database.DescriptionAction;
import org.jadice.filetype.database.ExtensionAction;
import org.jadice.filetype.database.MimeTypeAction;
import org.jadice.filetype.io.MemoryInputStream;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Smoke tests that run the {@link Analyzer} over sample inputs and check that basic result
 * properties (MIME type, description, extension) are reported.
 */
class TestVariousTypes {

private static final Logger LOGGER = LoggerFactory.getLogger(TestVariousTypes.class);

// Shared analyzer instance, created once for all tests in createAnalyzer().
private static Analyzer analyzer;

/**
 * Creates the analyzer from the "/magic.xml" definition before any test runs.
 *
 * @throws AnalyzerException if the analyzer cannot be created
 */
@BeforeAll
public static void createAnalyzer() throws AnalyzerException {
analyzer = Analyzer.getInstance("/magic.xml");
}

/**
 * Analyzes every readable regular file in src/test/resources/various_types and asserts that a
 * result with a MIME type and a description is produced; all result entries are logged.
 *
 * @throws IOException if analyzing a file fails
 */
@Test
void testVariousTypes() throws IOException {
final File[] files = new File("src/test/resources/various_types").listFiles(
pathname -> pathname.isFile() && pathname.canRead());
// NOTE(review): a plain Java assert is only evaluated when assertions are enabled (-ea)
assert files != null;

for (final File file : files) {
LOGGER.info("File: " + file);
final Map<String, Object> results = analyzer.analyze(file);
assertNotNull(results, file + " could not be analyzed");
assertNotNull(results.get(MimeTypeAction.KEY), "mimeType missing for " + file);
assertNotNull(results.get(DescriptionAction.KEY), "description missing for" + file);
// extension can be null
// assertNotNull(results.get(ExtensionAction.KEY), file + " could not be analyzed");
// log every result entry, followed by a separator line per file
for (final Map.Entry<String, Object> e : results.entrySet())
LOGGER.info(" " + e.getKey() + "=" + e.getValue());
LOGGER.info("\n-------------------");
}
}

/**
 * Analyzes a zero-length stream and asserts the expected MIME type, extension and description.
 *
 * @throws Exception if the analysis fails
 */
@Test
void testEmptyStream() throws Exception {
Map<String, Object> results = analyzer.analyze(new MemoryInputStream(new byte[0]));
assertNotNull(results, "empty stream could not be analyzed");
assertEquals("text/plain", results.get(MimeTypeAction.KEY));
assertEquals("txt", results.get(ExtensionAction.KEY));
assertEquals("Binary data, ASCII Text Document", results.get(DescriptionAction.KEY));
}
}
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.params.provider.Arguments.arguments;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Map;
import java.util.stream.Stream;

import org.jadice.filetype.Analyzer;
import org.jadice.filetype.AnalyzerException;
import org.jadice.filetype.database.DescriptionAction;
import org.jadice.filetype.database.ExtensionAction;
import org.jadice.filetype.database.MimeTypeAction;
import org.jadice.filetype.io.MemoryInputStream;
import org.jadice.filetype.matchers.PDFMatcher;
import org.jadice.filetype.matchers.XMLMatcher;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests that run the {@link Analyzer} over sample inputs and check the reported MIME type,
 * description, extension and - for electronic invoices - the X-Rechnung detail flag.
 */
class TestVariousTypes {

  private static final Logger LOGGER = LoggerFactory.getLogger(TestVariousTypes.class);

  /** Shared analyzer instance, created once for all tests in {@link #createAnalyzer()}. */
  private static Analyzer analyzer;

  /**
   * Creates the analyzer from the "/magic.xml" definition before any test runs.
   *
   * @throws AnalyzerException if the analyzer cannot be created
   */
  @BeforeAll
  public static void createAnalyzer() throws AnalyzerException {
    analyzer = Analyzer.getInstance("/magic.xml");
  }

  /**
   * Analyzes every readable regular file in src/test/resources/various_types and asserts that a
   * result with a MIME type and a description is produced.
   *
   * @throws IOException if analyzing a file fails
   */
  @Test
  void testVariousTypes() throws IOException {
    final File[] files = new File("src/test/resources/various_types").listFiles(
        pathname -> pathname.isFile() && pathname.canRead());
    // listFiles() returns null if the directory cannot be listed. Use a JUnit assertion rather
    // than the assert keyword, which is silently skipped when the JVM runs without -ea.
    assertNotNull(files, "test resource directory could not be listed");

    for (final File file : files) {
      LOGGER.info("File: {}", file);
      final Map<String, Object> results = analyzer.analyze(file);
      assertNotNull(results, file + " could not be analyzed");
      assertNotNull(results.get(MimeTypeAction.KEY), "mimeType missing for " + file);
      assertNotNull(results.get(DescriptionAction.KEY), "description missing for " + file);
      // the extension can be null, so it is intentionally not asserted here
      printResult(results);
    }
  }

  /**
   * Analyzes a zero-length stream and asserts the expected MIME type, extension and description.
   *
   * @throws Exception if the analysis fails
   */
  @Test
  void testEmptyStream() throws Exception {
    Map<String, Object> results = analyzer.analyze(new MemoryInputStream(new byte[0]));
    assertNotNull(results, "empty stream could not be analyzed");
    assertEquals("text/plain", results.get(MimeTypeAction.KEY));
    assertEquals("txt", results.get(ExtensionAction.KEY));
    assertEquals("Binary data, ASCII Text Document", results.get(DescriptionAction.KEY));
  }

  /**
   * Supplies the arguments for {@link #testXRechnung(String, String)}.
   *
   * @return pairs of classpath resource and the MIME type expected for it
   */
  public static Stream<Arguments> dataProvider() {
    return Stream.of(
        arguments("/various_types/BASIC_Einfach.pdf", "application/pdf"),
        arguments("/various_types/EN16931_Einfach.pdf", "application/pdf"),
        arguments("/various_types/EN16931_Einfach.cii.xml", "application/xml;charset=UTF-8;x-rechnung=true"),
        arguments("/various_types/EN16931_Einfach.ubl.xml", "application/xml;charset=UTF-8;x-rechnung=true"),
        arguments("/various_types/ZUGFeRD-invoice_rabatte_3_abschlag_duepayableamount.xml", "application/xml;charset=UTF-8;x-rechnung=true")
    );
  }

  /**
   * Analyzes one invoice sample and asserts the MIME type, the presence of description and
   * extension, and that the X-Rechnung flag is set in the matching details map.
   *
   * @param resource classpath location of the sample file
   * @param expectedMimeType the MIME type the analyzer is expected to report
   * @throws Exception if the resource cannot be resolved or analyzed
   */
  @ParameterizedTest
  @MethodSource("dataProvider")
  void testXRechnung(String resource, String expectedMimeType) throws Exception {
    final URL url = getClass().getResource(resource);
    assertNotNull(url, "test resource not found: " + resource);
    final File file = new File(url.toURI());
    final Map<String, Object> results = analyzer.analyze(file);
    assertNotNull(results, file + " could not be analyzed");
    assertNotNull(results.get(MimeTypeAction.KEY), "mimeType missing");
    assertEquals(expectedMimeType, results.get(MimeTypeAction.KEY), "wrong mimeType");
    assertNotNull(results.get(DescriptionAction.KEY), "description missing");
    assertNotNull(results.get(ExtensionAction.KEY), "extension missing");
    checkForDetails(results);
    printResult(results);
  }

  /**
   * Picks the details map that belongs to the reported MIME type and verifies its X-Rechnung
   * flag; any other MIME type fails the test.
   *
   * @param results the analyzer results
   */
  private void checkForDetails(final Map<String, Object> results) {
    final String mimeType = (String) results.get(MimeTypeAction.KEY);
    switch (mimeType) {
      case "application/pdf":
        ensureXRechnungIsTrue(results, PDFMatcher.DETAILS_KEY);
        break;
      case "application/xml;charset=UTF-8;x-rechnung=true":
        ensureXRechnungIsTrue(results, XMLMatcher.DETAILS_KEY);
        break;
      default:
        fail("unexpected mime type");
    }
  }

  /**
   * Asserts that the details map stored under {@code detailsKey} exists and carries
   * {@link XMLMatcher#X_RECHNUNG_KEY} with the value {@code true}.
   *
   * @param results the analyzer results
   * @param detailsKey the key under which the details map is stored
   */
  private void ensureXRechnungIsTrue(final Map<String, Object> results, final String detailsKey) {
    final Object details = results.get(detailsKey);
    assertNotNull(details, "details are missing");
    assertTrue(details instanceof Map, "details are not a map");
    final Map<?, ?> detailsMap = (Map<?, ?>) details;
    // Compare against Boolean.TRUE instead of unboxing the value: a missing entry must fail the
    // assertion with its message rather than abort the test with a NullPointerException.
    assertEquals(Boolean.TRUE, detailsMap.get(XMLMatcher.X_RECHNUNG_KEY), "x_rechnung should be true");
  }

  /**
   * Logs every entry of the analyzer results, followed by a separator line.
   *
   * @param results the analyzer results
   */
  private static void printResult(final Map<String, Object> results) {
    for (final Map.Entry<String, Object> e : results.entrySet()) {
      LOGGER.info(" {}={}", e.getKey(), e.getValue());
    }
    LOGGER.info("\n-------------------");
  }
}
Binary file not shown.
Loading
Loading