diff --git a/dspace-oai/src/main/java/org/dspace/utils/LangUtil.java b/dspace-oai/src/main/java/org/dspace/utils/LangUtil.java
new file mode 100644
index 000000000000..9fda94dbbd95
--- /dev/null
+++ b/dspace-oai/src/main/java/org/dspace/utils/LangUtil.java
@@ -0,0 +1,160 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+
+/* Created for LINDAT/CLARIAH-CZ (UFAL) */
+package org.dspace.utils;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+
+/**
+ * Resolves a language identifier to the shortest identifier listed for the same language in the
+ * tab-separated table {@code iso-639-3.tab}, which is read from the classpath once, when this class
+ * is initialised.
+ *
+ * Class is copied from the LINDAT/CLARIAH-CZ (This class is taken from UFAL-clarin.
+ * ...) and modified by
+ *
+ * @author Marian Berger (dspace at dataquest.sk)
+ */
+public class LangUtil {
+
+    private static final org.apache.log4j.Logger log = org.apache.log4j.Logger
+            .getLogger(LangUtil.class);
+
+    /** Classpath resource holding the language table. */
+    private static final String LANG_CODES_RESOURCE = "iso-639-3.tab";
+
+    /** Leading columns every usable row must have: Id, Part2B, Part2T and Part1. */
+    private static final int REQUIRED_COLUMNS = 4;
+
+    /** Value of the first column on the header row of the table; such a row is not a language. */
+    private static final String HEADER_ID = "Id";
+
+    /**
+     * Lookup from the Id column and, when it is filled in, the Part2B column to the parsed row.
+     * Declared after {@code log} on purpose: static initialisers run in textual order and
+     * {@code loadLanguages()} logs.
+     */
+    static final HashMap<String, Lang> idToLang = loadLanguages();
+
+    private LangUtil() {}
+
+    /**
+     * Reads the language table from the classpath.
+     *
+     * @return the populated lookup; empty when the resource is missing, and possibly partial when
+     *         reading fails half-way. Never {@code null}.
+     */
+    private static HashMap<String, Lang> loadLanguages() {
+        final HashMap<String, Lang> languages = new HashMap<>();
+        final InputStream langCodesInputStream = LangUtil.class.getClassLoader()
+                .getResourceAsStream(LANG_CODES_RESOURCE);
+        if (langCodesInputStream == null) {
+            // Without the table getShortestId() degrades to returning its argument, so say why.
+            log.error("Resource " + LANG_CODES_RESOURCE + " was not found on the classpath, "
+                    + "language identifiers will not be shortened.");
+            return languages;
+        }
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(langCodesInputStream,
+                StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                final Lang lang = Lang.parse(line);
+                if (lang == null) {
+                    // Blank, header or truncated line: skip it instead of failing class initialisation.
+                    continue;
+                }
+                languages.put(lang.getId(), lang);
+                if (lang.getPart2B() != null) {
+                    languages.put(lang.getPart2B(), lang);
+                }
+            }
+        } catch (IOException e) {
+            log.error("Failed to read " + LANG_CODES_RESOURCE + ", the language table may be incomplete.", e);
+        }
+        return languages;
+    }
+
+    /**
+     * Returns the shortest known identifier for a language.
+     *
+     * @param id identifier to look up; matched against the Id and Part2B columns of the table
+     * @return the Part1 value of the matching row when it has one, otherwise the row's Id; {@code id}
+     *         itself (including {@code null}) when no row matches
+     */
+    public static String getShortestId(String id) {
+        final Lang lang = idToLang.get(id);
+        if (lang == null) {
+            return id;
+        }
+        return lang.getPart1() != null ? lang.getPart1() : lang.getId();
+    }
+
+    /**
+     * Prints the shortest identifiers of a few sample codes; kept as a manual smoke test.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        System.out.println(getShortestId("eng"));
+        System.out.println(getShortestId("deu"));
+        System.out.println(getShortestId("ger"));
+        System.out.println(getShortestId("wtf"));
+    }
+
+    /** One row of the language table, reduced to the columns this class uses. */
+    private static class Lang {
+        private final String id;
+        private final String part2B;
+        private final String part1;
+
+        private Lang(String id, String part2B, String part1) {
+            this.id = id;
+            this.part2B = part2B;
+            this.part1 = part1;
+        }
+
+        /**
+         * Parses one line of the table.
+         *
+         * @param line raw tab-separated line
+         * @return the parsed row, or {@code null} when the line has fewer than
+         *         {@code REQUIRED_COLUMNS} columns, an empty Id, or is the header row
+         */
+        static Lang parse(String line) {
+            // Limit of 8 keeps any tab inside the last column from producing extra fields.
+            final String[] parts = line.split("\t", 8);
+            if (parts.length < REQUIRED_COLUMNS || parts[0].isEmpty() || HEADER_ID.equals(parts[0])) {
+                return null;
+            }
+            // parts[2] (Part2T) and the columns after Part1 are not needed here.
+            return new Lang(parts[0], emptyToNull(parts[1]), emptyToNull(parts[3]));
+        }
+
+        private static String emptyToNull(String value) {
+            return value.isEmpty() ? null : value;
+        }
+
+        public String getId() {
+            return id;
+        }
+
+        public String getPart1() {
+            return part1;
+        }
+
+        public String getPart2B() {
+            return part2B;
+        }
+    }
+}
\ No newline at end of file
diff --git a/dspace-oai/src/main/java/org/dspace/utils/LicenseUtil.java b/dspace-oai/src/main/java/org/dspace/utils/LicenseUtil.java
index b9f913edcab7..975dbf39f1c7 100644
--- a/dspace-oai/src/main/java/org/dspace/utils/LicenseUtil.java
+++ b/dspace-oai/src/main/java/org/dspace/utils/LicenseUtil.java
@@ -26,8 +26,8 @@
  * https://github.com/ufal/clarin-dspace/blob
  * * /si-master-origin/dspace-oai/src/main/java/cz/cuni/mff/ufal/utils/LicenseUtil.java) and modified by
  *
- * @author Marian Berger (marian.berger at dataquest.sk)
- * @author Milan Majchrak (milan.majchrak at dataquest.sk)
+ * @author Marian Berger (dspace at dataquest.sk)
+ * @author Milan Majchrak (dspace at dataquest.sk)
  */
 public class LicenseUtil {
 
diff --git a/dspace-oai/src/main/java/org/dspace/xoai/services/impl/resources/DSpaceResourceResolver.java b/dspace-oai/src/main/java/org/dspace/xoai/services/impl/resources/DSpaceResourceResolver.java
index aec4ea516e14..9d4790b9ff47 100644
--- a/dspace-oai/src/main/java/org/dspace/xoai/services/impl/resources/DSpaceResourceResolver.java
+++ 
b/dspace-oai/src/main/java/org/dspace/xoai/services/impl/resources/DSpaceResourceResolver.java
@@ -34,6 +34,7 @@
 import org.dspace.xoai.services.impl.resources.functions.GetUploadedMetadataFn;
 import org.dspace.xoai.services.impl.resources.functions.LogMissingFn;
 import org.dspace.xoai.services.impl.resources.functions.LogMissingMsgFn;
+import org.dspace.xoai.services.impl.resources.functions.ShortestIdFn;
 import org.dspace.xoai.services.impl.resources.functions.StringReplaceFn;
 import org.dspace.xoai.services.impl.resources.functions.UriToLicenseFn;
 import org.dspace.xoai.services.impl.resources.functions.UriToMetaShareFn;
@@ -50,7 +51,7 @@ public class DSpaceResourceResolver implements ResourceResolver {
      */
     List extensionFunctionList = List.of(
             new GetPropertyFn(), new StringReplaceFn(), new UriToMetaShareFn(),
-            new UriToLicenseFn(), new LogMissingMsgFn(), new UriToRestrictionsFn(),
+            new UriToLicenseFn(), new LogMissingMsgFn(), new UriToRestrictionsFn(), new ShortestIdFn(),
             new GetContactFn(), new GetAuthorFn(), new GetFundingFn(), new GetLangForCodeFn(),
             new GetPropertyFn(), new GetSizeFn(), new GetUploadedMetadataFn(), new LogMissingFn(),
             new BibtexifyFn(), new FormatFn()
@@ -62,6 +63,7 @@ public class DSpaceResourceResolver implements ResourceResolver {
             saxonTransformerFactory.getProcessor().registerExtensionFunction(en);
         }
     }
+
     private final String basePath;
 
     public DSpaceResourceResolver() {
diff --git a/dspace-oai/src/main/java/org/dspace/xoai/services/impl/resources/functions/ShortestIdFn.java b/dspace-oai/src/main/java/org/dspace/xoai/services/impl/resources/functions/ShortestIdFn.java
new file mode 100644
index 000000000000..022755d68662
--- /dev/null
+++ b/dspace-oai/src/main/java/org/dspace/xoai/services/impl/resources/functions/ShortestIdFn.java
@@ -0,0 +1,33 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.xoai.services.impl.resources.functions;
+
+import org.dspace.utils.LangUtil;
+
+/**
+ * Serves as proxy for call from XSL engine: hands the single string argument over to
+ * {@link LangUtil#getShortestId(String)} and returns its result.
+ *
+ * @author Marian Berger (dspace at dataquest.sk)
+ */
+public class ShortestIdFn extends StringXSLFunction {
+    /** @return the function name, {@code shortestIdFn} */
+    @Override
+    protected String getFnName() {
+        return "shortestIdFn";
+    }
+
+    /**
+     * @param param language identifier to shorten
+     * @return the value {@link LangUtil#getShortestId(String)} yields for {@code param}
+     */
+    @Override
+    protected String getStringResult(String param) {
+        return LangUtil.getShortestId(param);
+    }
+}
diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg
index 32c94581b1ac..a3002aea7ade 100644
--- a/dspace/config/clarin-dspace.cfg
+++ b/dspace/config/clarin-dspace.cfg
@@ -281,3 +281,5 @@ download.all.limit.min.file.count = 1
 download.all.limit.max.file.size = 1073741824
 # minimum total size of files for enabling download alert:
 download.all.alert.min.file.size = 10485760
+# used in elg crosswalk exposing download locations
+elg.download-location.exposed = 0
\ No newline at end of file
diff --git a/dspace/config/crosswalks/oai/metadataFormats/elg.xsl b/dspace/config/crosswalks/oai/metadataFormats/elg.xsl
new file mode 100644
index 000000000000..b822d6e2fad5
--- /dev/null
+++ b/dspace/config/crosswalks/oai/metadataFormats/elg.xsl
@@ -0,0 +1,912 @@
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value automatically assigned - leave as is + + + + + + + + + + + http://w3id.org/meta-share/meta-share/ELG-SHARE + + LINDAT/CLARIAH-CZ + + + + http://purl.org/spar/datacite/handle + + + + + + + + + + + + LanguageResource + + + unspecified + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Organization + + + + + + + + + + + + + + + + + + + , + + + + + + Person + + + + + + + + + Organization + + + + + + + + + + + + + + + + + + + + + + http://w3id.org/meta-share/meta-share/OpenAIRE + + + + + + + + + + + + 
http://w3id.org/meta-share/meta-share/other + + + + + + + + Organization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Corpus + http://w3id.org/meta-share/meta-share/unspecified + + + + + + + + http://w3id.org/meta-share/meta-share/noP + http://w3id.org/meta-share/meta-share/noS + + + + + + + + + + + + + + + + + + + + + + + + lcrMediaType + ldMediaType + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + http://w3id.org/meta-share/meta-share/ + + monolingual + bilingual + multilingual + + + http://w3id.org/meta-share/meta-share/unspecified + + + + + + + + + + + http://w3id.org/meta-share/meta-share/ + + monolingual + bilingual + multilingual + + + + + + + + + + + unspecified + + + unspecified + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + downloadable + sourceCode + + + + + + + + + + + + + + + + + + + + + + + + + + + demo + samples + + + + + + + + + + + + + + + + + + + + http://w3id.org/meta-share/meta-share/unspecified + + + + + + + ToolService + + undefined + + + + + + + + + + + false + + + + + + + http://w3id.org/meta-share/meta-share/unspecified + + + + + + + http://w3id.org/meta-share/meta-share/unspecified + + + + + false + + + + + + + LanguageDescription + + + http://w3id.org/meta-share/meta-share/grammar + http://w3id.org/meta-share/meta-share/model + http://w3id.org/meta-share/meta-share/other + + + + + + + + Grammar + http://w3id.org/meta-share/meta-share/unspecified + + + + + + + + + + + + http://w3id.org/meta-share/meta-share/unspecified + + -1 + + + + + + + + + + + + + Unspecified + + + + + + + + + + + LexicalConceptualResource + + + http://w3id.org/meta-share/meta-share/wordNet + + + + + + http://w3id.org/meta-share/meta-share/unspecified + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + http://w3id.org/meta-share/meta-share/four-gram + + + + + http://w3id.org/meta-share/meta-share/five-gram + + + + + 
http://w3id.org/meta-share/meta-share/T-HPair + + + + + http://w3id.org/meta-share/meta-share/article + + + + + http://w3id.org/meta-share/meta-share/bigram + + + + + http://w3id.org/meta-share/meta-share/byte + + + + + http://w3id.org/meta-share/meta-share/class + + + + + http://w3id.org/meta-share/meta-share/concept + + + + + http://w3id.org/meta-share/meta-share/diphone1 + + + + + http://w3id.org/meta-share/meta-share/element + + + + + http://w3id.org/meta-share/meta-share/entry + + + + + http://w3id.org/meta-share/meta-share/expression + + + + + http://w3id.org/meta-share/meta-share/file + + + + + http://w3id.org/meta-share/meta-share/frame1 + + + + + http://w3id.org/meta-share/meta-share/gb + + + + + http://w3id.org/meta-share/meta-share/hour1 + + + + + http://w3id.org/meta-share/meta-share/idiomaticExpression + + + + + http://w3id.org/meta-share/meta-share/image2 + + + + + http://w3id.org/meta-share/meta-share/item + + + + + http://w3id.org/meta-share/meta-share/kb + + + + + http://w3id.org/meta-share/meta-share/keyword1 + + + + + http://w3id.org/meta-share/meta-share/lexicalType + + + + + http://w3id.org/meta-share/meta-share/mb + + + + + http://w3id.org/meta-share/meta-share/minute + + + + + http://w3id.org/meta-share/meta-share/multiWordUnit + + + + + http://w3id.org/meta-share/meta-share/neologism + + + + + http://w3id.org/meta-share/meta-share/other + + + + + http://w3id.org/meta-share/meta-share/phoneme2 + + + + + http://w3id.org/meta-share/meta-share/phoneticUnit + + + + + http://w3id.org/meta-share/meta-share/predicate + + + + + http://w3id.org/meta-share/meta-share/rule + + + + + http://w3id.org/meta-share/meta-share/second + + + + + http://w3id.org/meta-share/meta-share/semanticUnit1 + + + + + http://w3id.org/meta-share/meta-share/sentence1 + + + + + http://w3id.org/meta-share/meta-share/shot1 + + + + + http://w3id.org/meta-share/meta-share/syllable2 + + + + + http://w3id.org/meta-share/meta-share/synset + + + + + 
http://w3id.org/meta-share/meta-share/syntacticUnit1 + + + + + http://w3id.org/meta-share/meta-share/term + + + + + http://w3id.org/meta-share/meta-share/text1 + + + + + http://w3id.org/meta-share/meta-share/token + + + + + http://w3id.org/meta-share/meta-share/trigram + + + + + http://w3id.org/meta-share/meta-share/turn + + + + + http://w3id.org/meta-share/meta-share/unigram + + + + + http://w3id.org/meta-share/meta-share/unit + + + + + http://w3id.org/meta-share/meta-share/utterance1 + + + + + http://w3id.org/meta-share/meta-share/word3 + + + + + + + + + + + + + + + + + + + + + + + + + + Model + + http://w3id.org/meta-share/meta-share/unspecified + + + http://w3id.org/meta-share/meta-share/unspecified + + + + + + + + + + + \ No newline at end of file diff --git a/dspace/config/crosswalks/oai/xoai.xml b/dspace/config/crosswalks/oai/xoai.xml index 96bfaffd8010..723aa02d7311 100644 --- a/dspace/config/crosswalks/oai/xoai.xml +++ b/dspace/config/crosswalks/oai/xoai.xml @@ -23,6 +23,7 @@ + This is the default context of the DSpace OAI-PMH data provider. @@ -230,6 +231,13 @@ http://schema.datacite.org/oai/oai-1.1/ http://schema.datacite.org/oai/oai-1.1/oai.xsd + + elg + metadataFormats/elg.xsl + http://w3id.org/meta-share/meta-share/ + http://w3id.org/meta-share/meta-share/ ../Schema/ELG-SHARE.xsd + + @@ -462,6 +470,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +