forked from DSpace/DSpace
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathAIPTechMDCrosswalk.java
More file actions
519 lines (490 loc) · 25.6 KB
/
AIPTechMDCrosswalk.java
File metadata and controls
519 lines (490 loc) · 25.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.crosswalk;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.Site;
import org.dspace.content.clarin.ClarinUserRegistration;
import org.dspace.content.dto.MetadataValueDTO;
import org.dspace.content.factory.ClarinServiceFactory;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.packager.DSpaceAIPIngester;
import org.dspace.content.packager.METSManifest;
import org.dspace.content.packager.PackageUtils;
import org.dspace.content.service.BitstreamFormatService;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.ItemService;
import org.dspace.content.service.SiteService;
import org.dspace.content.service.clarin.ClarinUserRegistrationService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.factory.EPersonServiceFactory;
import org.dspace.eperson.service.EPersonService;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.jdom2.Element;
import org.jdom2.Namespace;
/**
* Crosswalk of technical metadata for DSpace AIP. This is
* only intended for use by the METS AIP packager. It borrows the
* DIM XML format and DC field names, although it abuses the meaning
* of Dublin Core terms and qualifiers because this format is
* ONLY FOR DSPACE INTERNAL USE AND INGESTION. It is needed to record
* a complete and accurate image of all of the attributes an object
* has in the RDBMS.
*
* <p>
* It encodes the following common properties of all archival objects:
* <dl>
* <dt>identifier.uri</dt> <dd>persistent identifier of object in URI form (e.g. Handle URN)</dd>
* <dt>relation.isPartOf</dt> <dd>persistent identifier of object's parent in URI form (e.g. Handle URN)</dd>
* <dt>relation.isReferencedBy</dt> <dd>if relevant, persistent identifier of
* other objects that map this one as a child. May repeat.</dd>
* </dl>
*
* <p>
* There may also be other fields, depending on the type of object,
* which encode attributes that are not part of the descriptive metadata and
* are not adequately covered by other technical MD formats (i.e. PREMIS).
*
* <p>
* Configuration entries:
* <dl>
* <dt>mets.dspaceAIP.ingest.createSubmitter</dt> <dd>boolean, create EPerson for Submitter
* automatically, on ingest, if it doesn't exist.</dd>
* </dl>
*
* @author Larry Stone
*/
public class AIPTechMDCrosswalk implements IngestionCrosswalk, DisseminationCrosswalk {
/**
* Logger for this crosswalk (log4j2).
*/
private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(AIPTechMDCrosswalk.class);
// Used to translate bitstream-format support levels to/from text and to look
// formats up by short description.
protected final BitstreamFormatService bitstreamFormatService = ContentServiceFactory.getInstance()
.getBitstreamFormatService();
// Used to find the Site object, which acts as the parent of a top-level Community.
protected final SiteService siteService = ContentServiceFactory.getInstance().getSiteService();
// Used on ingest to map an Item into additional Collections.
protected final CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();
// Used on ingest to find (and optionally create) the submitter EPerson by e-mail.
protected final EPersonService ePersonService = EPersonServiceFactory.getInstance().getEPersonService();
// Used on ingest to withdraw Items and to test Collection membership.
protected final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
// Used on ingest to resolve a handle string to a DSpaceObject.
protected final HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
// Used on ingest to read the "create submitter" boolean setting.
protected final ConfigurationService configurationService
= DSpaceServicesFactory.getInstance().getConfigurationService();
// Used on ingest to create a ClarinUserRegistration for a newly created submitter.
private final ClarinUserRegistrationService clarinUserRegistrationService =
ClarinServiceFactory.getInstance().getClarinUserRegistration();
/**
 * Report the XML namespaces of the elements this crosswalk may produce.
 *
 * @return a one-element array containing the DIM namespace
 *         ({@code XSLTCrosswalk.DIM_NS}).
 */
@Override
public Namespace[] getNamespaces() {
    return new Namespace[] {XSLTCrosswalk.DIM_NS};
}
/**
 * Report the value to use for the <code>xsi:schemaLocation</code> attribute
 * of the generated XML.
 * <p>
 * This crosswalk does not declare a schema, so the result is always the
 * empty string.
 *
 * @return the empty string.
 */
@Override
public String getSchemaLocation() {
    final String noKnownSchema = "";
    return noKnownSchema;
}
/**
 * Predicate: can this crosswalk produce technical metadata for the given
 * object?
 *
 * @param dso the DSpace object to test, e.g. an <code>Item</code>.
 * @return true for a Site, Community, Collection, Item or Bitstream;
 *         false for every other object type.
 */
@Override
public boolean canDisseminate(DSpaceObject dso) {
    switch (dso.getType()) {
        case Constants.SITE:
        case Constants.COMMUNITY:
        case Constants.COLLECTION:
        case Constants.ITEM:
        case Constants.BITSTREAM:
            return true;
        default:
            return false;
    }
}
/**
 * Predicate: does this crosswalk prefer to hand back a list of elements
 * instead of a single root element?
 * <p>
 * This crosswalk always answers <code>false</code>, i.e. callers should
 * prefer <code>disseminateElement()</code> over
 * <code>disseminateList()</code>.
 *
 * @return always false.
 */
@Override
public boolean preferList() {
    final boolean listPreferred = false;
    return listPreferred;
}
/**
 * Execute the crosswalk and return the resulting fields as a list.
 * <p>
 * The result is the list of children of the root element built by
 * {@link #disseminateElement(Context, DSpaceObject)}.
 *
 * @param context context
 * @param dso     the DSpace Object whose metadata to export.
 * @return the child elements of the generated root element.
 * @throws CrosswalkException failure of the crosswalk itself, or the object cannot be crosswalked.
 * @throws IOException        I/O failure in services this calls
 * @throws SQLException       Database failure in services this calls
 * @throws AuthorizeException current user not authorized for this operation.
 */
@Override
public List<Element> disseminateList(Context context, DSpaceObject dso)
    throws CrosswalkException, IOException, SQLException,
    AuthorizeException {
    return disseminateElement(context, dso).getChildren();
}
/**
 * Execute the crosswalk, returning a single DIM root element that holds the
 * technical metadata of the given object.
 * <p>
 * The fields recorded depend on the object type:
 * <ul>
 * <li>Item: <code>creator</code> (submitter e-mail, if any),
 * <code>identifier.uri</code>, <code>relation.isPartOf</code> (owning
 * collection, if any), <code>relation.isReferencedBy</code> (one per other
 * collection the item is in) and <code>rights.accessRights</code>
 * (<code>WITHDRAWN</code>, only when the item is withdrawn).</li>
 * <li>Bitstream: <code>title</code>, <code>title.alternative</code> (source),
 * <code>description</code>, <code>format</code> (user format description)
 * and the <code>format.medium</code>, <code>format.mimetype</code>,
 * <code>format.supportlevel</code> and <code>format.internal</code>
 * properties of its bitstream format.</li>
 * <li>Collection: <code>identifier.uri</code>,
 * <code>relation.isPartOf</code> (first parent community) and
 * <code>relation.isReferencedBy</code> (any further parent communities).</li>
 * <li>Community: <code>identifier.uri</code> and
 * <code>relation.isPartOf</code> (first parent community, or the Site when
 * there is no parent community).</li>
 * <li>Site: two <code>identifier.uri</code> values, the handle and the URL.</li>
 * </ul>
 * Any other object type yields a DIM element built from an empty field list.
 * An Item without an owning collection, or a Collection without a parent
 * community, simply gets no <code>relation.isPartOf</code> field.
 *
 * @param context context
 * @param dso     the DSpace Object whose metadata to export.
 * @return root Element of the target metadata, as built by
 *         <code>XSLTDisseminationCrosswalk.createDIM</code>.
 * @throws CrosswalkException failure of the crosswalk itself, or the object cannot be crosswalked.
 * @throws IOException        I/O failure in services this calls
 * @throws SQLException       Database failure in services this calls
 * @throws AuthorizeException current user not authorized for this operation.
 */
@Override
public Element disseminateElement(Context context, DSpaceObject dso)
    throws CrosswalkException, IOException, SQLException,
    AuthorizeException {
    List<MetadataValueDTO> dc = new ArrayList<>();
    int type = dso.getType();

    if (type == Constants.ITEM) {
        Item item = (Item) dso;
        EPerson submitter = item.getSubmitter();
        if (submitter != null) {
            dc.add(makeDC("creator", null, submitter.getEmail()));
        }
        dc.add(makeDC("identifier", "uri", "hdl:" + item.getHandle()));

        // The owning collection can be absent; in that case there is no
        // primary parent to record and every collection counts as a mapping.
        Collection owningColl = item.getOwningCollection();
        if (owningColl != null) {
            String owner = owningColl.getHandle();
            if (owner != null) {
                dc.add(makeDC("relation", "isPartOf", "hdl:" + owner));
            }
        }
        for (Collection mapped : item.getCollections()) {
            boolean isOwner = owningColl != null && mapped.getID().equals(owningColl.getID());
            if (!isOwner) {
                String h = mapped.getHandle();
                if (h != null) {
                    dc.add(makeDC("relation", "isReferencedBy", "hdl:" + h));
                }
            }
        }
        if (item.isWithdrawn()) {
            dc.add(makeDC("rights", "accessRights", "WITHDRAWN"));
        }
    } else if (type == Constants.BITSTREAM) {
        Bitstream bitstream = (Bitstream) dso;
        String bsName = bitstream.getName();
        if (bsName != null) {
            dc.add(makeDC("title", null, bsName));
        }
        String bsSource = bitstream.getSource();
        if (bsSource != null) {
            dc.add(makeDC("title", "alternative", bsSource));
        }
        String bsDesc = bitstream.getDescription();
        if (bsDesc != null) {
            dc.add(makeDC("description", null, bsDesc));
        }
        String bsUfmt = bitstream.getUserFormatDescription();
        if (bsUfmt != null) {
            dc.add(makeDC("format", null, bsUfmt));
        }
        BitstreamFormat bsf = bitstream.getFormat(context);
        dc.add(makeDC("format", "medium", bsf.getShortDescription()));
        dc.add(makeDC("format", "mimetype", bsf.getMIMEType()));
        dc.add(makeDC("format", "supportlevel", bitstreamFormatService.getSupportLevelText(bsf)));
        dc.add(makeDC("format", "internal", Boolean.toString(bsf.isInternal())));
    } else if (type == Constants.COLLECTION) {
        Collection collection = (Collection) dso;
        dc.add(makeDC("identifier", "uri", "hdl:" + dso.getHandle()));

        // First community is treated as the primary parent, the rest as
        // additional mappings. Guard against a collection with no parent.
        List<Community> owners = collection.getCommunities();
        if (!CollectionUtils.isEmpty(owners)) {
            String ownerHdl = owners.get(0).getHandle();
            if (ownerHdl != null) {
                dc.add(makeDC("relation", "isPartOf", "hdl:" + ownerHdl));
            }
            for (Community extra : owners.subList(1, owners.size())) {
                String h = extra.getHandle();
                if (h != null) {
                    dc.add(makeDC("relation", "isReferencedBy", "hdl:" + h));
                }
            }
        }
    } else if (type == Constants.COMMUNITY) {
        Community community = (Community) dso;
        dc.add(makeDC("identifier", "uri", "hdl:" + dso.getHandle()));

        // A community without a parent community hangs directly off the Site.
        List<Community> parentCommunities = community.getParentCommunities();
        String ownerHdl;
        if (CollectionUtils.isEmpty(parentCommunities)) {
            ownerHdl = siteService.findSite(context).getHandle();
        } else {
            ownerHdl = parentCommunities.get(0).getHandle();
        }
        if (ownerHdl != null) {
            dc.add(makeDC("relation", "isPartOf", "hdl:" + ownerHdl));
        }
    } else if (type == Constants.SITE) {
        Site site = (Site) dso;
        //FIXME: adding two URIs for now (site handle and URL), in case site isn't using handles
        dc.add(makeDC("identifier", "uri", "hdl:" + site.getHandle()));
        dc.add(makeDC("identifier", "uri", site.getURL()));
    }

    return XSLTDisseminationCrosswalk.createDIM(dso, dc);
}
/**
 * Build a metadata value in the "dc" schema with no language set.
 *
 * @param element   the DC element name
 * @param qualifier the DC qualifier, or null for an unqualified field
 * @param value     the text value of the field
 * @return a new MetadataValueDTO carrying the given element, qualifier and value.
 */
private static MetadataValueDTO makeDC(String element, String qualifier, String value) {
    final MetadataValueDTO dto = new MetadataValueDTO();
    dto.setSchema("dc");
    dto.setElement(element);
    dto.setQualifier(qualifier);
    dto.setLanguage(null);
    dto.setValue(value);
    return dto;
}
/**
 * Ingest a whole document given its root element.
 * <p>
 * The children of <code>root</code> are handed to
 * {@link #ingest(Context, DSpaceObject, List, boolean)}; the root element
 * itself is not inspected.
 *
 * @param context                     context
 * @param dso                         the DSpace Object to apply the metadata to
 * @param root                        root element whose children are ingested
 * @param createMissingMetadataFields whether to create missing fields
 * @throws CrosswalkException if crosswalk error
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public void ingest(Context context, DSpaceObject dso, Element root, boolean createMissingMetadataFields)
    throws CrosswalkException, IOException, SQLException, AuthorizeException {
    final List<Element> children = root.getChildren();
    ingest(context, dso, children, createMissingMetadataFields);
}
/**
 * Ingest a list of DIM elements and apply them to the given object.
 * <p>
 * Each list entry must be either a DIM <code>dim</code> element (whose
 * children are ingested recursively) or a DIM <code>field</code> element;
 * anything else raises a {@link MetadataValidationException}. Only fields
 * with <code>mdschema="dc"</code> are processed; fields with another (or no)
 * schema, or without an <code>element</code> attribute, are skipped with a
 * warning.
 * <p>
 * For a Bitstream the fields set its name, source, description and user
 * format description, and the <code>format.*</code> fields are accumulated
 * and used after the loop to find or create the bitstream format. For an
 * Item the fields set the submitter, withdraw the item, or map it into
 * additional collections. For a Community or Collection the known fields are
 * accepted but ignored.
 *
 * @param context                     context
 * @param dso                         the DSpace Object to apply the metadata to
 * @param dimList                     List of elements
 * @param createMissingMetadataFields whether to create missing fields; only
 *                                    passed on to the recursive call made
 *                                    for nested <code>dim</code> elements
 * @throws CrosswalkException if crosswalk error, including an unexpected
 *                            element or an unrecognized support level
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public void ingest(Context context, DSpaceObject dso, List<Element> dimList, boolean createMissingMetadataFields)
    throws CrosswalkException,
    IOException, SQLException, AuthorizeException {
    int type = dso.getType();

    // accumulate values for bitstream format in case we have to make one
    String bsfShortName = null;
    String bsfMIMEType = null;
    int bsfSupport = BitstreamFormat.KNOWN;
    boolean bsfInternal = false;

    for (Element field : dimList) {
        boolean inDimNamespace = field.getNamespace().equals(XSLTCrosswalk.DIM_NS);

        // if we get <dim> in a list, recurse.
        if (inDimNamespace && "dim".equals(field.getName())) {
            ingest(context, dso, field.getChildren(), createMissingMetadataFields);
            continue;
        }
        if (!(inDimNamespace && "field".equals(field.getName()))) {
            log.error("Got unexpected element in DIM list: " + field.toString());
            throw new MetadataValidationException("Got unexpected element in DIM list: " + field.toString());
        }

        // Attribute lookups yield null when the attribute is absent, so
        // compare constant-first instead of dereferencing the result.
        String schema = field.getAttributeValue("mdschema");
        if (!"dc".equals(schema)) {
            log.warn("Skipping DIM field with mdschema=\"" + schema + "\".");
            continue;
        }
        String element = field.getAttributeValue("element");
        if (element == null) {
            log.warn("Skipping DIM field with no \"element\" attribute.");
            continue;
        }
        String qualifier = field.getAttributeValue("qualifier");
        String dcField = (qualifier == null) ? element : element + "." + qualifier;
        String value = field.getText();

        if (type == Constants.BITSTREAM) {
            Bitstream bitstream = (Bitstream) dso;
            switch (dcField) {
                case "title":
                    bitstream.setName(context, value);
                    break;
                case "title.alternative":
                    bitstream.setSource(context, value);
                    break;
                case "description":
                    bitstream.setDescription(context, value);
                    break;
                case "format":
                    bitstream.setUserFormatDescription(context, value);
                    break;
                case "format.medium":
                    bsfShortName = value;
                    break;
                case "format.mimetype":
                    bsfMIMEType = value;
                    break;
                case "format.supportlevel": {
                    int sl = bitstreamFormatService.getSupportLevelID(value);
                    if (sl < 0) {
                        throw new MetadataValidationException(
                            "Got unrecognized value for bitstream support level: " + value);
                    }
                    bsfSupport = sl;
                    break;
                }
                case "format.internal":
                    bsfInternal = Boolean.parseBoolean(value);
                    break;
                default:
                    log.warn("Got unrecognized DC field for Bitstream: " + dcField);
                    break;
            }
        } else if (type == Constants.ITEM) {
            ingestItemField(context, (Item) dso, dcField, value);
        } else if (type == Constants.COMMUNITY || type == Constants.COLLECTION) {
            if (dcField.equals("identifier.uri") || dcField.equals("relation.isPartOf")) {
                // Ignore identifier.uri (which specifies object handle)
                // and relation.isPartOf (which specifies primary parent object)
                // Both of these should already be set on object, as they
                // are required/generated when a DSpaceObject is created.
            } else if (dcField.equals("relation.isReferencedBy")) {
                // Ignore relation.isReferencedBy since it only
                // lists _extra_ mapped parents, not the primary one.
                // DSpace currently doesn't fully support mapping of Collections/Communities
            } else {
                log.warn("Got unrecognized DC field for Collection/Community: " + dcField);
            }
        }
    }

    // final step: find or create bitstream format since it
    // takes the accumulation of a few values:
    if (type == Constants.BITSTREAM && bsfShortName != null) {
        BitstreamFormat bsf = bitstreamFormatService.findByShortDescription(context, bsfShortName);
        if (bsf == null && bsfMIMEType != null) {
            bsf = PackageUtils.findOrCreateBitstreamFormat(context,
                                                           bsfShortName,
                                                           bsfMIMEType,
                                                           bsfShortName,
                                                           bsfSupport,
                                                           bsfInternal);
        }
        if (bsf != null) {
            ((Bitstream) dso).setFormat(context, bsf);
        } else {
            log.warn("Failed to find or create bitstream format named \"" + bsfShortName + "\"");
        }
    }
}

/**
 * Apply one "dc" technical-metadata field to an Item: set the submitter,
 * withdraw the item, or map it into an additional collection.
 *
 * @param context context
 * @param item    the Item being ingested
 * @param dcField field name as <code>element</code> or <code>element.qualifier</code>
 * @param value   text value of the field
 * @throws CrosswalkException if crosswalk error
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
private void ingestItemField(Context context, Item item, String dcField, String value)
    throws CrosswalkException, IOException, SQLException, AuthorizeException {
    if (dcField.equals("creator")) {
        // item submitter
        EPerson sub = ePersonService.findByEmail(context, value);

        // if eperson doesn't exist yet, optionally create it:
        if (sub == null) {
            //This class works in conjunction with the DSpaceAIPIngester.
            // so, we'll use the configuration settings for that ingester
            String configName = new DSpaceAIPIngester().getConfigurationName();

            //Create the EPerson if specified and person doesn't already exist
            if (configurationService.getBooleanProperty(
                METSManifest.CONFIG_METS_PREFIX + configName + ".ingest.createSubmitter")) {
                sub = ePersonService.create(context);
                sub.setEmail(value);
                sub.setCanLogIn(false);
                ePersonService.update(context, sub);

                ClarinUserRegistration clarinUserRegistration = new ClarinUserRegistration();
                clarinUserRegistration.setPersonID(sub.getID());
                clarinUserRegistration.setOrganization(
                    ClarinUserRegistration.UNKNOWN_USER_REGISTRATION);
                clarinUserRegistration.setConfirmation(false);
                try {
                    clarinUserRegistrationService.create(context, clarinUserRegistration);
                } catch (AuthorizeException e) {
                    // Best effort: the submitter is still used even if the
                    // registration record could not be created.
                    log.warn("Failed to create ClarinUserRegistration for submitter {}", value, e);
                }
            } else {
                log.warn(
                    "Ignoring unknown Submitter=" + value + " in AIP Tech MD, no matching EPerson" +
                        " and 'mets.dspaceAIP.ingest.createSubmitter' is false in dspace.cfg.");
            }
        }
        if (sub != null) {
            item.setSubmitter(sub);
        }
    } else if (dcField.equals("rights.accessRights")) {
        //check if item is withdrawn
        if (value.equalsIgnoreCase("WITHDRAWN")) {
            itemService.withdraw(context, item);
        }
    } else if (dcField.equals("identifier.uri") ||
        dcField.equals("relation.isPartOf")) {
        // Ignore identifier.uri (which specifies object handle)
        // and relation.isPartOf (which specifies primary parent object)
        // Both of these should already be set on object, as they
        // are required/generated when a DSpaceObject is created.
    } else if (dcField.equals("relation.isReferencedBy")) {
        // This Item is referenced by other Collections. This means
        // it has been mapped into one or more additional collections.
        // We'll attempt to map it to all referenced collections.
        // But if this is a recursive ingest, it is possible some of these
        // collections may not have been created yet. No need to worry,
        // when each Collection is created it will create any mappings that
        // we were unable to create now.
        String parentHandle = value;
        if (parentHandle != null && !parentHandle.isEmpty()) {
            //Remove 'hdl:' prefix, if it exists
            if (parentHandle.startsWith("hdl:")) {
                parentHandle = parentHandle.substring(4);
            }

            //Get parent object (if it exists)
            DSpaceObject parentDso = handleService.resolveToObject(context, parentHandle);

            //For Items, this parent *must* be a Collection
            if (parentDso != null && parentDso.getType() == Constants.COLLECTION) {
                Collection collection = (Collection) parentDso;

                //If this item is not already mapped into this collection, map it!
                if (!itemService.isIn(item, collection)) {
                    collectionService.addItem(context, collection, item);
                }
            }
        }
    } else {
        log.warn("Got unrecognized DC field for Item: " + dcField);
    }
}
}