Source code for spinneret.shadow

"""A module for creating shadow metadata"""

from urllib.parse import urljoin
from lxml import etree
from spinneret.utilities import is_url, load_eml, write_eml


def convert_userid_to_url(eml: etree.ElementTree) -> etree.ElementTree:
    """Convert ``userId`` values to URLs using their ``directory`` attribute.

    :param eml: An EML document
    :returns: The same EML document, modified in place, with userId
        elements converted to URLs, if not already, and if possible.
    :notes: A userId is left untouched when it has no value to convert,
        when its directory is not a URL, or when its value is already a
        URL.
    """
    # Find all userId elements with a directory attribute
    for element in eml.xpath("//userId[@directory]"):
        directory = element.attrib["directory"]
        value = element.text

        # An empty (or whitespace-only) userId has nothing to convert.
        # Without this guard None would be handed to is_url, and
        # urljoin(directory, None) returns the directory unchanged, which
        # would replace the missing identifier with the bare directory URL.
        if value is None or not value.strip():
            continue

        # If the directory isn't a URL, then it is not possible to convert
        # the value to a URL, so skip this element
        if not is_url(directory):
            continue

        # If the value is not a URL, then convert it to a URL
        if not is_url(value):
            element.text = urljoin(directory, value)

    return eml


def create_shadow_eml(eml_path: str, output_path: str) -> None:
    """Create a shadow EML file by running the enrichment functions in order.

    :param eml_path: The path to the EML file to be annotated.
    :param output_path: The path to write the annotated EML file.
    :returns: None
    :notes: The EML is loaded from ``eml_path``, passed through each
        enrichment function in turn (the output of one is the input of
        the next), and the final result is written to ``output_path``.
    """
    # Enrichment steps, applied in the order listed
    enrichments = (convert_userid_to_url,)

    document = load_eml(eml_path)
    for enrich in enrichments:
        document = enrich(document)

    write_eml(document, output_path)