%%%-------------------------------------------------------------------
%%% @author Lukasz Opiola
%%% @copyright (C) 2025 Onedata (onedata.org)
%%% This software is released under the MIT license
%%% cited in 'LICENSE.txt'.
%%% @doc
%%% Implementation of the onezone_plugin_behaviour and the handle_metadata_plugin_behaviour
%%% for handling DataCite metadata schema ("oai_datacite").
%%% @see handle_metadata_plugin_behaviour for general information about metadata plugins.
%%%
%%% NOTE: as far as OAI-PMH is concerned, two variants of DataCite schema are commonly used:
%%% "oai_datacite" and "datacite". The main difference is that the former includes a wrapper
%%% element with additional information, as specified here:
%%% https://support.datacite.org/docs/oai-pmh-schema-documentation
%%% The official DataCite OAI-PMH endpoint supports both schemas:
%%% * https://oai.datacite.org/oai?verb=GetRecord&metadataPrefix=oai_datacite&identifier=doi:10.5061/dryad.7q0nq
%%% * https://oai.datacite.org/oai?verb=GetRecord&metadataPrefix=datacite&identifier=doi:10.5061/dryad.7q0nq
%%% This plugin uses "oai_datacite" to denote the baseline metadata schema and supports
%%% the two above-mentioned formats of its dissemination via OAI-PMH.
%%%
%%% Metadata revision step:
%%%   * remove preexisting identifier element(s) (to be overwritten in the next step)
%%%   * add an alternateIdentifier element with the value equal to the public share URL
%%%
%%% Public handle insertion step:
%%%   * insert an identifier element (serving as primary) with the value equal to the public handle
%%%
%%% Adaptation for OAI-PMH step:
%%%   * depending on the chosen metadataPrefix, wrap or not in an additional oai_datacite
%%%     element (see the NOTE above)
%%% @end
%%%-------------------------------------------------------------------
-module(oai_datacite_metadata_plugin).
-author("Lukasz Opiola").

-behavior(onezone_plugin_behaviour).
-behaviour(handle_metadata_plugin_behaviour).

-include("http/public_data/oai.hrl").


%% onezone_plugin_behaviour callbacks
-export([type/0]).

%% handle_metadata_plugin_behaviour callbacks
-export([metadata_schema/0, supported_oai_pmh_metadata_prefixes/0, schema_URL/1, main_namespace/1]).
-export([revise_for_publication/3, insert_public_handle/2, adapt_for_oai_pmh/2]).
-export([encode_xml/1]).
-export([validation_examples/0]).


%% Expands to an xmlElement record describing the primary DataCite identifier:
%% an "identifier" element with a single identifierType attribute set to Type
%% and a single text node holding Value.
-define(identifier_element(Type, Value), #xmlElement{
    name = identifier,
    attributes = [#xmlAttribute{name = identifierType, value = Type}],
    content = [#xmlText{value = Value}]
}).

%% Expands to an xmlElement record describing an "alternateIdentifier" element
%% whose alternateIdentifierType attribute is fixed to "URL" and whose only
%% child is a text node holding Value.
%% Used in this module both to build such an element and as the pattern passed
%% to ?find_matching_element (see ensure_alternate_url_identifier/2).
-define(alternate_url_identifier_element(Value), #xmlElement{
    name = alternateIdentifier,
    attributes = [#xmlAttribute{name = alternateIdentifierType, value = "URL"}],
    content = [#xmlText{value = Value}]
}).


%% Resolves the value placed in the datacentreSymbol element of the oai_datacite
%% wrapper: the datacite_datacentre_symbol env variable (read via oz_worker:get_env/2)
%% or, when it is undefined, the result of oz_worker:get_domain/0.
%% NOTE: this expands to a case expression that binds the variable Symbol, so that
%% name should not be reused in a function body where the macro is expanded.
-define(DATACENTRE_SYMBOL, case oz_worker:get_env(datacite_datacentre_symbol, undefined) of
    undefined -> oz_worker:get_domain();
    Symbol -> Symbol
end).


%%%===================================================================
%%% onezone_plugin_behaviour callbacks
%%%===================================================================


%% @doc {@link onezone_plugin_behaviour} callback type/0.
%% Declares this module to be a handle metadata plugin.
-spec type() -> handle_metadata_plugin.
type() -> handle_metadata_plugin.


%%%===================================================================
%%% handle_metadata_plugin_behaviour callbacks
%%%===================================================================


%% @doc {@link handle_metadata_plugin_behaviour} callback metadata_schema/0.
%% The baseline metadata schema handled by this plugin is the one denoted
%% by the oai_datacite metadata prefix.
-spec metadata_schema() -> od_handle:metadata_schema().
metadata_schema() -> ?OAI_DATACITE_METADATA_PREFIX.


%% @doc {@link handle_metadata_plugin_behaviour} callback supported_oai_pmh_metadata_prefixes/0.
%% Lists every OAI-PMH metadataPrefix under which metadata handled by this plugin
%% can be disseminated: the wrapped variant (oai_datacite) and the bare one (datacite);
%% see adapt_for_oai_pmh/2 for how the two differ.
%% The result is a list of prefixes (each accepted by schema_URL/1 and main_namespace/1),
%% which is what the spec below states.
-spec supported_oai_pmh_metadata_prefixes() -> [oai_metadata:prefix()].
supported_oai_pmh_metadata_prefixes() ->
    [?OAI_DATACITE_METADATA_PREFIX, ?DATACITE_METADATA_PREFIX].


%% @doc {@link handle_metadata_plugin_behaviour} callback schema_URL/1.
%% Returns the XSD location advertised for the given OAI-PMH metadata prefix.
-spec schema_URL(oai_metadata:prefix()) -> binary().
schema_URL(?OAI_DATACITE_METADATA_PREFIX) -> <<"http://schema.datacite.org/oai/oai-1.1/oai.xsd">>;
schema_URL(?DATACITE_METADATA_PREFIX) ->
    % The actual XSD depends on the schema version of each individual record, hence
    % a single value cannot be determined and a placeholder is returned;
    % see https://oai.datacite.org/oai?verb=ListMetadataFormats
    <<"http://schema.datacite.org/meta/nonexistant/nonexistant.xsd">>.


%% @doc {@link handle_metadata_plugin_behaviour} callback main_namespace/1.
%% Returns the namespace attribute name paired with the namespace URI
%% advertised for the given OAI-PMH metadata prefix.
-spec main_namespace(oai_metadata:prefix()) -> {atom(), binary()}.
main_namespace(?OAI_DATACITE_METADATA_PREFIX) -> {xmlns, <<"http://schema.datacite.org/oai/oai-1.1/">>};
main_namespace(?DATACITE_METADATA_PREFIX) ->
    % The actual namespace depends on the schema version of each individual record, hence
    % a single value cannot be determined and a placeholder is returned;
    % see https://oai.datacite.org/oai?verb=ListMetadataFormats
    {xmlns, <<"http://datacite.org/schema/nonexistant">>}.


%% @doc {@link handle_metadata_plugin_behaviour} callback revise_for_publication/3.
%% Accepts only metadata whose root is a resource element; any other input yields error.
%% For accepted metadata, all primary identifier elements are dropped and an
%% alternateIdentifier of type URL pointing to the public share URL is ensured.
-spec revise_for_publication(od_handle:parsed_metadata(), od_share:id(), od_share:record()) ->
    {ok, od_handle:parsed_metadata()} | error.
revise_for_publication(#xmlElement{name = resource} = SubmittedXml, ShareId, _ShareRecord) ->
    % TODO VFS-12975 improve the behaviour based on options if the pid should be reused
    % and possibly change the primary identifier to alternate one if needed
    WithoutPrimaryIdentifier = remove_primary_identifier(SubmittedXml),
    PublicShareUrl = binary_to_list(od_share:build_public_url(ShareId)),
    {ok, ensure_alternate_url_identifier(PublicShareUrl, WithoutPrimaryIdentifier)};

revise_for_publication(_NotAResourceXml, _ShareId, _ShareRecord) ->
    % the root element is not a resource - the metadata does not qualify for publication
    error.


%% @doc {@link handle_metadata_plugin_behaviour} callback insert_public_handle/2.
%% Inserts the primary identifier element carrying the public handle. The identifier
%% type is derived from the shape of the handle: DOI for DOI identifiers (in which case
%% only the part extracted by the DOI_IDENTIFIER macro is used as the value), Handle for
%% handles starting with the hdl.handle.net URL prefix, and URL for anything else.
-spec insert_public_handle(od_handle:parsed_metadata(), od_handle:public_handle()) ->
    od_handle:parsed_metadata().
insert_public_handle(#xmlElement{name = resource} = ResourceXml, PublicHandle) ->
    % TODO VFS-12975 this has to be reworked for the internal handle service type so as not to
    % duplicate primary and alternate identifiers
    {IdentifierType, IdentifierValue} = case PublicHandle of
        ?DOI_IDENTIFIER(DoiHandle) ->
            {"DOI", DoiHandle};
        <<"http://hdl.handle.net/", _/binary>> ->
            % TODO VFS-12975 improve this heuristic and allow provision of identifier type client-side
            {"Handle", PublicHandle};
        _ ->
            {"URL", PublicHandle}
    end,
    insert_primary_identifier(IdentifierType, binary_to_list(IdentifierValue), ResourceXml).


%% @private
%% @doc Places an identifier element of the given type and value in the resource:
%% an already present identifier element is replaced in place, otherwise the new
%% element is prepended to the resource content (with an indent of 4).
-spec insert_primary_identifier(string(), string(), od_handle:parsed_metadata()) -> od_handle:parsed_metadata().
insert_primary_identifier(Type, Value, #xmlElement{name = resource, content = Content} = ResourceXml) ->
    UpdatedContent = case ?find_matching_element(#xmlElement{name = identifier}, Content) of
        {ok, PreexistingIdentifier} ->
            lists_utils:replace(PreexistingIdentifier, ?identifier_element(Type, Value), Content);
        error ->
            oai_xml:prepend_element_with_indent(4, ?identifier_element(Type, Value), Content)
    end,
    ResourceXml#xmlElement{content = UpdatedContent}.


%% @private
%% @doc Removes every identifier element found directly under the resource element.
%% If an identifier is immediately followed by a whitespace-only text node, that node
%% is removed too, so that no blank line is left behind.
%% Returns the resource unchanged when it contains no identifier element.
-spec remove_primary_identifier(od_handle:parsed_metadata()) -> od_handle:parsed_metadata().
remove_primary_identifier(#xmlElement{name = resource, content = Content} = ResourceXml) ->
    case ?find_matching_element(#xmlElement{name = identifier}, Content) of
        {ok, Found} ->
            % Take the nodes that follow the identifier; unlike lists:nth/2 with Index + 1,
            % lists:nthtail/2 yields an empty list (rather than crashing) when the
            % identifier is the very last node of the content.
            FollowingNodes = lists:nthtail(lists_utils:index_of(Found, Content), Content),
            ContentWithoutWhitespace = case FollowingNodes of
                [#xmlText{value = Text} = NextElement | _] ->
                    % the unicode option lets re:run/3 accept text holding code points
                    % above 255, which would otherwise raise badarg
                    case re:run(Text, "^[\\s]*$", [unicode, {capture, none}]) of
                        match -> lists:delete(NextElement, Content);
                        _ -> Content
                    end;
                _ ->
                    Content
            end,
            % just in case, remove all identifier elements if there's more than one
            % (though it's technically not allowed by the schema, it won't hurt to safeguard this)
            remove_primary_identifier(
                ResourceXml#xmlElement{content = lists:delete(Found, ContentWithoutWhitespace)}
            );
        error ->
            ResourceXml
    end.


%% @private
%% @doc Makes sure that an alternateIdentifier element of type URL with the given
%% value is present. When called with the resource element, the alternateIdentifiers
%% container is located (or created empty and prepended with an indent of 4 if missing)
%% and processed recursively. When called with the alternateIdentifiers container,
%% the URL element is prepended (with an indent of 8) unless a matching one is found.
-spec ensure_alternate_url_identifier(string(), od_handle:parsed_metadata()) -> od_handle:parsed_metadata().
ensure_alternate_url_identifier(Value, #xmlElement{name = resource, content = Content} = ResourceXml) ->
    case ?find_matching_element(#xmlElement{name = alternateIdentifiers}, Content) of
        {ok, PreexistingContainer} ->
            UpdatedContainer = ensure_alternate_url_identifier(Value, PreexistingContainer),
            ResourceXml#xmlElement{
                content = lists_utils:replace(PreexistingContainer, UpdatedContainer, Content)
            };
        error ->
            % no container yet - add an empty one and retry, which hits the branch above
            EmptyContainer = #xmlElement{name = alternateIdentifiers, content = []},
            ContentWithContainer = oai_xml:prepend_element_with_indent(4, EmptyContainer, Content),
            ensure_alternate_url_identifier(Value, ResourceXml#xmlElement{content = ContentWithContainer})
    end;
ensure_alternate_url_identifier(Value, #xmlElement{name = alternateIdentifiers, content = Content} = AIXml) ->
    case ?find_matching_element(?alternate_url_identifier_element(Value), Content) of
        {ok, _} ->
            % nothing to do
            AIXml;
        error ->
            UrlIdentifierXml = ?alternate_url_identifier_element(Value),
            AIXml#xmlElement{
                content = oai_xml:prepend_element_with_indent(8, UrlIdentifierXml, Content)
            }
    end.


%% @doc {@link handle_metadata_plugin_behaviour} callback adapt_for_oai_pmh/2.
%% For the datacite prefix, the resource element is returned as is. For the oai_datacite
%% prefix, the resource is placed in the payload of an oai_datacite wrapper element, next
%% to the schemaVersion (inferred from the resource) and datacentreSymbol elements.
-spec adapt_for_oai_pmh(oai_metadata:prefix(), od_handle:parsed_metadata()) -> od_handle:parsed_metadata().
adapt_for_oai_pmh(?DATACITE_METADATA_PREFIX, #xmlElement{name = resource} = ResourceXml) ->
    ResourceXml;

adapt_for_oai_pmh(?OAI_DATACITE_METADATA_PREFIX, #xmlElement{name = resource} = ResourceXml) ->
    {NamespaceAttrName, NamespaceUri} = main_namespace(?OAI_DATACITE_METADATA_PREFIX),
    SchemaLocation = str_utils:format("~ts ~ts", [NamespaceUri, schema_URL(?OAI_DATACITE_METADATA_PREFIX)]),
    WrapperAttributes = [
        #xmlAttribute{name = NamespaceAttrName, value = str_utils:to_list(NamespaceUri)},
        #xmlAttribute{name = 'xsi:schemaLocation', value = SchemaLocation}
    ],
    SchemaVersionXml = #xmlElement{
        name = schemaVersion,
        content = [#xmlText{value = str_utils:to_list(infer_schema_version(ResourceXml))}]
    },
    DatacentreSymbolXml = #xmlElement{
        name = datacentreSymbol,
        content = [#xmlText{value = str_utils:to_list(?DATACENTRE_SYMBOL)}]
    },
    % the resource starts in a new line inside the payload element
    PayloadXml = #xmlElement{name = payload, content = [#xmlText{value = "\n"}, ResourceXml]},
    IndentedChildren = oai_xml:indent_content_in_newline(4, [
        SchemaVersionXml, DatacentreSymbolXml, PayloadXml
    ]),
    #xmlElement{
        name = oai_datacite,
        attributes = WrapperAttributes,
        % the trailing newline puts the closing wrapper tag in its own line
        content = IndentedChildren ++ [#xmlText{value = "\n"}]
    }.


%% @private
%% @doc Derives the schema version from the xmlns attribute of the given element:
%% whatever follows the "http://datacite.org/schema/kernel-" prefix is the version.
%% Falls back to "0" when the attribute is missing or has a different value.
-spec infer_schema_version(od_handle:parsed_metadata()) -> string().
infer_schema_version(#xmlElement{attributes = Attributes}) ->
    UnknownVersion = "0",
    case ?find_matching_element(#xmlAttribute{name = xmlns}, Attributes) of
        {ok, #xmlAttribute{value = "http://datacite.org/schema/kernel-" ++ KernelVersion}} ->
            KernelVersion;
        {ok, _UnrecognizedNamespace} ->
            UnknownVersion;
        error ->
            UnknownVersion
    end.


%% @doc {@link handle_metadata_plugin_behaviour} callback encode_xml/1.
%% Delegates the encoding of parsed metadata to oai_xml:encode/1.
-spec encode_xml(od_handle:parsed_metadata()) -> od_handle:raw_metadata().
encode_xml(ParsedMetadata) -> oai_xml:encode(ParsedMetadata).


%% @doc {@link handle_metadata_plugin_behaviour} callback validation_examples/0.
%% Returns three examples: two inputs whose root element is not a resource element
%% (marked as not qualifying for publication) and one complete resource document
%% along with the metadata expected after each processing step (revision, public
%% handle insertion and adaptation for each supported OAI-PMH metadata prefix).
-spec validation_examples() -> [handle_metadata_plugin_behaviour:validation_example()].
validation_examples() -> [
    % TODO VFS-7454 add better validation of the XML (schema)
    % Example 1: well-formed XML, but the root element is not a resource element
    #handle_metadata_plugin_validation_example{
        input_raw_xml = <<
            "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n",
            "<valid-xml>but no resource tag</valid-xml>"
        >>,
        input_qualifies_for_publication = false
    },

    % Example 2: the root element is a creators element rather than a resource element
    #handle_metadata_plugin_validation_example{
        input_raw_xml = <<
            "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n",
            "<creators>\n"
            "   <creator>\n"
            "       <creatorName>Jane Doe</creatorName>\n"
            "   </creator>\n"
            "</creators>"
        >>,
        input_qualifies_for_publication = false
    },

    % Example 3: a complete resource document that includes two identifier elements
    % (one at the top and one further down) and a preexisting alternateIdentifiers
    % element with one alternateIdentifier of type oai
    #handle_metadata_plugin_validation_example{
        input_raw_xml = <<
            "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n",
            "<resource\n"
            "    xmlns=\"http://datacite.org/schema/kernel-4\"\n"
            "    xsi:schemaLocation=\"http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd\">\n"
            "    <identifier>preexisting-identifier-to-be-deleted</identifier>\n"
            "    <alternateIdentifiers>\n"
            "        <alternateIdentifier alternateIdentifierType=\"oai\">oai:example.com:1234567</alternateIdentifier>\n"
            "    </alternateIdentifiers>\n"
            "    <creators>\n"
            "        <creator>\n"
            "            <creatorName nameType=\"Personal\">Jane Doe</creatorName>\n"
            "            <familyName>Doe</familyName>\n"
            "            <affiliation>University X</affiliation>\n"
            "        </creator>\n"
            "    </creators>\n"
            "    <titles>\n"
            "        <title>Example dataset</title>\n"
            "    </titles>\n"
            "    <publisher>Onedata</publisher>\n"
            "    <publicationYear>2025</publicationYear>\n"
            "    <dates>\n"
            "        <date dateType=\"Issued\">2025-01-11</date>\n"
            "        <date dateType=\"Updated\">2025-01-12</date>\n"
            "    </dates>\n"
            "    <resourceType resourceTypeGeneral=\"Software\"/>\n"
            "    <identifier>doi:10.5061/superflouous-identifier</identifier>\n"
            "    <descriptions>\n"
            "        <description descriptionType=\"Abstract\">This is an example dataset</description>\n"
            "    </descriptions>\n"
            "</resource>"
        >>,
        input_qualifies_for_publication = true,
        % expected after revision: both identifier elements are gone and an alternateIdentifier
        % of type URL with the public share URL comes first in the alternateIdentifiers element
        exp_revised_metadata_generator = fun(ShareId, _ShareRecord) ->
            <<
                "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n",
                "<resource\n"
                "    xmlns=\"http://datacite.org/schema/kernel-4\"\n"
                "    xsi:schemaLocation=\"http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd\">\n"
                "    <alternateIdentifiers>\n"
                "        <alternateIdentifier alternateIdentifierType=\"URL\">", (od_share:build_public_url(ShareId))/binary, "</alternateIdentifier>\n"
                "        <alternateIdentifier alternateIdentifierType=\"oai\">oai:example.com:1234567</alternateIdentifier>\n"
                "    </alternateIdentifiers>\n"
                "    <creators>\n"
                "        <creator>\n"
                "            <creatorName nameType=\"Personal\">Jane Doe</creatorName>\n"
                "            <familyName>Doe</familyName>\n"
                "            <affiliation>University X</affiliation>\n"
                "        </creator>\n"
                "    </creators>\n"
                "    <titles>\n"
                "        <title>Example dataset</title>\n"
                "    </titles>\n"
                "    <publisher>Onedata</publisher>\n"
                "    <publicationYear>2025</publicationYear>\n"
                "    <dates>\n"
                "        <date dateType=\"Issued\">2025-01-11</date>\n"
                "        <date dateType=\"Updated\">2025-01-12</date>\n"
                "    </dates>\n"
                "    <resourceType resourceTypeGeneral=\"Software\"/>\n"
                "    <descriptions>\n"
                "        <description descriptionType=\"Abstract\">This is an example dataset</description>\n"
                "    </descriptions>\n"
                "</resource>"
            >>
        end,
        % expected after public handle insertion: the revised metadata with the primary
        % identifier element (see exp_primary_identifier/1) as the first child of the resource
        exp_final_metadata_generator = fun(ShareId, _ShareRecord, PublicHandle) ->
            <<
                "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n",
                "<resource\n"
                "    xmlns=\"http://datacite.org/schema/kernel-4\"\n"
                "    xsi:schemaLocation=\"http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd\">\n"
                "    ", (exp_primary_identifier(PublicHandle))/binary, "\n"
                "    <alternateIdentifiers>\n"
                "        <alternateIdentifier alternateIdentifierType=\"URL\">", (od_share:build_public_url(ShareId))/binary, "</alternateIdentifier>\n"
                "        <alternateIdentifier alternateIdentifierType=\"oai\">oai:example.com:1234567</alternateIdentifier>\n"
                "    </alternateIdentifiers>\n"
                "    <creators>\n"
                "        <creator>\n"
                "            <creatorName nameType=\"Personal\">Jane Doe</creatorName>\n"
                "            <familyName>Doe</familyName>\n"
                "            <affiliation>University X</affiliation>\n"
                "        </creator>\n"
                "    </creators>\n"
                "    <titles>\n"
                "        <title>Example dataset</title>\n"
                "    </titles>\n"
                "    <publisher>Onedata</publisher>\n"
                "    <publicationYear>2025</publicationYear>\n"
                "    <dates>\n"
                "        <date dateType=\"Issued\">2025-01-11</date>\n"
                "        <date dateType=\"Updated\">2025-01-12</date>\n"
                "    </dates>\n"
                "    <resourceType resourceTypeGeneral=\"Software\"/>\n"
                "    <descriptions>\n"
                "        <description descriptionType=\"Abstract\">This is an example dataset</description>\n"
                "    </descriptions>\n"
                "</resource>"
            >>
        end,
        % expected OAI-PMH output per metadata prefix; the oai_datacite clause reuses the
        % output of the datacite clause, cuts off its first line (the XML prolog) by splitting
        % on the first newline and embeds the rest in the payload of the oai_datacite wrapper
        exp_oai_pmh_metadata_generator = fun
            F(?OAI_DATACITE_METADATA_PREFIX, ShareId, ShareRecord, PublicHandle) ->
                OaiPmhEntry = F(?DATACITE_METADATA_PREFIX, ShareId, ShareRecord, PublicHandle),
                [_PrologLine, ExpDataCiteMetadata] = binary:split(OaiPmhEntry, <<"\n">>),
                <<
                    "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n",
                    "<oai_datacite\n",
                    "    xmlns=\"http://schema.datacite.org/oai/oai-1.1/\"\n"
                    "    xsi:schemaLocation=\"http://schema.datacite.org/oai/oai-1.1/ http://schema.datacite.org/oai/oai-1.1/oai.xsd\">\n",
                    "    <schemaVersion>4</schemaVersion>\n",
                    "    <datacentreSymbol>", (oz_worker:get_domain())/binary, "</datacentreSymbol>\n",
                    "    <payload>\n",
                    ExpDataCiteMetadata/binary, "\n",
                    "    </payload>\n",
                    "</oai_datacite>"
                >>;
            F(?DATACITE_METADATA_PREFIX, ShareId, _ShareRecord, PublicHandle) ->
                <<
                    "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n",
                    "<resource\n"
                    "    xmlns=\"http://datacite.org/schema/kernel-4\"\n"
                    "    xsi:schemaLocation=\"http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd\">\n",
                    "    ", (exp_primary_identifier(PublicHandle))/binary, "\n"
                    "    <alternateIdentifiers>\n"
                    "        <alternateIdentifier alternateIdentifierType=\"URL\">", (od_share:build_public_url(ShareId))/binary, "</alternateIdentifier>\n"
                    "        <alternateIdentifier alternateIdentifierType=\"oai\">oai:example.com:1234567</alternateIdentifier>\n"
                    "    </alternateIdentifiers>\n"
                    "    <creators>\n"
                    "        <creator>\n"
                    "            <creatorName nameType=\"Personal\">Jane Doe</creatorName>\n"
                    "            <familyName>Doe</familyName>\n"
                    "            <affiliation>University X</affiliation>\n"
                    "        </creator>\n"
                    "    </creators>\n"
                    "    <titles>\n"
                    "        <title>Example dataset</title>\n"
                    "    </titles>\n"
                    "    <publisher>Onedata</publisher>\n"
                    "    <publicationYear>2025</publicationYear>\n"
                    "    <dates>\n"
                    "        <date dateType=\"Issued\">2025-01-11</date>\n"
                    "        <date dateType=\"Updated\">2025-01-12</date>\n"
                    "    </dates>\n"
                    "    <resourceType resourceTypeGeneral=\"Software\"/>\n"
                    "    <descriptions>\n"
                    "        <description descriptionType=\"Abstract\">This is an example dataset</description>\n"
                    "    </descriptions>\n"
                    "</resource>"
                >>
        end
    }
].


%% @private
%% @doc Renders the primary identifier element expected in the validation examples
%% for the given public handle. Handles starting with "doi:" yield type DOI with the
%% prefix stripped from the value, handles starting with "http://hdl.handle.net/"
%% yield type Handle, and any other handle yields type URL (value kept verbatim).
-spec exp_primary_identifier(od_handle:public_handle()) -> binary().
exp_primary_identifier(PublicHandle) ->
    {IdentifierType, IdentifierValue} = case PublicHandle of
        <<"doi:", DoiHandle/binary>> -> {<<"DOI">>, DoiHandle};
        <<"http://hdl.handle.net/", _/binary>> -> {<<"Handle">>, PublicHandle};
        _ -> {<<"URL">>, PublicHandle}
    end,
    <<"<identifier identifierType=\"", IdentifierType/binary, "\">", IdentifierValue/binary, "</identifier>">>.
