Adding a mechanism to avoid archiving the same resource multiple times for a single archive set.
This commit is contained in:
parent
cf97a98416
commit
3d18e9d8a4
|
@ -2,161 +2,222 @@
|
||||||
|
|
||||||
<p:param name="data" type="input"/>
|
<p:param name="data" type="input"/>
|
||||||
|
|
||||||
<!-- Fetch the resource -->
|
<!-- Check whether the resource has already been archived for that set -->
|
||||||
<p:processor name="oxf:url-generator">
|
|
||||||
<p:input name="config" transform="oxf:xslt" href="#data">
|
|
||||||
<config xsl:version="2.0">
|
|
||||||
<url>
|
|
||||||
<xsl:value-of select="/action/@url"/>
|
|
||||||
</url>
|
|
||||||
<header>
|
|
||||||
<name>User-Agent</name>
|
|
||||||
<value>
|
|
||||||
<xsl:value-of select="doc('oxf:/config.xml')/config/user-agent"/>
|
|
||||||
</value>
|
|
||||||
</header>
|
|
||||||
<mode>archive</mode>
|
|
||||||
</config>
|
|
||||||
</p:input>
|
|
||||||
<p:output name="data" id="archive" debug="archive"/>
|
|
||||||
</p:processor>
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Store the archive in the database -->
|
|
||||||
<p:processor name="oxf:pipeline">
|
<p:processor name="oxf:pipeline">
|
||||||
<p:input name="config" href="/data-access.xpl"/>
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
<p:input name="data" transform="oxf:xslt" href="#data">
|
<p:input name="data" transform="oxf:xslt" href="#data">
|
||||||
<config xsl:version="2.0">
|
<config xsl:version="2.0">
|
||||||
<relpath>
|
<relpath>
|
||||||
<xsl:value-of select="/action/@directory"/>
|
<xsl:value-of select="/action/@directory"/>
|
||||||
<xsl:value-of select="/action/@filename"/>
|
<xsl:text>index.xml</xsl:text>
|
||||||
</relpath>
|
</relpath>
|
||||||
<operation>write</operation>
|
<operation>read</operation>
|
||||||
<type>document</type>
|
<type>xquery</type>
|
||||||
|
<parameter name="url" type="string">
|
||||||
|
<xsl:value-of select="/action/@url"/>
|
||||||
|
</parameter>
|
||||||
</config>
|
</config>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:input name="param" href="#archive"/>
|
<p:input name="param">
|
||||||
<p:output name="data" id="response2"/>
|
<xquery><![CDATA[
|
||||||
|
|
||||||
|
boolean(//archive[@url = $(url)])
|
||||||
|
|
||||||
|
]]></xquery>
|
||||||
|
</p:input>
|
||||||
|
<p:output name="data" id="duplicate" debug="duplicate"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
|
||||||
<p:processor name="oxf:null-serializer">
|
<p:choose href="#duplicate">
|
||||||
<p:input name="data" href="#response2"/>
|
|
||||||
</p:processor>
|
|
||||||
|
|
||||||
|
<p:when test="/*/* = 'true'">
|
||||||
<!-- Test the type of document to see if it needs to be rewritten -->
|
<!-- Already archived, nothing to do -->
|
||||||
<p:choose href="#archive">
|
<!-- Update the queue -->
|
||||||
|
<p:processor name="oxf:pipeline">
|
||||||
<!-- HTML document : need to update the links... -->
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
<p:when test="/archive/response/document/@content-type=('text/html', 'text/css')">
|
<p:input name="data" transform="oxf:xslt" href="#data">
|
||||||
|
|
||||||
<!-- Call the corresponding pipeline to extract the links and rewrite them -->
|
|
||||||
<p:processor name="oxf:url-generator">
|
|
||||||
<p:input name="config" transform="oxf:xslt" href="#archive">
|
|
||||||
<config xsl:version="2.0">
|
<config xsl:version="2.0">
|
||||||
<url>
|
<relpath>queue.xml</relpath>
|
||||||
<xsl:text>oxf:/actions/mediatypes/</xsl:text>
|
<operation>write</operation>
|
||||||
<xsl:value-of select="substring-after(/archive/response/document/@content-type, '/')"/>
|
<type>xquery</type>
|
||||||
<xsl:text>.xpl</xsl:text>
|
<parameter name="uuid" type="string">
|
||||||
</url>
|
<xsl:value-of select="/action/@uuid"/>
|
||||||
|
</parameter>
|
||||||
</config>
|
</config>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:output name="data" id="pipeline"/>
|
<p:input name="param">
|
||||||
|
<xquery><![CDATA[
|
||||||
|
|
||||||
|
for $a in /queue/action where $a/@uuid = $(uuid) return
|
||||||
|
update
|
||||||
|
delete $a
|
||||||
|
|
||||||
|
]]></xquery>
|
||||||
|
</p:input>
|
||||||
|
<p:output name="data" id="response4" debug="response"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
<p:processor name="oxf:null-serializer">
|
||||||
|
<p:input name="data" href="#response4"/>
|
||||||
|
</p:processor>
|
||||||
|
</p:when>
|
||||||
|
|
||||||
<p:processor name="oxf:pipeline">
|
<p:otherwise>
|
||||||
<p:input name="config" href="#pipeline"/>
|
<!-- Otherwise, archive the resource... -->
|
||||||
<p:input name="archive" href="#archive"/>
|
<!-- Fetch the resource -->
|
||||||
<p:output name="rewritten" id="rewritten"/>
|
<p:processor name="oxf:url-generator">
|
||||||
<p:output name="links" id="links"/>
|
<p:input name="config" transform="oxf:xslt" href="#data">
|
||||||
|
<config xsl:version="2.0">
|
||||||
|
<url>
|
||||||
|
<xsl:value-of select="/action/@url"/>
|
||||||
|
</url>
|
||||||
|
<header>
|
||||||
|
<name>User-Agent</name>
|
||||||
|
<value>
|
||||||
|
<xsl:value-of select="doc('oxf:/config.xml')/config/user-agent"/>
|
||||||
|
</value>
|
||||||
|
</header>
|
||||||
|
<mode>archive</mode>
|
||||||
|
</config>
|
||||||
|
</p:input>
|
||||||
|
<p:output name="data" id="archive" debug="archive"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
|
||||||
|
|
||||||
<!-- Store the rewritten document in the database -->
|
<!-- Store the archive in the database -->
|
||||||
<p:processor name="oxf:pipeline">
|
<p:processor name="oxf:pipeline">
|
||||||
<p:input name="config" href="/data-access.xpl"/>
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
<p:input name="data" transform="oxf:xslt" href="#data">
|
<p:input name="data" transform="oxf:xslt" href="#data">
|
||||||
<config xsl:version="2.0">
|
<config xsl:version="2.0">
|
||||||
<relpath>
|
<relpath>
|
||||||
<xsl:value-of select="/action/@directory"/>
|
<xsl:value-of select="/action/@directory"/>
|
||||||
<xsl:text>rewritten-</xsl:text>
|
|
||||||
<xsl:value-of select="/action/@filename"/>
|
<xsl:value-of select="/action/@filename"/>
|
||||||
</relpath>
|
</relpath>
|
||||||
<operation>write</operation>
|
<operation>write</operation>
|
||||||
<type>document</type>
|
<type>document</type>
|
||||||
</config>
|
</config>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:input name="param" href="#rewritten"/>
|
<p:input name="param" href="#archive"/>
|
||||||
<p:output name="data" id="response3"/>
|
<p:output name="data" id="response2"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
|
||||||
<p:processor name="oxf:null-serializer">
|
<p:processor name="oxf:null-serializer">
|
||||||
<p:input name="data" href="#response3"/>
|
<p:input name="data" href="#response2"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Test the type of document to see if it needs to be rewritten -->
|
||||||
|
<p:choose href="#archive">
|
||||||
|
|
||||||
<!-- Update the archive index -->
|
<!-- HTML document : need to update the links... -->
|
||||||
<p:processor name="oxf:pipeline">
|
<p:when test="/archive/response/document/@content-type=('text/html', 'text/css')">
|
||||||
<p:input name="config" href="/data-access.xpl"/>
|
|
||||||
<p:input name="data" transform="oxf:xslt" href="#data">
|
<!-- Call the corresponding pipeline to extract the links and rewrite them -->
|
||||||
<config xsl:version="2.0">
|
<p:processor name="oxf:url-generator">
|
||||||
<relpath>
|
<p:input name="config" transform="oxf:xslt" href="#archive">
|
||||||
<xsl:value-of select="/action/@directory"/>
|
<config xsl:version="2.0">
|
||||||
<xsl:text>index.xml</xsl:text>
|
<url>
|
||||||
</relpath>
|
<xsl:text>oxf:/actions/mediatypes/</xsl:text>
|
||||||
<operation>write</operation>
|
<xsl:value-of select="substring-after(/archive/response/document/@content-type, '/')"/>
|
||||||
<type>xquery</type>
|
<xsl:text>.xpl</xsl:text>
|
||||||
<parameter name="url" type="string">
|
</url>
|
||||||
<xsl:value-of select="/action/@url"/>
|
</config>
|
||||||
</parameter>
|
</p:input>
|
||||||
<parameter name="filename" type="string">
|
<p:output name="data" id="pipeline"/>
|
||||||
<xsl:value-of select="/action/@filename"/>
|
</p:processor>
|
||||||
</parameter>
|
|
||||||
<parameter name="filename-rewritten" type="string">
|
<p:processor name="oxf:pipeline">
|
||||||
<xsl:text>rewritten-</xsl:text>
|
<p:input name="config" href="#pipeline"/>
|
||||||
<xsl:value-of select="/action/@filename"/>
|
<p:input name="archive" href="#archive"/>
|
||||||
</parameter>
|
<p:output name="rewritten" id="rewritten"/>
|
||||||
</config>
|
<p:output name="links" id="links"/>
|
||||||
</p:input>
|
</p:processor>
|
||||||
<p:input name="param">
|
|
||||||
<xquery><![CDATA[
|
|
||||||
|
<!-- Store the rewritten document in the database -->
|
||||||
|
<p:processor name="oxf:pipeline">
|
||||||
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
|
<p:input name="data" transform="oxf:xslt" href="#data">
|
||||||
|
<config xsl:version="2.0">
|
||||||
|
<relpath>
|
||||||
|
<xsl:value-of select="/action/@directory"/>
|
||||||
|
<xsl:text>rewritten-</xsl:text>
|
||||||
|
<xsl:value-of select="/action/@filename"/>
|
||||||
|
</relpath>
|
||||||
|
<operation>write</operation>
|
||||||
|
<type>document</type>
|
||||||
|
</config>
|
||||||
|
</p:input>
|
||||||
|
<p:input name="param" href="#rewritten"/>
|
||||||
|
<p:output name="data" id="response3"/>
|
||||||
|
</p:processor>
|
||||||
|
<p:processor name="oxf:null-serializer">
|
||||||
|
<p:input name="data" href="#response3"/>
|
||||||
|
</p:processor>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Update the archive index -->
|
||||||
|
<p:processor name="oxf:pipeline">
|
||||||
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
|
<p:input name="data" transform="oxf:xslt" href="#data">
|
||||||
|
<config xsl:version="2.0">
|
||||||
|
<relpath>
|
||||||
|
<xsl:value-of select="/action/@directory"/>
|
||||||
|
<xsl:text>index.xml</xsl:text>
|
||||||
|
</relpath>
|
||||||
|
<operation>write</operation>
|
||||||
|
<type>xquery</type>
|
||||||
|
<parameter name="url" type="string">
|
||||||
|
<xsl:value-of select="/action/@url"/>
|
||||||
|
</parameter>
|
||||||
|
<parameter name="filename" type="string">
|
||||||
|
<xsl:value-of select="/action/@filename"/>
|
||||||
|
</parameter>
|
||||||
|
<parameter name="filename-rewritten" type="string">
|
||||||
|
<xsl:text>rewritten-</xsl:text>
|
||||||
|
<xsl:value-of select="/action/@filename"/>
|
||||||
|
</parameter>
|
||||||
|
</config>
|
||||||
|
</p:input>
|
||||||
|
<p:input name="param">
|
||||||
|
<xquery><![CDATA[
|
||||||
for $as in /archive-set
|
for $as in /archive-set
|
||||||
return
|
return
|
||||||
update
|
update
|
||||||
insert <archive url=$(url) href=$(filename) href-rewritten=$(filename-rewritten) dateTime="{current-dateTime()}"/>
|
insert <archive url=$(url) href=$(filename) href-rewritten=$(filename-rewritten) dateTime="{current-dateTime()}"/>
|
||||||
into $as
|
into $as
|
||||||
]]></xquery>
|
]]></xquery>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:output name="data" id="response1"/>
|
<p:output name="data" id="response1"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
<p:processor name="oxf:null-serializer">
|
<p:processor name="oxf:null-serializer">
|
||||||
<p:input name="data" href="#response1"/>
|
<p:input name="data" href="#response1"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
|
||||||
<!-- Update the queue -->
|
<!-- Update the queue -->
|
||||||
<p:processor name="oxf:pipeline">
|
<p:processor name="oxf:pipeline">
|
||||||
<p:input name="config" href="/data-access.xpl"/>
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
<p:input name="data" transform="oxf:xslt" href="aggregate('root', #data, #links)">
|
<p:input name="data" transform="oxf:xslt" href="aggregate('root', #data, #links)">
|
||||||
<config xsl:version="2.0">
|
<config xsl:version="2.0">
|
||||||
<relpath>queue.xml</relpath>
|
<relpath>queue.xml</relpath>
|
||||||
<operation>write</operation>
|
<operation>write</operation>
|
||||||
<type>xquery</type>
|
<type>xquery</type>
|
||||||
<parameter name="directory" type="string">
|
<parameter name="directory" type="string">
|
||||||
<xsl:value-of select="/root/action/@directory"/>
|
<xsl:value-of select="/root/action/@directory"/>
|
||||||
</parameter>
|
</parameter>
|
||||||
<parameter name="uuid" type="string">
|
<parameter name="uuid" type="string">
|
||||||
<xsl:value-of select="/root/action/@uuid"/>
|
<xsl:value-of select="/root/action/@uuid"/>
|
||||||
</parameter>
|
</parameter>
|
||||||
<parameter name="priority" type="string">
|
<parameter name="priority" type="string">
|
||||||
<xsl:value-of select="/root/action/@priority"/>
|
<xsl:value-of select="/root/action/@priority"/>
|
||||||
</parameter>
|
</parameter>
|
||||||
<parameter name="links" type="node-set">
|
<parameter name="links" type="node-set">
|
||||||
<xsl:copy-of select="/root/links"/>
|
<xsl:copy-of select="/root/links"/>
|
||||||
</parameter>
|
</parameter>
|
||||||
</config>
|
</config>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:input name="param">
|
<p:input name="param">
|
||||||
<xquery><![CDATA[
|
<xquery><![CDATA[
|
||||||
declare namespace util = "http://exist-db.org/xquery/util";
|
declare namespace util = "http://exist-db.org/xquery/util";
|
||||||
declare variable $links := $(links);
|
declare variable $links := $(links);
|
||||||
|
|
||||||
|
@ -174,82 +235,86 @@ for $a in /queue/action where $a/@uuid = $(uuid) return
|
||||||
delete $a
|
delete $a
|
||||||
|
|
||||||
]]></xquery>
|
]]></xquery>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:output name="data" id="response4" debug="response"/>
|
<p:output name="data" id="response4" debug="response"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
<p:processor name="oxf:null-serializer">
|
<p:processor name="oxf:null-serializer">
|
||||||
<p:input name="data" href="#response4"/>
|
<p:input name="data" href="#response4"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
|
||||||
|
|
||||||
</p:when>
|
</p:when>
|
||||||
|
|
||||||
<!-- Otherwise: no need to rewrite -->
|
<!-- Otherwise: no need to rewrite -->
|
||||||
<p:otherwise>
|
<p:otherwise>
|
||||||
<!-- Update the archive index -->
|
<!-- Update the archive index -->
|
||||||
<p:processor name="oxf:pipeline">
|
<p:processor name="oxf:pipeline">
|
||||||
<p:input name="config" href="/data-access.xpl"/>
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
<p:input name="data" transform="oxf:xslt" href="#data">
|
<p:input name="data" transform="oxf:xslt" href="#data">
|
||||||
<config xsl:version="2.0">
|
<config xsl:version="2.0">
|
||||||
<relpath>
|
<relpath>
|
||||||
<xsl:value-of select="/action/@directory"/>
|
<xsl:value-of select="/action/@directory"/>
|
||||||
<xsl:text>index.xml</xsl:text>
|
<xsl:text>index.xml</xsl:text>
|
||||||
</relpath>
|
</relpath>
|
||||||
<operation>write</operation>
|
<operation>write</operation>
|
||||||
<type>xquery</type>
|
<type>xquery</type>
|
||||||
<parameter name="url" type="string">
|
<parameter name="url" type="string">
|
||||||
<xsl:value-of select="/action/@url"/>
|
<xsl:value-of select="/action/@url"/>
|
||||||
</parameter>
|
</parameter>
|
||||||
<parameter name="filename" type="string">
|
<parameter name="filename" type="string">
|
||||||
<xsl:value-of select="/action/@filename"/>
|
<xsl:value-of select="/action/@filename"/>
|
||||||
</parameter>
|
</parameter>
|
||||||
</config>
|
</config>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:input name="param">
|
<p:input name="param">
|
||||||
<xquery><![CDATA[
|
<xquery><![CDATA[
|
||||||
for $as in /archive-set
|
for $as in /archive-set
|
||||||
return
|
return
|
||||||
update
|
update
|
||||||
insert <archive url=$(url) href=$(filename) dateTime="{current-dateTime()}"/>
|
insert <archive url=$(url) href=$(filename) dateTime="{current-dateTime()}"/>
|
||||||
into $as
|
into $as
|
||||||
]]></xquery>
|
]]></xquery>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:output name="data" id="response1"/>
|
<p:output name="data" id="response1"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
<p:processor name="oxf:null-serializer">
|
<p:processor name="oxf:null-serializer">
|
||||||
<p:input name="data" href="#response1"/>
|
<p:input name="data" href="#response1"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
|
||||||
<!-- Update the queue -->
|
|
||||||
<p:processor name="oxf:pipeline">
|
|
||||||
<p:input name="config" href="/data-access.xpl"/>
|
|
||||||
<p:input name="data" transform="oxf:xslt" href="#data">
|
|
||||||
<config xsl:version="2.0">
|
|
||||||
<relpath>queue.xml</relpath>
|
|
||||||
<operation>write</operation>
|
|
||||||
<type>xquery</type>
|
|
||||||
<parameter name="uuid" type="string">
|
|
||||||
<xsl:value-of select="/action/@uuid"/>
|
|
||||||
</parameter>
|
|
||||||
</config>
|
|
||||||
</p:input>
|
|
||||||
<p:input name="param">
|
|
||||||
<xquery><![CDATA[
|
|
||||||
|
|
||||||
|
<!-- Update the queue -->
|
||||||
|
<p:processor name="oxf:pipeline">
|
||||||
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
|
<p:input name="data" transform="oxf:xslt" href="#data">
|
||||||
|
<config xsl:version="2.0">
|
||||||
|
<relpath>queue.xml</relpath>
|
||||||
|
<operation>write</operation>
|
||||||
|
<type>xquery</type>
|
||||||
|
<parameter name="uuid" type="string">
|
||||||
|
<xsl:value-of select="/action/@uuid"/>
|
||||||
|
</parameter>
|
||||||
|
</config>
|
||||||
|
</p:input>
|
||||||
|
<p:input name="param">
|
||||||
|
<xquery><![CDATA[
|
||||||
|
|
||||||
for $a in /queue/action where $a/@uuid = $(uuid) return
|
for $a in /queue/action where $a/@uuid = $(uuid) return
|
||||||
update
|
update
|
||||||
delete $a
|
delete $a
|
||||||
|
|
||||||
]]></xquery>
|
]]></xquery>
|
||||||
</p:input>
|
</p:input>
|
||||||
<p:output name="data" id="response4" debug="response"/>
|
<p:output name="data" id="response4" debug="response"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
<p:processor name="oxf:null-serializer">
|
<p:processor name="oxf:null-serializer">
|
||||||
<p:input name="data" href="#response4"/>
|
<p:input name="data" href="#response4"/>
|
||||||
</p:processor>
|
</p:processor>
|
||||||
|
|
||||||
|
</p:otherwise>
|
||||||
|
|
||||||
|
</p:choose>
|
||||||
</p:otherwise>
|
</p:otherwise>
|
||||||
|
|
||||||
</p:choose>
|
</p:choose>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</p:config>
|
</p:config>
|
||||||
|
|
Loading…
Reference in New Issue