Starting to write pipeline actions that interact with a Heritrix server
This commit is contained in:
parent
ad35672603
commit
c4c4108025
|
@ -0,0 +1,120 @@
|
||||||
|
|
||||||
|
<!--
|
||||||
|
|
||||||
|
Create a new archive through Heritrix
|
||||||
|
|
||||||
|
-->
|
||||||
|
|
||||||
|
<p:config xmlns:p="http://www.orbeon.com/oxf/pipeline" xmlns:oxf="http://www.orbeon.com/oxf/processors" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xforms="http://www.w3.org/2002/xforms"
|
||||||
|
xmlns:xxforms="http://orbeon.org/oxf/xml/xforms" xmlns:exist="http://exist.sourceforge.net/NS/exist" xmlns:saxon="http://saxon.sf.net/"
|
||||||
|
xmlns:pipeline="java:org.orbeon.oxf.processor.pipeline.PipelineFunctionLibrary">
|
||||||
|
|
||||||
|
<!-- Pipeline input "data": a document whose root is an action element; the steps below read its uuid, url and priority attributes. -->
<p:param name="data" type="input"/>
|
||||||
|
|
||||||
|
<!--
Builds a data-access configuration document from the incoming action and exposes it as "data-access-data".
The generated config names relpath queue.xml, operation write and type xquery, and carries these parameters:
directory = the action uuid with each '-' replaced by '/', followed by '/';
filename = the action url encoded by saxon:string-to-hexBinary (utf-8), followed by '.xml';
uuid and url = copied from the action;
priority-resource = action priority + 2, priority-package = action priority + 1.
NOTE(review): oxf:unsafe-xslt is presumably required here because of the saxon: extension function; confirm.
-->
<p:processor name="oxf:unsafe-xslt">
|
||||||
|
<p:input name="data" href="#data"/>
|
||||||
|
<p:input name="config">
|
||||||
|
<config xsl:version="2.0">
|
||||||
|
<relpath>queue.xml</relpath>
|
||||||
|
<operation>write</operation>
|
||||||
|
<type>xquery</type>
|
||||||
|
<parameter name="directory" type="string">
|
||||||
|
<xsl:value-of select="translate(/action/@uuid, '-', '/')"/>
|
||||||
|
<xsl:text>/</xsl:text>
|
||||||
|
</parameter>
|
||||||
|
<parameter name="filename" type="string">
|
||||||
|
<xsl:value-of select="saxon:string-to-hexBinary(/action/@url, 'utf-8')"/>
|
||||||
|
<xsl:text>.xml</xsl:text>
|
||||||
|
</parameter>
|
||||||
|
<parameter name="uuid" type="string">
|
||||||
|
<xsl:value-of select="/action/@uuid"/>
|
||||||
|
</parameter>
|
||||||
|
<parameter name="url" type="string">
|
||||||
|
<xsl:value-of select="/action/@url"/>
|
||||||
|
</parameter>
|
||||||
|
<parameter name="priority-resource" type="string">
|
||||||
|
<xsl:value-of select="/action/@priority + 2"/>
|
||||||
|
</parameter>
|
||||||
|
<parameter name="priority-package" type="string">
|
||||||
|
<xsl:value-of select="/action/@priority + 1"/>
|
||||||
|
</parameter>
|
||||||
|
</config>
|
||||||
|
</p:input>
|
||||||
|
<p:output name="data" id="data-access-data"/>
|
||||||
|
</p:processor>
|
||||||
|
|
||||||
|
<!-- <p:processor name="oxf:pipeline">
|
||||||
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
|
<p:input name="data" href="#data-access-data"/>
|
||||||
|
<p:input name="param">
|
||||||
|
<xquery><![CDATA[
|
||||||
|
declare namespace util = "http://exist-db.org/xquery/util";
|
||||||
|
|
||||||
|
for $q in /queue return
|
||||||
|
update
|
||||||
|
insert (<action priority=$(priority-resource) uuid="{util:uuid()}" type="archive-resource" url=$(url) directory=$(directory) filename=$(filename)/>,
|
||||||
|
<action priority=$(priority-package) uuid="{util:uuid()}" type="package-archive" directory=$(directory)/>)
|
||||||
|
into $q,
|
||||||
|
|
||||||
|
for $a in /queue/action where $a/@uuid = $(uuid) return
|
||||||
|
update
|
||||||
|
delete $a
|
||||||
|
|
||||||
|
]]></xquery>
|
||||||
|
</p:input>
|
||||||
|
<p:output name="data" id="response" debug="response"/>
|
||||||
|
</p:processor>
|
||||||
|
|
||||||
|
<p:processor name="oxf:null-serializer">
|
||||||
|
<p:input name="data" href="#response"/>
|
||||||
|
</p:processor>
|
||||||
|
-->
|
||||||
|
<!--
Runs /data-access.xpl with a "write" / "document" configuration whose relpath is the directory parameter
taken from #data-access-data followed by index.xml.
The "param" input is an archive-set element carrying the url and uuid attributes of the incoming action.
NOTE(review): presumably data-access.xpl stores the "param" document at relpath; confirm against data-access.xpl.
-->
<p:processor name="oxf:pipeline">
|
||||||
|
<p:input name="config" href="/data-access.xpl"/>
|
||||||
|
<p:input name="data" transform="oxf:xslt" href="#data-access-data">
|
||||||
|
<config xsl:version="2.0">
|
||||||
|
<relpath>
|
||||||
|
<xsl:value-of select="/config/parameter[@name='directory']"/>
|
||||||
|
<xsl:text>index.xml</xsl:text>
|
||||||
|
</relpath>
|
||||||
|
<operation>write</operation>
|
||||||
|
<type>document</type>
|
||||||
|
</config>
|
||||||
|
</p:input>
|
||||||
|
<p:input name="param" transform="oxf:xslt" href="#data">
|
||||||
|
<archive-set xsl:version="2.0">
|
||||||
|
<xsl:copy-of select="/action/@url|/action/@uuid"/>
|
||||||
|
</archive-set>
|
||||||
|
</p:input>
|
||||||
|
<p:output name="data" id="response2" debug="response2"/>
|
||||||
|
</p:processor>
|
||||||
|
|
||||||
|
<!-- Sends the "response2" output to oxf:null-serializer; nothing else in this pipeline reads it. -->
<p:processor name="oxf:null-serializer">
|
||||||
|
<p:input name="data" href="#response2"/>
|
||||||
|
</p:processor>
|
||||||
|
|
||||||
|
<!-- Create a new Heritrix job -->
|
||||||
|
<!--
Submits a request to the URL found at /config/heritrix/rest-api in oxf:/config.xml.
The submission uses method urlencoded-post, replace none, digest authentication and an
Accept header replaced with application/xml.
The request instance holds action = create and createpath = the uuid of the incoming action.
NOTE(review): presumably the instance children are sent as form fields of the POST; confirm.
-->
<p:processor name="oxf:xforms-submission">
|
||||||
|
<p:input name="submission" transform="oxf:xslt" href="oxf:/config.xml">
|
||||||
|
<xforms:submission xsl:version="2.0" method="urlencoded-post" replace="none" action="{/config/heritrix/rest-api}" xxforms:authentication-scheme="digest">
|
||||||
|
<xforms:header combine="replace">
|
||||||
|
<xforms:name>Accept</xforms:name>
|
||||||
|
<xforms:value>application/xml</xforms:value>
|
||||||
|
</xforms:header>
|
||||||
|
</xforms:submission>
|
||||||
|
</p:input>
|
||||||
|
<p:input name="request" transform="oxf:xslt" href="#data">
|
||||||
|
<instance xsl:version="2.0">
|
||||||
|
<action>create</action>
|
||||||
|
<createpath>
|
||||||
|
<xsl:value-of select="/action/@uuid"/>
|
||||||
|
</createpath>
|
||||||
|
</instance>
|
||||||
|
</p:input>
|
||||||
|
<p:output name="response" id="heritrix1" debug="heritrix1"/>
|
||||||
|
</p:processor>
|
||||||
|
<!-- Sends the "heritrix1" submission response to oxf:null-serializer; nothing else in this pipeline reads it. -->
<p:processor name="oxf:null-serializer">
|
||||||
|
<p:input name="data" href="#heritrix1"/>
|
||||||
|
</p:processor>
|
||||||
|
|
||||||
|
</p:config>
|
|
@ -1,10 +1,13 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<config>
|
<config>
|
||||||
|
|
||||||
<exist-root>http://admin@localhost:8080/orbeon/exist/rest/db/</exist-root>
|
<exist-root>http://admin@localhost:8080/orbeon/exist/rest/db/</exist-root>
|
||||||
<exist-db>owark/</exist-db>
|
<exist-db>owark/</exist-db>
|
||||||
|
|
||||||
<user-agent>Mozilla/5.0 (compatible; owark/0.3; http://owark.org/)</user-agent>
|
<user-agent>Mozilla/5.0 (compatible; owark/0.3; http://owark.org/)</user-agent>
|
||||||
|
|
||||||
|
<!--
Heritrix settings: rest-api is the URL of the Heritrix engine.
NOTE(review): the URL embeds a user name and password in clear text; consider keeping
credentials out of version control.
-->
<heritrix>
|
||||||
|
<rest-api>https://admin:envierse@localhost:8443/engine</rest-api>
|
||||||
|
</heritrix>
|
||||||
|
|
||||||
</config>
|
</config>
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
<!--
|
||||||
|
Copyright (C) 2004 Orbeon, Inc.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify it under the terms of the
|
||||||
|
GNU Lesser General Public License as published by the Free Software Foundation; either version
|
||||||
|
2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||||
|
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
See the GNU Lesser General Public License for more details.
|
||||||
|
|
||||||
|
The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
|
||||||
|
-->
|
||||||
|
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/" xmlns="http://jakarta.apache.org/log4j/">
|
||||||
|
|
||||||
|
<!-- This is the standard log appender to the console (System.err) -->
|
||||||
|
<!--
Console appender with Target System.err, a PatternLayout of
date (ISO8601), level, logger name, NDC, message and newline,
and a LevelRangeFilter whose LevelMin is INFO.
-->
<appender name="ConsoleAppender" class="org.apache.log4j.ConsoleAppender">
|
||||||
|
<param name="Target" value="System.err"/>
|
||||||
|
<layout class="org.apache.log4j.PatternLayout">
|
||||||
|
<param name="ConversionPattern" value="%d{ISO8601} %-5p %c %x - %m%n"/>
|
||||||
|
</layout>
|
||||||
|
<filter class="org.apache.log4j.varia.LevelRangeFilter">
|
||||||
|
<param name="LevelMin" value="INFO" />
|
||||||
|
</filter>
|
||||||
|
</appender>
|
||||||
|
<!-- XForms engine activity -->
|
||||||
|
<!--
Sets the XFormsServer logger to debug.
NOTE(review): ConsoleAppender in this file filters with LevelMin INFO, so debug events
are presumably not printed by that appender; confirm this is intended.
-->
<category name="org.orbeon.oxf.xforms.processor.XFormsServer">
|
||||||
|
<priority value="debug"/>
|
||||||
|
</category>
|
||||||
|
|
||||||
|
<!-- This is the root logger -->
|
||||||
|
<!-- Root logger: priority debug, attached to ConsoleAppender (the only appender declared in this file). -->
<root>
|
||||||
|
<priority value="debug"/>
|
||||||
|
<appender-ref ref="ConsoleAppender"/>
|
||||||
|
</root>
|
||||||
|
|
||||||
|
</log4j:configuration>
|
|
@ -23,7 +23,7 @@ declare namespace util = "http://exist-db.org/xquery/util";
|
||||||
|
|
||||||
for $q in /queue return
|
for $q in /queue return
|
||||||
update
|
update
|
||||||
insert <action priority="0" uuid="{util:uuid()}" type="archive-set" url="http://dyomedea.com"/>
|
insert <action priority="0" uuid="{util:uuid()}" type="heritrix-archive-set" url="http://dyomedea.com"/>
|
||||||
into $q
|
into $q
|
||||||
|
|
||||||
]]></xquery>
|
]]></xquery>
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
<!--
|
||||||
|
Copyright (C) 2004 Orbeon, Inc.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify it under the terms of the
|
||||||
|
GNU Lesser General Public License as published by the Free Software Foundation; either version
|
||||||
|
2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||||
|
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
See the GNU Lesser General Public License for more details.
|
||||||
|
|
||||||
|
The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
|
||||||
|
-->
|
||||||
|
<properties xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||||
|
xmlns:oxf="http://www.orbeon.com/oxf/processors">
|
||||||
|
|
||||||
|
<!-- General settings: URI of the log4j configuration (oxf:/default-log4j.xml) and the cache size (200). -->
<property as="xs:anyURI" name="oxf.log4j-config" value="oxf:/default-log4j.xml"/>
|
||||||
|
<property as="xs:integer" name="oxf.cache.size" value="200"/>
|
||||||
|
|
||||||
|
|
||||||
|
<!--
HTTP SSL settings: keystore URI (a file under /var/local/heritrix-3.1.0), keystore password,
and the hostname verifier.
NOTE(review): the keystore password is stored in clear text, and "allow-all" presumably
accepts any host name on the server certificate; confirm both are acceptable outside development.
-->
<property as="xs:anyURI"
|
||||||
|
name="oxf.http.ssl.keystore.uri"
|
||||||
|
value="file:/var/local/heritrix-3.1.0/heritrix.keystore"/>
|
||||||
|
<property as="xs:string"
|
||||||
|
name="oxf.http.ssl.keystore.password"
|
||||||
|
value="heritrix"/>
|
||||||
|
<property as="xs:string"
|
||||||
|
name="oxf.http.ssl.hostname-verifier"
|
||||||
|
value="allow-all"/>
|
||||||
|
|
||||||
|
<!--<property as="xs:NMTOKENS" name="oxf.xforms.logging.debug"
|
||||||
|
value="document model submission control event action analysis server server-body html resolver utils
|
||||||
|
submission-details submission-body"/>-->
|
||||||
|
</properties>
|
Loading…
Reference in New Issue