Store the crawl log into the archive

This commit is contained in:
Eric van der Vlist 2012-05-04 19:49:41 +02:00
parent c25b18f9f5
commit 5e2b674092
1 changed file with 18 additions and 3 deletions

View File

@ -65,6 +65,18 @@
<p:output name="data" id="log" debug="log"/> <p:output name="data" id="log" debug="log"/>
</p:processor> </p:processor>
<!-- Store the log in a temp file -->
<!-- Feeds the pipeline's #log output to the file serializer. The config names no
     file or directory, only a scope of "request".
     NOTE(review): presumably this makes the serializer write a temporary file
     that lives for the duration of the current request; confirm against the
     oxf:file-serializer documentation. -->
<p:processor name="oxf:file-serializer">
<p:input name="config">
<config>
<scope>request</scope>
</config>
</p:input>
<p:input name="data" href="#log"/>
<!-- Exposed as #log-location so that later steps can reference it.
     NOTE(review): presumably this output carries the location (URL) of the
     written file rather than the log content itself; TODO confirm. -->
<p:output name="data" id="log-location" debug="log-location"/>
</p:processor>
<p:processor name="oxf:xslt"> <p:processor name="oxf:xslt">
<p:input name="data" href="#log"/> <p:input name="data" href="#log"/>
<p:input name="config" href="parse-log.xslt"/> <p:input name="config" href="parse-log.xslt"/>
@ -156,10 +168,13 @@
</p:processor> </p:processor>
<p:processor name="oxf:zip"> <p:processor name="oxf:zip">
<p:input name="data" transform="oxf:unsafe-xslt" href="aggregate('root', #warc-location, #loop)"> <p:input name="data" transform="oxf:unsafe-xslt" href="aggregate('root', #warc-location, #log-location, #loop)">
<files xsl:version="2.0" file-name="archive.zip"> <files xsl:version="2.0" file-name="archive.zip">
<file name="archive.warc"> <file name="archive/archive.warc">
<xsl:value-of select="/root/url"/> <xsl:value-of select="/root/url[1]"/>
</file>
<file name="archive/archive.log">
<xsl:value-of select="/root/url[2]"/>
</file> </file>
<xsl:for-each select="/root/root/doc"> <xsl:for-each select="/root/root/doc">
<file name="rewritten/{resource/local-name}"> <file name="rewritten/{resource/local-name}">