From 0e7bdd1de4d9764eafd2a0ce2612d65dbde4f6d0 Mon Sep 17 00:00:00 2001 From: Eric van der Vlist Date: Fri, 13 Apr 2012 18:01:53 +0200 Subject: [PATCH] Adding a basic squeleton to generate what should ultimately be a WARC archive --- archiver/pipelines/actions/archive-set.xpl | 8 +- .../pipelines/actions/package-archive.xpl | 292 ++++++++++++++++++ archiver/pipelines/actions/warc-lib.xsl | 68 ++++ 3 files changed, 366 insertions(+), 2 deletions(-) create mode 100644 archiver/pipelines/actions/package-archive.xpl create mode 100644 archiver/pipelines/actions/warc-lib.xsl diff --git a/archiver/pipelines/actions/archive-set.xpl b/archiver/pipelines/actions/archive-set.xpl index 9515c52..af4aa55 100644 --- a/archiver/pipelines/actions/archive-set.xpl +++ b/archiver/pipelines/actions/archive-set.xpl @@ -31,7 +31,10 @@ - + + + + @@ -48,7 +51,8 @@ declare namespace util = "http://exist-db.org/xquery/util"; for $q in /queue return update - insert + insert (, + ) into $q, for $a in /queue/action where $a/@uuid = $(uuid) return diff --git a/archiver/pipelines/actions/package-archive.xpl b/archiver/pipelines/actions/package-archive.xpl new file mode 100644 index 0000000..83ea80d --- /dev/null +++ b/archiver/pipelines/actions/package-archive.xpl @@ -0,0 +1,292 @@ + + + + + + + + + + + + index.xml + + read + document + + + + + + + + + + + + + request + + + + + + + + + + WARC-Type + warcinfo + + + WARC-Date + + + + + + WARC-Record-ID + + <urn:uuid: + + > + + + + Content-Type + application/warc-fields + + + + + software + Owark 0.3 http://owark.org + + + format + WARC file version 0.18 + + + + + + + + + + + + + + + + + + + + + + + + + + + + read + document + + + + + + + + + + + + + + + + false + true + + + + + + + + + + + WARC-Type + request + + + WARC-Target-URI + + + + + + WARC-Date + + + + + + + WARC-Record-ID + + <urn:uuid: + + > + + + + Content-Type + application/http;msgtype=request + + + + + + + + + + WARC-Type + response + + + WARC-Target-URI + + + + + + WARC-Date + + + + + + + WARC-Record-ID + + <urn:uuid: + + > + + + + Content-Type + application/http;msgtype=response + + + + + + + + + + + + + + + + + + + + + + + false + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + /tmp/archive.zip + + + + + + + + + + diff --git a/archiver/pipelines/actions/warc-lib.xsl b/archiver/pipelines/actions/warc-lib.xsl new file mode 100644 index 0000000..c9b0a62 --- /dev/null +++ b/archiver/pipelines/actions/warc-lib.xsl @@ -0,0 +1,68 @@ + + + + + Created on: Apr 13, 2012 + Author: vdv + Template library to produce WARC documents + + + + + WARC/0.18 + + + + + + + + + + : + + + + + + + + + + + + + + + + HTTP/1.0 + + + + + + + + HTTP/1.1 + + OK + + + + + + + + + + + + + + + + + + +