From 466d4473cec7be68b6161deb39a405fba2ed663c Mon Sep 17 00:00:00 2001 From: Eric van der Vlist Date: Fri, 27 Apr 2012 17:04:17 +0200 Subject: [PATCH] Generating a resource index to facilitate further processing. --- .../actions/package-heritrix-warc.xpl | 16 +++- .../pipelines/actions/resource-index.xslt | 80 +++++++++++++++++++ 2 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 archiver/pipelines/actions/resource-index.xslt diff --git a/archiver/pipelines/actions/package-heritrix-warc.xpl b/archiver/pipelines/actions/package-heritrix-warc.xpl index 51dbea7..18503a7 100644 --- a/archiver/pipelines/actions/package-heritrix-warc.xpl +++ b/archiver/pipelines/actions/package-heritrix-warc.xpl @@ -67,15 +67,27 @@ - + - + + + + + + + + + + + + diff --git a/archiver/pipelines/actions/resource-index.xslt b/archiver/pipelines/actions/resource-index.xslt new file mode 100644 index 0000000..a763b3a --- /dev/null +++ b/archiver/pipelines/actions/resource-index.xslt @@ -0,0 +1,80 @@ + + + + + Created on: Apr 26, 2012 + Author: vdv + Create a resource index with links and local names from the Heritrix crawl log in XML format + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +