From 16ef7979b0681db6aa88e33fa2a945cb31dfd5c0 Mon Sep 17 00:00:00 2001 From: Eric van der Vlist Date: Sat, 28 Apr 2012 23:12:20 +0200 Subject: [PATCH] Trying to guess content types --- .../actions/mediatypes/warc-html.xpl | 39 +++++++++++++++---- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/archiver/pipelines/actions/mediatypes/warc-html.xpl b/archiver/pipelines/actions/mediatypes/warc-html.xpl index 7db1982..29542dc 100644 --- a/archiver/pipelines/actions/mediatypes/warc-html.xpl +++ b/archiver/pipelines/actions/mediatypes/warc-html.xpl @@ -5,28 +5,51 @@ + + + + + + + + + + ENCODING : + + + + utf-8 + + + + + - - + + session - utf-8 + + + true - + - + - + - utf-8 + + + true text/html true @@ -75,7 +98,7 @@ - application/xml + text/html utf-8