Still work in progress, but the WARC archive now validates with warc-tools' warcvalid.py...
This commit is contained in:
parent
ba51ddfb0b
commit
ad35672603
|
@ -211,7 +211,9 @@ conformsTo:
|
||||||
</xsl:variable>
|
</xsl:variable>
|
||||||
<document xsl:version="2.0" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xs:string" content-type="text/plain">
|
<document xsl:version="2.0" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xs:string" content-type="text/plain">
|
||||||
<xsl:apply-templates select="$request" mode="warc"/>
|
<xsl:apply-templates select="$request" mode="warc"/>
|
||||||
<xsl:apply-templates select="$response" mode="warc"/>
|
<xsl:apply-templates select="$response" mode="warc">
|
||||||
|
<xsl:with-param name="document-length" as="xs:integer" select="string-length(translate(/archive/response/document, ' 

', '')) * 3 div 4" tunnel="yes"/>
|
||||||
|
</xsl:apply-templates>
|
||||||
</document>
|
</document>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
</xsl:stylesheet>
|
</xsl:stylesheet>
|
||||||
|
|
|
@ -30,6 +30,9 @@
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="record" mode="warc">
|
<xsl:template match="record" mode="warc">
|
||||||
|
<xsl:param name="document-length" as="xs:integer" select="0" tunnel="yes"/>
|
||||||
|
<xsl:call-template name="CRLF"/>
|
||||||
|
<xsl:call-template name="CRLF"/>
|
||||||
<xsl:apply-templates select="header" mode="warc"/>
|
<xsl:apply-templates select="header" mode="warc"/>
|
||||||
<xsl:variable name="block">
|
<xsl:variable name="block">
|
||||||
<xsl:apply-templates select="block" mode="warc"/>
|
<xsl:apply-templates select="block" mode="warc"/>
|
||||||
|
@ -38,15 +41,13 @@
|
||||||
<field>
|
<field>
|
||||||
<name>Content-Length</name>
|
<name>Content-Length</name>
|
||||||
<value>
|
<value>
|
||||||
<xsl:value-of select="string-length($block)"/>
|
<xsl:value-of select="string-length($block) + $document-length "/>
|
||||||
</value>
|
</value>
|
||||||
</field>
|
</field>
|
||||||
</xsl:variable>
|
</xsl:variable>
|
||||||
<xsl:apply-templates select="$content-length" mode="warc"/>
|
<xsl:apply-templates select="$content-length" mode="warc"/>
|
||||||
<xsl:call-template name="CRLF"/>
|
<xsl:call-template name="CRLF"/>
|
||||||
<xsl:value-of select="$block"/>
|
<xsl:value-of select="$block"/>
|
||||||
<xsl:call-template name="CRLF"/>
|
|
||||||
<xsl:call-template name="CRLF"/>
|
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="block" mode="warc">
|
<xsl:template match="block" mode="warc">
|
||||||
|
@ -71,6 +72,11 @@
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="response" mode="warc-http">
|
<xsl:template match="response" mode="warc-http">
|
||||||
|
<!--<xsl:message>
|
||||||
|
<xsl:value-of select="string-length(document)"/>
|
||||||
|
<xsl:text> - </xsl:text>
|
||||||
|
<xsl:value-of select="string-length(translate(document, ' 

', ''))"/>
|
||||||
|
</xsl:message>-->
|
||||||
<line>
|
<line>
|
||||||
<!-- TODO: get the HTTP version and status-->
|
<!-- TODO: get the HTTP version and status-->
|
||||||
<xsl:text>HTTP/1.1 </xsl:text>
|
<xsl:text>HTTP/1.1 </xsl:text>
|
||||||
|
|
Loading…
Reference in New Issue