#4 detection of the encoding used in the archives.
This commit is contained in:
parent
5a50ccf29c
commit
1033614814
|
@ -32,7 +32,7 @@ if (!class_exists("Owark")) {
|
||||||
private $broken_links = array();
|
private $broken_links = array();
|
||||||
private $post_id = -1;
|
private $post_id = -1;
|
||||||
private $post_type = "";
|
private $post_type = "";
|
||||||
private $version = '0.1';
|
private $version = '0.2';
|
||||||
private $notices = "";
|
private $notices = "";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -97,6 +97,7 @@ if (!class_exists("Owark")) {
|
||||||
status varchar(20) NOT NULL DEFAULT 'to-archive',
|
status varchar(20) NOT NULL DEFAULT 'to-archive',
|
||||||
arc_date datetime,
|
arc_date datetime,
|
||||||
arc_location text,
|
arc_location text,
|
||||||
|
encoding varchar(10),
|
||||||
PRIMARY KEY(`id`),
|
PRIMARY KEY(`id`),
|
||||||
KEY `url` (`url`(150)) )";
|
KEY `url` (`url`(150)) )";
|
||||||
require_once(ABSPATH . 'wp-admin/includes/upgrade.php');
|
require_once(ABSPATH . 'wp-admin/includes/upgrade.php');
|
||||||
|
@ -425,7 +426,9 @@ if (!class_exists("Owark")) {
|
||||||
from {$wpdb->prefix}owark AS owark
|
from {$wpdb->prefix}owark AS owark
|
||||||
where id = {$id}";
|
where id = {$id}";
|
||||||
$link = $wpdb->get_row($query);
|
$link = $wpdb->get_row($query);
|
||||||
|
$wpdb->flush();
|
||||||
|
|
||||||
|
// Find the file to read
|
||||||
$blog_title = get_bloginfo('name');
|
$blog_title = get_bloginfo('name');
|
||||||
$home_url = home_url();
|
$home_url = home_url();
|
||||||
|
|
||||||
|
@ -434,17 +437,6 @@ if (!class_exists("Owark")) {
|
||||||
$loc = '/wp-content/plugins/owark' . substr($link->arc_location, $pos);
|
$loc = '/wp-content/plugins/owark' . substr($link->arc_location, $pos);
|
||||||
$arc_loc = home_url() . $loc;
|
$arc_loc = home_url() . $loc;
|
||||||
|
|
||||||
echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">';
|
|
||||||
|
|
||||||
echo "<base href=\"{$arc_loc}/\">";
|
|
||||||
echo '<div style="background:#fff;border:1px solid #999;margin:-1px -1px 0;padding:0;">';
|
|
||||||
echo '<div style="background:#ddd;border:1px solid #999;color:#000;font:13px arial,sans-serif;font-weight:normal;margin:12px;padding:8px;text-align:left">';
|
|
||||||
echo "This is an <a href='http://owark.org'>Open Web Archive</a> archive of <a href=\"{$link->url}\">{$link->url}</a>.";
|
|
||||||
echo "<br />This snapshot has been taken on {$link->arc_date} for the website <a href=\"{$home_url}\">{$blog_title}</a> which contains a link to this page and has saved a copy to be displayed in the page ever disappears.";
|
|
||||||
echo '</div></div><div style="position:relative">';
|
|
||||||
|
|
||||||
$file_location = '.'. $loc .'/index.html';
|
$file_location = '.'. $loc .'/index.html';
|
||||||
if (!file_exists($file_location)) {
|
if (!file_exists($file_location)) {
|
||||||
// If index.html doesn't exist, find another html file!
|
// If index.html doesn't exist, find another html file!
|
||||||
|
@ -458,9 +450,51 @@ if (!class_exists("Owark")) {
|
||||||
closedir($dir);
|
closedir($dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read the file
|
||||||
|
|
||||||
|
$f = fopen($file_location, "r");
|
||||||
|
$content = fread($f, filesize($file_location));
|
||||||
|
fclose($f);
|
||||||
|
|
||||||
|
// Which encoding?
|
||||||
|
$encoding = $link->encoding;
|
||||||
|
|
||||||
|
if ($encoding == NULL) {
|
||||||
|
// We need to guess the encoding!
|
||||||
|
|
||||||
|
$matches = NULL;
|
||||||
|
// <meta http-equiv="Content-Type" content="text/xml; charset=iso-8859-1"/>
|
||||||
|
if (preg_match('/<meta\s*http-equiv\s*=\s*["\']Content-Type["\']\s+content\s*=\s*["\'][^"\'>]*charset\s*=\s*([^"\'>]+)\s*["\']/si',
|
||||||
|
$content, &$matches) > 0) {
|
||||||
|
$encoding = $matches[1];
|
||||||
|
} else {
|
||||||
|
$encoding = mb_detect_encoding($content);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($encoding) {
|
||||||
|
$wpdb->update(
|
||||||
|
"{$wpdb->prefix}owark",
|
||||||
|
array('encoding' => $encoding),
|
||||||
|
array('id' => $id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
header("Content-Type: text/html; charset=$encoding");
|
||||||
|
|
||||||
|
echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset='.$encoding.'">';
|
||||||
|
|
||||||
|
echo "<base href=\"{$arc_loc}/\">";
|
||||||
|
echo '<div style="background:#fff;border:1px solid #999;margin:-1px -1px 0;padding:0;">';
|
||||||
|
echo '<div style="background:#ddd;border:1px solid #999;color:#000;font:13px arial,sans-serif;font-weight:normal;margin:12px;padding:8px;text-align:left">';
|
||||||
|
echo "This is an <a href='http://owark.org'>Open Web Archive</a> archive of <a href=\"{$link->url}\">{$link->url}</a>.";
|
||||||
|
echo "<br />This snapshot has been taken on {$link->arc_date} for the website <a href=\"{$home_url}\">{$blog_title}</a> which contains a link to this page and has saved a copy to be displayed in the page ever disappears.";
|
||||||
|
echo '</div></div><div style="position:relative">';
|
||||||
|
|
||||||
|
|
||||||
$f = fopen($file_location, "r");
|
$f = fopen($file_location, "r");
|
||||||
echo fread($f, filesize($file_location));
|
echo $content;
|
||||||
fclose($f);
|
|
||||||
echo '</div>';
|
echo '</div>';
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue