732 lines
25 KiB
PHP
732 lines
25 KiB
PHP
<?php
|
|
|
|
/* Copyright 2011-2020 Eric van der Vlist (vdv@dyomedea.com)
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License, version 2, as
|
|
published by the Free Software Foundation.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
/*
|
|
Plugin Name: owark
|
|
Plugin URI: http://owark.org
|
|
Description: Tired of broken links? Archive yours with owark, the Open Web Archive!
|
|
Version: 1.0
|
|
Author: Eric van der Vlist
|
|
Author URI: http://eric.van-der-vlist.com
|
|
License: GPL2
|
|
Gitea Plugin URI: https://gitea.dyomedea.com/Dyomedea/owark
|
|
*/
|
|
|
|
/*
|
|
Log
|
|
*/
|
|
if (!function_exists('print_r_log')) {

    /**
     * Write a value to the PHP error log, prefixed with a "file/#line" location.
     *
     * Arrays and objects are dumped with print_r() in a second log entry, right
     * after an entry that only holds the location; any other value is appended
     * to the location on a single entry.
     *
     * The location is read from frame 1 of the backtrace, falling back to
     * frame 0 when the function is called from the top level of a script.
     * Missing 'file'/'line' keys are reported as '<undefined>', the same way
     * log_function_call() does.
     *
     * @param mixed $log Value to log.
     * @return void
     */
    function print_r_log($log) {
        // Only two frames are needed, and neither their arguments nor objects.
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 2);
        if (isset($trace[1])) {
            $frame = $trace[1];
        } elseif (isset($trace[0])) {
            $frame = $trace[0];
        } else {
            $frame = array();
        }

        $location = (isset($frame['file']) ? $frame['file'] : '<undefined>')
            . '/#'
            . (isset($frame['line']) ? $frame['line'] : '<undefined>');

        if (is_array($log) || is_object($log)) {
            error_log($location . ':');
            error_log(print_r($log, true));
        } else {
            error_log($location . ': ' . $log);
        }
    }

}
|
|
|
|
if (!function_exists('log_function_call')) {

    /**
     * Log the function that called log_function_call(), with its arguments.
     *
     * A first error_log() entry holds the file, line and function name found
     * in frame 1 of the backtrace ('<undefined>' for any missing piece); one
     * further entry is written per argument, giving its type and a print_r()
     * dump.
     *
     * @return void
     */
    function log_function_call() {
        // Option 0: keep the arguments but do not collect the objects; two
        // frames are enough to reach the caller.
        $trace = debug_backtrace(0, 2);
        $frame = isset($trace[1]) ? $trace[1] : array();

        error_log(
            (isset($frame['file']) ? $frame['file'] : '<undefined>')
            . '/#' . (isset($frame['line']) ? $frame['line'] : '<undefined>')
            . ' function: ' . (isset($frame['function']) ? $frame['function'] : '<undefined>')
            . '('
        );

        // No frame 1 (top-level call) or no recorded arguments: nothing more to log.
        if (!isset($frame['args']) || !is_array($frame['args'])) {
            return;
        }

        foreach ($frame['args'] as $arg) {
            error_log(' * ' . gettype($arg) . ': ' . print_r($arg, true));
        }
    }

}
|
|
|
|
if (!function_exists('archives_dir')) {

    /**
     * Filesystem path of the directory holding the archives.
     *
     * @return string WP_CONTENT_DIR followed by OWARK_ARCHIVES_REL_PATH when
     *                that constant is defined, WP_PLUGIN_DIR . '/archives'
     *                otherwise.
     */
    function archives_dir() {
        if (!defined('OWARK_ARCHIVES_REL_PATH')) {
            return WP_PLUGIN_DIR . '/archives';
        }

        return WP_CONTENT_DIR . '/' . OWARK_ARCHIVES_REL_PATH;
    }

}
|
|
|
|
if (!function_exists('archives_url')) {

    /**
     * Public URL of the directory holding the archives.
     *
     * @return string WP_CONTENT_URL followed by OWARK_ARCHIVES_REL_PATH when
     *                that constant is defined, WP_PLUGIN_URL . '/archives'
     *                otherwise.
     */
    function archives_url() {
        if (!defined('OWARK_ARCHIVES_REL_PATH')) {
            return WP_PLUGIN_URL . '/archives';
        }

        return WP_CONTENT_URL . '/' . OWARK_ARCHIVES_REL_PATH;
    }

}
|
|
|
|
if (!class_exists("Owark")) {
|
|
|
|
class Owark {
|
|
|
|
private $broken_links = array();
|
|
private $post_id = -1;
|
|
private $post_type = "";
|
|
private $version = '1.0';
|
|
private $db_version = '1.0';
|
|
private $notices = "";
|
|
|
|
/**
 * Class constructor: registers every hook the plugin relies on.
 *
 * - admin only: the tools menu entry and the sanity checks;
 * - rewrite rule and query variable used to serve archive pages;
 * - content, comment text and comment author URL filters;
 * - the 'owark_schedule_event' cron hook, scheduled hourly when no event
 *   with the same arguments is already pending.
 *
 * @package owark
 * @since 0.1
 */
public function __construct() {

    if (is_admin()) {
        add_action('admin_menu', array($this, 'owark_admin_menu'));
        add_action('plugins_loaded', array($this, 'sanity_checks'));
    }

    // See http://stackoverflow.com/questions/2210826/need-help-with-wp-rewrite-in-a-wordpress-plugin
    // A filter (rather than an action) creates the rewrite rules:
    // write rules -> add query vars -> recalculate rewrite rules.
    add_filter('rewrite_rules_array', array($this, 'create_rewrite_rules'));
    add_filter('query_vars', array($this, 'add_query_vars'));

    // The rewrite rules are recalculated during admin init to save resources.
    // It could probably run once as long as the rule does not change, or
    // check the $wp_rewrite rules to see whether it is already active.
    add_action('admin_init', array($this, 'flush_rewrite_rules'));
    add_action('template_redirect', array($this, 'template_redirect_intercept'));

    add_filter('the_content', array($this, 'content_filter'), 20);
    add_filter('comment_text', array($this, 'comment_filter'), 20, 2);
    add_filter('get_comment_author_url', array($this, 'comment_author_url_filter'), 20, 1);

    add_action('owark_schedule_event', array('Owark', 'schedule'), 10, 2);

    // The same argument list identifies the event for both calls below.
    $cron_args = array('occurrences' => 30, 'version' => $this->version);
    $already_scheduled = wp_next_scheduled('owark_schedule_event', $cron_args);
    if (!$already_scheduled) {
        wp_schedule_event(time(), 'hourly', 'owark_schedule_event', $cron_args);
    }
}
|
|
|
|
function Owark() {
    // PHP4-style constructor, kept for backward compatibility.
    // Since the class also defines __construct(), this method is not what
    // `new Owark()` runs; it is only reached when called explicitly, e.g.
    // by a sub-class extending `Owark`.
    // In that case, delegate to the new-style constructor of this very class.
    Owark::__construct();
}
|
|
|
|
/**
 * Check we have everything we need...
 *
 * Performs, in order:
 *  - installation or upgrade of the owark table and of the three SQL views
 *    built on top of the Broken Link Checker tables;
 *  - detection of the Broken Link Checker plugin;
 *  - creation / writability check of the archives directory;
 *  - availability of exec() and of GNU wget 1.11 or higher.
 *
 * Every problem found is appended to $this->notices; when that string is not
 * empty, admin_notices() is hooked on 'admin_notices' to display it.
 *
 * @package owark
 * @since 0.1
 *
 * @return void
 */
function sanity_checks() {
    global $wpdb;

    // Install or upgrade tables if needed

    // get_option() yields false when the option does not exist yet; versions
    // are compared as strings so that e.g. '1.1' and '1.10' stay distinct.
    $installed_ver = get_option("owark_db_version");
    $update_required = ((string) $installed_ver !== $this->db_version);
    print_r_log("update_required: $update_required ($installed_ver vs {$this->db_version})");
    $table = $wpdb->prefix . "owark";

    if ($installed_ver === '0.1') {
        // In version 0.1 final URLs were used, but the broken link checker updates these URLs when a link is detected broken.
        // Let's replace these URLs by raw URLs...
        $sql = "update
            {$wpdb->prefix}owark as owark
            join {$wpdb->prefix}blc_links as links on
                owark.url = links.final_url COLLATE latin1_swedish_ci
            join {$wpdb->prefix}blc_instances as instances on
                instances.link_id = links.link_id set
            owark.url = instances.raw_url ";
        $wpdb->query($sql);
        $installed_ver = '1.0';
        update_option("owark_db_version", $installed_ver);
    }

    if ((string) $installed_ver !== $this->db_version) {
        print_r_log("Database upgrade from $installed_ver to {$this->db_version}");
        // dbDelta() is picky about formatting: one column per line and two
        // spaces between PRIMARY KEY and its definition.
        $sql = "CREATE TABLE $table (
            id int(10) unsigned NOT NULL AUTO_INCREMENT,
            url text NOT NULL,
            status varchar(20) NOT NULL DEFAULT 'to-archive',
            arc_date datetime,
            arc_location text,
            encoding varchar(10),
            PRIMARY KEY  (id),
            KEY url (`url`(150))
            )";
        require_once(ABSPATH . 'wp-admin/includes/upgrade.php');
        dbDelta($sql);
        update_option("owark_db_version", $this->db_version);
    }

    if ($update_required) {
        $sql = "CREATE OR REPLACE VIEW {$wpdb->prefix}owark_broken_links AS
            SELECT
                owark.id as id,
                owark.url as url,
                instances.container_id as container_id,
                instances.container_type as container_type,
                instances.container_field as container_field
            FROM
                {$wpdb->prefix}owark as owark,
                {$wpdb->prefix}blc_links as links,
                {$wpdb->prefix}blc_instances AS instances
            WHERE
                owark.url = instances.raw_url
                AND broken = 1
                AND last_check is not null
                AND instances.link_id = links.link_id";
        print_r_log("sql: $sql");
        $wpdb->query($sql);

        $sql = "CREATE OR REPLACE VIEW {$wpdb->prefix}owark_archives AS
            SELECT
                owark.*,
                links.broken,
                links.first_failure
            FROM
                {$wpdb->prefix}owark as owark,
                {$wpdb->prefix}blc_links as links,
                {$wpdb->prefix}blc_instances AS instances
            WHERE
                owark.url = instances.raw_url
                AND instances.link_id = links.link_id
            GROUP BY owark.url";
        print_r_log("sql: $sql");
        $wpdb->query($sql);

        // The sub-select must use the configured table prefix, not a
        // hard-coded "wp_".
        // NOTE(review): the unqualified `url` in the WHERE clause cannot be the
        // SELECT alias; it presumably resolves to a `url` column of the
        // Broken Link Checker tables rather than to instances.raw_url - confirm
        // this is the intended comparison.
        $sql = "CREATE OR REPLACE VIEW {$wpdb->prefix}owark_links_to_archive AS
            SELECT
                DISTINCT instances.raw_url as url
            FROM
                {$wpdb->prefix}blc_links as links,
                {$wpdb->prefix}blc_instances AS instances
            WHERE
                broken = 0
                AND instances.link_id = links.link_id
                AND url NOT IN (SELECT url FROM {$wpdb->prefix}owark)";
        print_r_log("sql: $sql");
        $wpdb->query($sql);

        $this->notices = "<div class=\"updated fade\"><p><strong>The owark table has been installed or upgraded to version {$this->db_version}</strong></p></div>";
    }

    // Check that the broken link checker is installed
    if (!function_exists('get_plugins')) {
        require_once (ABSPATH . "wp-admin/includes/plugin.php");
    }

    $blc = 'not-found';
    foreach (get_plugins() as $plugin_file => $plugin_data) {
        if ($plugin_data['Title'] == 'Broken Link Checker') {
            $blc = is_plugin_active($plugin_file) ? 'active' : 'inactive';
        }
    }

    if ($blc == 'inactive') {
        $this->notices .= "<div class=\"updated fade\"><p><strong>Please activate the Broken Link Checker so that the Open Web Archive can be fully functional.</strong></p></div>";
    } else if ($blc == 'not-found') {
        $this->notices .= "<div class=\"error fade\"><p><strong>The Open Web Archive relies on the <a href=\"http://w-shadow.com/blog/2007/08/05/broken-link-checker-for-wordpress/\">Broken Link Checker</a>. Please install this plugin!</strong></p></div>";
    }

    // Check if we have an archive subdirectory
    $archives_dir = archives_dir();
    print_r_log("archives_dir: $archives_dir");
    if (!is_dir($archives_dir)) {
        // Best effort: a failure is detected by the is_dir() test just below.
        // Recursive, because OWARK_ARCHIVES_REL_PATH may name a nested folder.
        @mkdir($archives_dir, 0777, true);
        if (!is_dir($archives_dir)) {
            $this->notices .= "<div class=\"error fade\"><p><strong>The Open Web Archive has not been able to create the folder /archives in its installation directory. Please create it by hand and make it writable for the web server.</strong></p></div>";
        }
    } elseif (!is_writable($archives_dir)) {
        $this->notices .= "<div class=\"error fade\"><p><strong>The Open Web Archive needs a writable folder /archives in its installation directory. Please make it writable for the web server.</strong></p></div>";
    }

    // Check that we can execute commands

    // Match the exact function name: a substring search would also trigger
    // on shell_exec, pcntl_exec, curl_exec...
    $disabled_functions = array_map('trim', explode(',', strtolower((string) ini_get('disable_functions'))));
    $exec_allowed = function_exists('exec') && !in_array('exec', $disabled_functions, true);

    if (!$exec_allowed) {
        $this->notices .= "<div class=\"error fade\"><p><strong>The Open Web Archives requires that exec() is allowed to run wget and retrieve the pages to archive.</strong></p></div>";
    } else {
        // Check that wget is installed.
        // It is looked up on the default path, exactly like schedule() runs it.
        $output = array();
        $status = 0;
        exec('wget -V', $output, $status);

        if (empty($output) || $status !== 0) {
            $this->notices .=
                "<div class=\"error fade\"><p><strong>The Open Web Archives is not able to run GNU wget and retrieve the pages to archive. Please check that wget is installed and on the default path.</strong></p></div>";
        } else {
            // We need at least version 1.11.
            // Index 1 is the captured version number; version_compare() is
            // used because a plain string comparison mis-orders versions.
            $wget_version = array();
            $parsed = preg_match('/GNU Wget ([0-9\.]+) /', $output[0], $wget_version);
            $version_read = ($parsed === 1) ? $wget_version[1] : '';
            if ($version_read === '' || version_compare($version_read, '1.11', '<')) {
                $this->notices .= "<div class=\"error fade\"><p><strong>The Open Web Archives needs GNU wget version 1.11 or higher.</strong><br />Version read: {$version_read}</p></div>";
            }
        }
    }

    if ($this->notices != '') {
        add_action('admin_notices', array($this, 'admin_notices'));
    }
}
|
|
|
|
/**
 * Show admin notices
 *
 * Prints the HTML accumulated in $this->notices as is.
 *
 * @package owark
 * @since 0.1
 *
 * @return void
 */
function admin_notices() {
    print $this->notices;
}
|
|
|
|
/**
 * Admin menus
 *
 * Adds the 'owark' management page, rendered by management_page() and
 * restricted to users having the 'edit_others_posts' capability.
 *
 * @package owark
 * @since 0.1
 *
 * @return void
 */
function owark_admin_menu() {
    $page_title = __('The Open Web Archive', 'owark');
    $menu_title = __('Web Archive', 'owark');
    $renderer = array($this, 'management_page');

    add_management_page($page_title, $menu_title, 'edit_others_posts', 'owark', $renderer);
}
|
|
|
|
/**
 * URL of an archive page
 *
 * @package owark
 * @since 0.1
 *
 * @param mixed $archive_id Identifier appended after '/owark/'.
 * @return string The value of home_url() followed by '/owark/' and the identifier.
 */
function get_archive_url($archive_id) {
    $site_root = home_url();

    return "{$site_root}/owark/{$archive_id}";
}
|
|
|
|
/**
 * Display the admin/tools page.
 *
 * Lists every row of the owark_archives view flagged as broken, with a link
 * to the original URL and a link to its archive page.
 *
 * @package owark
 * @since 0.1
 *
 * @return void
 */
function management_page() {
    //must check that the user has the required capability
    if (!current_user_can('edit_others_posts')) {
        wp_die(__('You do not have sufficient permissions to access this page.'));
    }

    global $wpdb;

    echo '<div class="wrap">';
    screen_icon();
    echo '<h2>Owark - The Open Web Archive</h2>';
    echo '<p><em>Tired of broken links? Archive yours with the Open Web Archive!</em></p>';

    echo '<p>List of broken links with archived pages:</p>';

    $query = "SELECT *
        FROM {$wpdb->prefix}owark_archives
        WHERE broken = 1
        ORDER BY url";
    $results = $wpdb->get_results($query);

    echo '<table class="widefat">';
    echo '<thead>';
    echo '<tr>';
    echo '<th>URL</th>';
    echo '<th>Archive</th>';
    echo '</tr>';
    echo '</thead>';
    echo '<tbody>';

    // get_results() may not hand back an array when the query fails.
    if (is_array($results)) {
        foreach ($results as $link) {
            // URLs and dates come from the database and, originally, from
            // posts and comments: escape them for the HTML context they
            // are written into.
            $href = esc_url($link->url);
            $label = esc_html($link->url);
            $archive_url = esc_url($this->get_archive_url($link->id));
            $arc_date = esc_html($link->arc_date);
            echo "<tr>
                <td><a href=\"{$href}\" target='_blank'>{$label}</a></td>
                <td><a href=\"{$archive_url}\" target='_blank'>{$arc_date}</a></td>
                </tr>";
        }
    }

    echo '</tbody>';
    echo '</table>';

    // The wrapper is closed last so that the table is part of the page layout.
    echo "</div>";
}
|
|
|
|
/**
 * Add a rewrite rule to display archive pages
 *
 * The 'owark/(.+)' rule is placed in front of the existing ones; when the
 * incoming array already has that key, the value built here is the one kept.
 *
 * @package owark
 * @since 0.1
 *
 * @param array $rules Existing rewrite rules.
 * @return array The rules, preceded by the owark rule.
 */
function create_rewrite_rules($rules) {
    global $wp_rewrite;

    $owark_rules = array();
    $owark_rules['owark/(.+)'] = 'index.php?owark=' . $wp_rewrite->preg_index(1);

    return $owark_rules + $rules;
}
|
|
|
|
/**
 * Add a query variable used to display archive pages
 *
 * @package owark
 * @since 0.1
 *
 * @param array $qvars Query variables registered so far.
 * @return array The same list with 'owark' appended.
 */
function add_query_vars($qvars) {
    // 'owark' carries the archive identifier captured by the rewrite rule.
    $qvars[] = 'owark';

    return $qvars;
}
|
|
|
|
/**
 * Flush the rewrite rules when the owark rule is not registered yet.
 *
 * This method is hooked on admin_init, i.e. it runs on every admin request.
 * Regenerating the rules each time is wasteful, so the stored rules are
 * inspected first and the flush only happens while the 'owark/(.+)' rule
 * (the key added by create_rewrite_rules()) is missing from them.
 *
 * @package owark
 * @since 0.1
 *
 * @return void
 */
function flush_rewrite_rules() {
    global $wp_rewrite;

    $stored_rules = get_option('rewrite_rules');
    if (is_array($stored_rules) && isset($stored_rules['owark/(.+)'])) {
        // Our rule is already active: nothing to recalculate.
        return;
    }

    $wp_rewrite->flush_rules();
}
|
|
|
|
/**
 * Intercepts archive pages.
 *
 * When the current query carries a non-empty 'owark' variable, the matching
 * archive is displayed and the script ends; otherwise nothing happens.
 *
 * @package owark
 * @since 0.1
 *
 * @return void
 */
function template_redirect_intercept() {
    global $wp_query;

    $archive_parameter = $wp_query->get('owark');
    if (!$archive_parameter) {
        return;
    }

    $this->display_archive($archive_parameter);
    exit;
}
|
|
|
|
/**
 * Filter to replace broken links in post content.
 *
 * Delegates to link_filter() with the ID and type of the global $post. When
 * no post object is available (the filter can be applied outside of a
 * post context), the content is returned untouched.
 *
 * @package owark
 * @since 0.1
 *
 * @param string $content Post content.
 * @return string Content with broken links pointing to their archive.
 */
function content_filter($content) {
    log_function_call();
    global $post;

    if (!is_object($post) || !isset($post->ID)) {
        return $content;
    }

    return $this->link_filter($content, $post->ID, $post->post_type);
}
|
|
|
|
/**
 * Filter to replace broken links in comments.
 *
 * Delegates to link_filter() with the comment ID and the 'comment' type.
 * The comment object is optional: when it is missing or has no ID the text
 * is returned untouched instead of failing.
 *
 * @package owark
 * @since 0.1
 *
 * @param string      $content Comment text.
 * @param object|null $comment Comment the text belongs to, if any.
 * @return string Text with broken links pointing to their archive.
 */
function comment_filter($content, $comment = null) {
    log_function_call();

    if (!is_object($comment) || !isset($comment->comment_ID)) {
        return $content;
    }

    return $this->link_filter($content, $comment->comment_ID, 'comment');
}
|
|
|
|
/**
 * Filter to replace broken links in author links.
 *
 * Looks the URL up in the owark_broken_links view and, when at least one
 * row matches, returns the archive URL of the first row instead.
 *
 * @package owark
 * @since 0.2
 *
 * @param string $url Comment author URL.
 * @return string The archive URL for a known broken link, $url otherwise.
 */
function comment_author_url_filter($url) {
    log_function_call();
    global $wpdb;

    $template = "
        SELECT id
        FROM {$wpdb->prefix}owark_broken_links
        WHERE url = %s
        ";
    print_r_log($template);

    $rows = $wpdb->get_results($wpdb->prepare($template, $url));
    print_r_log($rows);

    return empty($rows) ? $url : $this->get_archive_url($rows[0]->id);
}
|
|
|
|
/**
 * Generic filter to replace broken links in content.
 *
 * The broken links of the given container are loaded from the
 * owark_broken_links view and kept in $this->broken_links, keyed by URL;
 * they are only reloaded when the container ID or type changes. Every
 * <a href="..."> of the content is then passed to replace_a_link().
 *
 * @package owark
 * @since 0.1
 *
 * @param string $content   HTML to filter.
 * @param mixed  $post_id   Container identifier (post or comment ID).
 * @param string $post_type Container type (post type, or 'comment').
 * @return string The filtered HTML; the original content when there is
 *                nothing to replace or when the replacement fails.
 */
function link_filter($content, $post_id, $post_type) {
    global $wpdb;
    log_function_call();

    // See if we haven't already loaded the broken links for this container,
    // which happens for comments (they have links to check in 2 different filters)
    if ($this->post_id != $post_id || $this->post_type != $post_type) {

        $this->post_id = $post_id;
        $this->post_type = $post_type;

        // Retrieve info about all occurrences of broken links in the current container
        $q = "
            SELECT url, id
            FROM {$wpdb->prefix}owark_broken_links
            WHERE container_id = %s
            AND container_type = %s
            ";
        print_r_log($q);
        $q = $wpdb->prepare($q, $this->post_id, $this->post_type);
        $results = $wpdb->get_results($q);
        print_r_log($results);
        $this->broken_links = array();

        // get_results() may not hand back an array when the query fails.
        if (is_array($results)) {
            foreach ($results as $link) {
                $this->broken_links[$link->url] = $link->id;
            }
        }
    }

    if (empty($this->broken_links)) {
        return $content;
    }

    // Regexp : see http://stackoverflow.com/questions/2609095/hooking-into-comment-text-to-add-surrounding-tag
    $replaced = preg_replace_callback('/(<a.*?href\s*=\s*["\'])([^"\'>]+)(["\'][^>]*>.*?<\/a>)/si', array($this, 'replace_a_link'), $content);

    // preg_replace_callback() yields null on a PCRE error (e.g. backtrack
    // limit hit on a long text): never blank the content because of that.
    if ($replaced === null) {
        print_r_log('preg_replace_callback failed, PCRE error code: ' . preg_last_error());
        return $content;
    }

    print_r_log("replaced: $replaced");
    return $replaced;
}
|
|
|
|
/**
 * Replace a link.
 *
 * Callback of the regular expression used by link_filter().
 *
 * @package owark
 * @since 0.1
 *
 * @param array $matches Match groups: [0] whole link, [1] text up to and
 *                       including the opening quote of href, [2] the href
 *                       value, [3] the remainder of the link.
 * @return string The link with its href pointing to the archive when the
 *                href is a key of $this->broken_links, the untouched link
 *                otherwise.
 */
function replace_a_link($matches) {
    log_function_call();

    $href = $matches[2];
    if (!array_key_exists($href, $this->broken_links)) {
        return $matches[0];
    }

    $archive_url = $this->get_archive_url($this->broken_links[$href]);

    return $matches[1] . $archive_url . $matches[3];
}
|
|
|
|
/**
 * Display an archive page
 *
 * Loads the owark row whose id is the integer value of $parameter, locates
 * the archived HTML file below archives_dir(), guesses and stores the
 * encoding when the row has none, then prints a banner followed by the
 * archived content.
 *
 * When no row matches, a 404 status is sent with the text
 * 'Archive not found'. When the row exists but no file can be read, the
 * banner is printed with 'Archive not found' as content.
 *
 * @package owark
 * @since 0.1
 *
 * @param mixed $parameter Archive identifier taken from the 'owark' query variable.
 * @return void
 */
function display_archive($parameter) {

    print_r_log("display_archive($parameter)");
    global $wpdb;

    $id = intval($parameter);

    $blog_title = get_bloginfo('name');
    $home_url = home_url();

    $query = $wpdb->prepare(
        "SELECT *
        from {$wpdb->prefix}owark AS owark
        where id = %d",
        $id
    );
    $link = $wpdb->get_row($query);
    $wpdb->flush();

    // Unknown identifier: there is no row to read a location or a URL from.
    if (!$link) {
        status_header(404);
        echo 'Archive not found';
        return;
    }

    $loc = archives_dir() . '/' . $link->arc_location;
    $arc_base = archives_url() . '/' . $link->arc_location;

    // The file name is either index.html or guessed from the URL
    $url = (string) $link->url;
    if ($url === '' || substr($url, -1) === '/') {
        $file_location = $loc . '/index.html';
    } else {
        // Last path segment of the URL, with the .html extension.
        $parts = explode('/', $url);
        $file_location = $loc . '/' . $parts[count($parts) - 1] . '.html';
    }

    if (!is_file($file_location) && is_dir($loc)) {
        // If the guessed file doesn't exist, find another html file!
        $dir = opendir($loc);
        if ($dir) {
            while (false !== ($file = readdir($dir))) {
                if ('.html' === substr($file, -5)) {
                    $file_location = $loc . '/' . $file;
                    break;
                }
            }
            closedir($dir);
        }
    }

    // Read the file
    // file_get_contents() copes with empty files and leaves no handle open.
    $content = is_file($file_location) ? file_get_contents($file_location) : false;
    if ($content === false) {
        $content = 'Archive not found';
    }

    // Which encoding?
    $encoding = $link->encoding;

    if ($encoding == NULL) {
        // We need to guess the encoding!

        $matches = NULL;
        // <meta http-equiv="Content-Type" content="text/xml; charset=iso-8859-1"/>
        if (preg_match('/<meta\s*http-equiv\s*=\s*["\']Content-Type["\']\s+content\s*=\s*["\'][^"\'>]*charset\s*=\s*([^"\'>]+)\s*["\']/si',
                $content, $matches) > 0) {
            $encoding = $matches[1];
        } else {
            $encoding = mb_detect_encoding($content);
        }

        if ($encoding) {
            // The value comes from the archived page: keep only characters
            // that can appear in a charset name before storing/emitting it.
            $encoding = preg_replace('/[^A-Za-z0-9._:+-]/', '', (string) $encoding);
        }

        if ($encoding) {
            $wpdb->update(
                "{$wpdb->prefix}owark",
                array('encoding' => $encoding),
                array('id' => $id));
        }
    }

    // Only advertise a charset when one is known.
    $charset = $encoding ? '; charset=' . preg_replace('/[^A-Za-z0-9._:+-]/', '', (string) $encoding) : '';

    header("Content-Type: text/html{$charset}");

    echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
        <meta http-equiv="Content-Type" content="text/html' . $charset . '">';

    // Everything below comes from the database or the site settings and is
    // escaped for the HTML context it is written into.
    $base_href = esc_url($arc_base . '/');
    $original_href = esc_url($link->url);
    $original_label = esc_html($link->url);
    $snapshot_date = esc_html($link->arc_date);
    $home_href = esc_url($home_url);
    $home_label = esc_html($blog_title);

    echo "<base href=\"{$base_href}\">";
    echo '<div style="background:#fff;border:1px solid #999;margin:-1px -1px 0;padding:0;">';
    echo '<div style="background:#ddd;border:1px solid #999;color:#000;font:13px arial,sans-serif;font-weight:normal;margin:12px;padding:8px;text-align:left">';
    echo "This is an <a href='http://owark.org'>Open Web Archive</a> archive of <a href=\"{$original_href}\">{$original_label}</a>.";
    echo "<br />This snapshot has been taken on {$snapshot_date} for the website <a href=\"{$home_href}\">{$home_label}</a> which contains a link to this page and has saved a copy to be displayed in the page ever disappears.";
    echo '</div></div><div style="position:relative">';

    print_r_log("file_location: $file_location");

    echo $content;
    echo '</div>';
}
|
|
|
|
/**
 * Check if we've got something to archive
 *
 * Cron callback. Takes one URL from the owark_links_to_archive view, mirrors
 * it with wget below archives_dir() and records the outcome in the owark
 * table. While a URL was found and $occurrences is positive, a new single
 * event is scheduled 90 seconds later with the counter decremented.
 *
 * The 'owark_archiving' option is used as a flag so that two runs do not
 * overlap: the method returns at once when the flag is set, and removes it
 * when it is done.
 *
 * @package owark
 * @since 0.1
 *
 * @param int    $occurrences Number of further runs that may be chained.
 * @param string $version     Plugin version, advertised in the user agent.
 * @return void
 */
public static function schedule($occurrences, $version) {

    log_function_call();

    if (get_option('owark_archiving', false)) {
        // Another run is in progress.
        return;
    }
    update_option('owark_archiving', true);

    global $wpdb;

    // A single row is used: do not fetch the whole view.
    $query = "
        SELECT url from {$wpdb->prefix}owark_links_to_archive LIMIT 1";
    print_r_log("query: $query");

    $url = $wpdb->get_row($query);
    print_r_log($url);
    $wpdb->flush();

    if (!empty($url) && isset($url->url)) {
        $date = date('c');

        // Directory derived from the URL. '.' and '..' segments are
        // neutralised so that a crafted URL cannot make wget write outside
        // of the archives directory.
        $segments = explode('/', str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->url))));
        foreach ($segments as $index => $segment) {
            if ($segment === '.' || $segment === '..') {
                $segments[$index] = '_';
            }
        }
        $relpath = implode('/', $segments) . '/' . $date;
        $path = archives_dir() . "/$relpath";

        // URLs originate from posts and comments: every variable part of the
        // command line is quoted with escapeshellarg(), and '--' ends the
        // options so that a URL starting with '-' is not read as one.
        // NOTE(review): escapeshellarg() may drop non-ASCII bytes depending on
        // the locale - confirm the behaviour for internationalised URLs.
        $user_agent = "Mozilla/5.0 (compatible; owark/$version; http://owark.org/)";
        $command = 'wget -t3 -E -H -k -K -p -nd -nv --timeout=60'
            . ' --user-agent=' . escapeshellarg($user_agent)
            . ' -P ' . escapeshellarg($path)
            . ' -- ' . escapeshellarg($url->url)
            . ' 2>&1';

        $output = array();
        $status = 0;
        exec($command, $output, $status);
        print_r_log("wget status: $status");
        if ($status != 0) {
            print_r_log("wget status: $status, output:");
            print_r_log($output);
        }

        // The row is recorded whatever the wget status, which is stored in it.
        $wpdb->insert("{$wpdb->prefix}owark", array(
            'url' => $url->url,
            'status' => $status,
            'arc_date' => $date,
            'arc_location' => $relpath));

        if ($occurrences > 0) {
            wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1, 'version' => $version));
        }
    }

    delete_option('owark_archiving');
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
// Instantiate the plugin: the constructor registers all its hooks.
if (class_exists('Owark')) {
    $owark = new Owark();
}
|
|
?>
|