Massive refactoring and bug fixes

This commit is contained in:
Eric van der Vlist 2020-05-05 14:05:53 +02:00
parent 460c77f116
commit 49afb2b9ea
1 changed files with 651 additions and 551 deletions

222
owark.php
View File

@ -1,18 +1,18 @@
<?php <?php
/* Copyright 2011-2020 Eric van der Vlist (vdv@dyomedea.com) /* Copyright 2011-2020 Eric van der Vlist (vdv@dyomedea.com)
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2, as it under the terms of the GNU General Public License, version 2, as
published by the Free Software Foundation. published by the Free Software Foundation.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
/* /*
@ -26,14 +26,46 @@ License: GLP2
*/ */
/* /*
Log Log
*/ */
if ( ! function_exists('print_r_log')) { if ( ! function_exists('print_r_log')) {
function print_r_log ( $log ) { function print_r_log ( $log ) {
$caller_strace = debug_backtrace()[1];
if ( is_array( $log ) || is_object( $log ) ) { if ( is_array( $log ) || is_object( $log ) ) {
error_log($caller_strace['file'] . '/#' .$caller_strace['line'] . ':');
error_log( print_r( $log, true ) ); error_log( print_r( $log, true ) );
} else { } else {
error_log( $log ); error_log($caller_strace['file'] . '/#' .$caller_strace['line'] . ': ' . $log );
}
}
}
if ( ! function_exists('log_function_call')) {
    /**
     * Debug helper: dump the calling function and its arguments to the PHP error log.
     *
     * Frame 1 of debug_backtrace() describes the function that invoked this helper:
     * 'function' is its name, 'args' its arguments, and 'file'/'line' the place
     * that function was called from. One header line is logged, then one line per
     * argument (type and print_r() rendering), then a closing parenthesis.
     *
     * Missing frame entries are logged as '<undefined>' and a missing argument list
     * is treated as empty, so calling this helper from top-level code is harmless.
     *
     * @return void
     */
    function log_function_call () {
        $trace = debug_backtrace();
        // There is no frame 1 when the helper is invoked outside of any function.
        $caller_strace = isset($trace[1]) ? $trace[1] : array();

        $file = isset($caller_strace['file']) ? $caller_strace['file'] : '<undefined>';
        $line = isset($caller_strace['line']) ? $caller_strace['line'] : '<undefined>';
        $function = isset($caller_strace['function']) ? $caller_strace['function'] : '<undefined>';
        error_log($file . '/#' . $line . ' function: ' . $function . '(');

        // 'args' is guarded like the other keys: it is not guaranteed to be present in a frame.
        $args = (isset($caller_strace['args']) && is_array($caller_strace['args'])) ? $caller_strace['args'] : array();
        foreach ($args as $arg) {
            error_log(' * ' . gettype($arg) . ': ' . print_r( $arg, true ));
        }

        // Close the parenthesis opened by the header line so consecutive dumps stay distinguishable.
        error_log(')');
    }
}
if ( ! function_exists('archives_dir')) {
    /**
     * Filesystem directory under which the archives are stored.
     *
     * When the OWARK_ARCHIVES_REL_PATH constant is defined, it is appended to
     * WP_CONTENT_DIR; otherwise the "archives" directory under WP_PLUGIN_DIR is used.
     *
     * @return string Directory path built from the constants above.
     */
    function archives_dir() {
        // Default location when no relative path has been configured.
        if (!defined('OWARK_ARCHIVES_REL_PATH')) {
            return WP_PLUGIN_DIR . '/archives';
        }

        return WP_CONTENT_DIR . '/' . OWARK_ARCHIVES_REL_PATH;
    }
}
if ( ! function_exists('archives_url')) {
/**
 * Base URL under which the archives are reachable.
 *
 * URL counterpart of the directory lookup: when the OWARK_ARCHIVES_REL_PATH
 * constant is defined it is appended to WP_CONTENT_URL, otherwise the
 * "archives" location under WP_PLUGIN_URL is returned.
 *
 * @return string URL built from the constants above.
 */
function archives_url() {
// A configured relative path is resolved against the content URL.
if (defined('OWARK_ARCHIVES_REL_PATH')) {
return WP_CONTENT_URL . '/' . OWARK_ARCHIVES_REL_PATH;
} else {
// Default location when no relative path has been configured.
return WP_PLUGIN_URL . '/archives';
} }
} }
} }
@ -45,6 +77,7 @@ if (!class_exists("Owark")) {
private $post_id = -1; private $post_id = -1;
private $post_type = ""; private $post_type = "";
private $version = '0.2'; private $version = '0.2';
private $db_version = '0.2';
private $notices = ""; private $notices = "";
/** /**
@ -75,13 +108,13 @@ if (!class_exists("Owark")) {
add_filter('admin_init', array($this, 'flush_rewrite_rules')); add_filter('admin_init', array($this, 'flush_rewrite_rules'));
add_action( 'template_redirect', array($this, 'template_redirect_intercept') ); add_action( 'template_redirect', array($this, 'template_redirect_intercept') );
add_filter ( 'the_content', array($this, 'content_filter')); add_filter ( 'the_content', array($this, 'content_filter'), 20);
add_filter ( 'comment_text', array($this, 'comment_filter')); add_filter ( 'comment_text', array($this, 'comment_filter'), 20, 2);
add_filter ( 'get_comment_author_link', array($this, 'comment_filter')); add_filter ( 'get_comment_author_url', array($this, 'comment_author_url_filter'), 20, 1);
add_action('owark_schedule_event', array('Owark', 'schedule')); add_action('owark_schedule_event', array('Owark', 'schedule'), 10, 2);
if ( !wp_next_scheduled( 'owark_schedule_event', array('occurrences' => 30) ) ) { if ( !wp_next_scheduled( 'owark_schedule_event', array('occurrences' => 30, 'version' => $this->version) ) ) {
wp_schedule_event(time(), 'hourly', 'owark_schedule_event', array('occurrences' => 30)); wp_schedule_event(time(), 'hourly', 'owark_schedule_event', array('occurrences' => 30, 'version' => $this->version));
} }
@ -95,6 +128,14 @@ if (!class_exists("Owark")) {
self::__construct(); self::__construct();
} }
/**
 * Archives directory as resolved by this class.
 *
 * Yields the OWARK_ARCHIVES_DIR constant when it is defined, otherwise the
 * "archives" directory located next to this source file.
 *
 * NOTE(review): this file also declares a global archives_dir() helper that
 * reads OWARK_ARCHIVES_REL_PATH instead; confirm which of the two is meant to
 * be authoritative.
 *
 * @return string
 */
function archives_dir() {
    return defined('OWARK_ARCHIVES_DIR')
        ? OWARK_ARCHIVES_DIR
        : (__DIR__ . '/archives');
}
/** /**
* Check we have everything we need... * Check we have everything we need...
* *
@ -105,14 +146,21 @@ if (!class_exists("Owark")) {
*/ */
function sanity_checks(){ function sanity_checks(){
// print_r_log("Sanity checks");
// Install or upgrade tables if needed // Install or upgrade tables if needed
$installed_ver = get_option( "owark_db_version" ); $installed_ver = get_option( "owark_db_version" );
if ($installed_ver != $this->version) {
global $wpdb; global $wpdb;
$table = $wpdb->prefix."owark"; $table = $wpdb->prefix."owark";
if ($installed_ver == '0.1') {
// In version 0.1 final URLs were used, but the Broken Link Checker updates these URLs when a link is detected as broken
$table_links = $wpdb->prefix."blc_links";
$sql = "update $table as owark join $table_links as links on owark.url = links.final_url COLLATE latin1_swedish_ci set owark.url = links.url COLLATE latin1_swedish_ci";
$wpdb->query($sql);
$installed_ver = '0.2';
update_option( "owark_db_version", $installed_ver );
}
if ($installed_ver != $this->db_version) {
print_r_log("Database upgrade from $installed_ver to {$this->version}");
$sql = "CREATE TABLE $table ( $sql = "CREATE TABLE $table (
id int(10) unsigned NOT NULL AUTO_INCREMENT, id int(10) unsigned NOT NULL AUTO_INCREMENT,
url text NOT NULL, url text NOT NULL,
@ -124,11 +172,12 @@ if (!class_exists("Owark")) {
KEY `url` (`url`(150)) )"; KEY `url` (`url`(150)) )";
require_once(ABSPATH . 'wp-admin/includes/upgrade.php'); require_once(ABSPATH . 'wp-admin/includes/upgrade.php');
dbDelta($sql); dbDelta($sql);
update_option( "owark_db_version", $this->db_version );
update_option( "owark_db_version", $this->version ); $this->notices = "<div class=\"updated fade\"><p><strong>The owark table has been installed or upgraded to version {$this->db_version}</strong></p></div>";
$this->notices = "<div class=\"updated fade\"><p><strong>The owark table has been installed or upgraded to version {$this->version}</strong></p></div>";
} }
// Check that the broken link checker is installed // Check that the broken link checker is installed
if (!function_exists('get_plugins')) if (!function_exists('get_plugins'))
require_once (ABSPATH."wp-admin/includes/plugin.php"); require_once (ABSPATH."wp-admin/includes/plugin.php");
@ -151,7 +200,8 @@ if (!class_exists("Owark")) {
} }
// Check if we have an archive subdirectory // Check if we have an archive subdirectory
$archives_dir = dirname(__FILE__) . '/archives'; $archives_dir = archives_dir();
print_r_log($archives_dir);
if (!is_dir($archives_dir)) { if (!is_dir($archives_dir)) {
@mkdir($archives_dir); @mkdir($archives_dir);
if (!is_dir($archives_dir)) { if (!is_dir($archives_dir)) {
@ -350,6 +400,7 @@ if (!class_exists("Owark")) {
* *
*/ */
function content_filter($content) { function content_filter($content) {
log_function_call();
global $post; global $post;
return $this->link_filter($content, $post->ID, $post->post_type); return $this->link_filter($content, $post->ID, $post->post_type);
} }
@ -362,8 +413,42 @@ if (!class_exists("Owark")) {
* *
* *
*/ */
function comment_filter($content) { function comment_filter($content, $comment) {
return $this->link_filter($content, get_comment_ID(), 'comment'); log_function_call();
return $this->link_filter($content, $comment->comment_ID, 'comment');
}
/**
* Filter to replace a broken comment author URL with the URL of its archive.
*
* Looks the URL up in the owark table, joined with the broken link checker
* tables (blc_instances, blc_links), keeping only rows whose link is flagged
* broken. When nothing matches, the URL is returned untouched.
*
* @package owark
* @since 0.2
*
* @param string $url URL to check (compared with owark.url and, through it, instances.raw_url).
* @return string The original URL, or the value of get_archive_url() for the first matching archive id.
*/
function comment_author_url_filter($url) {
log_function_call();
global $wpdb;
// The single %s placeholder is bound to $url by $wpdb->prepare() below.
$q = "
SELECT owark.id
FROM {$wpdb->prefix}blc_instances AS instances,
{$wpdb->prefix}blc_links AS links,
{$wpdb->prefix}owark AS owark
WHERE
instances.link_id = links.link_id
AND owark.url = %s
AND owark.url = instances.raw_url
AND links.broken = 1
";
// Debug trace of the query template (before placeholder substitution).
print_r_log($q);
$q = $wpdb->prepare($q, $url);
$results = $wpdb->get_results($q);
// Debug trace of the rows returned.
print_r_log($results);
if (empty($results)) {
// No archive of a broken link matches this URL: leave it as is.
return $url;
} else {
// Several rows may match; only the first one is used.
// NOTE(review): get_archive_url() is defined outside this view — presumably it maps an owark id to the public archive URL; confirm.
return $this->get_archive_url($results[0]->id);
}
} }
/** /**
@ -375,9 +460,8 @@ if (!class_exists("Owark")) {
* *
*/ */
function link_filter($content, $post_id, $post_type) { function link_filter($content, $post_id, $post_type) {
global $wpdb; global $wpdb;
log_function_call();
// See if we haven't already loaded the broken links for this post... // See if we haven't already loaded the broken links for this post...
if ($this->post_id != $post_id || $this->post_type != $post_type) { if ($this->post_id != $post_id || $this->post_type != $post_type) {
@ -393,14 +477,15 @@ if (!class_exists("Owark")) {
{$wpdb->prefix}owark AS owark {$wpdb->prefix}owark AS owark
WHERE WHERE
instances.link_id = links.link_id instances.link_id = links.link_id
AND owark.url = links.final_url COLLATE latin1_swedish_ci AND owark.url = instances.raw_url
AND instances.container_id = %s AND instances.container_id = %s
AND instances.container_type = %s AND instances.container_type = %s
AND links.broken = 1 AND links.broken = 1
"; ";
print_r_log($q);
$q = $wpdb->prepare($q, $this->post_id, $this->post_type); $q = $wpdb->prepare($q, $this->post_id, $this->post_type);
$results = $wpdb->get_results($q); $results = $wpdb->get_results($q);
print_r_log($results);
$this->broken_links = array(); $this->broken_links = array();
foreach ($results as $link) { foreach ($results as $link) {
@ -415,7 +500,9 @@ if (!class_exists("Owark")) {
} }
// Regexp : see http://stackoverflow.com/questions/2609095/hooking-into-comment-text-to-add-surrounding-tag // Regexp : see http://stackoverflow.com/questions/2609095/hooking-into-comment-text-to-add-surrounding-tag
return preg_replace_callback('/(<a.*?href\s*=\s*["\'])([^"\'>]+)(["\'][^>]*>.*?<\/a>)/si', array( $this, 'replace_a_link'), $content); $replaced = preg_replace_callback('/(<a.*?href\s*=\s*["\'])([^"\'>]+)(["\'][^>]*>.*?<\/a>)/si', array( $this, 'replace_a_link'), $content);
print_r_log("replaced: $replaced");
return $replaced;
} }
/** /**
@ -427,6 +514,7 @@ if (!class_exists("Owark")) {
* *
*/ */
function replace_a_link($matches) { function replace_a_link($matches) {
log_function_call();
if (array_key_exists($matches[2], $this->broken_links)) { if (array_key_exists($matches[2], $this->broken_links)) {
return $matches[1].$this->get_archive_url($this->broken_links[$matches[2]]).$matches[3]; return $matches[1].$this->get_archive_url($this->broken_links[$matches[2]]).$matches[3];
} else { } else {
@ -445,40 +533,38 @@ if (!class_exists("Owark")) {
*/ */
function display_archive($parameter) { function display_archive($parameter) {
print_r_log("display_archive($parameter)");
global $wpdb; global $wpdb;
$id = intval($parameter); $id = intval($parameter);
$blog_title = get_bloginfo('name');
$home_url = home_url();
$query = "SELECT * $query = "SELECT *
from {$wpdb->prefix}owark AS owark from {$wpdb->prefix}owark AS owark
where id = {$id}"; where id = {$id}";
$link = $wpdb->get_row($query); $link = $wpdb->get_row($query);
$wpdb->flush(); $wpdb->flush();
// Find the file to read $loc = archives_dir() . '/' . $link->arc_location;
$blog_title = get_bloginfo('name'); $arc_base = archives_url() . '/' . $link->arc_location;
$home_url = home_url();
$loc = "";
if( ($pos = strpos($link->arc_location, '/archives')) !== FALSE )
$loc = '/wp-content/plugins/owark' . substr($link->arc_location, $pos);
$arc_loc = home_url() . $loc;
// The file name is either index.html or guessed from the URL // The file name is either index.html or guessed from the URL
if ($home_url[strlen($home_url)] == '/') { if ($link->url[strlen($link->url) - 1] == '/') {
$file_location = '.'. $loc .'/index.html'; $file_location = $loc .'/index.html';
} else { } else {
$parts = str_split($home_url, '/'); $parts = explode($link->url, '/');
$file_location = '.'. $loc . $parts[count($parts)] . '.html'; $file_location = $loc . $parts[count($parts) - 1] . '.html';
} }
if (!file_exists($file_location)) { if (!file_exists($file_location)) {
// If index.html doesn't exist, find another html file! // If index.html doesn't exist, find another html file!
$dir = opendir('.'.$loc); $dir = opendir($loc);
if ($dir) { if ($dir) {
while (false !== ($file = readdir($dir))) { while (false !== ($file = readdir($dir))) {
if ('.html' === substr($file, strlen($file) - 5)) { if ('.html' === substr($file, strlen($file) - 5)) {
$file_location = '.'.$loc.'/' . $file; $file_location = $loc.'/' . $file;
break; break;
} }
} }
@ -519,19 +605,21 @@ if (!class_exists("Owark")) {
} }
} }
header("Content-Type: text/html; charset=$encoding"); header("Content-Type: text/html; charset=$encoding");
echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<meta http-equiv="Content-Type" content="text/html; charset='.$encoding.'">'; <meta http-equiv="Content-Type" content="text/html; charset='.$encoding.'">';
echo "<base href=\"{$arc_loc}/\">"; echo "<base href=\"{$arc_base}/\">";
echo '<div style="background:#fff;border:1px solid #999;margin:-1px -1px 0;padding:0;">'; echo '<div style="background:#fff;border:1px solid #999;margin:-1px -1px 0;padding:0;">';
echo '<div style="background:#ddd;border:1px solid #999;color:#000;font:13px arial,sans-serif;font-weight:normal;margin:12px;padding:8px;text-align:left">'; echo '<div style="background:#ddd;border:1px solid #999;color:#000;font:13px arial,sans-serif;font-weight:normal;margin:12px;padding:8px;text-align:left">';
echo "This is an <a href='http://owark.org'>Open Web Archive</a> archive of <a href=\"{$link->url}\">{$link->url}</a>."; echo "This is an <a href='http://owark.org'>Open Web Archive</a> archive of <a href=\"{$link->url}\">{$link->url}</a>.";
echo "<br />This snapshot has been taken on {$link->arc_date} for the website <a href=\"{$home_url}\">{$blog_title}</a> which contains a link to this page and has saved a copy to be displayed in the page ever disappears."; echo "<br />This snapshot has been taken on {$link->arc_date} for the website <a href=\"{$home_url}\">{$blog_title}</a> which contains a link to this page and has saved a copy to be displayed in the page ever disappears.";
echo '</div></div><div style="position:relative">'; echo '</div></div><div style="position:relative">';
print_r_log("file_location: $file_location");
$f = fopen($file_location, "r"); $f = fopen($file_location, "r");
echo $content; echo $content;
@ -547,7 +635,9 @@ if (!class_exists("Owark")) {
* *
* *
*/ */
public static function schedule($occurrences) { public static function schedule($occurrences, $version) {
log_function_call();
$archiving = get_option( 'owark_archiving', false); $archiving = get_option( 'owark_archiving', false);
if (! $archiving) { if (! $archiving) {
@ -557,33 +647,43 @@ if (!class_exists("Owark")) {
} }
global $wpdb; global $wpdb;
$query = "SELECT DISTINCT final_url from {$wpdb->prefix}blc_links $query = "
WHERE final_url NOT IN (SELECT url COLLATE latin1_swedish_ci FROM {$wpdb->prefix}owark) SELECT DISTINCT instances.raw_url as url
from {$wpdb->prefix}blc_links as links,
{$wpdb->prefix}blc_instances AS instances
WHERE url NOT IN (SELECT url FROM {$wpdb->prefix}owark)
AND broken=0 AND broken=0
AND final_url!=''"; AND last_check is not null
AND instances.link_id = links.link_id";
print_r_log("query: $query");
$url = $wpdb->get_row($query); $url = $wpdb->get_row($query);
print_r_log($url); print_r_log($url);
$wpdb->flush(); $wpdb->flush();
if ($url != NULL) { if ($url != NULL) {
$date = date('c'); $date = date('c');
$relpath = '/archives/'. str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->final_url))) . '/' . $date; $relpath = str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->url))) . '/' . $date;
$path = dirname(__FILE__).$relpath; $path = archives_dir()."/$relpath";
//mkdir($path, $recursive=true); //mkdir($path, $recursive=true);
$output = array(); $output = array();
$status = 0; $status = 0;
exec("wget -t3 -E -H -k -K -p -nd -nv --timeout=60 --user-agent=\"Mozilla/5.0 (compatible; owark/0.2; http://owark.org/)\" -P $path {$url->final_url}", exec("wget -t3 -E -H -k -K -p -nd -nv --timeout=60 --user-agent=\"Mozilla/5.0 (compatible; owark/$version; http://owark.org/)\" -P $path {$url->url} 2>&1",
$output, $status); $output, $status);
print_r_log("wget status: $status"); print_r_log("wget status: $status");
if ($status != 0) {
print_r_log("wget status: $status, output:");
print_r_log($output);
}
$q = $wpdb->insert("{$wpdb->prefix}owark", array( $q = $wpdb->insert("{$wpdb->prefix}owark", array(
'url' => $url->final_url, 'url' => $url->url,
'status' => $status, 'status' => $status,
'arc_date' => $date, 'arc_date' => $date,
'arc_location' => $relpath)); 'arc_location' => $relpath));
if ($occurrences > 0) { if ($occurrences > 0) {
wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1)); wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1, 'version' => $version));
} }
} }
@ -596,13 +696,13 @@ if (!class_exists("Owark")) {
} }
} }
if (class_exists("Owark")) { if (class_exists("Owark")) {
$owark = new Owark(); $owark = new Owark();
} }
?> ?>