Massive refactoring and bug fixes

This commit is contained in:
Eric van der Vlist 2020-05-05 14:05:53 +02:00
parent 460c77f116
commit 49afb2b9ea
1 changed files with 651 additions and 551 deletions

222
owark.php
View File

@ -1,18 +1,18 @@
<?php
/* Copyright 2011-2020 Eric van der Vlist (vdv@dyomedea.com)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2, as
published by the Free Software Foundation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2, as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
@ -26,14 +26,46 @@ License: GLP2
*/
/*
Log
*/
Log
*/
if ( ! function_exists('print_r_log')) {
    /**
     * Write a value to the PHP error log, prefixed with a call location.
     *
     * Arrays and objects are dumped with print_r() on their own log entry,
     * preceded by a "<file>/#<line>:" header entry. Any other value is
     * appended to the header and logged exactly once.
     *
     * @param mixed $log Value to log.
     */
    function print_r_log ( $log ) {
        $trace = debug_backtrace();
        // Frame 1 describes the function that called us. It does not exist
        // when we are called from top-level code, so fall back to frame 0.
        $caller_strace = isset($trace[1]) ? $trace[1] : $trace[0];
        // A frame may carry no file/line (log_function_call guards the same
        // keys), so substitute a placeholder instead of raising a warning.
        $location = (isset($caller_strace['file']) ? $caller_strace['file'] : '<undefined>')
            . '/#'
            . (isset($caller_strace['line']) ? $caller_strace['line'] : '<undefined>');
        if ( is_array( $log ) || is_object( $log ) ) {
            error_log($location . ':');
            error_log( print_r( $log, true ) );
        } else {
            error_log($location . ': ' . $log );
        }
    }
}
if ( ! function_exists('log_function_call')) {
    /**
     * Log the function that called this helper, followed by one log entry
     * per argument it received (type and print_r() dump).
     *
     * Missing pieces of the caller frame are reported as "<undefined>".
     */
    function log_function_call () {
        $trace = debug_backtrace();
        // Frame 1 is the function that called us; there is none when this
        // helper is invoked from top-level code.
        $caller_strace = isset($trace[1]) ? $trace[1] : array();
        error_log((isset($caller_strace['file']) ? $caller_strace['file'] : '<undefined>') . '/#' . (isset($caller_strace['line']) ? $caller_strace['line'] : '<undefined>') . ' function: ' . (isset($caller_strace['function']) ? $caller_strace['function'] : '<undefined>') . '(');
        // 'args' is guarded like the other keys so that foreach never
        // receives an undefined value.
        $args = isset($caller_strace['args']) ? $caller_strace['args'] : array();
        foreach ($args as $arg) {
            error_log(' * ' . gettype($arg) . ': ' . print_r( $arg, true ));
        }
    }
}
if ( ! function_exists('archives_dir')) {
    /**
     * Filesystem directory in which archives are stored.
     *
     * @return string WP_CONTENT_DIR plus OWARK_ARCHIVES_REL_PATH when that
     *                constant is defined, otherwise WP_PLUGIN_DIR . '/archives'.
     */
    function archives_dir() {
        return defined('OWARK_ARCHIVES_REL_PATH')
            ? WP_CONTENT_DIR . '/' . OWARK_ARCHIVES_REL_PATH
            : WP_PLUGIN_DIR . '/archives';
    }
}
if ( ! function_exists('archives_url')) {
    /**
     * Base URL under which archives are reachable.
     *
     * @return string WP_CONTENT_URL plus OWARK_ARCHIVES_REL_PATH when that
     *                constant is defined, otherwise WP_PLUGIN_URL . '/archives'.
     */
    function archives_url() {
        return defined('OWARK_ARCHIVES_REL_PATH')
            ? WP_CONTENT_URL . '/' . OWARK_ARCHIVES_REL_PATH
            : WP_PLUGIN_URL . '/archives';
    }
}
@ -45,6 +77,7 @@ if (!class_exists("Owark")) {
private $post_id = -1;
private $post_type = "";
private $version = '0.2';
private $db_version = '0.2';
private $notices = "";
/**
@ -75,13 +108,13 @@ if (!class_exists("Owark")) {
add_filter('admin_init', array($this, 'flush_rewrite_rules'));
add_action( 'template_redirect', array($this, 'template_redirect_intercept') );
add_filter ( 'the_content', array($this, 'content_filter'));
add_filter ( 'comment_text', array($this, 'comment_filter'));
add_filter ( 'get_comment_author_link', array($this, 'comment_filter'));
add_filter ( 'the_content', array($this, 'content_filter'), 20);
add_filter ( 'comment_text', array($this, 'comment_filter'), 20, 2);
add_filter ( 'get_comment_author_url', array($this, 'comment_author_url_filter'), 20, 1);
add_action('owark_schedule_event', array('Owark', 'schedule'));
if ( !wp_next_scheduled( 'owark_schedule_event', array('occurrences' => 30) ) ) {
wp_schedule_event(time(), 'hourly', 'owark_schedule_event', array('occurrences' => 30));
add_action('owark_schedule_event', array('Owark', 'schedule'), 10, 2);
if ( !wp_next_scheduled( 'owark_schedule_event', array('occurrences' => 30, 'version' => $this->version) ) ) {
wp_schedule_event(time(), 'hourly', 'owark_schedule_event', array('occurrences' => 30, 'version' => $this->version));
}
@ -95,6 +128,14 @@ if (!class_exists("Owark")) {
self::__construct();
}
/**
 * Directory holding the archives, as seen by this class.
 *
 * NOTE(review): this method reads OWARK_ARCHIVES_DIR, whereas the global
 * archives_dir() helper reads OWARK_ARCHIVES_REL_PATH, and the sanity check
 * visible in this file calls the global helper. Confirm which one is intended.
 *
 * @return string OWARK_ARCHIVES_DIR when defined, otherwise the "archives"
 *                directory next to this file.
 */
function archives_dir() {
    if (! defined('OWARK_ARCHIVES_DIR')) {
        return dirname(__FILE__) . '/archives';
    }
    return OWARK_ARCHIVES_DIR;
}
/**
* Check we have everything we need...
*
@ -105,14 +146,21 @@ if (!class_exists("Owark")) {
*/
function sanity_checks(){
// print_r_log("Sanity checks");
// Install or upgrade tables if needed
$installed_ver = get_option( "owark_db_version" );
if ($installed_ver != $this->version) {
global $wpdb;
$table = $wpdb->prefix."owark";
if ($installed_ver == '0.1') {
// In version 0.1 final URLs were used, but the broken link checker updates these URLs when a link is detected as broken
$table_links = $wpdb->prefix."blc_links";
$sql = "update $table as owark join $table_links as links on owark.url = links.final_url COLLATE latin1_swedish_ci set owark.url = links.url COLLATE latin1_swedish_ci";
$wpdb->query($sql);
$installed_ver = '0.2';
update_option( "owark_db_version", $installed_ver );
}
if ($installed_ver != $this->db_version) {
print_r_log("Database upgrade from $installed_ver to {$this->version}");
$sql = "CREATE TABLE $table (
id int(10) unsigned NOT NULL AUTO_INCREMENT,
url text NOT NULL,
@ -124,11 +172,12 @@ if (!class_exists("Owark")) {
KEY `url` (`url`(150)) )";
require_once(ABSPATH . 'wp-admin/includes/upgrade.php');
dbDelta($sql);
update_option( "owark_db_version", $this->db_version );
update_option( "owark_db_version", $this->version );
$this->notices = "<div class=\"updated fade\"><p><strong>The owark table has been installed or upgraded to version {$this->version}</strong></p></div>";
$this->notices = "<div class=\"updated fade\"><p><strong>The owark table has been installed or upgraded to version {$this->db_version}</strong></p></div>";
}
// Check that the broken link checker is installed
if (!function_exists('get_plugins'))
require_once (ABSPATH."wp-admin/includes/plugin.php");
@ -151,7 +200,8 @@ if (!class_exists("Owark")) {
}
// Check if we have an archive subdirectory
$archives_dir = dirname(__FILE__) . '/archives';
$archives_dir = archives_dir();
print_r_log($archives_dir);
if (!is_dir($archives_dir)) {
@mkdir($archives_dir);
if (!is_dir($archives_dir)) {
@ -350,6 +400,7 @@ if (!class_exists("Owark")) {
*
*/
function content_filter($content) {
    log_function_call();
    global $post;
    // The 'the_content' filter can be applied when no current post is set.
    // Without a post there is no container to look broken links up for, so
    // hand the content back untouched instead of dereferencing null.
    if (! is_object($post)) {
        return $content;
    }
    // Delegate to the shared link rewriting, keyed by post ID and post type.
    return $this->link_filter($content, $post->ID, $post->post_type);
}
@ -362,8 +413,42 @@ if (!class_exists("Owark")) {
*
*
*/
function comment_filter($content, $comment = null) {
    log_function_call();
    // The second argument of the 'comment_text' filter is documented as
    // WP_Comment|null; when it is missing, fall back to the current comment
    // ID from get_comment_ID(), as the one-argument version of this filter did.
    $comment_id = is_object($comment) ? $comment->comment_ID : get_comment_ID();
    // Rewrite broken links using the 'comment' container type.
    return $this->link_filter($content, $comment_id, 'comment');
}
/**
* Filter to replace broken links in author links.
*
* @package owark
* @since 0.2
*
*
*/
function comment_author_url_filter($url) {
    log_function_call();
    global $wpdb;
    // Look for an archive of this exact URL among the links that the
    // blc_links table flags as broken (the URL is bound via the %s placeholder).
    $sql = "
SELECT owark.id
FROM {$wpdb->prefix}blc_instances AS instances,
{$wpdb->prefix}blc_links AS links,
{$wpdb->prefix}owark AS owark
WHERE
instances.link_id = links.link_id
AND owark.url = %s
AND owark.url = instances.raw_url
AND links.broken = 1
";
    print_r_log($sql);
    $rows = $wpdb->get_results($wpdb->prepare($sql, $url));
    print_r_log($rows);
    // The first matching archive replaces the URL; otherwise it is returned unchanged.
    if (! empty($rows)) {
        return $this->get_archive_url($rows[0]->id);
    }
    return $url;
}
/**
@ -375,9 +460,8 @@ if (!class_exists("Owark")) {
*
*/
function link_filter($content, $post_id, $post_type) {
global $wpdb;
log_function_call();
// See if we haven't already loaded the broken links for this post...
if ($this->post_id != $post_id || $this->post_type != $post_type) {
@ -393,14 +477,15 @@ if (!class_exists("Owark")) {
{$wpdb->prefix}owark AS owark
WHERE
instances.link_id = links.link_id
AND owark.url = links.final_url COLLATE latin1_swedish_ci
AND owark.url = instances.raw_url
AND instances.container_id = %s
AND instances.container_type = %s
AND links.broken = 1
";
print_r_log($q);
$q = $wpdb->prepare($q, $this->post_id, $this->post_type);
$results = $wpdb->get_results($q);
print_r_log($results);
$this->broken_links = array();
foreach ($results as $link) {
@ -415,7 +500,9 @@ if (!class_exists("Owark")) {
}
// Regexp : see http://stackoverflow.com/questions/2609095/hooking-into-comment-text-to-add-surrounding-tag
return preg_replace_callback('/(<a.*?href\s*=\s*["\'])([^"\'>]+)(["\'][^>]*>.*?<\/a>)/si', array( $this, 'replace_a_link'), $content);
$replaced = preg_replace_callback('/(<a.*?href\s*=\s*["\'])([^"\'>]+)(["\'][^>]*>.*?<\/a>)/si', array( $this, 'replace_a_link'), $content);
print_r_log("replaced: $replaced");
return $replaced;
}
/**
@ -427,6 +514,7 @@ if (!class_exists("Owark")) {
*
*/
function replace_a_link($matches) {
log_function_call();
if (array_key_exists($matches[2], $this->broken_links)) {
return $matches[1].$this->get_archive_url($this->broken_links[$matches[2]]).$matches[3];
} else {
@ -445,40 +533,38 @@ if (!class_exists("Owark")) {
*/
function display_archive($parameter) {
print_r_log("display_archive($parameter)");
global $wpdb;
$id = intval($parameter);
$blog_title = get_bloginfo('name');
$home_url = home_url();
$query = "SELECT *
from {$wpdb->prefix}owark AS owark
where id = {$id}";
$link = $wpdb->get_row($query);
$wpdb->flush();
// Find the file to read
$blog_title = get_bloginfo('name');
$home_url = home_url();
$loc = "";
if( ($pos = strpos($link->arc_location, '/archives')) !== FALSE )
$loc = '/wp-content/plugins/owark' . substr($link->arc_location, $pos);
$arc_loc = home_url() . $loc;
$loc = archives_dir() . '/' . $link->arc_location;
$arc_base = archives_url() . '/' . $link->arc_location;
// The file name is either index.html or guessed from the URL
if ($home_url[strlen($home_url)] == '/') {
$file_location = '.'. $loc .'/index.html';
if ($link->url[strlen($link->url) - 1] == '/') {
$file_location = $loc .'/index.html';
} else {
$parts = str_split($home_url, '/');
$file_location = '.'. $loc . $parts[count($parts)] . '.html';
$parts = explode($link->url, '/');
$file_location = $loc . $parts[count($parts) - 1] . '.html';
}
if (!file_exists($file_location)) {
// If index.html doesn't exist, find another html file!
$dir = opendir('.'.$loc);
$dir = opendir($loc);
if ($dir) {
while (false !== ($file = readdir($dir))) {
if ('.html' === substr($file, strlen($file) - 5)) {
$file_location = '.'.$loc.'/' . $file;
$file_location = $loc.'/' . $file;
break;
}
}
@ -519,19 +605,21 @@ if (!class_exists("Owark")) {
}
}
header("Content-Type: text/html; charset=$encoding");
echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<meta http-equiv="Content-Type" content="text/html; charset='.$encoding.'">';
<meta http-equiv="Content-Type" content="text/html; charset='.$encoding.'">';
echo "<base href=\"{$arc_loc}/\">";
echo "<base href=\"{$arc_base}/\">";
echo '<div style="background:#fff;border:1px solid #999;margin:-1px -1px 0;padding:0;">';
echo '<div style="background:#ddd;border:1px solid #999;color:#000;font:13px arial,sans-serif;font-weight:normal;margin:12px;padding:8px;text-align:left">';
echo "This is an <a href='http://owark.org'>Open Web Archive</a> archive of <a href=\"{$link->url}\">{$link->url}</a>.";
echo "<br />This snapshot has been taken on {$link->arc_date} for the website <a href=\"{$home_url}\">{$blog_title}</a> which contains a link to this page and has saved a copy to be displayed in the page ever disappears.";
echo '</div></div><div style="position:relative">';
print_r_log("file_location: $file_location");
$f = fopen($file_location, "r");
echo $content;
@ -547,7 +635,9 @@ if (!class_exists("Owark")) {
*
*
*/
public static function schedule($occurrences) {
public static function schedule($occurrences, $version) {
log_function_call();
$archiving = get_option( 'owark_archiving', false);
if (! $archiving) {
@ -557,33 +647,43 @@ if (!class_exists("Owark")) {
}
global $wpdb;
$query = "SELECT DISTINCT final_url from {$wpdb->prefix}blc_links
WHERE final_url NOT IN (SELECT url COLLATE latin1_swedish_ci FROM {$wpdb->prefix}owark)
$query = "
SELECT DISTINCT instances.raw_url as url
from {$wpdb->prefix}blc_links as links,
{$wpdb->prefix}blc_instances AS instances
WHERE url NOT IN (SELECT url FROM {$wpdb->prefix}owark)
AND broken=0
AND final_url!=''";
AND last_check is not null
AND instances.link_id = links.link_id";
print_r_log("query: $query");
$url = $wpdb->get_row($query);
print_r_log($url);
$wpdb->flush();
if ($url != NULL) {
$date = date('c');
$relpath = '/archives/'. str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->final_url))) . '/' . $date;
$path = dirname(__FILE__).$relpath;
$relpath = str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->url))) . '/' . $date;
$path = archives_dir()."/$relpath";
//mkdir($path, $recursive=true);
$output = array();
$status = 0;
exec("wget -t3 -E -H -k -K -p -nd -nv --timeout=60 --user-agent=\"Mozilla/5.0 (compatible; owark/0.2; http://owark.org/)\" -P $path {$url->final_url}",
exec("wget -t3 -E -H -k -K -p -nd -nv --timeout=60 --user-agent=\"Mozilla/5.0 (compatible; owark/$version; http://owark.org/)\" -P $path {$url->url} 2>&1",
$output, $status);
print_r_log("wget status: $status");
if ($status != 0) {
print_r_log("wget status: $status, output:");
print_r_log($output);
}
$q = $wpdb->insert("{$wpdb->prefix}owark", array(
'url' => $url->final_url,
'url' => $url->url,
'status' => $status,
'arc_date' => $date,
'arc_location' => $relpath));
if ($occurrences > 0) {
wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1));
wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1, 'version' => $version));
}
}
@ -596,13 +696,13 @@ if (!class_exists("Owark")) {
}
}
}
// Instantiate the plugin once the Owark class is available. A single guard
// is enough, and the closing PHP tag is omitted so that nothing after it can
// be sent to the browser as literal output.
if (class_exists("Owark")) {
    $owark = new Owark();
}