Massive refactoring and bug fixes
This commit is contained in:
parent
460c77f116
commit
49afb2b9ea
188
owark.php
188
owark.php
|
@ -30,10 +30,42 @@ License: GLP2
|
|||
*/
|
||||
if ( ! function_exists('print_r_log')) {
    /**
     * Write a value to the PHP error log, prefixed with a source location.
     *
     * Arrays and objects are dumped with print_r() on the line that follows
     * the location prefix; any other value is appended to the prefix on a
     * single line. Each value is logged exactly once.
     *
     * @param mixed $log Value to log.
     */
    function print_r_log ( $log ) {
        // Arguments are not needed here, and two frames are enough.
        $trace = debug_backtrace( DEBUG_BACKTRACE_IGNORE_ARGS, 2 );

        // Frame 1 does not exist when this helper is called from global
        // scope; fall back to frame 0 instead of reading an undefined offset.
        if ( isset( $trace[1] ) ) {
            $frame = $trace[1];
        } elseif ( isset( $trace[0] ) ) {
            $frame = $trace[0];
        } else {
            $frame = array();
        }

        // A frame may carry no file/line, so guard both keys.
        $file = isset( $frame['file'] ) ? $frame['file'] : '<undefined>';
        $line = isset( $frame['line'] ) ? $frame['line'] : '<undefined>';
        $location = $file . '/#' . $line;

        if ( is_array( $log ) || is_object( $log ) ) {
            error_log( $location . ':' );
            error_log( print_r( $log, true ) );
        } else {
            error_log( $location . ': ' . $log );
        }
    }
}
|
||||
|
||||
if ( ! function_exists('log_function_call')) {
    /**
     * Log the function that called this helper to the PHP error log.
     *
     * One line is written with the file, line and function name taken from
     * backtrace frame 1, followed by one line per argument of that frame
     * giving the argument's type and its print_r() rendering.
     * Missing pieces are logged as '<undefined>'.
     */
    function log_function_call () {
        $trace = debug_backtrace();

        // Frame 1 is absent when called from global scope; an empty frame
        // keeps the '<undefined>' output without touching an undefined offset.
        $frame = isset( $trace[1] ) ? $trace[1] : array();

        $file     = isset( $frame['file'] ) ? $frame['file'] : '<undefined>';
        $line     = isset( $frame['line'] ) ? $frame['line'] : '<undefined>';
        $function = isset( $frame['function'] ) ? $frame['function'] : '<undefined>';

        error_log( $file . '/#' . $line . ' function: ' . $function . '(' );

        // 'args' may be missing from a frame; iterate over nothing in that case.
        $args = ( isset( $frame['args'] ) && is_array( $frame['args'] ) ) ? $frame['args'] : array();
        foreach ( $args as $arg ) {
            error_log( ' * ' . gettype( $arg ) . ': ' . print_r( $arg, true ) );
        }
    }
}
|
||||
|
||||
if ( ! function_exists('archives_dir')) {
    /**
     * Filesystem directory under which archives are stored.
     *
     * @return string WP_CONTENT_DIR joined with OWARK_ARCHIVES_REL_PATH when
     *                that constant is defined, otherwise the 'archives'
     *                directory directly under WP_PLUGIN_DIR.
     */
    function archives_dir() {
        // No override configured: use the default location.
        if ( ! defined('OWARK_ARCHIVES_REL_PATH') ) {
            return WP_PLUGIN_DIR . '/archives';
        }

        return WP_CONTENT_DIR . '/' . OWARK_ARCHIVES_REL_PATH;
    }
}
|
||||
|
||||
if ( ! function_exists('archives_url')) {
    /**
     * Base URL under which archives are served.
     *
     * @return string WP_CONTENT_URL joined with OWARK_ARCHIVES_REL_PATH when
     *                that constant is defined, otherwise the 'archives'
     *                path directly under WP_PLUGIN_URL.
     */
    function archives_url() {
        // No override configured: use the default location.
        if ( ! defined('OWARK_ARCHIVES_REL_PATH') ) {
            return WP_PLUGIN_URL . '/archives';
        }

        return WP_CONTENT_URL . '/' . OWARK_ARCHIVES_REL_PATH;
    }
}
|
||||
|
@ -45,6 +77,7 @@ if (!class_exists("Owark")) {
|
|||
private $post_id = -1;
|
||||
private $post_type = "";
|
||||
private $version = '0.2';
|
||||
private $db_version = '0.2';
|
||||
private $notices = "";
|
||||
|
||||
/**
|
||||
|
@ -75,13 +108,13 @@ if (!class_exists("Owark")) {
|
|||
add_filter('admin_init', array($this, 'flush_rewrite_rules'));
|
||||
add_action( 'template_redirect', array($this, 'template_redirect_intercept') );
|
||||
|
||||
add_filter ( 'the_content', array($this, 'content_filter'));
|
||||
add_filter ( 'comment_text', array($this, 'comment_filter'));
|
||||
add_filter ( 'get_comment_author_link', array($this, 'comment_filter'));
|
||||
add_filter ( 'the_content', array($this, 'content_filter'), 20);
|
||||
add_filter ( 'comment_text', array($this, 'comment_filter'), 20, 2);
|
||||
add_filter ( 'get_comment_author_url', array($this, 'comment_author_url_filter'), 20, 1);
|
||||
|
||||
add_action('owark_schedule_event', array('Owark', 'schedule'));
|
||||
if ( !wp_next_scheduled( 'owark_schedule_event', array('occurrences' => 30) ) ) {
|
||||
wp_schedule_event(time(), 'hourly', 'owark_schedule_event', array('occurrences' => 30));
|
||||
add_action('owark_schedule_event', array('Owark', 'schedule'), 10, 2);
|
||||
if ( !wp_next_scheduled( 'owark_schedule_event', array('occurrences' => 30, 'version' => $this->version) ) ) {
|
||||
wp_schedule_event(time(), 'hourly', 'owark_schedule_event', array('occurrences' => 30, 'version' => $this->version));
|
||||
}
|
||||
|
||||
|
||||
|
@ -95,6 +128,14 @@ if (!class_exists("Owark")) {
|
|||
self::__construct();
|
||||
}
|
||||
|
||||
/**
 * Directory holding the archives, as seen by this class.
 *
 * NOTE(review): this reads OWARK_ARCHIVES_DIR, whereas the global
 * archives_dir() helper in this file reads OWARK_ARCHIVES_REL_PATH;
 * confirm which of the two is meant to be authoritative.
 *
 * @return string OWARK_ARCHIVES_DIR when defined, otherwise the 'archives'
 *                directory next to this file.
 */
function archives_dir() {
    return defined('OWARK_ARCHIVES_DIR')
        ? OWARK_ARCHIVES_DIR
        : dirname(__FILE__) . '/archives';
}
|
||||
|
||||
/**
|
||||
* Check we have everything we need...
|
||||
*
|
||||
|
@ -105,14 +146,21 @@ if (!class_exists("Owark")) {
|
|||
*/
|
||||
function sanity_checks(){
|
||||
|
||||
// print_r_log("Sanity checks");
|
||||
|
||||
// Install or upgrade tables if needed
|
||||
|
||||
$installed_ver = get_option( "owark_db_version" );
|
||||
if ($installed_ver != $this->version) {
|
||||
global $wpdb;
|
||||
$table = $wpdb->prefix."owark";
|
||||
if ($installed_ver == '0.1') {
|
||||
// In version 0.1 final URLs were used, but the broken link checker updates these URLs when a link is detected as broken
|
||||
$table_links = $wpdb->prefix."blc_links";
|
||||
$sql = "update $table as owark join $table_links as links on owark.url = links.final_url COLLATE latin1_swedish_ci set owark.url = links.url COLLATE latin1_swedish_ci";
|
||||
$wpdb->query($sql);
|
||||
$installed_ver = '0.2';
|
||||
update_option( "owark_db_version", $installed_ver );
|
||||
}
|
||||
if ($installed_ver != $this->db_version) {
|
||||
print_r_log("Database upgrade from $installed_ver to {$this->version}");
|
||||
$sql = "CREATE TABLE $table (
|
||||
id int(10) unsigned NOT NULL AUTO_INCREMENT,
|
||||
url text NOT NULL,
|
||||
|
@ -124,11 +172,12 @@ if (!class_exists("Owark")) {
|
|||
KEY `url` (`url`(150)) )";
|
||||
require_once(ABSPATH . 'wp-admin/includes/upgrade.php');
|
||||
dbDelta($sql);
|
||||
update_option( "owark_db_version", $this->db_version );
|
||||
|
||||
update_option( "owark_db_version", $this->version );
|
||||
$this->notices = "<div class=\"updated fade\"><p><strong>The owark table has been installed or upgraded to version {$this->version}</strong></p></div>";
|
||||
$this->notices = "<div class=\"updated fade\"><p><strong>The owark table has been installed or upgraded to version {$this->db_version}</strong></p></div>";
|
||||
}
|
||||
|
||||
|
||||
// Check that the broken link checker is installed
|
||||
if (!function_exists('get_plugins'))
|
||||
require_once (ABSPATH."wp-admin/includes/plugin.php");
|
||||
|
@ -151,7 +200,8 @@ if (!class_exists("Owark")) {
|
|||
}
|
||||
|
||||
// Check if we have an archive subdirectory
|
||||
$archives_dir = dirname(__FILE__) . '/archives';
|
||||
$archives_dir = archives_dir();
|
||||
print_r_log($archives_dir);
|
||||
if (!is_dir($archives_dir)) {
|
||||
@mkdir($archives_dir);
|
||||
if (!is_dir($archives_dir)) {
|
||||
|
@ -350,6 +400,7 @@ if (!class_exists("Owark")) {
|
|||
*
|
||||
*/
|
||||
/**
 * Run post content through link_filter() for the current global post.
 *
 * @param string $content Content handed to the filter.
 * @return string Result of link_filter(), or $content unchanged when no
 *                global post object is available.
 */
function content_filter($content) {
    log_function_call();

    global $post;

    // Without a post object there is no ID or post type to look links up
    // for; pass the content through rather than dereferencing null.
    if ( ! is_object( $post ) ) {
        return $content;
    }

    return $this->link_filter($content, $post->ID, $post->post_type);
}
|
||||
|
@ -362,8 +413,42 @@ if (!class_exists("Owark")) {
|
|||
*
|
||||
*
|
||||
*/
|
||||
/**
 * Run comment text through link_filter() with container type 'comment'.
 *
 * @param string      $content Comment text handed to the filter.
 * @param object|null $comment Comment object supplied by the filter; when it
 *                             is missing, the ID from get_comment_ID() is
 *                             used instead.
 * @return string Result of link_filter().
 */
function comment_filter($content, $comment = null) {
    log_function_call();

    // The comment object may be absent; avoid reading a property of null.
    $comment_id = is_object( $comment ) ? $comment->comment_ID : get_comment_ID();

    return $this->link_filter($content, $comment_id, 'comment');
}
|
||||
|
||||
/**
|
||||
* Filter to replace broken links in author links.
|
||||
*
|
||||
* @package owark
|
||||
* @since 0.2
|
||||
*
|
||||
*
|
||||
*/
|
||||
/**
 * Swap a comment author URL for its archive URL when a matching broken link exists.
 *
 * Looks for an owark row whose url equals $url and also equals the raw_url of
 * a blc_instances row whose blc_links entry is flagged broken.
 *
 * @param string $url Author URL handed to the filter.
 * @return string Archive URL for the first matching row, or $url unchanged
 *                when the query returns nothing.
 */
function comment_author_url_filter($url) {
    log_function_call();

    global $wpdb;

    $sql = "
SELECT owark.id
FROM {$wpdb->prefix}blc_instances AS instances,
{$wpdb->prefix}blc_links AS links,
{$wpdb->prefix}owark AS owark
WHERE
instances.link_id = links.link_id
AND owark.url = %s
AND owark.url = instances.raw_url
AND links.broken = 1
";
    // The template is logged before the URL is bound into it.
    print_r_log($sql);

    $rows = $wpdb->get_results($wpdb->prepare($sql, $url));
    print_r_log($rows);

    if ( ! empty($rows) ) {
        return $this->get_archive_url($rows[0]->id);
    }

    return $url;
}
|
||||
|
||||
/**
|
||||
|
@ -375,9 +460,8 @@ if (!class_exists("Owark")) {
|
|||
*
|
||||
*/
|
||||
function link_filter($content, $post_id, $post_type) {
|
||||
|
||||
global $wpdb;
|
||||
|
||||
log_function_call();
|
||||
// See if we haven't already loaded the broken links for this post...
|
||||
if ($this->post_id != $post_id || $this->post_type != $post_type) {
|
||||
|
||||
|
@ -393,14 +477,15 @@ if (!class_exists("Owark")) {
|
|||
{$wpdb->prefix}owark AS owark
|
||||
WHERE
|
||||
instances.link_id = links.link_id
|
||||
AND owark.url = links.final_url COLLATE latin1_swedish_ci
|
||||
AND owark.url = instances.raw_url
|
||||
AND instances.container_id = %s
|
||||
AND instances.container_type = %s
|
||||
AND links.broken = 1
|
||||
";
|
||||
print_r_log($q);
|
||||
$q = $wpdb->prepare($q, $this->post_id, $this->post_type);
|
||||
$results = $wpdb->get_results($q);
|
||||
|
||||
print_r_log($results);
|
||||
$this->broken_links = array();
|
||||
|
||||
foreach ($results as $link) {
|
||||
|
@ -415,7 +500,9 @@ if (!class_exists("Owark")) {
|
|||
}
|
||||
|
||||
// Regexp : see http://stackoverflow.com/questions/2609095/hooking-into-comment-text-to-add-surrounding-tag
|
||||
return preg_replace_callback('/(<a.*?href\s*=\s*["\'])([^"\'>]+)(["\'][^>]*>.*?<\/a>)/si', array( $this, 'replace_a_link'), $content);
|
||||
$replaced = preg_replace_callback('/(<a.*?href\s*=\s*["\'])([^"\'>]+)(["\'][^>]*>.*?<\/a>)/si', array( $this, 'replace_a_link'), $content);
|
||||
print_r_log("replaced: $replaced");
|
||||
return $replaced;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -427,6 +514,7 @@ if (!class_exists("Owark")) {
|
|||
*
|
||||
*/
|
||||
function replace_a_link($matches) {
|
||||
log_function_call();
|
||||
if (array_key_exists($matches[2], $this->broken_links)) {
|
||||
return $matches[1].$this->get_archive_url($this->broken_links[$matches[2]]).$matches[3];
|
||||
} else {
|
||||
|
@ -445,40 +533,38 @@ if (!class_exists("Owark")) {
|
|||
*/
|
||||
function display_archive($parameter) {
|
||||
|
||||
print_r_log("display_archive($parameter)");
|
||||
global $wpdb;
|
||||
|
||||
$id = intval($parameter);
|
||||
|
||||
$blog_title = get_bloginfo('name');
|
||||
$home_url = home_url();
|
||||
|
||||
$query = "SELECT *
|
||||
from {$wpdb->prefix}owark AS owark
|
||||
where id = {$id}";
|
||||
$link = $wpdb->get_row($query);
|
||||
$wpdb->flush();
|
||||
|
||||
// Find the file to read
|
||||
$blog_title = get_bloginfo('name');
|
||||
$home_url = home_url();
|
||||
|
||||
$loc = "";
|
||||
if( ($pos = strpos($link->arc_location, '/archives')) !== FALSE )
|
||||
$loc = '/wp-content/plugins/owark' . substr($link->arc_location, $pos);
|
||||
$arc_loc = home_url() . $loc;
|
||||
$loc = archives_dir() . '/' . $link->arc_location;
|
||||
$arc_base = archives_url() . '/' . $link->arc_location;
|
||||
|
||||
// The file name is either index.html or guessed from the URL
|
||||
if ($home_url[strlen($home_url)] == '/') {
|
||||
$file_location = '.'. $loc .'/index.html';
|
||||
if ($link->url[strlen($link->url) - 1] == '/') {
|
||||
$file_location = $loc .'/index.html';
|
||||
} else {
|
||||
$parts = str_split($home_url, '/');
|
||||
$file_location = '.'. $loc . $parts[count($parts)] . '.html';
|
||||
$parts = explode($link->url, '/');
|
||||
$file_location = $loc . $parts[count($parts) - 1] . '.html';
|
||||
}
|
||||
|
||||
if (!file_exists($file_location)) {
|
||||
// If index.html doesn't exist, find another html file!
|
||||
$dir = opendir('.'.$loc);
|
||||
$dir = opendir($loc);
|
||||
if ($dir) {
|
||||
while (false !== ($file = readdir($dir))) {
|
||||
if ('.html' === substr($file, strlen($file) - 5)) {
|
||||
$file_location = '.'.$loc.'/' . $file;
|
||||
$file_location = $loc.'/' . $file;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -519,19 +605,21 @@ if (!class_exists("Owark")) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
header("Content-Type: text/html; charset=$encoding");
|
||||
|
||||
echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset='.$encoding.'">';
|
||||
|
||||
echo "<base href=\"{$arc_loc}/\">";
|
||||
echo "<base href=\"{$arc_base}/\">";
|
||||
echo '<div style="background:#fff;border:1px solid #999;margin:-1px -1px 0;padding:0;">';
|
||||
echo '<div style="background:#ddd;border:1px solid #999;color:#000;font:13px arial,sans-serif;font-weight:normal;margin:12px;padding:8px;text-align:left">';
|
||||
echo "This is an <a href='http://owark.org'>Open Web Archive</a> archive of <a href=\"{$link->url}\">{$link->url}</a>.";
|
||||
echo "<br />This snapshot has been taken on {$link->arc_date} for the website <a href=\"{$home_url}\">{$blog_title}</a> which contains a link to this page and has saved a copy to be displayed in the page ever disappears.";
|
||||
echo '</div></div><div style="position:relative">';
|
||||
|
||||
print_r_log("file_location: $file_location");
|
||||
|
||||
$f = fopen($file_location, "r");
|
||||
echo $content;
|
||||
|
@ -547,7 +635,9 @@ if (!class_exists("Owark")) {
|
|||
*
|
||||
*
|
||||
*/
|
||||
public static function schedule($occurrences) {
|
||||
public static function schedule($occurrences, $version) {
|
||||
|
||||
log_function_call();
|
||||
|
||||
$archiving = get_option( 'owark_archiving', false);
|
||||
if (! $archiving) {
|
||||
|
@ -557,33 +647,43 @@ if (!class_exists("Owark")) {
|
|||
}
|
||||
global $wpdb;
|
||||
|
||||
$query = "SELECT DISTINCT final_url from {$wpdb->prefix}blc_links
|
||||
WHERE final_url NOT IN (SELECT url COLLATE latin1_swedish_ci FROM {$wpdb->prefix}owark)
|
||||
$query = "
|
||||
SELECT DISTINCT instances.raw_url as url
|
||||
from {$wpdb->prefix}blc_links as links,
|
||||
{$wpdb->prefix}blc_instances AS instances
|
||||
WHERE url NOT IN (SELECT url FROM {$wpdb->prefix}owark)
|
||||
AND broken=0
|
||||
AND final_url!=''";
|
||||
AND last_check is not null
|
||||
AND instances.link_id = links.link_id";
|
||||
print_r_log("query: $query");
|
||||
|
||||
$url = $wpdb->get_row($query);
|
||||
print_r_log($url);
|
||||
$wpdb->flush();
|
||||
|
||||
if ($url != NULL) {
|
||||
$date = date('c');
|
||||
$relpath = '/archives/'. str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->final_url))) . '/' . $date;
|
||||
$path = dirname(__FILE__).$relpath;
|
||||
$relpath = str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->url))) . '/' . $date;
|
||||
$path = archives_dir()."/$relpath";
|
||||
//mkdir($path, $recursive=true);
|
||||
|
||||
$output = array();
|
||||
$status = 0;
|
||||
exec("wget -t3 -E -H -k -K -p -nd -nv --timeout=60 --user-agent=\"Mozilla/5.0 (compatible; owark/0.2; http://owark.org/)\" -P $path {$url->final_url}",
|
||||
exec("wget -t3 -E -H -k -K -p -nd -nv --timeout=60 --user-agent=\"Mozilla/5.0 (compatible; owark/$version; http://owark.org/)\" -P $path {$url->url} 2>&1",
|
||||
$output, $status);
|
||||
print_r_log("wget status: $status");
|
||||
if ($status != 0) {
|
||||
print_r_log("wget status: $status, output:");
|
||||
print_r_log($output);
|
||||
}
|
||||
$q = $wpdb->insert("{$wpdb->prefix}owark", array(
|
||||
'url' => $url->final_url,
|
||||
'url' => $url->url,
|
||||
'status' => $status,
|
||||
'arc_date' => $date,
|
||||
'arc_location' => $relpath));
|
||||
|
||||
if ($occurrences > 0) {
|
||||
wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1));
|
||||
wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1, 'version' => $version));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue