diff --git a/owark.php b/owark.php index caebee1..4b4cfaf 100644 --- a/owark.php +++ b/owark.php @@ -1,18 +1,18 @@ ') . '/#' . (isset($caller_strace['line']) ? $caller_strace['line'] : '') . ' function: ' . (isset($caller_strace['function']) ? $caller_strace['function'] : '') . '('); + foreach ($caller_strace['args'] as $arg) { + error_log(' * ' . gettype($arg) . ': ' . print_r( $arg, true )); + } + } +} + +if ( ! function_exists('archives_dir')) { + function archives_dir() { + if (defined('OWARK_ARCHIVES_REL_PATH')) { + return WP_CONTENT_DIR . '/' . OWARK_ARCHIVES_REL_PATH; + } else { + return WP_PLUGIN_DIR . '/archives'; + } + } +} + +if ( ! function_exists('archives_url')) { + function archives_url() { + if (defined('OWARK_ARCHIVES_REL_PATH')) { + return WP_CONTENT_URL . '/' . OWARK_ARCHIVES_REL_PATH; + } else { + return WP_PLUGIN_URL . '/archives'; + } + } } if (!class_exists("Owark")) { class Owark { - private $broken_links = array(); - private $post_id = -1; - private $post_type = ""; - private $version = '0.2'; - private $notices = ""; + private $broken_links = array(); + private $post_id = -1; + private $post_type = ""; + private $version = '0.2'; + private $db_version = '0.2'; + private $notices = ""; - /** - * Class constructor - * - * @package owark - * @since 0.1 - * - * - */ + /** + * Class constructor + * + * @package owark + * @since 0.1 + * + * + */ public function __construct() { - if (is_admin()) { - add_action('admin_menu', array($this, 'owark_admin_menu')); - add_action('plugins_loaded', array($this, 'sanity_checks')); - } + if (is_admin()) { + add_action('admin_menu', array($this, 'owark_admin_menu')); + add_action('plugins_loaded', array($this, 'sanity_checks')); + } - // See http://stackoverflow.com/questions/2210826/need-help-with-wp-rewrite-in-a-wordpress-plugin - // Using a filter instead of an action to create the rewrite rules. - // Write rules -> Add query vars -> Recalculate rewrite rules - add_filter('rewrite_rules_array', array($this, 'create_rewrite_rules')); - add_filter('query_vars',array($this, 'add_query_vars')); + // See http://stackoverflow.com/questions/2210826/need-help-with-wp-rewrite-in-a-wordpress-plugin + // Using a filter instead of an action to create the rewrite rules. + // Write rules -> Add query vars -> Recalculate rewrite rules + add_filter('rewrite_rules_array', array($this, 'create_rewrite_rules')); + add_filter('query_vars',array($this, 'add_query_vars')); - // Recalculates rewrite rules during admin init to save resources. - // Could probably run it once as long as it isn't going to change or check the - // $wp_rewrite rules to see if it's active. - add_filter('admin_init', array($this, 'flush_rewrite_rules')); - add_action( 'template_redirect', array($this, 'template_redirect_intercept') ); + // Recalculates rewrite rules during admin init to save resources. + // Could probably run it once as long as it isn't going to change or check the + // $wp_rewrite rules to see if it's active. + add_filter('admin_init', array($this, 'flush_rewrite_rules')); + add_action( 'template_redirect', array($this, 'template_redirect_intercept') ); - add_filter ( 'the_content', array($this, 'content_filter')); - add_filter ( 'comment_text', array($this, 'comment_filter')); - add_filter ( 'get_comment_author_link', array($this, 'comment_filter')); + add_filter ( 'the_content', array($this, 'content_filter'), 20); + add_filter ( 'comment_text', array($this, 'comment_filter'), 20, 2); + add_filter ( 'get_comment_author_url', array($this, 'comment_author_url_filter'), 20, 1); - add_action('owark_schedule_event', array('Owark', 'schedule')); - if ( !wp_next_scheduled( 'owark_schedule_event', array('occurrences' => 30) ) ) { - wp_schedule_event(time(), 'hourly', 'owark_schedule_event', array('occurrences' => 30)); - } + add_action('owark_schedule_event', array('Owark', 'schedule'), 10, 2); + if ( !wp_next_scheduled( 'owark_schedule_event', array('occurrences' => 30, 'version' => $this->version) ) ) { + wp_schedule_event(time(), 'hourly', 'owark_schedule_event', array('occurrences' => 30, 'version' => $this->version)); + } } function Owark() { - // PHP4-style constructor. - // This will NOT be invoked, unless a sub-class that extends `foo` calls it. - // In that case, call the new-style constructor to keep compatibility. - self::__construct(); - } + // PHP4-style constructor. + // This will NOT be invoked, unless a sub-class that extends `foo` calls it. + // In that case, call the new-style constructor to keep compatibility. + self::__construct(); + } - /** - * Check we have everything we need... - * - * @package owark - * @since 0.1 - * - * - */ - function sanity_checks(){ + function archives_dir() { + if (defined('OWARK_ARCHIVES_DIR')) { + return OWARK_ARCHIVES_DIR; + } else { + return dirname(__FILE__) . '/archives'; + } + } - // print_r_log("Sanity checks"); + /** + * Check we have everything we need... + * + * @package owark + * @since 0.1 + * + * + */ + function sanity_checks(){ - // Install or upgrade tables if needed + // Install or upgrade tables if needed - $installed_ver = get_option( "owark_db_version" ); - if ($installed_ver != $this->version) { - global $wpdb; - $table = $wpdb->prefix."owark"; - $sql = "CREATE TABLE $table ( - id int(10) unsigned NOT NULL AUTO_INCREMENT, - url text NOT NULL, - status varchar(20) NOT NULL DEFAULT 'to-archive', - arc_date datetime, - arc_location text, - encoding varchar(10), - PRIMARY KEY(`id`), - KEY `url` (`url`(150)) )"; - require_once(ABSPATH . 'wp-admin/includes/upgrade.php'); - dbDelta($sql); + $installed_ver = get_option( "owark_db_version" ); + global $wpdb; + $table = $wpdb->prefix."owark"; + if ($installed_ver == '0.1') { + // In version 0.1 final URLs where used but the broken link checkers update these URLs when a link is detected broken + $table_links = $wpdb->prefix."blc_links"; + $sql = "update $table as owark join $table_links as links on owark.url = links.final_url COLLATE latin1_swedish_ci set owark.url = links.url COLLATE latin1_swedish_ci"; + $wpdb->query($sql); + $installed_ver = '0.2'; + update_option( "owark_db_version", $installed_ver ); + } + if ($installed_ver != $this->db_version) { + print_r_log("Database upgrade from $installed_ver to {$this->version}"); + $sql = "CREATE TABLE $table ( + id int(10) unsigned NOT NULL AUTO_INCREMENT, + url text NOT NULL, + status varchar(20) NOT NULL DEFAULT 'to-archive', + arc_date datetime, + arc_location text, + encoding varchar(10), + PRIMARY KEY(`id`), + KEY `url` (`url`(150)) )"; + require_once(ABSPATH . 'wp-admin/includes/upgrade.php'); + dbDelta($sql); + update_option( "owark_db_version", $this->db_version ); - update_option( "owark_db_version", $this->version ); - $this->notices = "

The owark table has been installed or upgraded to version {$this->version}

"; - } + $this->notices = "

The owark table has been installed or upgraded to version {$this->db_version}

"; + } - // Check that the broken link checker is installed - if (!function_exists('get_plugins')) + + // Check that the broken link checker is installed + if (!function_exists('get_plugins')) require_once (ABSPATH."wp-admin/includes/plugin.php"); - $blc = 'not-found'; - foreach(get_plugins() as $plugin_file => $plugin_data) { - if ($plugin_data['Title'] == 'Broken Link Checker') { - if (is_plugin_active($plugin_file)) { - $blc = 'active'; - } else { - $blc = 'inactive'; - } - } - } - - if ($blc == 'inactive') { - $this->notices = $this->notices . "

Please activate the Broken Link Checker so that the Open Web Archive can be fully functional.

"; - } else if ($blc == 'not-found') { - $this->notices = $this->notices . "

The Open Web Archive relies on the Broken Link Checker. Please install this plugin!

"; - } - - // Check if we have an archive subdirectory - $archives_dir = dirname(__FILE__) . '/archives'; - if (!is_dir($archives_dir)) { - @mkdir($archives_dir); - if (!is_dir($archives_dir)) { - $this->notices = $this->notices . "

The Open Web Archive has not been able to create the folder /archives in its installation directory. Please create it by hand and make it writable for the web server.

"; - } - } elseif (! is_writable($archives_dir)) { - $this->notices = $this->notices . "

The Open Web Archive needs a writable folder /archives in its installation directory. Please make it writable for the web server.

"; + $blc = 'not-found'; + foreach(get_plugins() as $plugin_file => $plugin_data) { + if ($plugin_data['Title'] == 'Broken Link Checker') { + if (is_plugin_active($plugin_file)) { + $blc = 'active'; + } else { + $blc = 'inactive'; } + } + } + + if ($blc == 'inactive') { + $this->notices = $this->notices . "

Please activate the Broken Link Checker so that the Open Web Archive can be fully functional.

"; + } else if ($blc == 'not-found') { + $this->notices = $this->notices . "

The Open Web Archive relies on the Broken Link Checker. Please install this plugin!

"; + } + + // Check if we have an archive subdirectory + $archives_dir = archives_dir(); + print_r_log($archives_dir); + if (!is_dir($archives_dir)) { + @mkdir($archives_dir); + if (!is_dir($archives_dir)) { + $this->notices = $this->notices . "

The Open Web Archive has not been able to create the folder /archives in its installation directory. Please create it by hand and make it writable for the web server.

"; + } + } elseif (! is_writable($archives_dir)) { + $this->notices = $this->notices . "

The Open Web Archive needs a writable folder /archives in its installation directory. Please make it writable for the web server.

"; + } + + // Check that we can execute commands + + if ( ini_get('disable_functions') ) { + $not_allowed = ini_get('disable_functions'); + if ( stristr($not_allowed, 'exec') ) { + $this->notices = $this->notices . "

The Open Web Archives requires that exec() is allowed to run wget and retrieve the pages to archive.

"; + } + } + + // Check that wget is installed + + $output = array(); + exec('/usr/bin/wget -V', $output); + + + if ( empty($output) ) { + $this->notices = $this->notices . + "

The Open Web Archives is not able to run GNU wget and retrieve the pages to archive. Please check that wget is installed and on the default path.

"; + } + + // We need as least version 1.11 or higher + $helper = preg_match('/GNU Wget ([0-9\.]+) /', $output[0], $wget_version); + if ( $wget_version[0] < '1.11' ) { + $this->notices = $this->notices . "

The Open Web Archives needs GNU wget version 1.11 or higher.
Version read: {$wget_version[0]}

"; + } + + if ($this->notices != '') { + add_action('admin_notices', array($this, 'admin_notices')); + } + + } + + /** + * Show admin notices + * + * @package owark + * @since 0.1 + * + * + */ + function admin_notices(){ + + echo $this->notices; + + } + + /** + * Admin menus + * + * @package owark + * @since 0.1 + * + * + */ + function owark_admin_menu() { + add_management_page(__('The Open Web Archive', 'owark'), __('Web Archive', 'owark'), 'edit_others_posts', 'owark', array($this, 'management_page')); + } + + /** + * URL of an archive page + * + * @package owark + * @since 0.1 + * + * + */ + function get_archive_url($archive_id) { + return home_url().'/owark/'.$archive_id; + } + + /** + * Display the admin/tools page. + * + * @package owark + * @since 0.1 + * + * + */ + function management_page() { + //must check that the user has the required capability + if (!current_user_can('edit_others_posts')) { + wp_die( __('You do not have sufficient permissions to access this page.') ); + } + + global $wpdb; + + echo '
'; + screen_icon(); + echo '

Owark - The Open Web Archive

'; + echo '

Tired of broken links? Archive yours with the Open Web Archive!

'; + echo "
"; + + echo '

List of broken links with archived pages:

'; + + $query = "SELECT owark.id, owark.url, owark.status, owark.arc_date, owark.arc_location, blc_links.status_text + FROM {$wpdb->prefix}owark AS owark, {$wpdb->prefix}blc_links as blc_links + WHERE owark.url = blc_links.final_url COLLATE latin1_swedish_ci and blc_links.broken = 1 + ORDER BY owark.url"; + $results = $wpdb->get_results($query); + + echo ''; + echo ''; + echo ''; + echo ''; + echo ''; + echo ''; + echo ''; + echo ''; + + foreach ($results as $link) { + $archive_url = $this->get_archive_url($link->id); + echo " + + + "; + } + + echo ''; + echo '
URLArchive
url}\" target='_blank'>{$link->url}{$link->arc_date}
'; + + + } + + /** + * Add a rewrite rule to display archive pages + * + * @package owark + * @since 0.1 + * + * + */ + function create_rewrite_rules($rules) { + global $wp_rewrite; + $newRule = array('owark/(.+)' => 'index.php?owark='.$wp_rewrite->preg_index(1)); + $newRules = $newRule + $rules; + return $newRules; + } + + /** + * Add a query variable used to display archive pages + * + * @package owark + * @since 0.1 + * + * + */ + function add_query_vars($qvars) { + $qvars[] = 'owark'; + return $qvars; + } + + /** + * Title says it all ;) ... + * + * @package owark + * @since 0.1 + * + * + */ + function flush_rewrite_rules() { + global $wp_rewrite; + $wp_rewrite->flush_rules(); + } + + /** + * Intercepts archive pages. + * + * @package owark + * @since 0.1 + * + * + */ + function template_redirect_intercept() { + global $wp_query; + if ($wp_query->get('owark')) { + $this->display_archive($wp_query->get('owark')); + exit; + } + } + + /** + * Filter to replace broken links in comments. + * + * @package owark + * @since 0.1 + * + * + */ + function content_filter($content) { + log_function_call(); + global $post; + return $this->link_filter($content, $post->ID, $post->post_type); + } + + /** + * Filter to replace broken links in comments. + * + * @package owark + * @since 0.1 + * + * + */ + function comment_filter($content, $comment) { + log_function_call(); + return $this->link_filter($content, $comment->comment_ID, 'comment'); + } + + /** + * Filter to replace broken links in author links. + * + * @package owark + * @since 0.2 + * + * + */ + function comment_author_url_filter($url) { + log_function_call(); + global $wpdb; + $q = " + SELECT owark.id + FROM {$wpdb->prefix}blc_instances AS instances, + {$wpdb->prefix}blc_links AS links, + {$wpdb->prefix}owark AS owark + WHERE + instances.link_id = links.link_id + AND owark.url = %s + AND owark.url = instances.raw_url + AND links.broken = 1 + "; + print_r_log($q); + $q = $wpdb->prepare($q, $url); + $results = $wpdb->get_results($q); + print_r_log($results); + if (empty($results)) { + return $url; + } else { + return $this->get_archive_url($results[0]->id); + } + } + + /** + * Generic filter to replace broken links in content. + * + * @package owark + * @since 0.1 + * + * + */ + function link_filter($content, $post_id, $post_type) { + global $wpdb; + log_function_call(); + // See if we haven't already loaded the broken links for this post... + if ($this->post_id != $post_id || $this->post_type != $post_type) { + + $this->post_id = $post_id; + $this->post_type = $post_type; + + //Retrieve info about all occurrences of broken links in the current post + //which happens for comments (they have links to check in 2 different filters) + $q = " + SELECT instances.raw_url, owark.id + FROM {$wpdb->prefix}blc_instances AS instances, + {$wpdb->prefix}blc_links AS links, + {$wpdb->prefix}owark AS owark + WHERE + instances.link_id = links.link_id + AND owark.url = instances.raw_url + AND instances.container_id = %s + AND instances.container_type = %s + AND links.broken = 1 + "; + print_r_log($q); + $q = $wpdb->prepare($q, $this->post_id, $this->post_type); + $results = $wpdb->get_results($q); + print_r_log($results); + $this->broken_links = array(); + + foreach ($results as $link) { + $this->broken_links[$link->raw_url] = $link->id; + } + + } + + + if (empty($this->broken_links)) { + return $content; + } + + // Regexp : see http://stackoverflow.com/questions/2609095/hooking-into-comment-text-to-add-surrounding-tag + $replaced = preg_replace_callback('/(]+)(["\'][^>]*>.*?<\/a>)/si', array( $this, 'replace_a_link'), $content); + print_r_log("replaced: $replaced"); + return $replaced; + } + + /** + * Replace a link. + * + * @package owark + * @since 0.1 + * + * + */ + function replace_a_link($matches) { + log_function_call(); + if (array_key_exists($matches[2], $this->broken_links)) { + return $matches[1].$this->get_archive_url($this->broken_links[$matches[2]]).$matches[3]; + } else { + return $matches[0]; + } + } + + + /** + * Display an archive page + * + * @package owark + * @since 0.1 + * + * + */ + function display_archive($parameter) { + + print_r_log("display_archive($parameter)"); + global $wpdb; + + $id = intval($parameter); + + $blog_title = get_bloginfo('name'); + $home_url = home_url(); + + $query = "SELECT * + from {$wpdb->prefix}owark AS owark + where id = {$id}"; + $link = $wpdb->get_row($query); + $wpdb->flush(); + + $loc = archives_dir() . '/' . $link->arc_location; + $arc_base = archives_url() . '/' . $link->arc_location; + + // The file name is either index.html or guessed from the URL + if ($link->url[strlen($link->url) - 1] == '/') { + $file_location = $loc .'/index.html'; + } else { + $parts = explode($link->url, '/'); + $file_location = $loc . $parts[count($parts) - 1] . '.html'; + } + + if (!file_exists($file_location)) { + // If index.html doesn't exist, find another html file! + $dir = opendir($loc); + if ($dir) { + while (false !== ($file = readdir($dir))) { + if ('.html' === substr($file, strlen($file) - 5)) { + $file_location = $loc.'/' . $file; + break; + } + } + closedir($dir); + } + } + + // Read the file + + if (file_exists($file_location)) { + $f = fopen($file_location, "r"); + $content = fread($f, filesize($file_location)); + fclose($f); + } else { + $content = 'Archive not found'; + } + + // Which encoding? + $encoding = $link->encoding; + + if ($encoding == NULL) { + // We need to guess the encoding! + + $matches = NULL; + // + if (preg_match('/]*charset\s*=\s*([^"\'>]+)\s*["\']/si', + $content, $matches) > 0) { + $encoding = $matches[1]; + } else { + $encoding = mb_detect_encoding($content); + } + + if ($encoding) { + $wpdb->update( + "{$wpdb->prefix}owark", + array('encoding' => $encoding), + array('id' => $id)); + } + } + + + header("Content-Type: text/html; charset=$encoding"); + + echo ' + '; + + echo ""; + echo '
'; + echo '
'; + echo "This is an Open Web Archive archive of url}\">{$link->url}."; + echo "
This snapshot has been taken on {$link->arc_date} for the website {$blog_title} which contains a link to this page and has saved a copy to be displayed in the page ever disappears."; + echo '
'; + + print_r_log("file_location: $file_location"); + + $f = fopen($file_location, "r"); + echo $content; + echo '
'; + + } + + /** + * Check if we've got something to archive + * + * @package owark + * @since 0.1 + * + * + */ + public static function schedule($occurrences, $version) { + + log_function_call(); + + $archiving = get_option( 'owark_archiving', false); + if (! $archiving) { + update_option('owark_archiving', true); + } else { + return; + } + global $wpdb; + + $query = " + SELECT DISTINCT instances.raw_url as url + from {$wpdb->prefix}blc_links as links, + {$wpdb->prefix}blc_instances AS instances + WHERE url NOT IN (SELECT url FROM {$wpdb->prefix}owark) + AND broken=0 + AND last_check is not null + AND instances.link_id = links.link_id"; + print_r_log("query: $query"); + + $url = $wpdb->get_row($query); + print_r_log($url); + $wpdb->flush(); + + if ($url != NULL) { + $date = date('c'); + $relpath = str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->url))) . '/' . $date; + $path = archives_dir()."/$relpath"; + //mkdir($path, $recursive=true); + + $output = array(); + $status = 0; + exec("wget -t3 -E -H -k -K -p -nd -nv --timeout=60 --user-agent=\"Mozilla/5.0 (compatible; owark/$version; http://owark.org/)\" -P $path {$url->url} 2>&1", + $output, $status); + print_r_log("wget status: $status"); + if ($status != 0) { + print_r_log("wget status: $status, output:"); + print_r_log($output); + } + $q = $wpdb->insert("{$wpdb->prefix}owark", array( + 'url' => $url->url, + 'status' => $status, + 'arc_date' => $date, + 'arc_location' => $relpath)); + + if ($occurrences > 0) { + wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1, 'version' => $version)); + } + + } + delete_option('owark_archiving'); + } + - // Check that we can execute commands - - if ( ini_get('disable_functions') ) { - $not_allowed = ini_get('disable_functions'); - if ( stristr($not_allowed, 'exec') ) { - $this->notices = $this->notices . "

The Open Web Archives requires that exec() is allowed to run wget and retrieve the pages to archive.

"; - } - } - - // Check that wget is installed - - $output = array(); - exec('/usr/bin/wget -V', $output); - - - if ( empty($output) ) { - $this->notices = $this->notices . - "

The Open Web Archives is not able to run GNU wget and retrieve the pages to archive. Please check that wget is installed and on the default path.

"; - } - - // We need as least version 1.11 or higher - $helper = preg_match('/GNU Wget ([0-9\.]+) /', $output[0], $wget_version); - if ( $wget_version[0] < '1.11' ) { - $this->notices = $this->notices . "

The Open Web Archives needs GNU wget version 1.11 or higher.
Version read: {$wget_version[0]}

"; - } - - if ($this->notices != '') { - add_action('admin_notices', array($this, 'admin_notices')); - } - - } - - /** - * Show admin notices - * - * @package owark - * @since 0.1 - * - * - */ - function admin_notices(){ - - echo $this->notices; - - } - - /** - * Admin menus - * - * @package owark - * @since 0.1 - * - * - */ - function owark_admin_menu() { - add_management_page(__('The Open Web Archive', 'owark'), __('Web Archive', 'owark'), 'edit_others_posts', 'owark', array($this, 'management_page')); - } - - /** - * URL of an archive page - * - * @package owark - * @since 0.1 - * - * - */ - function get_archive_url($archive_id) { - return home_url().'/owark/'.$archive_id; - } - - /** - * Display the admin/tools page. - * - * @package owark - * @since 0.1 - * - * - */ - function management_page() { - //must check that the user has the required capability - if (!current_user_can('edit_others_posts')) { - wp_die( __('You do not have sufficient permissions to access this page.') ); - } - - global $wpdb; - - echo '
'; - screen_icon(); - echo '

Owark - The Open Web Archive

'; - echo '

Tired of broken links? Archive yours with the Open Web Archive!

'; - echo "
"; - - echo '

List of broken links with archived pages:

'; - - $query = "SELECT owark.id, owark.url, owark.status, owark.arc_date, owark.arc_location, blc_links.status_text - FROM {$wpdb->prefix}owark AS owark, {$wpdb->prefix}blc_links as blc_links - WHERE owark.url = blc_links.final_url COLLATE latin1_swedish_ci and blc_links.broken = 1 - ORDER BY owark.url"; - $results = $wpdb->get_results($query); - - echo ''; - echo ''; - echo ''; - echo ''; - echo ''; - echo ''; - echo ''; - echo ''; - - foreach ($results as $link) { - $archive_url = $this->get_archive_url($link->id); - echo " - - - "; - } - - echo ''; - echo '
URLArchive
url}\" target='_blank'>{$link->url}{$link->arc_date}
'; - - - } - - /** - * Add a rewrite rule to display archive pages - * - * @package owark - * @since 0.1 - * - * - */ - function create_rewrite_rules($rules) { - global $wp_rewrite; - $newRule = array('owark/(.+)' => 'index.php?owark='.$wp_rewrite->preg_index(1)); - $newRules = $newRule + $rules; - return $newRules; - } - - /** - * Add a query variable used to display archive pages - * - * @package owark - * @since 0.1 - * - * - */ - function add_query_vars($qvars) { - $qvars[] = 'owark'; - return $qvars; - } - - /** - * Title says it all ;) ... - * - * @package owark - * @since 0.1 - * - * - */ - function flush_rewrite_rules() { - global $wp_rewrite; - $wp_rewrite->flush_rules(); - } - - /** - * Intercepts archive pages. - * - * @package owark - * @since 0.1 - * - * - */ - function template_redirect_intercept() { - global $wp_query; - if ($wp_query->get('owark')) { - $this->display_archive($wp_query->get('owark')); - exit; - } - } - - /** - * Filter to replace broken links in comments. - * - * @package owark - * @since 0.1 - * - * - */ - function content_filter($content) { - global $post; - return $this->link_filter($content, $post->ID, $post->post_type); - } - - /** - * Filter to replace broken links in comments. - * - * @package owark - * @since 0.1 - * - * - */ - function comment_filter($content) { - return $this->link_filter($content, get_comment_ID(), 'comment'); - } - - /** - * Generic filter to replace broken links in content. - * - * @package owark - * @since 0.1 - * - * - */ - function link_filter($content, $post_id, $post_type) { - - global $wpdb; - - // See if we haven't already loaded the broken links for this post... - if ($this->post_id != $post_id || $this->post_type != $post_type) { - - $this->post_id = $post_id; - $this->post_type = $post_type; - - //Retrieve info about all occurrences of broken links in the current post - //which happens for comments (they have links to check in 2 different filters) - $q = " - SELECT instances.raw_url, owark.id - FROM {$wpdb->prefix}blc_instances AS instances, - {$wpdb->prefix}blc_links AS links, - {$wpdb->prefix}owark AS owark - WHERE - instances.link_id = links.link_id - AND owark.url = links.final_url COLLATE latin1_swedish_ci - AND instances.container_id = %s - AND instances.container_type = %s - AND links.broken = 1 - "; - $q = $wpdb->prepare($q, $this->post_id, $this->post_type); - $results = $wpdb->get_results($q); - - $this->broken_links = array(); - - foreach ($results as $link) { - $this->broken_links[$link->raw_url] = $link->id; - } - - } - - - if (empty($this->broken_links)) { - return $content; - } - - // Regexp : see http://stackoverflow.com/questions/2609095/hooking-into-comment-text-to-add-surrounding-tag - return preg_replace_callback('/(]+)(["\'][^>]*>.*?<\/a>)/si', array( $this, 'replace_a_link'), $content); - } - - /** - * Replace a link. - * - * @package owark - * @since 0.1 - * - * - */ - function replace_a_link($matches) { - if (array_key_exists($matches[2], $this->broken_links)) { - return $matches[1].$this->get_archive_url($this->broken_links[$matches[2]]).$matches[3]; - } else { - return $matches[0]; - } - } - - - /** - * Display an archive page - * - * @package owark - * @since 0.1 - * - * - */ - function display_archive($parameter) { - - global $wpdb; - - $id = intval($parameter); - - $query = "SELECT * - from {$wpdb->prefix}owark AS owark - where id = {$id}"; - $link = $wpdb->get_row($query); - $wpdb->flush(); - - // Find the file to read - $blog_title = get_bloginfo('name'); - $home_url = home_url(); - - $loc = ""; - if( ($pos = strpos($link->arc_location, '/archives')) !== FALSE ) - $loc = '/wp-content/plugins/owark' . substr($link->arc_location, $pos); - $arc_loc = home_url() . $loc; - - // The file name is either index.html or guessed from the URL - if ($home_url[strlen($home_url)] == '/') { - $file_location = '.'. $loc .'/index.html'; - } else { - $parts = str_split($home_url, '/'); - $file_location = '.'. $loc . $parts[count($parts)] . '.html'; - } - - if (!file_exists($file_location)) { - // If index.html doesn't exist, find another html file! - $dir = opendir('.'.$loc); - if ($dir) { - while (false !== ($file = readdir($dir))) { - if ('.html' === substr($file, strlen($file) - 5)) { - $file_location = '.'.$loc.'/' . $file; - break; - } - } - closedir($dir); - } - } - - // Read the file - - if (file_exists($file_location)) { - $f = fopen($file_location, "r"); - $content = fread($f, filesize($file_location)); - fclose($f); - } else { - $content = 'Archive not found'; - } - - // Which encoding? - $encoding = $link->encoding; - - if ($encoding == NULL) { - // We need to guess the encoding! - - $matches = NULL; - // - if (preg_match('/]*charset\s*=\s*([^"\'>]+)\s*["\']/si', - $content, $matches) > 0) { - $encoding = $matches[1]; - } else { - $encoding = mb_detect_encoding($content); - } - - if ($encoding) { - $wpdb->update( - "{$wpdb->prefix}owark", - array('encoding' => $encoding), - array('id' => $id)); - } - } - - header("Content-Type: text/html; charset=$encoding"); - - echo ' -'; - - echo ""; - echo '
'; - echo '
'; - echo "This is an Open Web Archive archive of url}\">{$link->url}."; - echo "
This snapshot has been taken on {$link->arc_date} for the website {$blog_title} which contains a link to this page and has saved a copy to be displayed in the page ever disappears."; - echo '
'; - - - $f = fopen($file_location, "r"); - echo $content; - echo '
'; - - } - - /** - * Check if we've got something to archive - * - * @package owark - * @since 0.1 - * - * - */ - public static function schedule($occurrences) { - - $archiving = get_option( 'owark_archiving', false); - if (! $archiving) { - update_option('owark_archiving', true); - } else { - return; - } - global $wpdb; - - $query = "SELECT DISTINCT final_url from {$wpdb->prefix}blc_links - WHERE final_url NOT IN (SELECT url COLLATE latin1_swedish_ci FROM {$wpdb->prefix}owark) - AND broken=0 - AND final_url!=''"; - $url = $wpdb->get_row($query); - print_r_log($url); - $wpdb->flush(); - - if ($url != NULL) { - $date = date('c'); - $relpath = '/archives/'. str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->final_url))) . '/' . $date; - $path = dirname(__FILE__).$relpath; - //mkdir($path, $recursive=true); - - $output = array(); - $status = 0; - exec("wget -t3 -E -H -k -K -p -nd -nv --timeout=60 --user-agent=\"Mozilla/5.0 (compatible; owark/0.2; http://owark.org/)\" -P $path {$url->final_url}", - $output, $status); - print_r_log("wget status: $status"); - $q = $wpdb->insert("{$wpdb->prefix}owark", array( - 'url' => $url->final_url, - 'status' => $status, - 'arc_date' => $date, - 'arc_location' => $relpath)); - - if ($occurrences > 0) { - wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1)); - } - - } - delete_option('owark_archiving'); - } - + } - } + } -} - -if (class_exists("Owark")) { - $owark = new Owark(); -} + if (class_exists("Owark")) { + $owark = new Owark(); + } -?> + ?>