') . '/#' . (isset($caller_strace['line']) ? $caller_strace['line'] : '') . ' function: ' . (isset($caller_strace['function']) ? $caller_strace['function'] : '') . '(');
        foreach ($caller_strace['args'] as $arg) {
            error_log(' * ' . gettype($arg) . ': ' . print_r($arg, true));
        }
    }
}

if (!function_exists('archives_dir')) {

    /**
     * Filesystem directory in which archived pages are stored.
     *
     * @return string WP_CONTENT_DIR/OWARK_ARCHIVES_REL_PATH when that constant is
     *                defined, WP_PLUGIN_DIR/archives otherwise.
     */
    function archives_dir() {
        if (defined('OWARK_ARCHIVES_REL_PATH')) {
            return WP_CONTENT_DIR . '/' . OWARK_ARCHIVES_REL_PATH;
        }
        return WP_PLUGIN_DIR . '/archives';
    }

}

if (!function_exists('archives_url')) {

    /**
     * Public URL matching archives_dir().
     *
     * @return string WP_CONTENT_URL/OWARK_ARCHIVES_REL_PATH when that constant is
     *                defined, WP_PLUGIN_URL/archives otherwise.
     */
    function archives_url() {
        if (defined('OWARK_ARCHIVES_REL_PATH')) {
            return WP_CONTENT_URL . '/' . OWARK_ARCHIVES_REL_PATH;
        }
        return WP_PLUGIN_URL . '/archives';
    }

}

if (!class_exists("Owark")) {

    /**
     * Open Web Archive plugin: archives the pages linked from the blog with wget
     * and serves the local copy in place of links reported as broken by the
     * Broken Link Checker plugin.
     *
     * NOTE(review): the reviewed copy of this file had its HTML markup stripped
     * (empty echo statements, truncated regular expressions). The markup and the
     * two "<a ...>" / "<meta ...>" patterns below were reconstructed from the
     * surviving text and unused variables -- compare with the upstream source.
     */
    class Owark {

        /** Rewrite pattern routing /owark/<id> to the archive viewer. */
        const REWRITE_PATTERN = 'owark/(.+)';

        private $broken_links = array();
        private $post_id = -1;
        private $post_type = "";
        private $version = '1.0';
        private $db_version = '1.0';
        private $notices = "";

        /**
         * Class constructor: registers every hook and the recurring archiving event.
         *
         * @package owark
         * @since 0.1
         */
        public function __construct() {

            if (is_admin()) {
                add_action('admin_menu', array($this, 'owark_admin_menu'));
                add_action('plugins_loaded', array($this, 'sanity_checks'));
            }

            // See http://stackoverflow.com/questions/2210826/need-help-with-wp-rewrite-in-a-wordpress-plugin
            // Using a filter instead of an action to create the rewrite rules.
            // Write rules -> Add query vars -> Recalculate rewrite rules
            add_filter('rewrite_rules_array', array($this, 'create_rewrite_rules'));
            add_filter('query_vars', array($this, 'add_query_vars'));

            // Rewrite rules are checked during admin init to save front-end resources;
            // flush_rewrite_rules() only rebuilds them when our rule is missing.
            add_action('admin_init', array($this, 'flush_rewrite_rules'));
            add_action('template_redirect', array($this, 'template_redirect_intercept'));

            add_filter('the_content', array($this, 'content_filter'), 20);
            add_filter('comment_text', array($this, 'comment_filter'), 20, 2);
            add_filter('get_comment_author_url', array($this, 'comment_author_url_filter'), 20, 1);

            add_action('owark_schedule_event', array('Owark', 'schedule'), 10, 2);
            $event_args = array('occurrences' => 30, 'version' => $this->version);
            if (!wp_next_scheduled('owark_schedule_event', $event_args)) {
                wp_schedule_event(time(), 'hourly', 'owark_schedule_event', $event_args);
            }
        }

        /**
         * PHP4-style constructor.
         *
         * This will NOT be invoked, unless a sub-class that extends `Owark` calls it.
         * In that case, call the new-style constructor to keep compatibility.
         */
        public function Owark() {
            self::__construct();
        }

        /**
         * Append one message to the pending admin notices.
         *
         * @param string $message Message, already escaped for HTML output.
         * @param string $type    Notice flavour: 'error', 'warning' or 'success'.
         */
        private function add_notice($message, $type = 'error') {
            $this->notices .= "<div class=\"notice notice-{$type}\"><p>{$message}</p></div>\n";
        }

        /**
         * Check we have everything we need: database schema, Broken Link Checker,
         * a writable archive directory and a usable wget. Problems are queued as
         * admin notices.
         *
         * @package owark
         * @since 0.1
         */
        public function sanity_checks() {
            global $wpdb;

            // Install or upgrade tables if needed
            $installed_ver = get_option("owark_db_version");
            // Evaluated before the 0.1 migration below so that the views are
            // (re)created after that migration too.
            $update_required = ($installed_ver != $this->db_version);
            print_r_log("update_required: $update_required ($installed_ver vs {$this->db_version})");

            $table = $wpdb->prefix . "owark";

            if ($installed_ver == '0.1') {
                // In version 0.1 final URLs were used, but the broken link checker updates
                // these URLs when a link is detected broken.
                // Let's replace these URLs by raw URLs...
                $sql = "update {$wpdb->prefix}owark as owark
                          join {$wpdb->prefix}blc_links as links on owark.url = links.final_url COLLATE latin1_swedish_ci
                          join {$wpdb->prefix}blc_instances as instances on instances.link_id = links.link_id
                        set owark.url = instances.raw_url ";
                $wpdb->query($sql);
                $installed_ver = '1.0';
                update_option("owark_db_version", $installed_ver);
            }

            if ($installed_ver != $this->db_version) {
                // Log the schema version (not the plugin version) we are upgrading to.
                print_r_log("Database upgrade from $installed_ver to {$this->db_version}");
                // dbDelta() needs one column per line, two spaces after PRIMARY KEY
                // and no backticks around field names.
                $sql = "CREATE TABLE $table (
id int(10) unsigned NOT NULL AUTO_INCREMENT,
url text NOT NULL,
status varchar(20) NOT NULL DEFAULT 'to-archive',
arc_date datetime,
arc_location text,
encoding varchar(10),
PRIMARY KEY  (id),
KEY url (url(150))
)";
                require_once(ABSPATH . 'wp-admin/includes/upgrade.php');
                dbDelta($sql);
                update_option("owark_db_version", $this->db_version);
            }

            if ($update_required) {
                $sql = "CREATE OR REPLACE VIEW {$wpdb->prefix}owark_broken_links AS
                        SELECT owark.id as id, owark.url as url, instances.container_id as container_id,
                               instances.container_type as container_type, instances.container_field as container_field
                        FROM {$wpdb->prefix}owark as owark, {$wpdb->prefix}blc_links as links, {$wpdb->prefix}blc_instances AS instances
                        WHERE owark.url = instances.raw_url AND broken = 1 AND last_check is not null AND instances.link_id = links.link_id";
                print_r_log("sql: $sql");
                $wpdb->query($sql);

                $sql = "CREATE OR REPLACE VIEW {$wpdb->prefix}owark_archives AS
                        SELECT owark.*, links.broken, links.first_failure
                        FROM {$wpdb->prefix}owark as owark, {$wpdb->prefix}blc_links as links, {$wpdb->prefix}blc_instances AS instances
                        WHERE owark.url = instances.raw_url AND instances.link_id = links.link_id
                        GROUP BY owark.url";
                print_r_log("sql: $sql");
                $wpdb->query($sql);

                // The sub-select uses the configured table prefix rather than a
                // hard-coded "wp_" so that it works on every installation.
                $sql = "CREATE OR REPLACE VIEW {$wpdb->prefix}owark_links_to_archive AS
                        SELECT DISTINCT instances.raw_url as url
                        FROM {$wpdb->prefix}blc_links as links, {$wpdb->prefix}blc_instances AS instances
                        WHERE broken = 0 AND instances.link_id = links.link_id AND url NOT IN (SELECT url FROM {$wpdb->prefix}owark)";
                print_r_log("sql: $sql");
                $wpdb->query($sql);

                $this->add_notice("The owark table has been installed or upgraded to version {$this->db_version}", 'success');
            }

            // Check that the broken link checker is installed
            if (!function_exists('get_plugins')) {
                require_once (ABSPATH . "wp-admin/includes/plugin.php");
            }

            $blc = 'not-found';
            foreach (get_plugins() as $plugin_file => $plugin_data) {
                if ($plugin_data['Title'] == 'Broken Link Checker') {
                    $blc = is_plugin_active($plugin_file) ? 'active' : 'inactive';
                }
            }

            if ($blc == 'inactive') {
                $this->add_notice("Please activate the Broken Link Checker so that the Open Web Archive can be fully functional.", 'warning');
            } else if ($blc == 'not-found') {
                $this->add_notice("The Open Web Archive relies on the Broken Link Checker. Please install this plugin!", 'warning');
            }

            // Check if we have an archive subdirectory
            $archives_dir = archives_dir();
            print_r_log("archives_dir: $archives_dir");
            if (!is_dir($archives_dir)) {
                // Recursive: OWARK_ARCHIVES_REL_PATH may contain several levels.
                // Failure is reported through the notice below.
                @mkdir($archives_dir, 0777, true);
                if (!is_dir($archives_dir)) {
                    $this->add_notice("The Open Web Archive has not been able to create the folder /archives in its installation directory. Please create it by hand and make it writable for the web server.");
                }
            } elseif (!is_writable($archives_dir)) {
                $this->add_notice("The Open Web Archive needs a writable folder /archives in its installation directory. Please make it writable for the web server.");
            }

            // Check that we can execute commands. Match whole function names so
            // that e.g. "shell_exec" or "curl_exec" alone does not raise the notice.
            $disabled = array_map('trim', explode(',', (string) ini_get('disable_functions')));
            if (in_array('exec', $disabled, true)) {
                $this->add_notice("The Open Web Archives requires that exec() is allowed to run wget and retrieve the pages to archive.");
            } else {
                // Check that wget is installed (only possible when exec() is callable)
                $output = array();
                exec('/usr/bin/wget -V', $output);
                if (empty($output)) {
                    $this->add_notice("The Open Web Archives is not able to run GNU wget and retrieve the pages to archive. Please check that wget is installed and on the default path.");
                } else {
                    // We need at least version 1.11
                    $wget_version = array();
                    $found = preg_match('/GNU Wget ([0-9\.]+) /', $output[0], $wget_version);
                    $version_read = $found ? $wget_version[1] : '';
                    if (!$found || version_compare($version_read, '1.11', '<')) {
                        $this->add_notice("The Open Web Archives needs GNU wget version 1.11 or higher.<br />Version read: " . esc_html($version_read));
                    }
                }
            }

            if ($this->notices != '') {
                add_action('admin_notices', array($this, 'admin_notices'));
            }
        }

        /**
         * Show admin notices queued by sanity_checks().
         *
         * @package owark
         * @since 0.1
         */
        public function admin_notices() {
            echo $this->notices;
        }

        /**
         * Admin menus: adds the "Web Archive" entry under Tools.
         *
         * @package owark
         * @since 0.1
         */
        public function owark_admin_menu() {
            add_management_page(__('The Open Web Archive', 'owark'), __('Web Archive', 'owark'), 'edit_others_posts', 'owark', array($this, 'management_page'));
        }

        /**
         * URL of an archive page.
         *
         * @package owark
         * @since 0.1
         *
         * @param int|string $archive_id Row id in the owark table.
         * @return string home_url()/owark/<id>
         */
        public function get_archive_url($archive_id) {
            return home_url() . '/owark/' . $archive_id;
        }

        /**
         * Display the admin/tools page: the list of broken links that have an archive.
         *
         * @package owark
         * @since 0.1
         */
        public function management_page() {
            // must check that the user has the required capability
            if (!current_user_can('edit_others_posts')) {
                wp_die(__('You do not have sufficient permissions to access this page.'));
            }

            global $wpdb;

            echo '<div class="wrap">';
            screen_icon();
            echo '<h2>Owark - The Open Web Archive</h2>';
            echo '<p><em>Tired of broken links? Archive yours with the Open Web Archive!</em></p>';
            echo "\n";
            echo '<p>List of broken links with archived pages:</p>';

            $query = "SELECT * FROM {$wpdb->prefix}owark_archives WHERE broken = 1 ORDER BY url";
            $results = $wpdb->get_results($query);

            echo '<table class="widefat">';
            echo '<thead>';
            echo '<tr>';
            echo '<th>URL</th>';
            echo '<th>Archive</th>';
            echo '</tr>';
            echo '</thead>';
            echo '<tbody>';
            // get_results() returns null on failure (e.g. the view does not exist yet).
            foreach ((array) $results as $link) {
                $archive_url = $this->get_archive_url($link->id);
                printf(
                    "<tr><td><a href=\"%s\" target='_blank'>%s</a></td><td><a href=\"%s\" target='_blank'>%s</a></td></tr>",
                    esc_url($link->url),
                    esc_html($link->url),
                    esc_url($archive_url),
                    esc_html($link->arc_date)
                );
            }
            echo '</tbody>';
            echo '</table>';
            echo '</div>';
        }

        /**
         * Add a rewrite rule to display archive pages.
         *
         * @package owark
         * @since 0.1
         *
         * @param array $rules Existing rewrite rules.
         * @return array Rules with the owark rule placed first.
         */
        public function create_rewrite_rules($rules) {
            global $wp_rewrite;
            $newRule = array(self::REWRITE_PATTERN => 'index.php?owark=' . $wp_rewrite->preg_index(1));
            return $newRule + $rules;
        }

        /**
         * Add a query variable used to display archive pages.
         *
         * @package owark
         * @since 0.1
         *
         * @param array $qvars Registered public query variables.
         * @return array
         */
        public function add_query_vars($qvars) {
            $qvars[] = 'owark';
            return $qvars;
        }

        /**
         * Rebuild the rewrite rules, but only when the owark rule is not part of
         * the stored rule set: flushing on every admin request is expensive.
         *
         * @package owark
         * @since 0.1
         */
        public function flush_rewrite_rules() {
            $stored = get_option('rewrite_rules');
            if (is_array($stored) && isset($stored[self::REWRITE_PATTERN])) {
                return;
            }
            global $wp_rewrite;
            $wp_rewrite->flush_rules();
        }

        /**
         * Intercepts archive pages: renders the archive and stops the request when
         * the "owark" query variable is set.
         *
         * @package owark
         * @since 0.1
         */
        public function template_redirect_intercept() {
            global $wp_query;
            if ($wp_query->get('owark')) {
                $this->display_archive($wp_query->get('owark'));
                exit;
            }
        }

        /**
         * Filter to replace broken links in post content.
         *
         * @package owark
         * @since 0.1
         *
         * @param string $content Post content.
         * @return string
         */
        public function content_filter($content) {
            log_function_call();
            global $post;
            // the_content can be applied outside the loop, where $post is not set.
            if (empty($post) || !isset($post->ID)) {
                return $content;
            }
            return $this->link_filter($content, $post->ID, $post->post_type);
        }

        /**
         * Filter to replace broken links in comments.
         *
         * @package owark
         * @since 0.1
         *
         * @param string      $content Comment text.
         * @param object|null $comment Comment being displayed, when provided by the caller.
         * @return string
         */
        public function comment_filter($content, $comment = null) {
            log_function_call();
            if (empty($comment) || !isset($comment->comment_ID)) {
                return $content;
            }
            return $this->link_filter($content, $comment->comment_ID, 'comment');
        }

        /**
         * Filter to replace broken links in author links.
         *
         * @package owark
         * @since 0.2
         *
         * @param string $url Comment author URL.
         * @return string The archive URL when the link is broken and archived, $url otherwise.
         */
        public function comment_author_url_filter($url) {
            log_function_call();
            global $wpdb;

            $q = "
                SELECT id
                    FROM {$wpdb->prefix}owark_broken_links
                    WHERE url = %s
                ";
            print_r_log($q);
            $q = $wpdb->prepare($q, $url);
            $results = $wpdb->get_results($q);
            print_r_log($results);

            if (empty($results)) {
                return $url;
            }
            return $this->get_archive_url($results[0]->id);
        }

        /**
         * Generic filter to replace broken links in content.
         *
         * @package owark
         * @since 0.1
         *
         * @param string     $content   HTML fragment to filter.
         * @param int|string $post_id   Id of the post or comment containing the fragment.
         * @param string     $post_type Post type, or 'comment'.
         * @return string
         */
        public function link_filter($content, $post_id, $post_type) {
            global $wpdb;
            log_function_call();

            // See if we haven't already loaded the broken links for this container,
            // which happens for comments (they have links to check in 2 different filters)
            if ($this->post_id != $post_id || $this->post_type != $post_type) {
                $this->post_id = $post_id;
                $this->post_type = $post_type;

                // Retrieve info about all occurrences of broken links in the current post
                $q = "
                    SELECT url, id
                        FROM {$wpdb->prefix}owark_broken_links
                        WHERE container_id = %s AND container_type = %s
                    ";
                print_r_log($q);
                $q = $wpdb->prepare($q, $this->post_id, $this->post_type);
                $results = $wpdb->get_results($q);
                print_r_log($results);

                $this->broken_links = array();
                foreach ((array) $results as $link) {
                    $this->broken_links[$link->url] = $link->id;
                }
            }

            if (empty($this->broken_links)) {
                return $content;
            }

            // Regexp : see http://stackoverflow.com/questions/2609095/hooking-into-comment-text-to-add-surrounding-tag
            // Groups: 1 = tag up to the opening quote of href, 2 = the URL, 3 = rest of the element.
            $replaced = preg_replace_callback('/(<a\s[^>]*?href=["\'])([^"\'>]+)(["\'][^>]*>.*?<\/a>)/si', array($this, 'replace_a_link'), $content);
            if ($replaced === null) {
                // PCRE failure (e.g. backtrack limit): keep the content untouched.
                return $content;
            }
            print_r_log("replaced: $replaced");
            return $replaced;
        }

        /**
         * Replace a link: preg_replace_callback() handler for link_filter().
         *
         * @package owark
         * @since 0.1
         *
         * @param array $matches 0 = whole element, 1 = prefix, 2 = URL, 3 = suffix.
         * @return string
         */
        public function replace_a_link($matches) {
            log_function_call();
            if (array_key_exists($matches[2], $this->broken_links)) {
                return $matches[1] . $this->get_archive_url($this->broken_links[$matches[2]]) . $matches[3];
            }
            return $matches[0];
        }

        /**
         * Display an archive page: a banner describing the snapshot followed by
         * the archived HTML.
         *
         * @package owark
         * @since 0.1
         *
         * @param int|string $parameter Archive id taken from the /owark/<id> URL.
         */
        public function display_archive($parameter) {
            print_r_log("display_archive($parameter)");
            global $wpdb;

            $id = intval($parameter);
            $blog_title = get_bloginfo('name');
            $home_url = home_url();

            $query = $wpdb->prepare("SELECT * from {$wpdb->prefix}owark AS owark where id = %d", $id);
            $link = $wpdb->get_row($query);
            $wpdb->flush();

            if (!$link) {
                // Unknown id: nothing to read properties from.
                header('Content-Type: text/html');
                echo 'Archive not found';
                return;
            }

            $loc = archives_dir() . '/' . $link->arc_location;
            $arc_base = archives_url() . '/' . $link->arc_location;

            // The file name is either index.html or guessed from the URL
            if (substr($link->url, -1) == '/') {
                $file_location = $loc . '/index.html';
            } else {
                $parts = explode('/', $link->url);
                $name = $parts[count($parts) - 1];
                // wget -E only appends ".html" when the name does not already end in .htm/.html
                if (!preg_match('/\.html?$/i', $name)) {
                    $name .= '.html';
                }
                $file_location = $loc . '/' . $name;
            }

            if (!is_file($file_location) && is_dir($loc)) {
                // If the guessed file doesn't exist, find another html file!
                $dir = opendir($loc);
                if ($dir) {
                    while (false !== ($file = readdir($dir))) {
                        if ('.html' === substr($file, -5)) {
                            $file_location = $loc . '/' . $file;
                            break;
                        }
                    }
                    closedir($dir);
                }
            }

            // Read the file
            $content = is_file($file_location) ? file_get_contents($file_location) : false;
            $archive_found = ($content !== false);
            if (!$archive_found) {
                $content = 'Archive not found';
            }

            // Which encoding?
            $encoding = $link->encoding;
            if ($encoding == NULL) {
                // We need to guess the encoding!
                $matches = NULL;
                // Handles both <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
                // and <meta charset="utf-8">
                if (preg_match('/<meta\s[^>]*charset\s*=\s*["\']?\s*([^"\'>\s;]+)/si', $content, $matches) > 0) {
                    $encoding = $matches[1];
                } else {
                    $encoding = mb_detect_encoding($content);
                }
                // The value ends up in an HTTP header: keep charset-name characters only.
                $encoding = preg_replace('/[^A-Za-z0-9._:-]/', '', (string) $encoding);
                // Do not remember a guess made on the "not found" placeholder.
                if ($encoding && $archive_found) {
                    $wpdb->update(
                            "{$wpdb->prefix}owark", array('encoding' => $encoding), array('id' => $id));
                }
            } else {
                $encoding = preg_replace('/[^A-Za-z0-9._:-]/', '', (string) $encoding);
            }

            if ($encoding) {
                header("Content-Type: text/html; charset=$encoding");
            } else {
                header('Content-Type: text/html');
            }

            $link_href = esc_url($link->url);
            $link_text = esc_html($link->url);
            $site_href = esc_url($home_url);
            $site_name = esc_html($blog_title);
            $arc_date = esc_html($link->arc_date);

            // Relative references inside the snapshot resolve against the archive folder.
            echo '<base href="' . esc_url($arc_base) . '/">';
            echo '<div style="background: #fff; border: 1px solid #000; color: #000; margin: 12px; padding: 8px;">';
            echo '<p>';
            echo "This is an Open Web Archive archive of <a href=\"{$link_href}\">{$link_text}</a>.";
            echo "<br />This snapshot has been taken on {$arc_date} for the website <a href=\"{$site_href}\">{$site_name}</a> which contains a link to this page and has saved a copy to be displayed in the page ever disappears.";
            echo '</p>';
            echo '</div>';

            print_r_log("file_location: $file_location");
            echo $content;
        }

        /**
         * Check if we've got something to archive and, if so, fetch one URL with wget
         * and record the result. Re-schedules itself while $occurrences is positive.
         *
         * @package owark
         * @since 0.1
         *
         * @param int    $occurrences Number of follow-up runs still allowed.
         * @param string $version     Plugin version advertised in the user agent.
         */
        public static function schedule($occurrences, $version) {
            log_function_call();

            // The option acts as a lock so that two cron runs do not overlap.
            if (get_option('owark_archiving', false)) {
                return;
            }
            update_option('owark_archiving', true);

            global $wpdb;

            // Only one row is consumed per run.
            $query = "SELECT url from {$wpdb->prefix}owark_links_to_archive LIMIT 1";
            print_r_log("query: $query");
            $url = $wpdb->get_row($query);
            print_r_log($url);
            $wpdb->flush();

            if ($url != NULL) {
                $date = date('c');
                $relpath = str_replace('%2F', '/', urlencode(preg_replace('/https?:\/\//', '', $url->url))) . '/' . $date;
                $path = archives_dir() . "/$relpath";

                // URLs come from posts and comments: every argument is shell-escaped,
                // and "--" keeps a URL starting with "-" from being read as an option.
                $user_agent = "Mozilla/5.0 (compatible; owark/$version; http://owark.org/)";
                $command = 'wget -t3 -E -H -k -K -p -nd -nv --timeout=60'
                        . ' --user-agent=' . escapeshellarg($user_agent)
                        . ' -P ' . escapeshellarg($path)
                        . ' -- ' . escapeshellarg($url->url)
                        . ' 2>&1';

                $output = array();
                $status = 0;
                exec($command, $output, $status);
                print_r_log("wget status: $status");
                if ($status != 0) {
                    print_r_log("wget status: $status, output:");
                    print_r_log($output);
                }

                $wpdb->insert("{$wpdb->prefix}owark", array(
                    'url' => $url->url,
                    'status' => $status,
                    'arc_date' => $date,
                    'arc_location' => $relpath));

                if ($occurrences > 0) {
                    wp_schedule_single_event(time() + 90, 'owark_schedule_event', array('occurrences' => $occurrences - 1, 'version' => $version));
                }
            }

            delete_option('owark_archiving');
        }

    }

}

if (class_exists("Owark")) {
    $owark = new Owark();
}
?>