<?php
/**
 * MetaMirror.php
 * XenForo addon to re-host images to the local file system.
 * Mick West
 * Metabunk.org
 */


// Timeouts (in seconds) used when fetching remote images.
// MMR_CONNECTTIMEOUT is passed to CURLOPT_CONNECTTIMEOUT and to fsockopen() in loadImage().
define('MMR_CONNECTTIMEOUT', 5);
// Passed to CURLOPT_TIMEOUT: upper bound on the whole cURL transfer.
define('MMR_CURL_TIMEOUT', 5);
// Passed to stream_set_timeout() on the raw-socket fallback in loadImage().
define('MMR_SOCKET_TIMEOUT', 5);
// NOTE(review): not referenced in the visible part of this file; presumably the cap
// on download attempts tracked in the MetaMirror_Processed column - confirm.
define('MMR_MAX_RETRIES',3);

// Value written to the MetaMirror_Processed column to mark a row as finished
// (ignored host or corrupt URL), so it is not selected again.
define('MMR_MAGIC',1008); // if you change this, it will re-parse previously ignored posts.

// exif_imagetype() belongs to the optional exif extension. When that extension
// is not compiled in, provide a stand-in built on getimagesize(), as suggested at:
// http://php.net/manual/en/function.exif-imagetype.php
if (!function_exists('exif_imagetype')) {
    /**
     * Fallback for exif_imagetype() based on getimagesize().
     *
     * @param string $filename Path of the file to inspect.
     * @return int|bool The image type reported by getimagesize(), or false
     *                  when getimagesize() cannot identify the file.
     */
    function exif_imagetype($filename)
    {
        $info = getimagesize($filename);
        if ($info === false) {
            return false;
        }

        // Index 2 of getimagesize()'s result holds the image type.
        return $info[2];
    }
}



/**
 * Helper function to load the contents of a remote binary file (usually an image file).
 *
 * cURL is used when the extension is available. If cURL is missing, or it
 * returned an empty body with a 200 status, a plain socket request is tried
 * instead (the socket path does not follow 301/302 redirects).
 *
 * @param string $url The URL of the file to load (http or https only). Taken by
 *                    reference for backward compatibility; it is not modified.
 * @return bool|string '' when the URL cannot be parsed or is not http/https,
 *                     false when the download fails, otherwise the file contents.
 */
function loadImage(&$url)
{
    $image = '';

    // parse_url() returns false for seriously malformed URLs and may omit any component,
    // so every part is checked before it is used.
    $url_info = @parse_url($url);
    $scheme = (is_array($url_info) && isset($url_info['scheme'])) ? strtolower($url_info['scheme']) : '';
    if ($scheme != 'http' && $scheme != 'https') // only HTTP/S, otherwise hacker could theoretically specify file://
    {
        // return an empty string for anything other than http
        return $image;
    }
    if (!isset($url_info['host']) || $url_info['host'] === '')
    {
        // Nothing to connect to.
        return false;
    }

    // The Host header only carries a port when it is not the default for the scheme.
    $default_port = ($scheme == 'https') ? 443 : 80;
    $port = isset($url_info['port']) ? (int)$url_info['port'] : $default_port;
    $host_header = $url_info['host'];
    if ($port != $default_port)
    {
        $host_header .= ':'.$port;
    }

    if (function_exists("curl_init"))
    {
        $ch = @curl_init($url);
        if ($ch === false)
        {
            XenForo_Helper_File::log('MetaMirror','CURL ERROR could not initialise a handle for '.$url,true);
            return false;
        }

        @curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // don't care about security here.
        // CURLOPT_BINARYTRANSFER is intentionally not set: it has had no effect since
        // PHP 5.1.3, CURLOPT_RETURNTRANSFER always returns the raw body.
        @curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        @curl_setopt($ch, CURLOPT_FAILONERROR, 1);
        @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        // The scheme check above only covers the initial URL; make sure a redirect
        // cannot switch to some other protocol either.
        if (defined('CURLOPT_REDIR_PROTOCOLS') && defined('CURLPROTO_HTTP') && defined('CURLPROTO_HTTPS'))
        {
            @curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
            @curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        }
        @curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, MMR_CONNECTTIMEOUT);
        @curl_setopt($ch, CURLOPT_TIMEOUT, MMR_CURL_TIMEOUT);
        @curl_setopt($ch, CURLINFO_HEADER_OUT, 1); // record the request headers for the error log

        // Some sites require you have the host in the request headers.
        // Each entry must be a single line: a line break inside a header value
        // terminates the header section early.
        @curl_setopt($ch, CURLOPT_HTTPHEADER, array(
            'Host: '.$host_header,
            'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.68 Safari/537.36',
            'Connection: keep-alive',
        ));

        // Use the remote site's own root as the referrer,
        // which gets around some hotlink protection
        $safe_referrer = 'http://'.$url_info['host'];
        @curl_setopt($ch, CURLOPT_REFERER, $safe_referrer);

        $image = @curl_exec($ch);
        $http_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $http_output_header = curl_getinfo($ch, CURLINFO_HEADER_OUT);
        @curl_close($ch);
        if ($http_status != '200')
        {
            XenForo_Helper_File::log('MetaMirror','CURL ERROR'.$http_status.' Output Header '.$http_output_header,true);
            return false; // just return, no point trying again with crappier method
        }

        // curl_exec() yields false on a failed transfer; treat that like an empty body.
        if (!is_string($image))
        {
            $image = '';
        }
    }

    // This is basically the fallback for if Curl is not installed
    // although it will also be called if an empty file is returned.
    // Try with normal sockets (will not handle 301/302 redirects)
    if (strlen($image) == 0)
    {
        // https needs a TLS socket; a bare TCP connection only works for http.
        $transport = ($scheme == 'https') ? 'ssl://' : '';

        // Build the request target from the parsed URL rather than by slicing the raw
        // string, so it is right for both schemes and for URLs without a path.
        $request_target = (isset($url_info['path']) && $url_info['path'] !== '') ? $url_info['path'] : '/';
        if (isset($url_info['query']))
        {
            $request_target .= '?'.$url_info['query'];
        }

        $fp = @fsockopen($transport.$url_info['host'], $port, $errno, $errstr, MMR_CONNECTTIMEOUT);
        if (!$fp)
        {
            return false;
        }

        @stream_set_timeout($fp, MMR_SOCKET_TIMEOUT);
        @stream_set_blocking($fp, 1);

        // HTTP/1.0 is requested on purpose: the body is read verbatim below, and a
        // 1.0 response cannot use chunked transfer encoding.
        $request = "GET $request_target HTTP/1.0\r\nHost: $host_header\r\nConnection: Close\r\n\r\n";
        $response = '';
        if (@fwrite($fp, $request))
        {
            $response = @stream_get_contents($fp);
        }
        @fclose($fp);

        if (!is_string($response) || strpos($response, 'HTTP') !== 0)
        {
            return false;
        }

        // The returned value has a full HTTP header, we strip it off by finding the first
        // double CR/LF, and the actual contents start directly after that.
        // (this is the standard message layout, as per RFC 2616)
        $header_end = strpos($response, "\r\n\r\n");
        if ($header_end === false)
        {
            return false;
        }
        $headers = explode("\r\n", substr($response, 0, $header_end));
        // Status line looks like "HTTP/1.x NNN reason"; the code starts at offset 9.
        $response_code = substr($headers[0], 9, 3);
        if ($response_code != '200')
        {
            return false;
        }
        $image = substr($response, $header_end + 4);
    }

    return $image ? $image : false;
}


/**
 * Via: http://php.net/manual/en/function.filesize.php#92462
 * Returns the size of a remote file without downloading it, or -1 if the
 * size could not be determined.
 *
 * A HEAD-style request is issued with cURL (following redirects) and the
 * status code and Content-Length of the final response are read back from
 * the cURL handle, so the result does not depend on the HTTP version or on
 * the letter case of the header name.
 *
 * @param string $url The location of the remote file (http or https). Cannot
 *                    be null or empty.
 * @return int The size in bytes of the file referenced by $url, or -1 if the
 *             size could not be determined (including when cURL is unavailable).
 */
function curl_get_file_size( $url ) {
    // Assume failure.
    $result = -1;

    if (!function_exists("curl_init"))
    {
        return $result;
    }

    // Same restriction as loadImage(): only http/https is ever inspected.
    $url_info = @parse_url($url);
    $scheme = (is_array($url_info) && isset($url_info['scheme'])) ? strtolower($url_info['scheme']) : '';
    if ($scheme != 'http' && $scheme != 'https')
    {
        return $result;
    }

    $ch = @curl_init( $url );
    if ($ch === false)
    {
        return $result;
    }

    // Issue a HEAD request and follow any redirects.
    @curl_setopt($ch, CURLOPT_NOBODY, true); // NOBODY just ignores the body and returns the head
    @curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // don't care about security here.
    @curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    @curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    if (defined('CURLOPT_REDIR_PROTOCOLS') && defined('CURLPROTO_HTTP') && defined('CURLPROTO_HTTPS'))
    {
        @curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        @curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    }
    // Short timeouts here, as we are just attempting a quick read of the header
    @curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 2);
    @curl_setopt($ch, CURLOPT_TIMEOUT, 2);

    $data = @curl_exec( $ch );
    // Both values describe the last response in a redirect chain.
    $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $content_length = curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
    @curl_close( $ch );

    if ($data === false)
    {
        return $result;
    }

    // http://en.wikipedia.org/wiki/List_of_HTTP_status_codes
    if ($status == 200 || ($status > 300 && $status <= 308))
    {
        // cURL reports a negative length when the server sent no Content-Length;
        // keep -1 in that case instead of handing a non-numeric value to the caller.
        if (is_numeric($content_length) && $content_length >= 0)
        {
            $result = (int)$content_length;
        }
    }

    return $result;
}



/**
 * Static worker that finds [IMG] URLs in XenForo content and re-hosts the
 * images locally. All state is kept in static properties that _init() fills
 * from the XenForo options and from the optional $input array.
 */
class MetaMirror_CronEntry
{

    // Content type currently being processed: 'post', 'resource_update' or 'signature'.
    private static $content_type;
    // Table that we look for files in (e.g. xf_post, xf_user_profile).
    private static $table_name;
    // Name of the id column of that table (e.g. post_id, user_id).
    private static $id_name;
    // Name of the column that contains the message text (post body, or sig body).
    private static $column_name;
    // Name of the local host, taken from XenForo_Application::$host.
    private static $host;
    // Number of rows to handle per run (option MetaMirror_PostPer, or the 'override' input).
    private static $max_posts;
    // Sub-folder of the external data folder that holds the cached images ('/MetaMirrorCache').
    private static $metamirror_folder;
    // List of substrings; an image URL containing any of them is skipped.
    private static $metaMirror_ignore;
    // false, or an array of integer forum ids taken from option MetaMirror_ForumExclude.
    private static $exclude_forums;
    // Statistics and log array; runMetaMirror() returns it when not started from cron.
    private static $result;
    // When set, changes are written back to the database. Defaults to 1; the 'commit' input overrides it.
    private static $commit;
    // Defaults to 1; otherwise the 'save' input OR-ed with $commit.
    // NOTE(review): presumably controls whether downloaded files are written to disk - confirm.
    private static $save;
    private static $replace_base_url;  // absolute base URL (with trailing slash) where the cached files will eventually be hosted.
    // Option MetaMirror_UseAttachments; forced to 0 while processing signatures.
    private static $use_attachments;

    // 1 when XenForo_Application::$externalDataUrl contains a scheme (i.e. is an absolute URL), else 0.
    private static $isAbsoluteExternalData;
    // Absolute file system path of the cache folder (external data path + $metamirror_folder).
    private static $externalDataPath;

    // XenForo_Model_Attachment instance, created by hand because this class is not a model.
    private static $attachmentModel;

    // Option MetaMirror_AppendSourceInfo; forced to 0 while processing signatures.
    private static $append;
    // Option MetaMirror_Note. NOTE(review): presumably the text appended when $append is on - confirm.
    private static $append_note;

    // Option MetaMirror_FlagBroken. NOTE(review): presumably enables marking broken links - confirm.
    private static $flag_broken;
    // Option MetaMirror_BrokenNote.
    private static $broken_note;

    // Largest acceptable image size in bytes: option MetaMirror_SizeLimit * 1024,
    // or 1000000000 when the option is empty.
    private static $size_limit;

    // XenForo database adapter (XenForo_Application::getDb()).
    private static $db;
    // 1 when the table xf_bb_code_parse_cache exists, else 0.
    private static $bb_cache_exists;

    // Guard flag checked at the top of _init().
    private static $init_done = 0;

    /**
     * Prepare the static state used by every entry point of this class.
     *
     * The work is split in two parts:
     *  - request-wide state (paths, URLs, ignore list, database handle, ...) is
     *    built once by _initOnce() and then reused. The original guard assigned
     *    a local $init_done instead of self::$init_done, so it never took effect
     *    and the model creation and table listing were repeated on every call.
     *  - per-call state (result counters, commit/save flags, post limit and the
     *    flags that callers switch off for signatures) is reset on every call,
     *    exactly as before, so a call with $input is honoured even when an
     *    earlier call already initialised the class.
     *
     * @param array|bool $input Parameters from the cron system or the testing tool;
     *                          getDataPath()/getDataURL() pass false. Recognised keys:
     *                          'cron_class', 'commit', 'save', 'override'.
     * @return void
     */
    private static function _init($input)
    {
        $options = XenForo_Application::get('options');

        if (!self::$init_done)
        {
            self::_initOnce($options);
            // Only flag success once everything above has completed.
            self::$init_done = 1;
        }

        // The $result is what we return to the calling function if there was input
        self::$result = array(
            'some_results' => 1,  // Flag to display results
            'fetched' => 0,
            'not_ignored' => 0,
            'files_matched' => 0,
            'files_attempted_downloaded' => 0,
            'files_downloaded' => 0,
            'bytes_downloaded' => 0,
            'bytes_skipped' => 0,
            'urls_updated' => 0,
            'log' => array(),
            'signatures' => $options->MetaMirror_CacheSigImages,
        );

        self::$commit = 1;
        self::$save = 1;
        self::$max_posts = $options->MetaMirror_PostPer;

        self::$append = $options->MetaMirror_AppendSourceInfo;
        self::$append_note = $options->MetaMirror_Note;

        self::$flag_broken = $options->MetaMirror_FlagBroken;
        self::$broken_note = $options->MetaMirror_BrokenNote;

        // The option is multiplied by 1024 to get bytes; an empty option means "no limit".
        self::$size_limit = $options->MetaMirror_SizeLimit;
        if (!self::$size_limit)
            self::$size_limit = 1000000000; // 1000MB, so no limit really.
        else
            self::$size_limit *= 1024;

        self::$use_attachments = $options->MetaMirror_UseAttachments;
        self::$result['use_attachments'] = $options->MetaMirror_UseAttachments;

        /////////////////////////////////////////////////////////////////////////////////////////////
        // Detect if not running from cron, then get the commit and save parameters
        if (!isset($input['cron_class']) && isset($input['commit']))
        {
            // Not from cron, so parameters passed in
            self::$commit = $input['commit'];
            self::$save = !empty($input['save']) || self::$commit;
            // number of posts per cron run
            // this will be the number of [IMG] posts we fetch from the database
            // the actual number of images fetched will depend on the ignore list,
            // and how many images there are per post.
            self::$max_posts = isset($input['override']) ? intval($input['override']) : 0;
            if (self::$max_posts == 0)
                self::$max_posts = $options->MetaMirror_PostPer;
        }

        self::$result['commit'] = self::$commit;
        self::$result['save'] = self::$save;
    }

    /**
     * Build the state that cannot change during a request: attachment model,
     * host, cache folder path and URL, ignore list, excluded forums, database
     * handle and the bb-code cache flag.
     *
     * @param object $options The XenForo options object.
     * @return void
     */
    private static function _initOnce($options)
    {
        // We are not a model, so we need to create a temporary attachment model.
        self::$attachmentModel = XenForo_Model::create('XenForo_Model_Attachment');

        // [IMG] urls require absolute paths
        // By default we assume the data path is relative, and so we have to add the host.
        // However, inspect the external data URL, to see if it is absolute
        self::$isAbsoluteExternalData = 0;
        $url_info = @parse_url(XenForo_Application::$externalDataUrl);
        if (isset($url_info['scheme']))
        {
            self::$isAbsoluteExternalData = 1;
        }

        // The name of the local host, like example.com
        self::$host = XenForo_Application::$host;

        // Folder where we put all the cached images, in the external data folder
        self::$metamirror_folder = '/MetaMirrorCache';

        // List of ignore strings; the local host is always ignored
        $ignore = array(self::$host);
        if ($options->MetaMirror_Ignore)
            $ignore = array_merge($ignore, explode(',', $options->MetaMirror_Ignore));

        // if the external data URL is absolute, then add that to the ignore list
        // as that is essentially the /data folder, but on some external CDN
        // otherwise that's handled by the self::$host
        if (self::$isAbsoluteExternalData)
        {
            $ignore[] = XenForo_Application::$externalDataUrl;
        }

        // Any link generated by XF will have this, so we should ignore those too
        $ignore[] = XenForo_Link::getCanonicalLinkPrefix();

        // note we strip out whitespace from these strings
        // otherwise people put a space after the comma, etc.
        self::$metaMirror_ignore = array_map('trim', $ignore);

        // get ids of forums to exclude. Using intval prevents any SQL injection
        $option_exclude = preg_replace('#[^0-9,]#', '', $options->MetaMirror_ForumExclude);
        if ($option_exclude == '') // ignore if just white space
            self::$exclude_forums = false;
        else
            self::$exclude_forums = array_map('intval', array_filter(explode(',',$option_exclude),"is_numeric"));

        // get the root path of this web server's data path, like
        // /srv/www/metabunk.org/public_html/data
        $root = XenForo_Helper_File::getExternalDataPath();

        // absolute path to the cache directory
        self::$externalDataPath = $root.self::$metamirror_folder;

        // [IMG] tags need an ABSOLUTE image URL
        // so we either use the externalDataURL (if that is absolute)
        // or the host + the relative externalDataURL
        if (self::$isAbsoluteExternalData)
        {
            self::$replace_base_url = XenForo_Application::$externalDataUrl.self::$metamirror_folder.'/';
        }
        else
        {
            // the board URL is set in options, but it's not guaranteed to be correct, so if it's not, then use the host name
            $canonicalLinkPrefix = XenForo_Link::getCanonicalLinkPrefix();
            $canonicalLinkPrefix_info = @parse_url($canonicalLinkPrefix);
            if (isset($canonicalLinkPrefix_info['scheme']))
            {
                // if we want to localize the url, then rebuild it here
                if ($options->MetaMirror_localize_host)
                {
                    $canonicalLinkPrefix = $canonicalLinkPrefix_info['scheme'].'://'.self::$host;
                    if (isset($canonicalLinkPrefix_info['path']))
                    {
                        $canonicalLinkPrefix .= $canonicalLinkPrefix_info['path'];
                    }
                }

                self::$replace_base_url = $canonicalLinkPrefix.'/'.XenForo_Application::$externalDataUrl.self::$metamirror_folder.'/';
                self::$metaMirror_ignore[] = $canonicalLinkPrefix;      // and let's ignore that, in case it's some odd .htaccess type redirect.
            }
            else
            {
                self::$replace_base_url = 'http://'.self::$host.'/'.XenForo_Application::$externalDataUrl.self::$metamirror_folder.'/';
            }
        }

        // Get the Xenforo database object
        self::$db = XenForo_Application::getDb();

        // List the tables, so we can see if the bb cache exists
        // (I think it always does, but who knows)
        self::$bb_cache_exists = in_array('xf_bb_code_parse_cache', self::$db->listTables(), true) ? 1 : 0;
    }

    // These are utility functions for external use only
    /**
     * Absolute file system path of the MetaMirror cache folder.
     *
     * Runs the class initialisation first (without any input), so it can be
     * called from outside before anything else has happened.
     *
     * @return string
     */
    public static function getDataPath()
    {
        self::_init(false);

        $cacheFolderPath = self::$externalDataPath;
        return $cacheFolderPath;
    }

    /**
     * Absolute base URL under which the cached images are served.
     *
     * Runs the class initialisation first (without any input), so it can be
     * called from outside before anything else has happened.
     *
     * @return string
     */
    public static function getDataURL()
    {
        self::_init(false);

        $cacheBaseUrl = self::$replace_base_url;
        return $cacheBaseUrl;
    }



    /**
     * Store a downloaded file as a XenForo attachment on a post.
     * Code from: http://xenforo.com/community/threads/metamirror.55736/page-4#post-595530
     *
     * @param object $database  Database adapter used to bump the post's attach_count.
     * @param string $fileName  File name to give the attachment.
     * @param string $tempFile  Path of the file holding the data.
     * @param int    $userId    User the attachment data is stored for.
     * @param int    $postId    Post the attachment is associated with.
     * @param int    $date      Used as both upload date and attach date.
     * @param array  $attach    Extra attachment fields, applied after the defaults.
     * @return int|bool The new attachment id, or false when storing the data
     *                  raised a XenForo_Exception.
     */
    private static function importPostAttachment($database, $fileName, $tempFile, $userId, $postId, $date, array $attach = array())
    {
        $uploadedFile = new XenForo_Upload($fileName, $tempFile);
        $extraData = array('upload_date' => $date, 'attach_count' => 1);

        // Step 1: store the file contents as attachment data.
        try
        {
            $dataId = self::$attachmentModel->insertUploadedAttachmentData($uploadedFile, $userId, $extraData);
        }
        catch (XenForo_Exception $e)
        {
            return false;
        }

        // Step 2: create the attachment record linking that data to the post.
        $baseFields = array(
            'data_id' => $dataId,
            'content_type' => 'post',
            'content_id' => $postId,
            'attach_date' => $date,
            'unassociated' => 0
        );

        $writer = XenForo_DataWriter::create('XenForo_DataWriter_Attachment');
        $writer->bulkSet($baseFields);
        $writer->bulkSet($attach);
        $writer->save();

        $attachmentId = $writer->get('attachment_id');

        // Step 3: keep the post's attachment counter in sync (the SQL caps it at 65535).
        $database->query('
                UPDATE xf_post SET
                attach_count = IF(attach_count < 65535, attach_count + 1, 65535)
                WHERE post_id = ?
                ', $postId);

        return $attachmentId;
    }


    /**
     * Reset the MetaMirror_Processed flag of one row, so that the row is
     * picked up again on a later run. Does nothing when MetaMirror is
     * disabled in the options.
     *
     * @param int    $id         Value of the id column identifying the row.
     * @param string $table_name Table holding the row (must come from trusted code,
     *                           it is placed into the SQL as an identifier).
     * @param string $id_name    Name of the id column (trusted identifier as well).
     * @return void
     */
    public static function flushSingle($id, $table_name, $id_name)
    {
        $options = XenForo_Application::get('options');
        if (!$options->MetaMirror_enabled)
        {
            return;
        }

        self::_init(array());

        // For the unlikely case where editing is the first thing done
        self::addFlagsForContentType($table_name);

        // The id is bound as a parameter instead of being concatenated into the
        // statement, matching the query in importPostAttachment().
        self::$db->query('
                    UPDATE '.$table_name.' SET
                    MetaMirror_Processed = 0
                    WHERE '.$id_name.' = ?
                    ', $id);
    }

    /**
     * Process the images of one single row (post, signature, ...) immediately.
     * Does nothing beyond recording the target when MetaMirror is disabled in
     * the options.
     *
     * @param int    $id           Value of the id column identifying the row.
     * @param string $content_type Content type; 'signature' switches off attachments
     *                             and the appended source note.
     * @param string $table_name   Table holding the row (trusted identifier).
     * @param string $id_name      Name of the id column (trusted identifier).
     * @param string $column_name  Name of the column that holds the message text.
     * @return void
     */
    public static function updateSingle($id, $content_type, $table_name, $id_name, $column_name)
    {
        self::$content_type = $content_type;
        self::$table_name = $table_name;        // table that we look for files in
        self::$id_name = $id_name;              // id field for this table
        self::$column_name = $column_name;      // the name of the column that contains the message text (post body, or sig body)

        $options = XenForo_Application::get('options');
        if (!$options->MetaMirror_enabled)
        {
            return;
        }

        self::_init(array());

        if ($content_type == 'signature')
        {
            // Can't use attachments for signatures yet
            self::$use_attachments = 0;
            // We would never want to append info to a sig. Messes it up, and multiple changes would accumulate.
            self::$append = 0;
        }

        // For the unlikely case where editing is the first thing done
        self::addFlagsForContentType($table_name);

        // The id is bound as a parameter instead of being concatenated into the statement.
        $row = self::$db->fetchRow("
                SELECT * FROM ".$table_name."
                WHERE ".$id_name." = ?
                ", $id);

        // fetchRow() gives no row when the id does not exist; there is nothing to update then.
        if (!$row)
        {
            return;
        }

        self::updateSingleContent($row);
    }


    /**
     * Root entry point, used both by cron and by the testing tool.
     *
     * When $input contains 'cron_class' the call comes from cron: the run is
     * skipped if MetaMirror is disabled in the options, and 0 is returned at the
     * end. Otherwise the collected results are returned for display/debugging.
     *
     * @param array $input Run parameters; 'cron_class' marks a cron run, a truthy
     *                     'flush' resets the processed flags before running.
     * @return array|int|null The result array (not from cron), 0 (cron run),
     *                        or null when a cron run is skipped because the add-on is disabled.
     */
    public static function runMetaMirror($input)
    {
        $options = XenForo_Application::get('options');
        $fromCron = isset($input['cron_class']);

        // If it's turned off in the options, then cron must not run it.
        if ($fromCron && !$options->MetaMirror_enabled)
        {
            return;
        }

        // The init function calculates various paths, flags etc, and sets up the helper objects like self::$db
        self::_init($input);

        // Flush the cache if needed.
        if (!empty($input['flush']))
        {
            self::addFlagsForContentType('xf_post');
            self::$db->query('
                UPDATE xf_post SET
                MetaMirror_Processed = 0
                ');

            self::addFlagsForContentType('xf_user_profile');
            self::$db->query('
                UPDATE xf_user_profile SET
                MetaMirror_Processed = 0
                ');
        }

        // Then basically run once for each content type.
        // Each pass names the table, its id column and the column holding the text.

        // Posts are always processed.
        self::$content_type = 'post';
        self::$table_name = 'xf_post';
        self::$id_name = 'post_id';
        self::$column_name = 'message';
        self::runForContentType();

        // Resource updates: only when enabled and the table actually exists.
        if ($options->MetaMirror_CacheResources
            && self::$db->query("SHOW TABLES LIKE 'xf_resource_update'")->rowCount() > 0)
        {
            self::$content_type = 'resource_update';
            self::$table_name = 'xf_resource_update';
            self::$id_name = 'resource_update_id';
            self::$column_name = 'message';
            self::runForContentType();
        }

        // Signatures: here we are actually modifying the user profiles.
        // Add new content types ABOVE this one, as it clears the
        // $use_attachments and $append flags (or preserve them).
        if ($options->MetaMirror_CacheSigImages)
        {
            // Can't use attachments for signatures yet
            self::$use_attachments = 0;

            // We would never want to append info to a sig. Messes it up, and multiple changes would accumulate.
            self::$append = 0;

            self::$content_type = 'signature';
            self::$table_name = 'xf_user_profile';
            self::$id_name = 'user_id';
            self::$column_name = 'signature';
            self::runForContentType();
        }

        // if not running from cron, then we return the results for display/debugging
        return $fromCron ? 0 : self::$result;
    }

    // Update a single post or signature (or potentially other content types).
    // todo: if we call this separately, efficiently check if the magic number field is there
    //
    // The $row parameter is a row from a xf_post or xf_user_profile table, and we require the following fields:
    //   ['MetaMirror_Processed'] - magic number flag
    //   [self::$column_name] - the contents of the content type (i.e the actual post or sig)
    //   [self::$id_name] - 'post_id' or 'user_id'
    // then for post type only:
    //   ['user_id'] - user who made the post (note potential conflict with user profiles, which also use user_id as the table index.
    //   ['post_date'] - date/time when the post was made. Used to set the date/time of the attachment.
    private static function updateSingleContent($row)
    {

        self::$result['fetched'] += 1;

        // The $count variable is a retry count, for most things it will be zero, meaning
        // we have never been here before. If it's non-zero, then it's the count of the nubmer of attempts
        $count = $row['MetaMirror_Processed'];

        // Get the content of the post
        $posttext = $row[self::$column_name];

        $added_note = 0; // set when we first add a note, so we know no to add the note header twice

        // match up [img]...[/img], finding the text inbetween
        if(!preg_match_all('/\[img.*?\](.*?)\[\/img\]/is', $posttext, $matches))
        {
            // Nothing found, so just return.
            return;
        }

        // the array $matches now contains all found instances of the search string
        // $matches[1] is the array of matches to the first parentesized subpattern (the (.*?) above)
        // i.e. the url of the image.
        $error = 0;
        $modified = 0;

        // Remove duplicate image URLS
        // we onlt need to handle each one once, the replacement will affect all usages
        $matches[1] = array_unique($matches[1]);

        foreach($matches[1] as $key => $original_url)
        {
            // $value is the url
            // $key is unimportant

            // now ignore URLs that contain the substring in the self::$metaMirror_ignore array
            // this could theoretically skip files contining these names, so you could add the http://
            // but probably best to have to top domain only
            $do_ignore = false;
            foreach(self::$metaMirror_ignore as $ignore_value)
            {
                if($original_url && $ignore_value && strpos($original_url, $ignore_value) !== false)
                {
                    $do_ignore = true;
                    break;
                }
            }

            $url_info = @parse_url($original_url);



            if($do_ignore || !isset($url_info['host']) || !isset($url_info['path']) || !isset($url_info['scheme']))
            {
                if (!$do_ignore) XenForo_Helper_File::log('MetaMirror','CORRUPT URL: '.$original_url,true);

                $count = MMR_MAGIC; // flag as done (it's on the ignore list, or has corrupt URL)
                // Write the magic number count back into the database
                // we will never select this post again
                if (self::$commit)
                {
                    self::$db->update(self::$table_name,
                        array('MetaMirror_Processed' => $count),
                        self::$id_name.' = ' . $row[self::$id_name]
                    );
                }
                continue;
            }

            self::$result['not_ignored'] += 1;

/////////////////////////////////////////////////////////////////////
// Create the new file name, and path to it.
            // create a file name by concatanating the URL with the full path
            // and replacing any non-alphanumeric (or .) charaters with an underscore
            $new_name = preg_replace('#[^A-Za-z0-9_\.]#', '_', $url_info['host'].$url_info['path']);

            $safe_extension = "";  // extension we can add later.
            // Strip out non-image extensions from images
            $new_parts = pathinfo($new_name);
            if (isset($new_parts['extension']))
            {
                $lower_extension = strtolower($new_parts['extension']);
                $valid_extensions = array('jpg','jpeg','png','gif','bmp','ico');
                if (!in_array($lower_extension,$valid_extensions))
                {
                    // it's not a valid safe extension, so kill the extension by replacing ALL dots with _
                    // This is the same expression as above, but without the dot.
                    $new_name = preg_replace('#[^A-Za-z0-9_]#', '_', $new_name);
                }
                else
                {
                    // it is in the array, so we can use it
                    $safe_extension = $lower_extension;
                }
            }


            // If there's a query, then MD5 it and add that to the new file name
            // if we found a safe file extension, then also use that.
            if (isset($url_info['query']))
            {
                $new_name .= '_'.md5($url_info['query']).'.'.$safe_extension;
            }

            // If we are using attachments, then put an 'a' in front of the name
            // to ensure we don't erase files that we previously had as direct links
            if (self::$use_attachments)
            {
                $new_name = 'a'.$new_name;
            }

            $path = self::$externalDataPath;

            // ensure the cache folder exists
            @mkdir($path, 0755, true);

            // ensure sufficient permissions
            @chmod($path, 0755);

            // If we want short names, then we md5 the whole things and add back any extension.
            $options = XenForo_Application::get('options');
            if ($options->MetaMirror_ShortNames)
            {
                $new_name = md5($new_name).'.'.$safe_extension;
            }

            // absolute filename of file within that folder
            $filename = "$path/$new_name";
//////////////////////////////////////////////////////////////////////////////////////////////////

            $file_size = 0;

            $do_update_message_text = 0;
            if (!@file_exists($filename) )
            {
                self::$result['files_attempted_downloaded'] += 1;


                // If we are not committing, then first try to just fetch the file size
                if (!self::$commit && !self::$save)
                {
                    $file_size = curl_get_file_size($original_url);
                    if ($file_size == -1)
                        $file_size = 0;
                    self::$result['bytes_skipped'] += $file_size;
                }

                $image_data = "";
                // Could not get the file, so try to get it from the remote server
                if ($file_size == 0)
                {
                    $image_data = loadImage($original_url);
                    XenForo_Helper_File::log('MetaMirror','Attempted load'.$original_url.' Gives '.strlen($image_data).' Bytes ',true);
                }

                $broken_link = 0;
                // not broken if we fetched a file size
                // but if we failed to fetch a size AND we then failed to load the
                // image, then it is broken
                if ($file_size == 0 && strlen($image_data) == 0)
                {
                    $broken_link = 1;
                }

                // if either the inspected file size, or the size of the actual image data are too large
                // then just kill them
                if ($file_size > self::$size_limit || strlen($image_data) > self::$size_limit)
                {
                    if (!$file_size) $file_size = strlen($image_data);
                    self::$result['log'][] = array('post_id'=>$row[self::$id_name],'size'=>$file_size, 'old'=>$original_url, 'new'=>'File size exceeded, limit='.self::$size_limit. ' bytes');
                    $file_size = 0;
                    $image_data = '';
                    $count = MMR_MAGIC; // flag as done, we don't want to re-load this huge file.
                }


                // odd, yet correct logic. We might have set $file_size earlier without loading the file.
                if (strlen($image_data)>0)
                    $file_size = strlen($image_data);

                $file_exists_locally = 0; // This is a flag just used for the test tool display, says if the file has been downloaded
                // Bit of dodgy logic here for non-commits, should probably separate it out.

                // check if we have a file
                if ($file_size > 0)
                {
                    // only actually write the file if we are going to commit
                    if (self::$commit || self::$save)
                    {
                        // we want to ensure that it actually is an image
                        // and not some dodgy code
                        // so save it to a temporary file
                        // then rename it.
                        $tmp = $path.'/'.uniqid("meta_");
                        $file_is_loaded = @file_put_contents($tmp, $image_data);
                        if ($file_is_loaded)
                        {

                            // If the image returned is too small, then exif_imagetype will crash
                            // so just default to 0 for really small images. (docs say 12. I say 16 to be safe).
                            // smallest gif is about 42 bytes.
                            $exif_type = 0;
                            if (strlen($image_data)>=16)
                            {
                                $exif_type = exif_imagetype($tmp);
                            }

                            if (!$exif_type)
                            {
                                // not an image, so delete it
                                unlink($tmp);
                                $broken_link = 1;
                                $file_is_loaded = false;
                                $count = MMR_MAGIC; // flag as done (it's on the ignore list, or has corrupt URL)
                                // Write the magic number count back into the database
                                // we will never select this post again
                                if (self::$commit)
                                {
                                    self::$db->update(self::$table_name,
                                        array('MetaMirror_Processed' => $count),
                                        self::$id_name.' = ' . $row[self::$id_name]
                                    );
                                    self::$result['log'][] = array('post_id'=>$row[self::$id_name],'size'=>0, 'old'=>$original_url, 'new'=>'FILE IS NOT AN IMAGE');

                                }
                            }
                            else
                            {
                                // It IS a valid image, so we just rename the temp file with the new filename


                                // But first, try to make the correct extension for the file type
                                // if it has one, then
                                $extension = XenForo_Helper_File::getFileExtension($new_name);
                                $imageTypes = array(
                                    'gif' => 'image/gif',
                                    'jpg' => 'image/jpeg',
                                    'jpeg' => 'image/jpeg',
                                    'jpe' => 'image/jpeg',
                                    'png' => 'image/png'
                                );

                                // only change the extension if it's not one of those
                                if (!in_array($extension, array_keys($imageTypes)))
                                {
                                    $nice_extensions = array('', 'gif','jpg','png');
                                    if ($exif_type < 1 || $exif_type > 3)
                                        $exif_type = 2; // default jpg
                                    $new_name .= '_.'.$nice_extensions[$exif_type];

                                }

                                // File name ($new_name) might now be too long,
                                // clamp $new_name to 100  bytes, to handle this and also the limit on the attachment system filename length
                                $maxlen = 100;
                                if (strlen($new_name) > $maxlen)
                                {
                                    $extension = XenForo_Helper_File::getFileExtension($new_name);
                                    // take the first 100 bytes, less 32 for the md5, less 5 for the extension (.jpeg), less one for safety
                                    // then create a new name with that, plus md5, plus extension
                                    // this should give a 98 or 99 byte filename, mostly 98
                                    $new_name = substr($new_name,0,$maxlen-32-6).md5($new_name).'.'.$extension;
                                }
                                $filename = "$path/$new_name";
                                rename($tmp, $filename);
                                $file_exists_locally = 1;
                            }
                        }
                    }
                    else
                    {
                        $file_is_loaded = true;
                    }

                    // Success, attempt to save it
                    if($file_is_loaded === false)
                    {
                        // Error during saving
                        $error = 1;
                    }
                    else
                    {
                        self::$result['files_downloaded'] += 1;
                        self::$result['bytes_downloaded'] += strlen($image_data);
                        // we got the contents and saved them just fine, so need to update
                        $do_update_message_text = 1;
                        // Logging goes to internal_data
                        if (self::$commit)
                            XenForo_Helper_File::log('MetaMirror','Rehosted: '.$original_url.' For '.self::$content_type.' # '.$row[self::$id_name],true);

                    }
                }
                else
                {
                    // error fetching contents (length of 0)
                    $error = 1;
//                        self::$result['log'][] = array('post_id'=>$row[self::$id_name],'size'=>0, 'old'=>$original_url, 'new'=>'ERROR FETCHING FILE retries = '.$count);
                }
            }
            else
            {
                // we have an existing file, so just update with that
                self::$result['files_matched'] += 1;
                $do_update_message_text = 1;
                $file_exists_locally = 1;
                $file_size = filesize($filename);
            }

            if ($do_update_message_text)
            {
                $modified = 1; // flag some changes to message contents, so now we need to flush the bb cache
                $replace_value = self::$replace_base_url.$new_name;

                $update_successful = 0;
                // if we want to use attachments, and we actually have an attachment, then try that
                if (self::$use_attachments)
                {

                    if (self::$commit)
                    {
                        $attachment_id = self::importPostAttachment(self::$db, $new_name, $filename, $row['user_id'], $row[self::$id_name], $row['post_date'] );

                        // only do the message modification if we got a valid attachment_id
                        if ($attachment_id)
                        {
                            $replace_value = '[attach=full]'.$attachment_id.'[/attach]';
                            $posttext = str_ireplace('[img]'.$original_url.'[/img]',
                                $replace_value,
                                $posttext);
                            self::$result['log'][] = array('post_id'=>$row[self::$id_name],'size'=>number_format($file_size), 'old'=>$original_url, 'new'=>'Attachment '.$attachment_id, 'new_url'=>'/attachments/.'.$attachment_id, 'file_local'=>$file_exists_locally);
                            $update_successful = 1;
                        }
                        else
                        {
                            $replace_value = 'Failed to import as attachment';
                            self::$result['log'][] = array('post_id'=>$row[self::$id_name],'size'=>number_format($file_size), 'old'=>$original_url, 'new'=>'(TEMP)'.$replace_value, 'new_url'=>$replace_value, 'file_local'=>$file_exists_locally);
                        }
                    }
                    else
                    {
                        // Not committing, but we have the file downloaded, so can display a link to it.
                        self::$result['log'][] = array('post_id'=>$row[self::$id_name],'size'=>number_format($file_size), 'old'=>$original_url, 'new'=>'(TEMP)'.$replace_value, 'new_url'=>$replace_value, 'file_local'=>$file_exists_locally);
                    }

                }
                else
                {
                    $update_successful = 1;
                    $posttext = str_ireplace($original_url,
                        $replace_value,
                        $posttext);
                    self::$result['log'][] = array('post_id'=>$row[self::$id_name],'size'=>number_format($file_size), 'old'=>$original_url, 'new'=>$replace_value, 'new_url'=>$replace_value, 'file_local'=>$file_exists_locally);
                }

                if ($update_successful && self::$append)
                {
                    if (!$added_note)
                    {
                        // This is the note header, only add it once per note
                        // adding a CR automatically.
                        $added_note = 1;
                        $posttext .= "\n\n".self::$append_note."\n";
                    }
                    $posttext .= '[URL]'.$original_url."[/URL]\n";
                }



                if (self::$commit)
                    XenForo_Helper_File::log('MetaMirror','Modified '.self::$content_type.' # '.$row[self::$id_name]. " Changed: ". $original_url." => ".$replace_value,true);
                self::$result['urls_updated'] += 1;
            }
            else
            {
                // check to see if it failed (i.e. a broken link) and note it if flagged
                if (self::$flag_broken && $broken_link)
                {
                    $replace_value = self::$broken_note . '[URL]' . $original_url . '[/URL]';
                    $posttext = str_ireplace('[img]'.$original_url.'[/img]',
                        $replace_value,
                        $posttext);
                    $modified = 1;
                    if (self::$commit)
                    {
                        XenForo_Helper_File::log('MetaMirror','Modified '.self::$content_type.' # '.$row[self::$id_name]. " Broken IMG Link ". $original_url,true);
                    }
                    self::$result['log'][] = array('post_id'=>$row[self::$id_name],'size'=>0, 'old'=>$original_url, 'new'=>'FLAGGED BROKEN LINK');
                }
            }
        } // next [IMG]...[/IMG] match within the post

        // After we've done all the matches in this post, then we can write out the new message text
        // if there have been no errors.

        if ($error == 0)
        {
            // No error, so modify up the source and write back
            $count = MMR_MAGIC; // flag as done
        }
        else
        {
            if ($count != MMR_MAGIC)
            {
                // Error occurred, so maybe retry next time, up to MMR_MAX_RETRIES times
                $count += 1;
                if ($count > MMR_MAX_RETRIES)
                    $count = MMR_MAGIC; // flag as done (just given up)
            }
        }

        // Write the post text and count back into the database
        if (self::$commit)
        {
            self::$db->update(self::$table_name,
                array('MetaMirror_Processed' => $count,
                    self::$column_name => $posttext
                ),
                self::$id_name.' = ' . $row[self::$id_name]
            );
        }

        // We also need to flush the BB code cache for this post
        // Assuming it exists. Does it for a fresh install??
        // only need to do this if we actually modified a post - just updating counts does not matter
        // todo: use the actual API for this.
        if ($modified && self::$bb_cache_exists && self::$commit)
        {
            self::$db->query("DELETE FROM xf_bb_code_parse_cache WHERE
                content_id =". $row[self::$id_name] ."
                AND content_type = '".self::$content_type."'");
        }
    }


    /**
     * Make sure the given table carries the MetaMirror_Processed bookkeeping column.
     *
     * The table description is read through self::$db. When the column is
     * missing it is added as a NOT NULL smallint defaulting to 0, together
     * with an index on it. When the column already exists nothing is changed.
     *
     * @param string $table_name Name of the table to inspect and, if needed, alter.
     */
    private static function addFlagsForContentType($table_name)
    {
        $columns = self::$db->describeTable($table_name);

        if (isset($columns['MetaMirror_Processed']))
        {
            // The flag column is already there, so there is nothing to add.
            return;
        }

        // A smallint leaves room for the retry counter and the "done" marker value.
        $alter_sql = "ALTER TABLE ".$table_name."
            ADD MetaMirror_Processed smallint NOT NULL default '0',
            ADD INDEX (MetaMirror_Processed)";

        self::$db->query($alter_sql);
    }

    /**
     * Report whether a forum node is on the configured exclusion list.
     *
     * self::_init() is called with an empty array first, then
     * self::$exclude_forums is consulted.
     *
     * @param mixed $node_id Node id to look up.
     * @return bool False when no exclusion list is set, otherwise whether
     *              $node_id is found in self::$exclude_forums.
     */
    public static function isExcluded($node_id)
    {
        self::_init(array());

        if (self::$exclude_forums)
        {
            // Loose comparison on purpose: in_array() is used without the strict flag.
            return in_array($node_id, self::$exclude_forums);
        }

        return false;
    }

    /**
     * Fetch a batch of not-yet-finished rows for the current content type and
     * hand each one to self::updateSingleContent().
     *
     * Rows are taken from self::$table_name where MetaMirror_Processed differs
     * from MMR_MAGIC (so new rows and rows still being retried are both picked
     * up) and where self::$column_name contains a closing [/IMG] tag. At most
     * self::$max_posts rows are fetched per call.
     *
     * When the table is xf_post and self::$exclude_forums has entries, the
     * query joins xf_thread so posts in excluded forums (by node_id) can be
     * filtered out.
     *
     * TODO: if a user edits a post or signature and changes an image after the
     * row has been flagged as done, it will not be parsed again. The edit
     * callback should reset MetaMirror_Processed to 0 so the row is re-parsed.
     */
    private static function runForContentType()
    {
        // Make sure the bookkeeping column exists before we query on it.
        self::addFlagsForContentType(self::$table_name);

        // This excluder clause is used to exclude numbered forums
        $excluder = isset(self::$exclude_forums[0])
            ? ("AND node_id NOT IN (".self::$db->quote(self::$exclude_forums).")")
            : "";

        // The limit is concatenated into the SQL, so force it to an integer.
        $limit = (int) self::$max_posts;

        $rows = array();
        if (self::$table_name == 'xf_post' && $excluder != "")
        {
            // We only can do exclusions where we can get a node_id, and if there's actually any exclusion set.
            //
            // xf_thread.* is selected BEFORE the post columns on purpose: an associative
            // fetch keeps the last value for a duplicated column name, and both tables
            // have user_id, username and post_date. With a plain "SELECT *" the thread's
            // values overwrote the post's own, so $row['user_id'] / $row['post_date']
            // described the thread starter rather than the post being processed.
            // MetaMirror_Processed is qualified so the filter always refers to the post row.
            $rows = self::$db->fetchAll("
                SELECT xf_thread.*, ".self::$table_name.".* FROM ".self::$table_name."
                INNER JOIN xf_thread ON (xf_thread.thread_id = xf_post.thread_id)
                WHERE  ".self::$table_name.".MetaMirror_Processed != ".MMR_MAGIC."
                ".$excluder."
                AND ".self::$column_name." LIKE '%[/IMG]%'
                LIMIT 0, ".$limit
            );
        }
        else
        {
            // Generic content (i.e. Signatures) with no forum attached, or there's no exclusion test needed.
            // so we just fetch it
            $rows = self::$db->fetchAll("
                SELECT * FROM ".self::$table_name."
                WHERE  MetaMirror_Processed != ".MMR_MAGIC."
                AND ".self::$column_name." LIKE '%[/IMG]%'
                LIMIT 0, ".$limit."
                ");
        }

        // We now have up to self::$max_posts rows, so process them one at a time.
        foreach ($rows as $row)
        {
            self::updateSingleContent($row);
        } // Next row (post)
    }

}


?>