cURL Html output different from original page when rendered cURL Html output different from original page when rendered curl curl

cURL Html output different from original page when rendered


You should use <base> to specify a base url for all relative links:

If you curl http://example.com/thisPage.html, then add a base tag such as '<base href="http://example.com/" />' to your echoed output. This should technically go inside the <head>, but emitting it before the fetched HTML will still work:

// Emit the <base> tag ahead of the fetched markup so that relative links in $html resolve against the original site.
echo '<base href="http://example.com/" />', $html;

Live example: the page renders correctly with <base> and is broken without <base>.


Use this

    /**
     * Download the body of a URL with cURL.
     *
     * Redirects are followed, a browser-like User-Agent is sent, and HTTP
     * status codes >= 400 are treated as failures (CURLOPT_FAILONERROR).
     *
     * @param string $url Absolute URL to fetch.
     * @return string|false Response body, or false when the cURL handle cannot
     *                      be created or the transfer fails.
     */
    function get_data($url)
    {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        $timeout = 5;
        // Plain IE 7 User-Agent. The original value was prefixed with the label
        // "IE 7 – " (including a non-ASCII dash), which is not part of the
        // header and was sent verbatim to the server.
        $userAgent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)';

        curl_setopt_array($ch, [
            CURLOPT_URL            => $url,
            CURLOPT_USERAGENT      => $userAgent,
            CURLOPT_FAILONERROR    => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_AUTOREFERER    => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CONNECTTIMEOUT => $timeout,
            CURLOPT_TIMEOUT        => 10,
        ]);

        $data = curl_exec($ch);
        curl_close($ch);

        return $data;
    }

    /**
     * Build the value for a <base href="..."> tag from the URL of a page.
     *
     * The result is scheme + host (+ port) + the directory part of the path and
     * always ends in exactly one slash, e.g.
     * "https://example.com:8080/dir/page.html" => "https://example.com:8080/dir/".
     *
     * @param string $url URL of the fetched page.
     * @return string|null Base URL, or null when $url has no host component.
     */
    function build_base_href($url)
    {
        $parts = parse_url($url);
        if ($parts === false || !isset($parts['host'])) {
            return null;
        }

        // Scheme-relative URLs ("//host/path") carry no scheme; fall back to http.
        $scheme = isset($parts['scheme']) ? $parts['scheme'] : 'http';
        $port   = isset($parts['port']) ? ':' . $parts['port'] : '';
        $path   = isset($parts['path']) ? $parts['path'] : '/';

        // Keep everything up to and including the last slash. dirname() is not
        // used because it drops the final directory of "/dir/" and can return a
        // backslash on Windows.
        $lastSlash = strrpos($path, '/');
        $directory = $lastSlash === false ? '/' : substr($path, 0, $lastSlash + 1);

        return $scheme . '://' . $parts['host'] . $port . $directory;
    }

    /**
     * Insert a <base> tag right after the opening <head> tag of an HTML page.
     *
     * The <head> tag is matched case-insensitively and may carry attributes.
     * Only the first match is touched. When the page has no <head> tag the
     * <base> tag is prepended to the page instead.
     *
     * @param string $page     HTML markup.
     * @param string $baseHref URL to use as the base for relative links.
     * @return string Markup with the <base> tag added.
     */
    function inject_base_tag($page, $baseHref)
    {
        $baseTag = '<base href="' . htmlspecialchars($baseHref, ENT_QUOTES, 'UTF-8') . '" />';

        // A callback is used so that "$" or "\" inside the URL cannot be
        // misread as a back-reference in the replacement string.
        $replaced = 0;
        $result = preg_replace_callback(
            '/<head(?:\s[^>]*)?>/i',
            function ($match) use ($baseTag) {
                return $match[0] . "\n" . $baseTag;
            },
            $page,
            1,
            $replaced
        );

        if ($result === null || $replaced === 0) {
            return $baseTag . "\n" . $page;
        }

        return $result;
    }

    // Script part: expects the including code to have set $url. The page is
    // fetched here unless $page was already provided by the caller.
    if (isset($url)) {
        if (!isset($page)) {
            $page = get_data($url);
        }

        // get_data() returns false on failure; print nothing in that case.
        if (is_string($page)) {
            $baseHref = build_base_href($url);
            if ($baseHref !== null) {
                $page = inject_base_tag($page, $baseHref);
            }
            echo $page;
        }
    }