* This file is licensed under the Affero General Public License version 3 or
 * later.
 * See the COPYING-README file.
 */

namespace OC;

/**
 * Helper for fetching the content and headers of remote HTTP(S) URLs.
 *
 * cURL is used when the extension is available; otherwise the PHP stream
 * wrappers are used. In both cases only the http and https protocols are
 * accepted, including for redirect targets.
 */
class HTTPHelper {
	const USER_AGENT = 'ownCloud Server Crawler';

	/** Upper bound on the number of redirects that are followed for one request */
	const MAX_REDIRECTS = 10;

	/** @var \OC\AllConfig */
	private $config;

	/**
	 * @param \OC\AllConfig $config
	 */
	public function __construct(AllConfig $config) {
		$this->config = $config;
	}

	/**
	 * Returns the default stream context array
	 * @return array
	 */
	public function getDefaultContextArray() {
		return array(
			'http' => array(
				'header' => 'User-Agent: ' . self::USER_AGENT . "\r\n",
				'timeout' => 10,
				'follow_location' => false, // Do not follow the location since we can't limit the protocol
			),
			'ssl' => array(
				'disable_compression' => true
			)
		);
	}

	/**
	 * Get URL content
	 *
	 * This function gets the content of a page via curl, if curl is enabled.
	 * If not, file_get_contents is used.
	 *
	 * @param string $url Url to get content
	 * @throws \Exception If the URL does not start with http:// or https://
	 * @return string|false content of the response or false on error
	 */
	public function getUrlContent($url) {
		if (!$this->isHTTPURL($url)) {
			throw new \Exception('$url must start with https:// or http://', 1);
		}

		$proxy = $this->config->getSystemValue('proxy', null);
		$proxyUserPwd = $this->config->getSystemValue('proxyuserpwd', null);

		if (function_exists('curl_init')) {
			return $this->getUrlContentViaCurl($url, $proxy, $proxyUserPwd);
		}

		// Stream fallback: resolve the redirects ourselves because the stream
		// wrapper cannot restrict the protocols of redirect targets.
		$url = $this->getFinalLocationOfURL($url);
		$contextArray = $this->getDefaultContextArray();
		if ($proxy !== null) {
			$contextArray['http']['proxy'] = $proxy;
		}
		$ctx = stream_context_create($contextArray);

		// Warnings are suppressed on purpose: failure is reported to the
		// caller through the false return value.
		return @file_get_contents($url, false, $ctx);
	}

	/**
	 * Fetches the content of a URL with the cURL extension.
	 *
	 * @param string $url HTTP or HTTPS URL
	 * @param string|null $proxy proxy to use, null for a direct connection
	 * @param string|null $proxyUserPwd proxy credentials, null for none
	 * @return string|false content of the response or false on error
	 */
	private function getUrlContentViaCurl($url, $proxy, $proxyUserPwd) {
		$curl = curl_init();

		curl_setopt($curl, CURLOPT_HEADER, 0);
		curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
		curl_setopt($curl, CURLOPT_URL, $url);
		curl_setopt($curl, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
		curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
		curl_setopt($curl, CURLOPT_USERAGENT, self::USER_AGENT);
		if ($proxy !== null) {
			curl_setopt($curl, CURLOPT_PROXY, $proxy);
		}
		if ($proxyUserPwd !== null) {
			curl_setopt($curl, CURLOPT_PROXYUSERPWD, $proxyUserPwd);
		}

		if ($this->canCurlFollowLocation()) {
			curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
			curl_setopt($curl, CURLOPT_MAXREDIRS, self::MAX_REDIRECTS);
			$data = curl_exec($curl);
		} else {
			// CURLOPT_FOLLOWLOCATION is not usable here, so the redirect
			// chain is resolved manually before the body is requested.
			curl_setopt($curl, CURLOPT_FOLLOWLOCATION, false);
			$finalUrl = $this->resolveRedirectsWithCurl($curl, $url);
			if ($finalUrl === false) {
				$data = false;
			} else {
				curl_setopt($curl, CURLOPT_URL, $finalUrl);
				$data = curl_exec($curl);
			}
		}

		curl_close($curl);

		return $data;
	}

	/**
	 * Tells whether CURLOPT_FOLLOWLOCATION may be enabled, which requires
	 * open_basedir to be unset AND safe_mode to be disabled.
	 *
	 * @return bool
	 */
	private function canCurlFollowLocation() {
		if (ini_get('open_basedir') !== '') {
			return false;
		}

		// ini_get() returns false when the setting does not exist, and
		// '' or '0' (or the literal ini text) for a disabled boolean setting.
		$safeMode = strtolower((string)ini_get('safe_mode'));

		return in_array($safeMode, array('', '0', 'off'), true);
	}

	/**
	 * Follows redirects manually using header-only requests.
	 *
	 * @param resource $curl configured handle; it is copied, not modified
	 * @param string $url URL to start from
	 * @return string|false the last URL reached, or false if the redirect
	 *                      limit was exhausted
	 */
	private function resolveRedirectsWithCurl($curl, $url) {
		// The copy inherits every option of $curl (protocol restrictions,
		// proxy, user agent); only the header-only behaviour is added.
		$probe = curl_copy_handle($curl);
		curl_setopt($probe, CURLOPT_HEADER, true);
		curl_setopt($probe, CURLOPT_NOBODY, true);
		curl_setopt($probe, CURLOPT_FORBID_REUSE, false);
		curl_setopt($probe, CURLOPT_RETURNTRANSFER, true);

		$remaining = self::MAX_REDIRECTS;
		$currentUrl = $url;
		do {
			curl_setopt($probe, CURLOPT_URL, $currentUrl);
			$header = curl_exec($probe);

			$redirected = false;
			if ($header !== false && !curl_errno($probe)) {
				$code = (int)curl_getinfo($probe, CURLINFO_HTTP_CODE);
				// Header names are case-insensitive; stop following when no
				// usable Location header is present.
				if (in_array($code, array(301, 302, 303, 307, 308), true)
					&& preg_match('/^Location:\s*(\S.*?)\s*$/mi', $header, $matches) === 1
				) {
					$currentUrl = $matches[1];
					$redirected = true;
				}
			}
		} while ($redirected && --$remaining);
		curl_close($probe);

		return $remaining > 0 ? $currentUrl : false;
	}

	/**
	 * Returns the response headers of a HTTP URL without following redirects
	 *
	 * Note: this replaces the default stream context of the running process.
	 *
	 * @param string $location Needs to be a HTTPS or HTTP URL
	 * @return array|false the headers, or false if the request failed
	 */
	public function getHeaders($location) {
		stream_context_set_default($this->getDefaultContextArray());
		return get_headers($location, 1);
	}

	/**
	 * Checks whether the supplied URL begins with HTTPS:// or HTTP:// (case insensitive)
	 * @param string $url
	 * @return bool false for anything that is not a string
	 */
	public function isHTTPURL($url) {
		if (!is_string($url)) {
			return false;
		}
		return stripos($url, 'https://') === 0 || stripos($url, 'http://') === 0;
	}

	/**
	 * Returns the last HTTP or HTTPS site the request has been redirected to using the Location HTTP header
	 * This is a very ugly workaround about the missing functionality to restrict fopen() to protocols
	 *
	 * At most MAX_REDIRECTS redirects are followed; when the limit is reached
	 * the last location found so far is returned.
	 *
	 * @param string $location Needs to be a HTTPS or HTTP URL
	 * @throws \Exception In case the initial URL is not a HTTP or HTTPS one
	 * @return string
	 */
	public function getFinalLocationOfURL($location) {
		if (!$this->isHTTPURL($location)) {
			throw new \Exception('URL must begin with HTTPS or HTTP.');
		}

		for ($hops = 0; $hops < self::MAX_REDIRECTS; $hops++) {
			$headerArray = $this->getHeaders($location);
			if (!is_array($headerArray)) {
				break;
			}

			// Header names are case-insensitive
			$headerArray = array_change_key_case($headerArray, CASE_LOWER);
			if (!isset($headerArray['location'])) {
				break;
			}

			// Non HTTP(S) targets are not followed. A repeated Location
			// header arrives as an array and is rejected by isHTTPURL().
			$target = $headerArray['location'];
			if (!$this->isHTTPURL($target)) {
				break;
			}

			$location = $target;
		}

		return $location;
	}
}