Помощь по скриптам | Помогите спарсить сайт!
$curl_version = curl_version();
if (preg_match('#^([0-9]+)\.([0-9]+)#is', $curl_version['version'], $match) and $match[1] >= 7 and $match[2] >= 10) {
if (function_exists('gzdecode')) {
curl_setopt($this->_ch, CURLOPT_ENCODING, 'gzip, deflate');
} else {
curl_setopt($this->_ch, CURLOPT_ENCODING, '');
}
}
curl_setopt($this->_ch, CURLOPT_HTTPHEADER, array(
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
'Cache-Control: max-age=0',
'DNT: 0',
'Connection: keep-alive',
'Proxy-Connection:'
));
curl_setopt($this->_ch, CURLOPT_HEADER, 1);
}
}
/**
 * Loads a page and returns its body, using the on-disk cache when possible.
 *
 * A cached copy is returned only when debug mode is off and a non-empty
 * cache entry exists. Otherwise the request is performed through cURL
 * (GET by default, POST when $post is given) and a successful, non-empty
 * result is written to the cache.
 *
 * @param string $url URL to request
 * @param bool|string $refer value for the Referer header, or false for none
 * @param bool|array|string $post payload handed to CURLOPT_POSTFIELDS, or false for a GET request
 * @return bool|mixed|string page body, or false when the load failed
 */
public function loadPage($url, $refer = false, $post = false)
{
    // http_build_query() only accepts arrays/objects; the default
    // $post = false (or a raw string payload) must not be passed to it.
    if (is_array($post) or is_object($post)) {
        $cache_key = $url.http_build_query($post);
    } else {
        $cache_key = $url.(is_string($post) ? $post : '');
    }

    // In debug mode every request goes to the network, so skip the cache read.
    if (!$this->debug) {
        $cached_page = $this->_getCache($cache_key);
        if ($cached_page !== false and $cached_page !== '') {
            return $cached_page;
        }
    }

    $this->_redirect_current = 0;
    $this->_initCurl();

    if ($refer) {
        curl_setopt($this->_ch, CURLOPT_REFERER, $refer);
    } else {
        curl_setopt($this->_ch, CURLOPT_REFERER, null);
    }

    if ($post) {
        curl_setopt($this->_ch, CURLOPT_POST, true);
        curl_setopt($this->_ch, CURLOPT_POSTFIELDS, $post);
    } else {
        curl_setopt($this->_ch, CURLOPT_POST, false);
        curl_setopt($this->_ch, CURLOPT_HTTPGET, true);
    }

    $page = $this->_load($url);

    // Do not persist failures: a cached empty file would never be served
    // anyway and only litters the cache directory.
    if ($page !== false and $page !== '') {
        $this->_setCache($cache_key, $page);
    }

    return $page;
}
/**
 * Performs the request for $url on the prepared cURL handle and returns the body.
 *
 * Follows 301/302 redirects itself (recursively) until $this->_redirect_max
 * is reached, applies the $this->_sleep throttle, and appends every exchange
 * to $this->debug_list when debug mode is on.
 *
 * Assumes the handle was configured to return the transfer as a string and
 * to include response headers (CURLOPT_HEADER) — the setup is outside this
 * method; a non-string curl_exec() result is treated as a failure.
 *
 * @param string $url URL to request
 * @return bool|string response body without headers, or false on transport
 *                     failure, too many redirects or an unusable Location header
 */
private function _load($url)
{
    if ($this->_redirect_current >= $this->_redirect_max) {
        return false;
    }

    if ($this->_sleep > 0) {
        if (($this->_last_active_time + $this->_sleep) > time()) {
            sleep($this->_sleep);
        }
        $this->_last_active_time = time();
    }

    if ($this->debug) {
        curl_setopt($this->_ch, CURLINFO_HEADER_OUT, true);
    }
    curl_setopt($this->_ch, CURLOPT_URL, $url);

    $response = curl_exec($this->_ch);
    $curl_info = curl_getinfo($this->_ch);

    // curl_exec() yields false on transport errors; never feed that to substr().
    $failed = !is_string($response);
    $raw = $failed ? '' : $response;
    $header_size = isset($curl_info['header_size']) ? (int) $curl_info['header_size'] : 0;
    $header = (string) substr($raw, 0, $header_size);
    $body = (string) substr($raw, $header_size);

    if ($this->debug) {
        $this->debug_list[] = array(
            'url' => $url,
            'request_header' => isset($curl_info['request_header']) ? $curl_info['request_header'] : '',
            'header' => $header,
            'body' => convert_charset_pkp($body, 'cp1251', 'utf-8'),
            'curl_errno' => curl_errno($this->_ch),
            'curl_error' => curl_error($this->_ch)
        );
    }

    if ($failed) {
        return false;
    }

    if ($curl_info['http_code'] == 301 or $curl_info['http_code'] == 302) {
        // Anchor at line start so "Content-Location:" is not picked up, and
        // stay on one line so an empty value cannot swallow the next header.
        if (!preg_match('/^Location:[ \t]*([^\r\n]*)/im', $header, $matches)) {
            return false;
        }
        $location = trim($matches[1]);
        $target = ($location !== '') ? parse_url($location) : false;
        if (!is_array($target)) {
            return false;
        }

        // Relative redirects are resolved against the URL just requested.
        $base = parse_url($url);
        if (!is_array($base)) {
            $base = array();
        }
        $base_has_host = !empty($base['host']);

        if (!empty($target['scheme'])) {
            $scheme = $target['scheme'];
        } else {
            $scheme = !empty($base['scheme']) ? $base['scheme'] : 'http';
        }

        $path = isset($target['path']) ? $target['path'] : '';
        if (!empty($target['host'])) {
            $host = $target['host'];
            $port = isset($target['port']) ? ':'.$target['port'] : '';
        } else {
            // 'megapesni.me' is kept only as the historical last-resort default.
            $host = $base_has_host ? $base['host'] : 'megapesni.me';
            $port = ($base_has_host and isset($base['port'])) ? ':'.$base['port'] : '';
            $base_path = ($base_has_host and !empty($base['path'])) ? $base['path'] : '/';
            if ($path === '') {
                // Query-only redirect ("?a=b") keeps the current path.
                $path = $base_path;
            } elseif ($path[0] !== '/') {
                // Document-relative redirect: resolve inside the current directory.
                $slash = strrpos($base_path, '/');
                $path = ($slash === false ? '/' : substr($base_path, 0, $slash + 1)).$path;
            }
        }

        $query = (isset($target['query']) and $target['query'] !== '') ? '?'.$target['query'] : '';

        $this->_redirect_current++;
        return $this->_load($scheme.'://'.$host.$port.$path.$query);
    }

    return $body;
}
/**
 * Writes page data to the cache directory.
 *
 * The file name is built from the md5 hash of $url. Nothing is written
 * unless caching is switched on and a cache directory is configured.
 *
 * @param $url cache key (hashed into the file name)
 * @param $page data to store
 * @return bool|int result of file_put_contents(), or false when caching is off
 */
private function _setCache($url, $page)
{
    if (!$this->cached or !$this->_dir_cache) {
        return false;
    }

    $cache_file = $this->_dir_cache.'/parser_cache_page_'.md5($url).'.tmp';

    // LOCK_EX: take an exclusive lock on the file for the duration of the write.
    return file_put_contents($cache_file, $page, LOCK_EX);
}
/**
 * Reads page data from the cache directory.
 *
 * The file name is built from the md5 hash of $url, mirroring _setCache().
 *
 * @param $url cache key (hashed into the file name)
 * @return bool|string cached contents, or false when caching is off or no
 *                     cache file exists for this key
 */
private function _getCache($url)
{
    if (!$this->cached or !$this->_dir_cache) {
        return false;
    }

    $cache_file = $this->_dir_cache.'/parser_cache_page_'.md5($url).'.tmp';

    return file_exists($cache_file) ? file_get_contents($cache_file) : false;
}
}
// Create the parser, take the debug flag from the DEBUG constant and enable
// page caching in ./cache (path is relative to the current working directory).
$parser = new Parser();
$parser->debug = DEBUG;
$parser->setDirCache('./cache');
$parser->cached = true;
// Map the module's numeric proxy_type setting (1 or 2) onto a Parser
// proxy-type constant; any other value leaves $proxy_type at 0.
// NOTE(review): PROXY_TYPE_HTML is declared outside this view — the name
// looks like it may stand for an HTTP proxy; confirm against the class.
$proxy_type = 0;
if ($config_mod['conf']['proxy_type'] == 1) {
$proxy_type = Parser::PROXY_TYPE_HTML;
} elseif ($config_mod['conf']['proxy_type'] == 2) {
$proxy_type = Parser::PROXY_TYPE_SOCKS5;
}
$parser->setProxy($config_mod['conf']['proxy'], $proxy_type);
// Forward the visitor's own User-Agent, but only when it is longer than
// 25 characters.
if (isset($_SERVER['HTTP_USER_AGENT']) && strlen($_SERVER['HTTP_USER_AGENT']) > 25) {
$parser->setUserAgent($_SERVER['HTTP_USER_AGENT']);
}
// Cookie file used by clear_cookie_file() and handed to the parser below.
define('MOD_COOKIE_FILE', './cache/cookie_www.kinopoisk.ru.tmp');
// Current year and month, e.g. "2024-01"; not referenced in the visible part
// of this file.
define( 'FOLDER_PREFIX', date( "Y-m" ) );
/**
 * Resets the cookie file: removes the old one and creates an empty file
 * with mode 0666 at MOD_COOKIE_FILE.
 *
 * @return bool true when the empty file was created, false when the target
 *              directory does not exist or the file could not be written
 */
function clear_cookie_file()
{
    // Best-effort removal, as before; a failure here is not fatal because
    // the write below truncates the file anyway.
    if (is_file(MOD_COOKIE_FILE)) {
        @unlink(MOD_COOKIE_FILE);
    }

    // Stop early when the directory is absent instead of passing a failed
    // fopen() result on to fwrite()/fclose().
    if (!is_dir(dirname(MOD_COOKIE_FILE))) {
        return false;
    }

    if (file_put_contents(MOD_COOKIE_FILE, '') === false) {
        return false;
    }

    chmod(MOD_COOKIE_FILE, 0666);

    return true;
}
// Recreate the cookie file when it is missing or was last modified more than
// 12 hours (43200 seconds) ago.
if (!is_file(MOD_COOKIE_FILE) or filemtime(MOD_COOKIE_FILE) < (time() - 43200)) {
clear_cookie_file();
}
// Method name is spelled "setCoolieFile" here; its declaration is outside this
// view — NOTE(review): presumably a typo for "Cookie", confirm before renaming.
$parser->setCoolieFile(MOD_COOKIE_FILE);
// set_file_info() is defined outside this view; its effect is not visible here.
set_file_info('delete');
// Use $author (set outside this view) as the referer, falling back to the
// site root when it is empty.
// NOTE(review): $refer is computed but not passed to loadPage() below, so the
// call uses its default $refer = false — confirm whether
// loadPage($url_link, $refer) was intended.
if (!($refer = $author)) {
$refer = 'http://megapesni.me/';
}
// Fetch one fixed page and print the returned body as-is.
$url_link = 'http://megapesni.me/popsa/139692-vremja-i-steklo-ebojj.html';
$pars_oboi = $parser->loadPage($url_link);
echo $pars_oboi;
?>