优化ta数据采集,html转换大小阈值修改

hotfix/远程访问多媒体中心
尹诚诚 8 years ago
parent 3b21e3a85f
commit a996da044a

@@ -25,17 +25,24 @@ class Index extends CI_Controller {
}
function auto_update($city = 'Beijing') {
ini_set('max_execution_time', '100');
ini_set('max_execution_time', '180');
$ta_website = $this->config->item('tripadvisor_website');
//分页代码,只查询前三页,反过来查询,越早的越在后面
$page_mark = array('-or20-', '-or10-', '-');
$nation_mark=array('www.tripadvisor.com','www.tripadvisor.it','www.tripadvisor.jp','www.tripadvisor.es','www.tripadvisor.fr','www.tripadvisor.de');
foreach ($ta_website as $key_city => $item_url) {
if ($key_city == $city) {
foreach ($page_mark as $page_num) {
$page_url = str_replace('{PAGENUM}', $page_num, $item_url);
//采集各个国家的评论
foreach($nation_mark as $nation_item){
$page_url = str_replace('www.tripadvisor.com', $nation_item, $item_url);
if($nation_item=='www.tripadvisor.com'){//分页代码,英文站点查询前三页,反过来查询,越早的越在后面
$page_mark = array('-or20-', '-or10-', '-');
//使用代理来请求,国内直接访问会很慢
$page_url=str_replace('https://www.tripadvisor.com', 'http://47.91.16.199:5052', $page_url);
}else{
$page_mark = array('-');
}
foreach ($page_mark as $page_num) {
$page_url = str_replace('{PAGENUM}', $page_num, $page_url);
$content = GET_HTTP($page_url);
if (!empty($content)) {
$html_object = str_get_html($content);
@@ -66,6 +73,7 @@ class Index extends CI_Controller {
}
}
}
}
//获取内容更新
function auto_update_content() {

@@ -62,7 +62,7 @@ define('HDOM_INFO_ENDSPACE',7);
define('DEFAULT_TARGET_CHARSET', 'UTF-8');
define('DEFAULT_BR_TEXT', "\r\n");
define('DEFAULT_SPAN_TEXT', " ");
define('MAX_FILE_SIZE', 600000);
define('MAX_FILE_SIZE', 6000000);
// helper functions
// -----------------------------------------------------------------------------
// get html dom from file

Loading…
Cancel
Save