|
|
|
@ -25,44 +25,52 @@ class Index extends CI_Controller {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Crawl the TripAdvisor review listings configured for one city and store
 * every review that is not yet recorded.
 *
 * The listing URL template for the city is read from the
 * 'tripadvisor_website' config item. The template is expected to contain a
 * '{PAGENUM}' placeholder and the host 'www.tripadvisor.com'; the host is
 * swapped for each national site in turn. For every '.reviewSelector' node
 * found on a fetched page, the review id and title are extracted and a row is
 * added through Tripadvisor_Review_model unless detail() already returns a
 * record for that id. One line is echoed per inserted review.
 *
 * @param string $city Key of the city inside the 'tripadvisor_website'
 *                     config item. Defaults to 'Beijing'.
 * @return void
 */
function auto_update($city = 'Beijing') {
    // Several remote pages are fetched per run, so raise the time limit.
    ini_set('max_execution_time', '180');

    $ta_website = $this->config->item('tripadvisor_website');
    if (empty($ta_website) || !is_array($ta_website)) {
        // Nothing configured: there is no URL template to crawl.
        return;
    }

    $nation_mark = array(
        'www.tripadvisor.com',
        'www.tripadvisor.it',
        'www.tripadvisor.jp',
        'www.tripadvisor.es',
        'www.tripadvisor.fr',
        'www.tripadvisor.de',
    );

    foreach ($ta_website as $key_city => $item_url) {
        if ($key_city != $city) {
            continue;
        }

        // Collect the reviews from every national site.
        foreach ($nation_mark as $nation_item) {
            // Per-nation URL template; '{PAGENUM}' must stay intact here so
            // that each page below gets its own substitution.
            $nation_url = str_replace('www.tripadvisor.com', $nation_item, $item_url);

            if ($nation_item == 'www.tripadvisor.com') {
                // English site: read the first three pages, oldest page
                // first, so that earlier reviews are stored before newer ones.
                $page_mark = array('-or20-', '-or10-', '-');
                // Go through the proxy; direct access from within China is very slow.
                $nation_url = str_replace('https://www.tripadvisor.com', 'http://47.91.16.199:5052', $nation_url);
            } else {
                // Other national sites: first page only.
                $page_mark = array('-');
            }

            foreach ($page_mark as $page_num) {
                $page_url = str_replace('{PAGENUM}', $page_num, $nation_url);

                // NOTE(review): GET_HTTP is defined elsewhere; presumably it
                // returns the response body, or an empty value on failure.
                $content = GET_HTTP($page_url);
                if (empty($content)) {
                    continue;
                }

                $html_object = str_get_html($content);
                if (empty($html_object)) {
                    // The parser could not build a DOM for this page.
                    continue;
                }

                foreach ($html_object->find('.reviewSelector') as $review) {
                    // A node without an id cannot be matched to a review.
                    if (empty($review->id)) {
                        continue;
                    }

                    // Extract the review id.
                    $tr_review_id = str_replace('review_', '', $review->id);

                    $title_node = $review->find('div.quote a', 0);
                    $tr_review_title = empty($title_node) ? '' : $title_node->plaintext;

                    // Skip reviews that are already stored.
                    $existing = $this->Tripadvisor_Review_model->detail($tr_review_id);
                    if (!empty($existing)) {
                        continue;
                    }

                    $Tripadvisor_Review_Data = new StdClass;
                    $Tripadvisor_Review_Data->tr_city = $key_city;
                    $Tripadvisor_Review_Data->tr_review_title = $tr_review_title;
                    $Tripadvisor_Review_Data->tr_review_id = $tr_review_id;
                    $Tripadvisor_Review_Data->tr_datetime = date('Y-m-d H:i:s', time());

                    $tr_id = $this->Tripadvisor_Review_model->add('Tripadvisor_Review', $Tripadvisor_Review_Data);
                    echo '<br/>' . $tr_id . ' ' . $key_city . ' ' . $tr_review_id;
                }

                // Release the parsed DOM before fetching the next page.
                $html_object->clear();
                unset($html_object);
            }
        }
    }
}
|
|
|
|
|