diff --git a/application/third_party/tripadvisor_spider/controllers/index.php b/application/third_party/tripadvisor_spider/controllers/index.php
index eaffa83e..4697c747 100644
--- a/application/third_party/tripadvisor_spider/controllers/index.php
+++ b/application/third_party/tripadvisor_spider/controllers/index.php
@@ -25,44 +25,52 @@ class Index extends CI_Controller {
}
function auto_update($city = 'Beijing') {
- ini_set('max_execution_time', '100');
+ ini_set('max_execution_time', '180');
$ta_website = $this->config->item('tripadvisor_website');
- //分页代码,只查询前三页,反过来查询,越早的越在后面
- $page_mark = array('-or20-', '-or10-', '-');
+ $nation_mark=array('www.tripadvisor.com','www.tripadvisor.it','www.tripadvisor.jp','www.tripadvisor.es','www.tripadvisor.fr','www.tripadvisor.de');
foreach ($ta_website as $key_city => $item_url) {
if ($key_city == $city) {
- foreach ($page_mark as $page_num) {
- $page_url = str_replace('{PAGENUM}', $page_num, $item_url);
- //使用代理来请求,国内直接访问会很慢
- $page_url=str_replace('https://www.tripadvisor.com', 'http://47.91.16.199:5052', $page_url);
- $content = GET_HTTP($page_url);
- if (!empty($content)) {
- $html_object = str_get_html($content);
- foreach ($html_object->find('.reviewSelector') as $review) {
- //获取到评论ID
- if (!empty($review->id)) {
- $tr_review_id = str_replace('review_', '', $review->id);
- $tr_review_title = $review->find('div.quote a', 0);
- if (empty($tr_review_title)) {
- $tr_review_title = '';
- } else {
- $tr_review_title = $tr_review_title->plaintext;
- }
- $review = $this->Tripadvisor_Review_model->detail($tr_review_id);
- if (empty($review)) {
- $Tripadvisor_Review_Data = new StdClass;
- $Tripadvisor_Review_Data->tr_city = $key_city;
- $Tripadvisor_Review_Data->tr_review_title = $tr_review_title;
- $Tripadvisor_Review_Data->tr_review_id = $tr_review_id;
- $Tripadvisor_Review_Data->tr_datetime = date('Y-m-d H:i:s', time());
- $tr_id = $this->Tripadvisor_Review_model->add('Tripadvisor_Review', $Tripadvisor_Review_Data);
- echo '
' . $tr_id . ' ' . $key_city . ' ' . $tr_review_id;
- }
- }
- }
- }
- }
+ // Collect the reviews published on each national TripAdvisor site
+ foreach($nation_mark as $nation_item){
+ $page_url = str_replace('www.tripadvisor.com', $nation_item, $item_url);
+ if($nation_item=='www.tripadvisor.com'){// Pagination: the English site is queried for its first three pages, in reverse order (older reviews sit on later pages)
+ $page_mark = array('-or20-', '-or10-', '-');
+ // Send the request through the proxy: direct access from mainland China is very slow
+ $page_url=str_replace('https://www.tripadvisor.com', 'http://47.91.16.199:5052', $page_url);
+ }else{
+ $page_mark = array('-');
+ }
+ foreach ($page_mark as $page_num) {
+ $request_url = str_replace('{PAGENUM}', $page_num, $page_url); // keep {PAGENUM} intact in $page_url so every page gets its own marker
+ $content = GET_HTTP($request_url);
+ $html_object = empty($content) ? false : str_get_html($content);
+ if (is_object($html_object)) { // skip empty responses and pages the parser did not turn into a DOM object (presumably e.g. input over MAX_FILE_SIZE - confirm in lib/simple_html_dom.php)
+ foreach ($html_object->find('.reviewSelector') as $review) {
+ // Extract the review ID from the element id ("review_<id>")
+ if (!empty($review->id)) {
+ $tr_review_id = str_replace('review_', '', $review->id);
+ $tr_review_title = $review->find('div.quote a', 0);
+ if (empty($tr_review_title)) {
+ $tr_review_title = '';
+ } else {
+ $tr_review_title = $tr_review_title->plaintext;
+ }
+ $existing_review = $this->Tripadvisor_Review_model->detail($tr_review_id); // separate name: do not overwrite the DOM node held in $review
+ if (empty($existing_review)) { // only store reviews that are not recorded yet
+ $Tripadvisor_Review_Data = new StdClass;
+ $Tripadvisor_Review_Data->tr_city = $key_city;
+ $Tripadvisor_Review_Data->tr_review_title = $tr_review_title;
+ $Tripadvisor_Review_Data->tr_review_id = $tr_review_id;
+ $Tripadvisor_Review_Data->tr_datetime = date('Y-m-d H:i:s', time());
+ $tr_id = $this->Tripadvisor_Review_model->add('Tripadvisor_Review', $Tripadvisor_Review_Data);
+ echo '
' . $tr_id . ' ' . $key_city . ' ' . $tr_review_id;
+ }
+ }
+ }
+ }
+ }
+ }
}
}
}
diff --git a/lib/simple_html_dom.php b/lib/simple_html_dom.php
index ce412794..7a95367e 100644
--- a/lib/simple_html_dom.php
+++ b/lib/simple_html_dom.php
@@ -62,7 +62,7 @@ define('HDOM_INFO_ENDSPACE',7);
define('DEFAULT_TARGET_CHARSET', 'UTF-8');
define('DEFAULT_BR_TEXT', "\r\n");
define('DEFAULT_SPAN_TEXT', " ");
-define('MAX_FILE_SIZE', 600000);
+define('MAX_FILE_SIZE', 6000000); // raised tenfold from 600000; NOTE(review): presumably the size cap the library applies to HTML it loads - confirm against its usages in this file
// helper functions
// -----------------------------------------------------------------------------
// get html dom from file