From 62060b51fa9d3a41962c65742e947359886642fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=B9=E8=AF=9A=E8=AF=9A?= Date: Thu, 13 Jul 2017 17:36:42 +0800 Subject: [PATCH] =?UTF-8?q?TA=E8=AF=84=E8=AE=BA=E9=87=87=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tripadvisor_spider/config/config.php | 26 +++ .../tripadvisor_spider/controllers/index.php | 180 ++++++++++++++++++ .../models/Tripadvisor_Review_model.php | 162 ++++++++++++++++ .../views/find_group_result.php | 28 +++ .../tripadvisor_spider/views/welcome.php | 81 ++++++++ 5 files changed, 477 insertions(+) create mode 100644 application/third_party/tripadvisor_spider/config/config.php create mode 100644 application/third_party/tripadvisor_spider/controllers/index.php create mode 100644 application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php create mode 100644 application/third_party/tripadvisor_spider/views/find_group_result.php create mode 100644 application/third_party/tripadvisor_spider/views/welcome.php diff --git a/application/third_party/tripadvisor_spider/config/config.php b/application/third_party/tripadvisor_spider/config/config.php new file mode 100644 index 00000000..b76a507d --- /dev/null +++ b/application/third_party/tripadvisor_spider/config/config.php @@ -0,0 +1,26 @@ + 'https://www.tripadvisor.com/Attraction_Review-g298556-d4418151-Reviews{PAGENUM}China_Highlights_Guilin-Guilin_Guangxi.html', + 'Beijing' => 'http://www.tripadvisor.com/Attraction_Review-g294212-d6433772-Reviews{PAGENUM}China_Highlights_Beijing_Tours_Day_Tour-Beijing.html', + 'Xian' => 'http://www.tripadvisor.com/Attraction_Review-g298557-d6636310-Reviews{PAGENUM}China_Highlights_Terracotta_Army_Tours_Xi_an_Private_One_day_Tours-Xi_an_Shaanxi.html', + 'Shanghai' => 'http://www.tripadvisor.com/Attraction_Review-g308272-d6725426-Reviews{PAGENUM}China_Highlights_Shanghai_Day_Tour-Shanghai.html', + 'Tibet' => 'http://www.tripadvisor.com/Attraction_Review-g294223-d7020455-Reviews{PAGENUM}China_Highlights_Tibet_Private_One_day_Tours-Lhasa_Tibet.html', + 'Chengdu' => 'http://www.tripadvisor.com/Attraction_Review-g297463-d7998265-Reviews{PAGENUM}China_Highlights_Chengdu_Day_Tour-Chengdu_Sichuan.html', + 'Suzhou' => 'http://www.tripadvisor.com/Attraction_Review-g297442-d8130686-Reviews{PAGENUM}China_Highlights_Suzhou_Day_Tours-Suzhou_Jiangsu.html', + 'Hangzhou' => 'http://www.tripadvisor.com/Attraction_Review-g298559-d8062170-Reviews{PAGENUM}China_Highlights_Hangzhou_Day_Tour-Hangzhou_Zhejiang.html', + 'Huangshan' => 'http://www.tripadvisor.com/Attraction_Review-g303685-d8051315-Reviews{PAGENUM}China_Highlights_Huangshan_Day_Tour-Huangshan_Anhui.html', + 'Lijiang' => 'http://www.tripadvisor.com/Attraction_Review-g303783-d8464335-Reviews{PAGENUM}China_Highlights_Lijiang-Lijiang_Yunnan.html', + 'Zhangjiajie' => 'http://www.tripadvisor.com/Attraction_Review-g494933-d8077695-Reviews{PAGENUM}China_Highlights_Zhangjiajie_Day_Tour-Zhangjiajie_Hunan.html', + 'HongKong' => 'https://www.tripadvisor.com/Attraction_Review-g294217-d10243951-Reviews{PAGENUM}China_Highlights_Hong_Kong-Hong_Kong.html', + 'Panda' => 'https://www.tripadvisor.com/Attraction_Review-g297463-d11489225-Reviews{PAGENUM}China_Highlights-Chengdu_Sichuan.html' +); + + + + + diff --git a/application/third_party/tripadvisor_spider/controllers/index.php b/application/third_party/tripadvisor_spider/controllers/index.php new file mode 100644 index 00000000..08f9aa99 --- /dev/null +++ b/application/third_party/tripadvisor_spider/controllers/index.php @@ -0,0 +1,180 @@ +output->enable_profiler(TRUE); + $this->load->model('Tripadvisor_Review_model'); + } + + public function index($city = 'Beijing') { + $data = array(); + $data['city'] = $city; + $data['ta_review_list'] = $this->Tripadvisor_Review_model->search(10, $data['city']); + $this->load->view('bootstrap3/header', $data); + $this->load->view('welcome'); + $this->load->view('bootstrap3/footer'); + } + + function auto_update($city = 'Beijing') { + ini_set('max_execution_time', '100'); + $ta_website = $this->config->item('tripadvisor_website'); + //分页代码,只查询前三页 + $page_mark = array('-', '-or10-', '-or20-'); + $page_mark = array('-'); + + foreach ($ta_website as $key_city => $item_url) { + if ($key_city == $city) { + foreach ($page_mark as $page_num) { + $page_url = str_replace('{PAGENUM}', $page_num, $item_url); + $content = GET_HTTP($page_url); + if (!empty($content)) { + $html_object = str_get_html($content); + foreach ($html_object->find('.reviewSelector') as $review) { + //获取到评论ID + if (!empty($review->id)) { + $tr_review_id = str_replace('review_', '', $review->id); + $tr_review_title = $review->find('div.quote a', 0); + if (empty($tr_review_title)) { + $tr_review_title = ''; + } else { + $tr_review_title = $tr_review_title->plaintext; + } + $review = $this->Tripadvisor_Review_model->detail($tr_review_id); + if (empty($review)) { + $Tripadvisor_Review_Data = new StdClass; + $Tripadvisor_Review_Data->tr_city = $key_city; + $Tripadvisor_Review_Data->tr_review_title = $tr_review_title; + $Tripadvisor_Review_Data->tr_review_id = $tr_review_id; + $Tripadvisor_Review_Data->tr_datetime = date('Y-m-d H:i:s', time()); + $tr_id = $this->Tripadvisor_Review_model->add('Tripadvisor_Review', $Tripadvisor_Review_Data); + echo '
' . $tr_id . ' ' . $key_city . ' ' . $tr_review_id; + } + } + } + } + } + } + } + } + + //获取内容更新 + function auto_update_content() { + ini_set('max_execution_time', '100'); + $update_list = $this->Tripadvisor_Review_model->update_list(10); + if (empty($update_list)) { + echo 'all done'; + return true; + } + $tr_review_id_string = ''; + foreach ($update_list as $key => $item) { + $tr_review_id_string.=$item->tr_review_id . ','; + } + $tr_review_id_string.='0'; + $url = "https://www.tripadvisor.com/ExpandedUserReviews-g298556-d4418151?target=480111710&context=1&reviews=480111710,$tr_review_id_string&servlet=Attraction_Review&expand=1"; + echo $url . '
'; + $content = GET_HTTP($url); + if (empty($content)) { + echo 'error gethttp:' . $url; + return FALSE; + } + //echo $content; + $html_object = str_get_html($content); + foreach ($html_object->find('div .extended') as $review) { + if (!empty($review->id)) { + //评论ID + $tr_review_id = str_replace('UR', '', $review->id); + //用户ID + $tr_member_id = $review->find('div.member_info div.memberOverlayLink', 0); + if (isset($tr_member_id) && isset($tr_member_id->id)) { + //UID_A50920FC5494D02709AA8F0E12294AAB-SRC_494596572 + $tr_member_id = substr($tr_member_id->id, 4, 32); + } else { + $tr_member_id = 0; + } + //用户名 + $tr_member_name = $review->find('div.username span.expand_inline', 0); + if (empty($tr_member_name)) { + $tr_member_name = 'A TripAdvisor Member'; //看不到客人名字 + } else { + $tr_member_name = $tr_member_name->plaintext; + } + //TA内容 + $tr_content = $review->find('div.entry', 0); + if (empty($tr_content)) { + $tr_content = ''; + } else { + $tr_content = $tr_content->innertext; + } + //游览时间 + $tr_visited_date = $review->find('span.recommend-titleInline', 0); + if (empty($tr_visited_date)) { + $tr_visited_date = ''; + } else { + $tr_visited_date = str_replace('Visited ', '', $tr_visited_date->plaintext); + } + //评论时间 + $tr_review_date = $review->find('span.ratingDate', 0); + if (empty($tr_review_date->title)) { + $tr_review_date = str_replace('Reviewed ', '', $tr_review_date->innertext); + } else { + $tr_review_date = $tr_review_date->title; + } + + $Tripadvisor_Review_Data = new StdClass; + $Tripadvisor_Review_Data->tr_member_id = $tr_member_id; + $Tripadvisor_Review_Data->tr_member_name = $tr_member_name; + $Tripadvisor_Review_Data->tr_content = $tr_content; + $Tripadvisor_Review_Data->tr_visited_date = $tr_visited_date; + $Tripadvisor_Review_Data->tr_review_date = $tr_review_date; + $where = array('tr_review_id' => $tr_review_id); + $this->Tripadvisor_Review_model->update('Tripadvisor_Review', $Tripadvisor_Review_Data, $where); + //print_r($Tripadvisor_Review_Data); + echo $tr_review_id . ' ' . $tr_member_id . ' ' . $tr_member_name . ' ' . $tr_content . ' ' . $tr_visited_date . ' ' . $tr_review_date . '
'; + } + } + } + + //分析评论,找出可能的团号和导游 + public function analysis_ta_review($tr_id) { + $data = array(); + $data['ta_review'] = $this->Tripadvisor_Review_model->detail_tr_id($tr_id); + if (empty($data['ta_review'])) { + echo '找不到评论内容'; + return; + } + $start_date = date('Y-m-d', strtotime($data['ta_review']->tr_visited_date)); + $end_date = date("Y-m-d", strtotime("$start_date +31 day")); + $group_list = $this->Tripadvisor_Review_model->find_group($data['ta_review']->tr_city, $start_date, $end_date); + if (empty($group_list)) { + echo json_encode(array('group_result' => '没有找到匹配团信息', 'tr_content' => $data['ta_review']->tr_content)); + return; + } + $data['match_group_list'] = array(); + //echo $data['ta_review']->tr_content . '
'; + foreach ($group_list as $item) { + //echo $item->GuideName . '
'; + if ((trim($item->GuideName) <> '') && (stripos($data['ta_review']->tr_content, $item->GuideName) !== false)) { + //查询团队中客人信息 + $item->customer_list = $this->Tripadvisor_Review_model->get_customer_info($item->coli_sn); + //填充匹配到的导游到匹配记录中 + $data['match_group_list'][] = $item; + //高亮导游名字 + //防止重复替换 + $data['ta_review']->tr_content = str_ireplace('' . $item->GuideName . '', $item->GuideName, $data['ta_review']->tr_content); + $data['ta_review']->tr_content = str_ireplace($item->GuideName, '' . $item->GuideName . '', $data['ta_review']->tr_content); + //echo $data['ta_review']->tr_content; + //echo $item->GRI_No . ',' . date("Y-m-d", strtotime($item->EOI_GetDate)) . ',' . $item->GuideName . ',' . $customer_string . '
'; + } + } + echo json_encode(array('group_result' => $this->load->view('find_group_result', $data, true), 'tr_content' => $data['ta_review']->tr_content)); + } + +} diff --git a/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php b/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php new file mode 100644 index 00000000..40998e6a --- /dev/null +++ b/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php @@ -0,0 +1,162 @@ +INFO = $this->load->database('INFO', TRUE); + $this->HT = $this->load->database('HT', TRUE); + } + + function init() { + $this->topnum = false; + $this->where = false; + $this->orderby = " ORDER BY tr_datetime DESC "; + } + + function detail_tr_id($tr_id) { + $this->init(); + $this->topnum = 1; + $this->where = ' AND tr.tr_id= ' . $this->INFO->escape($tr_id); + return $this->GetList(); + } + + function detail($tr_review_id) { + $this->init(); + $this->topnum = 1; + $this->where = ' AND tr.tr_review_id= ' . $this->INFO->escape($tr_review_id); + return $this->GetList(); + } + + function search($topnum, $tr_city) { + $this->init(); + $this->topnum = $topnum; + $this->where = ' AND tr.tr_city =' . $this->INFO->escape($tr_city); + $this->orderby = " ORDER BY tr.tr_id DESC "; + return $this->GetList(); + } + + //获取准备要更新的数据集 + function update_list($topnum = 10) { + $this->init(); + $this->topnum = $topnum; + $this->where = ' AND tr.tr_content IS NULL '; + return $this->GetList(); + } + + function GetList() { + $this->topnum ? $sql = "SELECT TOP " . $this->topnum : $sql = "SELECT "; + $sql.=" + tr.tr_id + ,tr.tr_city + ,tr.tr_review_id + ,tr.tr_review_title + ,tr.tr_member_id + ,tr.tr_member_name + ,tr.tr_review_id + ,tr.tr_review_date + ,tr.tr_visited_date + ,tr.tr_content + ,tr.tr_datetime + FROM Tripadvisor_Review tr + WHERE 1 = 1 + + "; + $this->where ? $sql.=$this->where : false; + $this->orderby ? $sql.=$this->orderby : false; + + $query = $this->INFO->query($sql); + //print_r($this->INFO->queries); + if ($this->topnum === 1) { + if ($query->num_rows() > 0) { + $row = $query->row(); + return $row; + } else { + return FALSE; + } + } else { + return $query->result(); + } + } + + public function add($table, $data) { + if ($this->INFO->insert($table, $data)) { + return $this->INFO->last_id($table); + } else { + return false; + } + } + + public function update($table, $data, $where) { + $this->INFO->update($table, $data, $where); + //print_r($this->INFO->queries); + } + + public function find_group($city, $start_date, $end_date) { + $sql = " + SELECT GRI_No + ,coli_id + ,coli_sn + ,b.EOI_ObjSN + ,a.EOI_GetDate + ,dbo.GetGuideName(b.EOI_ObjSN ,1) AS GuideName + FROM Eva_ObjectInfo AS a + INNER JOIN ConfirmLineInfo + ON COLI_GRI_SN = a.EOI_GRI_SN + AND COLI_GRI_SN>0 + INNER JOIN GroupInfo + ON GRI_SN = a.EOI_GRI_SN + LEFT JOIN Eva_ObjectInfo AS b + ON b.EOI_GRI_SN = a.EOI_GRI_SN + AND b.EOI_VEI_SN = a.EOI_ObjSN + AND b.EOI_Type = 3 + WHERE a.EOI_Type = 1 + AND a.EOI_GetDate BETWEEN ? AND ? + AND EXISTS( + SELECT VEI_SN + FROM VEndorInfo + WHERE VEI_SN =a.EOI_ObjSN + AND EXISTS + ( + SELECT TOP 1 1 + FROM CItyInfo2 ci2 + WHERE ci2.CII2_Name = ? + AND ci2.CII2_CII_SN = VEI_CII_Name + ) + ) + AND b.EOI_ObjSN IS NOT NULL + + "; + $query = $this->HT->query($sql, array($start_date, $end_date, $city)); + return $query->result(); + } + + //客人信息 + public function get_customer_info($COLI_SN) { + $sql = "SELECT CUL_IsLinkMan, + MEI_FirstName, + MEI_LastName, + MEI_Gender, + MEI_Country, + COI2_Country, + MEI_PassportNo, + MEI_Phone, + MEI_Birthday, + MEI_ServiceLang, + SYC2_CodeDiscribe + FROM CUstomerList INNER JOIN MEmberInfo ON CUL_CUI_SN=MEI_SN + LEFT JOIN COuntryInfo2 ON COI2_COI_SN=MEI_Country AND COI2_LGC=2 + LEFT JOIN V_System_Code ON MEI_ServiceLang=SYC_SN AND SYC_Type=102 AND LGC_LGC=2 + WHERE CUL_COLI_SN=?"; + $query = $this->HT->query($sql, array($COLI_SN)); + $result = $query->result(); + return $result; + } + +} diff --git a/application/third_party/tripadvisor_spider/views/find_group_result.php b/application/third_party/tripadvisor_spider/views/find_group_result.php new file mode 100644 index 00000000..9dc95862 --- /dev/null +++ b/application/third_party/tripadvisor_spider/views/find_group_result.php @@ -0,0 +1,28 @@ + \ No newline at end of file diff --git a/application/third_party/tripadvisor_spider/views/welcome.php b/application/third_party/tripadvisor_spider/views/welcome.php new file mode 100644 index 00000000..9dd8e848 --- /dev/null +++ b/application/third_party/tripadvisor_spider/views/welcome.php @@ -0,0 +1,81 @@ + +
+
+ + + +
+
+
+ + + + +
+
+
+ + + + + +
+
+ + $item) { ?> + +
+
tr_review_title; ?> 分析
+
+ +

tr_member_name; ?>:

+
tr_content; ?>
+

-Visited tr_visited_date; ?> , Reviewed tr_review_date; ?>

+
+
+ + +
+ + + +
+
+ + + \ No newline at end of file