TA评论采集
parent
beb90cb875
commit
62060b51fa
@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
if (!defined('BASEPATH'))
|
||||
exit('No direct script access allowed');
|
||||
|
||||
|
||||
// TripAdvisor listing-page URL templates, keyed by the city name used in the spider's URLs.
// Each template contains exactly one {PAGENUM} placeholder that the spider replaces with a
// page marker such as '-' (first page) or '-or10-' (second page).
// All entries use https so that every fetch behaves the same way and no request depends on
// the http -> https redirect being followed by the HTTP client.
$config['tripadvisor_website'] = array(
    'Guilin' => 'https://www.tripadvisor.com/Attraction_Review-g298556-d4418151-Reviews{PAGENUM}China_Highlights_Guilin-Guilin_Guangxi.html',
    'Beijing' => 'https://www.tripadvisor.com/Attraction_Review-g294212-d6433772-Reviews{PAGENUM}China_Highlights_Beijing_Tours_Day_Tour-Beijing.html',
    'Xian' => 'https://www.tripadvisor.com/Attraction_Review-g298557-d6636310-Reviews{PAGENUM}China_Highlights_Terracotta_Army_Tours_Xi_an_Private_One_day_Tours-Xi_an_Shaanxi.html',
    'Shanghai' => 'https://www.tripadvisor.com/Attraction_Review-g308272-d6725426-Reviews{PAGENUM}China_Highlights_Shanghai_Day_Tour-Shanghai.html',
    'Tibet' => 'https://www.tripadvisor.com/Attraction_Review-g294223-d7020455-Reviews{PAGENUM}China_Highlights_Tibet_Private_One_day_Tours-Lhasa_Tibet.html',
    'Chengdu' => 'https://www.tripadvisor.com/Attraction_Review-g297463-d7998265-Reviews{PAGENUM}China_Highlights_Chengdu_Day_Tour-Chengdu_Sichuan.html',
    'Suzhou' => 'https://www.tripadvisor.com/Attraction_Review-g297442-d8130686-Reviews{PAGENUM}China_Highlights_Suzhou_Day_Tours-Suzhou_Jiangsu.html',
    'Hangzhou' => 'https://www.tripadvisor.com/Attraction_Review-g298559-d8062170-Reviews{PAGENUM}China_Highlights_Hangzhou_Day_Tour-Hangzhou_Zhejiang.html',
    'Huangshan' => 'https://www.tripadvisor.com/Attraction_Review-g303685-d8051315-Reviews{PAGENUM}China_Highlights_Huangshan_Day_Tour-Huangshan_Anhui.html',
    'Lijiang' => 'https://www.tripadvisor.com/Attraction_Review-g303783-d8464335-Reviews{PAGENUM}China_Highlights_Lijiang-Lijiang_Yunnan.html',
    'Zhangjiajie' => 'https://www.tripadvisor.com/Attraction_Review-g494933-d8077695-Reviews{PAGENUM}China_Highlights_Zhangjiajie_Day_Tour-Zhangjiajie_Hunan.html',
    'HongKong' => 'https://www.tripadvisor.com/Attraction_Review-g294217-d10243951-Reviews{PAGENUM}China_Highlights_Hong_Kong-Hong_Kong.html',
    'Panda' => 'https://www.tripadvisor.com/Attraction_Review-g297463-d11489225-Reviews{PAGENUM}China_Highlights-Chengdu_Sichuan.html',
);
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,180 @@
|
||||
<?php
|
||||
|
||||
if (!defined('BASEPATH'))
|
||||
exit('No direct script access allowed');
|
||||
|
||||
//加载第三方用于解析html的类
|
||||
require '/lib/simple_html_dom.php';
|
||||
|
||||
class Index extends CI_Controller {
|
||||
|
||||
/**
 * Runs the parent controller constructor and loads the review model used by every
 * action of this controller.
 */
public function __construct()
{
    parent::__construct();

    // Loaded once here so each action can reach it as $this->Tripadvisor_Review_model.
    $this->load->model('Tripadvisor_Review_model');
}
|
||||
|
||||
/**
 * Renders the review list page for one city.
 *
 * The view data carries the requested city and the result of the model's
 * search(10, $city) call (the 10 is presumably a row limit - confirm against the model).
 *
 * @param string $city City key handed to the model search and exposed to the views; defaults to 'Beijing'.
 */
public function index($city = 'Beijing')
{
    $view_data = array(
        'city' => $city,
        'ta_review_list' => $this->Tripadvisor_Review_model->search(10, $city),
    );

    // Only the header receives the data array explicitly; the 'welcome' view reads $city and
    // $ta_review_list too, so it presumably relies on the framework sharing view variables
    // between consecutive view loads - confirm before reordering these calls.
    $this->load->view('bootstrap3/header', $view_data);
    $this->load->view('welcome');
    $this->load->view('bootstrap3/footer');
}
|
||||
|
||||
/**
 * Scrapes the TripAdvisor listing page for one configured city and stores every review
 * that is not in the database yet (city, title, review id and the current timestamp).
 *
 * Prints one "<br/>{new row id} {city} {review id}" fragment per stored review.
 * Does nothing when the city is not a key of the `tripadvisor_website` config item.
 *
 * @param string $city Key into the `tripadvisor_website` config array; defaults to 'Beijing'.
 */
function auto_update($city = 'Beijing')
{
    ini_set('max_execution_time', '100');

    $ta_website = $this->config->item('tripadvisor_website');
    if (!is_array($ta_website) || !isset($ta_website[$city])) {
        return;
    }
    $item_url = $ta_website[$city];

    // Only the first listing page is fetched. Deeper pages use the markers '-or10-',
    // '-or20-', ...; append them here to crawl further.
    $page_mark = array('-');

    foreach ($page_mark as $page_num) {
        $page_url = str_replace('{PAGENUM}', $page_num, $item_url);
        $content = GET_HTTP($page_url);
        if (empty($content)) {
            continue;
        }

        // str_get_html() does not return a DOM object when it cannot parse the input;
        // calling find() on that result would be a fatal error.
        $html_object = str_get_html($content);
        if (!is_object($html_object)) {
            continue;
        }

        foreach ($html_object->find('.reviewSelector') as $review_node) {
            // Nodes without an id carry no review id to store.
            if (empty($review_node->id)) {
                continue;
            }
            $tr_review_id = str_replace('review_', '', $review_node->id);

            $title_node = $review_node->find('div.quote a', 0);
            $tr_review_title = empty($title_node) ? '' : $title_node->plaintext;

            // Skip reviews that were stored by an earlier run.
            $existing_review = $this->Tripadvisor_Review_model->detail($tr_review_id);
            if (!empty($existing_review)) {
                continue;
            }

            $Tripadvisor_Review_Data = new StdClass;
            $Tripadvisor_Review_Data->tr_city = $city;
            $Tripadvisor_Review_Data->tr_review_title = $tr_review_title;
            $Tripadvisor_Review_Data->tr_review_id = $tr_review_id;
            $Tripadvisor_Review_Data->tr_datetime = date('Y-m-d H:i:s');
            $tr_id = $this->Tripadvisor_Review_model->add('Tripadvisor_Review', $Tripadvisor_Review_Data);

            // The review id comes from scraped markup, so it is escaped before being printed.
            echo '<br/>' . $tr_id . ' ' . $city . ' ' . htmlspecialchars($tr_review_id, ENT_QUOTES, 'UTF-8');
        }

        // Release the parsed DOM before the next page is loaded.
        $html_object->clear();
        unset($html_object);
    }
}
|
||||
|
||||
//获取内容更新
|
||||
/**
 * Downloads the expanded view of a batch of stored reviews and writes member id, member
 * name, content, visit date and review date back to the matching rows.
 *
 * The batch comes from the model's update_list(10) (presumably up to 10 reviews that still
 * lack content - confirm against the model). Progress is printed as plain HTML.
 *
 * @return bool|null TRUE when the batch is empty, FALSE when the page could not be
 *                   downloaded or parsed, no value after a processed batch.
 */
function auto_update_content()
{
    ini_set('max_execution_time', '100');

    $update_list = $this->Tripadvisor_Review_model->update_list(10);
    if (empty($update_list)) {
        echo 'all done';
        return true;
    }

    $review_ids = array();
    foreach ($update_list as $item) {
        $review_ids[] = $item->tr_review_id;
    }
    // The list is terminated with a literal 0, exactly as the request was built before.
    $tr_review_id_string = implode(',', $review_ids) . ',0';

    $url = "https://www.tripadvisor.com/ExpandedUserReviews-g298556-d4418151?target=480111710&context=1&reviews=480111710,$tr_review_id_string&servlet=Attraction_Review&expand=1";
    echo $url . '<br/>';

    $content = GET_HTTP($url);
    if (empty($content)) {
        echo 'error gethttp:' . $url;
        return FALSE;
    }

    // str_get_html() does not return a DOM object when it cannot parse the input;
    // calling find() on that result would be a fatal error.
    $html_object = str_get_html($content);
    if (!is_object($html_object)) {
        echo 'error parsehtml:' . $url;
        return FALSE;
    }

    foreach ($html_object->find('div .extended') as $review) {
        if (empty($review->id)) {
            continue;
        }

        // Review id: the element id with every 'UR' occurrence removed.
        $tr_review_id = str_replace('UR', '', $review->id);

        // Member id. Sample element id: UID_A50920FC5494D02709AA8F0E12294AAB-SRC_494596572
        // -> skip the 4-character 'UID_' prefix and keep the next 32 characters.
        $member_node = $review->find('div.member_info div.memberOverlayLink', 0);
        if (isset($member_node) && isset($member_node->id)) {
            $tr_member_id = substr($member_node->id, 4, 32);
        } else {
            $tr_member_id = 0;
        }

        // Member name; the fallback is used when the name element is not present.
        $name_node = $review->find('div.username span.expand_inline', 0);
        $tr_member_name = empty($name_node) ? 'A TripAdvisor Member' : $name_node->plaintext;

        // Review body, kept as HTML.
        $content_node = $review->find('div.entry', 0);
        $tr_content = empty($content_node) ? '' : $content_node->innertext;

        // Visit date, without the 'Visited ' label.
        $visited_node = $review->find('span.recommend-titleInline', 0);
        $tr_visited_date = empty($visited_node) ? '' : str_replace('Visited ', '', $visited_node->plaintext);

        // Review date: the title attribute when present, otherwise the visible text without
        // the 'Reviewed ' label. A missing element yields an empty string instead of reading
        // properties of null.
        $date_node = $review->find('span.ratingDate', 0);
        if (empty($date_node)) {
            $tr_review_date = '';
        } elseif (empty($date_node->title)) {
            $tr_review_date = str_replace('Reviewed ', '', $date_node->innertext);
        } else {
            $tr_review_date = $date_node->title;
        }

        $Tripadvisor_Review_Data = new StdClass;
        $Tripadvisor_Review_Data->tr_member_id = $tr_member_id;
        $Tripadvisor_Review_Data->tr_member_name = $tr_member_name;
        $Tripadvisor_Review_Data->tr_content = $tr_content;
        $Tripadvisor_Review_Data->tr_visited_date = $tr_visited_date;
        $Tripadvisor_Review_Data->tr_review_date = $tr_review_date;
        $where = array('tr_review_id' => $tr_review_id);
        $this->Tripadvisor_Review_model->update('Tripadvisor_Review', $Tripadvisor_Review_Data, $where);

        echo $tr_review_id . ' ' . $tr_member_id . ' ' . $tr_member_name . ' ' . $tr_content . ' ' . $tr_visited_date . ' ' . $tr_review_date . '<br/>';
    }

    // Release the parsed DOM.
    $html_object->clear();
}
|
||||
|
||||
//分析评论,找出可能的团号和导游
|
||||
/**
 * Looks for tour groups whose guide is mentioned in a stored review and returns the result
 * as JSON for the review list page.
 *
 * Groups are searched for the review's city in the 31 days starting at the review's visit
 * date. A group matches when its trimmed guide name occurs (case-insensitively) in the
 * review content. Output is a JSON object with:
 *   - group_result: rendered 'find_group_result' view, or a "no match" message;
 *   - tr_content:   the review content with every matched guide name wrapped in a
 *                   highlight span.
 * When the review itself cannot be found a plain-text message is printed instead.
 *
 * @param mixed $tr_id Row id passed to the model's detail_tr_id().
 */
public function analysis_ta_review($tr_id)
{
    $data = array();
    $data['ta_review'] = $this->Tripadvisor_Review_model->detail_tr_id($tr_id);
    if (empty($data['ta_review'])) {
        echo '找不到评论内容';
        return;
    }

    $no_match_html = '<span class="text-primary">没有找到匹配团信息</span>';
    $original_content = (string) $data['ta_review']->tr_content;

    // Without a parseable visit date the search window would silently start at 1970-01-01,
    // so report "no match" directly.
    $visited_time = strtotime((string) $data['ta_review']->tr_visited_date);
    if ($visited_time === false) {
        echo json_encode(array('group_result' => $no_match_html, 'tr_content' => $original_content));
        return;
    }

    $start_date = date('Y-m-d', $visited_time);
    $end_date = date('Y-m-d', strtotime("$start_date +31 day"));
    $group_list = $this->Tripadvisor_Review_model->find_group($data['ta_review']->tr_city, $start_date, $end_date);
    if (empty($group_list)) {
        echo json_encode(array('group_result' => $no_match_html, 'tr_content' => $original_content));
        return;
    }

    $data['match_group_list'] = array();
    $name_patterns = array(); // lower-cased guide name => regex-quoted guide name

    foreach ($group_list as $item) {
        $guide_name = trim((string) $item->GuideName);

        // Match against the untouched content: matching against already highlighted content
        // would let a name such as "Dan" hit the "bg-danger" class of an earlier highlight.
        if ($guide_name === '' || stripos($original_content, $guide_name) === false) {
            continue;
        }

        // Attach the customers of the matched group for the result view.
        $item->customer_list = $this->Tripadvisor_Review_model->get_customer_info($item->coli_sn);
        $data['match_group_list'][] = $item;
        $name_patterns[strtolower($guide_name)] = preg_quote($guide_name, '/');
    }

    $highlighted_content = $original_content;
    if (!empty($name_patterns)) {
        // Longest names first so that "Lily" wins over "Li" at the same position.
        $lengths = array();
        foreach ($name_patterns as $quoted_name) {
            $lengths[] = strlen($quoted_name);
        }
        array_multisort($lengths, SORT_DESC, $name_patterns);

        // One pass over the original content: every name is wrapped exactly once and the
        // inserted markup is never searched again.
        // NOTE(review): names are still matched inside HTML tags of the stored content.
        $replaced = preg_replace(
            '/' . implode('|', $name_patterns) . '/i',
            '<span class="bg-danger text-danger">$0</span>',
            $original_content
        );
        if ($replaced !== null) {
            $highlighted_content = $replaced;
        }
    }
    $data['ta_review']->tr_content = $highlighted_content;

    echo json_encode(array(
        'group_result' => $this->load->view('find_group_result', $data, true),
        'tr_content' => $highlighted_content,
    ));
}
|
||||
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
<?php
/*
 * Partial view: lists the tour groups matched for one review.
 *
 * Reads $match_group_list; each entry provides GRI_No, EOI_GetDate, GuideName and an
 * optional customer_list whose entries provide MEI_FirstName / MEI_LastName.
 * All values are HTML-escaped because they are inserted into the page as markup.
 */
?>
<ul class="list-group">
    <?php
    if (empty($match_group_list)) {
        echo '<span class="text-primary">没有找到匹配团信息</span> ';
    } else {
        foreach ($match_group_list as $item) {
            // An unparseable date is shown as empty instead of 1970-01-01.
            $get_time = strtotime((string) $item->EOI_GetDate);
            $get_date = ($get_time === false) ? '' : date('Y-m-d', $get_time);

            $customer_names = array();
            if (!empty($item->customer_list)) {
                foreach ($item->customer_list as $item_cus) {
                    $customer_names[] = $item_cus->MEI_FirstName . ' ' . $item_cus->MEI_LastName;
                }
            }
            // Trailing blanks/commas are stripped as before (e.g. padded last names).
            $customer_string = rtrim(implode(' , ', $customer_names), ' ,');
            ?>
            <li class="list-group-item">
                <span class="text-primary"><?php echo htmlspecialchars((string) $item->GRI_No, ENT_QUOTES, 'UTF-8'); ?></span>
                , <?php echo $get_date; ?>
                , <span class="bg-danger text-danger"><?php echo htmlspecialchars((string) $item->GuideName, ENT_QUOTES, 'UTF-8'); ?></span>
                <br/>
                <?php echo htmlspecialchars($customer_string, ENT_QUOTES, 'UTF-8'); ?>
            </li>
            <?php
        }
    }
    ?>
</ul>
|
@ -0,0 +1,81 @@
|
||||
<script type="text/javascript">

</script>
<?php
/*
 * Review list page.
 *
 * Reads $city, $ta_review_list and (optionally) $search_key, plus the 'tripadvisor_website'
 * config item for the city navigation.
 * Scraped plain-text fields are escaped with double_encode disabled so that entities which
 * are already encoded in the stored text are not encoded a second time.
 * NOTE(review): the search form posts to 'apps/paypal/index/note_list' and its placeholder
 * mentions order numbers / customer e-mail - looks copied from another app; confirm.
 */
$ta_website = $this->config->item('tripadvisor_website');
if (!is_array($ta_website)) {
    $ta_website = array();
}
?>
<div class="container-fluid">
    <div class="row">

        <div class="col-md-5">
            <form method="post" action="<?php echo site_url('apps/paypal/index/note_list'); ?>">
                <div class="input-group">
                    <input type="text" name="search_key" value="<?php echo isset($search_key) ? htmlspecialchars($search_key, ENT_QUOTES, 'UTF-8') : ''; ?>" class="form-control" placeholder="订单号、客人邮箱" style="height: 33px;-webkit-box-shadow: inset 0 0px 0px rgba(0,0,0,0.075);box-shadow: inset 0 0px 0px rgba(0,0,0,0.075);border:none;border-bottom:1px solid #ddd;">
                    <span class="input-group-btn">
                        <button class="btn btn-default" type="submit" style="border:none;border-bottom:1px solid #ddd;"><span class="glyphicon glyphicon-search"></span></button>
                    </span>
                </div>
                <div id="datepicker"></div>
            </form>

            <ul class="nav nav-pills nav-stacked">
                <?php
                foreach ($ta_website as $key => $item) {
                    // Mark the currently selected city (case-insensitive comparison).
                    $active = (strcasecmp($city, $key) == 0) ? ' class="active" ' : '';
                    ?>
                    <li <?php echo $active; ?>><a href="<?php echo site_url('apps/tripadvisor_spider/index/index/' . $key); ?>"><?php echo $key; ?></a></li>
                <?php } ?>
            </ul>

        </div>
        <div class="col-md-19">
            <div class="well well-sm">
                <a href="<?php echo site_url('apps/tripadvisor_spider/index/auto_update/' . $city); ?>" target="_blank" class="btn btn-info">抓取新帖</a>
                <a href="<?php echo site_url('apps/tripadvisor_spider/index/auto_update_content/'); ?>" target="_blank" class="btn btn-info">抓取内容</a>
            </div>
            <?php if (!empty($ta_review_list)) { ?>
                <?php foreach ($ta_review_list as $key => $item) { ?>

                    <div class="panel panel-default">
                        <div class="panel-heading"><?php echo htmlspecialchars((string) $item->tr_review_title, ENT_QUOTES, 'UTF-8', false); ?> <a class="pull-right" onclick="find_group_modal(<?php echo $item->tr_id; ?>);" >分析</a></div>
                        <div class="panel-body" >

                            <p><?php echo htmlspecialchars((string) $item->tr_member_name, ENT_QUOTES, 'UTF-8', false); ?>:</p>
                            <?php // NOTE(review): tr_content is scraped HTML and is printed as markup on purpose; it is not sanitised here. ?>
                            <div id="tr_content_<?php echo $item->tr_id; ?>"><?php echo $item->tr_content; ?></div>
                            <p class="pull-right">-Visited <?php echo htmlspecialchars((string) $item->tr_visited_date, ENT_QUOTES, 'UTF-8', false); ?> , Reviewed <?php echo htmlspecialchars((string) $item->tr_review_date, ENT_QUOTES, 'UTF-8', false); ?></p>
                        </div>
                        <div id="find_group_result_<?php echo $item->tr_id; ?>"></div>

                    </div>

                <?php } ?>
            <?php } ?>

        </div>
    </div>
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
|
||||
function find_group_modal(tr_id) {
|
||||
if ($('#find_group_result_' + tr_id).html() !== '') {
|
||||
$('#find_group_result_' + tr_id).html('');
|
||||
} else {
|
||||
|
||||
$.ajax({
|
||||
type: "get",
|
||||
dataType: "json",
|
||||
url: '<?php echo site_url('apps/tripadvisor_spider/index/analysis_ta_review'); ?>' + '/' + tr_id,
|
||||
success: function(data, textStatus) {
|
||||
$('#find_group_result_' + tr_id).html(data.group_result);
|
||||
$('#tr_content_' + tr_id).html(data.tr_content);
|
||||
},
|
||||
error: function(msg) {
|
||||
alert('\u53d1\u751f\u9519\u8bef\uff0c\u8bf7\u8054\u7cfbYCC...');
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
</script>
|
Loading…
Reference in New Issue