Merge branch 'master' of gitee.com:hainatravel/information-system

hotfix/远程访问多媒体中心
lyt 6 years ago
commit 3e34078d25

@ -18,10 +18,10 @@ $config['tripadvisor_website'] = array(
'Zhangjiajie' => 'http://www.tripadvisor.com/Attraction_Review-g494933-d8077695-Reviews{PAGENUM}China_Highlights_Zhangjiajie_Day_Tour-Zhangjiajie_Hunan.html',
'HongKong' => 'https://www.tripadvisor.com/Attraction_Review-g294217-d10243951-Reviews{PAGENUM}China_Highlights_Hong_Kong-Hong_Kong.html',
'Panda' => 'https://www.tripadvisor.com/Attraction_Review-g297463-d11489225-Reviews{PAGENUM}China_Highlights-Chengdu_Sichuan.html',
'tp_Beijing' => 'https://www.tripadvisor.com/Attraction_Review-g294212-d4006739-Reviews-The_Trippest_Mini_Group_Tours-Beijing.html',
'tp_Xian' => 'https://www.tripadvisor.com/Attraction_Review-g298557-d10999897-Reviews-Xi_an_Trippest_Mini_Group_Tours-Xi_an_Shaanxi.html',
'tp_Shanghai' => 'https://www.tripadvisor.com/Attraction_Review-g308272-d6222868-Reviews-Shanghai_Trippest_Mini_Group_Tours-Shanghai.html',
'tp_Guilin' => 'https://www.tripadvisor.com/Attraction_Review-g298556-d14121459-Reviews-Trippest_Mini_Group_Tours-Guilin_Guangxi.html'
'tp_Beijing' => 'https://www.tripadvisor.com/Attraction_Review-g294212-d4006739-Reviews{PAGENUM}-The_Trippest_Mini_Group_Tours-Beijing.html',
'tp_Xian' => 'https://www.tripadvisor.com/Attraction_Review-g298557-d10999897-Reviews{PAGENUM}-Xi_an_Trippest_Mini_Group_Tours-Xi_an_Shaanxi.html',
'tp_Shanghai' => 'https://www.tripadvisor.com/Attraction_Review-g308272-d6222868-Reviews{PAGENUM}-Shanghai_Trippest_Mini_Group_Tours-Shanghai.html',
'tp_Guilin' => 'https://www.tripadvisor.com/Attraction_Review-g298556-d14121459-Reviews{PAGENUM}-Trippest_Mini_Group_Tours-Guilin_Guangxi.html'
);

@ -267,24 +267,40 @@ class Index extends CI_Controller {
}
}
public function get_destination_reviews($destination = null){
public function get_destination_reviews($destination = null,$pagenum = null){
set_time_limit(0);
$ta_website = $this->config->item('tripadvisor_website');
//根据传入的目的地简码获取TA的相应评论列表
if(isset($ta_website[$destination])){
$url = $ta_website[$destination];
if($pagenum != ''){
$url = str_replace('{PAGENUM}','-or'.$pagenum,$url);
}else{
$url = str_replace('{PAGENUM}','',$url);
}
//根据url获取页面内容
$content = GET_HTTP($url);
//进行页面解析
$html_object = str_get_html($content);
//获取第一页列表上的url
foreach ($html_object->find('.reviewSelector .quote a') as $a_info){
$url = 'https://www.tripadvisor.com'.$a_info->href;
$return = new stdClass();
$return->urls = array();
//获取每个页面上的url
foreach ($html_object->find('.reviewSelector .quote a') as $reviews_url){
array_push($return->urls,'https://www.tripadvisor.com'.$reviews_url->href);
}
print_r(json_encode($return));
}
}
/**
 * Output every stored review of one destination as JSON.
 *
 * Prints an object of the form {"list": ...} where the list is whatever
 * Tripadvisor_Review_model::get_all_reviews() returns. Nothing is printed
 * when $destination compares equal to an empty string.
 *
 * @param string $destination Destination short code handed to the model.
 */
public function get_all_reviews($destination){
    if($destination == ''){
        return;
    }
    $payload = array(
        'list' => $this->Tripadvisor_Review_model->get_all_reviews($destination)
    );
    print_r(json_encode($payload));
}
@ -294,8 +310,8 @@ class Index extends CI_Controller {
$destination = $this->input->get_post('destination');
$html_num = $this->input->get_post('html_num');
//$url = 'https://www.tripadvisor.com/ShowUserReviews-g294212-d4006739-r666168101-The_Trippest_Mini_Group_Tours-Beijing.html';
$destination = 'tp_Beijing';
//$url = 'https://www.tripadvisor.com/ShowUserReviews-g308272-d6222868-r599123490-Shanghai_Trippest_Mini_Group_Tours-Shanghai.html';
//$destination = 'tp_Beijing';
if($url != ''){
$content = GET_HTTP($url);
@ -307,15 +323,28 @@ class Index extends CI_Controller {
//提取局部,不做整个页面的寻找元素,提升效率
$meta_inner = $html_object->find('.meta_inner');
$detail_data->user_loc = '';
$detail_data->pic = array();
foreach($meta_inner as $detail_info){
//记录该条记录的id
$detail_data->html_id = $html_num;
//获取评论者帐号
foreach($detail_info->find('.info_text') as $review_name){
$detail_data->review_name = $review_name->first_child()->innertext;
}
//Get the reviewer's location
foreach($detail_info->find('.info_text .userLoc strong') as $user_loc){
$detail_data->user_loc = $user_loc->innertext;
}
//抓取评论时间
foreach($detail_info->find('.ratingDate') as $ratingDate){
$detail_data->rating_date = date('Y-m-d',strtotime($ratingDate->title));
}
//Get the review ID (the element id with the "review_" prefix removed)
foreach($detail_info->find('.reviewSelector') as $review_id){
$detail_data->review_id = str_replace('review_','',$review_id->id);
@ -337,13 +366,21 @@ class Index extends CI_Controller {
$detail_data->content = $content->innertext;
}
//获取评论时间
foreach($detail_info->find('.prw_reviews_stay_date_hsx') as $review_date){
$detail_data->review_date = str_replace('<span class="stay_date_label">Date of experience:</span> ','',$review_date->innertext);
//获取体验时间
foreach($detail_info->find('.prw_reviews_stay_date_hsx') as $experience_date){
$detail_data->experience_date = date('Y-m-d',strtotime(str_replace('<span class="stay_date_label">Date of experience:</span> ','',$experience_date->innertext)));
}
//抓取图片
foreach($detail_info->find('.imgWrap .noscript') as $imgWrap){
$imgWrap->src = str_replace('photo-l','photo-s',$imgWrap->src);
array_push($detail_data->pic,$imgWrap->src);
}
}
//拿到数据后进行入库
$this->Tripadvisor_Review_model->add_reviews($detail_data);
print_r(json_encode($detail_data));
}
}

@ -161,5 +161,39 @@ class Tripadvisor_Review_model extends CI_Model {
$result = $query->result();
return $result;
}
/**
 * Insert one scraped review into Ta_Reviews unless a row with the same
 * tr_review_id already exists.
 *
 * Fields the scraper did not fill in are bound as NULL (pictures as an
 * empty JSON list) instead of being read as undefined properties.
 * tr_gri_no and tr_tgi_sn are stored as empty strings and tr_datetime is
 * set by the database via GETDATE().
 *
 * @param object $detail_data Scraped review; the properties read here are
 *                            review_id, destination, title, content,
 *                            review_name, user_loc, star_nums, rating_date,
 *                            experience_date and pic.
 * @return bool FALSE when the review has no id or the query call returned
 *              FALSE, TRUE otherwise.
 */
public function add_reviews($detail_data){
    // Without an id the existence check below can never match
    // (tr_review_id = NULL is never true), so every call would insert
    // another blank row. Refuse such records instead.
    if(!isset($detail_data->review_id) || $detail_data->review_id === ''){
        return FALSE;
    }

    $sql = "
        IF NOT EXISTS(
            select tr_review_id from Ta_Reviews where tr_review_id = ?
        )
        insert into Ta_Reviews
        (
            tr_destination,
            tr_review_id,
            tr_review_title,
            tr_content,
            tr_member_name,
            tr_member_loc,
            tr_member_starts,
            tr_review_date,
            tr_visited_date,
            tr_review_pics,
            tr_gri_no,
            tr_tgi_sn,
            tr_datetime
        )values(
            ?,?,?,?,?,?,?,?,?,?,?,?,GETDATE()
        )
    ";

    // Properties bound to the insert placeholders, in column order.
    $fields = array(
        'destination',
        'review_id',
        'title',
        'content',
        'review_name',
        'user_loc',
        'star_nums',
        'rating_date',
        'experience_date'
    );

    // The first binding feeds the existence check.
    $params = array($detail_data->review_id);
    foreach($fields as $field){
        $params[] = isset($detail_data->$field) ? $detail_data->$field : NULL;
    }
    // Store '[]' rather than 'null' when no pictures were collected.
    $params[] = json_encode(isset($detail_data->pic) ? $detail_data->pic : array());
    $params[] = ''; // tr_gri_no
    $params[] = ''; // tr_tgi_sn

    return $this->INFO->query($sql, $params) !== FALSE;
}
/**
 * Fetch all rows of Ta_Reviews for one destination, ordered by
 * tr_review_date descending.
 *
 * @param string $destination Value matched against tr_destination.
 * @return mixed Whatever result() yields for the executed query.
 */
public function get_all_reviews($destination){
    $bindings = array($destination);
    $result_set = $this->INFO->query(
        'select * from Ta_Reviews where tr_destination = ? order by tr_review_date desc',
        $bindings
    );
    return $result_set->result();
}
}

@ -10,9 +10,15 @@
<li role="presentation" class="active">
<a href="#handinput" aria-controls="handinput" role="tab" data-toggle="tab">手动录入</a>
</li>
<li role="presentation">
<a href="#listinput" aria-controls="listinput" role="tab" data-toggle="tab">列表抓取</a>
</li>
<li role="presentation">
<a href="#excelinput" aria-controls="excelinput" role="tab" data-toggle="tab">excel导入</a>
</li>
<li role="presentation">
<a href="#list_view" aria-controls="list_view" role="tab" data-toggle="tab">数据预览</a>
</li>
</ul>
<div class="tab-content">
<div role="tabpanel" class="tab-pane active" id="handinput">
@ -30,17 +36,21 @@
<div class="review_content">
<div class="col-md-4">
<p class="review_name"></p>
<p class="user_loc"></p>
<p class="review_stars"></p>
</div>
<div class="col-md-20">
<p class="ta_title"></p>
<p class="rating_date"></p>
<p class="ta_content"></p>
<p class="review_date"></p>
<p class="review_pic"></p>
<p class="experience_date"></p>
</div>
</div>
</div>
</div>
</div>
<div role="tabpanel" class="tab-pane" id="excelinput">
<div class="row" style="margin-top:10px;">
<div class="col-md-18">
@ -57,6 +67,50 @@
</div>
</div>
</div>
<div role="tabpanel" class="tab-pane" id="listinput">
<div class="row" style="margin-top:10px;">
<div class="col-md-9">
<select name="destination" class="form-control" id="destination">
<option value="">选择站点</option>
<option value="tp_Beijing">tp_Beijing</option>
<option value="tp_Shanghai">tp_Shanghai</option>
<option value="tp_Guilin">tp_Guilin</option>
</select>
</div>
<div class="col-md-9">
<select name="nums" class="form-control" id="pagenums">
<option value="">选择抓取条数</option>
<option value="">1~10</option>
<option value="10">11~20</option>
<option value="20">21~30</option>
</select>
</div>
<div class="col-md-6">
<a href="#" id="batchcontents" class="btn btn-info">批量抓取</a>
</div>
</div>
</div>
<div role="tabpanel" class="tab-pane" id="list_view">
<div class="row" style="margin-top:10px;">
<div class="col-md-9">
<select name="destination" class="form-control" id="view_destination">
<option value="">选择站点</option>
<option value="tp_Beijing">tp_Beijing</option>
<option value="tp_Shanghai">tp_Shanghai</option>
<option value="tp_Guilin">tp_Guilin</option>
</select>
</div>
<div class="col-md-9">
<a href="#" id="view_list" class="btn btn-info">查看数据</a>
</div>
</div>
<div id="list_view_content" style="margin-top:20px;">
</div>
</div>
</div>
</div>
</div>
@ -70,6 +124,7 @@ $(function(){
//获取填写的url
var ta_url = $('#ta_url').val();
var stars = '';
var pic_htm = '';
if(ta_url == ''){
alert('请填写需要采集的TA地址');
}else{
@ -80,8 +135,14 @@ $(function(){
var data = $.parseJSON(json);
console.log(data);
$('.ta_content').html(data.content);
$('.review_date').html('Date of experience: '+data.review_date);
$('.experience_date').html('Date of experience: '+data.experience_date);
$('.review_name').html(data.review_name);
$('.user_loc').html(data.user_loc);
$('.rating_date').html('Reviewed:'+data.rating_date);
for(var i=0;i<data.pic.length;i++){
pic_htm += '<p><a href="'+data.pic[i]+'" target="_blank"><img src="'+data.pic[i]+'"/></a></p>';
}
$('.review_pic').html(pic_htm);
$('.ta_title').html('<strong>'+data.title+'</strong>');
if(data.star_nums){
for(var i=0;i<data.star_nums;i++){
@ -94,6 +155,30 @@ $(function(){
}
});
//Batch crawl: load the review URLs of the selected list page, then call
//the detail endpoint once per URL so each review is scraped and stored.
$('#batchcontents').click(function(event){
    // The trigger is an <a href="#">; keep the browser from following it.
    event.preventDefault();

    var destination = $('#destination').val();
    var pagenums = $('#pagenums').val();
    if(destination == ''){
        return;
    }

    $.ajax({
        url:'/info.php/apps/tripadvisor_spider/index/get_destination_reviews/'+destination+'/'+pagenums,
        success:function(json){
            // An empty body cannot be dereferenced below; treat it as "no URLs".
            var data = json ? $.parseJSON(json) : null;
            if(!data || !data.urls){
                return;
            }
            for(var x=0;x<data.urls.length;x++){
                // Fire-and-forget: the response of the detail request is not used.
                $.ajax({
                    url:'/info.php/apps/tripadvisor_spider/index/get_reviews_detail',
                    data:{url:data.urls[x],html_num:'list_'+x,destination:destination}
                });
            }
        }
    });
});
//上传文件
$('#contentbyexcel').click(function(){
var fileArray = document.getElementById("file_excel").files;
@ -153,5 +238,38 @@ $(function(){
}
});
});
//Preview data: load all stored reviews of the selected destination and
//render them into #list_view_content.
$('#view_list').click(function(event){
    // The trigger is an <a href="#">; keep the browser from following it.
    event.preventDefault();

    var view_destination = $('#view_destination').val();
    if(view_destination == ''){
        return;
    }

    // Decode the JSON-encoded picture list of one row; rows with a missing
    // or malformed value yield an empty list instead of aborting the render.
    function parsePics(raw){
        if(!raw){
            return [];
        }
        try{
            var parsed = $.parseJSON(raw);
            return $.isArray(parsed) ? parsed : [];
        }catch(e){
            return [];
        }
    }

    $.ajax({
        url:'/info.php/apps/tripadvisor_spider/index/get_all_reviews/'+view_destination,
        success:function(json){
            var jsondata = $.parseJSON(json);
            var html = '';
            for(var y=0;y<jsondata.list.length;y++){
                var row = jsondata.list[y];
                // Parse once per row instead of on every loop test and access.
                var pics = parsePics(row.tr_review_pics);

                html += '<div class="row"><div class="col-md-4">';
                html += '<p class="review_name">'+row.tr_member_name+'</p><p class="user_loc">'+row.tr_member_loc+'</p><p class="review_stars">';
                for(var i=0;i<row.tr_member_starts;i++){
                    html += '<span class="glyphicon glyphicon-star"></span>';
                }
                html += '</p></div><div class="col-md-20"><p class="ta_title">'+row.tr_review_title+'</p><p class="rating_date">Reviewed: '+row.tr_review_date+'</p><p class="ta_content">'+row.tr_content+'</p>';
                html += '<p class="review_pic">';
                for(var j=0;j<pics.length;j++){
                    html += '<p><a href="'+pics[j]+'" target="_blank"><img src="'+pics[j]+'"/></a></p>';
                }
                html += '</p><p class="experience_date">Date of experience: '+row.tr_visited_date+'</p></div></div><hr>';
            }
            html += '<p class="pull-right">total nums : '+jsondata.list.length+'</p>';
            $('#list_view_content').html(html);
        }
    });
});
});
</script>
Loading…
Cancel
Save