You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
information-system/application/third_party/htmlcompressor/controllers/index_gm.php

394 lines
16 KiB
PHTML

<?php
if (!defined('BASEPATH')) exit('No direct script access allowed');
class Index_gm extends CI_Controller
{
/**
 * Runs the parent controller constructor and loads the
 * `simple_html_dom_lib` library through the framework loader.
 */
public function __construct()
{
    parent::__construct();

    $dom_library = 'simple_html_dom_lib';
    $this->load->library($dom_library);
}
/**
 * Default action: renders the `welcome` view.
 */
public function index()
{
    $view_name = 'welcome';
    $this->load->view($view_name);
}
/**
 * Chooses the processing mode for a posted page and runs it.
 *
 * Reads the POST fields `htmlsource`, `websitehost`, `template_name`,
 * `create_amp` and `debug` into controller properties, parses the markup and
 * then hands over to create_amp() when `create_amp` is non-empty, otherwise
 * to optimize(). Both handlers print their own response.
 *
 * Answers with HTTP 500 and a plain-text message when `htmlsource` or
 * `websitehost` is empty, or when the markup cannot be parsed.
 *
 * @return false|null false on a validation or parse failure, otherwise null.
 */
public function choose_way()
{
    $this->htmlsource = $this->input->post('htmlsource');
    $this->websitehost = $this->input->post('websitehost');
    $this->template_name = $this->input->post('template_name');
    $this->create_amp = $this->input->post('create_amp');
    $this->debug = $this->input->post('debug');

    if (empty($this->htmlsource) || empty($this->websitehost)) {
        $this->output->set_status_header(500);
        echo 'error:htmlsource or websitehost is empty!';
        log_message('error', "htmlsource or websitehost is empty! " . $this->websitehost);
        return false;
    }

    // The host must not end with "/": relative paths are appended to it later.
    // rtrim also covers several trailing slashes, which a single-character
    // check would leave partly in place.
    $this->websitehost = rtrim($this->websitehost, '/');

    // Parse the page. The parser can hand back a non-object for markup it
    // refuses to load; without this guard the later ->find() calls would fatal.
    $this->html_object = str_get_html($this->htmlsource);
    if (!is_object($this->html_object)) {
        $this->output->set_status_header(500);
        echo 'error:htmlsource can not be parsed!';
        log_message('error', "htmlsource can not be parsed! " . $this->websitehost);
        return false;
    }

    // AMP build or regular (PC) optimization. Each handler echoes its own
    // JSON or error text, so the return value is not printed here.
    if (!empty($this->create_amp)) {
        $this->create_amp();
        return;
    }

    $this->optimize();
    return;
}
/**
 * Optimizes the parsed page held in $this->html_object.
 *
 * Steps:
 *  1. Remove every stylesheet <link>, download the files and join them with
 *     the page's <style type="text/css"> blocks.
 *  2. Remove external scripts that carry neither `defer` nor `async` and
 *     download them; remove inline scripts (except the Google Tag Manager
 *     loader) so they can be re-attached at the end of <body>.
 *  3. POST the page source and the collected CSS to the purifycss service and
 *     inline the reduced CSS right before </head>.
 *  4. Collapse whitespace in the markup.
 *  5. Append the deferred stylesheet loader, an async <script> placeholder and
 *     the inline scripts before </body>.
 *
 * On success prints JSON `{result: "ok", data: {html_object, js_content}}`.
 * `js_content` is the downloaded script code plus the inline code that uses
 * `$`; the caller is expected to store it in a file and substitute the
 * `##DEFERSCRIPTDEFER##` placeholder with that file's URL.
 *
 * @return false|null FALSE after printing an error (HTTP 404 for a failed
 *                    download, 500 for a failed CSS reduction), else null.
 */
function optimize()
{
    // Collect stylesheet URLs and strip their <link> tags from the page.
    $link_css_array = array();
    foreach ($this->html_object->find('link') as $link_css) {
        if ($link_css->rel == 'stylesheet' && !empty($link_css->href)) {
            $link_css_array[] = $link_css->href;
            $link_css->outertext = '';
        }
    }

    // Download every stylesheet and concatenate the contents.
    $css_content = '';
    foreach ($link_css_array as $item) {
        $get_http_temp = GET_HTTP($this->format_url($item, $this->websitehost));
        if ($get_http_temp == false) {
            $this->output->set_status_header(404);
            echo 'CSS文件下载错误';
            log_message('error', "optimize CSS文件下载错误! " . $item);
            return FALSE;
        }
        $css_content .= $get_http_temp;
    }

    // Add the CSS written inline in <style type="text/css"> blocks.
    foreach ($this->html_object->find('style') as $style_css) {
        if ($style_css->type == "text/css") {
            $css_content .= $style_css->innertext;
        }
    }

    // Collect scripts: external files to download, inline code to relocate.
    $link_js_array = array();
    $js_inline_content = '';
    $js_jquery_content = '';
    foreach ($this->html_object->find('script') as $link_script) {
        if (!empty($link_script->src)) {
            // Scripts already marked defer/async stay where they are.
            if (empty($link_script->defer) && empty($link_script->async)) {
                $link_js_array[] = $link_script->src;
                $link_script->outertext = '';
            }
            continue;
        }

        // Full tag text, including the <script> wrapper.
        $script_html = (string) $link_script;

        // The Google Tag Manager loader is left untouched in place.
        if (strpos($script_html, 'https://www.googletagmanager.com/gtm.js') !== false) {
            continue;
        }

        if (strpos($script_html, '$') !== false) {
            // Contains "$", so it is treated as jQuery-dependent: only the code
            // is kept and it is delivered together with the downloaded files.
            $js_jquery_content .= $link_script->innertext;
        } else {
            // Other inline scripts are kept with their <script> tag.
            $js_inline_content .= $script_html;
        }
        $link_script->outertext = ''; // re-attached at the end of the page
    }

    // Download the external scripts that were removed above.
    $js_scr_content = '';
    foreach ($link_js_array as $item) {
        $get_http_temp = GET_HTTP($this->format_url($item, $this->websitehost));
        if ($get_http_temp == false) {
            $this->output->set_status_header(404);
            echo 'JS文件下载错误';
            log_message('error', "optimize js文件下载错误! " . $item);
            return FALSE;
        }
        $js_scr_content .= $get_http_temp;
    }

    // Send page source and CSS to the purifycss service.
    $purifycss_server = 'http://184.172.113.216:33033/';
    if (strpos($this->websitehost, 'chinarundreisen.com') !== false) {
        // The German site uses its own CSS processing server.
        $purifycss_server = 'http://158.177.67.52:33033/';
    }
    $optimize_css = GET_HTTP($purifycss_server, 'html_source=' . urlencode($this->htmlsource) . '&html_css=' . urlencode($css_content), 'POST');
    if (empty($optimize_css)) {
        $this->output->set_status_header(500);
        echo 'css精简错误';
        return FALSE;
    }

    // From here on the page is handled as a plain string.
    $html = (string) $this->html_object;

    // Inline the reduced CSS right before </head>.
    $html = str_replace('</head>', '<style type="text/css">' . $optimize_css . "</style></head>", $html);

    // Turn tabs and line breaks into spaces and collapse every run of
    // whitespace into a single space.
    $collapsed = preg_replace('/[ \t\r\n]++/', ' ', $html);
    if ($collapsed !== null) {
        $html = $collapsed;
    }

    // Original stylesheets are loaded late from a <noscript> container that a
    // small script expands after the first frame.
    $lastload_js = '<noscript id="deferred-styles">';
    foreach ($link_css_array as $item) {
        $lastload_js .= '<link rel="stylesheet" type="text/css" href="' . $item . '"/>';
    }
    $lastload_js .= '</noscript><script>var loadDeferredStyles=function(){var addStylesNode=document.getElementById("deferred-styles");var replacement=document.createElement("div");replacement.innerHTML=addStylesNode.textContent;document.body.appendChild(replacement);addStylesNode.parentElement.removeChild(addStylesNode)};var raf=requestAnimationFrame||mozRequestAnimationFrame||webkitRequestAnimationFrame||msRequestAnimationFrame;if(raf){raf(function(){window.setTimeout(loadDeferredStyles,0)})}else{window.addEventListener("load",loadDeferredStyles)};</script>';

    $js_content = $js_scr_content . $js_jquery_content;

    // The combined JS is returned to the caller; the placeholder below must be
    // replaced with the URL of the file the caller saves it to.
    $lastload_js .= '<script async src="##DEFERSCRIPTDEFER##"></script>';
    $lastload_js .= $js_inline_content;
    $html = str_replace('</body>', $lastload_js . '</body>', $html);

    $this->html_object = $html;
    echo json_encode(array('result' => 'ok', 'data' => array('html_object' => $this->html_object, 'js_content' => $js_content)));
}
/**
 * Builds the AMP version of the page from its mobile markup.
 *
 * Steps:
 *  1. Load the site's AMP template (navigation, header and footer only).
 *  2. Remove all script references and inline scripts.
 *  3. Remove all stylesheet references; the CSS goes into <style amp-custom>.
 *  4. Replace every image whose size is known with <amp-img> carrying width,
 *     height and a layout (JPEGs get a WebP primary with a JPEG fallback).
 *  5. Strip inline style attributes from elements.
 *  6. Strip `!important` from the stylesheet.
 *  7. Fill title, description, keywords and canonical from the source page.
 *  8. TODO: add application/ld+json.
 *
 * Requirement: image URLs must resolve to a host, otherwise their size cannot
 * be looked up, e.g. https://data.asiahighlights.com/pic/abc.jpg
 *
 * On success prints JSON `{result: "ok", data: {amp}}`.
 *
 * @return false|null FALSE after printing an error (HTTP 404 for a failed CSS
 *                    download, 500 otherwise), else null.
 */
function create_amp()
{
    // template_name is request input that becomes part of a view path, so only
    // plain names are accepted (no slashes or dots).
    if (!is_string($this->template_name) || !preg_match('/^[A-Za-z0-9_-]+$/', $this->template_name)) {
        $this->output->set_status_header(500);
        echo 'error:template_name is invalid!';
        log_message('error', 'create_amp template_name is invalid!');
        return FALSE;
    }

    // Load the site's AMP template as a string.
    $amp_template = $this->load->view('amp-template/' . $this->template_name, null, true);

    // Mobile version: drop elements hidden on extra-small screens.
    foreach ($this->html_object->find('.hidden-xs') as $hidden_item) {
        $hidden_item->outertext = '';
    }

    // Copy the page metadata into the template placeholders.
    foreach ($this->html_object->find('title') as $title) {
        $amp_template = str_replace('<!--@TITLE@-->', $title->innertext, $amp_template);
    }
    foreach ($this->html_object->find('meta') as $meta) {
        if ($meta->name == 'description') {
            $amp_template = str_replace('<!--@DESCRIPTION@-->', $meta->content, $amp_template);
        }
        if ($meta->name == 'keywords') {
            $amp_template = str_replace('<!--@KEYWORDS@-->', $meta->content, $amp_template);
        }
    }

    // Scripts and <noscript> blocks are removed entirely.
    foreach ($this->html_object->find('script') as $script) {
        $script->outertext = '';
    }
    foreach ($this->html_object->find('noscript') as $noscript) {
        $noscript->outertext = '';
    }

    // Stylesheet links are removed and remembered for download; the canonical
    // link fills its placeholder.
    $link_css_array = array();
    foreach ($this->html_object->find('link') as $link) {
        if (empty($link->href)) {
            continue;
        }
        if ($link->rel == 'stylesheet') {
            $link_css_array[] = $link->href;
            $link->outertext = '';
        }
        if ($link->rel == 'canonical') {
            $amp_template = str_replace('<!--@CANONICAL@-->', $link->href, $amp_template);
        }
    }

    // AMP requires width and height on images, so every image URL is first
    // made absolute. Each attribute is resolved from its own URL.
    foreach ($this->html_object->find('img') as $image) {
        if (!empty($image->src)) {
            $img_src_host = parse_url($image->src, PHP_URL_HOST);
            if (empty($img_src_host)) {
                $img_src_host = $this->websitehost;
            }
            $image->src = $this->format_url($image->src, $img_src_host);
        }
        if (!empty($image->originalsrc)) {
            $img_src_host = parse_url($image->originalsrc, PHP_URL_HOST);
            if (empty($img_src_host)) {
                $img_src_host = $this->websitehost;
            }
            $image->originalsrc = $this->format_url($image->originalsrc, $img_src_host);
        }
    }

    // Group image paths by host for the size lookup, e.g.
    // https://data.asiahighlights.com/imagesize.php?photo=/pic/logo-ah.png,/pic/ah-slide-logo.png
    $request_size = array();
    foreach ($this->html_object->find('img') as $image) {
        $img_src = (string) (!empty($image->originalsrc) ? $image->originalsrc : $image->src);

        // facebook / dmca images are tracking pixels; they break the size
        // lookup and are removed.
        if (strpos($img_src, 'facebook.com') !== false || strpos($img_src, 'dmca.com') !== false) {
            $image->outertext = '';
            continue;
        }

        $img_src_urls = parse_url($img_src);
        if (empty($img_src_urls['host']) || empty($img_src_urls['path'])) {
            // Nothing to look up without both a host and a path.
            continue;
        }
        $request_size[$img_src_urls['host']][] = $img_src_urls['path'];
    }

    // Ask each host for the sizes of its images; results are keyed by the
    // absolute https URL.
    $image_sizes = array();
    foreach ($request_size as $host => $paths) {
        $size_url = "https://{$host}/imagesize.php?photo=" . urlencode(implode(',', $paths));
        $size_data = GET_HTTP($size_url);
        if (empty($size_data)) {
            continue;
        }
        $size_list = json_decode($size_data);
        if (!is_array($size_list) && !is_object($size_list)) {
            // Not a JSON list/object: ignore this host's answer.
            continue;
        }
        foreach ($size_list as $size_item) {
            if (!is_object($size_item) || !isset($size_item->photo)) {
                continue;
            }
            $size_item->photo = "https://{$host}" . $size_item->photo;
            $image_sizes[$size_item->photo] = $size_item;
        }
    }

    // Replace each image that has a known size with <amp-img>.
    foreach ($this->html_object->find('img') as $image) {
        $img_src = (string) (!empty($image->originalsrc) ? $image->originalsrc : $image->src);
        if (empty($image_sizes[$img_src])) {
            // Size unknown: leave the element as it is.
            continue;
        }
        $img_size = $image_sizes[$img_src];
        $img_alt = $image->alt;
        $img_class = $image->class;
        $layout = 'responsive';

        if (substr($img_src, -4) == '.jpg') {
            // JPEGs narrower than 400px keep a fixed layout.
            if ($img_size->width < 400) {
                $layout = 'fixed';
            }
            // WebP as the primary source with the JPEG nested as fallback.
            $img_src_jpg = " <amp-img fallback layout=\"{$layout}\" class=\"{$img_class}\" alt=\"{$img_alt}\" src=\"{$img_src}\" width=\"{$img_size->width}\" height=\"{$img_size->height}\"></amp-img>";
            $img_src_webp = substr($img_src, 0, -4) . '.webp';
            $image->outertext = " <amp-img layout=\"{$layout}\" class=\"{$img_class}\" alt=\"{$img_alt}\" src=\"{$img_src_webp}\" width=\"{$img_size->width}\" height=\"{$img_size->height}\">{$img_src_jpg}</amp-img>";
        } else {
            $image->outertext = " <amp-img layout=\"{$layout}\" class=\"{$img_class}\" alt=\"{$img_alt}\" src=\"{$img_src}\" width=\"{$img_size->width}\" height=\"{$img_size->height}\"></amp-img>";
        }
    }

    // Only the main content block goes into the template (and later into the
    // CSS reduction, which keeps that step fast).
    $content_main_node = null;
    foreach ($this->html_object->find('#content_main') as $content_main) {
        $amp_template = str_replace('<!--@CUSTOM-CONENT@-->', (string) $content_main, $amp_template);
        $content_main_node = $content_main;
    }

    // Product pages: drop the tab navigation.
    foreach ($this->html_object->find('.tab-nav') as $tabnav) {
        $amp_template = str_replace((string) $tabnav, '', $amp_template);
    }
    // Product pages: the sticky block becomes a footer-button placeholder.
    foreach ($this->html_object->find('.sticky') as $sticky) {
        $amp_template = str_replace((string) $sticky, '@AMPFOOTER-BUTTON@', $amp_template);
    }
    // Product pages: forms are removed from the content.
    $has_form = false;
    foreach ($this->html_object->find('form') as $form) {
        $amp_template = str_replace((string) $form, '', $amp_template);
        $has_form = true;
    }
    if ($has_form) {
        // The source page had a form, so the amp-form component is registered.
        $replace_text = '<script async="" custom-element="amp-form" src="https://cdn.ampproject.org/v0/amp-form-0.1.js"></script><style amp-custom>';
        $amp_template = str_replace('<style amp-custom>', $replace_text, $amp_template);
    }

    // Download every stylesheet and concatenate the contents.
    $css_content = '';
    foreach ($link_css_array as $item) {
        // Swap in trimmed stylesheets to speed up the CSS reduction.
        if (strpos($item, 'base.css') !== false) {
            $item = 'https://data.chinarundreisen.com/min/?f=/css/base.css';
        }
        if (strpos($item, 'reiseroute.css') !== false) {
            $item = 'https://data.chinarundreisen.com/min/?f=/css/reiseroute-amp.css';
        }
        $get_http_temp = GET_HTTP($this->format_url($item, $this->websitehost));
        if ($get_http_temp == false) {
            $this->output->set_status_header(404);
            echo 'CSS文件下载错误';
            log_message('error', "optimize CSS文件下载错误! " . $item);
            return FALSE;
        }
        $css_content .= $get_http_temp;
    }

    // Inline <style> blocks join the stylesheet and are blanked in the DOM.
    foreach ($this->html_object->find('style') as $style) {
        $css_content .= $style->innertext;
        $style->outertext = '';
    }

    // Send the main content and the CSS to the purifycss service. The content
    // is rendered here, after the <style> blocks above were blanked.
    $content_main_html = ($content_main_node !== null) ? (string) $content_main_node : '';
    $purifycss_server = 'http://158.177.67.52:33033/';
    $optimize_css = GET_HTTP($purifycss_server, 'html_source=' . urlencode($content_main_html) . '&html_css=' . urlencode($css_content), 'POST');
    if (empty($optimize_css)) {
        $this->output->set_status_header(500);
        echo 'css精简错误';
        return FALSE;
    }
    $optimize_css = str_replace('!important', '', $optimize_css);
    $optimize_css = str_replace('@charset "utf-8";', '', $optimize_css);
    $amp_template = str_replace('/*@CUSTOM-CSS@*/', $optimize_css, $amp_template);

    // Strip inline style attributes. The value is matched up to its own
    // closing quote so that values containing the other quote type are
    // removed whole.
    $without_inline_styles = preg_replace('/\sstyle\s*=\s*(?:"[^"]*"|\'[^\']*\')/i', '', $amp_template);
    if ($without_inline_styles !== null) {
        $amp_template = $without_inline_styles;
    }

    echo json_encode(array('result' => 'ok', 'data' => array('amp' => $amp_template)));
}
/**
 * Normalizes a URL so that it always carries a host and uses https.
 *
 * - `http://…` / `https://…` (any letter case): only the leading scheme is
 *   rewritten to `https://`; the rest of the URL is left alone.
 * - `//host/…`: only the leading `//` is replaced by `https://`, because
 *   sloppy URLs may contain `//` further inside.
 * - anything else is treated as a path and appended to $host; a `/` is
 *   inserted when the path does not start with one.
 *
 * The result is passed through urldecode().
 *
 * @param string $url  URL or path taken from the page.
 * @param string $host Scheme and host without trailing slash; only used for
 *                     paths.
 * @return string
 */
function format_url($url, $host = '')
{
    $url = (string) $url;
    $host = (string) $host;

    // Absolute URL: force https on the leading scheme only.
    if (preg_match('#^https?://#i', $url)) {
        return urldecode('https://' . substr($url, strpos($url, '://') + 3));
    }

    // Protocol-relative URL.
    if (substr($url, 0, 2) == '//') {
        return urldecode('https://' . substr($url, 2));
    }

    // Path: make sure exactly one slash separates host and path.
    if ($host !== '' && $url !== '' && $url[0] !== '/') {
        $url = '/' . $url;
    }
    return urldecode($host . $url);
}
}