You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
information-system/application/third_party/htmlcompressor/controllers/index_gm.php

394 lines
16 KiB
PHP

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<?php
if (!defined('BASEPATH')) exit('No direct script access allowed');
class Index_gm extends CI_Controller
{
/**
 * Sets up the controller and loads the `simple_html_dom_lib` library.
 */
public function __construct()
{
    parent::__construct();

    $this->load->library('simple_html_dom_lib');
}
/**
 * Default action: renders the `welcome` view.
 */
public function index()
{
    $this->load->view('welcome');
}
//Choose the processing mode (AMP generation or HTML optimization)
/**
 * Reads the posted page and dispatches it to the AMP builder or the HTML optimizer.
 *
 * POST fields read: `htmlsource`, `websitehost`, `template_name`, `create_amp`, `debug`.
 * They are stored on the controller so that create_amp() / optimize() can use them.
 *
 * Responds with HTTP 500 and a plain-text message when `htmlsource` or `websitehost`
 * is empty, or when the HTML cannot be parsed. Otherwise the selected worker echoes
 * its own JSON response.
 *
 * @return false|null FALSE after an input error was reported, otherwise null.
 */
public function choose_way()
{
    $this->htmlsource = $this->input->post('htmlsource');
    $this->websitehost = $this->input->post('websitehost');
    $this->template_name = $this->input->post('template_name');
    $this->create_amp = $this->input->post('create_amp');
    $this->debug = $this->input->post('debug');

    if (empty($this->htmlsource) || empty($this->websitehost)) {
        $this->output->set_status_header(500);
        echo 'error:htmlsource or websitehost is empty!';
        log_message('error', "htmlsource or websitehost is empty! " . $this->websitehost);
        return false;
    }

    // The host must not end with "/" because paths are appended to it verbatim.
    $this->websitehost = rtrim($this->websitehost, '/');

    // Parse the page. The parser can refuse the input (it returns a non-object),
    // in which case every later ->find() call would be a fatal error.
    $this->html_object = str_get_html($this->htmlsource);
    if (!is_object($this->html_object)) {
        $this->output->set_status_header(500);
        echo 'error:htmlsource can not be parsed!';
        log_message('error', "htmlsource can not be parsed! " . $this->websitehost);
        return false;
    }

    // Any non-empty `create_amp` value selects the AMP build; otherwise optimize the page.
    if (!empty($this->create_amp)) {
        echo $this->create_amp();
        return;
    }

    echo $this->optimize();
    return;
}
//HTML optimization
/**
 * Optimizes the parsed page held in $this->html_object and echoes the result as JSON.
 *
 * Steps:
 *  1. Remove external stylesheet <link> tags, download them and concatenate the CSS
 *     together with inline <style type="text/css"> blocks.
 *  2. Remove blocking external scripts (no defer/async) and inline scripts from their
 *     original position; external ones are downloaded and concatenated.
 *  3. Send the page and the CSS to the purifycss service and inline the reduced CSS
 *     before </head>.
 *  4. Collapse whitespace.
 *  5. Append, before </body>, the deferred stylesheet loader, an async script tag with
 *     the `##DEFERSCRIPTDEFER##` placeholder and the inline scripts that do not use `$`.
 *
 * On success echoes `{"result":"ok","data":{"html_object":…,"js_content":…}}`.
 * On failure sets an HTTP status (404 for download errors, 500 for purify errors),
 * echoes a message and returns FALSE.
 *
 * @return false|null FALSE after an error response, otherwise null.
 */
function optimize()
{
    // Collect the stylesheet URLs and drop their <link> tags from the document.
    $link_css_array = array();
    foreach ($this->html_object->find('link') as $link_css) {
        if ($link_css->rel == 'stylesheet' && !empty($link_css->href)) {
            $link_css_array[] = $link_css->href;
            $link_css->outertext = '';
        }
    }

    // Download every stylesheet and concatenate the contents.
    $css_content = '';
    foreach ($link_css_array as $item) {
        $get_http_temp = GET_HTTP($this->format_url($item, $this->websitehost));
        if ($get_http_temp == false) {
            $this->output->set_status_header(404);
            echo 'CSS文件下载错误';
            log_message('error', "optimize CSS文件下载错误! " . $item);
            return FALSE;
        }
        $css_content .= $get_http_temp;
    }

    // Add the CSS written in inline <style type="text/css"> blocks.
    foreach ($this->html_object->find('style') as $style_css) {
        if ($style_css->type == "text/css") {
            $css_content .= $style_css->innertext;
        }
    }

    // Sort the scripts: external blocking ones are downloaded later, inline ones are
    // moved to the bottom of the page.
    $link_js_array = array();
    $js_inline_content = '';
    $js_jquery_content = '';
    foreach ($this->html_object->find('script') as $link_script) {
        if (!empty($link_script->src)) {
            // Scripts already marked defer/async are left where they are.
            if (empty($link_script->defer) && empty($link_script->async)) {
                $link_js_array[] = $link_script->src;
                $link_script->outertext = '';
            }
            continue;
        }

        $script_html = (string) $link_script;

        // The Google Tag Manager bootstrap snippet stays in place.
        if (strpos($script_html, 'https://www.googletagmanager.com/gtm.js') !== false) {
            continue;
        }

        if (strpos($script_html, '$') !== false) {
            // A "$" is taken as a sign of jQuery usage: the code is merged into the deferred
            // bundle. The separator keeps a trailing "//" comment or a missing ";" in one
            // script from swallowing or chaining into the next one.
            $js_jquery_content .= $link_script->innertext . ";\n";
        } else {
            // Keep the full <script> tag; it is re-inserted before </body>.
            $js_inline_content .= $script_html;
        }
        $link_script->outertext = '';
    }

    // Download the external scripts into one bundle (same separator rationale as above).
    $js_scr_content = '';
    foreach ($link_js_array as $item) {
        $get_http_temp = GET_HTTP($this->format_url($item, $this->websitehost));
        if ($get_http_temp == false) {
            $this->output->set_status_header(404);
            echo 'JS文件下载错误';
            log_message('error', "optimize js文件下载错误! " . $item);
            return FALSE;
        }
        $js_scr_content .= $get_http_temp . ";\n";
    }

    // Submit the page and its CSS to the purifycss service.
    $purifycss_server = 'http://184.172.113.216:33033/';
    if (strpos($this->websitehost, 'chinarundreisen.com') !== false) {
        // The German site uses its own CSS processing server.
        $purifycss_server = 'http://158.177.67.52:33033/';
    }
    $optimize_css = GET_HTTP($purifycss_server, 'html_source=' . urlencode($this->htmlsource) . '&html_css=' . urlencode($css_content), 'POST');
    if (empty($optimize_css)) {
        $this->output->set_status_header(500);
        echo 'css精简错误';
        log_message('error', "optimize purifycss returned an empty result! " . $purifycss_server);
        return FALSE;
    }

    // From here on the document is handled as a plain string.
    $html = (string) $this->html_object;

    // Inline the reduced CSS right before </head>.
    $html = str_replace('</head>', '<style type="text/css">' . $optimize_css . "</style></head>", $html);

    // Turn tabs and line breaks into spaces, then collapse every run of spaces into one.
    // NOTE(review): the original search list also contained " " (a no-op when replaced by
    // " "). If that was meant to be a non-breaking or full-width space, add it back here
    // as an explicit escape sequence.
    $html = str_replace(array("\t", "\n", "\r"), " ", $html);
    $collapsed = preg_replace('/ {2,}/', ' ', $html);
    if ($collapsed !== null) {
        $html = $collapsed;
    }

    // The original stylesheets are loaded late through a <noscript> block that a small
    // script turns into real <link> tags.
    $lastload_js = '<noscript id="deferred-styles">';
    foreach ($link_css_array as $item) {
        $lastload_js .= '<link rel="stylesheet" type="text/css" href="' . $item . '"/>';
    }
    $lastload_js .= '</noscript><script>var loadDeferredStyles=function(){var addStylesNode=document.getElementById("deferred-styles");var replacement=document.createElement("div");replacement.innerHTML=addStylesNode.textContent;document.body.appendChild(replacement);addStylesNode.parentElement.removeChild(addStylesNode)};var raf=requestAnimationFrame||mozRequestAnimationFrame||webkitRequestAnimationFrame||msRequestAnimationFrame;if(raf){raf(function(){window.setTimeout(loadDeferredStyles,0)})}else{window.addEventListener("load",loadDeferredStyles)};</script>';

    // The bundled JS is returned separately; the response carries a placeholder src
    // (##DEFERSCRIPTDEFER##) for the async script tag.
    $js_content = $js_scr_content . $js_jquery_content;
    $lastload_js .= '<script async src="##DEFERSCRIPTDEFER##"></script>';
    $lastload_js .= $js_inline_content;

    $html = str_replace('</body>', $lastload_js . '</body>', $html);

    // As before, the controller property ends up holding the final HTML string.
    $this->html_object = $html;

    echo json_encode(array('result' => 'ok', 'data' => array('html_object' => $this->html_object, 'js_content' => $js_content)));
}
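//Build the AMP version; it is generated from the mobile version of the page
/*
1. Load the site's AMP template; the template only contains the navigation, header and footer
2. Remove all JS references and inline scripts
3. Remove all CSS references; the inline CSS goes into <style amp-custom>
4. Replace every image with amp-img, set width and height, and set layout="responsive"
5. Remove the style attribute from all elements
6. Remove !important from the stylesheet
7. Fill in title, description, keywords and canonical read from the source document
8. todo: add application/ld+json
Requirement: image URLs must include the domain, otherwise fetching the size fails, e.g. https://data.asiahighlights.com/pic/abc.jpg
*/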
/**
 * Builds the AMP page from the parsed document in $this->html_object and echoes it as JSON.
 *
 * The AMP template view `amp-template/<template_name>` supplies the page shell. Its
 * placeholders (<!--@TITLE@-->, <!--@DESCRIPTION@-->, <!--@KEYWORDS@-->, <!--@CANONICAL@-->,
 * <!--@CUSTOM-CONENT@-->, /*@CUSTOM-CSS@*​/) are filled from the source document.
 *
 * On success echoes `{"result":"ok","data":{"amp":…}}`. On failure sets an HTTP status,
 * echoes a message and returns FALSE.
 *
 * @return false|null FALSE after an error response, otherwise null.
 */
function create_amp()
{
    // template_name is posted by the client and becomes part of a view path, so refuse
    // anything that could leave the amp-template directory.
    $template_name = (string) $this->template_name;
    if ($template_name === ''
        || strpos($template_name, '..') !== false
        || !preg_match('/^[A-Za-z0-9_.\/-]+$/', $template_name)
    ) {
        $this->output->set_status_header(500);
        echo 'error:template_name is invalid!';
        log_message('error', "create_amp template_name is invalid! " . $template_name);
        return FALSE;
    }

    // Load the site's AMP template as a string.
    $amp_template = $this->load->view('amp-template/' . $template_name, null, true);

    // Build the mobile variant: drop everything hidden on extra-small screens.
    foreach ($this->html_object->find('.hidden-xs') as $hidden_item) {
        $hidden_item->outertext = '';
    }

    // Copy title / description / keywords into the template.
    foreach ($this->html_object->find('title') as $title) {
        $amp_template = str_replace('<!--@TITLE@-->', $title->innertext, $amp_template);
    }
    foreach ($this->html_object->find('meta') as $meta) {
        if ($meta->name == 'description') {
            $amp_template = str_replace('<!--@DESCRIPTION@-->', $meta->content, $amp_template);
        }
        if ($meta->name == 'keywords') {
            $amp_template = str_replace('<!--@KEYWORDS@-->', $meta->content, $amp_template);
        }
    }

    // Scripts and <noscript> blocks are removed from the document.
    foreach ($this->html_object->find('script') as $script) {
        $script->outertext = '';
    }
    foreach ($this->html_object->find('noscript') as $noscript) {
        $noscript->outertext = '';
    }

    // Stylesheet links are removed and remembered for download; canonical goes to the template.
    $link_css_array = array();
    foreach ($this->html_object->find('link') as $link) {
        if ($link->rel == 'stylesheet' && !empty($link->href)) {
            $link_css_array[] = $link->href;
            $link->outertext = '';
        }
        if ($link->rel == 'canonical' && !empty($link->href)) {
            $amp_template = str_replace('<!--@CANONICAL@-->', $link->href, $amp_template);
        }
    }

    // Inline <style> blocks are collected and removed BEFORE the content is copied into
    // the template, so that no <style> element survives in the AMP body.
    $inline_css = '';
    foreach ($this->html_object->find('style') as $style) {
        $inline_css .= $style->innertext;
        $style->outertext = '';
    }

    // Make every image URL absolute; relative paths get the site host.
    foreach ($this->html_object->find('img') as $image) {
        if (!empty($image->src)) {
            $image->src = $this->format_url($image->src, $this->websitehost);
        }
        if (!empty($image->originalsrc)) {
            $image->originalsrc = $this->format_url($image->originalsrc, $this->websitehost);
        }
    }

    // Group the image paths by host for the size lookup, e.g.
    // https://data.asiahighlights.com/imagesize.php?photo=/pic/logo-ah.png,/pic/ah-slide-logo.png
    $request_size = array();
    foreach ($this->html_object->find('img') as $image) {
        $img_src = (string) (!empty($image->originalsrc) ? $image->originalsrc : $image->src);
        if (strpos($img_src, 'facebook.com') !== false || strpos($img_src, 'dmca.com') !== false) {
            // facebook / dmca images are tracking pixels; they break the size lookup, so drop them.
            $image->outertext = '';
            continue;
        }
        $img_src_urls = parse_url($img_src);
        if (empty($img_src_urls['host']) || empty($img_src_urls['path'])) {
            // Without host and path there is nothing to ask the size service for.
            continue;
        }
        $request_size[$img_src_urls['host']][] = $img_src_urls['path'];
    }

    // Ask each host for the image dimensions (AMP requires width and height).
    $image_sizes = array();
    foreach ($request_size as $host => $path) {
        $parse_url = "https://{$host}/imagesize.php?photo=" . urlencode(implode(',', $path));
        $size_data = GET_HTTP($parse_url);
        if (empty($size_data)) {
            continue;
        }
        $size_data = json_decode($size_data);
        if (!is_array($size_data) && !is_object($size_data)) {
            // Not JSON (or not a collection): ignore this host's answer.
            continue;
        }
        foreach ($size_data as $size_item) {
            if (!is_object($size_item) || !isset($size_item->photo)) {
                continue;
            }
            $size_item->photo = "https://{$host}" . $size_item->photo;
            $image_sizes[$size_item->photo] = $size_item;
        }
    }

    // Replace <img> with <amp-img>; images without a known size are left untouched.
    foreach ($this->html_object->find('img') as $image) {
        $img_src = (string) (!empty($image->originalsrc) ? $image->originalsrc : $image->src);
        if (empty($image_sizes[$img_src])) {
            continue;
        }
        $img_size = $image_sizes[$img_src];
        if (!isset($img_size->width, $img_size->height)) {
            continue;
        }
        $img_alt = $image->alt;
        $img_class = $image->class;

        $common = "class=\"{$img_class}\" alt=\"{$img_alt}\"";
        $dims = "width=\"{$img_size->width}\" height=\"{$img_size->height}\"";

        if (substr($img_src, -4) == '.jpg') {
            // JPEGs get a .webp primary source with the JPEG as fallback; small ones use a fixed layout.
            $layout = $img_size->width < 400 ? 'fixed' : 'responsive';
            $img_src_jpg = " <amp-img fallback layout=\"{$layout}\" {$common} src=\"{$img_src}\" {$dims}></amp-img>";
            $img_src_webp = substr($img_src, 0, -4) . '.webp';
            $image->outertext = " <amp-img layout=\"{$layout}\" {$common} src=\"{$img_src_webp}\" {$dims}>{$img_src_jpg}</amp-img>";
        } else {
            $image->outertext = " <amp-img layout=\"responsive\" {$common} src=\"{$img_src}\" {$dims}></amp-img>";
        }
    }

    // Only the main content block goes into the template (and later to the CSS purifier).
    $content_main = null;
    foreach ($this->html_object->find('#content_main') as $content_node) {
        $content_main = $content_node;
        $amp_template = str_replace('<!--@CUSTOM-CONENT@-->', (string) $content_node, $amp_template);
    }

    // Product pages: remove the tab navigation, swap the sticky block for the footer
    // button marker and remove forms.
    foreach ($this->html_object->find('.tab-nav') as $tabnav) {
        $amp_template = str_replace((string) $tabnav, '', $amp_template);
    }
    foreach ($this->html_object->find('.sticky') as $sticky) {
        $amp_template = str_replace((string) $sticky, '@AMPFOOTER-BUTTON@', $amp_template);
    }
    $forms = $this->html_object->find('form');
    foreach ($forms as $form) {
        $amp_template = str_replace((string) $form, '', $amp_template);
    }
    if (!empty($forms)) {
        // The source page had a form: register the amp-form component in the template head.
        $replace_text = '<script async="" custom-element="amp-form" src="https://cdn.ampproject.org/v0/amp-form-0.1.js"></script><style amp-custom>';
        $amp_template = str_replace('<style amp-custom>', $replace_text, $amp_template);
    }

    // Download the stylesheets and concatenate them.
    $css_content = '';
    foreach ($link_css_array as $item) {
        // Swap in slimmer stylesheets to speed up purification.
        if (strpos($item, 'base.css') !== false) {
            $item = 'https://data.chinarundreisen.com/min/?f=/css/base.css';
        }
        if (strpos($item, 'reiseroute.css') !== false) {
            $item = 'https://data.chinarundreisen.com/min/?f=/css/reiseroute-amp.css';
        }
        $get_http_temp = GET_HTTP($this->format_url($item, $this->websitehost));
        if ($get_http_temp == false) {
            $this->output->set_status_header(404);
            echo 'CSS文件下载错误';
            log_message('error', "create_amp CSS文件下载错误! " . $item);
            return FALSE;
        }
        $css_content .= $get_http_temp;
    }
    $css_content .= $inline_css;

    // Submit the main content and the CSS to the purifycss service.
    $purifycss_server = 'http://158.177.67.52:33033/';
    $purify_html = $content_main === null ? '' : (string) $content_main;
    $optimize_css = GET_HTTP($purifycss_server, 'html_source=' . urlencode($purify_html) . '&html_css=' . urlencode($css_content), 'POST');
    if (empty($optimize_css)) {
        $this->output->set_status_header(500);
        echo 'css精简错误';
        return FALSE;
    }

    // AMP forbids !important; the charset rule is dropped as well.
    $optimize_css = str_replace('!important', '', $optimize_css);
    $optimize_css = str_replace('@charset "utf-8";', '', $optimize_css);
    $amp_template = str_replace('/*@CUSTOM-CSS@*/', $optimize_css, $amp_template);

    // Strip style="…" / style='…' attributes. Each quote type is matched against itself,
    // so a quote of the other kind (or a "|") inside the value no longer cuts the match short.
    $without_inline_styles = preg_replace('/\sstyle\s*=\s*(?:"[^"]*"|\'[^\']*\')/i', '', $amp_template);
    if ($without_inline_styles !== null) {
        $amp_template = $without_inline_styles;
    }

    echo json_encode(array('result' => 'ok', 'data' => array('amp' => $amp_template)));
}
//Normalize a URL so that the requested URL always has a domain; relative paths are prefixed with the given host
/**
 * Turns a URL found in the page into an absolute, URL-decoded https URL.
 *
 *  - `https://…` is returned as is (decoded).
 *  - `http://…` has only its leading scheme upgraded to https; nested URLs, for
 *    example inside the query string, are left alone.
 *  - `//host/…` (protocol-relative) gets `https:` prepended; a later `//` is untouched.
 *  - Anything else is treated as a path and appended to $host. A missing leading `/`
 *    is added when a host is given.
 *
 * @param string $url  URL or path taken from the document.
 * @param string $host Scheme plus host without trailing slash, e.g. `https://example.com`.
 * @return string The normalized, URL-decoded URL.
 */
function format_url($url, $host = '')
{
    $url = (string) $url;
    $host = (string) $host;

    if (strncasecmp($url, 'https://', 8) === 0) {
        return urldecode($url);
    }

    if (strncasecmp($url, 'http://', 7) === 0) {
        // Upgrade to https, touching the scheme prefix only.
        return urldecode('https://' . substr($url, 7));
    }

    if (strncmp($url, '//', 2) === 0) {
        // Protocol-relative URL: replace just the leading "//".
        return urldecode('https://' . substr($url, 2));
    }

    // Relative path: make sure host and path are separated by exactly one slash.
    if ($host !== '' && $url !== '' && $url[0] !== '/') {
        $url = '/' . $url;
    }

    return urldecode($host . $url);
}
}