You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
information-system/application/third_party/htmlcompressor/controllers/index.php

334 lines
15 KiB
PHP

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<?php
if (!defined('BASEPATH')) exit('No direct script access allowed');
class Index extends CI_Controller
{
public function __construct()
{
parent::__construct();
$this->load->library('simple_html_dom_lib');
}
public function index()
{
$this->load->view('welcome');
}
public function optimize()
{
$htmlsource = $this->input->post('htmlsource');
$websitehost = $this->input->post('websitehost');
$template_name = $this->input->post('template_name');
$create_amp = $this->input->post('create_amp');
$debug = $this->input->post('debug');
if (empty($htmlsource) || empty($websitehost)) {
$this->output->set_status_header(500);
echo 'error:htmlsource or websitehost is empty!';
log_message('error', "htmlsource or websitehost is empty! " . $websitehost);
return false;
}
//域名后面不能有/
if (substr($websitehost, -1, 1) == '/') {
$websitehost = substr($websitehost, 0, -1);
}
$html_object = str_get_html($htmlsource);
if (!empty($html_object)) {
//提取和下载所有CSS样式包括链接文件和页面样式
$link_css_array = array();
$css_content = '';
foreach ($html_object->find('link') as $link_css) {
if ($link_css->rel == 'stylesheet' && !empty($link_css->href)) {
$link_css_array[] = $link_css->href;
$link_css->outertext = ''; //删除链接
}
}
//print_r($link_css_array);
foreach ($link_css_array as $item) {
$get_http_temp = GET_HTTP($this->format_url($item, $websitehost));
if ($get_http_temp == false) {
$this->output->set_status_header(404);
echo 'CSS文件下载错误';
log_message('error', "optimize CSS文件下载错误! " . $item);
return FALSE;
}
$css_content .= $get_http_temp;
}
foreach ($html_object->find('style') as $style_css) {
if ($style_css->type == "text/css") {
$css_content .= $style_css->innertext;
}
}
// echo $css_content;
// echo $html_object;die();
//
//提取和下载所有JS脚本包括链接文件和页面脚本
$link_js_array = array();
$js_inline_content = '';
$js_jquery_content = '';
foreach ($html_object->find('script') as $link_script) {
if (!empty($link_script->src)) {
if (empty($link_script->defer) && empty($link_script->async)) { //没有defer标记的才处理否则让js延后加载
$link_js_array[] = $link_script->src;
$link_script->outertext = ''; //删除链接,移动到页底
}
} else {
//网页内的js不需要提取
//$js_content.= $link_script->innertext;//js的内容
// $js_content.= $link_script;//js的内容包含<script>标签
//含有$表示调用了jquery的函数添加延迟加载defer
//包含gtm的js引入就跳出本次循环
if(strpos($link_script, 'https://www.googletagmanager.com/gtm.js')){
continue;
}
if (strpos($link_script, '$') !== FALSE) {
$js_jquery_content .= $link_script->innertext;
} else {
$js_inline_content .= $link_script; //js的内容包含<script>标签
}
$link_script->outertext = ''; //删除js移动到页底
}
}
$js_scr_content = '';
foreach ($link_js_array as $item) {
$get_http_temp = GET_HTTP($this->format_url($item, $websitehost));
if ($get_http_temp == false) {
$this->output->set_status_header(404);
echo 'JS文件下载错误';
log_message('error', "optimize js文件下载错误! " . $item);
return FALSE;
}
$js_scr_content .= $get_http_temp;
}
//echo $js_content;
//把网页内容和css提交到purifycss处理
$purifycss_server = 'http://169.54.167.189:33033/';
if (strpos($websitehost, 'chinarundreisen.com') !== false) {//德语站点使用自己的css处理服务器
$purifycss_server = 'http://158.177.67.52:33033/';
}
$optimize_css = GET_HTTP($purifycss_server, 'html_source=' . urlencode($htmlsource) . '&html_css=' . urlencode($css_content), 'POST');
if (empty($optimize_css)) {
$this->output->set_status_header(500);
echo 'css精简错误';
return FALSE;
}
//把精简的css添加到head前面
$html_object = str_replace('</head>', '<style type="text/css">' . $optimize_css . "</style></head>", $html_object);
//删除多余空格和换行符
$html_object = str_replace(array(" ", "\t", "\n", "\r"), " ", $html_object);
//循环n次把双空格替换为一个空格
for ($i = 0; $i <= 4; $i++) {
$html_object = str_replace(" ", " ", $html_object);
}
//在最后加载原始css文件和js文件
//把css移动到页面底部延迟加载
$lastload_js = '<noscript id="deferred-styles">';
foreach ($link_css_array as $item) {
$lastload_js .= '<link rel="stylesheet" type="text/css" href="' . $item . '"/>';
}
$lastload_js .= '</noscript><script>var loadDeferredStyles=function(){var addStylesNode=document.getElementById("deferred-styles");var replacement=document.createElement("div");replacement.innerHTML=addStylesNode.textContent;document.body.appendChild(replacement);addStylesNode.parentElement.removeChild(addStylesNode)};var raf=requestAnimationFrame||mozRequestAnimationFrame||webkitRequestAnimationFrame||msRequestAnimationFrame;if(raf){raf(function(){window.setTimeout(loadDeferredStyles,0)})}else{window.addEventListener("load",loadDeferredStyles)};</script>';
$js_content = $js_scr_content . $js_jquery_content;
//延迟加载js需要把返回的js代码保存到一个文件中然后替换占位符以便加载js文件
$lastload_js .= '<script async src="##DEFERSCRIPTDEFER##"></script>';
$lastload_js .= $js_inline_content;
$html_object = str_replace('</body>', $lastload_js . '</body>', $html_object);
}
//只返回AMP代码
if (!empty($create_amp)) {
$amp = $this->create_amp($html_object, $template_name, $websitehost,$debug);
echo json_encode(array('result' => 'ok', 'data' => array('amp' => $amp)));
return;
}
echo json_encode(array('result' => 'ok', 'data' => array('html_object' => $html_object, 'js_content' => $js_content)));
}
//格式化url保证请求的URL有域名//更换为对应的域名路径
function format_url($url, $host = '')
{
if (substr($url, 0, 8) == 'https://' || substr($url, 0, 7) == 'http://') {
$url = str_replace('http://', 'https://', $url);//http要改为https
return urldecode($url);
}
if (substr($url, 0, 2) == '//') { //https或http
return urldecode('https://'.substr($url, 2, strlen($url)-2)); //只替换开头的//部分url不规范有//在url中间
}
return urldecode($host . $url);
}
//生成AMP版本 用移动端版本生成
/*
1.加载站点AMP模板模板只有导航、头部、底部
2.删除全部js引用和内链代码
3.删除全部css引用内联css放入<style amp-custom>
4.图片全部替换成amp-img设置宽高设置layout="responsive"
5.去掉所有元素中的style
6.去掉样式表中的!important
7.替换读取源文件title description keywords canonical
8.todo:增加application/ld+json
要求: 图片文件必须带有域名,否则的获取尺寸会出错,如 https://data.asiahighlights.com/pic/abc.jpg
*/
//$htmlsource 移动端版本的HTML对象 $cache_path AMP文件保存路径
function create_amp($htmlsource, $site_code, $websitehost,$debug=false)
{
$html_object = str_get_html($htmlsource);
//读取站点的AMP模板
$amp_template = $this->load->view('amp-template/' . $site_code, null, true);
//生成移动版本,删除多余元素
foreach ($html_object->find('.hidden-xs') as $hidden_item) {
$hidden_item->outertext = '';
}
foreach ($html_object->find('title') as $title) {
$amp_template = str_replace('<!--@TITLE@-->', $title->innertext, $amp_template);
}
foreach ($html_object->find('meta') as $meta) {
if ($meta->name == 'description') {
$amp_template = str_replace('<!--@DESCRIPTION@-->', $meta->content, $amp_template);
}
if ($meta->name == 'keywords') {
$amp_template = str_replace('<!--@KEYWORDS@-->', $meta->content, $amp_template);
}
}
foreach ($html_object->find('script') as $script) {
$script->outertext = '';
}
foreach ($html_object->find('noscript') as $noscript) {
$noscript->outertext = '';
}
foreach ($html_object->find('link') as $link) {
if ($link->rel == 'stylesheet' && !empty($link->href)) {
$link->outertext = '';
}
if ($link->rel == 'canonical' && !empty($link->href)) {
$amp_template = str_replace('<!--@CANONICAL@-->', $link->href, $amp_template);
}
}
$style_content = '';
foreach ($html_object->find('style') as $style) {
$style_content .= $style->innertext;
$style->outertext = '';
}
$style_content = str_replace('!important', '', $style_content);
$style_content = str_replace('@charset "utf-8";', '', $style_content);
$amp_template = str_replace('/*@CUSTOM-CSS@*/', $style_content, $amp_template);
//获取图片尺寸AMP要求必须填写width和height
//格式化图片URL
foreach ($html_object->find('img') as $image) {
if (!empty($image->src)) {
$img_src_host = parse_url($image->src, PHP_URL_HOST);
if (empty($img_src_host)) {
$img_src_host = $websitehost;
}
$image->src = $this->format_url($image->src, $img_src_host);
}
if (!empty($image->originalsrc)) {
$img_src_host = parse_url($image->src, PHP_URL_HOST);
if (empty($img_src_host)) {
$img_src_host = $websitehost;
}
$image->originalsrc = $this->format_url($image->originalsrc, $img_src_host);
}
}
//拼接请求的图片url如 https://data.asiahighlights.com/imagesize.php?photo=/pic/logo-ah.png,/pic/ah-slide-logo.png
$request_size = array();
foreach ($html_object->find('img') as $image) {
$img_src = $image->src;
if (!empty($image->originalsrc)) {
$img_src = $image->originalsrc;
}
if (strpos($img_src, 'facebook.com') !== false || strpos($img_src, 'dmca.com') !== false) {//facebook dmca会添加一个跟踪代码需要删除掉否则获取图片尺寸会报错
$image->outertext = '';
continue;
}
$img_src_urls = parse_url($img_src);
$request_size[$img_src_urls['host']][] = $img_src_urls['path'];
}
if($debug){
print_r($request_size);
}
//请求图片尺寸
$image_sizes = array();
foreach ($request_size as $host => $path) {
$parse_url = "https://{$host}/imagesize.php?photo=" . implode(',', $path);
$size_data = GET_HTTP($parse_url);
if (!empty($size_data)) {
$size_data = json_decode($size_data);
foreach ($size_data as $size_item) {
$size_item->photo = "https://{$host}" . $size_item->photo;
$image_sizes[$size_item->photo] = $size_item;
}
}
}
if($debug){
print_r($image_sizes);
}
//获取图片尺寸 end
foreach ($html_object->find('img') as $image) {
$img_src = $image->src;
if (!empty($image->originalsrc)) {
$img_src = $image->originalsrc;
}
if(empty($image_sizes[$img_src])){
//如果尺寸不存在则跳过
continue;
}
$img_size = $image_sizes[$img_src];
$img_alt = $image->alt;
$img_class = $image->class;
if (!empty($img_size)) {
//添加webp图片支持
$img_src_webp='';
if(substr($img_src, -4) == '.jpg'){
$img_src_jpg=" <amp-img fallback layout=\"responsive\" class=\"{$img_class}\" alt=\"{$img_alt}\" src=\"{$img_src}\" width=\"{$img_size->width}\" height=\"{$img_size->height}\"></amp-img>";
$img_src_webp=substr($img_src, 0, strlen($img_src) - 4).'.webp';
$img_src_webp = " <amp-img layout=\"responsive\" class=\"{$img_class}\" alt=\"{$img_alt}\" src=\"{$img_src_webp}\" width=\"{$img_size->width}\" height=\"{$img_size->height}\">{$img_src_jpg}</amp-img>";
}
if(!empty($img_src_webp)){
$image->outertext =$img_src_webp;
}else{
$image->outertext = " <amp-img layout=\"responsive\" class=\"{$img_class}\" alt=\"{$img_alt}\" src=\"{$img_src}\" width=\"{$img_size->width}\" height=\"{$img_size->height}\"></amp-img>";
}
}
}
foreach ($html_object->find('#content_main') as $content_main) {
$amp_template = str_replace('<!--@CUSTOM-CONENT@-->', $content_main, $amp_template);
}
//替换掉写在组件上的样式
$amp_template = preg_replace('/style=.+?[\'|"]/i', '', $amp_template);
return $amp_template;
}
}