This one is beginning to hurt my brain - any help would be appreciated.
My script is designed to monitor file changes on remote web sites and, when it detects a change, send an email alert. It works perfectly on small and medium-sized sites.
The problem arises when it scans very large sites (where possibly millions of files reside).
There are 2 main functions. The first, build_lists(), takes the results of the scan and stores them in a db. The second, raw_list(), scans the site via FTP using PHP’s ftp_rawlist() function. During the scan, raw_list() calls itself recursively until the file list is exhausted and then returns the list of files and their details to build_lists().
The result of the scan by raw_list() is stored in memory until it is complete.
Where there are possibly millions of files, the server starts squealing and PHP returns a fatal error: “Fatal Error: Allowed memory size of xxxxxxxx bytes exhausted”
I want to avoid using ini_set("memory_limit", "xxxM"); as I feel it would be bad practice and I’m not sure it would work anyway. I think the only way to do this is to combine the 2 functions in such a way that the db is updated many times during the scan, so that only part of the scan is held in memory at any point in time.
I’m not an experienced PHP programmer, so I’ve come here for help - here are the 2 functions:
/**
 * Scan a remote site over FTP, store the listing in the site's "newlist"
 * table, compare it with the previous run and log / e-mail every difference
 * (missing, modified, added, permissions changed).
 *
 * Relies on helpers defined elsewhere in the file: raw_list(), oldlist(),
 * newlist() and convert_perms().
 *
 * NOTE(review): this still uses the legacy mysql_* extension because the
 * helpers called here appear to depend on its implicit default connection;
 * migrating to mysqli/PDO has to be done for all of them together.
 *
 * NOTE(review): the complete FTP listing is held in $file_list while this
 * function runs. For very large sites the listing should be written to the
 * database as it is discovered instead of being returned as one array.
 *
 * @param string $ftp_server FTP host; also used to derive the table names.
 * @param string $ftp_user   FTP user name.
 * @param string $ftp_pw     FTP password.
 * @param string $db_server  MySQL host.
 * @param string $db_user    MySQL user.
 * @param string $db_pass    MySQL password.
 * @param string $startdir   Unused here; kept for interface compatibility.
 * @param string $db_name    MySQL database name.
 * @param string $date       Timestamp of this run (echoed and written to the log).
 * @param string $root_dir   Remote directory where the scan starts.
 * @return void
 */
function build_lists($ftp_server, $ftp_user, $ftp_pw ,$db_server,$db_user,$db_pass,$startdir,$db_name,$date,$root_dir){
    // All table names are derived from the host name ('.' and '-' become '_').
    $server_key     = str_replace('-', '_', str_replace('.', '_', $ftp_server));
    $site_table     = 'ssa_'.stripslashes($server_key).'_site';
    $newlist_prefix = 'ssa_'.$server_key.'_newlist';
    $log_prefix     = 'ssa_'.$server_key.'_log';

    // ---- 1. Load the per-site settings -------------------------------
    $con = mysql_connect($db_server, $db_user, $db_pass) or die(mysql_error());
    mysql_select_db($db_name, $con) or die(mysql_error());
    $result = mysql_query("SELECT * FROM $site_table") or die(mysql_error());

    // Defaults so nothing below reads an undefined variable when the
    // settings table is empty.
    $email_subject    = '';
    $email_alert_addr = '';
    $email_header     = '';
    $email_from_addr  = '';
    $excludes         = array();
    $skip_dir         = array();

    // If the table holds several rows the last one wins (as before).
    while ($row = mysql_fetch_array($result)) {
        $email_subject    = $row['email_subj'];
        $email_alert_addr = $row['email_alert'];
        $email_header     = $row['email_header'];
        $email_from_addr  = $row['from_addr'];
        $excludes         = _ssa_csv_list($row['skip_files']);
        $skip_dir         = _ssa_csv_list($row['skip_dir']);
    }
    // The connection is closed during the (potentially long) FTP scan.
    mysql_close($con) or die(mysql_error());

    $email_subject = $email_subject.' - '.$ftp_server; //email subject text
    $email_text    = $email_header.' - '.$ftp_server."\r\n\r\n";

    // ---- 2. Scan the remote site over FTP ----------------------------
    $conn_id = @ftp_connect($ftp_server) or die("Unable to establish an FTP connection");
    @ftp_login($conn_id, $ftp_user, $ftp_pw) or die("ftp-login failed - User name or password not correct");
    @ftp_pasv($conn_id, true) or die("Unable to set FTP passive mode."); //Use passive mode for client-side action

    $file_list = raw_list($root_dir, $conn_id);
    ftp_close($conn_id);
    if (!is_array($file_list)) {
        // A failed or empty scan must not break the foreach loops below.
        $file_list = array();
    }

    // ---- 3. Store the new listing ------------------------------------
    $con = mysql_connect($db_server, $db_user, $db_pass) or die(mysql_error());
    mysql_select_db($db_name, $con) or die(mysql_error());

    // Snapshot of the previous run; must be read BEFORE the table is emptied.
    $oldlist   = oldlist($newlist_prefix);
    $first_run = empty($oldlist) ? 'Y' : 'N';

    mysql_query("TRUNCATE TABLE `$newlist_prefix`") or die('Unable to empty the table:<br> '.mysql_error());
    echo 'SSA v1.5.1 Multisite - Script run on '.$ftp_server.' on '.$date."\r\n";

    foreach ($file_list as $value) {
        $entry = _ssa_parse_entry($value, $root_dir);
        if ($entry['file_name'] == "" || _ssa_is_excluded($entry, $excludes, $skip_dir)) {
            continue;
        }
        // Every value is escaped: file names containing quotes used to
        // abort the whole run with an SQL error.
        mysql_query("INSERT INTO $newlist_prefix
            (path, filename, size, date, time, perms)
            VALUES ("._ssa_quote($entry['path']).",
                    "._ssa_quote($entry['file_name']).",
                    "._ssa_quote($entry['size']).",
                    "._ssa_quote($entry['day'].$entry['month']).",
                    "._ssa_quote($entry['time']).",
                    "._ssa_quote($entry['perms']).")") or die(mysql_error());
    }

    // ---- 4. Files present last time but gone now ---------------------
    $newlist = newlist($newlist_prefix);
    $diff    = array();
    if (!empty($oldlist) && is_array($newlist)) {
        $diff = array_diff_key($oldlist, $newlist);
        foreach ($diff as $key => $value) {
            $last_date = isset($value['date']) ? $value['date'] : '';
            $last_time = isset($value['time']) ? $value['time'] : '';

            print 'File missing: '.$key.' - Last seen: '.$last_date.' at '.$last_time."\r\n";
            $email_text .= 'File missing: '.$key."\r\n".'Last seen: '.$last_date.' at '.$last_time."\r\n\r\n";
            _ssa_log_change($log_prefix, 'Missing', $key, $last_date, $last_time, '', '', '', '', $date);
        }
    }

    // Number of detected changes. Missing files are counted up front so an
    // alert is sent even when the new listing is empty or fully excluded.
    $i = count($diff);

    // ---- 5. Modified / added / permission changes --------------------
    foreach ($file_list as $value) {
        $entry = _ssa_parse_entry($value, $root_dir);
        if ($entry['file_name'] == "" || _ssa_is_excluded($entry, $excludes, $skip_dir)) {
            continue;
        }

        $file = trim($entry['path'].'/'.$entry['file_name']);

        $in_new        = _ssa_field($newlist, $file, 'path') != "";
        $in_old        = _ssa_field($oldlist, $file, 'path') != "";
        $size_newlist  = _ssa_field($newlist, $file, 'size');
        $size_oldlist  = _ssa_field($oldlist, $file, 'size');
        $raw_new_perms = _ssa_field($newlist, $file, 'perms');
        $raw_old_perms = _ssa_field($oldlist, $file, 'perms');

        $is_modified   = $in_new && $in_old && $size_newlist != $size_oldlist;
        $is_added      = !empty($oldlist) && $in_new && !$in_old;
        $perms_changed = $in_new && $in_old && $raw_new_perms != $raw_old_perms;

        if (!$is_modified && !$is_added && !$perms_changed) {
            // Unchanged file: skip the per-file SELECT entirely.
            continue;
        }

        $new_perms = convert_perms($raw_new_perms);
        $old_perms = convert_perms($raw_old_perms);

        // Read back the stored date/time for the report.
        // NOTE(review): columns 4 and 5 of SELECT * are presumed to be
        // `date` and `time` - confirm against the table definition.
        $resultB = mysql_query("SELECT * FROM $newlist_prefix WHERE path = "._ssa_quote($entry['path'])
            ." AND filename = "._ssa_quote($entry['file_name'])) or die(mysql_error());
        $row2     = mysql_fetch_row($resultB);
        $row_date = isset($row2[4]) ? $row2[4] : '';
        $row_time = isset($row2[5]) ? $row2[5] : '';

        if ($is_modified) {
            print 'File modified: '.$file.' - Date '.$row_date.' Time: '.$row_time.' Old file size = '.$size_oldlist.'bytes. New file size = '.$size_newlist.'bytes'."\r\n";
            $email_text .= 'File modified: '.$file."\r\n".'Date '.$row_date.' Time: '.$row_time.' Old file size = '.$size_oldlist.'bytes. New file size = '.$size_newlist."bytes.\r\n\r\n";
            _ssa_log_change($log_prefix, 'Modified', $file, $row_date, $row_time, $old_perms, $new_perms, $size_oldlist, $size_newlist, $date);
            $i++;
        }
        if ($is_added) {
            print 'File added: '.$file.' - Date added: '.$row_date.' Time added: '.$row_time."\r\n";
            $email_text .= 'File added: '.$file."\r\n".'Date: '.$row_date.' Time: '.$row_time."\r\n\r\n";
            _ssa_log_change($log_prefix, 'Added', $file, $row_date, $row_time, '', $new_perms, $size_oldlist, $size_newlist, $date);
            $i++;
        }
        if ($perms_changed) {
            print 'File permissions changed: '.$file.' - Old perms: '.$old_perms.' New perms: '.$new_perms."\r\n";
            $email_text .= 'File permissions changed: '.$file."\r\n".'Old perms: '.$old_perms.' New perms: '.$new_perms."\r\n\r\n";
            _ssa_log_change($log_prefix, 'Permissions', $file, $row_date, $row_time, $old_perms, $new_perms, $size_oldlist, $size_newlist, $date);
            $i++;
        }
    }// end foreach loop

    // ---- 6. Report ---------------------------------------------------
    if ($i == 0 && $first_run == 'N') {
        echo 'NO CHANGES FOUND';
    }
    if ($first_run == 'Y') {
        echo 'First run completed - All current website files have been added to the database';
    }
    if ($i > 0) {
        // Send email
        $headers = 'From: '.$email_from_addr."\r\n".'X-Mailer: PHP/'.phpversion();
        mail($email_alert_addr, $email_subject, $email_text, $headers); //Simple mail function for alert.
    }

    // Close mysql connection
    mysql_close($con) or die(mysql_error());
}

// Split a comma-separated setting into a list, dropping empty entries so an
// empty setting can never match an empty file name or path segment.
function _ssa_csv_list($csv){
    return array_values(array_filter(explode(',', (string)$csv), 'strlen'));
}

// Return $value as a quoted, escaped SQL string literal (uses the currently
// open default mysql connection).
function _ssa_quote($value){
    return "'".mysql_real_escape_string((string)$value)."'";
}

// Read $list[$file][$key], or null when the entry does not exist, without
// raising an undefined-index notice.
function _ssa_field($list, $file, $key){
    return isset($list[$file][$key]) ? $list[$file][$key] : null;
}

// Turn one raw_list() entry (split listing line with the folder appended at
// index 9) into named fields. 'time' is filled only when the listing shows a
// time (hh:mm) instead of a year; 'dirs' holds the path segments below
// $root_dir.
function _ssa_parse_entry($value, $root_dir){
    $year_or_time = isset($value[7]) ? $value[7] : '';
    $path         = isset($value[9]) ? $value[9] : '';
    return array(
        'perms'     => isset($value[0]) ? $value[0] : '',
        'size'      => isset($value[4]) ? $value[4] : '',
        'month'     => isset($value[5]) ? $value[5] : '',
        'day'       => isset($value[6]) ? $value[6] : '',
        'time'      => (strpos($year_or_time, ':') !== false) ? $year_or_time : '',
        'file_name' => isset($value[8]) ? $value[8] : '',
        'path'      => $path,
        'dirs'      => explode('/', str_replace($root_dir.'/', '', $path)),
    );
}

// True when the entry's file name is in $excludes or any of its directory
// segments is in $skip_dir.
function _ssa_is_excluded($entry, $excludes, $skip_dir){
    return in_array($entry['file_name'], $excludes)
        || count(array_intersect($entry['dirs'], $skip_dir)) > 0;
}

// Append one row to the site's change log table.
function _ssa_log_change($log_prefix, $status, $file, $file_date, $file_time, $old_perms, $new_perms, $old_size, $new_size, $run_date){
    mysql_query("INSERT INTO $log_prefix
        (status, file, date, time, old_perms, new_perms, old_size, new_size, last_run)
        VALUES ("._ssa_quote($status).",
                "._ssa_quote($file).",
                "._ssa_quote($file_date).",
                "._ssa_quote($file_time).",
                "._ssa_quote($old_perms).",
                "._ssa_quote($new_perms).",
                "._ssa_quote($old_size).",
                "._ssa_quote($new_size).",
                "._ssa_quote($run_date).")") or die(mysql_error());
}
#*********************************************************************
# raw_list: recursive directory listing built on ftp_rawlist()
# (ftp_rawlist() itself is called WITHOUT its recursive flag; the
# recursion is done here, one directory at a time)
#
# $folder   - remote directory to list
# $conn_id  - open FTP connection
# $callback - optional callable. When given, every file entry is passed
#             to it as soon as it is found and nothing is accumulated,
#             so memory use stays flat on very large sites. When omitted
#             the function behaves as before and returns all entries.
#
# Returns an array of entries (empty when $callback is used). Each entry
# is the listing line split into 9 fields (index 8 = item name) with the
# containing folder appended at index 9. Directories are descended into
# but not returned; directories whose name starts with "." are skipped.
#*********************************************************************
function raw_list($folder, $conn_id, $callback = null){
    $files = array();
    _raw_list_walk($folder, $conn_id, $files, $callback);
    return $files;
}

// Worker for raw_list(): walks $folder depth-first and appends to $files by
// reference, so results from sub-directories are neither lost nor copied.
function _raw_list_walk($folder, $conn_id, &$files, $callback){
    $list = ftp_rawlist($conn_id, $folder);
    if (!is_array($list)) {
        // ftp_rawlist() returns false on failure; treat as an empty folder.
        return;
    }

    foreach ($list as $line) {
        // Limit of 9 keeps file names containing spaces in one piece.
        $split = preg_split("/[\\s]+/", $line, 9, PREG_SPLIT_NO_EMPTY);
        if (count($split) < 9) {
            // Not a regular entry line (e.g. "total 123"): no name field.
            continue;
        }

        $ItemName = $split[8];

        if (substr($line, 0, 1) === "d") {
            // Directory: descend unless it is ".", ".." or a hidden folder.
            if (substr($ItemName, 0, 1) != ".") {
                _raw_list_walk("$folder/$ItemName", $conn_id, $files, $callback);
            }
            continue;
        }

        // File entry: remember which folder it lives in (index 9).
        $split[] = $folder;
        if ($callback !== null) {
            call_user_func($callback, $split);
        } else {
            $files[] = $split;
        }
    }
}
I know it’s a big ask, but maybe someone can throw some ideas this way.
Regards to all