From 98383800aec1f73d32031bb6c540baf7bdbb1e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20St=C3=B6ckel?= Date: Mon, 26 Jul 2010 15:30:23 +0000 Subject: [PATCH] Added currently unused find_iterator which allows less memory intensive file 'find' operations --- phpgwapi/inc/class.egw_find_iterator.inc.php | 738 +++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 phpgwapi/inc/class.egw_find_iterator.inc.php diff --git a/phpgwapi/inc/class.egw_find_iterator.inc.php b/phpgwapi/inc/class.egw_find_iterator.inc.php new file mode 100644 index 0000000000..01241822f3 --- /dev/null +++ b/phpgwapi/inc/class.egw_find_iterator.inc.php @@ -0,0 +1,738 @@ + + * + * @link http://www.egroupware.org + * @license http://opensource.org/licenses/gpl-license.php GPL - GNU General Public License + * @package api + * @subpackage vfs + * @author Andreas Stoeckel + * @copyright (c) 2010 by Andreas Stoeckel + * @version $Id$ + */ + +define("FILTER_NOFOLLOW", 2); //Display the element but do not follow +define("FILTER_OK", 1); //The filter has passed +define("FILTER_DISPLAY", -1); //The element filtered should not be returned to the caller of find, it should not be displayed +define("FILTER_ALL", -2); //The element didn't pass the filter + +function translate_path($path, $isurl) { + return $isurl ? $path : egw_vfs::PREFIX.parse_url($path, PHP_URL_PATH); +} + +class egw_find_file +{ + private $isurl = false; + private $need_mime = false; + private $base_path = ""; + + private $stat = null; + private $mime = null; + private $props = null; + private $result = null; + + public $filename = ''; //< The filename including the file path, e.g. /home/user/example.file + public $path = ''; //< The filename, e.g. example.file + public $name = ''; //< The file path, e.g. /home/user/ + public $isdot = false; + public $filterstate = FILTER_ALL; + + public function __construct($path, $isurl, $need_mime) + { + //Copy the given parameters + $this->isurl = $isurl; + $this->need_mime = $need_mime; + $this->pair_result = $pair_result; + $this->base_path = $path; + + //Set filename, path and path + name as this information will most likely + //be needed + $paths = $this->isurl ? explode('/', $path) : explode('/', parse_url($path, PHP_URL_PATH)); + $this->name = implode('/',$paths); + $this->filename = array_pop($paths); + $this->path = implode('/',$paths).'/'; + $this->is_dot = ($this->filename == '.') || ($this->filename == '..'); + } + + public function get_stat() + { + //Check whether the stat data has already been created, if not, create it + if ($this->stat === null) + { + $this->stat = $this->isurl ? lstat($this->base_path) : + egw_vfs::url_stat($this->base_path, STREAM_URL_STAT_LINK); + + //Remove numerical indices 0-12 + $this->stat = array_slice($this->stat, 13); + }; + + return $this->stat; + } + + public function get_mime() + { + //Check if the mime data has already been read, if not, read it + if ($this->mime === null) + $res['mime'] = egw_vfs::mime_content_type($this->base_path); + + return $this->mime; + } + + public function get_props() + { + //Check whether props has already been read, if not, read it + if ($this->props === null) + { + $this->props = array( + "isfile" => is_file($this->base_path), + "isdir" => is_dir($this->base_path), + "islink" => is_link($this->base_path), + ); + + if ($this->props['islink']) + $this->props['linktarget'] = readlink($this->base_path); + } + + return $this->props; + } + + public function get_result($pair_result) + { + //Check whether the result has already been produced, if not, produce it + if ($this->result === null) + { + //Check whether only the file name/path should be returned or the complete + //resultset + if ($pair_result) + { + //Copy the state and add some custom information + $this->result = $this->get_stat(); + $this->result['path'] = $this->name; + $this->result['name'] = $this->filename; + if ($this->need_mime) + $this->result['mime'] = $this->get_mime(); + } + else + { + $this->result = $this->name; + } + } + + return $this->result; + } + + public function translate_path() + { + return translate_path($this->name, $this->isurl); + } + +} + +/* + * egw_directory_iterator_filter filters files based on the options set supplied during + * construction. It is used by the RecursiveDirectoryIteratorWrapper and + * FindIterator classes. Filtering is performed by calling the "filter" function + * and passing an url_state array. + */ +class egw_directory_iterator_filter +{ + //Parsed options + private $hidden = false; + private $type = null; + private $name_preg = null; + private $path_preg = null; + private $curkey = 0; + private $lcur = null; + private $uid = null; + private $gid = null; + private $ctime = null; + private $mtime = null; + private $cmin = null; + private $mmin = null; + private $size = null; + private $empty = true; + private $follow = false; + private $mime = null; + + /* + * check_num is internally used to compare strings/numerical values. + * @param(value is an integer parameter) + * @param(argument may be a string or an integer. If argument is an integer + * the result gets true if argument equals value. If argument is a string, the + * first char, which may be '+' or '-' defines whether the result gets true if + * value is greater than argument (+) or when it is smaller than argument (-)) + */ + private static function check_num($value, $argument) + { + if (is_int($argument) && $argument >= 0 || $argument[0] != '-' && $argument[0] != '+') + return $value == $argument; + + if ($argument < 0) + return $value < abs($argument); + + return $value > (int) substr($argument,1); + } + + public function __construct($options) + { + //Preprocess the parameters + $this->hidden = isset($options['hidden']) ? $options['hidden'] : false; + $this->type = isset($options['type']) ? $options['type'] : null; + $this->name_preg = isset($options['name_preg']) ? $options['name_preg'] : null; + $this->path_preg = isset($options['path_preg']) ? $options['path_preg'] : null; + + $this->cmin = isset($options['cmin']) ? $options['cmin'] : null; + $this->mmin = isset($options['mmin']) ? $options['mmin'] : null; + $this->ctime = isset($options['ctime']) ? $options['ctime'] : null; + $this->mtime = isset($options['mtime']) ? $options['mtime'] : null; + + $this->empty = isset($options['empty']) ? $options['empty'] : true; + $this->size = isset($options['size']) ? $options['size'] : null; + $this->follow = isset($options['follow']) ? $options['follow'] : false; + + $this->mime = isset($options['mime']) ? $options['mime'] : null; + + //Convert the (probably) given path/name filters to regular expressions + if (isset($options['name']) && !isset($options['name_preg'])) // change from simple *,? wildcards to preg regular expression once + $this->name_preg = '/^'.str_replace(array('\\?','\\*'), + array('.{1}','.*'), preg_quote($options['name'])).'$/i'; + + if (isset($options['path']) && !isset($options['preg_path'])) // change from simple *,? wildcards to preg regular expression once + $this->path_preg = '/^'.str_replace(array('\\?','\\*'), + array('.{1}','.*'), preg_quote($options['path'])).'$/i'; + + //Translate username to uid + if (!isset($options['uid'])) + { + if (isset($options['user'])) + { + $this->uid = $GLOBALS['egw']->accounts->name2id($options['user'],'account_lid','u'); + } + elseif (isset($options['nouser'])) + { + $this->uid = 0; + } + } + else + { + $this->uid = $options['uid']; + } + + //Translate groupid to gid + if (!isset($options['gid'])) + { + if (isset($options['group'])) + { + $this->gid = abs($GLOBALS['egw']->accounts->name2id($options['group'],'account_lid','g')); + } + elseif (isset($options['nogroup'])) + { + $this->gid = 0; + } + } + else + { + $this->gid = $options['group']; + } + } + + public function filter($findfile) + { + if ($findfile) + { + //Exclude directory and parent directory + if ($findfile->isdot) + return FILTER_ALL; + + //Exclude hidden files + if (!$this->hidden && (($findfile->filename[0] == '.') || ($findfile->filename == 'Thumbs.db'))) + return FILTER_ALL; + + //Read the file properties (like (is link etc.)) + $props = $findfile->get_props(); + + //If this is a directory, we probably need to keep this entry to crawl + //for files inside this directory. That's what FILTER_DISPLAY is for + if ($props['isdir'] && (!($props['islink']) || $this->follow)) + $dir_filter = FILTER_DISPLAY; + else + $dir_filter = FILTER_ALL; + + //Exclude files/directories based on the given regular expression + if ($this->name_preg && !preg_match($this->name_preg, $findfile->filename) || + $this->path_preg && !preg_match($this->path_preg, $findfile->path)) + return $dir_filter; + + //Exclude directories/symlinks/files + if ($this->type && ($this->type == 'd') == !($props['isdir'] && !$props['islink']) || + (($this->type == 'F') && ($props['isdir'] || $props['islink']))) + return $dir_filter; + + if (($this->cmin !== null) || ($this->mmin !== null) || + ($this->ctime !== null) || ($this->mtime !== null) || + ($this->size !== null) || (!$this->empty) || + ($this->gid !== null) || ($this->uid !== null)) + { + //Check user/group + if (($this->gid !== null) && $stat['gid'] != $options['gid'] || + ($this->uid !== null) && $stat['uid'] != $options['uid']) + { + return FILTER_ALL; // wrong user or group + } + + //Retrieve the file stat + $stat = $this->get_stat(); + + //Excludes files where the creation/last modification timestamps is greater than + //the given time + if ($this->cmin && !self::check_num(round((time()-$stat['ctime'])/60), $this->cmin) || + $this->mmin && !self::check_num(round((time()-$stat['mtime'])/60), $this->mmin) || + $this->ctime && !self::check_num(round((time()-$stat['ctime'])/86400), $this->ctime) || + $this->mtime && !self::check_num(round((time()-$stat['mtime'])/86400), $this->mtime)) + { + return $dir_filter; + } + + //Check filesize + if (($this->size !== null) && !self::check_num($stat['size'], $this->size) || + (!$this->empty && $stat['size'] <= 0)) //TODO Check this + { + return $dir_filter; + } + } + + //Check the mime type and subtype. As loading the mime type takes the + //longest time, it is postponed as long as possible + if ($this->mime !== null) { + $mime = $findfile->get_mime(); + if ($options['mime'] != $mime) + { + list($type, $subtype) = explode('/', $this->mime); + + // no subtype (eg. 'image') --> check only the main type + if ($sub_type || substr($this->mime, 0, strlen($type)+1) != $type.'/') + return $dir_filter; // wrong mime-type + } + } + + //Return FILTER_OK, if the filters have passed + return FILTER_OK; + } + else + { + return FILTER_ALL; + } + } +} + +class egw_recursive_directory_iterator_wrapper implements RecursiveIterator +{ + private $base = ""; + private $exec = null; + private $exec_params = null; + private $filter = null; + + private $elements = null; + private $index = 0; + private $iterator = null; + private $follow = false; + private $sortfunc; + private $sort = false; + private $url = false; + private $needmime = false; + + private function fillelemarray() + { + //Fill the "elements" array with all filestates + $this->elements = array(); + + //Loop through each element of the recursive directory iterator + foreach($this->iterator as $key => $value) + { + //Create a new egw_find_file class for each entry and filter it + $findfile = new egw_find_file( + $this->iterator->getPath().'/'.$this->iterator->getFilename(), + $this->url, $this->needmime); + + //Filter the element + $filterresult = $this->filter->filter($findfile); + if ($filterresult >= FILTER_DISPLAY) + { + $findfile->filterstate = $filterresult; + $this->elements[] = $findfile; + } + } + + //Sort the elements + if ($this->sort) + { + usort($this->elements, $this->sortfunc); + } + } + + public function __construct($iterator, &$filter, $follow, $url, $needmime, + $sort, $sortfunc) + { + $this->filter = $filter; + $this->iterator = $iterator; + if ($this->iterator->valid()) + { + $this->base = $this->iterator->getPath(); + } + $this->follow = $follow; + $this->sortfunc = $sortfunc; + $this->sort = $sort; + $this->url = $url; + $this->needmime = $needmime; + + //Get the element array. As this array only contains the current directory, + //and subdirectories are first loaded when they are accessed, a lot of memory + //is saved. + $this->fillelemarray(); + } + + public function valid() + { + return $this->index < count($this->elements); + } + + public function next() + { + $this->index++; + } + + public function current() + { + return $this->elements[$this->index]; + } + + public function key() + { + return $this->elements[$this->index]['name']; + } + + public function hasChildren() + { + $props = &$this->elements[$this->index]->get_props(); + + //Only follow symlinks if we were ought to do so + if ($props['isdir'] && !$props['isdot'] && (!$props['islink'] || $this->follow)) + { + //Check whether the link points on a file + if ($props['islink']) + { + $info = new SplFileInfo($props['linktarget']); + //Only follow the symlink if the symlink points to a directory. + //TODO: Recursion prevention? + return ($info->isDir()); + } + else + { + return true; + } + } + return false; + } + + public function getChildren() + { + $it = new RecursiveDirectoryIterator($this->elements[$this->index]->translate_path()); + return new egw_recursive_directory_iterator_wrapper($it, $this->filter, + $this->follow, $this->url, $this->needmime, $this->sort, + $this->sortfunc, false); + } + + public function rewind() + { + $this->index = 0; + } +} + +class egw_find_iterator implements Iterator +{ + private $filter = null; + private $follow = false; + private $iterator_stack = array(); + private $base = array(); + private $base_index = 0; + private $current_cache = null; + private $needs_next = false; + private $rewind = false; + private $pairs = false; + + private $numerical_sort = false; + private $dirsontop = true; + private $sort = 1; + private $order = null; + + private $index = 0; + private $lower_limit = 0; + private $upper_limit = null; + private $mindepth = null; + private $maxdepth = null; + private $depth = false; + private $url = false; + private $needmime = false; + + private function push_root_iterator() + { + $root_iterator = new egw_recursive_directory_iterator_wrapper( + new RecursiveDirectoryIterator($this->base[$this->base_index]), + $this->filter, $this->follow, $this->url, $this->needmime, + ($this->order !== null) || $this->dirsontop, array($this, "sort"), true); + array_push($this->iterator_stack, $root_iterator); + } + + public function sort($a, $b) + { + //TODO restore, make it much faster :-) + + /*if ($this->dirsontop) + { + if (($a['isdir'] != $b['isdir']) && ($a['isdir'] || $b['isdir'])) + return $a['isdir'] ? -1:1; + } + + if ($this->numerical_sort) + { + $res = $a[$this->order] == $b[$this->order] ? 0 : ($a[$this->order] > $b[$this->order] ? -1:1); + } + else + { + $res = strcasecmp($a[$this->order], $b[$this->order]); + } + + //Always use name as second sort criteria + if (($res == 0) && ($this->order != 'name')) + { + $res = strcasecmp($a['name'], $b['name']); + }*/ + + return 0; //$res * $this->sort; + } + + private function cur_iterator() + { + $s = count($this->iterator_stack); + if ($s > 0) + { + return $this->iterator_stack[$s - 1]; + } + else + { + return null; + } + } + + private function output_directory(&$findfile) + { + //If the element is visible, set $current_cache properly and return + if ($findfile->filterstate >= FILTER_OK) + { + $this->index++; + if (($this->index-1 >= $this->lower_limit) && + (($this->mindepth === null) || count($this->iterator_stack) >= $this->mindepth)) + { + $this->current_cache = $findfile->get_result($this->pairs); + return true; + } + } + return false; + } + + private function prefetch_current() + { + //If needs_next is true, a new element has been requested by calling the + //"next()" function + if ($this->needs_next) + { + //Reset $current_cache + $this->needs_next = false; + $this->current_cache = null; + $this->rewind = false; + + $cur = $this->cur_iterator(); + while (($cur != null) && (!$this->upper_limit || ($this->index <= $this->upper_limit))) + { + $skip = false; + if ($cur->valid()) + { + //Get a copy of the current file state + $findfile = $cur->current(); + + //Add the children elements, if some exist + if ($cur->hasChildren() && !($findfile->filterstate == FILTER_NOFOLLOW)) + { + if (($this->maxdepth === null) || count($this->iterator_stack) < $this->maxdepth) + { + array_push($this->iterator_stack, $cur->getChildren()); + $skip = $this->depth; + } + } + + //Step the current element one further and get the (probably) new iterator + if (!$skip) + $cur->next(); + $cur = $this->cur_iterator(); + + if (!$skip && $this->output_directory($findfile)) + break; + } + + //If the current iterator doesn't contain any valid element, we're at the + //end of the current directory. Switch to the last directory + if (!$cur->valid()) + { + array_pop($this->iterator_stack); + $cur = $this->cur_iterator(); + //If cur = null we're at the end of the current directory tree. + //Check whether there was another directory specified + if ($cur === null) + { + //Increment the base dir array index + $this->base_index++; + if ($this->base_index < count($this->base)) + { + $this->push_root_iterator(); + $cur = $this->cur_iterator(); + } + } + else + { + //If the contents of the directory were listed before the directory itself + //output the directory name when it was popped from the iterator stack + if ($this->depth && $cur && $cur->valid()) + { + $findfile = $cur->current(); + $cur->next(); + if ($this->output_directory($findfile)) + break; + } + } + } + } + } + } + + private function parseoptions(&$options) + { + //If options is not set, make it an empty array + if (!$options) + $options = array(); + + //Create the filter object (passed to other objects by reference) and + //parse some other options relevant for this base class + $this->filter = new egw_directory_iterator_filter($options); + $this->follow = isset($options['follow']) ? $options['follow'] : false; + $this->mindepth = isset($options['mindepth']) ? $options['mindepth'] : null; + $this->maxdepth = isset($options['maxdepth']) ? $options['maxdepth'] : null; + $this->depth = isset($options['depth']) ? $options['depth'] : false; + $this->dirsontop = isset($options['dirsontop']) ? (boolean)$options['dirsontop'] : + (isset($options['maxdepth']) && $options['maxdepth']>0); + $this->sort = isset($options['sort']) ? (($options['sort'] == 'ASC') ? 1 : -1) : 1; + $this->order = isset($options['order']) ? $options['order'] : null; + + $this->numerical_sort = ($this->order == 'size' || $this->order == 'uid' || + $this->order == 'gid' || $this->order == 'mode' || $this->order == 'ctime' || + $this->order == 'mtime'); + + $this->url = isset($options['url']) ? (boolean)$options['url'] : false; + + $this->needmime = isset($options['need_mime']) ? $options['need_mime'] : false; + $this->needmime = $this->needmime || isset($options['mime']) || ($this->order == 'mime'); + + //Retrieve the limit parameters + if (isset($options['limit'])) + { + $args = explode(',', $options['limit']); + $count = $args[0]; + if (isset($args[1])) + { + $this->lower_limit = $args[1]; + } + $this->upper_limit = $this->lower_limit + $count; + } + } + + public function __construct($base, $options=null, $pairs = false) + { + //_debug_array($options); + $this->pairs = $pairs; + + $this->parseoptions($options); + + //Make sure that base is an array with numeric keys + if (is_array($base)) + { + $this->base = array_values($base); + } + else + { + $this->base[] = $base; + } + + //Add the default url prefix to every path if url is not set. + foreach($this->base as $key => $path) + $this->base[$key] = translate_path($path, $this->url); + + $this->rewind(); + } + + public function current() + { + $this->prefetch_current(); + return $this->current_cache; + } + + public function key() + { + $this->prefetch_current(); + if ($this->pairs) + { + return $this->current_cache->path; + } + else + { + return $this->index - 1 - $this->lower_limit; + } + } + + public function next() + { + $this->needs_next = true; + $this->prefetch_current(); + } + + public function valid() + { + if (!$this->upper_limit || ($this->index <= $this->upper_limit)) + { + $this->prefetch_current(); + return $this->current_cache != null; + } + else + { + return false; + } + } + + public function rewind() + { + if (!$this->rewind) + { + //Empty the iterator stack + $this->iterator_stack = array(); + $this->current_cache = null; + $this->needs_next = true; + $this->index = 0; + $this->base_index = 0; + $this->rewind = true; + + //Create the root directory iterator + $this->push_root_iterator(); + } + } +} +