diakonos/lib/diakonos/vendor/fuzzy_file_finder.rb

380 lines
13 KiB
Ruby

#--
# ==================================================================
# Author: Jamis Buck (jamis@jamisbuck.org)
# Date: 2008-10-09
#
# This file is in the public domain. Usage, modification, and
# redistribution of this file are unrestricted.
# ==================================================================
#++
# The "fuzzy" file finder provides a way for searching a directory
# tree with only a partial name. This is similar to the "cmd-T"
# feature in TextMate (http://macromates.com).
#
# Usage:
#
# finder = FuzzyFileFinder.new
# finder.search("app/blogcon") do |match|
# puts match[:highlighted_path]
# end
#
# In the above example, all files matching "app/blogcon" will be
# yielded to the block. The given pattern is reduced to a regular
# expression internally, so that any file that contains those
# characters in that order (even if there are other characters
# in between) will match.
#
# In other words, "app/blogcon" would match any of the following
# (parenthesized strings indicate how the match was made):
#
# * (app)/controllers/(blog)_(con)troller.rb
# * lib/c(ap)_(p)ool/(bl)ue_(o)r_(g)reen_(co)loratio(n)
# * test/(app)/(blog)_(con)troller_test.rb
#
# And so forth.
class FuzzyFileFinder
# Raised when a scan collects more files than the configured ceiling
# allows. The ceiling defaults to 10,000 entries and can be changed
# through the +ceiling+ parameter of FuzzyFileFinder.new.
class TooManyEntries < RuntimeError
end
# Internal value object for one contiguous run of characters in a
# candidate name. A run flagged +inside+ was matched by the search
# pattern and is wrapped in parentheses when rendered; any other run
# is rendered unchanged.
class CharacterRun < Struct.new(:string, :inside) #:nodoc:
  def to_s
    inside ? "(#{string})" : string
  end
end
# Internal record for a single file found during a scan: the directory
# object that contains it (+parent+) and the file's own +name+.
class FileSystemEntry #:nodoc:
  attr_reader :parent, :name

  def initialize(parent, name)
    @parent, @name = parent, name
  end

  # The parent directory's name joined with this entry's name.
  def path
    File.join(@parent.name, @name)
  end
end
# Used internally to represent a directory within the scanned tree.
# +name+ is the directory's path; +is_root+ records whether the
# directory is one of the finder's top-level roots.
class Directory #:nodoc:
  attr_reader :name

  def initialize(name, is_root=false)
    @name = name
    @is_root = is_root
  end

  # Returns the +is_root+ flag given at construction time. Reads the
  # instance variable directly because no +is_root+ reader is defined.
  def root?
    @is_root
  end
end
# The root directory trees to search, as Directory objects.
attr_reader :roots
# The files (FileSystemEntry objects) found beneath all +roots+ by the
# most recent scan.
attr_reader :files
# The maximum number of files that may be collected beneath all +roots+.
attr_reader :ceiling
# The leading path shared by all +roots+.
attr_reader :shared_prefix
# The list of glob patterns to ignore.
attr_reader :ignores
# Initializes a new FuzzyFileFinder and immediately scans the
# requested directories.
#
# +params+ is a hash; every key is optional:
#
# * :directories - a path or array of paths to scan (default: ".").
#   A leading "~" is expanded, and entries that are not existing
#   directories are skipped.
# * :ceiling - the maximum number of files to collect (default:
#   10,000). The scan raises TooManyEntries when it is exceeded.
# * :ignores - a glob pattern or array of glob patterns; matching
#   paths are left out of the scan.
# * :recursive - whether to descend into subdirectories (default: true).
def initialize( params = {} )
  @ceiling = params[:ceiling] || 10_000
  @ignores = Array(params[:ignores])

  # Work on a copy so that the "." fallback below never mutates an
  # array owned by the caller.
  directories = params[:directories] ? Array(params[:directories]).dup : []
  directories << "." if directories.empty?

  @recursive = params[:recursive].nil? ? true : params[:recursive]

  # Expand "~" and relative paths first, drop anything that is not an
  # existing directory, and only then resolve symlinks: File.realpath
  # raises for paths that do not exist, so it must run after the filter.
  root_dirnames = directories.map { |d|
    File.expand_path(d)
  }.select { |d|
    File.directory?(d)
  }.map { |d|
    File.realpath(d)
  }.uniq

  @roots = root_dirnames.map { |d| Directory.new(d, true) }
  # With no usable roots there is no prefix to compute.
  @shared_prefix = @roots.empty? ? "" : determine_shared_prefix
  @shared_prefix_re = Regexp.new("^#{Regexp.escape(shared_prefix)}" + (shared_prefix.empty? ? "" : "/"))
  @files = []
  @directories = {} # To detect link cycles
  @dirs_with_many = []
  rescan!
end
# Rescans the subtree. If the directory contents ever change,
# you'll need to call this to force the finder to be aware of
# the changes.
def rescan!
  @files.clear
  # Forget what earlier scans recorded. follow_tree skips every
  # directory already present in @directories, so without this reset a
  # second scan would skip all roots and leave the file list empty.
  @directories = {}
  @dirs_with_many = []
  roots.each { |root| follow_tree(root) }
end
# Takes the given +pattern+ (which must be a string) and searches
# all files beneath +roots+, yielding each match.
#
# +pattern+ is interpreted thus:
#
# * "foo" : look for any file with the characters 'f', 'o', and 'o'
# in its basename (discounting directory names). The characters
# must be in that order.
# * "foo/bar" : look for any file with the characters 'b', 'a',
# and 'r' in its basename (discounting directory names). Also,
# any successful match must also have at least one directory
# element matching the characters 'f', 'o', and 'o' (in that
# order).
# * "foo/bar/baz" : same as "foo/bar", but matching two
# directory elements in addition to a file name of "baz".
#
# Each yielded match will be a hash containing the following keys:
#
# * :path refers to the full path to the file
# * :directory refers to the directory of the file
# * :name refers to the name of the file (without directory)
# * :highlighted_directory refers to the directory of the file with
# matches highlighted in parentheses.
# * :highlighted_name refers to the name of the file with matches
# highlighted in parentheses
# * :highlighted_path refers to the full path of the file with
# matches highlighted in parentheses
# * :abbr refers to an abbreviated form of :highlighted_path, where
# path segments without matches are compressed to just their first
# character.
# * :score refers to a value between 0 and 1 indicating how closely
# the file matches the given pattern. A score of 1 means the
# pattern matches the file exactly.
def search(pattern, &block)
  segments = pattern.strip.split("/")
  # A trailing slash leaves the file-name part of the query empty.
  segments << "" if pattern[-1,1] == "/"
  basename_query = segments.pop || ""

  # Only build a directory regex when the query names directories.
  dir_regex = nil
  unless segments.empty?
    dir_source = "^(.*?)" + segments.map { |segment| make_pattern(segment) }.join("(.*?/.*?)") + "(.*?)$"
    dir_regex = Regexp.new(dir_source, Regexp::IGNORECASE)
  end

  name_source = "^(.*?)" + make_pattern(basename_query) + "(.*)$"
  name_regex = Regexp.new(name_source, Regexp::IGNORECASE)

  # Directory match results are cached per directory for this search.
  dir_match_cache = {}
  files.each do |file|
    dir_match = match_path(file.parent, dir_match_cache, dir_regex, segments.length)
    match_file(file, name_regex, dir_match, &block) unless dir_match[:missed]
  end
end
# Takes the given +pattern+ (which must be a string, formatted as
# described in #search), and returns up to +max+ matches in an
# Array. If +max+ is nil, all matches will be returned. A +max+ of
# zero or less returns an empty Array without searching.
def find(pattern, max=nil)
  results = []
  # The limit below is only checked after a match has been appended,
  # so a non-positive limit has to be handled before searching.
  return results if max && max <= 0

  search(pattern) do |match|
    results << match
    break if max && results.length >= max
  end
  results
end
# Displays the finder object in a sane, non-explosive manner.
def inspect #:nodoc:
"#<%s:0x%x roots=%s, files=%d>" % [self.class.name, object_id, roots.map { |r| r.name.inspect }.join(", "), files.length]
end
private
# Walks +directory+ depth-first and appends every file found to
# +files+. Entries whose name starts with "." and paths accepted by
# ignore? are skipped. Subdirectories are only entered when the finder
# is recursive. A directory whose real path has already been visited is
# not scanned again, which stops symlink cycles. Raises TooManyEntries
# once more than +ceiling+ files have been collected.
def follow_tree(directory)
  canonical = File.realpath(directory.name)
  return if @directories[canonical]
  @directories[canonical] = true

  listing = Dir.entries(directory.name)

  # Remember (and log) unusually large directories so that a later
  # TooManyEntries error can say where the entries came from.
  if listing.length > ceiling / 10
    @dirs_with_many << [listing.length, directory.name]
    $diakonos.log "[#{self.class}] Many dir entries: #{listing.length} in #{directory.name}"
  end

  listing.each do |entry|
    next if entry.start_with?(".")

    if files.length > ceiling
      raise TooManyEntries.new(%{
Directories with many entries:
#{@dirs_with_many.map { |d| d.join("\t") }.join("\n")}
})
    end

    full = File.join(directory.name, entry)
    next if ignore?(full)

    if !File.directory?(full)
      files.push(FileSystemEntry.new(directory, entry))
    elsif @recursive
      follow_tree(Directory.new(full))
    end
  end
end
# Tells whether +name+ should be left out of the scan. The shared
# prefix is removed from +name+ first, and the remainder is tested
# against every ignore glob.
def ignore?(name)
  relative = name.sub(@shared_prefix_re, "")
  ignores.any? { |glob| File.fnmatch?(glob, relative) }
end
# Converts a pattern string into regular-expression source. Each
# character becomes its own escaped capture group, and neighbouring
# groups are separated by a lazy "([^/]*?)" group, so "foo" becomes
# "(f)([^/]*?)(o)([^/]*?)(o)". An empty pattern yields the single
# empty group "()".
def make_pattern(pattern)
  characters = pattern.split(//)
  characters = [""] if characters.empty?
  characters.map { |character| "(#{Regexp.escape(character)})" }.join("([^/]*?)")
end
# Turns a MatchData object into a highlighted string and a score.
#
# +match+ must come from a regex built by this class, in which
# odd-indexed captures hold characters of the search pattern and
# even-indexed captures hold the text between them.
# +inside_segments+ is the number of pattern segments the regex
# covered: one for a file name, one per directory segment for a path.
#
# Returns a hash with :score and :result, where :result is the matched
# text with pattern characters wrapped in parentheses.
def build_match_result(match, inside_segments)
  runs = []
  inside_chars = 0
  total_chars = 0

  match.captures.each_with_index do |capture, index|
    next if capture.empty?

    inside = index.odd?
    total_chars += capture.delete("/").length # '/' delimiters do not count
    inside_chars += capture.length if inside

    # Adjacent captures of the same kind are merged into a single run.
    previous = runs.last
    if previous && previous.inside == inside
      previous.string << capture
    else
      runs << CharacterRun.new(capture, inside)
    end
  end

  # Scoring: fewer separate highlighted runs is better, and covering
  # more of the candidate's characters is better.
  inside_run_count = runs.count { |run| run.inside }
  run_ratio = inside_run_count.zero? ? 1 : inside_segments / inside_run_count.to_f
  char_ratio = total_chars.zero? ? 1 : inside_chars.to_f / total_chars

  { :score => run_ratio * char_ratio, :result => runs.join }
end
# Matches the directory +path+ against +path_regex+ and memoizes the
# outcome in the +path_matches+ hash, keyed by +path+. A cached entry
# is returned as-is. When +path_regex+ is nil every directory counts
# as a match; when the regex does not match, the result carries
# :missed => true.
def match_path(path, path_matches, path_regex, path_segments)
  return path_matches[path] if path_matches.key?(path)

  # Append a slash before stripping the shared prefix, then drop the
  # trailing slash again.
  matchable_name = "#{path.name}/".sub(@shared_prefix_re, "")
  matchable_name.chop!

  outcome =
    if path_regex
      match = matchable_name.match(path_regex)
      (match && build_match_result(match, path_segments)) ||
        { :score => 1, :result => matchable_name, :missed => true }
    else
      { :score => 1, :result => matchable_name }
    end

  path_matches[path] = outcome
end
# Tests +file+'s name against +file_regex+. On a match, yields one
# hash that combines the file-name match with the directory match
# already computed in +path_match+. Nothing is yielded when the name
# does not match.
def match_file(file, file_regex, path_match, &block)
  file_match = file.name.match(file_regex)
  return unless file_match

  name_result = build_match_result(file_match, 1)
  highlighted_dir = path_match[:result]
  highlighted_name = name_result[:result]

  highlighted_path = highlighted_dir.empty? ? highlighted_name : File.join(highlighted_dir, highlighted_name)

  # Directory segments without a highlighted part shrink to their
  # first character.
  short_dir = highlighted_dir.gsub(/[^\/]+/) { |segment| segment.index("(") ? segment : segment[0,1] }
  abbr = short_dir.empty? ? highlighted_name : File.join(short_dir, highlighted_name)

  yield({ :path => file.path,
          :abbr => abbr,
          :directory => file.parent.name,
          :name => file.name,
          :highlighted_directory => highlighted_dir,
          :highlighted_name => highlighted_name,
          :highlighted_path => highlighted_path,
          :score => path_match[:score] * name_result[:score] })
end
# Returns the longest leading run of whole path segments that all
# +roots+ have in common, joined with "/". The result is "" when
# there are no roots or when the roots share no leading segment.
def determine_shared_prefix
  # No roots at all: nothing to share.
  return "" if roots.empty?

  # the common case: if there is only a single root, then the entire
  # name of the root is the shared prefix.
  return roots.first.name if roots.length == 1

  reference, *others = roots.map { |root| root.name.split(%r{/}) }

  # Keep segments of the first root for as long as every other root
  # has the same segment at the same position.
  shared = reference.take_while.with_index do |segment, index|
    others.all? { |other| other[index] == segment }
  end
  shared.join("/")
end
end