Class | Gem::Indexer |
In: |
lib/rubygems/indexer.rb
|
Parent: | Object |
Top level class for building the gem repository index.
build_legacy | [RW] | Build indexes for RubyGems older than 1.2.0 when true |
build_modern | [RW] | Build indexes for RubyGems 1.2.0 and newer when true |
dest_directory | [R] | Index install location |
dest_latest_specs_index | [R] | Latest specs index install location |
dest_prerelease_specs_index | [R] | Prerelease specs index install location |
dest_specs_index | [R] | Specs index install location |
directory | [R] | Index build directory |
Create an indexer that will index the gems in directory.
# File lib/rubygems/indexer.rb, line 55
##
# Creates an indexer for the gems stored under +directory+.
#
# +options+ may carry :build_legacy and :build_modern (both default to
# true) plus :rss_title, :rss_host and :rss_gems_host.  Every index is
# staged in a per-process directory below Dir.tmpdir; the matching
# install locations below +directory+ are computed up front.
#
# Raises RuntimeError when Builder::XChar is not defined.
def initialize(directory, options = {})
  require 'fileutils'
  require 'tmpdir'
  require 'zlib'

  unless defined?(Builder::XChar)
    raise "Gem::Indexer requires that the XML Builder library be installed:" \
          "\n\tgem install builder"
  end

  settings = { :build_legacy => true, :build_modern => true }.merge options

  @build_legacy  = settings[:build_legacy]
  @build_modern  = settings[:build_modern]
  @rss_title     = settings[:rss_title]
  @rss_host      = settings[:rss_host]
  @rss_gems_host = settings[:rss_gems_host]

  @dest_directory = directory
  @directory      = File.join Dir.tmpdir, "gem_generate_index_#{$$}"

  # File names shared between the staging area and the install location.
  version         = Gem.marshal_version
  marshal_name    = "Marshal.#{version}"
  specs_name      = "specs.#{version}"
  latest_name     = "latest_specs.#{version}"
  prerelease_name = "prerelease_specs.#{version}"

  @master_index  = File.join @directory, 'yaml'
  @marshal_index = File.join @directory, marshal_name

  @quick_dir         = File.join @directory, 'quick'
  @quick_marshal_dir = File.join @quick_dir, marshal_name
  @quick_index       = File.join @quick_dir, 'index'
  @latest_index      = File.join @quick_dir, 'latest_index'

  @specs_index            = File.join @directory, specs_name
  @latest_specs_index     = File.join @directory, latest_name
  @prerelease_specs_index = File.join @directory, prerelease_name

  @dest_specs_index            = File.join @dest_directory, specs_name
  @dest_latest_specs_index     = File.join @dest_directory, latest_name
  @dest_prerelease_specs_index = File.join @dest_directory, prerelease_name

  @rss_index = File.join @directory, 'index.rss'

  @files = []
end
Abbreviate the spec for downloading. Abbreviated specs are only used for searching, downloading and related activities and do not need deployment specific information (e.g. list of files). So we abbreviate the spec, making it much smaller for quicker downloads.
# File lib/rubygems/indexer.rb, line 113
##
# Strips the bulky, deployment-only list fields from +spec+ by replacing
# each of them with an empty array, then returns the same +spec+ object.
def abbreviate(spec)
  bulky_writers = [:files=, :test_files=, :rdoc_options=,
                   :extra_rdoc_files=, :cert_chain=]

  # Each field gets its own fresh array.
  bulky_writers.each { |writer| spec.send writer, [] }

  spec
end
Build the various indices.
# File lib/rubygems/indexer.rb, line 125
##
# Runs every enabled index builder against +index+ and then compresses
# the generated files.
def build_indicies(index)
  # Marshal gemspecs are used by both modern and legacy RubyGems
  builders = [:build_marshal_gemspecs]
  builders << :build_legacy_indicies if @build_legacy
  builders << :build_modern_indicies if @build_modern
  builders << :build_rss

  builders.each { |builder| send builder, index }

  compress_indicies
end
Builds indices for RubyGems older than 1.2.x
# File lib/rubygems/indexer.rb, line 138
##
# Writes the Marshal master index (the result of <tt>index.dump</tt>) to
# @marshal_index and records both that file and its ".Z" companion in
# @files.
def build_legacy_indicies(index)
  say "Generating Marshal master index"

  Gem.time 'Generated Marshal master index' do
    # File.open rather than Kernel#open: the argument is always a plain
    # file path and must never be interpreted as a command.
    File.open @marshal_index, 'wb' do |io|
      io.write index.dump
    end
  end

  @files << @marshal_index
  @files << "#{@marshal_index}.Z"
end
Builds Marshal quick index gemspecs.
# File lib/rubygems/indexer.rb, line 154
##
# Writes one deflated, Marshal-dumped gemspec per entry of
# <tt>index.gems</tt> into @quick_marshal_dir, named
# "<original_name>.gemspec.rz".
#
# Records @quick_marshal_dir in @files and returns the list of files
# written.
def build_marshal_gemspecs(index)
  progress = ui.progress_reporter index.size,
                                  "Generating Marshal quick index gemspecs for #{index.size} gems",
                                  "Complete"

  files = []

  Gem.time 'Generated Marshal quick index gemspecs' do
    index.gems.each do |original_name, spec|
      marshal_name = File.join @quick_marshal_dir,
                               "#{original_name}.gemspec.rz"
      marshal_zipped = Gem.deflate Marshal.dump(spec)

      # File.open rather than Kernel#open: the name is derived from gem
      # metadata and must only ever be treated as a file path.
      File.open(marshal_name, 'wb') { |io| io.write marshal_zipped }

      files << marshal_name

      progress.updated original_name
    end

    progress.done
  end

  @files << @quick_marshal_dir

  files
end
Build a single index for RubyGems 1.2 and newer
# File lib/rubygems/indexer.rb, line 185
##
# Writes a single modern index to +file+: a Marshal dump of
# [name, version, platform] tuples, one per entry of +index+.  +name+ is
# only used in progress messages.
#
# Entries whose original platform is not a String are reported with a
# warning and left out of the index.  An empty platform is stored as
# Gem::Platform::RUBY.
def build_modern_index(index, file, name)
  say "Generating #{name} index"

  Gem.time "Generated #{name} index" do
    # File.open rather than Kernel#open: +file+ is always a plain path.
    File.open(file, 'wb') do |io|
      specs = index.map do |*spec|
        # We have to splat here because latest_specs is an array,
        # while the others are hashes.  See the TODO in source_index.rb
        spec = spec.flatten.last
        platform = spec.original_platform

        # win32-api-1.0.4-x86-mswin32-60
        unless String === platform
          alert_warning "Skipping invalid platform in gem: #{spec.full_name}"
          next
        end

        # platform is known to be a String here, so only the empty case
        # needs a default.
        platform = Gem::Platform::RUBY if platform.empty?
        [spec.name, spec.version, platform]
      end

      # +next+ above leaves a nil placeholder for every skipped gem; drop
      # them so they are not written out as [nil, nil, nil] tuples.
      specs = compact_specs(specs.compact)
      Marshal.dump(specs, io)
    end
  end
end
Builds indices for RubyGems 1.2 and newer. Handles the full, latest, and prerelease indexes.
# File lib/rubygems/indexer.rb, line 215
##
# Builds the released, latest and prerelease spec indexes from +index+
# (each sorted first) and records every index file together with its
# ".gz" companion in @files.
def build_modern_indicies(index)
  plan = [
    [:released_specs,   @specs_index,            'specs'],
    [:latest_specs,     @latest_specs_index,     'latest specs'],
    [:prerelease_specs, @prerelease_specs_index, 'prerelease specs']
  ]

  # Each spec list is fetched right before it is built.
  plan.each do |selector, path, label|
    build_modern_index(index.send(selector).sort, path, label)
  end

  generated = [@specs_index, @latest_specs_index, @prerelease_specs_index]
  @files += generated.map { |path| [path, "#{path}.gz"] }.flatten
end
Builds an RSS feed of the gems released in the past two days, according to each gem's date.
# File lib/rubygems/indexer.rb, line 236
##
# Writes an RSS 2.0 feed to @rss_index listing every entry of +index+
# whose date lies between one day before Gem::Specification::TODAY and
# Gem::Specification::TODAY (inclusive), newest first, then records the
# feed in @files.
#
# Does nothing (apart from a warning when really verbose) unless both
# @rss_host and @rss_gems_host are set.  Specs whose gem file cannot be
# stat'ed are left out of the feed.
def build_rss(index)
  if @rss_host.nil? || @rss_gems_host.nil?
    if Gem.configuration.really_verbose
      alert_warning "no --rss-host or --rss-gems-host, RSS generation disabled"
    end
    return
  end

  require 'cgi'
  require 'time' # provides Time.parse and Time#rfc2822 (and loads Date)
  require 'rubygems/text'

  extend Gem::Text

  Gem.time 'Generated rss' do
    # File.open rather than Kernel#open: @rss_index is always a file path.
    File.open @rss_index, 'wb' do |io|
      rss_host = CGI.escapeHTML @rss_host
      rss_title = CGI.escapeHTML(@rss_title || 'gems')

      io.puts <<-HEADER
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>#{rss_title}</title>
<link>http://#{rss_host}</link>
<description>Recently released gems from http://#{rss_host}</description>
<generator>RubyGems v#{Gem::VERSION}</generator>
<docs>http://cyber.law.harvard.edu/rss/rss.html</docs>
      HEADER

      today = Gem::Specification::TODAY
      yesterday = today - 86400

      # Keep only specs dated within [yesterday, today].  Dates that are
      # neither Date nor Time are excluded.
      recent = index.select do |_, spec|
        released = spec.date
        released = Time.parse(released.to_s) if Date === released

        Time === released && released >= yesterday && released <= today
      end

      recent.sort_by { |_, spec| [-spec.date.to_i, spec] }.each do |_, spec|
        gem_path = CGI.escapeHTML "http://#{@rss_gems_host}/gems/#{spec.file_name}"

        # A gem whose file cannot be inspected is skipped, not fatal.
        size = begin
                 File.stat(spec.loaded_from).size
               rescue StandardError
                 nil
               end
        next unless size

        description = spec.description || spec.summary || ''
        authors = Array spec.authors
        emails = Array spec.email

        # "email (author)" when a name is known, otherwise just the
        # e-mail address; an address is never dropped.
        authors = emails.zip(authors).map do |email, author|
          if author && !author.empty?
            "#{email} (#{author})"
          else
            email
          end
        end.join ', '

        description = description.split(/\n\n+/).map do |chunk|
          format_text chunk, 78
        end

        description = description.join "\n\n"

        item = <<-ITEM
<item>
<title>#{CGI.escapeHTML spec.full_name}</title>
<description>
<pre>#{CGI.escapeHTML description.chomp}</pre>
</description>
<author>#{CGI.escapeHTML authors}</author>
<guid>#{CGI.escapeHTML spec.full_name}</guid>
<enclosure url="#{gem_path}"
length="#{size}" type="application/octet-stream" />
<pubDate>#{spec.date.rfc2822}</pubDate>
        ITEM

        item << "<link>#{CGI.escapeHTML spec.homepage}</link>\n" if spec.homepage

        item << "</item>\n"

        io.puts item
      end

      io.puts "</channel>\n</rss>\n"
    end
  end

  @files << @rss_index
end
Collect specifications from .gem files from the gem directory.
# File lib/rubygems/indexer.rb, line 351
##
# Loads the specification of every file in +gems+ (by default the result
# of gem_file_list) into a new Gem::SourceIndex and returns it.
#
# Zero-length and misnamed gem files are skipped with a warning.  Each
# accepted spec is abbreviated and sanitized before being added.  A
# failure on one gem is reported through alert_error and does not stop
# the run; signals are reported and re-raised.
def collect_specs(gems = gem_file_list)
  index = Gem::SourceIndex.new

  progress = ui.progress_reporter gems.size,
                                  "Loading #{gems.size} gems from #{@dest_directory}",
                                  "Loaded all gems"

  Gem.time 'loaded' do
    gems.each do |gemfile|
      if File.size(gemfile.to_s) == 0
        alert_warning "Skipping zero-length gem: #{gemfile}"
        next
      end

      begin
        spec = Gem::Format.from_file_by_path(gemfile).spec
        spec.loaded_from = gemfile

        # The file name must contain the spec's original name and end in
        # ".gem" (case-insensitively).
        name_pattern = /\/#{Regexp.escape spec.original_name}.*\.gem\z/i

        if gemfile !~ name_pattern
          expected_name = spec.full_name
          if spec.original_name != spec.full_name
            expected_name << " (#{spec.original_name})"
          end
          alert_warning "Skipping misnamed gem: #{gemfile} should be named #{expected_name}"
          next
        end

        abbreviate spec
        sanitize spec

        index.add_spec spec, spec.original_name

        progress.updated spec.original_name

      rescue SignalException
        alert_error "Received signal, exiting"
        raise
      rescue Exception => e
        alert_error "Unable to process #{gemfile}\n#{e.message} (#{e.class})\n\t#{e.backtrace.join "\n\t"}"
      end
    end

    progress.done
  end

  index
end
Compacts Marshal output for the specs index data source by using identical objects as much as possible.
# File lib/rubygems/indexer.rb, line 424
##
# Returns a copy of +specs+ (an array of [name, version, platform]
# tuples) in which equal names, versions and platforms are all
# represented by the first object seen for that value, so that Marshal
# can emit back-references instead of repeated data.
#
# nil entries in +specs+ are dropped rather than being turned into
# [nil, nil, nil] tuples.
def compact_specs(specs)
  names = {}
  versions = {}
  platforms = {}

  specs.compact.map do |(name, version, platform)|
    [
      names[name] ||= name,
      versions[version] ||= version,
      platforms[platform] ||= platform
    ]
  end
end
Compress filename with extension.
# File lib/rubygems/indexer.rb, line 441
##
# Deflates the contents of +filename+ and writes the result next to it
# as "<filename>.<extension>".  The source file is left in place.
def compress(filename, extension)
  zipped = Gem.deflate Gem.read_binary(filename)

  # File.open rather than Kernel#open: the target is always a file path.
  File.open("#{filename}.#{extension}", 'wb') { |io| io.write zipped }
end
Compresses the indices on disk.
# File lib/rubygems/indexer.rb, line 403
##
# Compresses the generated index files: the Marshal master index (with a
# round-trip check) when building legacy indexes, and the three specs
# indexes when building modern ones.
def compress_indicies
  say "Compressing indicies"

  Gem.time 'Compressed indicies' do
    if @build_legacy
      compress @marshal_index, 'Z'
      paranoid @marshal_index, 'Z'
    end

    # Nothing further to do (and nothing to return) without modern indexes.
    next unless @build_modern

    gzip @specs_index
    gzip @latest_specs_index
    gzip @prerelease_specs_index
  end
end
List of gem file names to index.
# File lib/rubygems/indexer.rb, line 454
##
# Returns the paths of all "*.gem" files found directly inside the
# "gems" subdirectory of @dest_directory.
def gem_file_list
  pattern = File.join @dest_directory, "gems", "*.gem"

  Dir[pattern]
end
Builds and installs the indices.
# File lib/rubygems/indexer.rb, line 461
##
# Runs a full index build: creates the staging directories, collects the
# specs, builds every index and installs the result.
#
# A SignalException raised along the way is swallowed, and the staging
# directory is removed whether or not the build succeeded.
def generate_index
  begin
    make_temp_directories
    build_indicies collect_specs
    install_indicies
  rescue SignalException
    # Interrupted: fall through to the cleanup below without re-raising.
  ensure
    FileUtils.rm_rf @directory
  end
end
Zlib::GzipWriter wrapper that gzips filename on disk.
# File lib/rubygems/indexer.rb, line 474
##
# Writes a gzipped copy of +filename+ to "<filename>.gz", leaving the
# original file in place.
def gzip(filename)
  target = "#{filename}.gz"

  Zlib::GzipWriter.open(target) do |writer|
    writer.write Gem.read_binary(filename)
  end
end
Install the generated indices into the destination directory.
# File lib/rubygems/indexer.rb, line 483
##
# Moves everything recorded in @files from the staging directory into
# @dest_directory, replacing whatever is already installed there.
#
# The quick Marshal directory is moved on its own (into its nested
# location) unless the whole quick directory is being installed, in
# which case it is covered by that move.
def install_indicies
  verbose = Gem.configuration.really_verbose

  say "Moving index into production dir #{@dest_directory}" if verbose

  staged = @files.dup

  # Array#delete returns nil when the entry was not present.
  had_quick_marshal = !staged.delete(@quick_marshal_dir).nil?

  if had_quick_marshal && !staged.include?(@quick_dir)
    relative_dir = @quick_marshal_dir.sub @directory, ''
    target_dir = File.join @dest_directory, relative_dir

    FileUtils.mkdir_p File.dirname(target_dir), :verbose => verbose
    FileUtils.rm_rf target_dir, :verbose => verbose
    FileUtils.mv @quick_marshal_dir, target_dir, :verbose => verbose,
                                                 :force => true
  end

  staged.map { |path| path.sub @directory, '' }.each do |file|
    src_name = File.join @directory, file
    dst_name = File.join @dest_directory, file

    FileUtils.rm_rf dst_name, :verbose => verbose
    FileUtils.mv src_name, @dest_directory, :verbose => verbose,
                                            :force => true
  end
end
Make directories for index generation
# File lib/rubygems/indexer.rb, line 521
##
# Recreates the staging area from scratch: removes @directory, creates
# it again with mode 0700 and creates @quick_marshal_dir.
def make_temp_directories
  staging_root = @directory

  FileUtils.rm_rf(staging_root)
  FileUtils.mkdir_p(staging_root, :mode => 0700)
  FileUtils.mkdir_p(@quick_marshal_dir)
end
Ensure path and path with extension are identical.
# File lib/rubygems/indexer.rb, line 530
##
# Verifies that the file "<path>.<extension>" inflates to exactly the
# contents of +path+.
#
# Raises RuntimeError naming both files when they differ.
def paranoid(path, extension)
  # Named up front so the error message below can refer to it.
  compressed_path = "#{path}.#{extension}"

  data = Gem.read_binary path
  compressed_data = Gem.read_binary compressed_path

  unless data == Gem.inflate(compressed_data)
    raise "Compressed file #{compressed_path} does not match uncompressed file #{path}"
  end
end
Sanitize the descriptive fields in the spec. Sometimes non-ASCII characters will garble the site index. Non-ASCII characters will be replaced by their XML entity equivalent.
# File lib/rubygems/indexer.rb, line 544
##
# Runs the summary, description, post-install message and every author
# of +spec+ through sanitize_string, storing the results back on +spec+.
# Returns +spec+.
def sanitize(spec)
  [:summary, :description, :post_install_message].each do |field|
    spec.send "#{field}=", sanitize_string(spec.send(field))
  end

  spec.authors = spec.authors.map { |author| sanitize_string(author) }

  spec
end
Sanitize a single string.
# File lib/rubygems/indexer.rb, line 556
##
# Returns +string+ encoded by Builder::XChar.encode, falling back to
# String#to_xs when that call raises NameError or NoMethodError.
# nil and false are returned unchanged.
def sanitize_string(string)
  return string unless string

  # HACK the #to_s is in here because RSpec has an Array of Arrays of
  # Strings for authors.  Need a way to disallow bad values on gemspec
  # generation.  (Probably won't happen.)
  text = string.to_s

  begin
    Builder::XChar.encode text
  rescue NameError, NoMethodError
    text.to_xs
  end
end
Perform an in-place update of the repository from newly added gems. Only works for modern indices, and sets build_legacy to false when run.
# File lib/rubygems/indexer.rb, line 575
##
# Updates the installed modern indexes in place with the gems whose
# mtime is not older than the installed specs index.
#
# Sets @build_legacy to false.  When there are no such gems it says so
# and calls terminate_interaction.  Every installed file has its atime
# and mtime set to the mtime of the newest gem file.  The staging
# directory is removed on the way out, whatever the outcome.
def update_index
  @build_legacy = false

  make_temp_directories

  specs_mtime = File.stat(@dest_specs_index).mtime
  newest_mtime = Time.at 0

  updated_gems = gem_file_list.select do |gem|
    gem_mtime = File.stat(gem).mtime
    newest_mtime = gem_mtime if gem_mtime > newest_mtime
    gem_mtime >= specs_mtime
  end

  if updated_gems.empty?
    say 'No new gems'
    terminate_interaction 0
  end

  index = collect_specs updated_gems

  files = build_marshal_gemspecs index

  Gem.time 'Updated indexes' do
    update_specs_index index.released_gems, @dest_specs_index, @specs_index
    update_specs_index index.released_gems, @dest_latest_specs_index, @latest_specs_index
    update_specs_index(index.prerelease_gems, @dest_prerelease_specs_index,
                       @prerelease_specs_index)
  end

  compress_indicies

  verbose = Gem.configuration.really_verbose

  say "Updating production dir #{@dest_directory}" if verbose

  files << @specs_index
  files << "#{@specs_index}.gz"
  files << @latest_specs_index
  files << "#{@latest_specs_index}.gz"
  files << @prerelease_specs_index
  files << "#{@prerelease_specs_index}.gz"

  files = files.map do |path|
    path.sub @directory, ''
  end

  files.each do |file|
    src_name = File.join @directory, file
    # Full path of the installed file, so that the timestamp below lands
    # on the file itself and not on its parent directory.
    dst_name = File.join @dest_directory, file

    FileUtils.mv src_name, dst_name, :verbose => verbose,
                                     :force => true

    File.utime newest_mtime, newest_mtime, dst_name
  end
ensure
  # Never leave the staging directory behind.
  FileUtils.rm_rf @directory
end
Combines specs in index and source then writes out a new copy to dest. For a latest index, does not ensure the new file is minimal.
# File lib/rubygems/indexer.rb, line 637
##
# Loads the Marshal-dumped tuple list stored in +source+, appends a
# [name, version, platform] tuple for every spec in +index+ (a nil or
# empty platform is stored as Gem::Platform::RUBY), then writes the
# de-duplicated, sorted and compacted list to +dest+.
def update_specs_index(index, source, dest)
  # NOTE(review): Marshal.load executes whatever the file describes;
  # +source+ is presumably always an index this class wrote itself -
  # confirm before pointing it at anything else.
  specs_index = Marshal.load Gem.read_binary(source)

  index.each do |_, spec|
    platform = spec.original_platform
    platform = Gem::Platform::RUBY if platform.nil? || platform.empty?
    specs_index << [spec.name, spec.version, platform]
  end

  specs_index = compact_specs specs_index.uniq.sort

  # File.open rather than Kernel#open: +dest+ is always a file path.
  File.open(dest, 'wb') { |io| Marshal.dump specs_index, io }
end