diff --git a/lib/doc2text/xml_based_document_file.rb b/lib/doc2text/xml_based_document_file.rb index e0a6695..9459505 100644 --- a/lib/doc2text/xml_based_document_file.rb +++ b/lib/doc2text/xml_based_document_file.rb @@ -1,4 +1,5 @@ require 'zip' +require 'tmpdir' module Doc2Text module XmlBasedDocument @@ -8,13 +9,22 @@ def initialize(document_path) end def unpack + destination_root = Pathname.new(extract_path).realpath + Zip::File.open(@document_path) { |zip_file| - Dir.mkdir(extract_path) zip_file.each do |entry| - zipped_file_extract_path = File.join extract_path, entry.name - FileUtils.mkdir_p File.dirname(zipped_file_extract_path) - zip_file.extract entry, zipped_file_extract_path + entry_path = Pathname.new(entry.name) + + next if entry_path.absolute? + destination_path = destination_root.join(entry.name).cleanpath + + unless destination_path.to_s.start_with?(destination_root.to_s + File::SEPARATOR) + raise "Unsafe zip entry: #{entry.name}" + end + + FileUtils.mkdir_p(destination_path.dirname) + zip_file.extract entry, entry.name, destination_directory: extract_path end } end @@ -41,7 +51,7 @@ def extract_extension end def extract_path - File.join File.dirname(@document_path), ".#{File.basename(@document_path)}_#{extract_extension}" + @extract_path ||= Dir.mktmpdir(".#{File.basename(@document_path)}_#{extract_extension}") end end end diff --git a/spec/unpack_docx_spec.rb b/spec/unpack_docx_spec.rb index d693b66..8b400f4 100644 --- a/spec/unpack_docx_spec.rb +++ b/spec/unpack_docx_spec.rb @@ -8,7 +8,7 @@ def rspec_extract_docx entries = Dir.glob "#{@odt.extract_path}/**/*" mandatory_files = %w([Content_Types].xml).map { |entry| File.join @odt.extract_path, entry } - expect(entries.to_set.subset? mandatory_files.to_set) + expect(mandatory_files.to_set).to be_subset(entries.to_set) @odt.clean end diff --git a/spec/unpack_odt_spec.rb b/spec/unpack_odt_spec.rb index f3e9f0d..bce3347 100644 --- a/spec/unpack_odt_spec.rb +++ b/spec/unpack_odt_spec.rb @@ -1,4 +1,5 @@ require 'spec_helper' +require 'tempfile' describe 'odt' do def rspec_extract_odt @@ -8,7 +9,7 @@ def rspec_extract_odt entries = Dir.glob "#{@odt.extract_path}/**/*" mandatory_files = %w(manifest.rdf content.xml settings.xml styles.xml META-INF META-INF/manifest.xml meta.xml mimetype).map { |entry| File.join @odt.extract_path, entry } - expect(entries.to_set.subset? mandatory_files.to_set) + expect(mandatory_files.to_set).to be_subset(entries.to_set) @odt.clean end @@ -17,4 +18,24 @@ def rspec_extract_odt rspec_extract_odt rspec_extract_odt end + + context "when the odt is a temporary file" do + it "runs from a temp file" do + tempfile = Tempfile.new('text_styles.odt') + tempfile.write File.read(File.join 'spec', 'fixtures', 'text_styles.odt') + tempfile.rewind + tempfile.close + + @odt = Doc2Text::Odt::Document.new tempfile + @odt.unpack + + entries = Dir.glob "#{@odt.extract_path}/**/*" + mandatory_files = %w(manifest.rdf content.xml settings.xml styles.xml META-INF META-INF/manifest.xml meta.xml mimetype).map { |entry| + File.join @odt.extract_path, entry } + expect(mandatory_files.to_set).to be_subset(entries.to_set) + + tempfile.unlink + @odt.clean + end + end end