Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions lib/doc2text/xml_based_document_file.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'zip'
require 'tmpdir'

module Doc2Text
module XmlBasedDocument
Expand All @@ -8,13 +9,22 @@ def initialize(document_path)
end

# Extracts every entry of the zip archive at @document_path into extract_path.
#
# Entries whose name is an absolute path are skipped. An entry whose cleaned
# destination would land outside the extraction root (for example a name
# containing "../") aborts the unpack instead of being written.
#
# @raise [RuntimeError] "Unsafe zip entry: <name>" when an entry would escape
#   the extraction root
def unpack
  # Pathname#realpath needs the directory to exist; mkdir_p creates it when
  # missing and does nothing when it is already there.
  FileUtils.mkdir_p(extract_path)
  destination_root = Pathname.new(extract_path).realpath
  # The trailing separator keeps a sibling such as "<root>_evil" from passing
  # the prefix test below.
  root_prefix = destination_root.to_s + File::SEPARATOR

  Zip::File.open(@document_path) do |zip_file|
    zip_file.each do |entry|
      next if Pathname.new(entry.name).absolute?

      # cleanpath collapses "." and ".." lexically, so the comparison is made
      # against the location the entry would really be written to.
      destination_path = destination_root.join(entry.name).cleanpath
      raise "Unsafe zip entry: #{entry.name}" unless destination_path.to_s.start_with?(root_prefix)

      FileUtils.mkdir_p(destination_path.dirname)
      # Only extract after the containment check has passed.
      zip_file.extract entry, entry.name, destination_directory: extract_path
    end
  end
end
Expand All @@ -41,7 +51,7 @@ def extract_extension
end

# Returns the directory the archive is unpacked into, creating it on first use.
#
# The directory is created with Dir.mktmpdir, using
# ".<basename of @document_path>_<extract_extension>" as the name prefix.
# The resulting path is memoized in @extract_path, so later calls return the
# same location instead of creating another directory.
#
# @return [String] path of the extraction directory
def extract_path
  @extract_path ||= Dir.mktmpdir(".#{File.basename(@document_path)}_#{extract_extension}")
end
Comment thread
alecslupu marked this conversation as resolved.
Comment thread
alecslupu marked this conversation as resolved.
end
end
Expand Down
2 changes: 1 addition & 1 deletion spec/unpack_docx_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def rspec_extract_docx
entries = Dir.glob "#{@odt.extract_path}/**/*"
mandatory_files = %w([Content_Types].xml).map { |entry|
File.join @odt.extract_path, entry }
expect(entries.to_set.subset? mandatory_files.to_set)
expect(mandatory_files.to_set).to be_subset(entries.to_set)

@odt.clean
end
Expand Down
23 changes: 22 additions & 1 deletion spec/unpack_odt_spec.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'spec_helper'
require 'tempfile'

describe 'odt' do
def rspec_extract_odt
Expand All @@ -8,7 +9,7 @@ def rspec_extract_odt
entries = Dir.glob "#{@odt.extract_path}/**/*"
mandatory_files = %w(manifest.rdf content.xml settings.xml styles.xml META-INF META-INF/manifest.xml meta.xml mimetype).map { |entry|
File.join @odt.extract_path, entry }
expect(entries.to_set.subset? mandatory_files.to_set)
expect(mandatory_files.to_set).to be_subset(entries.to_set)

@odt.clean
end
Expand All @@ -17,4 +18,24 @@ def rspec_extract_odt
rspec_extract_odt
rspec_extract_odt
end

context "when the odt is a temporary file" do
  # Unpacks a copy of the fixture that is held in a Tempfile and checks that
  # the mandatory ODT entries appear under the extraction directory.
  it "runs from a temp file" do
    fixture = File.join('spec', 'fixtures', 'text_styles.odt')
    tempfile = Tempfile.new('text_styles.odt')

    begin
      # An .odt is a zip archive: copy it in binary mode so no newline or
      # encoding translation can alter the bytes.
      tempfile.binmode
      tempfile.write File.binread(fixture)
      # Closing flushes the data to disk before the document is opened.
      tempfile.close

      @odt = Doc2Text::Odt::Document.new tempfile
      @odt.unpack

      entries = Dir.glob "#{@odt.extract_path}/**/*"
      mandatory_files = %w(manifest.rdf content.xml settings.xml styles.xml META-INF META-INF/manifest.xml meta.xml mimetype).map { |entry|
        File.join @odt.extract_path, entry }
      expect(mandatory_files.to_set).to be_subset(entries.to_set)
    ensure
      # Clean up even when an expectation above fails.
      tempfile.unlink
      @odt.clean if @odt
    end
  end
end
end
Loading