From 670dd4179ea4d2b801f0e1f7bd8a6a70c3dacd57 Mon Sep 17 00:00:00 2001 From: Alexandru Emil Lupu Date: Sat, 9 May 2026 08:42:23 +0300 Subject: [PATCH 1/5] Add spec for temp file --- spec/unpack_odt_spec.rb | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/spec/unpack_odt_spec.rb b/spec/unpack_odt_spec.rb index f3e9f0d..4ef70d0 100644 --- a/spec/unpack_odt_spec.rb +++ b/spec/unpack_odt_spec.rb @@ -17,4 +17,23 @@ def rspec_extract_odt rspec_extract_odt rspec_extract_odt end + + context "when the odt is a temporary file" do + it "runs from a temp file" do + tempfile = Tempfile.new('text_styles.odt') + tempfile.write File.read(File.join 'spec', 'fixtures', 'text_styles.odt') + tempfile.rewind + tempfile.close + + @odt = Doc2Text::Odt::Document.new tempfile + @odt.unpack + + entries = Dir.glob "#{@odt.extract_path}/**/*" + mandatory_files = %w(manifest.rdf content.xml settings.xml styles.xml META-INF META-INF/manifest.xml meta.xml mimetype).map { |entry| + File.join @odt.extract_path, entry } + expect(entries.to_set.subset? 
mandatory_files.to_set) + + @odt.clean + end + end end From ddea7d5846dd4b565854935c2d6448f90281571e Mon Sep 17 00:00:00 2001 From: Alexandru Emil Lupu Date: Sat, 9 May 2026 23:25:57 +0300 Subject: [PATCH 2/5] Implement fix --- lib/doc2text/xml_based_document_file.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/doc2text/xml_based_document_file.rb b/lib/doc2text/xml_based_document_file.rb index e0a6695..2b6c14e 100644 --- a/lib/doc2text/xml_based_document_file.rb +++ b/lib/doc2text/xml_based_document_file.rb @@ -10,11 +10,10 @@ def initialize(document_path) def unpack Zip::File.open(@document_path) { |zip_file| - Dir.mkdir(extract_path) zip_file.each do |entry| zipped_file_extract_path = File.join extract_path, entry.name FileUtils.mkdir_p File.dirname(zipped_file_extract_path) - zip_file.extract entry, zipped_file_extract_path + zip_file.extract entry, zipped_file_extract_path, destination_directory: "/" end } end @@ -41,7 +40,7 @@ def extract_extension end def extract_path - File.join File.dirname(@document_path), ".#{File.basename(@document_path)}_#{extract_extension}" + @extract_path ||= Dir.mktmpdir(".#{File.basename(@document_path)}_#{extract_extension}") end end end From 8c39dc4cfc16b5424b6d097cb3fc0b0da92af8aa Mon Sep 17 00:00:00 2001 From: Alexandru Emil Lupu Date: Wed, 13 May 2026 12:10:04 +0300 Subject: [PATCH 3/5] Fix Copilot recommendations --- lib/doc2text/xml_based_document_file.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/doc2text/xml_based_document_file.rb b/lib/doc2text/xml_based_document_file.rb index 2b6c14e..bc3d401 100644 --- a/lib/doc2text/xml_based_document_file.rb +++ b/lib/doc2text/xml_based_document_file.rb @@ -1,4 +1,5 @@ require 'zip' +require 'tmpdir' module Doc2Text module XmlBasedDocument @@ -13,7 +14,7 @@ def unpack zip_file.each do |entry| zipped_file_extract_path = File.join extract_path, entry.name FileUtils.mkdir_p File.dirname(zipped_file_extract_path) - zip_file.extract 
entry, zipped_file_extract_path, destination_directory: "/" + zip_file.extract entry, entry.name, destination_directory: extract_path end } end From 8239bcc76830380aa997515d1f852258f13cc6a3 Mon Sep 17 00:00:00 2001 From: Alexandru Emil Lupu Date: Wed, 13 May 2026 12:18:33 +0300 Subject: [PATCH 4/5] Fix subset assertion --- spec/unpack_docx_spec.rb | 2 +- spec/unpack_odt_spec.rb | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/spec/unpack_docx_spec.rb b/spec/unpack_docx_spec.rb index d693b66..8b400f4 100644 --- a/spec/unpack_docx_spec.rb +++ b/spec/unpack_docx_spec.rb @@ -8,7 +8,7 @@ def rspec_extract_docx entries = Dir.glob "#{@odt.extract_path}/**/*" mandatory_files = %w([Content_Types].xml).map { |entry| File.join @odt.extract_path, entry } - expect(entries.to_set.subset? mandatory_files.to_set) + expect(mandatory_files.to_set).to be_subset(entries.to_set) @odt.clean end diff --git a/spec/unpack_odt_spec.rb b/spec/unpack_odt_spec.rb index 4ef70d0..892ee10 100644 --- a/spec/unpack_odt_spec.rb +++ b/spec/unpack_odt_spec.rb @@ -8,7 +8,7 @@ def rspec_extract_odt entries = Dir.glob "#{@odt.extract_path}/**/*" mandatory_files = %w(manifest.rdf content.xml settings.xml styles.xml META-INF META-INF/manifest.xml meta.xml mimetype).map { |entry| File.join @odt.extract_path, entry } - expect(entries.to_set.subset? mandatory_files.to_set) + expect(mandatory_files.to_set).to be_subset(entries.to_set) @odt.clean end @@ -31,8 +31,9 @@ def rspec_extract_odt entries = Dir.glob "#{@odt.extract_path}/**/*" mandatory_files = %w(manifest.rdf content.xml settings.xml styles.xml META-INF META-INF/manifest.xml meta.xml mimetype).map { |entry| File.join @odt.extract_path, entry } - expect(entries.to_set.subset? 
mandatory_files.to_set) + expect(mandatory_files.to_set).to be_subset(entries.to_set) + tempfile.unlink @odt.clean end end From 636e2b5cbcb889581731ed959eaf2b6d23f7e1a1 Mon Sep 17 00:00:00 2001 From: Alexandru Emil Lupu Date: Wed, 13 May 2026 13:03:53 +0300 Subject: [PATCH 5/5] Apply Copilot recommendations --- lib/doc2text/xml_based_document_file.rb | 14 ++++++++++++-- spec/unpack_odt_spec.rb | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/doc2text/xml_based_document_file.rb b/lib/doc2text/xml_based_document_file.rb index bc3d401..9459505 100644 --- a/lib/doc2text/xml_based_document_file.rb +++ b/lib/doc2text/xml_based_document_file.rb @@ -9,11 +9,21 @@ def initialize(document_path) end def unpack + destination_root = Pathname.new(extract_path).realpath + Zip::File.open(@document_path) { |zip_file| zip_file.each do |entry| - zipped_file_extract_path = File.join extract_path, entry.name - FileUtils.mkdir_p File.dirname(zipped_file_extract_path) + entry_path = Pathname.new(entry.name) + + next if entry_path.absolute? + destination_path = destination_root.join(entry.name).cleanpath + + unless destination_path.to_s.start_with?(destination_root.to_s + File::SEPARATOR) + raise "Unsafe zip entry: #{entry.name}" + end + + FileUtils.mkdir_p(destination_path.dirname) zip_file.extract entry, entry.name, destination_directory: extract_path end } diff --git a/spec/unpack_odt_spec.rb b/spec/unpack_odt_spec.rb index 892ee10..bce3347 100644 --- a/spec/unpack_odt_spec.rb +++ b/spec/unpack_odt_spec.rb @@ -1,4 +1,5 @@ require 'spec_helper' +require 'tempfile' describe 'odt' do def rspec_extract_odt