Skip to content

Commit 100d064

Browse files
🐛 ♻️ optimize unneeded buffer management in image crop extraction
1 parent 76dc287 commit 100d064

2 files changed

Lines changed: 13 additions & 16 deletions

File tree

lib/mindee/image/image_extractor.rb

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,35 +37,32 @@ def self.extract_multiple_images_from_source(input_source, page_id, polygons)
3737
new_stream = load_input_source_pdf_page_as_stringio(input_source, page_id)
3838
new_stream.seek(0)
3939

40-
extract_images_from_polygons(input_source, new_stream, page_id, polygons)
40+
extract_images_from_polygons(input_source, page_id, polygons)
4141
end
4242

4343
# Extracts images from their positions on a file (as polygons).
4444
#
4545
# @param [Input::Source::LocalInputSource] input_source Local input source.
46-
# @param [StringIO] pdf_stream Buffer of the PDF.
4746
# @param [Integer] page_id Page ID.
4847
# @param [Array<Geometry::Point, Geometry::Polygon, Geometry::Quadrilateral>] polygons
4948
# @return [Array<Image::ExtractedImage>] Extracted Images.
50-
def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons)
49+
def self.extract_images_from_polygons(input_source, page_id, polygons)
5150
extracted_elements = [] # @type var extracted_elements: Array[Image::ExtractedImage]
5251

5352
polygons.each_with_index do |polygon, element_id|
5453
polygon = ImageUtils.normalize_polygon(polygon)
54+
input_source.io_stream.rewind
55+
pdf_stream = StringIO.new(input_source.io_stream.read.to_s)
5556
page_content = ImageUtils.read_page_content(pdf_stream)
57+
points = [
58+
polygon.top_left,
59+
polygon.bottom_right,
60+
polygon.top_right,
61+
polygon.bottom_left,
62+
]
5663

57-
min_max_x = Geometry.get_min_max_x([
58-
polygon.top_left,
59-
polygon.bottom_right,
60-
polygon.top_right,
61-
polygon.bottom_left,
62-
])
63-
min_max_y = Geometry.get_min_max_y([
64-
polygon.top_left,
65-
polygon.bottom_right,
66-
polygon.top_right,
67-
polygon.bottom_left,
68-
])
64+
min_max_x = Geometry.get_min_max_x(points)
65+
min_max_y = Geometry.get_min_max_y(points)
6966
file_extension = ImageUtils.determine_file_extension(input_source)
7067
cropped_image = ImageUtils.crop_image(page_content, min_max_x, min_max_y)
7168
if file_extension == 'pdf'

sig/mindee/image/image_extractor.rbs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ module Mindee
55
def self.attach_image_as_new_file: (StringIO | File, ?format: String) -> Origami::PDF
66
def self.to_blob: () -> String
77
def self.extract_multiple_images_from_source: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] |Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
8-
def self.extract_images_from_polygons: (Input::Source::LocalInputSource, StringIO | File, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
8+
def self.extract_images_from_polygons: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
99
def self.create_extracted_image: (StringIO | File, String, Integer, Integer) -> ExtractedImage
1010
def self.load_input_source_pdf_page_as_stringio: (Input::Source::LocalInputSource, Integer) -> (StringIO | File)
1111
end

0 commit comments

Comments
 (0)