diff --git a/README.md b/README.md index b424f19..c168809 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,8 @@ by generating multiple derivative images from one or more sources. *Image Process* will not overwrite your original images. +![Image Process overview](image-process-overview.svg) + ## Installation The easiest way to install *Image Process* is via Pip. This @@ -64,7 +66,7 @@ compute a thumbnail from a larger image: ```python IMAGE_PROCESS = { - "article-image": ["scale_in 300 300 True"], + "article-image": (["scale_in 300 300 True"], "webp"), "thumb": ["crop 0 0 50% 50%", "scale_out 150 150 True", "crop 0 0 150 150"], } ``` @@ -74,13 +76,22 @@ referred to by the `src` attribute of an `<img>` according to the list of operations specified, and replace the `src` attribute with the URL of the transformed image. +You can also transcode the image from one image format into another, for +example, from `png` to `webp`. All image formats that are +supported by the underlying Pillow library can be used (see [Image File +Formats](#image-file-formats)). This is useful when you want to keep a single +large high-resolution image in your repository, but distribute a more +lightweight, web-optimized image with your web site. 
+ For consistency with other types of transformations described below, there is an alternative syntax for the processing instructions: + ```python IMAGE_PROCESS = { "thumb": { "type": "image", + "output-format": "webp", "ops": ["crop 0 0 50% 50%", "scale_out 150 150 True", "crop 0 0 150 150"], }, "article-image": { @@ -149,15 +160,17 @@ dictionary, with the following syntax: IMAGE_PROCESS = { "crisp": { "type": "responsive-image", + "output-format": "webp", "srcset": [ - ("1x", ["scale_in 800 600 True"]), + ("1x", ["scale_in 800 600 True"], "avif"), ("2x", ["scale_in 1600 1200 True"]), - ("4x", ["scale_in 3200 2400 True"]), + ("4x", ["scale_in 3200 2400 True"], "original"), ], "default": "1x", }, "large-photo": { "type": "responsive-image", + "output-format": "jpg", "sizes": ( "(min-width: 1200px) 800px, " "(min-width: 992px) 650px, " @@ -165,9 +178,9 @@ IMAGE_PROCESS = { "100vw" ), "srcset": [ - ("600w", ["scale_in 600 450 True"]), + ("600w", ["scale_in 600 450 True"], "webp"), ("800w", ["scale_in 800 600 True"]), - ("1600w", ["scale_in 1600 1200 True"]), + ("1600w", ["scale_in 1600 1200 True"], "original"), ], "default": "800w", }, @@ -199,6 +212,25 @@ width in pixels of the associated image and must have the suffix attribute of the image. This is the image that will be displayed by browsers that do not support the `srcset` syntax. +Both definitions above also demonstrate how +the input image may be transcoded into another file format. This allows you to +transcode your image from, for example, a `png` original to `webp` +derivative images. The setting `"output-format": "jpg"` sets the default for the +derivative images. This default can be overridden in each `srcset` +specification. In the `large-photo` example above, by default, all derivative +images will be transcoded into `jpg`; however, the line `("600w", ["scale_in 600 +450 True"], "webp"),` will override this for this specific derivative image. 
You +can also specify that you want to keep the original format, by using the keyword +`original` instead of an image file format specification. + +Similarly, the `crisp` transformation also specifies a top-level output format +`"output-format": "webp"`, which means that in the absence of other specifications, +the derivative images will be transcoded into the *WebP* image format. However, +within the `srcset` this is overruled: the `1x` derivative image will be +transcoded into `avif`, the `2x` image will be transcoded into `webp` (as +specified by `output-format`), and lastly the `4x` image will retain the original +image format. + In the two examples above, the `default` setting is a string referring to one of the images in the `srcset`. However, the `default` value could also be a list of operations to generate a different derivative @@ -253,6 +285,7 @@ IMAGE_PROCESS = { "sources": [ { "name": "default", + "output-format": "webp", "media": "(min-width: 640px)", "srcset": [ ("640w", ["scale_in 640 480 True"]), @@ -264,7 +297,7 @@ IMAGE_PROCESS = { { "name": "source-1", "srcset": [ - ("1x", ["crop 100 100 200 200"]), + ("1x", ["crop 100 100 200 200"], "avif"), ("2x", ["crop 100 100 300 300"]), ] }, @@ -285,6 +318,10 @@ displayed by browsers that do not support the `<picture>` syntax. In this example, it will use the image `640w` from the source `default`. A list of operations could have been specified instead of `640w`. +Similar to `responsive-image` described above, `<picture>` also allows the +specification of "output-format" and image format extensions like `webp`, +`avif`, and `jpg`. + To generate a responsive `<picture>` for the images in your articles, you must add to your article a pseudo `<img>` tag that looks like this: @@ -430,6 +467,67 @@ IMAGE_PROCESS = { } ``` +### Image File Formats + +*Image Process* uses Python's Pillow library (PIL) to read and write files. 
The +file formats that Pillow can read and write depend on libraries/plugins that +may or may not be installed on a particular system. While most common image +formats will likely work out of the box (`png`, `jpg`, `jpeg`, `gif`, `tif`, +`webp`), uncommon formats may cause issues depending on the system you are +working on. + +To specify an image format for the derivative image, Pillow will infer the image +format from the file extension you specify. This follows common conventions, for +example: the extensions `j2c`, `j2k`, `jp2`, and `jpx` will all result in a +*JPEG2000* file, while `jpe`, `jpg`, and `jpeg` will produce a *JPEG* derivative +file. + +To see a full list of extensions and file formats available on your system, run +the following Python snippet: + +```python +from PIL import Image + +# Map every available image extension to its format +Image.init() +print(f"{'Extension'.ljust(10)} -> {'Format'.ljust(10)} | Read/Write") +for ext, fmt in sorted(Image.EXTENSION.items()): + readonly = ("" if Image.SAVE.get(fmt) else "| READ-ONLY") + writeonly = ("" if Image.OPEN.get(fmt) else "| WRITE-ONLY") + print(f"{ext.ljust(10)} -> {fmt.ljust(10)} {readonly}{writeonly}") +``` + +Not all image formats can be read *and* written. For example, the *PDF* image +format can be written with Pillow, but cannot be read. Consequently, it can be +used as `output-format` by *Image Process* but does not work when you attempt to +use it as the original input format. + +The ability to *display* a particular image format depends on the browser. +Modern browsers will typically support the following formats: JPEG, PNG, GIF, +SVG, WebP, AVIF (and ICO). + +For displaying images on your Pelican web site consider the following output formats: + +| Format | Best For... 
| Browser Support | +|---|---|---| +| JPEG | Standard photo (no transparency) | 100% | +| PNG | Graphics including transparency | 100% | +| WebP | All-purpose images (smaller size than JPEG/PNG) | ~97% (modern) | +| AVIF | All-purpose images (smaller size than WebP) | ~94% (latest) | +| GIF | Simple, low-resolution animations. | 100% | + +For most use cases, selecting either *AVIF* or *WebP* as output format (setting +`output-format`), with a fallback (setting `default`) of *JPEG* or *PNG* will +give good results. + +The *SVG* image format is omitted on purpose from the list above; it is a +*vector* image format (as opposed to the others, which are *raster* formats), +that is best used for logos and illustrations. You should not blindly convert +images (especially not photographs!) to this format unless you are sure what you +are doing. For more information on how vector image formats compare to raster +image formats, see this [Wikipedia +article](https://en.wikipedia.org/wiki/Vector_graphics). + ### Additional Settings #### Destination Directory @@ -638,9 +736,12 @@ is a helper function to do this for you. From the Python REPL: ```python >>> from pelican.plugins.image_process.test_image_process import generate_test_images >>> generate_test_images() -36 test images generated! +60 test images generated! ``` +This generates both standard transform test images (54) and format conversion +test images (6 for WebP and AVIF). + ## License This project is licensed under the [AGPL-3.0 license](http://www.gnu.org/licenses/agpl-3.0.html). diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000..3821124 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,3 @@ +Release type: minor + +Add setting to specify image output file format to automatically transcode source images. 
diff --git a/image-process-overview.svg b/image-process-overview.svg new file mode 100644 index 0000000..34c454a --- /dev/null +++ b/image-process-overview.svg @@ -0,0 +1,1187 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + elican + + large, high resolution image + Transformations: crop, blur, grayscale, ... + Responsive images +(srcset or picture tag) + Image Process + Image Process automates the image conversion and updates the html output. + + + + + + + + + + + + + + + + + + + + + + + + + + + elican + + + mobile + + + + + + + + + + + + + + + + + + + + + + + + + + + elican + + + tablet + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + elican + + + large monitor + + + + + + + + + + + + + + + + + + + + + + elican + + + + + + + + + + + + + + + + + + + + + + + elican + + + + + + + + Pelican + + + + Image Process + + + + Content + + + + + + + + + + + + + + + + + + + + + + + + + + + + elican + + + Optionally convert image into web-formats (webp, avif, jpg, ...) + 1920 x 1080 + From a single image generate various sizes to make your website-images responsive and faster. + + + + + 1920 x 1080 + 1280 x 720 + 640 x 320 + + diff --git a/pelican/plugins/image_process/image_process.py b/pelican/plugins/image_process/image_process.py index 644fc5e..5891605 100644 --- a/pelican/plugins/image_process/image_process.py +++ b/pelican/plugins/image_process/image_process.py @@ -140,6 +140,56 @@ def _send_command(self, params): ) +def get_target_format(config, default_format=None): + """Extract the target format from various configuration structures. 
+ + Target format can be specified in a number of different ways in the configuration: + + - Top-level default format: "output-format": "webp" + - Responsive image: "srcset": [ ("small", ["scale_in 100 100 True"], "webp"),] + - Picture: "sources": [ ("small", ["scale_in 100 100 True"], "webp"),] + + Returns the target format string (e.g., "webp") or None. + """ + if isinstance(config, dict): + return config.get("output-format", default_format) + + if isinstance(config, (list, str)): + return default_format + + if isinstance(config, tuple): + # Handle (condition, ops, format) or (ops, format) + match config: + # Matches (condition, ops, format) where 1st element is a string + case (str(), ops, str() as format_str): + return format_str + + # Matches (ops, format) with ops not string + case (ops, format_str) if not isinstance(ops, str): + return format_str + + return default_format + + +def normalize_shorthand_transform(config): + """Normalize shorthand tuple (ops, format) to a dict transform config.""" + match config: + case (ops, str() as format_str) if not isinstance(ops, str): + return {"type": "image", "ops": ops, "output-format": format_str} + + raise TypeError(f"Cannot normalize shorthand config: {config}") + + +def get_target_filename(filename, target_format): + """Return the filename with the target format extension.""" + if not target_format or target_format == "original": + return filename + + base, _ext = os.path.splitext(filename) + target_format = target_format.lstrip(".") + return f"{base}.{target_format}" + + def convert_box(image, top, left, right, bottom): """Convert box coordinates strings to integer. @@ -359,19 +409,25 @@ def harvest_images_in_fragment(fragment, settings): if isinstance(d, list): # Single source image specification. 
- process_img_tag(img, settings, derivative) + process_img_tag(img, settings, derivative, d) + continue - elif not isinstance(d, dict): + if isinstance(d, tuple): + # Handle shorthand tuple format: (ops, format) + d = normalize_shorthand_transform(d) + # Fall through to dict handling + + if not isinstance(d, dict): raise TypeError( f"Derivative {derivative} definition not handled (must be list or dict)" ) - elif "type" not in d: + if "type" not in d: raise RuntimeError(f'"type" is mandatory for {derivative}.') - elif d["type"] == "image": + if d["type"] == "image": # Single source image specification. - process_img_tag(img, settings, derivative) + process_img_tag(img, settings, derivative, d) elif d["type"] == "responsive-image" and "srcset" not in img.attrs: # srcset image specification. @@ -444,12 +500,17 @@ def compute_paths(image_url, settings, derivative): return Path(base_url, source, base_path, filename) -def process_img_tag(img, settings, derivative): +def process_img_tag(img, settings, derivative, process_config=None): path = compute_paths(img["src"], settings, derivative) - process = settings["IMAGE_PROCESS"][derivative] + process = ( + process_config if process_config else settings["IMAGE_PROCESS"][derivative] + ) - img["src"] = posixpath.join(path.base_url, path.filename) - destination = os.path.join(str(path.base_path), path.filename) + target_format = get_target_format(process) + filename = get_target_filename(path.filename, target_format) + + img["src"] = posixpath.join(path.base_url, filename) + destination = os.path.join(str(path.base_path), filename) if not isinstance(process, list): process = process["ops"] @@ -474,10 +535,20 @@ def build_srcset(img, settings, derivative): path = compute_paths(img["src"], settings, derivative) process = settings["IMAGE_PROCESS"][derivative] + # Top-level default format. 
+ top_default_format = get_target_format(process) + default = process["default"] default_name = "" + default_format = top_default_format if isinstance(default, str): - breakpoints = {i for i, _ in process["srcset"]} + # find the entry in srcset to get its format + for entry in process["srcset"]: + if entry[0] == default: + default_format = get_target_format(entry, top_default_format) + break + + breakpoints = {entry[0] for entry in process["srcset"]} if default not in breakpoints: logger.error( '%s srcset "%s" does not define default "%s"', @@ -486,29 +557,35 @@ def build_srcset(img, settings, derivative): default, ) default_name = default - elif isinstance(default, list): + elif isinstance(default, (list, tuple)): default_name = "default" - destination = os.path.join(str(path.base_path), default_name, path.filename) - process_image((path.source, destination, default), settings) + default_format = get_target_format(default, top_default_format) + ops = default[0] if isinstance(default, tuple) else default + filename = get_target_filename(path.filename, default_format) + destination = os.path.join(str(path.base_path), default_name, filename) + process_image((path.source, destination, ops), settings) - img["src"] = posixpath.join(path.base_url, default_name, path.filename) + filename = get_target_filename(path.filename, default_format) + img["src"] = posixpath.join(path.base_url, default_name, filename) if "sizes" in process: img["sizes"] = process["sizes"] srcset = [] for src in process["srcset"]: - file_path = posixpath.join(path.base_url, src[0], path.filename) + entry_format = get_target_format(src, top_default_format) + filename = get_target_filename(path.filename, entry_format) + file_path = posixpath.join(path.base_url, src[0], filename) srcset.append(format_srcset_element(file_path, src[0])) - destination = os.path.join(str(path.base_path), src[0], path.filename) + destination = os.path.join(str(path.base_path), src[0], filename) process_image((path.source, 
destination, src[1]), settings) if len(srcset) > 0: img["srcset"] = ", ".join(srcset) -def convert_div_to_picture_tag(soup, img, group, settings, derivative): - """Convert a div containing multiple images to a picture.""" +def prepare_image_sources(img, group, settings, derivative): + """Prepare image sources for the picture tag.""" process_dir = settings["IMAGE_PROCESS_DIR"] # Compile sources URL. Special source "default" uses the main # image URL. Other sources use the img with classes @@ -529,6 +606,42 @@ def convert_div_to_picture_tag(soup, img, group, settings, derivative): url_path, s["filename"] = os.path.split(s["url"]) s["base_url"] = os.path.join(url_path, process_dir, derivative) s["base_path"] = os.path.join(settings["OUTPUT_PATH"], s["base_url"][1:]) + return sources + + +def construct_picture_tag(soup, img, sources, settings): + """Construct the picture tag and add it to the DOM.""" + picture_tag = soup.new_tag("picture") + for s in sources: + # Create new + source_attrs = {k: s[k] for k in s if k in ["media", "sizes"]} + source_tag = soup.new_tag("source", **source_attrs) + + top_source_format = get_target_format(s) + + srcset = [] + for src in s["srcset"]: + entry_format = get_target_format(src, top_source_format) + filename = get_target_filename(s["filename"], entry_format) + url = os.path.join(s["base_url"], s["name"], src[0], filename) + srcset.append(format_srcset_element(str(url), src[0])) + + source = os.path.join(settings["PATH"], s["url"][1:]) + destination = os.path.join(s["base_path"], s["name"], src[0], filename) + process_image((source, destination, src[1]), settings) + + if len(srcset) > 0: + source_tag["srcset"] = ", ".join(srcset) + + picture_tag.append(source_tag) + + # Wrap img with + img.wrap(picture_tag) + + +def convert_div_to_picture_tag(soup, img, group, settings, derivative): + """Convert a div containing multiple images to a picture.""" + sources = prepare_image_sources(img, group, settings, derivative) # If default is not 
None, change default img source to the image # derivative referenced. @@ -550,18 +663,41 @@ def convert_div_to_picture_tag(soup, img, group, settings, derivative): if isinstance(default[1], str): default_item_name = default[1] - - elif isinstance(default[1], list): + # Check for format in position 3: ("default", "640w", "webp") + match default: + case (_, _, str() as default_item_format): + pass + case _: + # find format from srcset + default_item_format = None + for entry in default_source["srcset"]: + if entry[0] == default_item_name: + default_item_format = get_target_format(entry) + break + # fallback to top-level output-format + if default_item_format is None: + default_item_format = settings["IMAGE_PROCESS"][derivative].get( + "output-format" + ) + + elif isinstance(default[1], (list, tuple)): default_item_name = "default" + default_item_format = get_target_format(default[1]) or default_source.get( + "output-format" + ) + ops = default[1][0] if isinstance(default[1], tuple) else default[1] source = os.path.join(settings["PATH"], default_source["url"][1:]) + filename = get_target_filename( + default_source["filename"], default_item_format + ) destination = os.path.join( default_source["base_path"], default_source_name, default_item_name, - default_source["filename"], + filename, ) - process_image((source, destination, default[1]), settings) + process_image((source, destination, ops), settings) else: raise RuntimeError( "Unexpected type for the second value of tuple " @@ -569,37 +705,39 @@ def convert_div_to_picture_tag(soup, img, group, settings, derivative): (derivative,), ) + filename = get_target_filename(default_source["filename"], default_item_format) # Change img src to url of default processed image. img["src"] = os.path.join( default_source["base_url"], default_source_name, default_item_name, - default_source["filename"], + filename, ) - # Create picture tag. 
- picture_tag = soup.new_tag("picture") - for s in sources: - # Create new - source_attrs = {k: s[k] for k in s if k in ["media", "sizes"]} - source_tag = soup.new_tag("source", **source_attrs) + construct_picture_tag(soup, img, sources, settings) - srcset = [] - for src in s["srcset"]: - url = os.path.join(s["base_url"], s["name"], src[0], s["filename"]) - srcset.append(format_srcset_element(str(url), src[0])) - source = os.path.join(settings["PATH"], s["url"][1:]) - destination = os.path.join(s["base_path"], s["name"], src[0], s["filename"]) - process_image((source, destination, src[1]), settings) +def generate_srcset_and_insert_source(img, s, settings): + """Generate srcset for a source and insert it into the DOM.""" + top_source_format = get_target_format(s) - if len(srcset) > 0: - source_tag["srcset"] = ", ".join(srcset) + srcset = [] + for src in s["srcset"]: + entry_format = get_target_format(src, top_source_format) + filename = get_target_filename(s["filename"], entry_format) + url = posixpath.join(s["base_url"], s["name"], src[0], filename) + srcset.append(format_srcset_element(str(url), src[0])) - picture_tag.append(source_tag) + source = os.path.join(settings["PATH"], s["url"][1:]) + destination = os.path.join(s["base_path"], s["name"], src[0], filename) + process_image((source, destination, src[1]), settings) - # Wrap img with - img.wrap(picture_tag) + if len(srcset) > 0: + # Append source elements to the picture in the same order + # as they are found in + # settings['IMAGE_PROCESS'][derivative]['sources']. 
+ s["element"]["srcset"] = ", ".join(srcset) + img.insert_before(s["element"]) def process_picture(soup, img, group, settings, derivative): @@ -662,18 +800,41 @@ def process_picture(soup, img, group, settings, derivative): if isinstance(default[1], str): default_item_name = default[1] - - elif isinstance(default[1], list): + # Check for format in position 3: ("default", "640w", "webp") + match default: + case (_, _, str() as default_item_format): + pass + case _: + # find format from srcset + default_item_format = None + for entry in default_source["srcset"]: + if entry[0] == default_item_name: + default_item_format = get_target_format(entry) + break + # fallback to top-level output-format + if default_item_format is None: + default_item_format = settings["IMAGE_PROCESS"][derivative].get( + "output-format" + ) + + elif isinstance(default[1], (list, tuple)): default_item_name = "default" + default_item_format = get_target_format(default[1]) or default_source.get( + "output-format" + ) + ops = default[1][0] if isinstance(default[1], tuple) else default[1] source = os.path.join(settings["PATH"], default_source["url"][1:]) + filename = get_target_filename( + default_source["filename"], default_item_format + ) destination = os.path.join( default_source["base_path"], default_source_name, default_item_name, - default_source["filename"], + filename, ) - process_image((source, destination, default[1]), settings) + process_image((source, destination, ops), settings) else: raise RuntimeError( @@ -682,31 +843,18 @@ def process_picture(soup, img, group, settings, derivative): (derivative,), ) + filename = get_target_filename(default_source["filename"], default_item_format) # Change img src to url of default processed image. img["src"] = posixpath.join( default_source["base_url"], default_source_name, default_item_name, - default_source["filename"], + filename, ) # Generate srcsets and put back s in . 
for s in sources: - srcset = [] - for src in s["srcset"]: - url = posixpath.join(s["base_url"], s["name"], src[0], s["filename"]) - srcset.append(format_srcset_element(str(url), src[0])) - - source = os.path.join(settings["PATH"], s["url"][1:]) - destination = os.path.join(s["base_path"], s["name"], src[0], s["filename"]) - process_image((source, destination, src[1]), settings) - - if len(srcset) > 0: - # Append source elements to the picture in the same order - # as they are found in - # settings['IMAGE_PROCESS'][derivative]['sources']. - s["element"]["srcset"] = ", ".join(srcset) - img.insert_before(s["element"]) + generate_srcset_and_insert_source(img, s, settings) def try_open_image(path): @@ -828,10 +976,12 @@ def process_metadata(generator, metadata): path = compute_paths(value, generator.context, derivative) original_values[key] = value - metadata[key] = urljoin( - site_url, posixpath.join(path.base_url, path.filename) - ) - destination = os.path.join(str(path.base_path), path.filename) + + target_format = get_target_format(process) + filename = get_target_filename(path.filename, target_format) + + metadata[key] = urljoin(site_url, posixpath.join(path.base_url, filename)) + destination = os.path.join(str(path.base_path), filename) if not isinstance(process, list): process = process["ops"] diff --git a/pelican/plugins/image_process/test_data/black-borders.jpg b/pelican/plugins/image_process/test_data/black-borders.jpg new file mode 100644 index 0000000..9554a6a Binary files /dev/null and b/pelican/plugins/image_process/test_data/black-borders.jpg differ diff --git a/pelican/plugins/image_process/test_data/results/scale_in_avif/alpha-borders.avif b/pelican/plugins/image_process/test_data/results/scale_in_avif/alpha-borders.avif new file mode 100644 index 0000000..32e78c5 Binary files /dev/null and b/pelican/plugins/image_process/test_data/results/scale_in_avif/alpha-borders.avif differ diff --git 
a/pelican/plugins/image_process/test_data/results/scale_in_avif/black-borders.avif b/pelican/plugins/image_process/test_data/results/scale_in_avif/black-borders.avif new file mode 100644 index 0000000..57648be Binary files /dev/null and b/pelican/plugins/image_process/test_data/results/scale_in_avif/black-borders.avif differ diff --git a/pelican/plugins/image_process/test_data/results/scale_in_avif/pelican-bird.avif b/pelican/plugins/image_process/test_data/results/scale_in_avif/pelican-bird.avif new file mode 100644 index 0000000..25ce34b Binary files /dev/null and b/pelican/plugins/image_process/test_data/results/scale_in_avif/pelican-bird.avif differ diff --git a/pelican/plugins/image_process/test_data/results/scale_in_webp/alpha-borders.webp b/pelican/plugins/image_process/test_data/results/scale_in_webp/alpha-borders.webp new file mode 100644 index 0000000..d4aee72 Binary files /dev/null and b/pelican/plugins/image_process/test_data/results/scale_in_webp/alpha-borders.webp differ diff --git a/pelican/plugins/image_process/test_data/results/scale_in_webp/black-borders.webp b/pelican/plugins/image_process/test_data/results/scale_in_webp/black-borders.webp new file mode 100644 index 0000000..83b575c Binary files /dev/null and b/pelican/plugins/image_process/test_data/results/scale_in_webp/black-borders.webp differ diff --git a/pelican/plugins/image_process/test_data/results/scale_in_webp/pelican-bird.webp b/pelican/plugins/image_process/test_data/results/scale_in_webp/pelican-bird.webp new file mode 100644 index 0000000..94574a6 Binary files /dev/null and b/pelican/plugins/image_process/test_data/results/scale_in_webp/pelican-bird.webp differ diff --git a/pelican/plugins/image_process/test_image_process.py b/pelican/plugins/image_process/test_image_process.py index cfc2868..89548ae 100644 --- a/pelican/plugins/image_process/test_image_process.py +++ b/pelican/plugins/image_process/test_image_process.py @@ -12,6 +12,7 @@ from pelican.plugins.image_process import ( 
ExifTool, compute_paths, + get_target_filename, harvest_images_in_fragment, process_image, process_metadata, @@ -47,6 +48,10 @@ TEST_DATA.joinpath("noexif", f"pelican-bird.{ext}").resolve() for ext in SUPPORTED_EXIF_IMAGE_FORMATS ] +FORMAT_TEST_IMAGES_JPG = [ + TEST_DATA.joinpath(f"{file}.jpg").resolve() + for file in ["pelican-bird", "black-borders"] +] TRANSFORM_RESULTS = TEST_DATA.joinpath("results").resolve() # Register all supported transforms. @@ -71,6 +76,19 @@ "sharpen": ["sharpen"], } +FORMAT_TRANSFORMS = { + "scale_in_webp": { + "type": "image", + "ops": ["scale_in 200 250 False"], + "output-format": "webp", + }, + "scale_in_avif": { + "type": "image", + "ops": ["scale_in 200 250 False"], + "output-format": "avif", + }, +} + # The expected sizes of the transformed images. EXPECTED_SIZES = { "crop": (300, 200), @@ -151,6 +169,326 @@ def test_all_transforms(tmp_path, transform_id, transform_params, image_path): raise ValueError(f"Unsupported image mode: {transformed.mode}") +COMPLEX_FORMAT_TRANSFORMS = { + "short_webp": (["scale_in 300 300 True"], "webp"), + "resp_top_webp": { + "type": "responsive-image", + "output-format": "webp", + "srcset": [ + ("1x", ["scale_in 800 600 True"]), + ("2x", ["scale_in 1600 1200 True"]), + ], + "default": "1x", + }, + "resp_no_top": { + "type": "responsive-image", + "srcset": [ + ("1x", ["scale_in 800 600 True"]), + ("2x", ["scale_in 1600 1200 True"], "webp"), + ], + "default": "1x", + }, + "resp_per_entry_mixed": { + "type": "responsive-image", + "srcset": [ + ("1x", ["scale_in 800 600 True"], "webp"), + ("2x", ["scale_in 1600 1200 True"], "avif"), + ("4x", ["scale_in 3200 2400 True"], "original"), + ], + "default": "1x", + }, + "resp_custom_default_jpg": { + "type": "responsive-image", + "srcset": [ + ("1x", ["scale_in 800 600 True"]), + ], + "default": (["scale_in 400 300 True"], "jpg"), + }, + "resp_mixed_top_and_entry": { + "type": "responsive-image", + "output-format": "jpg", + "srcset": [ + ("1x", ["scale_in 800 600 
True"]), + ("2x", ["scale_in 1600 1200 True"], "webp"), + ], + "default": "1x", + }, + "picture_formats": { + "type": "picture", + "sources": [ + { + "name": "webp-src", + "output-format": "webp", + "srcset": [ + ("640w", ["scale_in 640 480 True"]), + ("1024w", ["scale_in 1024 768 True"]), + ], + }, + { + "name": "avif-src", + "output-format": "avif", + "srcset": [ + ("640w", ["scale_in 640 480 True"]), + ], + }, + { + "name": "orig-src", + "srcset": [ + ("1x", ["crop 100 100 200 200"], "original"), + ], + }, + ], + "default": ("webp-src", "640w", "webp"), + }, +} + + +# Expected file extensions per transform and source extension. +COMPLEX_FORMAT_EXPECTED_EXTENSIONS = { + "short_webp": { + ".png": {".webp"}, + ".jpg": {".webp"}, + }, + "resp_top_webp": { + ".png": {".webp"}, + ".jpg": {".webp"}, + }, + "resp_no_top": { + ".png": {".png", ".webp"}, + ".jpg": {".jpg", ".webp"}, + }, + "resp_per_entry_mixed": { + ".png": {".png", ".webp", ".avif"}, + ".jpg": {".jpg", ".webp", ".avif"}, + }, + "resp_custom_default_jpg": { + ".png": {".png", ".jpg"}, + ".jpg": {".jpg"}, + }, + "resp_mixed_top_and_entry": { + ".png": {".jpg", ".webp"}, + ".jpg": {".jpg", ".webp"}, + }, + "picture_formats": { + ".png": {".png"}, + ".jpg": {".jpg"}, + }, +} + + +class TestComplexFormatTransforms: + """Test complex format transforms of file format conversions.""" + + @pytest.mark.parametrize("transform_id", COMPLEX_FORMAT_TRANSFORMS.keys()) + @pytest.mark.parametrize( + "image_path", TRANSFORM_TEST_IMAGES + FORMAT_TEST_IMAGES_JPG + ) + def test_complex_format_transforms(self, tmp_path, transform_id, image_path): + """Test complex format transforms generate correct output extensions. + + This test verifies that the generated image URLs have the expected + image format extensions based on the transform configuration, including + handling of default entries and per-entry format specifications, but + excluding verification of the actual image content or format. 
+ """ + settings = get_settings( + IMAGE_PROCESS=COMPLEX_FORMAT_TRANSFORMS, IMAGE_PROCESS_DIR="transformderivs" + ) + + image_src = f"/tmp/{image_path.name}" + tag = f'' + + result = harvest_images_in_fragment(tag, settings) + soup = BeautifulSoup(result, "html.parser") + urls = self._extract_urls(soup) + + assert len(urls) > 0, f"No URLs generated for {transform_id}" + + # find the expected extension from the transform_id and source_ext in + # the COMPLEX_FORMAT_EXPECTED_EXTENSIONS dict. + source_ext = image_path.suffix.lower() + expected_exts = COMPLEX_FORMAT_EXPECTED_EXTENSIONS[transform_id][source_ext] + for url in urls: + ext = Path(url).suffix.lower() + assert ext in expected_exts, ( + f"Extension mismatch for {transform_id} with {image_path.name}: " + f"expected one of {expected_exts}, got {ext} in URL {url}" + ) + + def _extract_urls(self, soup): + """Extract all image URLs from parsed HTML soup.""" + urls = [] + if soup.img.get("src"): + urls.append(soup.img["src"]) + if soup.img.get("srcset"): + for item in soup.img["srcset"].split(","): + parts = item.strip().split() + if parts: + urls.append(parts[0]) + for source in soup.find_all("source"): + if source.get("srcset"): + for item in source["srcset"].split(","): + parts = item.strip().split() + if parts: + urls.append(parts[0]) + return urls + + +PICTURE_DEFAULT_FORMAT_FALLBACK = { + "pic_default_fmt_fallback": { + "type": "picture", + "sources": [ + { + "name": "main", + "output-format": "webp", + "srcset": [ + ("640w", ["scale_in 640 480 True"]), + ], + }, + ], + "default": ("main", ["scale_in 500 500 True"]), + }, + "pic_default_fmt_fallback_div": { + "type": "picture", + "sources": [ + { + "name": "main", + "output-format": "webp", + "srcset": [ + ("640w", ["scale_in 640 480 True"]), + ], + }, + ], + "default": ("main", ["scale_in 500 500 True"]), + }, +} + + +@pytest.mark.parametrize("transform_id", ["pic_default_fmt_fallback"]) +def 
test_picture_default_falls_back_to_source_format_when_using_ops_list( + mocker, transform_id +): + """Picture default (source_name, ops_list) must fall back to source output-format. + + When default is a 2-tuple of (source_name, ops_list) and the source has + output-format set (e.g. "webp"), get_target_format(ops_list) returns None + because a plain ops list carries no format info. Without a fallback to + the source's output-format, the default image silently keeps its original + extension instead of being transcoded. + + Regression test for the bug at image_process.py:~820 (process_picture). + """ + process = mocker.patch("pelican.plugins.image_process.image_process.process_image") + process.return_value = (512, 384) + + settings = get_settings( + IMAGE_PROCESS=PICTURE_DEFAULT_FORMAT_FALLBACK, + IMAGE_PROCESS_DIR="derivs", + ) + + tag = ( + "" + '' + '' + "" + ) + + result = harvest_images_in_fragment(tag, settings) + soup = BeautifulSoup(result, "html.parser") + + img_src = soup.img["src"] + assert img_src.endswith(".webp"), ( + f"Expected default img src to end with .webp " + f"(source has output-format: webp), got: {img_src}" + ) + + +@pytest.mark.parametrize("transform_id", ["pic_default_fmt_fallback_div"]) +def test_div_picture_default_falls_back_to_source_format_when_using_ops_list( + mocker, transform_id +): + """Same as above, but for the div-to-picture code path (convert_div_to_picture_tag). + + Regression test for the bug at image_process.py:~685 (convert_div_to_picture_tag). + """ + process = mocker.patch("pelican.plugins.image_process.image_process.process_image") + process.return_value = (512, 384) + + settings = get_settings( + IMAGE_PROCESS=PICTURE_DEFAULT_FORMAT_FALLBACK, + IMAGE_PROCESS_DIR="derivs", + ) + + tag = ( + '
' + 'pelican' + '

A pelican

' + '
' + 'Other view' + "
" + "
" + ) + + result = harvest_images_in_fragment(tag, settings) + soup = BeautifulSoup(result, "html.parser") + + img_src = soup.img["src"] + assert img_src.endswith(".webp"), ( + f"Expected default img src to end with .webp " + f"(source has output-format: webp), got: {img_src}" + ) + + +@pytest.mark.parametrize("transform_id, transform_config", FORMAT_TRANSFORMS.items()) +@pytest.mark.parametrize("image_path", TRANSFORM_TEST_IMAGES) +def test_format_conversion(tmp_path, transform_id, transform_config, image_path): + """Test format conversion (WebP, AVIF) with binary match vs pre-rendered images.""" + settings = get_settings() + + image_name = image_path.name + target_format = transform_config["output-format"] + expected_filename = get_target_filename(image_name, target_format) + destination_path = tmp_path.joinpath(transform_id, expected_filename) + expected_path = TRANSFORM_RESULTS.joinpath(transform_id, expected_filename) + + process_image( + (str(image_path), str(destination_path), transform_config["ops"]), settings + ) + + transformed = Image.open(destination_path) + expected = Image.open(expected_path) + + assert transformed.size == expected.size + assert transformed.format.upper() == target_format.upper() + assert expected.format.upper() == target_format.upper() + + if transformed.mode == "RGB": + for _, (transformed_pixel, expected_pixel) in enumerate( + zip(transformed.getdata(), expected.getdata(), strict=False) + ): + assert abs(transformed_pixel[0] - expected_pixel[0]) <= 1 + assert abs(transformed_pixel[1] - expected_pixel[1]) <= 1 + assert abs(transformed_pixel[2] - expected_pixel[2]) <= 1 + elif transformed.mode == "RGBA": + for _, (transformed_pixel, expected_pixel) in enumerate( + zip(transformed.getdata(), expected.getdata(), strict=False) + ): + assert abs(transformed_pixel[0] - expected_pixel[0]) <= 1 + assert abs(transformed_pixel[1] - expected_pixel[1]) <= 1 + assert abs(transformed_pixel[2] - expected_pixel[2]) <= 1 + assert 
abs(transformed_pixel[3] - expected_pixel[3]) <= 1 + elif transformed.mode == "L": + for _, (transformed_pixel, expected_pixel) in enumerate( + zip(transformed.getdata(), expected.getdata(), strict=False) + ): + assert abs(transformed_pixel - expected_pixel) <= 1 + else: + raise ValueError(f"Unsupported image mode: {transformed.mode}") + + @pytest.mark.parametrize("image_path", FILE_FORMAT_TEST_IMAGES) def test_image_formats(tmp_path, image_path): """Test that we can process images in various formats.""" @@ -992,24 +1330,50 @@ def test_process_metadata_image( # noqa: PLR0913 def generate_test_images(): settings = get_settings() image_count = 0 + + for jpg_image_path in FORMAT_TEST_IMAGES_JPG: + if not jpg_image_path.exists(): + png_path = jpg_image_path.with_suffix(".png") + if png_path.exists(): + img = Image.open(png_path).convert("RGB") + img.save(jpg_image_path, "JPEG", quality=85) + + all_transforms = {**SINGLE_TRANSFORMS, **FORMAT_TRANSFORMS} + for image_path in TRANSFORM_TEST_IMAGES: - for transform_id, transform_params in SINGLE_TRANSFORMS.items(): + for transform_id, transform_config in all_transforms.items(): + if isinstance(transform_config, list): + ops = transform_config + output_format = None + else: + ops = transform_config.get("ops", []) + output_format = transform_config.get("output-format") + + if output_format: + dest_filename = get_target_filename(image_path.name, output_format) + else: + dest_filename = image_path.name + destination_path = str( - TRANSFORM_RESULTS.joinpath(transform_id, image_path.name) + TRANSFORM_RESULTS.joinpath(transform_id, dest_filename) ) process_image( ( str(image_path), destination_path, - transform_params, + ops, ), settings, ) image_count += 1 # Check the size of the transformed image. 
- expected_size = EXPECTED_SIZES.get(transform_id) + base_transform_id = transform_id.replace("_webp", "").replace("_avif", "") + expected_size = EXPECTED_SIZES.get(base_transform_id) transformed = Image.open(destination_path) assert expected_size is None or expected_size == transformed.size + # Check the format of the transformed image (if specified). + if output_format: + assert transformed.format.upper() == output_format.upper() print(f"{image_count} test images generated!") # noqa: T201