[PATCH v2 61/79] ssdfs: introduce PEB-based deduplication technique

From: Viacheslav Dubeyko

Date: Sun Mar 15 2026 - 22:28:14 EST


Complete patchset is available here:
https://github.com/dubeyko/ssdfs-driver/tree/master/patchset/linux-kernel-6.18.0

SSDFS file system splits the whole volume space on
sequence of segments. Every segment contains one or
several erase blocks. Erase block contains a sequence
of logs. As a result, log contains files' or metadata's
contents as a log's payload. Flush thread of erase
block receives the requests to store new logical blocks
or to update existing ones. PEB-based deduplication
logic calculates the fingerprints for logical blocks
in requests. If the calculated fingerprint is identical
to any existing one, then the deduplication is happened
for the log under creation. It means that if a logical
block with identical content was already stored into
the erase block, then it will be used the reference for
this logical block instead of storing the logical
block with the same content again. This deduplication
logic can be efficient if there are many duplicated
logical blocks in the same file or different files
that are flushing together.

Signed-off-by: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
---
fs/ssdfs/fingerprint.h | 261 ++++++++++++
fs/ssdfs/fingerprint_array.c | 795 +++++++++++++++++++++++++++++++++++
fs/ssdfs/fingerprint_array.h | 82 ++++
fs/ssdfs/peb_deduplication.c | 483 +++++++++++++++++++++
4 files changed, 1621 insertions(+)
create mode 100644 fs/ssdfs/fingerprint.h
create mode 100644 fs/ssdfs/fingerprint_array.c
create mode 100644 fs/ssdfs/fingerprint_array.h
create mode 100644 fs/ssdfs/peb_deduplication.c

diff --git a/fs/ssdfs/fingerprint.h b/fs/ssdfs/fingerprint.h
new file mode 100644
index 000000000000..0e1da7f461bb
--- /dev/null
+++ b/fs/ssdfs/fingerprint.h
@@ -0,0 +1,261 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause-Clear
+ *
+ * SSDFS -- SSD-oriented File System.
+ *
+ * fs/ssdfs/fingerprint.h - fingerprint's declarations.
+ *
+ * Copyright (c) 2023-2026 Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ * http://www.ssdfs.org/
+ * All rights reserved.
+ *
+ * Authors: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ */
+
+#ifndef _SSDFS_FINGERPRINT_H
+#define _SSDFS_FINGERPRINT_H
+
+#include <crypto/hash_info.h>
+#include <crypto/ghash.h>
+#include <crypto/polyval.h>
+
+/*
+ * struct ssdfs_fingerprint - fingerprint object
+ * @buf: fingerprint buffer
+ * @len: fingerprint length
+ * @type: fingerprint type
+ */
+struct ssdfs_fingerprint {
+ u8 buf[SSDFS_FINGERPRINT_LENGTH_MAX];
+ u8 len;
+ u8 type;
+};
+
+/* Fingerprint types */
+enum {
+ SSDFS_UNKNOWN_FINGERPRINT_TYPE = 0,
+ SSDFS_MD5_FINGERPRINT,
+ SSDFS_SHA1_FINGERPRINT,
+ SSDFS_SHA224_FINGERPRINT,
+ SSDFS_SHA256_FINGERPRINT,
+ SSDFS_GHASH_FINGERPRINT,
+ SSDFS_POLYVAL_FINGERPRINT,
+ SSDFS_FINGERPRINT_TYPE_MAX
+};
+
+/* Fingerprint algorithm names */
+#define SSDFS_MD5_HASH_FUNCTION_NAME ("md5")
+#define SSDFS_SHA1_HASH_FUNCTION_NAME ("sha1")
+#define SSDFS_SHA224_HASH_FUNCTION_NAME ("sha224")
+#define SSDFS_SHA256_HASH_FUNCTION_NAME ("sha256")
+#define SSDFS_GHASH_HASH_FUNCTION_NAME ("ghash")
+#define SSDFS_POLYVAL_HASH_FUNCTION_NAME ("polyval")
+
+/*
+ * struct ssdfs_fingerprint_range - range of fingerprints
+ * @start: starting fingerprint
+ * @end: ending fingerprint
+ */
+struct ssdfs_fingerprint_range {
+ struct ssdfs_fingerprint start;
+ struct ssdfs_fingerprint end;
+};
+
+/*
+ * Inline methods
+ */
+
+/*
+ * SSDFS_DEFAULT_FINGERPRINT_TYPE() - default fingerprint type
+ */
+static inline
+int SSDFS_DEFAULT_FINGERPRINT_TYPE(void)
+{
+#ifdef CONFIG_SSDFS_MD5_FINGEPRINT_TYPE
+ return SSDFS_MD5_FINGERPRINT;
+#elif defined(CONFIG_SSDFS_SHA1_FINGEPRINT_TYPE)
+ return SSDFS_SHA1_FINGERPRINT;
+#elif defined(CONFIG_SSDFS_SHA224_FINGEPRINT_TYPE)
+ return SSDFS_SHA224_FINGERPRINT;
+#elif defined(CONFIG_SSDFS_SHA256_FINGEPRINT_TYPE)
+ return SSDFS_SHA256_FINGERPRINT;
+#elif defined(CONFIG_SSDFS_GHASH_FINGEPRINT_TYPE)
+ return SSDFS_GHASH_FINGERPRINT;
+#elif defined(CONFIG_SSDFS_POLYVAL_FINGEPRINT_TYPE)
+ return SSDFS_POLYVAL_FINGERPRINT;
+#else
+ return SSDFS_UNKNOWN_FINGERPRINT_TYPE;
+#endif
+}
+
+/*
+ * SSDFS_FINGERPRINT_TYPE2NAME() - convert fingerprint type into name
+ */
+static inline
+const char *SSDFS_FINGERPRINT_TYPE2NAME(int type)
+{
+ switch (type) {
+ case SSDFS_MD5_FINGERPRINT:
+ return SSDFS_MD5_HASH_FUNCTION_NAME;
+ case SSDFS_SHA1_FINGERPRINT:
+ return SSDFS_SHA1_HASH_FUNCTION_NAME;
+ case SSDFS_SHA224_FINGERPRINT:
+ return SSDFS_SHA224_HASH_FUNCTION_NAME;
+ case SSDFS_SHA256_FINGERPRINT:
+ return SSDFS_SHA256_HASH_FUNCTION_NAME;
+ case SSDFS_GHASH_FINGERPRINT:
+ return SSDFS_GHASH_HASH_FUNCTION_NAME;
+ case SSDFS_POLYVAL_FINGERPRINT:
+ return SSDFS_POLYVAL_HASH_FUNCTION_NAME;
+ default:
+ /* SHA1 is used by default */
+ break;
+ }
+
+ return SSDFS_SHA1_HASH_FUNCTION_NAME;
+}
+
+/*
+ * SSDFS_DEFAULT_FINGERPRINT_NAME() - default fingerprint algorithm name
+ */
+static inline
+const char *SSDFS_DEFAULT_FINGERPRINT_NAME(void)
+{
+#ifdef CONFIG_SSDFS_MD5_FINGEPRINT_TYPE
+ return SSDFS_FINGERPRINT_TYPE2NAME(SSDFS_MD5_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_SHA1_FINGEPRINT_TYPE)
+ return SSDFS_FINGERPRINT_TYPE2NAME(SSDFS_SHA1_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_SHA224_FINGEPRINT_TYPE)
+ return SSDFS_FINGERPRINT_TYPE2NAME(SSDFS_SHA224_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_SHA256_FINGEPRINT_TYPE)
+ return SSDFS_FINGERPRINT_TYPE2NAME(SSDFS_SHA256_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_GHASH_FINGEPRINT_TYPE)
+ return SSDFS_FINGERPRINT_TYPE2NAME(SSDFS_GHASH_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_POLYVAL_FINGEPRINT_TYPE)
+ return SSDFS_FINGERPRINT_TYPE2NAME(SSDFS_POLYVAL_FINGERPRINT);
+#else
+ return SSDFS_FINGERPRINT_TYPE2NAME(SSDFS_UNKNOWN_FINGERPRINT_TYPE);
+#endif
+}
+
+/*
+ * SSDFS_FINGEPRINT_TYPE2LENGTH() - convert fingerprint type into digest size
+ */
+static inline
+u32 SSDFS_FINGEPRINT_TYPE2LENGTH(int type)
+{
+ switch (type) {
+ case SSDFS_MD5_FINGERPRINT:
+ return MD5_DIGEST_SIZE;
+ case SSDFS_SHA1_FINGERPRINT:
+ return SHA1_DIGEST_SIZE;
+ case SSDFS_SHA224_FINGERPRINT:
+ return SHA224_DIGEST_SIZE;
+ case SSDFS_SHA256_FINGERPRINT:
+ return SHA256_DIGEST_SIZE;
+ case SSDFS_GHASH_FINGERPRINT:
+ return GHASH_DIGEST_SIZE;
+ case SSDFS_POLYVAL_FINGERPRINT:
+ return POLYVAL_DIGEST_SIZE;
+ default:
+ SSDFS_WARN("unknown fingerprint type %#x\n",
+ type);
+ break;
+ }
+
+ return U32_MAX;
+}
+
+/*
+ * SSDFS_DEFAULT_FINGERPRINT_LENGTH() - default fingerprint digest size
+ */
+static inline
+u32 SSDFS_DEFAULT_FINGERPRINT_LENGTH(void)
+{
+#ifdef CONFIG_SSDFS_MD5_FINGEPRINT_TYPE
+ return SSDFS_FINGEPRINT_TYPE2LENGTH(SSDFS_MD5_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_SHA1_FINGEPRINT_TYPE)
+ return SSDFS_FINGEPRINT_TYPE2LENGTH(SSDFS_SHA1_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_SHA224_FINGEPRINT_TYPE)
+ return SSDFS_FINGEPRINT_TYPE2LENGTH(SSDFS_SHA224_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_SHA256_FINGEPRINT_TYPE)
+ return SSDFS_FINGEPRINT_TYPE2LENGTH(SSDFS_SHA256_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_GHASH_FINGEPRINT_TYPE)
+ return SSDFS_FINGEPRINT_TYPE2LENGTH(SSDFS_GHASH_FINGERPRINT);
+#elif defined(CONFIG_SSDFS_POLYVAL_FINGEPRINT_TYPE)
+ return SSDFS_FINGEPRINT_TYPE2LENGTH(SSDFS_POLYVAL_FINGERPRINT);
+#else
+ return SSDFS_FINGEPRINT_TYPE2LENGTH(SSDFS_UNKNOWN_FINGERPRINT_TYPE);
+#endif
+}
+
+/*
+ * IS_FINGERPRINT_VALID() - check that fingerprint is valid
+ */
+static inline
+bool IS_FINGERPRINT_VALID(struct ssdfs_fingerprint *hash)
+{
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!hash);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (hash->type <= SSDFS_UNKNOWN_FINGERPRINT_TYPE ||
+ hash->type >= SSDFS_FINGERPRINT_TYPE_MAX)
+ return false;
+
+ if (hash->len == 0 || hash->len > SSDFS_FINGERPRINT_LENGTH_MAX)
+ return false;
+
+ return true;
+}
+
+/*
+ * IS_FINGERPRINTS_COMPARABLE() - check that fingerprints can be compared
+ */
+static inline
+bool IS_FINGERPRINTS_COMPARABLE(struct ssdfs_fingerprint *hash1,
+ struct ssdfs_fingerprint *hash2)
+{
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!hash1 || !hash2);
+ BUG_ON(!IS_FINGERPRINT_VALID(hash1));
+ BUG_ON(!IS_FINGERPRINT_VALID(hash2));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (hash1->type == hash2->type && hash1->len == hash2->len)
+ return true;
+
+ return false;
+}
+
+/*
+ * ssdfs_compare_fingerprints() - compare fingerprints
+ * @hash1: first fingerprint
+ * @hash2: second fingerprint
+ *
+ * This function tries to compare two fingerprints.
+ *
+ * RETURN:
+ * [-1] - hash1 is lesser that hash2
+ * [ 0] - hash1 is equal to hash2
+ * [+1] - hash1 is bigger that hash2
+ */
+static inline
+int ssdfs_compare_fingerprints(struct ssdfs_fingerprint *hash1,
+ struct ssdfs_fingerprint *hash2)
+{
+ size_t len;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!hash1 || !hash2);
+ BUG_ON(!IS_FINGERPRINT_VALID(hash1));
+ BUG_ON(!IS_FINGERPRINT_VALID(hash2));
+ BUG_ON(!IS_FINGERPRINTS_COMPARABLE(hash1, hash2));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ len = min_t(u8, hash1->len, hash2->len);
+
+ return memcmp(hash1->buf, hash2->buf, len);
+}
+
+#endif /* _SSDFS_FINGERPRINT_H */
diff --git a/fs/ssdfs/fingerprint_array.c b/fs/ssdfs/fingerprint_array.c
new file mode 100644
index 000000000000..22036b1dfabf
--- /dev/null
+++ b/fs/ssdfs/fingerprint_array.c
@@ -0,0 +1,795 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause-Clear
+ *
+ * SSDFS -- SSD-oriented File System.
+ *
+ * fs/ssdfs/fingerprint_array.c - fingerprint array implementation.
+ *
+ * Copyright (c) 2023-2026 Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ * http://www.ssdfs.org/
+ * All rights reserved.
+ *
+ * Authors: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ */
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/pagevec.h>
+
+#include "peb_mapping_queue.h"
+#include "peb_mapping_table_cache.h"
+#include "ssdfs.h"
+#include "fingerprint_array.h"
+
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+atomic64_t ssdfs_fingerprint_array_folio_leaks;
+atomic64_t ssdfs_fingerprint_array_memory_leaks;
+atomic64_t ssdfs_fingerprint_array_cache_leaks;
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+
+/*
+ * void ssdfs_fingerprint_array_cache_leaks_increment(void *kaddr)
+ * void ssdfs_fingerprint_array_cache_leaks_decrement(void *kaddr)
+ * void *ssdfs_fingerprint_array_kmalloc(size_t size, gfp_t flags)
+ * void *ssdfs_fingerprint_array_kzalloc(size_t size, gfp_t flags)
+ * void *ssdfs_fingerprint_array_kcalloc(size_t n, size_t size, gfp_t flags)
+ * void ssdfs_fingerprint_array_kfree(void *kaddr)
+ * struct folio *ssdfs_fingerprint_array_alloc_folio(gfp_t gfp_mask,
+ * unsigned int order)
+ * struct folio *ssdfs_file_add_batch_folio(struct folio_batch *batch,
+ * unsigned int order)
+ * void ssdfs_fingerprint_array_free_folio(struct folio *folio)
+ * void ssdfs_file_folio_batch_release(struct folio_batch *batch)
+ */
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ SSDFS_MEMORY_LEAKS_CHECKER_FNS(fingerprint_array)
+#else
+ SSDFS_MEMORY_ALLOCATOR_FNS(fingerprint_array)
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+
+void ssdfs_fingerprint_array_memory_leaks_init(void)
+{
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ atomic64_set(&ssdfs_fingerprint_array_folio_leaks, 0);
+ atomic64_set(&ssdfs_fingerprint_array_memory_leaks, 0);
+ atomic64_set(&ssdfs_fingerprint_array_cache_leaks, 0);
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+}
+
+void ssdfs_fingerprint_array_check_memory_leaks(void)
+{
+#ifdef CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING
+ if (atomic64_read(&ssdfs_fingerprint_array_folio_leaks) != 0) {
+ SSDFS_ERR("FINGERPRINT ARRAY: "
+ "memory leaks include %lld folios\n",
+ atomic64_read(&ssdfs_fingerprint_array_folio_leaks));
+ }
+
+ if (atomic64_read(&ssdfs_fingerprint_array_memory_leaks) != 0) {
+ SSDFS_ERR("FINGERPRINT ARRAY: "
+ "memory allocator suffers from %lld leaks\n",
+ atomic64_read(&ssdfs_fingerprint_array_memory_leaks));
+ }
+
+ if (atomic64_read(&ssdfs_fingerprint_array_cache_leaks) != 0) {
+ SSDFS_ERR("FINGERPRINT ARRAY: "
+ "caches suffers from %lld leaks\n",
+ atomic64_read(&ssdfs_fingerprint_array_cache_leaks));
+ }
+#endif /* CONFIG_SSDFS_MEMORY_LEAKS_ACCOUNTING */
+}
+
+/*
+ * ssdfs_fingerprint_array_create() - create fingerprint array
+ * @farray: pointer on fingerprint array object
+ * @capacity: maximum number of items in array
+ */
+int ssdfs_fingerprint_array_create(struct ssdfs_fingerprint_array *farray,
+ u32 capacity)
+{
+ size_t item_size = sizeof(struct ssdfs_fingerprint_item);
+ int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!farray);
+
+ SSDFS_DBG("array %p, capacity %u, item_size %zu\n",
+ farray, capacity, item_size);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ atomic_set(&farray->state, SSDFS_FINGERPRINT_ARRAY_UNKNOWN_STATE);
+
+ init_rwsem(&farray->lock);
+ farray->items_count = 0;
+
+ err = ssdfs_dynamic_array_create(&farray->array,
+ capacity, item_size,
+ 0xFF);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to create dynamic array: "
+ "capacity %u, item_size %zu, err %d\n",
+ capacity, item_size, err);
+ return err;
+ }
+
+ atomic_set(&farray->state, SSDFS_FINGERPRINT_ARRAY_CREATED);
+
+ return 0;
+}
+
+/*
+ * ssdfs_fingerprint_array_destroy() - destroy fingerprint array
+ * @farray: pointer on fingerprint array object
+ */
+void ssdfs_fingerprint_array_destroy(struct ssdfs_fingerprint_array *farray)
+{
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!farray);
+
+ SSDFS_DBG("array %p\n", farray);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ switch (atomic_read(&farray->state)) {
+ case SSDFS_FINGERPRINT_ARRAY_CREATED:
+ /* continue logic */
+ break;
+
+ default:
+ /* do nothing */
+ return;
+ }
+
+ down_write(&farray->lock);
+ ssdfs_dynamic_array_destroy(&farray->array);
+ farray->items_count = 0;
+ up_write(&farray->lock);
+}
+
+/*
+ * ssdfs_check_fingerprint_item() - compare fingerprint item with hash
+ * @hash: fingerprint with hash value
+ * @item: fingerprint item for comparison with hash value
+ *
+ * This method tries to compare the fingerprint hashes.
+ *
+ * RETURN:
+ * [success]
+ *
+ * %-ENOENT - hash is lesser than item's fingerprint.
+ * %-EEXIST - hash is equal to item's fingerprint.
+ * %-EAGAIN - hash is bigger than item's fingerprint.
+ *
+ * [failure]
+ *
+ * %-EINVAL - invalid input.
+ * %-ERANGE - internal error.
+ */
+int ssdfs_check_fingerprint_item(struct ssdfs_fingerprint *hash,
+ struct ssdfs_fingerprint_item *item)
+{
+ int res;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!item || !hash);
+
+ SSDFS_DBG("item %p, hash %p\n",
+ item, hash);
+
+ SSDFS_DBG("HASH: type %#x, len %u\n",
+ hash->type, hash->len);
+ SSDFS_DBG("HASH DUMP\n");
+ print_hex_dump_bytes("", DUMP_PREFIX_OFFSET,
+ hash->buf,
+ SSDFS_FINGERPRINT_LENGTH_MAX);
+ SSDFS_DBG("\n");
+
+ SSDFS_DBG("ITEM HASH: type %#x, len %u\n",
+ item->hash.type, item->hash.len);
+ SSDFS_DBG("ITEM HASH DUMP\n");
+ print_hex_dump_bytes("", DUMP_PREFIX_OFFSET,
+ item->hash.buf,
+ SSDFS_FINGERPRINT_LENGTH_MAX);
+ SSDFS_DBG("\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (!IS_FINGERPRINT_VALID(hash)) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("hash is invalid\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return -EINVAL;
+ }
+
+ if (!IS_FINGERPRINT_VALID(&item->hash)) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("item is invalid\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return -EINVAL;
+ }
+
+ if (!IS_FINGERPRINTS_COMPARABLE(hash, &item->hash)) {
+ SSDFS_ERR("fingerprings are incomparable\n");
+ return -EINVAL;
+ }
+
+ res = ssdfs_compare_fingerprints(hash, &item->hash);
+ if (res < 0) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("search fingerprint is lesser\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return -ENOENT;
+ } else if (res == 0) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("identical fingerprint is found\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return -EEXIST;
+ } else {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("search fingerprint is bigger\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/*
+ * ssdfs_fingerprint_array_find_nolock() - find fingerprint item without lock
+ * @farray: pointer on fingerprint array object
+ * @hash: fingerprint with hash value
+ * @item_index: pointer on found item index [out]
+ *
+ * This method tries to find the position in array for
+ * requested fingerprint hash without lock.
+ *
+ * RETURN:
+ * [success]
+ * [failure] - error code:
+ *
+ * %-ENOENT - item is not found.
+ * %-ERANGE - internal error.
+ */
+static
+int ssdfs_fingerprint_array_find_nolock(struct ssdfs_fingerprint_array *farray,
+ struct ssdfs_fingerprint *hash,
+ u32 *item_index)
+{
+ struct ssdfs_fingerprint_item *items_array;
+ struct ssdfs_fingerprint_item *item;
+ void *kaddr;
+ u32 total_items;
+ u32 items_count;
+ u32 processed_items = 0;
+ u32 lower_bound;
+ u32 upper_bound;
+ u32 cur_index;
+ u32 diff;
+ int res;
+ int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!farray || !hash || !item_index);
+ BUG_ON(!rwsem_is_locked(&farray->lock));
+
+ SSDFS_DBG("array %p, hash %p\n",
+ farray, hash);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ total_items = ssdfs_dynamic_array_items_count(&farray->array);
+ if (total_items == 0) {
+ err = -ENOENT;
+ *item_index = 0;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("empty array: total_items %u\n",
+ total_items);
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto finish_search_item;
+ }
+
+ if (farray->items_count > total_items) {
+ err = -ERANGE;
+ SSDFS_ERR("items_count %u > total_items %u\n",
+ farray->items_count,
+ total_items);
+ goto finish_search_item;
+ }
+
+ if (farray->items_count == 0) {
+ err = -ENOENT;
+ *item_index = 0;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("empty array: items_count %u\n",
+ farray->items_count);
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto finish_search_item;
+ }
+
+ while (processed_items < farray->items_count) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("processed_items %u, total_items %u\n",
+ processed_items, farray->items_count);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ kaddr = ssdfs_dynamic_array_get_content_locked(&farray->array,
+ processed_items,
+ &items_count);
+ if (IS_ERR_OR_NULL(kaddr)) {
+ err = (kaddr == NULL ? -ENOENT : PTR_ERR(kaddr));
+ SSDFS_ERR("fail to get fingerprints range: "
+ "processed_items %u, err %d\n",
+ processed_items, err);
+ goto finish_search_item;
+ }
+
+ if (items_count == 0) {
+ err = -ENOENT;
+ *item_index = processed_items;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("fingerprint portion is empty\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto unlock_fingerprints_portion;
+ }
+
+ items_array = (struct ssdfs_fingerprint_item *)kaddr;
+
+ item = &items_array[0];
+
+ err = ssdfs_check_fingerprint_item(hash, item);
+ if (err == -ENOENT) {
+ *item_index = processed_items;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("stop search: item %u, err %d\n",
+ *item_index, err);
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto unlock_fingerprints_portion;
+ } else if (err == -EEXIST) {
+ *item_index = processed_items;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("stop search: item %u, err %d\n",
+ *item_index, err);
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto unlock_fingerprints_portion;
+ } else if (unlikely(err)) {
+ SSDFS_ERR("fail to check fingerprint: err %d\n",
+ err);
+ goto unlock_fingerprints_portion;
+ } else
+ BUG();
+
+ if (items_count == 1) {
+ err = -EAGAIN;
+ *item_index = processed_items;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("continue search: "
+ "item %u\n",
+ *item_index);
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto unlock_fingerprints_portion;
+ }
+
+ item = &items_array[items_count - 1];
+
+ err = ssdfs_check_fingerprint_item(hash, item);
+ if (err == -ENOENT) {
+ /*
+ * Continue search in the range
+ */
+ } else if (err == -EEXIST) {
+ *item_index = items_count - 1;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("stop search: item %u, err %d\n",
+ *item_index, err);
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto unlock_fingerprints_portion;
+ } else if (err == -EAGAIN) {
+ *item_index = items_count - 1;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("continue search: "
+ "item %u\n",
+ *item_index);
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto unlock_fingerprints_portion;
+ } else if (unlikely(err)) {
+ SSDFS_ERR("fail to check fingerprint: err %d\n",
+ err);
+ goto unlock_fingerprints_portion;
+ } else
+ BUG();
+
+ lower_bound = 0;
+ *item_index = lower_bound;
+ upper_bound = items_count;
+ cur_index = upper_bound / 2;
+
+ do {
+ item = &items_array[cur_index];
+
+ err = ssdfs_check_fingerprint_item(hash, item);
+ if (err == -ENOENT) {
+ /* correct upper_bound */
+ upper_bound = cur_index;
+ } else if (err == -EEXIST) {
+ *item_index = cur_index;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("stop search: item %u, err %d\n",
+ *item_index, err);
+#endif /* CONFIG_SSDFS_DEBUG */
+ goto unlock_fingerprints_portion;
+ } else if (err == -EAGAIN) {
+ /* correct lower_bound */
+ lower_bound = cur_index;
+ *item_index = lower_bound;
+ } else if (unlikely(err)) {
+ SSDFS_ERR("fail to check fingerprint: err %d\n",
+ err);
+ goto unlock_fingerprints_portion;
+ } else
+ BUG();
+
+ diff = upper_bound - lower_bound;
+ cur_index = lower_bound + (diff / 2);
+ } while (lower_bound < upper_bound);
+
+unlock_fingerprints_portion:
+ res = ssdfs_dynamic_array_release(&farray->array,
+ processed_items,
+ kaddr);
+ if (unlikely(res)) {
+ SSDFS_ERR("fail to release fingerprints portion: "
+ "processed_items %u, err %d\n",
+ processed_items, res);
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG();
+#endif /* CONFIG_SSDFS_DEBUG */
+ }
+
+ processed_items += items_count;
+
+ if (err == -EAGAIN) {
+ if (processed_items < farray->items_count) {
+ err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("continue search: "
+ "item %u\n",
+ *item_index);
+#endif /* CONFIG_SSDFS_DEBUG */
+ continue;
+ } else {
+ err = -ENOENT;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("stop search: "
+ "nothing is found: item %u\n",
+ *item_index);
+#endif /* CONFIG_SSDFS_DEBUG */
+ break;
+ }
+ } else if (err)
+ break;
+ }
+
+ if (err == -EEXIST) {
+ err = 0;
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("fingerprint is found: "
+ "item %u\n",
+ *item_index);
+#endif /* CONFIG_SSDFS_DEBUG */
+ }
+
+finish_search_item:
+ return err;
+}
+
+/*
+ * ssdfs_fingerprint_array_find() - find fingerprint item
+ * @farray: pointer on fingerprint array object
+ * @hash: fingerprint with hash value
+ * @item_index: pointer on found item index [out]
+ *
+ * This method tries to find the position in array for
+ * requested fingerprint hash.
+ *
+ * RETURN:
+ * [success]
+ * [failure] - error code:
+ *
+ * %-ENOENT - item is not found.
+ * %-ERANGE - internal error.
+ */
+int ssdfs_fingerprint_array_find(struct ssdfs_fingerprint_array *farray,
+ struct ssdfs_fingerprint *hash,
+ u32 *item_index)
+{
+ int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!farray || !hash || !item_index);
+
+ SSDFS_DBG("array %p, hash %p\n",
+ farray, hash);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ *item_index = U32_MAX;
+
+ down_read(&farray->lock);
+ err = ssdfs_fingerprint_array_find_nolock(farray, hash, item_index);
+ up_read(&farray->lock);
+
+ return err;
+}
+
+/*
+ * ssdfs_fingerprint_array_get_nolock() - get fingerprint item without lock
+ * @farray: pointer on fingerprint array object
+ * @item_index: item index
+ * @item: pointer on buffer for requested fingerprint item [out]
+ *
+ * This method tries to extract the item on @item_index position
+ * in array without lock.
+ *
+ * RETURN:
+ * [success]
+ * [failure] - error code:
+ *
+ * %-ERANGE - internal error.
+ */
+static inline
+int ssdfs_fingerprint_array_get_nolock(struct ssdfs_fingerprint_array *farray,
+ u32 item_index,
+ struct ssdfs_fingerprint_item *item)
+{
+ void *kaddr;
+ size_t item_size = sizeof(struct ssdfs_fingerprint_item);
+ int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!farray || !item);
+ BUG_ON(!rwsem_is_locked(&farray->lock));
+
+ SSDFS_DBG("array %p, item_index %u\n",
+ farray, item_index);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ kaddr = ssdfs_dynamic_array_get_locked(&farray->array, item_index);
+ if (IS_ERR_OR_NULL(kaddr)) {
+ err = (kaddr == NULL ? -ENOENT : PTR_ERR(kaddr));
+ SSDFS_ERR("fail to get fingerprint item: "
+ "item_index %u, err %d\n",
+ item_index, err);
+ goto finish_get_item;
+ }
+
+ ssdfs_memcpy(item, 0, item_size,
+ kaddr, 0, item_size,
+ item_size);
+
+ err = ssdfs_dynamic_array_release(&farray->array, item_index, kaddr);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to release: "
+ "item_index %u, err %d\n",
+ item_index, err);
+ goto finish_get_item;
+ }
+
+finish_get_item:
+ return err;
+}
+
+/*
+ * ssdfs_fingerprint_array_get() - get fingerprint item
+ * @farray: pointer on fingerprint array object
+ * @item_index: item index
+ * @item: pointer on buffer for requested fingerprint item [out]
+ *
+ * This method tries to extract the item on @item_index position
+ * in array.
+ *
+ * RETURN:
+ * [success]
+ * [failure] - error code:
+ *
+ * %-ERANGE - internal error.
+ */
+int ssdfs_fingerprint_array_get(struct ssdfs_fingerprint_array *farray,
+ u32 item_index,
+ struct ssdfs_fingerprint_item *item)
+{
+ int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!farray || !item);
+
+ SSDFS_DBG("array %p, item_index %u\n",
+ farray, item_index);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ down_read(&farray->lock);
+ err = ssdfs_fingerprint_array_get_nolock(farray, item_index, item);
+ up_read(&farray->lock);
+
+ return err;
+}
+
+/*
+ * ssdfs_fingerprint_array_add() - add fingerprint item into array
+ * @farray: pointer on fingerprint array object
+ * @item: fingerprint item
+ * @item_index: item index
+ *
+ * This method tries to add the item in array.
+ *
+ * RETURN:
+ * [success]
+ * [failure] - error code:
+ *
+ * %-EEXIST - item exists in the array.
+ * %-ERANGE - internal error.
+ */
+int ssdfs_fingerprint_array_add(struct ssdfs_fingerprint_array *farray,
+ struct ssdfs_fingerprint_item *item,
+ u32 item_index)
+{
+ struct ssdfs_fingerprint_item existing;
+ u32 total_items;
+ u32 cur_index;
+ int err = 0;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!farray || !item);
+
+ SSDFS_DBG("array %p, item_index %u\n",
+ farray, item_index);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ down_write(&farray->lock);
+
+ total_items = farray->items_count;
+
+ if (item_index >= U32_MAX || item_index > total_items) {
+ err = ssdfs_fingerprint_array_find_nolock(farray,
+ &item->hash,
+ &item_index);
+ if (err == -ENOENT) {
+ err = 0;
+ /* expected state */
+ } else if (err == -EEXIST) {
+ SSDFS_ERR("item exists for requested fingerprint\n");
+ goto finish_add_fingerprint;
+ } else if (unlikely(err)) {
+ SSDFS_ERR("fail to find position for fingerprint: "
+ "err %d\n", err);
+ goto finish_add_fingerprint;
+ } else {
+ err = -ERANGE;
+ SSDFS_ERR("unexpected result of position search\n");
+ goto finish_add_fingerprint;
+ }
+ }
+
+ if (item_index > total_items) {
+ err = -ERANGE;
+ SSDFS_ERR("item_index %u > total_items %u\n",
+ item_index, total_items);
+ goto finish_add_fingerprint;
+ }
+
+ if (total_items == 0) {
+ err = ssdfs_dynamic_array_set(&farray->array, item_index, item);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to add fingerprint: "
+ "item_index %u, err %d\n",
+ item_index, err);
+ goto finish_add_fingerprint;
+ }
+ } else if (item_index == total_items) {
+ cur_index = item_index - 1;
+
+ err = ssdfs_fingerprint_array_get_nolock(farray,
+ cur_index,
+ &existing);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to get previous item: "
+ "index %u, err %d\n",
+ cur_index, err);
+ goto finish_add_fingerprint;
+ }
+
+ err = ssdfs_check_fingerprint_item(&existing.hash, item);
+ if (err == -ENOENT) {
+ err = 0;
+ /*
+ * Continue logic
+ */
+ } else if (unlikely(err)) {
+ SSDFS_ERR("corrupted fingerprints' sequence: "
+ "index1 %u, index2 %u, err %d\n",
+ cur_index, item_index, err);
+ goto finish_add_fingerprint;
+ } else
+ BUG();
+
+ err = ssdfs_dynamic_array_set(&farray->array, item_index, item);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to add fingerprint: "
+ "item_index %u, err %d\n",
+ item_index, err);
+ goto finish_add_fingerprint;
+ }
+ } else {
+ cur_index = item_index;
+
+ err = ssdfs_fingerprint_array_get_nolock(farray,
+ cur_index,
+ &existing);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to get previous item: "
+ "index %u, err %d\n",
+ cur_index, err);
+ goto finish_add_fingerprint;
+ }
+
+ err = ssdfs_check_fingerprint_item(&item->hash, &existing);
+ if (err == -ENOENT) {
+ err = 0;
+ /*
+ * Continue logic
+ */
+ } else if (unlikely(err)) {
+ SSDFS_ERR("corrupted fingerprints' sequence: "
+ "index1 %u, index2 %u, err %d\n",
+ cur_index, item_index, err);
+ goto finish_add_fingerprint;
+ } else
+ BUG();
+
+ cur_index = item_index - 1;
+
+ err = ssdfs_fingerprint_array_get_nolock(farray,
+ cur_index,
+ &existing);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to get previous item: "
+ "index %u, err %d\n",
+ cur_index, err);
+ goto finish_add_fingerprint;
+ }
+
+ err = ssdfs_check_fingerprint_item(&existing.hash, item);
+ if (err == -ENOENT) {
+ err = 0;
+ /*
+ * Continue logic
+ */
+ } else if (unlikely(err)) {
+ SSDFS_ERR("corrupted fingerprints' sequence: "
+ "index1 %u, index2 %u, err %d\n",
+ cur_index, item_index, err);
+ goto finish_add_fingerprint;
+ } else
+ BUG();
+
+ err = ssdfs_dynamic_array_shift_content_right(&farray->array,
+ item_index, 1);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to shift range: "
+ "index %u, err %d\n",
+ item_index, err);
+ goto finish_add_fingerprint;
+ }
+
+ err = ssdfs_dynamic_array_set(&farray->array, item_index, item);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to add fingerprint: "
+ "item_index %u, err %d\n",
+ item_index, err);
+ goto finish_add_fingerprint;
+ }
+ }
+
+ farray->items_count++;
+
+finish_add_fingerprint:
+ up_write(&farray->lock);
+
+ return err;
+}
diff --git a/fs/ssdfs/fingerprint_array.h b/fs/ssdfs/fingerprint_array.h
new file mode 100644
index 000000000000..ba70977d5a8e
--- /dev/null
+++ b/fs/ssdfs/fingerprint_array.h
@@ -0,0 +1,82 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause-Clear
+ *
+ * SSDFS -- SSD-oriented File System.
+ *
+ * fs/ssdfs/fingerprint_array.h - fingerprint array's declarations.
+ *
+ * Copyright (c) 2023-2026 Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ * http://www.ssdfs.org/
+ * All rights reserved.
+ *
+ * Authors: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ */
+
+#ifndef _SSDFS_FINGERPRINT_ARRAY_H
+#define _SSDFS_FINGERPRINT_ARRAY_H
+
+#include "dynamic_array.h"
+#include "fingerprint.h"
+
+/*
+ * struct ssdfs_fingerprint_item - fingerprint item
+ * @hash: fingerprint hash
+ * @logical_blk: logical block ID
+ * @blk_desc: logical block descriptor
+ */
+struct ssdfs_fingerprint_item {
+ struct ssdfs_fingerprint hash;
+ u32 logical_blk;
+ struct ssdfs_block_descriptor blk_desc;
+};
+
+/*
+ * struct ssdfs_fingerprint_pair - item + index pair
+ * @item: fingerprint item
+ * @item_index: item index in array
+ */
+struct ssdfs_fingerprint_pair {
+ struct ssdfs_fingerprint_item item;
+ u32 item_index;
+};
+
+/*
+ * struct ssdfs_fingerprint_array - fingerprint array
+ * @state: state of fingerprint array
+ * @lock: array lock
+ * @items_count: items count in array
+ * @array: array of fingerprints
+ */
+struct ssdfs_fingerprint_array {
+ atomic_t state;
+ struct rw_semaphore lock;
+ u32 items_count;
+ struct ssdfs_dynamic_array array;
+};
+
+/* Fingeprint array states */
+enum {
+ SSDFS_FINGERPRINT_ARRAY_UNKNOWN_STATE,
+ SSDFS_FINGERPRINT_ARRAY_CREATED,
+ SSDFS_FINGERPRINT_ARRAY_STATE_MAX
+};
+
+/*
+ * Fingerprint array's API
+ */
+int ssdfs_fingerprint_array_create(struct ssdfs_fingerprint_array *array,
+ u32 capacity);
+void ssdfs_fingerprint_array_destroy(struct ssdfs_fingerprint_array *array);
+int ssdfs_check_fingerprint_item(struct ssdfs_fingerprint *hash,
+ struct ssdfs_fingerprint_item *item);
+int ssdfs_fingerprint_array_find(struct ssdfs_fingerprint_array *array,
+ struct ssdfs_fingerprint *hash,
+ u32 *item_index);
+int ssdfs_fingerprint_array_get(struct ssdfs_fingerprint_array *array,
+ u32 item_index,
+ struct ssdfs_fingerprint_item *item);
+int ssdfs_fingerprint_array_add(struct ssdfs_fingerprint_array *array,
+ struct ssdfs_fingerprint_item *item,
+ u32 item_index);
+
+#endif /* _SSDFS_FINGERPRINT_ARRAY_H */
diff --git a/fs/ssdfs/peb_deduplication.c b/fs/ssdfs/peb_deduplication.c
new file mode 100644
index 000000000000..1e755e96538d
--- /dev/null
+++ b/fs/ssdfs/peb_deduplication.c
@@ -0,0 +1,483 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause-Clear
+ *
+ * SSDFS -- SSD-oriented File System.
+ *
+ * fs/ssdfs/peb_deduplication.c - PEB-based deduplication logic.
+ *
+ * Copyright (c) 2023-2026 Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ * http://www.ssdfs.org/
+ * All rights reserved.
+ *
+ * Authors: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
+ */
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/pagevec.h>
+#include <crypto/hash.h>
+
+#include "peb_mapping_queue.h"
+#include "peb_mapping_table_cache.h"
+#include "folio_vector.h"
+#include "ssdfs.h"
+#include "folio_array.h"
+#include "request_queue.h"
+#include "peb.h"
+#include "offset_translation_table.h"
+#include "peb_container.h"
+#include "segment_bitmap.h"
+#include "segment.h"
+#include "fingerprint.h"
+#include "fingerprint_array.h"
+
+/*
+ * ssdfs_calculate_fingerprint() - calculate block's fingerprint
+ * @pebi: pointer on PEB object
+ * @req: I/O request
+ * @hash: calculated fingerprint hash [out]
+ *
+ * This method tries to calculate block's fingerprint.
+ */
+static
+int ssdfs_calculate_fingerprint(struct ssdfs_peb_info *pebi,
+ struct ssdfs_segment_request *req,
+ struct ssdfs_fingerprint *hash)
+{
+ struct ssdfs_fs_info *fsi;
+ struct ssdfs_request_content_block *block;
+ struct ssdfs_content_block *blk_state;
+ SHASH_DESC_ON_STACK(shash, pebi->dedup.shash_tfm);
+ u32 rest_bytes;
+ u32 start_blk = 0;
+ u32 num_blks = 0;
+ u32 processed_bytes = 0;
+ int i, j, k;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!req || !hash);
+ BUG_ON(req->place.len >= U16_MAX);
+ BUG_ON(req->result.processed_blks > req->place.len);
+ BUG_ON(!is_ssdfs_peb_container_locked(pebi->pebc));
+
+ SSDFS_DBG("ino %llu, seg_id %llu, logical_offset %llu, "
+ "processed_blks %d, logical_block %u, data_bytes %u, "
+ "cno %llu, parent_snapshot %llu, cmd %#x, type %#x\n",
+ req->extent.ino, req->place.start.seg_id,
+ req->extent.logical_offset, req->result.processed_blks,
+ req->place.start.blk_index,
+ req->extent.data_bytes, req->extent.cno,
+ req->extent.parent_snapshot,
+ req->private.cmd, req->private.type);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ fsi = pebi->pebc->parent_si->fsi;
+
+ shash->tfm = pebi->dedup.shash_tfm;
+ crypto_shash_init(shash);
+
+ rest_bytes = ssdfs_request_rest_bytes(pebi, req);
+
+ start_blk = req->result.processed_blks;
+ rest_bytes = min_t(u32, rest_bytes, fsi->pagesize);
+ num_blks = rest_bytes + fsi->pagesize - 1;
+ num_blks >>= fsi->log_pagesize;
+
+ for (i = 0; i < num_blks; i++) {
+ struct folio *folio;
+ void *kaddr;
+ int blk_index = i + start_blk;
+ u32 portion_size;
+
+ if (blk_index >= req->result.content.count)
+ break;
+
+ block = &req->result.content.blocks[blk_index];
+ blk_state = &block->new_state;
+
+ for (j = 0; j < folio_batch_count(&blk_state->batch); j++) {
+ u32 mem_pages_per_folio;
+
+ folio = blk_state->batch.folios[j];
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!folio);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ mem_pages_per_folio = folio_size(folio) >> PAGE_SHIFT;
+
+ for (k = 0; k < mem_pages_per_folio; k++) {
+ portion_size = min_t(u32, PAGE_SIZE,
+ rest_bytes - processed_bytes);
+
+ kaddr = kmap_local_folio(folio, k * PAGE_SIZE);
+ crypto_shash_update(shash, kaddr, portion_size);
+ kunmap_local(kaddr);
+
+ processed_bytes += portion_size;
+ }
+ }
+ }
+
+ crypto_shash_final(shash, hash->buf);
+
+ hash->type = SSDFS_DEFAULT_FINGERPRINT_TYPE();
+ hash->len = SSDFS_DEFAULT_FINGERPRINT_LENGTH();
+
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("HASH (type %#x, len %u)\n",
+ hash->type, hash->len);
+
+ SSDFS_DBG("FINGERPRINT DUMP:\n");
+ print_hex_dump_bytes("", DUMP_PREFIX_OFFSET,
+ hash->buf,
+ SSDFS_FINGERPRINT_LENGTH_MAX);
+ SSDFS_DBG("\n");
+
+ BUG_ON(!IS_FINGERPRINT_VALID(hash));
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ return 0;
+}
+
+/*
+ * is_data_size_fingerprint_ready() - is data size bigger than page size
+ * @pebi: pointer on PEB object
+ * @req: I/O request
+ */
+static inline
+bool is_data_size_fingerprint_ready(struct ssdfs_peb_info *pebi,
+ struct ssdfs_segment_request *req)
+{
+ u32 pagesize;
+ u32 logical_block;
+ u32 processed_blks;
+ u64 rest_bytes;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!pebi || !pebi->pebc || !req);
+ BUG_ON(!pebi->pebc->parent_si || !pebi->pebc->parent_si->fsi);
+ BUG_ON(req->place.len >= U16_MAX);
+ BUG_ON(req->result.processed_blks > req->place.len);
+ BUG_ON(!is_ssdfs_peb_container_locked(pebi->pebc));
+
+ SSDFS_DBG("ino %llu, seg %llu, peb %llu, "
+ "peb_index %u, logical_offset %llu, "
+ "processed_blks %d, logical_block %u, data_bytes %u, "
+ "cno %llu, parent_snapshot %llu, cmd %#x, type %#x\n",
+ req->extent.ino, req->place.start.seg_id,
+ pebi->peb_id, pebi->peb_index,
+ req->extent.logical_offset, req->result.processed_blks,
+ req->place.start.blk_index,
+ req->extent.data_bytes, req->extent.cno,
+ req->extent.parent_snapshot,
+ req->private.cmd, req->private.type);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ pagesize = pebi->pebc->parent_si->fsi->pagesize;
+
+ processed_blks = req->result.processed_blks;
+ logical_block = req->place.start.blk_index + processed_blks;
+
+ rest_bytes = processed_blks * pagesize;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(rest_bytes >= req->extent.data_bytes);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ rest_bytes = req->extent.data_bytes - rest_bytes;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("data_size %llu, pagesize %u\n",
+ rest_bytes, pagesize);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (rest_bytes < PAGE_SIZE)
+ return false;
+
+ return true;
+}
+
+/*
+ * is_ssdfs_block_duplicated() - check that block is duplicated
+ * @pebi: pointer on PEB object
+ * @req: I/O request
+ * @pair: fingerprint pair [out]
+ *
+ * This method tries to check that block is duplicated.
+ * Pre-allocated blocks will be ignored.
+ */
+bool is_ssdfs_block_duplicated(struct ssdfs_peb_info *pebi,
+ struct ssdfs_segment_request *req,
+ struct ssdfs_fingerprint_pair *pair)
+{
+ u32 logical_block;
+ u32 processed_blks;
+ bool is_duplicated = false;
+ int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!pebi || !pebi->pebc || !req || !pair);
+ BUG_ON(!pebi->pebc->parent_si || !pebi->pebc->parent_si->fsi);
+ BUG_ON(req->place.len >= U16_MAX);
+ BUG_ON(req->result.processed_blks > req->place.len);
+ BUG_ON(!is_ssdfs_peb_container_locked(pebi->pebc));
+
+ SSDFS_DBG("ino %llu, seg %llu, peb %llu, "
+ "peb_index %u, logical_offset %llu, "
+ "processed_blks %d, logical_block %u, data_bytes %u, "
+ "cno %llu, parent_snapshot %llu, cmd %#x, type %#x\n",
+ req->extent.ino, req->place.start.seg_id,
+ pebi->peb_id, pebi->peb_index,
+ req->extent.logical_offset, req->result.processed_blks,
+ req->place.start.blk_index,
+ req->extent.data_bytes, req->extent.cno,
+ req->extent.parent_snapshot,
+ req->private.cmd, req->private.type);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (!is_ssdfs_peb_containing_user_data(pebi->pebc)) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("ignore metadata\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return false;
+ }
+
+ if (!is_data_size_fingerprint_ready(pebi, req)) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("data is too small for fingerprint calculation\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return false;
+ }
+
+ processed_blks = req->result.processed_blks;
+ logical_block = req->place.start.blk_index + processed_blks;
+
+ memset(&pair->item.hash, 0, sizeof(struct ssdfs_fingerprint));
+ pair->item.logical_blk = logical_block;
+ SSDFS_BLK_DESC_INIT(&pair->item.blk_desc);
+
+ err = ssdfs_calculate_fingerprint(pebi, req, &pair->item.hash);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to calculate fingerprint: "
+ "logical_block %u, err %d\n",
+ logical_block, err);
+ return false;
+ }
+
+ err = ssdfs_fingerprint_array_find(&pebi->dedup.fingerprints,
+ &pair->item.hash,
+ &pair->item_index);
+ if (err == -ENOENT) {
+ is_duplicated = false;
+ } else if (unlikely(err)) {
+ SSDFS_ERR("fail to find fingerprint: "
+ "logical_block %u, err %d\n",
+ logical_block, err);
+ return false;
+ } else {
+ is_duplicated = true;
+ }
+
+ return is_duplicated;
+}
+
+/*
+ * should_ssdfs_save_fingerprint() - should fingerprint to be saved
+ * @pebi: pointer on PEB object
+ * @req: I/O request
+ */
+bool should_ssdfs_save_fingerprint(struct ssdfs_peb_info *pebi,
+ struct ssdfs_segment_request *req)
+{
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!pebi || !pebi->pebc || !req);
+ BUG_ON(!pebi->pebc->parent_si || !pebi->pebc->parent_si->fsi);
+ BUG_ON(req->place.len >= U16_MAX);
+ BUG_ON(req->result.processed_blks > req->place.len);
+
+ SSDFS_DBG("ino %llu, seg %llu, peb %llu, "
+ "peb_index %u, logical_offset %llu, "
+ "processed_blks %d, logical_block %u, data_bytes %u, "
+ "cno %llu, parent_snapshot %llu, cmd %#x, type %#x\n",
+ req->extent.ino, req->place.start.seg_id,
+ pebi->peb_id, pebi->peb_index,
+ req->extent.logical_offset, req->result.processed_blks,
+ req->place.start.blk_index,
+ req->extent.data_bytes, req->extent.cno,
+ req->extent.parent_snapshot,
+ req->private.cmd, req->private.type);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (!is_ssdfs_peb_containing_user_data(pebi->pebc)) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("ignore metadata\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return false;
+ }
+
+ if (!is_data_size_fingerprint_ready(pebi, req)) {
+#ifdef CONFIG_SSDFS_DEBUG
+ SSDFS_DBG("data is too small for fingerprint calculation\n");
+#endif /* CONFIG_SSDFS_DEBUG */
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * ssdfs_peb_deduplicate_logical_block() - deduplicate a logical block
+ * @pebi: pointer on PEB object
+ * @req: I/O request
+ * @pair: fingerprint pair
+ * @blk_desc: pointer on buffer for block descriptor [out]
+ *
+ * This method tries to deduplicate the logical block.
+ *
+ * RETURN:
+ * [success]
+ * [failure] - error code:
+ *
+ * %-EINVAL - invalid input.
+ * %-ERANGE - internal error.
+ */
+int ssdfs_peb_deduplicate_logical_block(struct ssdfs_peb_info *pebi,
+ struct ssdfs_segment_request *req,
+ struct ssdfs_fingerprint_pair *pair,
+ struct ssdfs_block_descriptor *blk_desc)
+{
+ struct ssdfs_fingerprint_item item;
+ size_t blk_desc_size = sizeof(struct ssdfs_block_descriptor);
+ int res;
+ int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!pebi || !pebi->pebc || !req);
+ BUG_ON(!pair || !blk_desc);
+ BUG_ON(!pebi->pebc->parent_si || !pebi->pebc->parent_si->fsi);
+ BUG_ON(req->place.len >= U16_MAX);
+ BUG_ON(req->result.processed_blks > req->place.len);
+ BUG_ON(!is_ssdfs_peb_container_locked(pebi->pebc));
+
+ SSDFS_DBG("ino %llu, seg %llu, peb %llu, "
+ "peb_index %u, logical_offset %llu, "
+ "processed_blks %d, logical_block %u, data_bytes %u, "
+ "cno %llu, parent_snapshot %llu, cmd %#x, type %#x\n",
+ req->extent.ino, req->place.start.seg_id,
+ pebi->peb_id, pebi->peb_index,
+ req->extent.logical_offset, req->result.processed_blks,
+ req->place.start.blk_index,
+ req->extent.data_bytes, req->extent.cno,
+ req->extent.parent_snapshot,
+ req->private.cmd, req->private.type);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (pair->item.logical_blk >= U32_MAX) {
+ SSDFS_ERR("invalid logical block\n");
+ return -EINVAL;
+ }
+
+ if (!IS_FINGERPRINT_VALID(&pair->item.hash)) {
+ SSDFS_ERR("invalid hash: logical block %u\n",
+ pair->item.logical_blk);
+ return -EINVAL;
+ }
+
+ err = ssdfs_fingerprint_array_get(&pebi->dedup.fingerprints,
+ pair->item_index, &item);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to get fingerprint item: "
+ "item_index %u, err %d\n",
+ pair->item_index, err);
+ return err;
+ }
+
+ res = ssdfs_check_fingerprint_item(&pair->item.hash, &item);
+ if (res == -EEXIST) {
+ /*
+ * fingerprints are identical
+ */
+ } else {
+ SSDFS_ERR("fingerprints are different\n");
+ return -ERANGE;
+ }
+
+ ssdfs_memcpy(blk_desc, 0, blk_desc_size,
+ &item.blk_desc, 0, blk_desc_size,
+ blk_desc_size);
+
+ return 0;
+}
+
+/*
+ * ssdfs_peb_save_fingerprint() - save new fingerprint into array
+ * @pebi: pointer on PEB object
+ * @req: I/O request
+ * @blk_desc: block descriptor of logical block
+ * @pair: fingerprint pair
+ *
+ * This method tries to store the new fingerprint of logical
+ * block into the fingerprint array.
+ *
+ * RETURN:
+ * [success]
+ * [failure] - error code:
+ *
+ * %-EINVAL - invalid input.
+ * %-ERANGE - internal error.
+ */
+int ssdfs_peb_save_fingerprint(struct ssdfs_peb_info *pebi,
+ struct ssdfs_segment_request *req,
+ struct ssdfs_block_descriptor *blk_desc,
+ struct ssdfs_fingerprint_pair *pair)
+{
+ size_t blk_desc_size = sizeof(struct ssdfs_block_descriptor);
+ int err;
+
+#ifdef CONFIG_SSDFS_DEBUG
+ BUG_ON(!pebi || !pebi->pebc || !req || !blk_desc || !pair);
+ BUG_ON(!pebi->pebc->parent_si || !pebi->pebc->parent_si->fsi);
+ BUG_ON(req->place.len >= U16_MAX);
+ BUG_ON(req->result.processed_blks > req->place.len);
+ BUG_ON(!is_ssdfs_peb_container_locked(pebi->pebc));
+
+ SSDFS_DBG("ino %llu, seg %llu, peb %llu, "
+ "peb_index %u, logical_offset %llu, "
+ "processed_blks %d, logical_block %u, data_bytes %u, "
+ "cno %llu, parent_snapshot %llu, cmd %#x, type %#x\n",
+ req->extent.ino, req->place.start.seg_id,
+ pebi->peb_id, pebi->peb_index,
+ req->extent.logical_offset, req->result.processed_blks,
+ req->place.start.blk_index,
+ req->extent.data_bytes, req->extent.cno,
+ req->extent.parent_snapshot,
+ req->private.cmd, req->private.type);
+#endif /* CONFIG_SSDFS_DEBUG */
+
+ if (pair->item.logical_blk >= U32_MAX) {
+ SSDFS_ERR("invalid logical block\n");
+ return -EINVAL;
+ }
+
+ if (!IS_FINGERPRINT_VALID(&pair->item.hash)) {
+ SSDFS_ERR("invalid hash: logical block %u\n",
+ pair->item.logical_blk);
+ return -EINVAL;
+ }
+
+ ssdfs_memcpy(&pair->item.blk_desc, 0, blk_desc_size,
+ blk_desc, 0, blk_desc_size,
+ blk_desc_size);
+
+ err = ssdfs_fingerprint_array_add(&pebi->dedup.fingerprints, &pair->item,
+ pair->item_index);
+ if (unlikely(err)) {
+ SSDFS_ERR("fail to add fingerprint item: "
+ "logical_block %u, item_index %u, err %d\n",
+ pair->item.logical_blk, pair->item_index, err);
+ return err;
+ }
+
+ return 0;
+}
--
2.34.1