aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan200101 <sentrycraft123@gmail.com>2024-02-08 22:07:04 +0100
committerJan200101 <sentrycraft123@gmail.com>2024-02-08 22:07:04 +0100
commita76b9e93d4de9f14a7e4aaa6d19fc721fc2e17d3 (patch)
treec77f07714c04275e3aa69b885a4ef7673985245d
parent32a4026b609472b5b278fb1a9c2e5d740782edd2 (diff)
downloadkernel-fsync-a76b9e93d4de9f14a7e4aaa6d19fc721fc2e17d3.tar.gz
kernel-fsync-a76b9e93d4de9f14a7e4aaa6d19fc721fc2e17d3.zip
kernel 6.7.3
-rw-r--r--SOURCES/0001-amd-hdr.patch142
-rw-r--r--SOURCES/Patchlist.changelog152
-rw-r--r--SOURCES/amdgpu-HAINAN-variant-fixup.patch34
-rw-r--r--SOURCES/asus-linux.patch414
-rw-r--r--SOURCES/dracut-virt.conf6
-rwxr-xr-xSOURCES/filter-modules.sh.fedora5
-rw-r--r--SOURCES/kernel-aarch64-16k-debug-fedora.config255
-rw-r--r--SOURCES/kernel-aarch64-16k-fedora.config255
-rw-r--r--SOURCES/kernel-aarch64-64k-debug-rhel.config194
-rw-r--r--SOURCES/kernel-aarch64-64k-rhel.config194
-rw-r--r--SOURCES/kernel-aarch64-debug-fedora.config255
-rw-r--r--SOURCES/kernel-aarch64-debug-rhel.config194
-rw-r--r--SOURCES/kernel-aarch64-fedora.config255
-rw-r--r--SOURCES/kernel-aarch64-rhel.config194
-rw-r--r--SOURCES/kernel-aarch64-rt-debug-rhel.config195
-rw-r--r--SOURCES/kernel-aarch64-rt-rhel.config195
-rw-r--r--SOURCES/kernel-ppc64le-debug-fedora.config140
-rw-r--r--SOURCES/kernel-ppc64le-debug-rhel.config188
-rw-r--r--SOURCES/kernel-ppc64le-fedora.config140
-rw-r--r--SOURCES/kernel-ppc64le-rhel.config188
-rw-r--r--SOURCES/kernel-s390x-debug-fedora.config141
-rw-r--r--SOURCES/kernel-s390x-debug-rhel.config193
-rw-r--r--SOURCES/kernel-s390x-fedora.config141
-rw-r--r--SOURCES/kernel-s390x-rhel.config193
-rw-r--r--SOURCES/kernel-s390x-zfcpdump-rhel.config195
-rw-r--r--SOURCES/kernel-x86_64-debug-fedora.config200
-rw-r--r--SOURCES/kernel-x86_64-debug-rhel.config192
-rw-r--r--SOURCES/kernel-x86_64-fedora.config200
-rw-r--r--SOURCES/kernel-x86_64-rhel.config192
-rw-r--r--SOURCES/kernel-x86_64-rt-debug-rhel.config193
-rw-r--r--SOURCES/kernel-x86_64-rt-rhel.config193
-rw-r--r--SOURCES/kernel.changelog2477
-rw-r--r--SOURCES/linux-surface.patch3612
-rw-r--r--SOURCES/mod-internal.list8
-rw-r--r--SOURCES/nouveau-gsp-default.patch23
-rw-r--r--SOURCES/patch-6.7-redhat.patch (renamed from SOURCES/patch-6.6-redhat.patch)893
-rw-r--r--SOURCES/rog-ally-audio-fix.patch2
-rw-r--r--SOURCES/rog-ally-gyro-fix.patch2974
-rw-r--r--SOURCES/rpminspect.yaml2
-rw-r--r--SOURCES/steam-deck.patch18
-rw-r--r--SOURCES/steamdeck-oled-audio.patch10
-rw-r--r--SOURCES/steamdeck-oled-bt.patch239
-rw-r--r--SOURCES/steamdeck-oled-hw-quirks.patch110
-rw-r--r--SOURCES/steamdeck-oled-wifi.patch207
-rw-r--r--SOURCES/t2linux.patch12153
-rw-r--r--SOURCES/tkg-BBRv2.patch3311
-rw-r--r--SOURCES/tkg-bcachefs.patch98955
-rw-r--r--SOURCES/tkg-misc-additions.patch902
-rw-r--r--SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch4
-rw-r--r--SOURCES/valve-gamescope-framerate-control-fixups.patch647
-rw-r--r--SOURCES/winesync.patch20
-rw-r--r--SPECS/kernel.spec783
-rwxr-xr-xTOOLS/patch_configs.py43
53 files changed, 24552 insertions, 108469 deletions
diff --git a/SOURCES/0001-amd-hdr.patch b/SOURCES/0001-amd-hdr.patch
index 6c0deff..c6fc3af 100644
--- a/SOURCES/0001-amd-hdr.patch
+++ b/SOURCES/0001-amd-hdr.patch
@@ -13,9 +13,6 @@ Subject: [PATCH] hdr
.../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 234 ++++-
.../amd/display/dc/dcn10/dcn10_cm_common.c | 95 +-
.../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 14 +-
- .../drm/amd/display/dc/dcn30/dcn30_hwseq.c | 37 +
- .../drm/amd/display/dc/dcn30/dcn30_hwseq.h | 3 +
- .../drm/amd/display/dc/dcn301/dcn301_init.c | 2 +-
.../gpu/drm/amd/display/include/fixed31_32.h | 12 +
drivers/gpu/drm/arm/malidp_crtc.c | 2 +-
drivers/gpu/drm/drm_atomic.c | 1 +
@@ -25,7 +22,6 @@ Subject: [PATCH] hdr
include/drm/drm_plane.h | 7 +
include/drm/drm_property.h | 6 +
include/uapi/drm/drm_mode.h | 8 +
- 21 files changed, 1473 insertions(+), 97 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 32fe05c81..84bf501b0 100644
@@ -1328,7 +1324,7 @@ index 97b7a0b8a..f1707c774 100644
state->crc_skip_count = cur->crc_skip_count;
state->mpo_requested = cur->mpo_requested;
/* TODO Duplicate dc_stream after objects are stream object is flattened */
-@@ -296,6 +297,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
+@@ -296,6 +296,70 @@
}
#endif
@@ -1398,7 +1394,7 @@ index 97b7a0b8a..f1707c774 100644
+
/* Implemented only the options currently available for the driver */
static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
- .reset = dm_crtc_reset_state,
+ .reset = amdgpu_dm_crtc_reset_state,
@@ -314,6 +379,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
#if defined(CONFIG_DEBUG_FS)
.late_register = amdgpu_dm_crtc_late_register,
@@ -1409,7 +1405,7 @@ index 97b7a0b8a..f1707c774 100644
+#endif
};
- static void dm_crtc_helper_disable(struct drm_crtc *crtc)
+ static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc)
@@ -489,6 +558,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
@@ -1424,7 +1420,7 @@ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/g
index cc74dd69a..2ed20e6e4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
-@@ -1333,8 +1333,14 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
+@@ -1337,8 +1337,14 @@
amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
WARN_ON(amdgpu_state == NULL);
@@ -1440,7 +1436,7 @@ index cc74dd69a..2ed20e6e4 100644
+ amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
}
- static struct drm_plane_state *
+ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
@@ -1354,6 +1360,32 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
dc_plane_state_retain(dm_plane_state->dc_state);
}
@@ -1497,7 +1493,7 @@ index cc74dd69a..2ed20e6e4 100644
+#ifdef CONFIG_DRM_AMD_COLOR_STEAMDECK
+static void
-+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
++amdgpu_dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
+ struct drm_plane *plane)
+{
+ struct amdgpu_mode_info mode_info = dm->adev->mode_info;
@@ -1553,7 +1549,7 @@ index cc74dd69a..2ed20e6e4 100644
+}
+
+static int
-+dm_atomic_plane_set_property(struct drm_plane *plane,
++amdgpu_dm_atomic_plane_set_property(struct drm_plane *plane,
+ struct drm_plane_state *state,
+ struct drm_property *property,
+ uint64_t val)
@@ -1635,7 +1631,7 @@ index cc74dd69a..2ed20e6e4 100644
+}
+
+static int
-+dm_atomic_plane_get_property(struct drm_plane *plane,
++amdgpu_dm_atomic_plane_get_property(struct drm_plane *plane,
+ const struct drm_plane_state *state,
+ struct drm_property *property,
+ uint64_t *val)
@@ -1678,23 +1674,23 @@ index cc74dd69a..2ed20e6e4 100644
static const struct drm_plane_funcs dm_plane_funcs = {
.update_plane = drm_atomic_helper_update_plane,
.disable_plane = drm_atomic_helper_disable_plane,
-@@ -1435,6 +1658,10 @@ static const struct drm_plane_funcs dm_plane_funcs = {
- .atomic_duplicate_state = dm_drm_plane_duplicate_state,
- .atomic_destroy_state = dm_drm_plane_destroy_state,
- .format_mod_supported = dm_plane_format_mod_supported,
+@@ -1658,6 +1881,10 @@ static const struct drm_plane_funcs dm_plane_funcs = {
+ .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
+ .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
+ .format_mod_supported = amdgpu_dm_plane_format_mod_supported,
+#ifdef CONFIG_DRM_AMD_COLOR_STEAMDECK
-+ .atomic_set_property = dm_atomic_plane_set_property,
-+ .atomic_get_property = dm_atomic_plane_get_property,
++ .atomic_set_property = amdgpu_dm_atomic_plane_set_property,
++ .atomic_get_property = amdgpu_dm_atomic_plane_get_property,
+#endif
};
-
+
int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
@@ -1514,6 +1741,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
drm_plane_helper_add(plane, &dm_plane_helper_funcs);
+#ifdef CONFIG_DRM_AMD_COLOR_STEAMDECK
-+ dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
++ amdgpu_dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+#endif
/* Create (reset) the plane state */
if (plane->funcs->reset)
@@ -1817,10 +1813,10 @@ index 3538973bd..04b2e04b6 100644
j++;
}
}
-diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 79befa17b..4daf8621b 100644
---- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
-+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -2486,17 +2486,17 @@ void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx)
adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
@@ -1855,81 +1851,6 @@ index 79befa17b..4daf8621b 100644
hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
/* dcn10_translate_regamma_to_hw_format takes 750us to finish
-diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
-index 255713ec2..fce9b33c0 100644
---- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
-+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
-@@ -186,6 +186,43 @@ bool dcn30_set_input_transfer_func(struct dc *dc,
- return result;
- }
-
-+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx)
-+{
-+ int i = 0;
-+ struct dpp_grph_csc_adjustment dpp_adjust;
-+ struct mpc_grph_gamut_adjustment mpc_adjust;
-+ int mpcc_id = pipe_ctx->plane_res.hubp->inst;
-+ struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
-+
-+ memset(&dpp_adjust, 0, sizeof(dpp_adjust));
-+ dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-+
-+ if (pipe_ctx->plane_state &&
-+ pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) {
-+ dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-+ for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
-+ dpp_adjust.temperature_matrix[i] =
-+ pipe_ctx->plane_state->gamut_remap_matrix.matrix[i];
-+ }
-+
-+ pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp,
-+ &dpp_adjust);
-+
-+ memset(&mpc_adjust, 0, sizeof(mpc_adjust));
-+ mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-+
-+ if (pipe_ctx->top_pipe == NULL) {
-+ if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
-+ mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-+ for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
-+ mpc_adjust.temperature_matrix[i] =
-+ pipe_ctx->stream->gamut_remap_matrix.matrix[i];
-+ }
-+ }
-+
-+ mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust);
-+}
-+
- bool dcn30_set_output_transfer_func(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- const struct dc_stream_state *stream)
-diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
-index ce19c5409..e557e2b98 100644
---- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
-+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
-@@ -58,6 +58,9 @@ bool dcn30_set_blend_lut(struct pipe_ctx *pipe_ctx,
- bool dcn30_set_input_transfer_func(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- const struct dc_plane_state *plane_state);
-+
-+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx);
-+
- bool dcn30_set_output_transfer_func(struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- const struct dc_stream_state *stream);
-diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
-index 61205cdbe..fdbe3d42c 100644
---- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
-+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
-@@ -33,7 +33,7 @@
- #include "dcn301_init.h"
-
- static const struct hw_sequencer_funcs dcn301_funcs = {
-- .program_gamut_remap = dcn10_program_gamut_remap,
-+ .program_gamut_remap = dcn30_program_gamut_remap,
- .init_hw = dcn10_init_hw,
- .power_down_on_boot = dcn10_power_down_on_boot,
- .apply_ctx_to_hw = dce110_apply_ctx_to_hw,
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index d4cf7ead1..84da1dd34 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -2120,3 +2041,28 @@ index ea1b639bc..cea5653e4 100644
--
2.43.0
+From b938468f07222b4faab5ae5cf5391eccd9532bb0 Mon Sep 17 00:00:00 2001
+From: Bouke Sybren Haarsma <boukehaarsma23@gmail.com>
+Date: Fri, 15 Dec 2023 11:14:58 +0100
+Subject: [PATCH] Don't create color_mgmt_properties on asics < SIENNA_CICHLID
+
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+index 2ed20e6e439bb5..65ee8745e96540 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+@@ -1742,7 +1742,8 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
+ drm_plane_helper_add(plane, &dm_plane_helper_funcs);
+
+ #ifdef CONFIG_DRM_AMD_COLOR_STEAMDECK
+- amdgpu_dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
++ if (dm->adev->asic_type >= CHIP_SIENNA_CICHLID)
++ amdgpu_dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+ #endif
+ /* Create (reset) the plane state */
+ if (plane->funcs->reset)
+--
+2.43.0
diff --git a/SOURCES/Patchlist.changelog b/SOURCES/Patchlist.changelog
index 7a6ee75..4e240b5 100644
--- a/SOURCES/Patchlist.changelog
+++ b/SOURCES/Patchlist.changelog
@@ -1,120 +1,96 @@
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/bbdede94e2dfb64c3fdb376f90222394422d0131
- bbdede94e2dfb64c3fdb376f90222394422d0131 ida: Fix crash in ida_free when the bitmap is empty
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/46a8350ec1068377bdfcd55191012325f85113bb
+ 46a8350ec1068377bdfcd55191012325f85113bb Revert "cpupower: Bump soname version"
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/ed93ec720e04b598e451e23635bd8201ecaf9c60
- ed93ec720e04b598e451e23635bd8201ecaf9c60 wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev()
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/caef9732dac92b5afac527584a71e0d9fe783c11
+ caef9732dac92b5afac527584a71e0d9fe783c11 wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev()
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/becca34be9cd8577a101032917438af982aa7d29
- becca34be9cd8577a101032917438af982aa7d29 ALSA: hda: cs35l41: Add notification support into component binding
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/e04ed37ee7a38d7b21d8811666ec556c83f55931
+ e04ed37ee7a38d7b21d8811666ec556c83f55931 drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/2fa4b6a18ce384be968eda55c9cb12a6bb5cb4ca
- 2fa4b6a18ce384be968eda55c9cb12a6bb5cb4ca ALSA: hda: cs35l41: Support mute notifications for CS35L41 HDA
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/f6b5c078b94f3e8ad78f8eb246af98a93f715bdb
+ f6b5c078b94f3e8ad78f8eb246af98a93f715bdb scsi: sd: Add "probe_type" module parameter to allow synchronous probing
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/0181cc27d637d9f2606dbf33fac4ddf5a64162ca
- 0181cc27d637d9f2606dbf33fac4ddf5a64162ca Add support for various laptops using CS35L41 HDA without _DSD
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/af25a577fcd4af790374718790a7d9e7bace804c
+ af25a577fcd4af790374718790a7d9e7bace804c Enable IO_URING for RHEL
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/ed5f19c3892cc5dcfe95dd4d296c5e617a26c821
- ed5f19c3892cc5dcfe95dd4d296c5e617a26c821 Revert "netfilter: nf_tables: remove catchall element in GC sync path"
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/ad5d1b5ae72b9e8b846f94b4589d8b0430178c66
+ ad5d1b5ae72b9e8b846f94b4589d8b0430178c66 redhat: version two of Makefile.rhelver tweaks
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/81689414a7974a3f3fa3b28c18226c9d583761d4
- 81689414a7974a3f3fa3b28c18226c9d583761d4 netfilter: nf_tables: remove catchall element in GC sync path
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/fa8ac4aeabd1e76ce80b4016fa3f636507e62a8e
+ fa8ac4aeabd1e76ce80b4016fa3f636507e62a8e redhat: adapt to upstream Makefile change
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/91d392fbbe771b2b4c45fd39b9150e27be3251ba
- 91d392fbbe771b2b4c45fd39b9150e27be3251ba ACPI: video: Use acpi_device_fix_up_power_children()
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/82f10d50c891e830513a6203c8dfedc9c5fc605d
+ 82f10d50c891e830513a6203c8dfedc9c5fc605d Change acpi_bus_get_acpi_device to acpi_get_acpi_dev
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/3bd5c005766e37c5c60b1210e844091ddebd28d6
- 3bd5c005766e37c5c60b1210e844091ddebd28d6 ACPI: PM: Add acpi_device_fix_up_power_children() function
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/2e9237134898be1ad28c8ea25deb1c14f7d2cdc6
+ 2e9237134898be1ad28c8ea25deb1c14f7d2cdc6 RHEL: disable io_uring support
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/46f41fa0448229c32bbc60d3c8ef50d22c33b117
- 46f41fa0448229c32bbc60d3c8ef50d22c33b117 rtc: cmos: Use ACPI alarm for non-Intel x86 systems too
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/670907cf282993feb5c27387e485baabbed3f82d
+ 670907cf282993feb5c27387e485baabbed3f82d REDHAT: coresight: etm4x: Disable coresight on HPE Apollo 70
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/84c68fe1f91beef8b25ca2202d3581260447b334
- 84c68fe1f91beef8b25ca2202d3581260447b334 drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/4cc4347e9085cb8d6b5b5e203e76737a909bfd6d
+ 4cc4347e9085cb8d6b5b5e203e76737a909bfd6d KEYS: Make use of platform keyring for module signature verify
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/51d40b1c54cf09e93d42dc0d090765016362d692
- 51d40b1c54cf09e93d42dc0d090765016362d692 scsi: sd: Add "probe_type" module parameter to allow synchronous probing
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/d818fe69a863c9c9b32bf0afc0fc9ea1a13d1e03
+ d818fe69a863c9c9b32bf0afc0fc9ea1a13d1e03 Input: rmi4 - remove the need for artificial IRQ in case of HID
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/6e9d8352504d3de95ebdff5289e2da6e93b90767
- 6e9d8352504d3de95ebdff5289e2da6e93b90767 Enable IO_URING for RHEL
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/54bcbdc93a456a037372861f3c305001d19c1380
+ 54bcbdc93a456a037372861f3c305001d19c1380 ARM: tegra: usb no reset
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/e65e1c9cfce51744f3ccce9ede248e74a6e7bb5c
- e65e1c9cfce51744f3ccce9ede248e74a6e7bb5c redhat: version two of Makefile.rhelver tweaks
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/d53b41ea053ea5dcbc6410262e64e06baa756ab8
+ d53b41ea053ea5dcbc6410262e64e06baa756ab8 s390: Lock down the kernel when the IPL secure flag is set
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/97edcc85a591ed63b65fa583a1593e379ec779a0
- 97edcc85a591ed63b65fa583a1593e379ec779a0 redhat: adapt to upstream Makefile change
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/0a47e98dff708b27a6f92034258fe7b4f53a0707
+ 0a47e98dff708b27a6f92034258fe7b4f53a0707 efi: Lock down the kernel if booted in secure boot mode
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/f0274138f93a5de8d5757dc1310a51fd9adae739
- f0274138f93a5de8d5757dc1310a51fd9adae739 Change acpi_bus_get_acpi_device to acpi_get_acpi_dev
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/920dfefdd06948d8741fa4846ddc9e35cd50ce65
+ 920dfefdd06948d8741fa4846ddc9e35cd50ce65 efi: Add an EFI_SECURE_BOOT flag to indicate secure boot mode
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/b18359baab10beb33452ec4bac68a25db0ac2531
- b18359baab10beb33452ec4bac68a25db0ac2531 RHEL: disable io_uring support
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/b459dd558e0197eb8dd6ca0ebd7f06945f480e78
+ b459dd558e0197eb8dd6ca0ebd7f06945f480e78 security: lockdown: expose a hook to lock the kernel down
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/e86246f6bff2cae6ed0728cb9855dc321aa22442
- e86246f6bff2cae6ed0728cb9855dc321aa22442 REDHAT: coresight: etm4x: Disable coresight on HPE Apollo 70
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/872668749f5ac08e01bcb2ddf0f33ac935793aac
+ 872668749f5ac08e01bcb2ddf0f33ac935793aac Make get_cert_list() use efi_status_to_str() to print error messages.
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/780e15df6bfda4a86de773b5a76348845cd287e2
- 780e15df6bfda4a86de773b5a76348845cd287e2 KEYS: Make use of platform keyring for module signature verify
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/485d3acca52183cb0cd8dc62413d83b8ca6d1be9
+ 485d3acca52183cb0cd8dc62413d83b8ca6d1be9 Add efi_status_to_str() and rework efi_status_to_err().
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/e16c46de5e538011a405f267e0591a03fe4434f1
- e16c46de5e538011a405f267e0591a03fe4434f1 Input: rmi4 - remove the need for artificial IRQ in case of HID
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/f28b90cbf277b2ae8b2585fbd453dfa0d69ae53d
+ f28b90cbf277b2ae8b2585fbd453dfa0d69ae53d iommu/arm-smmu: workaround DMA mode issues
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/cefdb4374d26857e1d90cdd35936f219693dff11
- cefdb4374d26857e1d90cdd35936f219693dff11 ARM: tegra: usb no reset
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/faf72983b9f33a9ffd8b230496d2314e9b9a826f
+ faf72983b9f33a9ffd8b230496d2314e9b9a826f ipmi: do not configure ipmi for HPE m400
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/161fba6e6557f41e7d2e1be2d4300aac25894c22
- 161fba6e6557f41e7d2e1be2d4300aac25894c22 s390: Lock down the kernel when the IPL secure flag is set
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/0088681628eecc37b983514b7aa099cb1ff4ce2c
+ 0088681628eecc37b983514b7aa099cb1ff4ce2c kABI: Add generic kABI macros to use for kABI workarounds
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/f23df5d91bbc852ed9a289c88b478b5890ff3aff
- f23df5d91bbc852ed9a289c88b478b5890ff3aff efi: Lock down the kernel if booted in secure boot mode
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/eb7070ab5324076a14c90f71f6ed80c6d186aa90
+ eb7070ab5324076a14c90f71f6ed80c6d186aa90 ahci: thunderx2: Fix for errata that affects stop engine
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/388c5040283f7748c06961a807ab82960cfac7b2
- 388c5040283f7748c06961a807ab82960cfac7b2 efi: Add an EFI_SECURE_BOOT flag to indicate secure boot mode
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/12b80aa03dd87334c9fdbda6a93bd2359a5cf15a
+ 12b80aa03dd87334c9fdbda6a93bd2359a5cf15a Vulcan: AHCI PCI bar fix for Broadcom Vulcan early silicon
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/e81d15d326fecd1c90d82b2acb9bdb259b4033ac
- e81d15d326fecd1c90d82b2acb9bdb259b4033ac security: lockdown: expose a hook to lock the kernel down
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/59521420421d2bd7341dbcea94bf2c756f95fcac
+ 59521420421d2bd7341dbcea94bf2c756f95fcac tags.sh: Ignore redhat/rpm
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/1297962689c5c00929be45b6261ab21f0e5de41c
- 1297962689c5c00929be45b6261ab21f0e5de41c Make get_cert_list() use efi_status_to_str() to print error messages.
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/d5ab5c4e27530f7a54b6aa9a581d3d45bb9a5b16
+ d5ab5c4e27530f7a54b6aa9a581d3d45bb9a5b16 put RHEL info into generated headers
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/2b290761ed33270b9f8fea815c9f29476ead5d5d
- 2b290761ed33270b9f8fea815c9f29476ead5d5d Add efi_status_to_str() and rework efi_status_to_err().
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/dc74674e8fced0408caaad1c59abc890ecb8d6f2
+ dc74674e8fced0408caaad1c59abc890ecb8d6f2 aarch64: acpi scan: Fix regression related to X-Gene UARTs
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/4f23de5adca7e96a6bb3abc9f7e0546b997c8ea0
- 4f23de5adca7e96a6bb3abc9f7e0546b997c8ea0 iommu/arm-smmu: workaround DMA mode issues
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/9f7d0c67a3cd2e80b31258a5af096e7ab5d00ea3
+ 9f7d0c67a3cd2e80b31258a5af096e7ab5d00ea3 ACPI / irq: Workaround firmware issue on X-Gene based m400
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/3c9be29ba9986f465b7c8fc6e391978833ffac22
- 3c9be29ba9986f465b7c8fc6e391978833ffac22 ipmi: do not configure ipmi for HPE m400
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/46a9c575bbde2341f1f550fb8f479935673035f8
+ 46a9c575bbde2341f1f550fb8f479935673035f8 modules: add rhelversion MODULE_INFO tag
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/f9bd4dd2a3e0ce4fc91eea39c747a2b06ac8852c
- f9bd4dd2a3e0ce4fc91eea39c747a2b06ac8852c kABI: Add generic kABI macros to use for kABI workarounds
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/bc4896017cc68caa5bd7ead6a06b075b2e17c0e8
+ bc4896017cc68caa5bd7ead6a06b075b2e17c0e8 ACPI: APEI: arm64: Ignore broken HPE moonshot APEI support
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/0a826069d941f0249fa44005fbc6511875553497
- 0a826069d941f0249fa44005fbc6511875553497 ahci: thunderx2: Fix for errata that affects stop engine
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/0e21b15103c452cf6cd2afe831e32b9ceb6de255
+ 0e21b15103c452cf6cd2afe831e32b9ceb6de255 Pull the RHEL version defines out of the Makefile
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/b156077e6f7cdf9bc390551e7b65b80d1d5e285d
- b156077e6f7cdf9bc390551e7b65b80d1d5e285d Vulcan: AHCI PCI bar fix for Broadcom Vulcan early silicon
-
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/129dc65edece93e256a9c79b3e1f962fc7074406
- 129dc65edece93e256a9c79b3e1f962fc7074406 tags.sh: Ignore redhat/rpm
-
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/7558a6cc3029f6cce7cb79ad0ab6348fa1083bb1
- 7558a6cc3029f6cce7cb79ad0ab6348fa1083bb1 put RHEL info into generated headers
-
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/fabdc6fc464674d789063463c8a9abe954ed9f0e
- fabdc6fc464674d789063463c8a9abe954ed9f0e aarch64: acpi scan: Fix regression related to X-Gene UARTs
-
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/3332716406fc60dbfbe218275c546081215fd4b3
- 3332716406fc60dbfbe218275c546081215fd4b3 ACPI / irq: Workaround firmware issue on X-Gene based m400
-
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/ad67ea7af8f6e47745455046d807a8cf0b4e3864
- ad67ea7af8f6e47745455046d807a8cf0b4e3864 modules: add rhelversion MODULE_INFO tag
-
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/4a9e157a9522218c86b14c2f91423dc8c6cc32ed
- 4a9e157a9522218c86b14c2f91423dc8c6cc32ed ACPI: APEI: arm64: Ignore broken HPE moonshot APEI support
-
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/dd03abf0c6a7dde42a4f540f782c67d81319118d
- dd03abf0c6a7dde42a4f540f782c67d81319118d Pull the RHEL version defines out of the Makefile
-
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/866075bfad9aeeb466f2516e071428e9ac5aafaa
- 866075bfad9aeeb466f2516e071428e9ac5aafaa [initial commit] Add Red Hat variables in the top level makefile
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/1993198591da4482b9721dec18306b6d2c556e17
+ 1993198591da4482b9721dec18306b6d2c556e17 [initial commit] Add Red Hat variables in the top level makefile
diff --git a/SOURCES/amdgpu-HAINAN-variant-fixup.patch b/SOURCES/amdgpu-HAINAN-variant-fixup.patch
new file mode 100644
index 0000000..43075e0
--- /dev/null
+++ b/SOURCES/amdgpu-HAINAN-variant-fixup.patch
@@ -0,0 +1,34 @@
+diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
+index fbf968e3f6d7..8afc4fa73101 100644
+--- a/drivers/gpu/drm/radeon/si_dpm.c
++++ b/drivers/gpu/drm/radeon/si_dpm.c
+@@ -2959,9 +2959,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
+ if (rdev->family == CHIP_HAINAN) {
+ if ((rdev->pdev->revision == 0x81) ||
+ (rdev->pdev->revision == 0xC3) ||
++ (rdev->pdev->device == 0x6660) ||
+ (rdev->pdev->device == 0x6664) ||
+ (rdev->pdev->device == 0x6665) ||
+- (rdev->pdev->device == 0x6667)) {
++ (rdev->pdev->device == 0x6667) ||
++ (rdev->pdev->device == 0x666F)) {
+ max_sclk = 75000;
+ }
+ if ((rdev->pdev->revision == 0xC3) ||
+diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+index 02e69ccff3ba..b9a60851d799 100644
+--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
++++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+@@ -3435,9 +3435,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
+ if (adev->asic_type == CHIP_HAINAN) {
+ if ((adev->pdev->revision == 0x81) ||
+ (adev->pdev->revision == 0xC3) ||
++ (adev->pdev->device == 0x6660) ||
+ (adev->pdev->device == 0x6664) ||
+ (adev->pdev->device == 0x6665) ||
+- (adev->pdev->device == 0x6667)) {
++ (adev->pdev->device == 0x6667) ||
++ (adev->pdev->device == 0x666F)) {
+ max_sclk = 75000;
+ }
+ if ((adev->pdev->revision == 0xC3) ||
diff --git a/SOURCES/asus-linux.patch b/SOURCES/asus-linux.patch
index 6caf775..acdc4a3 100644
--- a/SOURCES/asus-linux.patch
+++ b/SOURCES/asus-linux.patch
@@ -36,14 +36,14 @@ diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_pr
index c9eb70290..2b8f8fd52 100644
--- a/sound/pci/hda/cs35l41_hda_property.c
+++ b/sound/pci/hda/cs35l41_hda_property.c
-@@ -57,6 +57,7 @@ static const struct cs35l41_config cs35l41_config_table[] = {
- { "104316D3", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
- { "104316F3", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
- { "104317F3", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431B93", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
- { "10431863", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
- { "104318D3", I2C, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
- { "10431C9F", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
+@@ -79,6 +79,7 @@
+ { "104316D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+ { "104316F3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+ { "104317F3", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
++ { "10431B93", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
+ { "10431863", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
+ { "104318D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+ { "10431C9F", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
@@ -360,6 +361,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
{ "CSC3551", "104316D3", generic_dsd_config },
{ "CSC3551", "104316F3", generic_dsd_config },
@@ -55,401 +55,3 @@ index c9eb70290..2b8f8fd52 100644
--
2.41.0
-
-From b35a4c957b3f0e5b4c7c73dec4fe3a5b9dbc4873 Mon Sep 17 00:00:00 2001
-From: "Luke D. Jones" <luke@ljones.dev>
-Date: Sun, 30 Apr 2023 10:56:34 +1200
-Subject: [PATCH v6 1/1] platform/x86: asus-wmi: add support for ASUS screenpad
-
-Add support for the WMI methods used to turn off and adjust the
-brightness of the secondary "screenpad" device found on some high-end
-ASUS laptops like the GX650P series and others.
-
-There are some small quirks with this device when considering only the
-raw WMI methods:
-1. The Off method can only switch the device off
-2. Changing the brightness turns the device back on
-3. To turn the device back on the brightness must be > 1
-4. When the device is off the brightness can't be changed (so it is
- stored by the driver if device is off).
-5. Booting with a value of 0 brightness (retained by bios) means the bios
- will set a value of >0 <15
-6. When the device is off it is "unplugged"
-
-asus_wmi sets the minimum brightness as 20 in general use, and 60 for
-booting with values <= min.
-
-The ACPI methods are used in a new backlight device named asus_screenpad.
-
-Signed-off-by: Luke D. Jones <luke@ljones.dev>
----
- drivers/platform/x86/asus-wmi.c | 133 +++++++++++++++++++++
- drivers/platform/x86/asus-wmi.h | 1 +
- include/linux/platform_data/x86/asus-wmi.h | 4 +
- 3 files changed, 138 insertions(+)
-
-diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
-index f54178d6f780..0b13be703856 100644
---- a/drivers/platform/x86/asus-wmi.c
-+++ b/drivers/platform/x86/asus-wmi.c
-@@ -25,6 +25,7 @@
- #include <linux/input/sparse-keymap.h>
- #include <linux/kernel.h>
- #include <linux/leds.h>
-+#include <linux/minmax.h>
- #include <linux/module.h>
- #include <linux/pci.h>
- #include <linux/pci_hotplug.h>
-@@ -127,6 +128,10 @@ module_param(fnlock_default, bool, 0444);
- #define NVIDIA_TEMP_MIN 75
- #define NVIDIA_TEMP_MAX 87
-
-+#define ASUS_SCREENPAD_BRIGHT_MIN 20
-+#define ASUS_SCREENPAD_BRIGHT_MAX 255
-+#define ASUS_SCREENPAD_BRIGHT_DEFAULT 60
-+
- static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
-
- static int throttle_thermal_policy_write(struct asus_wmi *);
-@@ -212,6 +217,7 @@ struct asus_wmi {
-
- struct input_dev *inputdev;
- struct backlight_device *backlight_device;
-+ struct backlight_device *screenpad_backlight_device;
- struct platform_device *platform_device;
-
- struct led_classdev wlan_led;
-@@ -3776,6 +3782,124 @@ static int is_display_toggle(int code)
- return 0;
- }
-
-+/* Screenpad backlight *******************************************************/
-+
-+static int read_screenpad_backlight_power(struct asus_wmi *asus)
-+{
-+ int ret;
-+
-+ ret = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_SCREENPAD_POWER);
-+ if (ret < 0)
-+ return ret;
-+ /* 1 == powered */
-+ return ret ? FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN;
-+}
-+
-+static int read_screenpad_brightness(struct backlight_device *bd)
-+{
-+ struct asus_wmi *asus = bl_get_data(bd);
-+ u32 retval;
-+ int err;
-+
-+ err = read_screenpad_backlight_power(asus);
-+ if (err < 0)
-+ return err;
-+ /* The device brightness can only be read if powered, so return stored */
-+ if (err == FB_BLANK_POWERDOWN)
-+ return asus->driver->screenpad_brightness - ASUS_SCREENPAD_BRIGHT_MIN;
-+
-+ err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT, &retval);
-+ if (err < 0)
-+ return err;
-+
-+ return (retval & ASUS_WMI_DSTS_BRIGHTNESS_MASK) - ASUS_SCREENPAD_BRIGHT_MIN;
-+}
-+
-+static int update_screenpad_bl_status(struct backlight_device *bd)
-+{
-+ struct asus_wmi *asus = bl_get_data(bd);
-+ int power, err = 0;
-+ u32 ctrl_param;
-+
-+ power = read_screenpad_backlight_power(asus);
-+ if (power < 0)
-+ return power;
-+
-+ if (bd->props.power != power) {
-+ if (power != FB_BLANK_UNBLANK) {
-+ /* Only brightness > 0 can power it back on */
-+ ctrl_param = asus->driver->screenpad_brightness - ASUS_SCREENPAD_BRIGHT_MIN;
-+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_LIGHT,
-+ ctrl_param, NULL);
-+ } else {
-+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_POWER, 0, NULL);
-+ }
-+ } else if (power == FB_BLANK_UNBLANK) {
-+ /* Only set brightness if powered on or we get invalid/unsync state */
-+ ctrl_param = bd->props.brightness + ASUS_SCREENPAD_BRIGHT_MIN;
-+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_LIGHT, ctrl_param, NULL);
-+ }
-+
-+ /* Ensure brightness is stored to turn back on with */
-+ if (err == 0)
-+ asus->driver->screenpad_brightness = bd->props.brightness + ASUS_SCREENPAD_BRIGHT_MIN;
-+
-+ return err;
-+}
-+
-+static const struct backlight_ops asus_screenpad_bl_ops = {
-+ .get_brightness = read_screenpad_brightness,
-+ .update_status = update_screenpad_bl_status,
-+ .options = BL_CORE_SUSPENDRESUME,
-+};
-+
-+static int asus_screenpad_init(struct asus_wmi *asus)
-+{
-+ struct backlight_device *bd;
-+ struct backlight_properties props;
-+ int err, power;
-+ int brightness = 0;
-+
-+ power = read_screenpad_backlight_power(asus);
-+ if (power < 0)
-+ return power;
-+
-+ if (power != FB_BLANK_POWERDOWN) {
-+ err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT, &brightness);
-+ if (err < 0)
-+ return err;
-+ }
-+ /* default to an acceptable min brightness on boot if too low */
-+ if (brightness < ASUS_SCREENPAD_BRIGHT_MIN)
-+ brightness = ASUS_SCREENPAD_BRIGHT_DEFAULT;
-+
-+ memset(&props, 0, sizeof(struct backlight_properties));
-+ props.type = BACKLIGHT_RAW; /* ensure this bd is last to be picked */
-+ props.max_brightness = ASUS_SCREENPAD_BRIGHT_MAX - ASUS_SCREENPAD_BRIGHT_MIN;
-+ bd = backlight_device_register("asus_screenpad",
-+ &asus->platform_device->dev, asus,
-+ &asus_screenpad_bl_ops, &props);
-+ if (IS_ERR(bd)) {
-+ pr_err("Could not register backlight device\n");
-+ return PTR_ERR(bd);
-+ }
-+
-+ asus->screenpad_backlight_device = bd;
-+ asus->driver->screenpad_brightness = brightness;
-+ bd->props.brightness = brightness;
-+ bd->props.power = power;
-+ backlight_update_status(bd);
-+
-+ return 0;
-+}
-+
-+static void asus_screenpad_exit(struct asus_wmi *asus)
-+{
-+ backlight_device_unregister(asus->screenpad_backlight_device);
-+
-+ asus->screenpad_backlight_device = NULL;
-+}
-+
- /* Fn-lock ********************************************************************/
-
- static bool asus_wmi_has_fnlock_key(struct asus_wmi *asus)
-@@ -4431,6 +4555,12 @@ static int asus_wmi_add(struct platform_device *pdev)
- } else if (asus->driver->quirks->wmi_backlight_set_devstate)
- err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL);
-
-+ if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT)) {
-+ err = asus_screenpad_init(asus);
-+ if (err && err != -ENODEV)
-+ goto fail_screenpad;
-+ }
-+
- if (asus_wmi_has_fnlock_key(asus)) {
- asus->fnlock_locked = fnlock_default;
- asus_wmi_fnlock_update(asus);
-@@ -4454,6 +4584,8 @@ static int asus_wmi_add(struct platform_device *pdev)
- asus_wmi_backlight_exit(asus);
- fail_backlight:
- asus_wmi_rfkill_exit(asus);
-+fail_screenpad:
-+ asus_screenpad_exit(asus);
- fail_rfkill:
- asus_wmi_led_exit(asus);
- fail_leds:
-@@ -4481,6 +4481,7 @@
- i8042_remove_filter(asus->driver->quirks->i8042_filter);
- wmi_remove_notify_handler(asus->driver->event_guid);
- asus_wmi_backlight_exit(asus);
-+ asus_screenpad_exit(asus);
- asus_wmi_input_exit(asus);
- asus_wmi_led_exit(asus);
- asus_wmi_rfkill_exit(asus);
-diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h
-index a478ebfd34df..5fbdd0eafa02 100644
---- a/drivers/platform/x86/asus-wmi.h
-+++ b/drivers/platform/x86/asus-wmi.h
-@@ -57,6 +57,7 @@ struct quirk_entry {
- struct asus_wmi_driver {
- int brightness;
- int panel_power;
-+ int screenpad_brightness;
- int wlan_ctrl_by_user;
-
- const char *name;
-diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
-index 16e99a1c37fc..63e630276499 100644
---- a/include/linux/platform_data/x86/asus-wmi.h
-+++ b/include/linux/platform_data/x86/asus-wmi.h
-@@ -58,6 +58,10 @@
- #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021
- #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */
- #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025
-+/* This can only be used to disable the screen, not re-enable */
-+#define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031
-+/* Writing a brightness re-enables the screen if disabled */
-+#define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032
- #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018
- #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075
-
---
-2.41.0
-
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Luke D. Jones" <luke@ljones.dev>
-Date: Mon, 27 Nov 2023 12:05:21 +1300
-Subject: [PATCH] platform/x86: asus-wmi: disable USB0 hub on ROG Ally before
- suspend
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-ASUS have worked around an issue in XInput where it doesn't support USB
-selective suspend, which causes suspend issues in Windows. They worked
-around this by adjusting the MCU firmware to disable the USB0 hub when
-the screen is switched off during the Microsoft DSM suspend path in ACPI.
-
-The issue we have with this however is one of timing - the call the tells
-the MCU to this isn't able to complete before suspend is done so we call
-this in a prepare() and add a small msleep() to ensure it is done. This
-must be done before the screen is switched off to prevent a variety of
-possible races.
-
-Further to this the MCU powersave option must also be disabled as it can
-cause a number of issues such as:
-- unreliable resume connection of N-Key
-- complete loss of N-Key if the power is plugged in while suspended
-Disabling the powersave option prevents this.
-
-Without this the MCU is unable to initialise itself correctly on resume.
-
-Signed-off-by: "Luke D. Jones" <luke@ljones.dev>
-Tested-by: Philip Mueller <philm@manjaro.org>
-Reviewed-by: Hans de Goede <hdegoede@redhat.com>
-Link: https://lore.kernel.org/r/20231126230521.125708-2-luke@ljones.dev
-Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
----
- drivers/platform/x86/asus-wmi.c | 50 ++++++++++++++++++++++
- include/linux/platform_data/x86/asus-wmi.h | 3 ++
- 2 files changed, 53 insertions(+)
-
-diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
-index ca668cf04020..9f7e23c5c6b4 100644
---- a/drivers/platform/x86/asus-wmi.c
-+++ b/drivers/platform/x86/asus-wmi.c
-@@ -16,6 +16,7 @@
- #include <linux/acpi.h>
- #include <linux/backlight.h>
- #include <linux/debugfs.h>
-+#include <linux/delay.h>
- #include <linux/dmi.h>
- #include <linux/fb.h>
- #include <linux/hwmon.h>
-@@ -132,6 +133,11 @@ module_param(fnlock_default, bool, 0444);
- #define ASUS_SCREENPAD_BRIGHT_MAX 255
- #define ASUS_SCREENPAD_BRIGHT_DEFAULT 60
-
-+/* Controls the power state of the USB0 hub on ROG Ally which input is on */
-+#define ASUS_USB0_PWR_EC0_CSEE "\\_SB.PCI0.SBRG.EC0.CSEE"
-+/* 300ms so far seems to produce a reliable result on AC and battery */
-+#define ASUS_USB0_PWR_EC0_CSEE_WAIT 300
-+
- static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
-
- static int throttle_thermal_policy_write(struct asus_wmi *);
-@@ -300,6 +306,9 @@ struct asus_wmi {
-
- bool fnlock_locked;
-
-+ /* The ROG Ally device requires the MCU USB device be disconnected before suspend */
-+ bool ally_mcu_usb_switch;
-+
- struct asus_wmi_debug debug;
-
- struct asus_wmi_driver *driver;
-@@ -4488,6 +4497,8 @@ static int asus_wmi_add(struct platform_device *pdev)
- asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET);
- asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD);
- asus->mini_led_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE);
-+ asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE)
-+ && dmi_match(DMI_BOARD_NAME, "RC71L");
-
- err = fan_boost_mode_check_present(asus);
- if (err)
-@@ -4662,6 +4673,43 @@ static int asus_hotk_resume(struct device *device)
- asus_wmi_fnlock_update(asus);
-
- asus_wmi_tablet_mode_get_state(asus);
-+
-+ return 0;
-+}
-+
-+static int asus_hotk_resume_early(struct device *device)
-+{
-+ struct asus_wmi *asus = dev_get_drvdata(device);
-+
-+ if (asus->ally_mcu_usb_switch) {
-+ if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB8)))
-+ dev_err(device, "ROG Ally MCU failed to connect USB dev\n");
-+ else
-+ msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT);
-+ }
-+ return 0;
-+}
-+
-+static int asus_hotk_prepare(struct device *device)
-+{
-+ struct asus_wmi *asus = dev_get_drvdata(device);
-+ int result, err;
-+
-+ if (asus->ally_mcu_usb_switch) {
-+ /* When powersave is enabled it causes many issues with resume of USB hub */
-+ result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_MCU_POWERSAVE);
-+ if (result == 1) {
-+ dev_warn(device, "MCU powersave enabled, disabling to prevent resume issues");
-+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_MCU_POWERSAVE, 0, &result);
-+ if (err || result != 1)
-+ dev_err(device, "Failed to set MCU powersave mode: %d\n", err);
-+ }
-+ /* sleep required to ensure USB0 is disabled before sleep continues */
-+ if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB7)))
-+ dev_err(device, "ROG Ally MCU failed to disconnect USB dev\n");
-+ else
-+ msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT);
-+ }
- return 0;
- }
-
-@@ -4709,6 +4757,8 @@ static const struct dev_pm_ops asus_pm_ops = {
- .thaw = asus_hotk_thaw,
- .restore = asus_hotk_restore,
- .resume = asus_hotk_resume,
-+ .resume_early = asus_hotk_resume_early,
-+ .prepare = asus_hotk_prepare,
- };
-
- /* Registration ***************************************************************/
-diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
-index 63e630276499..ab1c7deff118 100644
---- a/include/linux/platform_data/x86/asus-wmi.h
-+++ b/include/linux/platform_data/x86/asus-wmi.h
-@@ -114,6 +114,9 @@
- /* Charging mode - 1=Barrel, 2=USB */
- #define ASUS_WMI_DEVID_CHARGE_MODE 0x0012006C
-
-+/* MCU powersave mode */
-+#define ASUS_WMI_DEVID_MCU_POWERSAVE 0x001200E2
-+
- /* epu is connected? 1 == true */
- #define ASUS_WMI_DEVID_EGPU_CONNECTED 0x00090018
- /* egpu on/off */
diff --git a/SOURCES/dracut-virt.conf b/SOURCES/dracut-virt.conf
index 3724026..c639fda 100644
--- a/SOURCES/dracut-virt.conf
+++ b/SOURCES/dracut-virt.conf
@@ -14,6 +14,12 @@ dracutmodules+=" dm lvm rootfs-block fs-lib "
# modules: tpm and crypto
dracutmodules+=" crypt crypt-loop tpm2-tss "
+# modules: support root on virtiofs
+dracutmodules+=" virtiofs "
+
+# modules: use sysext images (see 'man systemd-sysext')
+dracutmodules+=" systemd-sysext "
+
# drivers: virtual buses, pci
drivers+=" virtio-pci virtio-mmio " # qemu-kvm
drivers+=" hv-vmbus pci-hyperv " # hyperv
diff --git a/SOURCES/filter-modules.sh.fedora b/SOURCES/filter-modules.sh.fedora
index c14a790..7ef7614 100755
--- a/SOURCES/filter-modules.sh.fedora
+++ b/SOURCES/filter-modules.sh.fedora
@@ -45,7 +45,7 @@ netprots="6lowpan appletalk atm ax25 batman-adv bluetooth can dsa ieee802154 l2t
drmdrvs="amd ast bridge gma500 i2c i915 mgag200 nouveau panel radeon"
-singlemods="ntb_netdev iscsi_ibft iscsi_boot_sysfs megaraid pmcraid qedi qla1280 9pnet_rdma rpcrdma nvmet-rdma nvme-rdma hid-picolcd hid-prodikeys hwpoison-inject target_core_user sbp_target cxgbit chcr parport_serial regmap-sdw regmap-sdw-mbq arizona-micsupp hid-asus iTCO_wdt rnbd-client rnbd-server mlx5_vdpa spi-altera-dfl nct6775 hid-playstation hid-nintendo asus_wmi_sensors asus_wmi_ec_sensors mlx5-vfio-pci video int3406_thermal apple_bl ptp_dfl_tod intel-m10-bmc-hwmon intel_rapl_tpmi pds_vdpa hp-wmi-sensors pds-vfio-pci"
+singlemods="ntb_netdev iscsi_ibft iscsi_boot_sysfs megaraid pmcraid qedi qla1280 9pnet_rdma rpcrdma nvmet-rdma nvme-rdma hid-picolcd hid-prodikeys hwpoison-inject target_core_user sbp_target cxgbit chcr parport_serial regmap-sdw regmap-sdw-mbq arizona-micsupp hid-asus iTCO_wdt rnbd-client rnbd-server mlx5_vdpa spi-altera-dfl nct6775 hid-playstation hid-nintendo asus_wmi_sensors asus_wmi_ec_sensors mlx5-vfio-pci video int3406_thermal apple_bl ptp_dfl_tod intel-m10-bmc-hwmon intel_rapl_tpmi pds_vdpa hp-wmi-sensors pds-vfio-pci gpio-ljca spi-ljca i2c-ljca"
# Grab the arch-specific filter list overrides
source ./filter-$2.sh
@@ -84,6 +84,9 @@ filter_ko() {
return 0
}
+# HACK: move surface_fan and surface_temp to kernel-modules
+singlemods="${singlemods} surface_fan surface_temp"
+
# Filter the drivers/ subsystems
for subsys in ${driverdirs}
do
diff --git a/SOURCES/kernel-aarch64-16k-debug-fedora.config b/SOURCES/kernel-aarch64-16k-debug-fedora.config
index 83c340e..990ffa6 100644
--- a/SOURCES/kernel-aarch64-16k-debug-fedora.config
+++ b/SOURCES/kernel-aarch64-16k-debug-fedora.config
@@ -254,6 +254,7 @@ CONFIG_AMD_XGBE_DCB=y
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
CONFIG_AMLOGIC_THERMAL=m
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
CONFIG_AMT=m
CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder"
@@ -329,12 +330,33 @@ CONFIG_ARCH_MXC=y
CONFIG_ARCH_NR_GPIO=2048
CONFIG_ARCH_NXP=y
# CONFIG_ARCH_OMAP1 is not set
+# CONFIG_ARCH_PENSANDO is not set
# CONFIG_ARCH_PXA is not set
CONFIG_ARCH_QCOM=y
+CONFIG_ARCH_R8A774A1=y
+# CONFIG_ARCH_R8A774B1 is not set
+# CONFIG_ARCH_R8A774C0 is not set
+# CONFIG_ARCH_R8A774E1 is not set
+# CONFIG_ARCH_R8A77951 is not set
+# CONFIG_ARCH_R8A77960 is not set
+# CONFIG_ARCH_R8A77961 is not set
+# CONFIG_ARCH_R8A77965 is not set
+# CONFIG_ARCH_R8A77970 is not set
+# CONFIG_ARCH_R8A77980 is not set
+# CONFIG_ARCH_R8A77990 is not set
+# CONFIG_ARCH_R8A77995 is not set
+# CONFIG_ARCH_R8A779A0 is not set
+# CONFIG_ARCH_R8A779F0 is not set
+# CONFIG_ARCH_R8A779G0 is not set
+CONFIG_ARCH_R9A07G043=y
+CONFIG_ARCH_R9A07G044=y
+CONFIG_ARCH_R9A07G054=y
+# CONFIG_ARCH_R9A08G045 is not set
+# CONFIG_ARCH_R9A09G011 is not set
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_RDA is not set
# CONFIG_ARCH_REALTEK is not set
-# CONFIG_ARCH_RENESAS is not set
+CONFIG_ARCH_RENESAS=y
CONFIG_ARCH_ROCKCHIP=y
# CONFIG_ARCH_S32 is not set
# CONFIG_ARCH_SA1100 is not set
@@ -396,6 +418,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -484,6 +507,7 @@ CONFIG_ARM_RASPBERRYPI_CPUFREQ=m
CONFIG_ARM_RK3399_DMC_DEVFREQ=m
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
CONFIG_ARM_SCMI_POWERCAP=m
CONFIG_ARM_SCMI_POWER_CONTROL=m
CONFIG_ARM_SCMI_POWER_DOMAIN=m
@@ -531,7 +555,7 @@ CONFIG_ATA_ACPI=y
CONFIG_ATA_BMDMA=y
CONFIG_ATA_FORCE=y
CONFIG_ATA_GENERIC=m
-# CONFIG_ATALK is not set
+CONFIG_ATALK=m
CONFIG_ATA_OVER_ETH=m
CONFIG_ATA_PIIX=y
# CONFIG_ATARI_PARTITION is not set
@@ -702,6 +726,7 @@ CONFIG_BATTERY_GAUGE_LTC2941=m
CONFIG_BATTERY_MAX17040=m
CONFIG_BATTERY_MAX17042=m
# CONFIG_BATTERY_MAX1721X is not set
+# CONFIG_BATTERY_PM8916_BMS_VM is not set
CONFIG_BATTERY_QCOM_BATTMGR=m
CONFIG_BATTERY_RT5033=m
CONFIG_BATTERY_SAMSUNG_SDI=y
@@ -715,6 +740,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y
+CONFIG_BCACHEFS_DEBUG=y
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_LOCK_TIME_STATS=y
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM2711_THERMAL=m
CONFIG_BCM2835_MBOX=y
@@ -867,7 +901,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCMSTB_L2_IRQ=y
CONFIG_BRCM_TRACING=y
CONFIG_BRCMUTIL=m
@@ -980,7 +1013,6 @@ CONFIG_CADENCE_WATCHDOG=m
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -1017,6 +1049,8 @@ CONFIG_CAN_NETLINK=y
CONFIG_CAN_PEAK_PCIEFD=m
CONFIG_CAN_PEAK_USB=m
CONFIG_CAN_RAW=m
+# CONFIG_CAN_RCAR_CANFD is not set
+# CONFIG_CAN_RCAR is not set
# CONFIG_CAN_SJA1000 is not set
CONFIG_CAN_SLCAN=m
# CONFIG_CAN_SOFTING is not set
@@ -1075,6 +1109,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -1117,6 +1152,7 @@ CONFIG_CHARGER_MAX77650=m
CONFIG_CHARGER_MAX77976=m
# CONFIG_CHARGER_MAX8903 is not set
CONFIG_CHARGER_MT6370=m
+# CONFIG_CHARGER_PM8916_LBC is not set
# CONFIG_CHARGER_QCOM_SMB2 is not set
CONFIG_CHARGER_QCOM_SMBB=m
CONFIG_CHARGER_RK817=m
@@ -1168,6 +1204,7 @@ CONFIG_CIO2_BRIDGE=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM2711_DVP=m
CONFIG_CLK_BCM2835=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
CONFIG_CLK_ICST=y
@@ -1183,6 +1220,7 @@ CONFIG_CLK_LS1028A_PLLDIG=y
CONFIG_CLK_PX30=y
CONFIG_CLK_QORIQ=y
CONFIG_CLK_RASPBERRYPI=y
+# CONFIG_CLK_RCAR_USB2_CLOCK_SEL is not set
CONFIG_CLK_RK3036=y
CONFIG_CLK_RK312X=y
CONFIG_CLK_RK3188=y
@@ -1268,6 +1306,8 @@ CONFIG_COMMON_CLK_QCOM=y
CONFIG_COMMON_CLK_RK808=m
CONFIG_COMMON_CLK_ROCKCHIP=y
CONFIG_COMMON_CLK_RS9_PCIE=m
+CONFIG_COMMON_CLK_S4_PERIPHERALS=y
+CONFIG_COMMON_CLK_S4_PLL=y
CONFIG_COMMON_CLK_SCMI=y
CONFIG_COMMON_CLK_SCPI=m
# CONFIG_COMMON_CLK_SI514 is not set
@@ -1299,7 +1339,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1387,6 +1426,7 @@ CONFIG_CROS_EC_UART=m
CONFIG_CROS_EC_VBC=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1533,6 +1573,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1644,6 +1689,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
CONFIG_DEBUG_CREDENTIALS=y
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1740,7 +1786,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1853,6 +1898,7 @@ CONFIG_DPOT_DAC=m
# CONFIG_DPS310 is not set
CONFIG_DRAGONRISE_FF=y
CONFIG_DRBD_FAULT_INJECTION=y
+CONFIG_DRIVER_PE_KUNIT_TEST=m
CONFIG_DRM_ACCEL_QAIC=m
CONFIG_DRM_ACCEL=y
CONFIG_DRM_AMD_ACP=y
@@ -1936,6 +1982,7 @@ CONFIG_DRM_IMX8QXP_LDB=m
CONFIG_DRM_IMX8QXP_PIXEL_COMBINER=m
CONFIG_DRM_IMX8QXP_PIXEL_LINK=m
CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI=m
+CONFIG_DRM_IMX93_MIPI_DSI=m
CONFIG_DRM_IMX_DCSS=m
CONFIG_DRM_IMX_LCDC=m
CONFIG_DRM_IMX_LCDIF=m
@@ -1999,9 +2046,11 @@ CONFIG_DRM_PANEL_HIMAX_HX8394=m
CONFIG_DRM_PANEL_ILITEK_IL9322=m
CONFIG_DRM_PANEL_ILITEK_ILI9341=m
CONFIG_DRM_PANEL_ILITEK_ILI9881C=m
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
CONFIG_DRM_PANEL_KHADAS_TS050=m
@@ -2031,6 +2080,7 @@ CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m
CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
CONFIG_DRM_PANEL_RAYDIUM_RM68200=m
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -2075,13 +2125,16 @@ CONFIG_DRM_PL111=m
CONFIG_DRM_QXL=m
CONFIG_DRM_RADEON=m
CONFIG_DRM_RADEON_USERPTR=y
+# CONFIG_DRM_RCAR_DU is not set
# CONFIG_DRM_RCAR_DW_HDMI is not set
# CONFIG_DRM_RCAR_LVDS is not set
# CONFIG_DRM_RCAR_MIPI_DSI is not set
# CONFIG_DRM_RCAR_USE_LVDS is not set
# CONFIG_DRM_RCAR_USE_MIPI_DSI is not set
CONFIG_DRM_ROCKCHIP=m
+# CONFIG_DRM_RZG2L_MIPI_DSI is not set
CONFIG_DRM_SAMSUNG_DSIM=m
+# CONFIG_DRM_SHMOBILE is not set
# CONFIG_DRM_SII902X is not set
CONFIG_DRM_SII9234=m
# CONFIG_DRM_SIL_SII8620 is not set
@@ -2101,7 +2154,7 @@ CONFIG_DRM_TEGRA_STAGING=y
# CONFIG_DRM_THINE_THC63LVD1024 is not set
CONFIG_DRM_TI_DLPC3433=m
CONFIG_DRM_TIDSS=m
-# CONFIG_DRM_TI_SN65DSI83 is not set
+CONFIG_DRM_TI_SN65DSI83=m
CONFIG_DRM_TI_SN65DSI86=m
CONFIG_DRM_TI_TFP410=m
CONFIG_DRM_TI_TPD12S015=m
@@ -2269,6 +2322,7 @@ CONFIG_EDAC_LEGACY_SYSFS=y
CONFIG_EDAC_QCOM=m
CONFIG_EDAC_SYNOPSYS=m
CONFIG_EDAC_THUNDERX=m
+CONFIG_EDAC_VERSAL=m
CONFIG_EDAC_XGENE=m
CONFIG_EDAC=y
CONFIG_EDAC_ZYNQMP=m
@@ -2278,7 +2332,6 @@ CONFIG_EEPROM_AT24=m
CONFIG_EEPROM_AT25=m
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EFI_ARMSTUB_DTB_LOADER=y
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -2409,7 +2462,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2563,6 +2616,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
# CONFIG_FTWDT010_WATCHDOG is not set
+CONFIG_FUEL_GAUGE_MM8013=m
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2693,6 +2747,7 @@ CONFIG_GPIO_PCI_IDIO_16=m
# CONFIG_GPIO_PISOSR is not set
CONFIG_GPIO_PL061=y
CONFIG_GPIO_RASPBERRYPI_EXP=m
+CONFIG_GPIO_RCAR=m
# CONFIG_GPIO_RDC321X is not set
CONFIG_GPIO_ROCKCHIP=y
# CONFIG_GPIO_SAMA5D2_PIOBU is not set
@@ -2922,6 +2977,7 @@ CONFIG_HNS_ENET=m
CONFIG_HOLTEK_FF=y
# CONFIG_HOSTAP is not set
CONFIG_HOTPLUG_CPU=y
+CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA=m
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -3075,9 +3131,13 @@ CONFIG_I2C_PXA=m
CONFIG_I2C_QCOM_CCI=m
CONFIG_I2C_QCOM_GENI=m
CONFIG_I2C_QUP=m
+# CONFIG_I2C_RCAR is not set
+# CONFIG_I2C_RIIC is not set
CONFIG_I2C_RK3X=y
# CONFIG_I2C_ROBOTFUZZ_OSIF is not set
+# CONFIG_I2C_RZV2M is not set
CONFIG_I2C_SCMI=m
+# CONFIG_I2C_SH_MOBILE is not set
CONFIG_I2C_SI470X=m
# CONFIG_I2C_SI4713 is not set
CONFIG_I2C_SIMTEC=m
@@ -3119,6 +3179,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -3211,7 +3272,6 @@ CONFIG_IMA_NG_TEMPLATE=y
CONFIG_IMA_READ_POLICY=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -3420,6 +3480,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=m
CONFIG_INTERCONNECT_QCOM_SDM845=m
# CONFIG_INTERCONNECT_QCOM_SDX55 is not set
# CONFIG_INTERCONNECT_QCOM_SDX65 is not set
+CONFIG_INTERCONNECT_QCOM_SDX75=m
# CONFIG_INTERCONNECT_QCOM_SM6350 is not set
CONFIG_INTERCONNECT_QCOM_SM8150=m
CONFIG_INTERCONNECT_QCOM_SM8250=m
@@ -3482,8 +3543,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
CONFIG_IPMB_DEVICE_INTERFACE=m
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3494,6 +3553,7 @@ CONFIG_IPMI_POWEROFF=m
CONFIG_IPMI_SI=m
CONFIG_IPMI_SSIF=m
CONFIG_IPMI_WATCHDOG=m
+# CONFIG_IPMMU_VMSA is not set
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
@@ -3824,7 +3884,7 @@ CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3859,6 +3919,7 @@ CONFIG_KUNIT=m
CONFIG_KUNIT_TEST=m
# CONFIG_KUNPENG_HCCS is not set
CONFIG_KUSER_HELPERS=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3916,6 +3977,7 @@ CONFIG_LEDS_GPIO=m
CONFIG_LEDS_GROUP_MULTICOLOR=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -4043,6 +4105,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -4057,6 +4120,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -4065,6 +4129,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
@@ -4137,6 +4202,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
CONFIG_MCP320X=m
CONFIG_MCP3422=m
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -4147,6 +4213,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -4168,7 +4235,7 @@ CONFIG_MDIO_I2C=m
CONFIG_MDIO_IPQ8064=m
# CONFIG_MDIO_MSCC_MIIM is not set
CONFIG_MDIO_MVUSB=m
-# CONFIG_MDIO_OCTEON is not set
+CONFIG_MDIO_OCTEON=m
# CONFIG_MDIO_SUN4I is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MDIO_XGENE=m
@@ -4182,6 +4249,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -4423,18 +4491,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -4522,7 +4594,11 @@ CONFIG_MMC_SDHCI_PLTFM=m
CONFIG_MMC_SDHCI_PXAV3=m
CONFIG_MMC_SDHCI_TEGRA=m
CONFIG_MMC_SDHCI_XENON=m
+CONFIG_MMC_SDHI_INTERNAL_DMAC=m
+CONFIG_MMC_SDHI=m
+# CONFIG_MMC_SDHI_SYS_DMAC is not set
CONFIG_MMC_SDRICOH_CS=m
+# CONFIG_MMC_SH_MMCIF is not set
CONFIG_MMC_SPI=m
# CONFIG_MMC_STM32_SDMMC is not set
CONFIG_MMC_SUNXI=m
@@ -4556,6 +4632,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -4646,6 +4725,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -4704,6 +4785,7 @@ CONFIG_MTD_NAND_NANDSIM=m
# CONFIG_MTD_NAND_PL35X is not set
# CONFIG_MTD_NAND_PLATFORM is not set
# CONFIG_MTD_NAND_QCOM is not set
+# CONFIG_MTD_NAND_RENESAS is not set
# CONFIG_MTD_NAND_RICOH is not set
# CONFIG_MTD_NAND_ROCKCHIP is not set
# CONFIG_MTD_NAND_SUNXI is not set
@@ -4771,7 +4853,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
CONFIG_MWL8K=m
-# CONFIG_MX3_IPU is not set
CONFIG_MXC4005=m
CONFIG_MXC6255=m
# CONFIG_MXS_DMA is not set
@@ -4825,9 +4906,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4900,12 +4978,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4998,6 +5076,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -5010,15 +5089,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
CONFIG_NET_SB1000=y
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -5052,6 +5128,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -5189,7 +5266,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -5383,11 +5460,13 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_NVIDIA_SHIELD_FF=y
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
CONFIG_NVME_APPLE=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_APPLE_EFUSES=m
# CONFIG_NVMEM_IMX_IIM is not set
@@ -5423,7 +5502,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -5572,6 +5653,7 @@ CONFIG_PCI_AARDVARK=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -5610,10 +5692,13 @@ CONFIG_PCIE_MOBIVEIL=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCIE_PTM=y
CONFIG_PCIE_QCOM=y
+# CONFIG_PCIE_RCAR_GEN4_HOST is not set
+# CONFIG_PCIE_RCAR_HOST is not set
CONFIG_PCIE_ROCKCHIP_DW_HOST=y
CONFIG_PCIE_ROCKCHIP_HOST=y
CONFIG_PCIE_TEGRA194_HOST=y
CONFIG_PCIE_XILINX_CPM=y
+CONFIG_PCIE_XILINX_DMA_PL=y
CONFIG_PCIE_XILINX_NWL=y
CONFIG_PCIE_XILINX=y
# CONFIG_PCI_FTPCI100 is not set
@@ -5647,6 +5732,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -5696,7 +5782,7 @@ CONFIG_PHY_MESON_G12A_MIPI_DPHY_ANALOG=y
CONFIG_PHY_MESON_G12A_USB2=y
CONFIG_PHY_MESON_G12A_USB3_PCIE=m
CONFIG_PHY_MESON_GXL_USB2=m
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
+CONFIG_PHY_MIXEL_LVDS_PHY=m
CONFIG_PHY_MIXEL_MIPI_DPHY=m
CONFIG_PHY_MVEBU_A3700_COMPHY=m
CONFIG_PHY_MVEBU_A3700_UTMI=m
@@ -5730,6 +5816,11 @@ CONFIG_PHY_QCOM_USB_HSIC=m
CONFIG_PHY_QCOM_USB_HS=m
CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=m
CONFIG_PHY_QCOM_USB_SS=m
+# CONFIG_PHY_R8A779F0_ETHERNET_SERDES is not set
+# CONFIG_PHY_RCAR_GEN2 is not set
+# CONFIG_PHY_RCAR_GEN3_PCIE is not set
+CONFIG_PHY_RCAR_GEN3_USB2=m
+# CONFIG_PHY_RCAR_GEN3_USB3 is not set
CONFIG_PHY_ROCKCHIP_DPHY_RX0=m
CONFIG_PHY_ROCKCHIP_DP=m
CONFIG_PHY_ROCKCHIP_EMMC=m
@@ -5762,6 +5853,7 @@ CONFIG_PINCONF=y
CONFIG_PINCTRL_ALDERLAKE=m
CONFIG_PINCTRL_AMD=y
CONFIG_PINCTRL_AMLOGIC_C3=y
+CONFIG_PINCTRL_AMLOGIC_T7=y
CONFIG_PINCTRL_APPLE_GPIO=m
CONFIG_PINCTRL_AS3722=y
CONFIG_PINCTRL_AXP209=m
@@ -5882,12 +5974,13 @@ CONFIG_PINCTRL_SUN50I_H6=y
# CONFIG_PINCTRL_SUN8I_A33 is not set
# CONFIG_PINCTRL_SUN8I_A83T is not set
# CONFIG_PINCTRL_SUN8I_A83T_R is not set
-# CONFIG_PINCTRL_SUN8I_H3 is not set
CONFIG_PINCTRL_SUN8I_H3_R=y
+CONFIG_PINCTRL_SUN8I_H3=y
# CONFIG_PINCTRL_SUN8I_V3S is not set
# CONFIG_PINCTRL_SUN9I_A80 is not set
# CONFIG_PINCTRL_SUN9I_A80_R is not set
# CONFIG_PINCTRL_SX150X is not set
+CONFIG_PINCTRL_TEGRA234=y
CONFIG_PINCTRL=y
CONFIG_PINCTRL_ZYNQMP=y
# CONFIG_PING is not set
@@ -5933,7 +6026,6 @@ CONFIG_POWERCAP=y
CONFIG_POWER_MLXBF=m
CONFIG_POWER_RESET_AS3722=y
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -6068,6 +6160,8 @@ CONFIG_PWM_MESON=m
CONFIG_PWM_OMAP_DMTIMER=m
CONFIG_PWM_PCA9685=m
CONFIG_PWM_RASPBERRYPI_POE=m
+# CONFIG_PWM_RCAR is not set
+# CONFIG_PWM_RENESAS_TPU is not set
CONFIG_PWM_ROCKCHIP=m
CONFIG_PWM_STMPE=y
CONFIG_PWM_SUN4I=m
@@ -6134,6 +6228,8 @@ CONFIG_QCOM_Q6V5_WCSS=m
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
CONFIG_QCOM_QFPROM=m
CONFIG_QCOM_QMI_HELPERS=m
+CONFIG_QCOM_QSEECOM_UEFISECAPP=y
+CONFIG_QCOM_QSEECOM=y
CONFIG_QCOM_RAMP_CTRL=m
CONFIG_QCOM_RMTFS_MEM=m
CONFIG_QCOM_RPMHPD=y
@@ -6240,6 +6336,10 @@ CONFIG_RASPBERRYPI_POWER=y
CONFIG_RATIONAL_KUNIT_TEST=m
# CONFIG_RAVE_SP_CORE is not set
# CONFIG_RBTREE_TEST is not set
+# CONFIG_RCAR_DMAC is not set
+# CONFIG_RCAR_GEN3_THERMAL is not set
+# CONFIG_RCAR_REMOTEPROC is not set
+# CONFIG_RCAR_THERMAL is not set
CONFIG_RC_ATI_REMOTE=m
CONFIG_RC_CORE=y
CONFIG_RC_DECODERS=y
@@ -6275,7 +6375,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=y
@@ -6319,6 +6419,7 @@ CONFIG_REGULATOR_HI655X=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77650=m
CONFIG_REGULATOR_MAX77686=m
@@ -6405,7 +6506,14 @@ CONFIG_RELOCATABLE=y
# CONFIG_REMOTEPROC_CDEV is not set
CONFIG_REMOTEPROC=y
CONFIG_REMOTE_TARGET=m
+# CONFIG_RENESAS_OSTM is not set
# CONFIG_RENESAS_PHY is not set
+# CONFIG_RENESAS_RPCIF is not set
+# CONFIG_RENESAS_RZAWDT is not set
+# CONFIG_RENESAS_RZG2LWDT is not set
+# CONFIG_RENESAS_RZN1WDT is not set
+# CONFIG_RENESAS_USB_DMAC is not set
+# CONFIG_RENESAS_WDT is not set
# CONFIG_RESET_ATTACK_MITIGATION is not set
CONFIG_RESET_CONTROLLER=y
CONFIG_RESET_HISI=y
@@ -6416,6 +6524,7 @@ CONFIG_RESET_MESON=m
CONFIG_RESET_QCOM_AOSS=y
CONFIG_RESET_QCOM_PDC=m
CONFIG_RESET_RASPBERRYPI=y
+CONFIG_RESET_RZG2L_USBPHY_CTRL=m
CONFIG_RESET_SCMI=y
CONFIG_RESET_SIMPLE=y
CONFIG_RESET_TI_SCI=m
@@ -6472,6 +6581,7 @@ CONFIG_ROCKCHIP_VOP2=y
CONFIG_ROCKCHIP_VOP=y
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -6531,7 +6641,6 @@ CONFIG_RTC_DRV_ARMADA38X=m
CONFIG_RTC_DRV_AS3722=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
CONFIG_RTC_DRV_CADENCE=m
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_CROS_EC=m
@@ -6619,6 +6728,7 @@ CONFIG_RTC_DRV_RX8581=m
CONFIG_RTC_DRV_S35390A=m
# CONFIG_RTC_DRV_SA1100 is not set
CONFIG_RTC_DRV_SD3078=m
+# CONFIG_RTC_DRV_SH is not set
CONFIG_RTC_DRV_SNVS=m
CONFIG_RTC_DRV_STK17TA8=m
CONFIG_RTC_DRV_SUN6I=y
@@ -6694,6 +6804,10 @@ CONFIG_RV_REACT_PRINTK=y
CONFIG_RV=y
CONFIG_RXKAD=y
# CONFIG_RXPERF is not set
+# CONFIG_RZ_DMAC is not set
+# CONFIG_RZG2L_ADC is not set
+# CONFIG_RZG2L_THERMAL is not set
+# CONFIG_RZ_MTU3 is not set
CONFIG_S2IO=m
# CONFIG_S390_KPROBES_SANITY_TEST is not set
# CONFIG_S390_MODULES_SANITY_TEST is not set
@@ -6714,6 +6828,7 @@ CONFIG_SATA_MV=m
CONFIG_SATA_PMP=y
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_RCAR is not set
CONFIG_SATA_SIL24=m
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
@@ -6863,6 +6978,7 @@ CONFIG_SCSI_UFS_HISI=m
CONFIG_SCSI_UFS_HPB=y
CONFIG_SCSI_UFS_HWMON=y
CONFIG_SCSI_UFS_QCOM=m
+# CONFIG_SCSI_UFS_RENESAS is not set
CONFIG_SCSI_UFS_TI_J721E=m
CONFIG_SCSI_VIRTIO=m
CONFIG_SCSI_WD719X=m
@@ -6892,11 +7008,12 @@ CONFIG_SDM_VIDEOCC_845=m
# CONFIG_SDX_GCC_75 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -7038,6 +7155,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -7098,6 +7216,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -7171,6 +7290,7 @@ CONFIG_SERIAL_8250_CS=m
CONFIG_SERIAL_8250_DFL=m
CONFIG_SERIAL_8250_DMA=y
CONFIG_SERIAL_8250_DW=y
+# CONFIG_SERIAL_8250_EM is not set
CONFIG_SERIAL_8250_EXAR=m
CONFIG_SERIAL_8250_EXTENDED=y
# CONFIG_SERIAL_8250_FINTEK is not set
@@ -7233,6 +7353,11 @@ CONFIG_SERIAL_SC16IS7XX_I2C=y
CONFIG_SERIAL_SC16IS7XX=m
CONFIG_SERIAL_SC16IS7XX_SPI=y
# CONFIG_SERIAL_SCCNXP is not set
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_SH_SCI_DMA=y
+CONFIG_SERIAL_SH_SCI_EARLYCON=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=18
+CONFIG_SERIAL_SH_SCI=y
# CONFIG_SERIAL_SIFIVE is not set
# CONFIG_SERIAL_SPRD is not set
# CONFIG_SERIAL_ST_ASC is not set
@@ -7299,7 +7424,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -7313,12 +7437,14 @@ CONFIG_SMC91X=m
# CONFIG_SM_CAMCC_6350 is not set
# CONFIG_SM_CAMCC_8250 is not set
# CONFIG_SM_CAMCC_8450 is not set
+# CONFIG_SM_CAMCC_8550 is not set
CONFIG_SMC_DIAG=m
CONFIG_SMC=m
# CONFIG_SM_DISPCC_8250 is not set
CONFIG_SM_DISPCC_8450=m
# CONFIG_SM_DISPCC_8550 is not set
# CONFIG_SM_FTL is not set
+# CONFIG_SM_GCC_4450 is not set
# CONFIG_SM_GCC_6115 is not set
# CONFIG_SM_GCC_6125 is not set
# CONFIG_SM_GCC_6350 is not set
@@ -7353,7 +7479,7 @@ CONFIG_SMS_USB_DRV=m
# CONFIG_SM_TCSRCC_8550 is not set
# CONFIG_SM_VIDEOCC_8150 is not set
# CONFIG_SM_VIDEOCC_8250 is not set
-# CONFIG_SM_VIDEOCC_8350 is not set
+CONFIG_SM_VIDEOCC_8350=m
# CONFIG_SM_VIDEOCC_8450 is not set
# CONFIG_SM_VIDEOCC_8550 is not set
CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0
@@ -7422,6 +7548,7 @@ CONFIG_SND_FM801=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -7575,8 +7702,10 @@ CONFIG_SND_SOC_APQ8016_SBC=m
CONFIG_SND_SOC_ARNDALE=m
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -7782,6 +7911,7 @@ CONFIG_SND_SOC_PCM512x_SPI=m
# CONFIG_SND_SOC_PEB2466 is not set
CONFIG_SND_SOC_QCOM=m
CONFIG_SND_SOC_QDSP6=m
+# CONFIG_SND_SOC_RCAR is not set
CONFIG_SND_SOC_RK3288_HDMI_ANALOG=m
CONFIG_SND_SOC_RK3328=m
CONFIG_SND_SOC_RK3399_GRU_SOUND=m
@@ -7816,6 +7946,8 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
+# CONFIG_SND_SOC_RZ is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811=m
@@ -7827,6 +7959,7 @@ CONFIG_SND_SOC_SC8280XP=m
CONFIG_SND_SOC_SDM845=m
# CONFIG_SND_SOC_SDW_MOCKUP is not set
CONFIG_SND_SOC_SGTL5000=m
+# CONFIG_SND_SOC_SH4_FSI is not set
CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m
CONFIG_SND_SOC_SIMPLE_MUX=m
# CONFIG_SND_SOC_SM8250 is not set
@@ -8120,8 +8253,12 @@ CONFIG_SPI_QCOM_GENI=m
CONFIG_SPI_QCOM_QSPI=m
CONFIG_SPI_QUP=m
CONFIG_SPI_ROCKCHIP=m
-# CONFIG_SPI_ROCKCHIP_SFC is not set
+CONFIG_SPI_ROCKCHIP_SFC=m
+# CONFIG_SPI_RSPI is not set
+# CONFIG_SPI_RZV2M_CSI is not set
# CONFIG_SPI_SC18IS602 is not set
+# CONFIG_SPI_SH_HSPI is not set
+# CONFIG_SPI_SH_MSIOF is not set
# CONFIG_SPI_SIFIVE is not set
# CONFIG_SPI_SLAVE is not set
CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m
@@ -8354,6 +8491,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -8430,6 +8568,7 @@ CONFIG_TEST_LOCKUP=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -8502,7 +8641,7 @@ CONFIG_TIFM_7XX1=m
CONFIG_TIFM_CORE=m
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
+CONFIG_TI_ICSSG_PRUETH=m
CONFIG_TI_ICSS_IEP=m
CONFIG_TI_K3_AM65_CPSW_NUSS=m
CONFIG_TI_K3_AM65_CPSW_SWITCHDEV=y
@@ -8717,6 +8856,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
CONFIG_TYPEC_QCOM_PMIC=m
# CONFIG_TYPEC_RT1711H is not set
@@ -8728,7 +8868,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TPS6598X=m
CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_WUSB3801=m
CONFIG_TYPHOON=m
CONFIG_UACCE=m
@@ -8821,6 +8960,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
@@ -8885,6 +9025,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y
# CONFIG_USB_EHSET_TEST_FIXTURE is not set
CONFIG_USB_EMI26=m
CONFIG_USB_EMI62=m
+# CONFIG_USB_EMXX is not set
CONFIG_USB_EPSON2888=y
# CONFIG_USB_ETH is not set
CONFIG_USB_EZUSB_FX2=m
@@ -8997,6 +9138,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_M66592 is not set
CONFIG_USB_MA901=m
@@ -9057,6 +9199,7 @@ CONFIG_USB_OTG_FSM=m
# CONFIG_USB_OTG_PRODUCTLIST is not set
CONFIG_USB_OTG=y
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -9073,6 +9216,9 @@ CONFIG_USB_QCOM_EUD=m
CONFIG_USB_RAINSHADOW_CEC=m
# CONFIG_USB_RAREMONO is not set
CONFIG_USB_RAW_GADGET=m
+# CONFIG_USB_RENESAS_USB3 is not set
+# CONFIG_USB_RENESAS_USBF is not set
+# CONFIG_USB_RENESAS_USBHS is not set
CONFIG_USB_ROLE_SWITCH=y
CONFIG_USB_RTL8150=m
CONFIG_USB_RTL8152=m
@@ -9184,6 +9330,7 @@ CONFIG_USB_XHCI_MVEBU=m
CONFIG_USB_XHCI_PCI_RENESAS=y
CONFIG_USB_XHCI_PCI=y
CONFIG_USB_XHCI_PLATFORM=m
+CONFIG_USB_XHCI_RCAR=m
CONFIG_USB_XHCI_TEGRA=m
CONFIG_USB_XUSBATM=m
CONFIG_USB=y
@@ -9354,7 +9501,7 @@ CONFIG_VIDEO_IMX8_ISI=m
CONFIG_VIDEO_IMX8_ISI_M2M=y
CONFIG_VIDEO_IMX8_JPEG=m
CONFIG_VIDEO_IMX8MQ_MIPI_CSI2=m
-CONFIG_VIDEO_IMX_MEDIA=m
+# CONFIG_VIDEO_IMX_MEDIA is not set
CONFIG_VIDEO_IMX_MIPI_CSIS=m
CONFIG_VIDEO_IMX_PXP=m
# CONFIG_VIDEO_IPU3_CIO2 is not set
@@ -9371,10 +9518,12 @@ CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
CONFIG_VIDEO_MESON_GE2D=m
CONFIG_VIDEO_MESON_VDEC=m
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -9422,12 +9571,19 @@ CONFIG_VIDEO_PVRUSB2=m
CONFIG_VIDEO_PVRUSB2_SYSFS=y
CONFIG_VIDEO_QCOM_CAMSS=m
CONFIG_VIDEO_QCOM_VENUS=m
+# CONFIG_VIDEO_RCAR_CSI2 is not set
+# CONFIG_VIDEO_RCAR_ISP is not set
+# CONFIG_VIDEO_RCAR_VIN is not set
CONFIG_VIDEO_RDACM20=m
# CONFIG_VIDEO_RDACM21 is not set
+# CONFIG_VIDEO_RENESAS_FCP is not set
+# CONFIG_VIDEO_RENESAS_JPU is not set
CONFIG_VIDEO_RJ54N1=m
CONFIG_VIDEO_ROCKCHIP_ISP1=m
CONFIG_VIDEO_ROCKCHIP_RGA=m
CONFIG_VIDEO_ROCKCHIP_VDEC=m
+# CONFIG_VIDEO_RZG2L_CRU is not set
+# CONFIG_VIDEO_RZG2L_CSI2 is not set
CONFIG_VIDEO_S5C73M3=m
CONFIG_VIDEO_S5K4ECGX=m
CONFIG_VIDEO_S5K5BAF=m
@@ -9478,6 +9634,7 @@ CONFIG_VIDEO_THS7303=m
CONFIG_VIDEO_THS8200=m
CONFIG_VIDEO_TI_CAL=m
CONFIG_VIDEO_TI_CAL_MC=y
+CONFIG_VIDEO_TI_J721E_CSI2RX=m
CONFIG_VIDEO_TLV320AIC23B=m
CONFIG_VIDEO_TM6000_ALSA=m
CONFIG_VIDEO_TM6000_DVB=m
@@ -9683,6 +9840,7 @@ CONFIG_XDP_SOCKETS=y
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
# CONFIG_XEN is not set
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -9801,19 +9959,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-16k-fedora.config b/SOURCES/kernel-aarch64-16k-fedora.config
index 2de220e..ec0e8aa 100644
--- a/SOURCES/kernel-aarch64-16k-fedora.config
+++ b/SOURCES/kernel-aarch64-16k-fedora.config
@@ -254,6 +254,7 @@ CONFIG_AMD_XGBE_DCB=y
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
CONFIG_AMLOGIC_THERMAL=m
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
CONFIG_AMT=m
CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder"
@@ -329,12 +330,33 @@ CONFIG_ARCH_MXC=y
CONFIG_ARCH_NR_GPIO=2048
CONFIG_ARCH_NXP=y
# CONFIG_ARCH_OMAP1 is not set
+# CONFIG_ARCH_PENSANDO is not set
# CONFIG_ARCH_PXA is not set
CONFIG_ARCH_QCOM=y
+CONFIG_ARCH_R8A774A1=y
+# CONFIG_ARCH_R8A774B1 is not set
+# CONFIG_ARCH_R8A774C0 is not set
+# CONFIG_ARCH_R8A774E1 is not set
+# CONFIG_ARCH_R8A77951 is not set
+# CONFIG_ARCH_R8A77960 is not set
+# CONFIG_ARCH_R8A77961 is not set
+# CONFIG_ARCH_R8A77965 is not set
+# CONFIG_ARCH_R8A77970 is not set
+# CONFIG_ARCH_R8A77980 is not set
+# CONFIG_ARCH_R8A77990 is not set
+# CONFIG_ARCH_R8A77995 is not set
+# CONFIG_ARCH_R8A779A0 is not set
+# CONFIG_ARCH_R8A779F0 is not set
+# CONFIG_ARCH_R8A779G0 is not set
+CONFIG_ARCH_R9A07G043=y
+CONFIG_ARCH_R9A07G044=y
+CONFIG_ARCH_R9A07G054=y
+# CONFIG_ARCH_R9A08G045 is not set
+# CONFIG_ARCH_R9A09G011 is not set
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_RDA is not set
# CONFIG_ARCH_REALTEK is not set
-# CONFIG_ARCH_RENESAS is not set
+CONFIG_ARCH_RENESAS=y
CONFIG_ARCH_ROCKCHIP=y
# CONFIG_ARCH_S32 is not set
# CONFIG_ARCH_SA1100 is not set
@@ -396,6 +418,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -484,6 +507,7 @@ CONFIG_ARM_RASPBERRYPI_CPUFREQ=m
CONFIG_ARM_RK3399_DMC_DEVFREQ=m
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
CONFIG_ARM_SCMI_POWERCAP=m
CONFIG_ARM_SCMI_POWER_CONTROL=m
CONFIG_ARM_SCMI_POWER_DOMAIN=m
@@ -531,7 +555,7 @@ CONFIG_ATA_ACPI=y
CONFIG_ATA_BMDMA=y
CONFIG_ATA_FORCE=y
CONFIG_ATA_GENERIC=m
-# CONFIG_ATALK is not set
+CONFIG_ATALK=m
CONFIG_ATA_OVER_ETH=m
CONFIG_ATA_PIIX=y
# CONFIG_ATARI_PARTITION is not set
@@ -702,6 +726,7 @@ CONFIG_BATTERY_GAUGE_LTC2941=m
CONFIG_BATTERY_MAX17040=m
CONFIG_BATTERY_MAX17042=m
# CONFIG_BATTERY_MAX1721X is not set
+# CONFIG_BATTERY_PM8916_BMS_VM is not set
CONFIG_BATTERY_QCOM_BATTMGR=m
CONFIG_BATTERY_RT5033=m
CONFIG_BATTERY_SAMSUNG_SDI=y
@@ -715,6 +740,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM2711_THERMAL=m
CONFIG_BCM2835_MBOX=y
@@ -867,7 +901,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCMSTB_L2_IRQ=y
# CONFIG_BRCM_TRACING is not set
CONFIG_BRCMUTIL=m
@@ -980,7 +1013,6 @@ CONFIG_CADENCE_WATCHDOG=m
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -1017,6 +1049,8 @@ CONFIG_CAN_NETLINK=y
CONFIG_CAN_PEAK_PCIEFD=m
CONFIG_CAN_PEAK_USB=m
CONFIG_CAN_RAW=m
+# CONFIG_CAN_RCAR_CANFD is not set
+# CONFIG_CAN_RCAR is not set
# CONFIG_CAN_SJA1000 is not set
CONFIG_CAN_SLCAN=m
# CONFIG_CAN_SOFTING is not set
@@ -1075,6 +1109,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -1117,6 +1152,7 @@ CONFIG_CHARGER_MAX77650=m
CONFIG_CHARGER_MAX77976=m
# CONFIG_CHARGER_MAX8903 is not set
CONFIG_CHARGER_MT6370=m
+# CONFIG_CHARGER_PM8916_LBC is not set
# CONFIG_CHARGER_QCOM_SMB2 is not set
CONFIG_CHARGER_QCOM_SMBB=m
CONFIG_CHARGER_RK817=m
@@ -1168,6 +1204,7 @@ CONFIG_CIO2_BRIDGE=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM2711_DVP=m
CONFIG_CLK_BCM2835=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
CONFIG_CLK_ICST=y
@@ -1183,6 +1220,7 @@ CONFIG_CLK_LS1028A_PLLDIG=y
CONFIG_CLK_PX30=y
CONFIG_CLK_QORIQ=y
CONFIG_CLK_RASPBERRYPI=y
+# CONFIG_CLK_RCAR_USB2_CLOCK_SEL is not set
CONFIG_CLK_RK3036=y
CONFIG_CLK_RK312X=y
CONFIG_CLK_RK3188=y
@@ -1268,6 +1306,8 @@ CONFIG_COMMON_CLK_QCOM=y
CONFIG_COMMON_CLK_RK808=m
CONFIG_COMMON_CLK_ROCKCHIP=y
CONFIG_COMMON_CLK_RS9_PCIE=m
+CONFIG_COMMON_CLK_S4_PERIPHERALS=y
+CONFIG_COMMON_CLK_S4_PLL=y
CONFIG_COMMON_CLK_SCMI=y
CONFIG_COMMON_CLK_SCPI=m
# CONFIG_COMMON_CLK_SI514 is not set
@@ -1299,7 +1339,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1387,6 +1426,7 @@ CONFIG_CROS_EC_UART=m
CONFIG_CROS_EC_VBC=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1533,6 +1573,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1644,6 +1689,7 @@ CONFIG_DE2104X=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1732,7 +1778,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
# CONFIG_DETECT_HUNG_TASK is not set
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1844,6 +1889,7 @@ CONFIG_DPOT_DAC=m
# CONFIG_DPS310 is not set
CONFIG_DRAGONRISE_FF=y
# CONFIG_DRBD_FAULT_INJECTION is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
CONFIG_DRM_ACCEL_QAIC=m
CONFIG_DRM_ACCEL=y
CONFIG_DRM_AMD_ACP=y
@@ -1927,6 +1973,7 @@ CONFIG_DRM_IMX8QXP_LDB=m
CONFIG_DRM_IMX8QXP_PIXEL_COMBINER=m
CONFIG_DRM_IMX8QXP_PIXEL_LINK=m
CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI=m
+CONFIG_DRM_IMX93_MIPI_DSI=m
CONFIG_DRM_IMX_DCSS=m
CONFIG_DRM_IMX_LCDC=m
CONFIG_DRM_IMX_LCDIF=m
@@ -1990,9 +2037,11 @@ CONFIG_DRM_PANEL_HIMAX_HX8394=m
CONFIG_DRM_PANEL_ILITEK_IL9322=m
CONFIG_DRM_PANEL_ILITEK_ILI9341=m
CONFIG_DRM_PANEL_ILITEK_ILI9881C=m
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
CONFIG_DRM_PANEL_KHADAS_TS050=m
@@ -2022,6 +2071,7 @@ CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m
CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
CONFIG_DRM_PANEL_RAYDIUM_RM68200=m
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -2066,13 +2116,16 @@ CONFIG_DRM_PL111=m
CONFIG_DRM_QXL=m
CONFIG_DRM_RADEON=m
CONFIG_DRM_RADEON_USERPTR=y
+# CONFIG_DRM_RCAR_DU is not set
# CONFIG_DRM_RCAR_DW_HDMI is not set
# CONFIG_DRM_RCAR_LVDS is not set
# CONFIG_DRM_RCAR_MIPI_DSI is not set
# CONFIG_DRM_RCAR_USE_LVDS is not set
# CONFIG_DRM_RCAR_USE_MIPI_DSI is not set
CONFIG_DRM_ROCKCHIP=m
+# CONFIG_DRM_RZG2L_MIPI_DSI is not set
CONFIG_DRM_SAMSUNG_DSIM=m
+# CONFIG_DRM_SHMOBILE is not set
# CONFIG_DRM_SII902X is not set
CONFIG_DRM_SII9234=m
# CONFIG_DRM_SIL_SII8620 is not set
@@ -2092,7 +2145,7 @@ CONFIG_DRM_TEGRA_STAGING=y
# CONFIG_DRM_THINE_THC63LVD1024 is not set
CONFIG_DRM_TI_DLPC3433=m
CONFIG_DRM_TIDSS=m
-# CONFIG_DRM_TI_SN65DSI83 is not set
+CONFIG_DRM_TI_SN65DSI83=m
CONFIG_DRM_TI_SN65DSI86=m
CONFIG_DRM_TI_TFP410=m
CONFIG_DRM_TI_TPD12S015=m
@@ -2260,6 +2313,7 @@ CONFIG_EDAC_LEGACY_SYSFS=y
CONFIG_EDAC_QCOM=m
CONFIG_EDAC_SYNOPSYS=m
CONFIG_EDAC_THUNDERX=m
+CONFIG_EDAC_VERSAL=m
CONFIG_EDAC_XGENE=m
CONFIG_EDAC=y
CONFIG_EDAC_ZYNQMP=m
@@ -2269,7 +2323,6 @@ CONFIG_EEPROM_AT24=m
CONFIG_EEPROM_AT25=m
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EFI_ARMSTUB_DTB_LOADER=y
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -2392,7 +2445,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2546,6 +2599,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
# CONFIG_FTWDT010_WATCHDOG is not set
+CONFIG_FUEL_GAUGE_MM8013=m
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2676,6 +2730,7 @@ CONFIG_GPIO_PCI_IDIO_16=m
# CONFIG_GPIO_PISOSR is not set
CONFIG_GPIO_PL061=y
CONFIG_GPIO_RASPBERRYPI_EXP=m
+CONFIG_GPIO_RCAR=m
# CONFIG_GPIO_RDC321X is not set
CONFIG_GPIO_ROCKCHIP=y
# CONFIG_GPIO_SAMA5D2_PIOBU is not set
@@ -2905,6 +2960,7 @@ CONFIG_HNS_ENET=m
CONFIG_HOLTEK_FF=y
# CONFIG_HOSTAP is not set
CONFIG_HOTPLUG_CPU=y
+CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA=m
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -3058,9 +3114,13 @@ CONFIG_I2C_PXA=m
CONFIG_I2C_QCOM_CCI=m
CONFIG_I2C_QCOM_GENI=m
CONFIG_I2C_QUP=m
+# CONFIG_I2C_RCAR is not set
+# CONFIG_I2C_RIIC is not set
CONFIG_I2C_RK3X=y
# CONFIG_I2C_ROBOTFUZZ_OSIF is not set
+# CONFIG_I2C_RZV2M is not set
CONFIG_I2C_SCMI=m
+# CONFIG_I2C_SH_MOBILE is not set
CONFIG_I2C_SI470X=m
# CONFIG_I2C_SI4713 is not set
CONFIG_I2C_SIMTEC=m
@@ -3102,6 +3162,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -3194,7 +3255,6 @@ CONFIG_IMA_NG_TEMPLATE=y
CONFIG_IMA_READ_POLICY=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -3403,6 +3463,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=m
CONFIG_INTERCONNECT_QCOM_SDM845=m
# CONFIG_INTERCONNECT_QCOM_SDX55 is not set
# CONFIG_INTERCONNECT_QCOM_SDX65 is not set
+CONFIG_INTERCONNECT_QCOM_SDX75=m
# CONFIG_INTERCONNECT_QCOM_SM6350 is not set
CONFIG_INTERCONNECT_QCOM_SM8150=m
CONFIG_INTERCONNECT_QCOM_SM8250=m
@@ -3465,8 +3526,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
CONFIG_IPMB_DEVICE_INTERFACE=m
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3477,6 +3536,7 @@ CONFIG_IPMI_POWEROFF=m
CONFIG_IPMI_SI=m
CONFIG_IPMI_SSIF=m
CONFIG_IPMI_WATCHDOG=m
+# CONFIG_IPMMU_VMSA is not set
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
@@ -3799,7 +3859,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3835,6 +3895,7 @@ CONFIG_KUNIT=m
CONFIG_KUNIT_TEST=m
# CONFIG_KUNPENG_HCCS is not set
CONFIG_KUSER_HELPERS=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3892,6 +3953,7 @@ CONFIG_LEDS_GPIO=m
CONFIG_LEDS_GROUP_MULTICOLOR=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -4019,6 +4081,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -4033,6 +4096,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -4041,6 +4105,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
@@ -4112,6 +4177,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
CONFIG_MCP320X=m
CONFIG_MCP3422=m
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -4122,6 +4188,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -4143,7 +4210,7 @@ CONFIG_MDIO_I2C=m
CONFIG_MDIO_IPQ8064=m
# CONFIG_MDIO_MSCC_MIIM is not set
CONFIG_MDIO_MVUSB=m
-# CONFIG_MDIO_OCTEON is not set
+CONFIG_MDIO_OCTEON=m
# CONFIG_MDIO_SUN4I is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MDIO_XGENE=m
@@ -4157,6 +4224,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -4398,18 +4466,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -4497,7 +4569,11 @@ CONFIG_MMC_SDHCI_PLTFM=m
CONFIG_MMC_SDHCI_PXAV3=m
CONFIG_MMC_SDHCI_TEGRA=m
CONFIG_MMC_SDHCI_XENON=m
+CONFIG_MMC_SDHI_INTERNAL_DMAC=m
+CONFIG_MMC_SDHI=m
+# CONFIG_MMC_SDHI_SYS_DMAC is not set
CONFIG_MMC_SDRICOH_CS=m
+# CONFIG_MMC_SH_MMCIF is not set
CONFIG_MMC_SPI=m
# CONFIG_MMC_STM32_SDMMC is not set
CONFIG_MMC_SUNXI=m
@@ -4530,6 +4606,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -4620,6 +4699,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -4678,6 +4759,7 @@ CONFIG_MTD_NAND_NANDSIM=m
# CONFIG_MTD_NAND_PL35X is not set
# CONFIG_MTD_NAND_PLATFORM is not set
# CONFIG_MTD_NAND_QCOM is not set
+# CONFIG_MTD_NAND_RENESAS is not set
# CONFIG_MTD_NAND_RICOH is not set
# CONFIG_MTD_NAND_ROCKCHIP is not set
# CONFIG_MTD_NAND_SUNXI is not set
@@ -4745,7 +4827,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
CONFIG_MWL8K=m
-# CONFIG_MX3_IPU is not set
CONFIG_MXC4005=m
CONFIG_MXC6255=m
# CONFIG_MXS_DMA is not set
@@ -4799,9 +4880,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4874,12 +4952,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4972,6 +5050,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4984,15 +5063,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
CONFIG_NET_SB1000=y
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -5026,6 +5102,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -5163,7 +5240,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -5357,11 +5434,13 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_NVIDIA_SHIELD_FF=y
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
CONFIG_NVME_APPLE=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_APPLE_EFUSES=m
# CONFIG_NVMEM_IMX_IIM is not set
@@ -5397,7 +5476,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -5545,6 +5626,7 @@ CONFIG_PCI_AARDVARK=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -5583,10 +5665,13 @@ CONFIG_PCIE_MOBIVEIL=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCIE_PTM=y
CONFIG_PCIE_QCOM=y
+# CONFIG_PCIE_RCAR_GEN4_HOST is not set
+# CONFIG_PCIE_RCAR_HOST is not set
CONFIG_PCIE_ROCKCHIP_DW_HOST=y
CONFIG_PCIE_ROCKCHIP_HOST=y
CONFIG_PCIE_TEGRA194_HOST=y
CONFIG_PCIE_XILINX_CPM=y
+CONFIG_PCIE_XILINX_DMA_PL=y
CONFIG_PCIE_XILINX_NWL=y
CONFIG_PCIE_XILINX=y
# CONFIG_PCI_FTPCI100 is not set
@@ -5620,6 +5705,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -5669,7 +5755,7 @@ CONFIG_PHY_MESON_G12A_MIPI_DPHY_ANALOG=y
CONFIG_PHY_MESON_G12A_USB2=y
CONFIG_PHY_MESON_G12A_USB3_PCIE=m
CONFIG_PHY_MESON_GXL_USB2=m
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
+CONFIG_PHY_MIXEL_LVDS_PHY=m
CONFIG_PHY_MIXEL_MIPI_DPHY=m
CONFIG_PHY_MVEBU_A3700_COMPHY=m
CONFIG_PHY_MVEBU_A3700_UTMI=m
@@ -5703,6 +5789,11 @@ CONFIG_PHY_QCOM_USB_HSIC=m
CONFIG_PHY_QCOM_USB_HS=m
CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=m
CONFIG_PHY_QCOM_USB_SS=m
+# CONFIG_PHY_R8A779F0_ETHERNET_SERDES is not set
+# CONFIG_PHY_RCAR_GEN2 is not set
+# CONFIG_PHY_RCAR_GEN3_PCIE is not set
+CONFIG_PHY_RCAR_GEN3_USB2=m
+# CONFIG_PHY_RCAR_GEN3_USB3 is not set
CONFIG_PHY_ROCKCHIP_DPHY_RX0=m
CONFIG_PHY_ROCKCHIP_DP=m
CONFIG_PHY_ROCKCHIP_EMMC=m
@@ -5735,6 +5826,7 @@ CONFIG_PINCONF=y
CONFIG_PINCTRL_ALDERLAKE=m
CONFIG_PINCTRL_AMD=y
CONFIG_PINCTRL_AMLOGIC_C3=y
+CONFIG_PINCTRL_AMLOGIC_T7=y
CONFIG_PINCTRL_APPLE_GPIO=m
CONFIG_PINCTRL_AS3722=y
CONFIG_PINCTRL_AXP209=m
@@ -5855,12 +5947,13 @@ CONFIG_PINCTRL_SUN50I_H6=y
# CONFIG_PINCTRL_SUN8I_A33 is not set
# CONFIG_PINCTRL_SUN8I_A83T is not set
# CONFIG_PINCTRL_SUN8I_A83T_R is not set
-# CONFIG_PINCTRL_SUN8I_H3 is not set
CONFIG_PINCTRL_SUN8I_H3_R=y
+CONFIG_PINCTRL_SUN8I_H3=y
# CONFIG_PINCTRL_SUN8I_V3S is not set
# CONFIG_PINCTRL_SUN9I_A80 is not set
# CONFIG_PINCTRL_SUN9I_A80_R is not set
# CONFIG_PINCTRL_SX150X is not set
+CONFIG_PINCTRL_TEGRA234=y
CONFIG_PINCTRL=y
CONFIG_PINCTRL_ZYNQMP=y
# CONFIG_PING is not set
@@ -5906,7 +5999,6 @@ CONFIG_POWERCAP=y
CONFIG_POWER_MLXBF=m
CONFIG_POWER_RESET_AS3722=y
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -6041,6 +6133,8 @@ CONFIG_PWM_MESON=m
CONFIG_PWM_OMAP_DMTIMER=m
CONFIG_PWM_PCA9685=m
CONFIG_PWM_RASPBERRYPI_POE=m
+# CONFIG_PWM_RCAR is not set
+# CONFIG_PWM_RENESAS_TPU is not set
CONFIG_PWM_ROCKCHIP=m
CONFIG_PWM_STMPE=y
CONFIG_PWM_SUN4I=m
@@ -6107,6 +6201,8 @@ CONFIG_QCOM_Q6V5_WCSS=m
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
CONFIG_QCOM_QFPROM=m
CONFIG_QCOM_QMI_HELPERS=m
+CONFIG_QCOM_QSEECOM_UEFISECAPP=y
+CONFIG_QCOM_QSEECOM=y
CONFIG_QCOM_RAMP_CTRL=m
CONFIG_QCOM_RMTFS_MEM=m
CONFIG_QCOM_RPMHPD=y
@@ -6213,6 +6309,10 @@ CONFIG_RASPBERRYPI_POWER=y
CONFIG_RATIONAL_KUNIT_TEST=m
# CONFIG_RAVE_SP_CORE is not set
# CONFIG_RBTREE_TEST is not set
+# CONFIG_RCAR_DMAC is not set
+# CONFIG_RCAR_GEN3_THERMAL is not set
+# CONFIG_RCAR_REMOTEPROC is not set
+# CONFIG_RCAR_THERMAL is not set
CONFIG_RC_ATI_REMOTE=m
CONFIG_RC_CORE=y
CONFIG_RC_DECODERS=y
@@ -6248,7 +6348,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=y
@@ -6292,6 +6392,7 @@ CONFIG_REGULATOR_HI655X=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77650=m
CONFIG_REGULATOR_MAX77686=m
@@ -6378,7 +6479,14 @@ CONFIG_RELOCATABLE=y
# CONFIG_REMOTEPROC_CDEV is not set
CONFIG_REMOTEPROC=y
CONFIG_REMOTE_TARGET=m
+# CONFIG_RENESAS_OSTM is not set
# CONFIG_RENESAS_PHY is not set
+# CONFIG_RENESAS_RPCIF is not set
+# CONFIG_RENESAS_RZAWDT is not set
+# CONFIG_RENESAS_RZG2LWDT is not set
+# CONFIG_RENESAS_RZN1WDT is not set
+# CONFIG_RENESAS_USB_DMAC is not set
+# CONFIG_RENESAS_WDT is not set
# CONFIG_RESET_ATTACK_MITIGATION is not set
CONFIG_RESET_CONTROLLER=y
CONFIG_RESET_HISI=y
@@ -6389,6 +6497,7 @@ CONFIG_RESET_MESON=m
CONFIG_RESET_QCOM_AOSS=y
CONFIG_RESET_QCOM_PDC=m
CONFIG_RESET_RASPBERRYPI=y
+CONFIG_RESET_RZG2L_USBPHY_CTRL=m
CONFIG_RESET_SCMI=y
CONFIG_RESET_SIMPLE=y
CONFIG_RESET_TI_SCI=m
@@ -6445,6 +6554,7 @@ CONFIG_ROCKCHIP_VOP2=y
CONFIG_ROCKCHIP_VOP=y
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -6504,7 +6614,6 @@ CONFIG_RTC_DRV_ARMADA38X=m
CONFIG_RTC_DRV_AS3722=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
CONFIG_RTC_DRV_CADENCE=m
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_CROS_EC=m
@@ -6592,6 +6701,7 @@ CONFIG_RTC_DRV_RX8581=m
CONFIG_RTC_DRV_S35390A=m
# CONFIG_RTC_DRV_SA1100 is not set
CONFIG_RTC_DRV_SD3078=m
+# CONFIG_RTC_DRV_SH is not set
CONFIG_RTC_DRV_SNVS=m
CONFIG_RTC_DRV_STK17TA8=m
CONFIG_RTC_DRV_SUN6I=y
@@ -6667,6 +6777,10 @@ CONFIG_RV_REACT_PRINTK=y
CONFIG_RV=y
CONFIG_RXKAD=y
# CONFIG_RXPERF is not set
+# CONFIG_RZ_DMAC is not set
+# CONFIG_RZG2L_ADC is not set
+# CONFIG_RZG2L_THERMAL is not set
+# CONFIG_RZ_MTU3 is not set
CONFIG_S2IO=m
# CONFIG_S390_KPROBES_SANITY_TEST is not set
# CONFIG_S390_MODULES_SANITY_TEST is not set
@@ -6687,6 +6801,7 @@ CONFIG_SATA_MV=m
CONFIG_SATA_PMP=y
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_RCAR is not set
CONFIG_SATA_SIL24=m
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
@@ -6836,6 +6951,7 @@ CONFIG_SCSI_UFS_HISI=m
CONFIG_SCSI_UFS_HPB=y
CONFIG_SCSI_UFS_HWMON=y
CONFIG_SCSI_UFS_QCOM=m
+# CONFIG_SCSI_UFS_RENESAS is not set
CONFIG_SCSI_UFS_TI_J721E=m
CONFIG_SCSI_VIRTIO=m
CONFIG_SCSI_WD719X=m
@@ -6865,11 +6981,12 @@ CONFIG_SDM_VIDEOCC_845=m
# CONFIG_SDX_GCC_75 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -7011,6 +7128,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -7071,6 +7189,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -7144,6 +7263,7 @@ CONFIG_SERIAL_8250_CS=m
CONFIG_SERIAL_8250_DFL=m
CONFIG_SERIAL_8250_DMA=y
CONFIG_SERIAL_8250_DW=y
+# CONFIG_SERIAL_8250_EM is not set
CONFIG_SERIAL_8250_EXAR=m
CONFIG_SERIAL_8250_EXTENDED=y
# CONFIG_SERIAL_8250_FINTEK is not set
@@ -7206,6 +7326,11 @@ CONFIG_SERIAL_SC16IS7XX_I2C=y
CONFIG_SERIAL_SC16IS7XX=m
CONFIG_SERIAL_SC16IS7XX_SPI=y
# CONFIG_SERIAL_SCCNXP is not set
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_SH_SCI_DMA=y
+CONFIG_SERIAL_SH_SCI_EARLYCON=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=18
+CONFIG_SERIAL_SH_SCI=y
# CONFIG_SERIAL_SIFIVE is not set
# CONFIG_SERIAL_SPRD is not set
# CONFIG_SERIAL_ST_ASC is not set
@@ -7272,7 +7397,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -7286,12 +7410,14 @@ CONFIG_SMC91X=m
# CONFIG_SM_CAMCC_6350 is not set
# CONFIG_SM_CAMCC_8250 is not set
# CONFIG_SM_CAMCC_8450 is not set
+# CONFIG_SM_CAMCC_8550 is not set
CONFIG_SMC_DIAG=m
CONFIG_SMC=m
# CONFIG_SM_DISPCC_8250 is not set
CONFIG_SM_DISPCC_8450=m
# CONFIG_SM_DISPCC_8550 is not set
# CONFIG_SM_FTL is not set
+# CONFIG_SM_GCC_4450 is not set
# CONFIG_SM_GCC_6115 is not set
# CONFIG_SM_GCC_6125 is not set
# CONFIG_SM_GCC_6350 is not set
@@ -7326,7 +7452,7 @@ CONFIG_SMS_USB_DRV=m
# CONFIG_SM_TCSRCC_8550 is not set
# CONFIG_SM_VIDEOCC_8150 is not set
# CONFIG_SM_VIDEOCC_8250 is not set
-# CONFIG_SM_VIDEOCC_8350 is not set
+CONFIG_SM_VIDEOCC_8350=m
# CONFIG_SM_VIDEOCC_8450 is not set
# CONFIG_SM_VIDEOCC_8550 is not set
CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0
@@ -7395,6 +7521,7 @@ CONFIG_SND_FM801=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -7547,8 +7674,10 @@ CONFIG_SND_SOC_APQ8016_SBC=m
CONFIG_SND_SOC_ARNDALE=m
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -7754,6 +7883,7 @@ CONFIG_SND_SOC_PCM512x_SPI=m
# CONFIG_SND_SOC_PEB2466 is not set
CONFIG_SND_SOC_QCOM=m
CONFIG_SND_SOC_QDSP6=m
+# CONFIG_SND_SOC_RCAR is not set
CONFIG_SND_SOC_RK3288_HDMI_ANALOG=m
CONFIG_SND_SOC_RK3328=m
CONFIG_SND_SOC_RK3399_GRU_SOUND=m
@@ -7788,6 +7918,8 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
+# CONFIG_SND_SOC_RZ is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811=m
@@ -7799,6 +7931,7 @@ CONFIG_SND_SOC_SC8280XP=m
CONFIG_SND_SOC_SDM845=m
# CONFIG_SND_SOC_SDW_MOCKUP is not set
CONFIG_SND_SOC_SGTL5000=m
+# CONFIG_SND_SOC_SH4_FSI is not set
CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m
CONFIG_SND_SOC_SIMPLE_MUX=m
# CONFIG_SND_SOC_SM8250 is not set
@@ -8091,8 +8224,12 @@ CONFIG_SPI_QCOM_GENI=m
CONFIG_SPI_QCOM_QSPI=m
CONFIG_SPI_QUP=m
CONFIG_SPI_ROCKCHIP=m
-# CONFIG_SPI_ROCKCHIP_SFC is not set
+CONFIG_SPI_ROCKCHIP_SFC=m
+# CONFIG_SPI_RSPI is not set
+# CONFIG_SPI_RZV2M_CSI is not set
# CONFIG_SPI_SC18IS602 is not set
+# CONFIG_SPI_SH_HSPI is not set
+# CONFIG_SPI_SH_MSIOF is not set
# CONFIG_SPI_SIFIVE is not set
# CONFIG_SPI_SLAVE is not set
CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m
@@ -8325,6 +8462,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -8401,6 +8539,7 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -8473,7 +8612,7 @@ CONFIG_TIFM_7XX1=m
CONFIG_TIFM_CORE=m
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
+CONFIG_TI_ICSSG_PRUETH=m
CONFIG_TI_ICSS_IEP=m
CONFIG_TI_K3_AM65_CPSW_NUSS=m
CONFIG_TI_K3_AM65_CPSW_SWITCHDEV=y
@@ -8688,6 +8827,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
CONFIG_TYPEC_QCOM_PMIC=m
# CONFIG_TYPEC_RT1711H is not set
@@ -8699,7 +8839,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TPS6598X=m
CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_WUSB3801=m
CONFIG_TYPHOON=m
CONFIG_UACCE=m
@@ -8792,6 +8931,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
@@ -8856,6 +8996,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y
# CONFIG_USB_EHSET_TEST_FIXTURE is not set
CONFIG_USB_EMI26=m
CONFIG_USB_EMI62=m
+# CONFIG_USB_EMXX is not set
CONFIG_USB_EPSON2888=y
# CONFIG_USB_ETH is not set
CONFIG_USB_EZUSB_FX2=m
@@ -8968,6 +9109,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_M66592 is not set
CONFIG_USB_MA901=m
@@ -9028,6 +9170,7 @@ CONFIG_USB_OTG_FSM=m
# CONFIG_USB_OTG_PRODUCTLIST is not set
CONFIG_USB_OTG=y
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -9044,6 +9187,9 @@ CONFIG_USB_QCOM_EUD=m
CONFIG_USB_RAINSHADOW_CEC=m
# CONFIG_USB_RAREMONO is not set
CONFIG_USB_RAW_GADGET=m
+# CONFIG_USB_RENESAS_USB3 is not set
+# CONFIG_USB_RENESAS_USBF is not set
+# CONFIG_USB_RENESAS_USBHS is not set
CONFIG_USB_ROLE_SWITCH=y
CONFIG_USB_RTL8150=m
CONFIG_USB_RTL8152=m
@@ -9155,6 +9301,7 @@ CONFIG_USB_XHCI_MVEBU=m
CONFIG_USB_XHCI_PCI_RENESAS=y
CONFIG_USB_XHCI_PCI=y
CONFIG_USB_XHCI_PLATFORM=m
+CONFIG_USB_XHCI_RCAR=m
CONFIG_USB_XHCI_TEGRA=m
CONFIG_USB_XUSBATM=m
CONFIG_USB=y
@@ -9325,7 +9472,7 @@ CONFIG_VIDEO_IMX8_ISI=m
CONFIG_VIDEO_IMX8_ISI_M2M=y
CONFIG_VIDEO_IMX8_JPEG=m
CONFIG_VIDEO_IMX8MQ_MIPI_CSI2=m
-CONFIG_VIDEO_IMX_MEDIA=m
+# CONFIG_VIDEO_IMX_MEDIA is not set
CONFIG_VIDEO_IMX_MIPI_CSIS=m
CONFIG_VIDEO_IMX_PXP=m
# CONFIG_VIDEO_IPU3_CIO2 is not set
@@ -9342,10 +9489,12 @@ CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
CONFIG_VIDEO_MESON_GE2D=m
CONFIG_VIDEO_MESON_VDEC=m
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -9393,12 +9542,19 @@ CONFIG_VIDEO_PVRUSB2=m
CONFIG_VIDEO_PVRUSB2_SYSFS=y
CONFIG_VIDEO_QCOM_CAMSS=m
CONFIG_VIDEO_QCOM_VENUS=m
+# CONFIG_VIDEO_RCAR_CSI2 is not set
+# CONFIG_VIDEO_RCAR_ISP is not set
+# CONFIG_VIDEO_RCAR_VIN is not set
CONFIG_VIDEO_RDACM20=m
# CONFIG_VIDEO_RDACM21 is not set
+# CONFIG_VIDEO_RENESAS_FCP is not set
+# CONFIG_VIDEO_RENESAS_JPU is not set
CONFIG_VIDEO_RJ54N1=m
CONFIG_VIDEO_ROCKCHIP_ISP1=m
CONFIG_VIDEO_ROCKCHIP_RGA=m
CONFIG_VIDEO_ROCKCHIP_VDEC=m
+# CONFIG_VIDEO_RZG2L_CRU is not set
+# CONFIG_VIDEO_RZG2L_CSI2 is not set
CONFIG_VIDEO_S5C73M3=m
CONFIG_VIDEO_S5K4ECGX=m
CONFIG_VIDEO_S5K5BAF=m
@@ -9449,6 +9605,7 @@ CONFIG_VIDEO_THS7303=m
CONFIG_VIDEO_THS8200=m
CONFIG_VIDEO_TI_CAL=m
CONFIG_VIDEO_TI_CAL_MC=y
+CONFIG_VIDEO_TI_J721E_CSI2RX=m
CONFIG_VIDEO_TLV320AIC23B=m
CONFIG_VIDEO_TM6000_ALSA=m
CONFIG_VIDEO_TM6000_DVB=m
@@ -9654,6 +9811,7 @@ CONFIG_XDP_SOCKETS=y
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
# CONFIG_XEN is not set
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -9772,19 +9930,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-64k-debug-rhel.config b/SOURCES/kernel-aarch64-64k-debug-rhel.config
index a38ed02..3c11aac 100644
--- a/SOURCES/kernel-aarch64-64k-debug-rhel.config
+++ b/SOURCES/kernel-aarch64-64k-debug-rhel.config
@@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m
# CONFIG_AMD_XGBE_DCB is not set
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
# CONFIG_AMT is not set
# CONFIG_ANDROID_BINDER_IPC is not set
@@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
CONFIG_ARCH_MXC=y
# CONFIG_ARCH_NPCM is not set
CONFIG_ARCH_NXP=y
+CONFIG_ARCH_PENSANDO=y
CONFIG_ARCH_QCOM=y
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_REALTEK is not set
@@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -378,6 +381,7 @@ CONFIG_ARM_PMU=y
# CONFIG_ARM_QCOM_CPUFREQ_HW is not set
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
# CONFIG_ARM_SCMI_POWER_CONTROL is not set
CONFIG_ARM_SCMI_POWER_DOMAIN=m
CONFIG_ARM_SCMI_PROTOCOL=y
@@ -410,6 +414,7 @@ CONFIG_ARM_TI_CPUFREQ=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -523,6 +528,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -658,7 +664,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -751,7 +756,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -831,6 +835,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -912,6 +917,7 @@ CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM_NS2=y
CONFIG_CLK_BCM_SR=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -1004,7 +1010,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1088,7 +1093,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
CONFIG_CRYPTO_AES_ARM64=y
-CONFIG_CRYPTO_AES_GCM_P10=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_ANSI_CPRNG=m
@@ -1105,7 +1109,6 @@ CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
CONFIG_CRYPTO_CHACHA20_NEON=y
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CMAC=y
# CONFIG_CRYPTO_CRC32C_VPMSUM is not set
@@ -1192,6 +1195,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1216,7 +1224,6 @@ CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
CONFIG_CRYPTO_POLY1305_NEON=y
-# CONFIG_CRYPTO_POLY1305_P10 is not set
# CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1488,6 +1495,7 @@ CONFIG_DPAA2_CONSOLE=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1560,6 +1568,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
CONFIG_DRM_IMX_DCSS=m
# CONFIG_DRM_IMX_LCDC is not set
# CONFIG_DRM_IMX_LCDIF is not set
@@ -1589,38 +1598,92 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LG_LG4573 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
# CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1640,7 +1703,8 @@ CONFIG_DRM_RADEON_USERPTR=y
# CONFIG_DRM_SIMPLE_BRIDGE is not set
CONFIG_DRM_SIMPLEDRM=y
# CONFIG_DRM_SSD130X is not set
-# CONFIG_DRM_TEGRA is not set
+# CONFIG_DRM_TEGRA_DEBUG is not set
+CONFIG_DRM_TEGRA=m
# CONFIG_DRM_THINE_THC63LVD1024 is not set
# CONFIG_DRM_TI_DLPC3433 is not set
# CONFIG_DRM_TIDSS is not set
@@ -1808,7 +1872,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1842,7 +1905,12 @@ CONFIG_ENIC=m
# CONFIG_EPIC100 is not set
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1915,7 +1983,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2033,7 +2101,9 @@ CONFIG_FSL_PQ_MDIO=m
# CONFIG_FSL_RCPM is not set
CONFIG_FSL_XGMAC_MDIO=m
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -2041,6 +2111,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2193,6 +2264,7 @@ CONFIG_GPIO_XLP=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
CONFIG_GUP_TEST=y
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2376,6 +2448,7 @@ CONFIG_HNS_ENET=m
CONFIG_HNS=m
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2539,6 +2612,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2552,6 +2626,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2621,7 +2696,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2755,6 +2829,7 @@ CONFIG_INPUT_SPARSEKMAP=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2797,6 +2872,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2824,7 +2900,8 @@ CONFIG_IOMMU_DEBUGFS=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3126,7 +3203,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3156,6 +3233,7 @@ CONFIG_KUNIT_TEST=m
CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3323,6 +3401,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3337,6 +3416,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3345,6 +3425,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3411,6 +3492,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3448,6 +3530,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3514,7 +3597,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3655,6 +3738,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3771,6 +3855,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3843,6 +3930,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3920,7 +4009,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
# CONFIG_MWL8K is not set
-# CONFIG_MX3_IPU is not set
# CONFIG_MXC4005 is not set
# CONFIG_MXC6255 is not set
# CONFIG_MXS_DMA is not set
@@ -3965,9 +4053,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4089,6 +4174,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4101,15 +4187,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4138,6 +4221,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4242,7 +4326,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4440,9 +4524,11 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_BCM_OCOTP is not set
# CONFIG_NVMEM_IMX_IIM is not set
@@ -4469,7 +4555,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4589,6 +4677,7 @@ CONFIG_PCC=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -4654,6 +4743,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4733,6 +4823,7 @@ CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_AMD is not set
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4821,7 +4912,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -4978,6 +5068,7 @@ CONFIG_QCOM_L3_PMU=y
# CONFIG_QCOM_PDC is not set
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
# CONFIG_QCOM_QFPROM is not set
+# CONFIG_QCOM_QSEECOM is not set
# CONFIG_QCOM_RAMP_CTRL is not set
# CONFIG_QCOM_RMTFS_MEM is not set
# CONFIG_QCOM_RPMH is not set
@@ -5006,7 +5097,7 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
+CONFIG_QORIQ_THERMAL=m
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -5114,6 +5205,7 @@ CONFIG_REGULATOR_GPIO=y
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX20086 is not set
# CONFIG_REGULATOR_MAX20411 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77686=m
# CONFIG_REGULATOR_MAX77826 is not set
@@ -5218,6 +5310,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5258,7 +5351,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5548,6 +5640,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5703,6 +5796,7 @@ CONFIG_SENSORS_LTC2945=m
# CONFIG_SENSORS_LTC2978 is not set
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
# CONFIG_SENSORS_LTC4151 is not set
@@ -5761,6 +5855,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
# CONFIG_SENSORS_PMBUS is not set
+# CONFIG_SENSORS_POWERZ is not set
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -5937,7 +6032,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -6021,6 +6115,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6158,8 +6253,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6250,6 +6347,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6364,12 +6462,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6392,6 +6484,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6588,7 +6681,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
CONFIG_SOC_IMX8M=y
CONFIG_SOC_IMX9=m
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
@@ -6819,6 +6911,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6885,6 +6978,7 @@ CONFIG_TEST_LIST_SORT=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7116,6 +7210,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7194,6 +7289,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CONN_GPIO=m
@@ -7297,6 +7393,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7340,6 +7437,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7490,7 +7588,10 @@ CONFIG_VEXPRESS_CONFIG=y
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
CONFIG_VFIO_FSL_MC=m
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
# CONFIG_VFIO_MDEV is not set
@@ -7605,11 +7706,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7944,19 +8047,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-64k-rhel.config b/SOURCES/kernel-aarch64-64k-rhel.config
index bd46bde..77f32a4 100644
--- a/SOURCES/kernel-aarch64-64k-rhel.config
+++ b/SOURCES/kernel-aarch64-64k-rhel.config
@@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m
# CONFIG_AMD_XGBE_DCB is not set
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
# CONFIG_AMT is not set
# CONFIG_ANDROID_BINDER_IPC is not set
@@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
CONFIG_ARCH_MXC=y
# CONFIG_ARCH_NPCM is not set
CONFIG_ARCH_NXP=y
+CONFIG_ARCH_PENSANDO=y
CONFIG_ARCH_QCOM=y
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_REALTEK is not set
@@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -378,6 +381,7 @@ CONFIG_ARM_PMU=y
# CONFIG_ARM_QCOM_CPUFREQ_HW is not set
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
# CONFIG_ARM_SCMI_POWER_CONTROL is not set
CONFIG_ARM_SCMI_POWER_DOMAIN=m
CONFIG_ARM_SCMI_PROTOCOL=y
@@ -410,6 +414,7 @@ CONFIG_ARM_TI_CPUFREQ=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -523,6 +528,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -658,7 +664,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -751,7 +756,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -831,6 +835,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
# CONFIG_CFG80211_DEBUGFS is not set
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -912,6 +917,7 @@ CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM_NS2=y
CONFIG_CLK_BCM_SR=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -1004,7 +1010,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1088,7 +1093,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
CONFIG_CRYPTO_AES_ARM64=y
-CONFIG_CRYPTO_AES_GCM_P10=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_ANSI_CPRNG=m
@@ -1105,7 +1109,6 @@ CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
CONFIG_CRYPTO_CHACHA20_NEON=y
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CMAC=y
# CONFIG_CRYPTO_CRC32C_VPMSUM is not set
@@ -1192,6 +1195,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1216,7 +1224,6 @@ CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
CONFIG_CRYPTO_POLY1305_NEON=y
-# CONFIG_CRYPTO_POLY1305_P10 is not set
# CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1480,6 +1487,7 @@ CONFIG_DPAA2_CONSOLE=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1552,6 +1560,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
CONFIG_DRM_IMX_DCSS=m
# CONFIG_DRM_IMX_LCDC is not set
# CONFIG_DRM_IMX_LCDIF is not set
@@ -1581,38 +1590,92 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LG_LG4573 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
# CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1632,7 +1695,8 @@ CONFIG_DRM_RADEON_USERPTR=y
# CONFIG_DRM_SIMPLE_BRIDGE is not set
CONFIG_DRM_SIMPLEDRM=y
# CONFIG_DRM_SSD130X is not set
-# CONFIG_DRM_TEGRA is not set
+# CONFIG_DRM_TEGRA_DEBUG is not set
+CONFIG_DRM_TEGRA=m
# CONFIG_DRM_THINE_THC63LVD1024 is not set
# CONFIG_DRM_TI_DLPC3433 is not set
# CONFIG_DRM_TIDSS is not set
@@ -1800,7 +1864,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1834,7 +1897,12 @@ CONFIG_ENIC=m
# CONFIG_EPIC100 is not set
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1899,7 +1967,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2017,7 +2085,9 @@ CONFIG_FSL_PQ_MDIO=m
# CONFIG_FSL_RCPM is not set
CONFIG_FSL_XGMAC_MDIO=m
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -2025,6 +2095,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2177,6 +2248,7 @@ CONFIG_GPIO_XLP=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2360,6 +2432,7 @@ CONFIG_HNS_ENET=m
CONFIG_HNS=m
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2523,6 +2596,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2536,6 +2610,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2605,7 +2680,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2739,6 +2813,7 @@ CONFIG_INPUT_SPARSEKMAP=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2781,6 +2856,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2808,7 +2884,8 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3104,7 +3181,7 @@ CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_KEYS_REQUEST_CACHE is not set
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3135,6 +3212,7 @@ CONFIG_KUNIT_TEST=m
CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3302,6 +3380,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3316,6 +3395,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3324,6 +3404,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3390,6 +3471,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3427,6 +3509,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3493,7 +3576,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3634,6 +3717,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3750,6 +3834,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3822,6 +3909,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3899,7 +3988,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
# CONFIG_MWL8K is not set
-# CONFIG_MX3_IPU is not set
# CONFIG_MXC4005 is not set
# CONFIG_MXC6255 is not set
# CONFIG_MXS_DMA is not set
@@ -3944,9 +4032,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4068,6 +4153,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4080,15 +4166,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4117,6 +4200,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4221,7 +4305,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4419,9 +4503,11 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_BCM_OCOTP is not set
# CONFIG_NVMEM_IMX_IIM is not set
@@ -4448,7 +4534,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4566,6 +4654,7 @@ CONFIG_PCC=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -4631,6 +4720,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4710,6 +4800,7 @@ CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_AMD is not set
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4798,7 +4889,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -4955,6 +5045,7 @@ CONFIG_QCOM_L3_PMU=y
# CONFIG_QCOM_PDC is not set
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
# CONFIG_QCOM_QFPROM is not set
+# CONFIG_QCOM_QSEECOM is not set
# CONFIG_QCOM_RAMP_CTRL is not set
# CONFIG_QCOM_RMTFS_MEM is not set
# CONFIG_QCOM_RPMH is not set
@@ -4983,7 +5074,7 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
+CONFIG_QORIQ_THERMAL=m
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -5091,6 +5182,7 @@ CONFIG_REGULATOR_GPIO=y
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX20086 is not set
# CONFIG_REGULATOR_MAX20411 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77686=m
# CONFIG_REGULATOR_MAX77826 is not set
@@ -5195,6 +5287,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5235,7 +5328,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5525,6 +5617,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5680,6 +5773,7 @@ CONFIG_SENSORS_LTC2945=m
# CONFIG_SENSORS_LTC2978 is not set
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
# CONFIG_SENSORS_LTC4151 is not set
@@ -5738,6 +5832,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
# CONFIG_SENSORS_PMBUS is not set
+# CONFIG_SENSORS_POWERZ is not set
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -5914,7 +6009,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -5998,6 +6092,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6134,8 +6229,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6226,6 +6323,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6340,12 +6438,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6368,6 +6460,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6563,7 +6656,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
CONFIG_SOC_IMX8M=y
CONFIG_SOC_IMX9=m
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
@@ -6794,6 +6886,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6860,6 +6953,7 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7091,6 +7185,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7169,6 +7264,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CONN_GPIO=m
@@ -7272,6 +7368,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7315,6 +7412,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7465,7 +7563,10 @@ CONFIG_VEXPRESS_CONFIG=y
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
CONFIG_VFIO_FSL_MC=m
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
# CONFIG_VFIO_MDEV is not set
@@ -7580,11 +7681,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7919,19 +8022,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-debug-fedora.config b/SOURCES/kernel-aarch64-debug-fedora.config
index 3a39af6..4d85066 100644
--- a/SOURCES/kernel-aarch64-debug-fedora.config
+++ b/SOURCES/kernel-aarch64-debug-fedora.config
@@ -254,6 +254,7 @@ CONFIG_AMD_XGBE_DCB=y
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
CONFIG_AMLOGIC_THERMAL=m
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
CONFIG_AMT=m
CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder"
@@ -329,12 +330,33 @@ CONFIG_ARCH_MXC=y
CONFIG_ARCH_NR_GPIO=2048
CONFIG_ARCH_NXP=y
# CONFIG_ARCH_OMAP1 is not set
+# CONFIG_ARCH_PENSANDO is not set
# CONFIG_ARCH_PXA is not set
CONFIG_ARCH_QCOM=y
+CONFIG_ARCH_R8A774A1=y
+# CONFIG_ARCH_R8A774B1 is not set
+# CONFIG_ARCH_R8A774C0 is not set
+# CONFIG_ARCH_R8A774E1 is not set
+# CONFIG_ARCH_R8A77951 is not set
+# CONFIG_ARCH_R8A77960 is not set
+# CONFIG_ARCH_R8A77961 is not set
+# CONFIG_ARCH_R8A77965 is not set
+# CONFIG_ARCH_R8A77970 is not set
+# CONFIG_ARCH_R8A77980 is not set
+# CONFIG_ARCH_R8A77990 is not set
+# CONFIG_ARCH_R8A77995 is not set
+# CONFIG_ARCH_R8A779A0 is not set
+# CONFIG_ARCH_R8A779F0 is not set
+# CONFIG_ARCH_R8A779G0 is not set
+CONFIG_ARCH_R9A07G043=y
+CONFIG_ARCH_R9A07G044=y
+CONFIG_ARCH_R9A07G054=y
+# CONFIG_ARCH_R9A08G045 is not set
+# CONFIG_ARCH_R9A09G011 is not set
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_RDA is not set
# CONFIG_ARCH_REALTEK is not set
-# CONFIG_ARCH_RENESAS is not set
+CONFIG_ARCH_RENESAS=y
CONFIG_ARCH_ROCKCHIP=y
# CONFIG_ARCH_S32 is not set
# CONFIG_ARCH_SA1100 is not set
@@ -396,6 +418,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -484,6 +507,7 @@ CONFIG_ARM_RASPBERRYPI_CPUFREQ=m
CONFIG_ARM_RK3399_DMC_DEVFREQ=m
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
CONFIG_ARM_SCMI_POWERCAP=m
CONFIG_ARM_SCMI_POWER_CONTROL=m
CONFIG_ARM_SCMI_POWER_DOMAIN=m
@@ -531,7 +555,7 @@ CONFIG_ATA_ACPI=y
CONFIG_ATA_BMDMA=y
CONFIG_ATA_FORCE=y
CONFIG_ATA_GENERIC=m
-# CONFIG_ATALK is not set
+CONFIG_ATALK=m
CONFIG_ATA_OVER_ETH=m
CONFIG_ATA_PIIX=y
# CONFIG_ATARI_PARTITION is not set
@@ -702,6 +726,7 @@ CONFIG_BATTERY_GAUGE_LTC2941=m
CONFIG_BATTERY_MAX17040=m
CONFIG_BATTERY_MAX17042=m
# CONFIG_BATTERY_MAX1721X is not set
+# CONFIG_BATTERY_PM8916_BMS_VM is not set
CONFIG_BATTERY_QCOM_BATTMGR=m
CONFIG_BATTERY_RT5033=m
CONFIG_BATTERY_SAMSUNG_SDI=y
@@ -715,6 +740,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y
+CONFIG_BCACHEFS_DEBUG=y
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_LOCK_TIME_STATS=y
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM2711_THERMAL=m
CONFIG_BCM2835_MBOX=y
@@ -867,7 +901,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCMSTB_L2_IRQ=y
CONFIG_BRCM_TRACING=y
CONFIG_BRCMUTIL=m
@@ -980,7 +1013,6 @@ CONFIG_CADENCE_WATCHDOG=m
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -1017,6 +1049,8 @@ CONFIG_CAN_NETLINK=y
CONFIG_CAN_PEAK_PCIEFD=m
CONFIG_CAN_PEAK_USB=m
CONFIG_CAN_RAW=m
+# CONFIG_CAN_RCAR_CANFD is not set
+# CONFIG_CAN_RCAR is not set
# CONFIG_CAN_SJA1000 is not set
CONFIG_CAN_SLCAN=m
# CONFIG_CAN_SOFTING is not set
@@ -1075,6 +1109,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -1117,6 +1152,7 @@ CONFIG_CHARGER_MAX77650=m
CONFIG_CHARGER_MAX77976=m
# CONFIG_CHARGER_MAX8903 is not set
CONFIG_CHARGER_MT6370=m
+# CONFIG_CHARGER_PM8916_LBC is not set
# CONFIG_CHARGER_QCOM_SMB2 is not set
CONFIG_CHARGER_QCOM_SMBB=m
CONFIG_CHARGER_RK817=m
@@ -1168,6 +1204,7 @@ CONFIG_CIO2_BRIDGE=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM2711_DVP=m
CONFIG_CLK_BCM2835=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
CONFIG_CLK_ICST=y
@@ -1183,6 +1220,7 @@ CONFIG_CLK_LS1028A_PLLDIG=y
CONFIG_CLK_PX30=y
CONFIG_CLK_QORIQ=y
CONFIG_CLK_RASPBERRYPI=y
+# CONFIG_CLK_RCAR_USB2_CLOCK_SEL is not set
CONFIG_CLK_RK3036=y
CONFIG_CLK_RK312X=y
CONFIG_CLK_RK3188=y
@@ -1268,6 +1306,8 @@ CONFIG_COMMON_CLK_QCOM=y
CONFIG_COMMON_CLK_RK808=m
CONFIG_COMMON_CLK_ROCKCHIP=y
CONFIG_COMMON_CLK_RS9_PCIE=m
+CONFIG_COMMON_CLK_S4_PERIPHERALS=y
+CONFIG_COMMON_CLK_S4_PLL=y
CONFIG_COMMON_CLK_SCMI=y
CONFIG_COMMON_CLK_SCPI=m
# CONFIG_COMMON_CLK_SI514 is not set
@@ -1299,7 +1339,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1387,6 +1426,7 @@ CONFIG_CROS_EC_UART=m
CONFIG_CROS_EC_VBC=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1533,6 +1573,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1644,6 +1689,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
CONFIG_DEBUG_CREDENTIALS=y
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1740,7 +1786,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1853,6 +1898,7 @@ CONFIG_DPOT_DAC=m
# CONFIG_DPS310 is not set
CONFIG_DRAGONRISE_FF=y
CONFIG_DRBD_FAULT_INJECTION=y
+CONFIG_DRIVER_PE_KUNIT_TEST=m
CONFIG_DRM_ACCEL_QAIC=m
CONFIG_DRM_ACCEL=y
CONFIG_DRM_AMD_ACP=y
@@ -1936,6 +1982,7 @@ CONFIG_DRM_IMX8QXP_LDB=m
CONFIG_DRM_IMX8QXP_PIXEL_COMBINER=m
CONFIG_DRM_IMX8QXP_PIXEL_LINK=m
CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI=m
+CONFIG_DRM_IMX93_MIPI_DSI=m
CONFIG_DRM_IMX_DCSS=m
CONFIG_DRM_IMX_LCDC=m
CONFIG_DRM_IMX_LCDIF=m
@@ -1999,9 +2046,11 @@ CONFIG_DRM_PANEL_HIMAX_HX8394=m
CONFIG_DRM_PANEL_ILITEK_IL9322=m
CONFIG_DRM_PANEL_ILITEK_ILI9341=m
CONFIG_DRM_PANEL_ILITEK_ILI9881C=m
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
CONFIG_DRM_PANEL_KHADAS_TS050=m
@@ -2031,6 +2080,7 @@ CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m
CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
CONFIG_DRM_PANEL_RAYDIUM_RM68200=m
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -2075,13 +2125,16 @@ CONFIG_DRM_PL111=m
CONFIG_DRM_QXL=m
CONFIG_DRM_RADEON=m
CONFIG_DRM_RADEON_USERPTR=y
+# CONFIG_DRM_RCAR_DU is not set
# CONFIG_DRM_RCAR_DW_HDMI is not set
# CONFIG_DRM_RCAR_LVDS is not set
# CONFIG_DRM_RCAR_MIPI_DSI is not set
# CONFIG_DRM_RCAR_USE_LVDS is not set
# CONFIG_DRM_RCAR_USE_MIPI_DSI is not set
CONFIG_DRM_ROCKCHIP=m
+# CONFIG_DRM_RZG2L_MIPI_DSI is not set
CONFIG_DRM_SAMSUNG_DSIM=m
+# CONFIG_DRM_SHMOBILE is not set
# CONFIG_DRM_SII902X is not set
CONFIG_DRM_SII9234=m
# CONFIG_DRM_SIL_SII8620 is not set
@@ -2101,7 +2154,7 @@ CONFIG_DRM_TEGRA_STAGING=y
# CONFIG_DRM_THINE_THC63LVD1024 is not set
CONFIG_DRM_TI_DLPC3433=m
CONFIG_DRM_TIDSS=m
-# CONFIG_DRM_TI_SN65DSI83 is not set
+CONFIG_DRM_TI_SN65DSI83=m
CONFIG_DRM_TI_SN65DSI86=m
CONFIG_DRM_TI_TFP410=m
CONFIG_DRM_TI_TPD12S015=m
@@ -2269,6 +2322,7 @@ CONFIG_EDAC_LEGACY_SYSFS=y
CONFIG_EDAC_QCOM=m
CONFIG_EDAC_SYNOPSYS=m
CONFIG_EDAC_THUNDERX=m
+CONFIG_EDAC_VERSAL=m
CONFIG_EDAC_XGENE=m
CONFIG_EDAC=y
CONFIG_EDAC_ZYNQMP=m
@@ -2278,7 +2332,6 @@ CONFIG_EEPROM_AT24=m
CONFIG_EEPROM_AT25=m
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EFI_ARMSTUB_DTB_LOADER=y
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -2409,7 +2462,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2563,6 +2616,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
# CONFIG_FTWDT010_WATCHDOG is not set
+CONFIG_FUEL_GAUGE_MM8013=m
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2693,6 +2747,7 @@ CONFIG_GPIO_PCI_IDIO_16=m
# CONFIG_GPIO_PISOSR is not set
CONFIG_GPIO_PL061=y
CONFIG_GPIO_RASPBERRYPI_EXP=m
+CONFIG_GPIO_RCAR=m
# CONFIG_GPIO_RDC321X is not set
CONFIG_GPIO_ROCKCHIP=y
# CONFIG_GPIO_SAMA5D2_PIOBU is not set
@@ -2922,6 +2977,7 @@ CONFIG_HNS_ENET=m
CONFIG_HOLTEK_FF=y
# CONFIG_HOSTAP is not set
CONFIG_HOTPLUG_CPU=y
+CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA=m
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -3075,9 +3131,13 @@ CONFIG_I2C_PXA=m
CONFIG_I2C_QCOM_CCI=m
CONFIG_I2C_QCOM_GENI=m
CONFIG_I2C_QUP=m
+# CONFIG_I2C_RCAR is not set
+# CONFIG_I2C_RIIC is not set
CONFIG_I2C_RK3X=y
# CONFIG_I2C_ROBOTFUZZ_OSIF is not set
+# CONFIG_I2C_RZV2M is not set
CONFIG_I2C_SCMI=m
+# CONFIG_I2C_SH_MOBILE is not set
CONFIG_I2C_SI470X=m
# CONFIG_I2C_SI4713 is not set
CONFIG_I2C_SIMTEC=m
@@ -3119,6 +3179,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -3211,7 +3272,6 @@ CONFIG_IMA_NG_TEMPLATE=y
CONFIG_IMA_READ_POLICY=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -3420,6 +3480,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=m
CONFIG_INTERCONNECT_QCOM_SDM845=m
# CONFIG_INTERCONNECT_QCOM_SDX55 is not set
# CONFIG_INTERCONNECT_QCOM_SDX65 is not set
+CONFIG_INTERCONNECT_QCOM_SDX75=m
# CONFIG_INTERCONNECT_QCOM_SM6350 is not set
CONFIG_INTERCONNECT_QCOM_SM8150=m
CONFIG_INTERCONNECT_QCOM_SM8250=m
@@ -3482,8 +3543,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
CONFIG_IPMB_DEVICE_INTERFACE=m
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3494,6 +3553,7 @@ CONFIG_IPMI_POWEROFF=m
CONFIG_IPMI_SI=m
CONFIG_IPMI_SSIF=m
CONFIG_IPMI_WATCHDOG=m
+# CONFIG_IPMMU_VMSA is not set
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
@@ -3824,7 +3884,7 @@ CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3859,6 +3919,7 @@ CONFIG_KUNIT=m
CONFIG_KUNIT_TEST=m
# CONFIG_KUNPENG_HCCS is not set
CONFIG_KUSER_HELPERS=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3916,6 +3977,7 @@ CONFIG_LEDS_GPIO=m
CONFIG_LEDS_GROUP_MULTICOLOR=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -4043,6 +4105,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -4057,6 +4120,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -4065,6 +4129,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
@@ -4137,6 +4202,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
CONFIG_MCP320X=m
CONFIG_MCP3422=m
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -4147,6 +4213,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -4168,7 +4235,7 @@ CONFIG_MDIO_I2C=m
CONFIG_MDIO_IPQ8064=m
# CONFIG_MDIO_MSCC_MIIM is not set
CONFIG_MDIO_MVUSB=m
-# CONFIG_MDIO_OCTEON is not set
+CONFIG_MDIO_OCTEON=m
# CONFIG_MDIO_SUN4I is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MDIO_XGENE=m
@@ -4182,6 +4249,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -4423,18 +4491,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -4522,7 +4594,11 @@ CONFIG_MMC_SDHCI_PLTFM=m
CONFIG_MMC_SDHCI_PXAV3=m
CONFIG_MMC_SDHCI_TEGRA=m
CONFIG_MMC_SDHCI_XENON=m
+CONFIG_MMC_SDHI_INTERNAL_DMAC=m
+CONFIG_MMC_SDHI=m
+# CONFIG_MMC_SDHI_SYS_DMAC is not set
CONFIG_MMC_SDRICOH_CS=m
+# CONFIG_MMC_SH_MMCIF is not set
CONFIG_MMC_SPI=m
# CONFIG_MMC_STM32_SDMMC is not set
CONFIG_MMC_SUNXI=m
@@ -4556,6 +4632,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -4646,6 +4725,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -4704,6 +4785,7 @@ CONFIG_MTD_NAND_NANDSIM=m
# CONFIG_MTD_NAND_PL35X is not set
# CONFIG_MTD_NAND_PLATFORM is not set
# CONFIG_MTD_NAND_QCOM is not set
+# CONFIG_MTD_NAND_RENESAS is not set
# CONFIG_MTD_NAND_RICOH is not set
# CONFIG_MTD_NAND_ROCKCHIP is not set
# CONFIG_MTD_NAND_SUNXI is not set
@@ -4771,7 +4853,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
CONFIG_MWL8K=m
-# CONFIG_MX3_IPU is not set
CONFIG_MXC4005=m
CONFIG_MXC6255=m
# CONFIG_MXS_DMA is not set
@@ -4825,9 +4906,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4900,12 +4978,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4998,6 +5076,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -5010,15 +5089,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
CONFIG_NET_SB1000=y
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -5052,6 +5128,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -5189,7 +5266,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -5383,11 +5460,13 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_NVIDIA_SHIELD_FF=y
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
CONFIG_NVME_APPLE=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_APPLE_EFUSES=m
# CONFIG_NVMEM_IMX_IIM is not set
@@ -5423,7 +5502,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -5572,6 +5653,7 @@ CONFIG_PCI_AARDVARK=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -5610,10 +5692,13 @@ CONFIG_PCIE_MOBIVEIL=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCIE_PTM=y
CONFIG_PCIE_QCOM=y
+# CONFIG_PCIE_RCAR_GEN4_HOST is not set
+# CONFIG_PCIE_RCAR_HOST is not set
CONFIG_PCIE_ROCKCHIP_DW_HOST=y
CONFIG_PCIE_ROCKCHIP_HOST=y
CONFIG_PCIE_TEGRA194_HOST=y
CONFIG_PCIE_XILINX_CPM=y
+CONFIG_PCIE_XILINX_DMA_PL=y
CONFIG_PCIE_XILINX_NWL=y
CONFIG_PCIE_XILINX=y
# CONFIG_PCI_FTPCI100 is not set
@@ -5647,6 +5732,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -5696,7 +5782,7 @@ CONFIG_PHY_MESON_G12A_MIPI_DPHY_ANALOG=y
CONFIG_PHY_MESON_G12A_USB2=y
CONFIG_PHY_MESON_G12A_USB3_PCIE=m
CONFIG_PHY_MESON_GXL_USB2=m
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
+CONFIG_PHY_MIXEL_LVDS_PHY=m
CONFIG_PHY_MIXEL_MIPI_DPHY=m
CONFIG_PHY_MVEBU_A3700_COMPHY=m
CONFIG_PHY_MVEBU_A3700_UTMI=m
@@ -5730,6 +5816,11 @@ CONFIG_PHY_QCOM_USB_HSIC=m
CONFIG_PHY_QCOM_USB_HS=m
CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=m
CONFIG_PHY_QCOM_USB_SS=m
+# CONFIG_PHY_R8A779F0_ETHERNET_SERDES is not set
+# CONFIG_PHY_RCAR_GEN2 is not set
+# CONFIG_PHY_RCAR_GEN3_PCIE is not set
+CONFIG_PHY_RCAR_GEN3_USB2=m
+# CONFIG_PHY_RCAR_GEN3_USB3 is not set
CONFIG_PHY_ROCKCHIP_DPHY_RX0=m
CONFIG_PHY_ROCKCHIP_DP=m
CONFIG_PHY_ROCKCHIP_EMMC=m
@@ -5762,6 +5853,7 @@ CONFIG_PINCONF=y
CONFIG_PINCTRL_ALDERLAKE=m
CONFIG_PINCTRL_AMD=y
CONFIG_PINCTRL_AMLOGIC_C3=y
+CONFIG_PINCTRL_AMLOGIC_T7=y
CONFIG_PINCTRL_APPLE_GPIO=m
CONFIG_PINCTRL_AS3722=y
CONFIG_PINCTRL_AXP209=m
@@ -5882,12 +5974,13 @@ CONFIG_PINCTRL_SUN50I_H6=y
# CONFIG_PINCTRL_SUN8I_A33 is not set
# CONFIG_PINCTRL_SUN8I_A83T is not set
# CONFIG_PINCTRL_SUN8I_A83T_R is not set
-# CONFIG_PINCTRL_SUN8I_H3 is not set
CONFIG_PINCTRL_SUN8I_H3_R=y
+CONFIG_PINCTRL_SUN8I_H3=y
# CONFIG_PINCTRL_SUN8I_V3S is not set
# CONFIG_PINCTRL_SUN9I_A80 is not set
# CONFIG_PINCTRL_SUN9I_A80_R is not set
# CONFIG_PINCTRL_SX150X is not set
+CONFIG_PINCTRL_TEGRA234=y
CONFIG_PINCTRL=y
CONFIG_PINCTRL_ZYNQMP=y
# CONFIG_PING is not set
@@ -5933,7 +6026,6 @@ CONFIG_POWERCAP=y
CONFIG_POWER_MLXBF=m
CONFIG_POWER_RESET_AS3722=y
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -6068,6 +6160,8 @@ CONFIG_PWM_MESON=m
CONFIG_PWM_OMAP_DMTIMER=m
CONFIG_PWM_PCA9685=m
CONFIG_PWM_RASPBERRYPI_POE=m
+# CONFIG_PWM_RCAR is not set
+# CONFIG_PWM_RENESAS_TPU is not set
CONFIG_PWM_ROCKCHIP=m
CONFIG_PWM_STMPE=y
CONFIG_PWM_SUN4I=m
@@ -6134,6 +6228,8 @@ CONFIG_QCOM_Q6V5_WCSS=m
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
CONFIG_QCOM_QFPROM=m
CONFIG_QCOM_QMI_HELPERS=m
+CONFIG_QCOM_QSEECOM_UEFISECAPP=y
+CONFIG_QCOM_QSEECOM=y
CONFIG_QCOM_RAMP_CTRL=m
CONFIG_QCOM_RMTFS_MEM=m
CONFIG_QCOM_RPMHPD=y
@@ -6240,6 +6336,10 @@ CONFIG_RASPBERRYPI_POWER=y
CONFIG_RATIONAL_KUNIT_TEST=m
# CONFIG_RAVE_SP_CORE is not set
# CONFIG_RBTREE_TEST is not set
+# CONFIG_RCAR_DMAC is not set
+# CONFIG_RCAR_GEN3_THERMAL is not set
+# CONFIG_RCAR_REMOTEPROC is not set
+# CONFIG_RCAR_THERMAL is not set
CONFIG_RC_ATI_REMOTE=m
CONFIG_RC_CORE=y
CONFIG_RC_DECODERS=y
@@ -6275,7 +6375,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=y
@@ -6319,6 +6419,7 @@ CONFIG_REGULATOR_HI655X=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77650=m
CONFIG_REGULATOR_MAX77686=m
@@ -6405,7 +6506,14 @@ CONFIG_RELOCATABLE=y
# CONFIG_REMOTEPROC_CDEV is not set
CONFIG_REMOTEPROC=y
CONFIG_REMOTE_TARGET=m
+# CONFIG_RENESAS_OSTM is not set
# CONFIG_RENESAS_PHY is not set
+# CONFIG_RENESAS_RPCIF is not set
+# CONFIG_RENESAS_RZAWDT is not set
+# CONFIG_RENESAS_RZG2LWDT is not set
+# CONFIG_RENESAS_RZN1WDT is not set
+# CONFIG_RENESAS_USB_DMAC is not set
+# CONFIG_RENESAS_WDT is not set
# CONFIG_RESET_ATTACK_MITIGATION is not set
CONFIG_RESET_CONTROLLER=y
CONFIG_RESET_HISI=y
@@ -6416,6 +6524,7 @@ CONFIG_RESET_MESON=m
CONFIG_RESET_QCOM_AOSS=y
CONFIG_RESET_QCOM_PDC=m
CONFIG_RESET_RASPBERRYPI=y
+CONFIG_RESET_RZG2L_USBPHY_CTRL=m
CONFIG_RESET_SCMI=y
CONFIG_RESET_SIMPLE=y
CONFIG_RESET_TI_SCI=m
@@ -6472,6 +6581,7 @@ CONFIG_ROCKCHIP_VOP2=y
CONFIG_ROCKCHIP_VOP=y
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -6531,7 +6641,6 @@ CONFIG_RTC_DRV_ARMADA38X=m
CONFIG_RTC_DRV_AS3722=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
CONFIG_RTC_DRV_CADENCE=m
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_CROS_EC=m
@@ -6619,6 +6728,7 @@ CONFIG_RTC_DRV_RX8581=m
CONFIG_RTC_DRV_S35390A=m
# CONFIG_RTC_DRV_SA1100 is not set
CONFIG_RTC_DRV_SD3078=m
+# CONFIG_RTC_DRV_SH is not set
CONFIG_RTC_DRV_SNVS=m
CONFIG_RTC_DRV_STK17TA8=m
CONFIG_RTC_DRV_SUN6I=y
@@ -6694,6 +6804,10 @@ CONFIG_RV_REACT_PRINTK=y
CONFIG_RV=y
CONFIG_RXKAD=y
# CONFIG_RXPERF is not set
+# CONFIG_RZ_DMAC is not set
+# CONFIG_RZG2L_ADC is not set
+# CONFIG_RZG2L_THERMAL is not set
+# CONFIG_RZ_MTU3 is not set
CONFIG_S2IO=m
# CONFIG_S390_KPROBES_SANITY_TEST is not set
# CONFIG_S390_MODULES_SANITY_TEST is not set
@@ -6714,6 +6828,7 @@ CONFIG_SATA_MV=m
CONFIG_SATA_PMP=y
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_RCAR is not set
CONFIG_SATA_SIL24=m
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
@@ -6863,6 +6978,7 @@ CONFIG_SCSI_UFS_HISI=m
CONFIG_SCSI_UFS_HPB=y
CONFIG_SCSI_UFS_HWMON=y
CONFIG_SCSI_UFS_QCOM=m
+# CONFIG_SCSI_UFS_RENESAS is not set
CONFIG_SCSI_UFS_TI_J721E=m
CONFIG_SCSI_VIRTIO=m
CONFIG_SCSI_WD719X=m
@@ -6892,11 +7008,12 @@ CONFIG_SDM_VIDEOCC_845=m
# CONFIG_SDX_GCC_75 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -7038,6 +7155,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -7098,6 +7216,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -7171,6 +7290,7 @@ CONFIG_SERIAL_8250_CS=m
CONFIG_SERIAL_8250_DFL=m
CONFIG_SERIAL_8250_DMA=y
CONFIG_SERIAL_8250_DW=y
+# CONFIG_SERIAL_8250_EM is not set
CONFIG_SERIAL_8250_EXAR=m
CONFIG_SERIAL_8250_EXTENDED=y
# CONFIG_SERIAL_8250_FINTEK is not set
@@ -7233,6 +7353,11 @@ CONFIG_SERIAL_SC16IS7XX_I2C=y
CONFIG_SERIAL_SC16IS7XX=m
CONFIG_SERIAL_SC16IS7XX_SPI=y
# CONFIG_SERIAL_SCCNXP is not set
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_SH_SCI_DMA=y
+CONFIG_SERIAL_SH_SCI_EARLYCON=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=18
+CONFIG_SERIAL_SH_SCI=y
# CONFIG_SERIAL_SIFIVE is not set
# CONFIG_SERIAL_SPRD is not set
# CONFIG_SERIAL_ST_ASC is not set
@@ -7299,7 +7424,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -7313,12 +7437,14 @@ CONFIG_SMC91X=m
# CONFIG_SM_CAMCC_6350 is not set
# CONFIG_SM_CAMCC_8250 is not set
# CONFIG_SM_CAMCC_8450 is not set
+# CONFIG_SM_CAMCC_8550 is not set
CONFIG_SMC_DIAG=m
CONFIG_SMC=m
# CONFIG_SM_DISPCC_8250 is not set
CONFIG_SM_DISPCC_8450=m
# CONFIG_SM_DISPCC_8550 is not set
# CONFIG_SM_FTL is not set
+# CONFIG_SM_GCC_4450 is not set
# CONFIG_SM_GCC_6115 is not set
# CONFIG_SM_GCC_6125 is not set
# CONFIG_SM_GCC_6350 is not set
@@ -7353,7 +7479,7 @@ CONFIG_SMS_USB_DRV=m
# CONFIG_SM_TCSRCC_8550 is not set
# CONFIG_SM_VIDEOCC_8150 is not set
# CONFIG_SM_VIDEOCC_8250 is not set
-# CONFIG_SM_VIDEOCC_8350 is not set
+CONFIG_SM_VIDEOCC_8350=m
# CONFIG_SM_VIDEOCC_8450 is not set
# CONFIG_SM_VIDEOCC_8550 is not set
CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0
@@ -7422,6 +7548,7 @@ CONFIG_SND_FM801=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -7575,8 +7702,10 @@ CONFIG_SND_SOC_APQ8016_SBC=m
CONFIG_SND_SOC_ARNDALE=m
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -7782,6 +7911,7 @@ CONFIG_SND_SOC_PCM512x_SPI=m
# CONFIG_SND_SOC_PEB2466 is not set
CONFIG_SND_SOC_QCOM=m
CONFIG_SND_SOC_QDSP6=m
+# CONFIG_SND_SOC_RCAR is not set
CONFIG_SND_SOC_RK3288_HDMI_ANALOG=m
CONFIG_SND_SOC_RK3328=m
CONFIG_SND_SOC_RK3399_GRU_SOUND=m
@@ -7816,6 +7946,8 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
+# CONFIG_SND_SOC_RZ is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811=m
@@ -7827,6 +7959,7 @@ CONFIG_SND_SOC_SC8280XP=m
CONFIG_SND_SOC_SDM845=m
# CONFIG_SND_SOC_SDW_MOCKUP is not set
CONFIG_SND_SOC_SGTL5000=m
+# CONFIG_SND_SOC_SH4_FSI is not set
CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m
CONFIG_SND_SOC_SIMPLE_MUX=m
# CONFIG_SND_SOC_SM8250 is not set
@@ -8120,8 +8253,12 @@ CONFIG_SPI_QCOM_GENI=m
CONFIG_SPI_QCOM_QSPI=m
CONFIG_SPI_QUP=m
CONFIG_SPI_ROCKCHIP=m
-# CONFIG_SPI_ROCKCHIP_SFC is not set
+CONFIG_SPI_ROCKCHIP_SFC=m
+# CONFIG_SPI_RSPI is not set
+# CONFIG_SPI_RZV2M_CSI is not set
# CONFIG_SPI_SC18IS602 is not set
+# CONFIG_SPI_SH_HSPI is not set
+# CONFIG_SPI_SH_MSIOF is not set
# CONFIG_SPI_SIFIVE is not set
# CONFIG_SPI_SLAVE is not set
CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m
@@ -8354,6 +8491,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -8430,6 +8568,7 @@ CONFIG_TEST_LOCKUP=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -8502,7 +8641,7 @@ CONFIG_TIFM_7XX1=m
CONFIG_TIFM_CORE=m
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
+CONFIG_TI_ICSSG_PRUETH=m
CONFIG_TI_ICSS_IEP=m
CONFIG_TI_K3_AM65_CPSW_NUSS=m
CONFIG_TI_K3_AM65_CPSW_SWITCHDEV=y
@@ -8717,6 +8856,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
CONFIG_TYPEC_QCOM_PMIC=m
# CONFIG_TYPEC_RT1711H is not set
@@ -8728,7 +8868,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TPS6598X=m
CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_WUSB3801=m
CONFIG_TYPHOON=m
CONFIG_UACCE=m
@@ -8821,6 +8960,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
@@ -8885,6 +9025,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y
# CONFIG_USB_EHSET_TEST_FIXTURE is not set
CONFIG_USB_EMI26=m
CONFIG_USB_EMI62=m
+# CONFIG_USB_EMXX is not set
CONFIG_USB_EPSON2888=y
# CONFIG_USB_ETH is not set
CONFIG_USB_EZUSB_FX2=m
@@ -8997,6 +9138,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_M66592 is not set
CONFIG_USB_MA901=m
@@ -9057,6 +9199,7 @@ CONFIG_USB_OTG_FSM=m
# CONFIG_USB_OTG_PRODUCTLIST is not set
CONFIG_USB_OTG=y
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -9073,6 +9216,9 @@ CONFIG_USB_QCOM_EUD=m
CONFIG_USB_RAINSHADOW_CEC=m
# CONFIG_USB_RAREMONO is not set
CONFIG_USB_RAW_GADGET=m
+# CONFIG_USB_RENESAS_USB3 is not set
+# CONFIG_USB_RENESAS_USBF is not set
+# CONFIG_USB_RENESAS_USBHS is not set
CONFIG_USB_ROLE_SWITCH=y
CONFIG_USB_RTL8150=m
CONFIG_USB_RTL8152=m
@@ -9184,6 +9330,7 @@ CONFIG_USB_XHCI_MVEBU=m
CONFIG_USB_XHCI_PCI_RENESAS=y
CONFIG_USB_XHCI_PCI=y
CONFIG_USB_XHCI_PLATFORM=m
+CONFIG_USB_XHCI_RCAR=m
CONFIG_USB_XHCI_TEGRA=m
CONFIG_USB_XUSBATM=m
CONFIG_USB=y
@@ -9354,7 +9501,7 @@ CONFIG_VIDEO_IMX8_ISI=m
CONFIG_VIDEO_IMX8_ISI_M2M=y
CONFIG_VIDEO_IMX8_JPEG=m
CONFIG_VIDEO_IMX8MQ_MIPI_CSI2=m
-CONFIG_VIDEO_IMX_MEDIA=m
+# CONFIG_VIDEO_IMX_MEDIA is not set
CONFIG_VIDEO_IMX_MIPI_CSIS=m
CONFIG_VIDEO_IMX_PXP=m
# CONFIG_VIDEO_IPU3_CIO2 is not set
@@ -9371,10 +9518,12 @@ CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
CONFIG_VIDEO_MESON_GE2D=m
CONFIG_VIDEO_MESON_VDEC=m
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -9422,12 +9571,19 @@ CONFIG_VIDEO_PVRUSB2=m
CONFIG_VIDEO_PVRUSB2_SYSFS=y
CONFIG_VIDEO_QCOM_CAMSS=m
CONFIG_VIDEO_QCOM_VENUS=m
+# CONFIG_VIDEO_RCAR_CSI2 is not set
+# CONFIG_VIDEO_RCAR_ISP is not set
+# CONFIG_VIDEO_RCAR_VIN is not set
CONFIG_VIDEO_RDACM20=m
# CONFIG_VIDEO_RDACM21 is not set
+# CONFIG_VIDEO_RENESAS_FCP is not set
+# CONFIG_VIDEO_RENESAS_JPU is not set
CONFIG_VIDEO_RJ54N1=m
CONFIG_VIDEO_ROCKCHIP_ISP1=m
CONFIG_VIDEO_ROCKCHIP_RGA=m
CONFIG_VIDEO_ROCKCHIP_VDEC=m
+# CONFIG_VIDEO_RZG2L_CRU is not set
+# CONFIG_VIDEO_RZG2L_CSI2 is not set
CONFIG_VIDEO_S5C73M3=m
CONFIG_VIDEO_S5K4ECGX=m
CONFIG_VIDEO_S5K5BAF=m
@@ -9478,6 +9634,7 @@ CONFIG_VIDEO_THS7303=m
CONFIG_VIDEO_THS8200=m
CONFIG_VIDEO_TI_CAL=m
CONFIG_VIDEO_TI_CAL_MC=y
+CONFIG_VIDEO_TI_J721E_CSI2RX=m
CONFIG_VIDEO_TLV320AIC23B=m
CONFIG_VIDEO_TM6000_ALSA=m
CONFIG_VIDEO_TM6000_DVB=m
@@ -9683,6 +9840,7 @@ CONFIG_XDP_SOCKETS=y
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
# CONFIG_XEN is not set
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -9801,19 +9959,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-debug-rhel.config b/SOURCES/kernel-aarch64-debug-rhel.config
index edb7db5..ebbe575 100644
--- a/SOURCES/kernel-aarch64-debug-rhel.config
+++ b/SOURCES/kernel-aarch64-debug-rhel.config
@@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m
# CONFIG_AMD_XGBE_DCB is not set
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
# CONFIG_AMT is not set
# CONFIG_ANDROID_BINDER_IPC is not set
@@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
CONFIG_ARCH_MXC=y
# CONFIG_ARCH_NPCM is not set
CONFIG_ARCH_NXP=y
+CONFIG_ARCH_PENSANDO=y
CONFIG_ARCH_QCOM=y
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_REALTEK is not set
@@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -375,6 +378,7 @@ CONFIG_ARM_PMU=y
# CONFIG_ARM_QCOM_CPUFREQ_HW is not set
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
# CONFIG_ARM_SCMI_POWER_CONTROL is not set
CONFIG_ARM_SCMI_POWER_DOMAIN=m
CONFIG_ARM_SCMI_PROTOCOL=y
@@ -407,6 +411,7 @@ CONFIG_ARM_TI_CPUFREQ=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -520,6 +525,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -655,7 +661,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -748,7 +753,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -828,6 +832,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -909,6 +914,7 @@ CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM_NS2=y
CONFIG_CLK_BCM_SR=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -1001,7 +1007,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1085,7 +1090,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
CONFIG_CRYPTO_AES_ARM64=y
-CONFIG_CRYPTO_AES_GCM_P10=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_ANSI_CPRNG=m
@@ -1102,7 +1106,6 @@ CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
CONFIG_CRYPTO_CHACHA20_NEON=y
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CMAC=y
# CONFIG_CRYPTO_CRC32C_VPMSUM is not set
@@ -1189,6 +1192,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1213,7 +1221,6 @@ CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
CONFIG_CRYPTO_POLY1305_NEON=y
-# CONFIG_CRYPTO_POLY1305_P10 is not set
# CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1485,6 +1492,7 @@ CONFIG_DPAA2_CONSOLE=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1557,6 +1565,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
CONFIG_DRM_IMX_DCSS=m
# CONFIG_DRM_IMX_LCDC is not set
# CONFIG_DRM_IMX_LCDIF is not set
@@ -1586,38 +1595,92 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LG_LG4573 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
# CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1637,7 +1700,8 @@ CONFIG_DRM_RADEON_USERPTR=y
# CONFIG_DRM_SIMPLE_BRIDGE is not set
CONFIG_DRM_SIMPLEDRM=y
# CONFIG_DRM_SSD130X is not set
-# CONFIG_DRM_TEGRA is not set
+# CONFIG_DRM_TEGRA_DEBUG is not set
+CONFIG_DRM_TEGRA=m
# CONFIG_DRM_THINE_THC63LVD1024 is not set
# CONFIG_DRM_TI_DLPC3433 is not set
# CONFIG_DRM_TIDSS is not set
@@ -1805,7 +1869,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1839,7 +1902,12 @@ CONFIG_ENIC=m
# CONFIG_EPIC100 is not set
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1912,7 +1980,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2029,7 +2097,9 @@ CONFIG_FSL_PQ_MDIO=m
# CONFIG_FSL_RCPM is not set
CONFIG_FSL_XGMAC_MDIO=m
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -2037,6 +2107,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2189,6 +2260,7 @@ CONFIG_GPIO_XLP=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
CONFIG_GUP_TEST=y
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2372,6 +2444,7 @@ CONFIG_HNS_ENET=m
CONFIG_HNS=m
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2535,6 +2608,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2548,6 +2622,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2617,7 +2692,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2751,6 +2825,7 @@ CONFIG_INPUT_SPARSEKMAP=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2793,6 +2868,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2820,7 +2896,8 @@ CONFIG_IOMMU_DEBUGFS=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3122,7 +3199,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3152,6 +3229,7 @@ CONFIG_KUNIT_TEST=m
CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3319,6 +3397,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3333,6 +3412,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3341,6 +3421,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3407,6 +3488,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3444,6 +3526,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3510,7 +3593,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3651,6 +3734,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3767,6 +3851,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3839,6 +3926,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3916,7 +4005,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
# CONFIG_MWL8K is not set
-# CONFIG_MX3_IPU is not set
# CONFIG_MXC4005 is not set
# CONFIG_MXC6255 is not set
# CONFIG_MXS_DMA is not set
@@ -3961,9 +4049,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4085,6 +4170,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4097,15 +4183,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4134,6 +4217,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4238,7 +4322,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4436,9 +4520,11 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_BCM_OCOTP is not set
# CONFIG_NVMEM_IMX_IIM is not set
@@ -4465,7 +4551,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4585,6 +4673,7 @@ CONFIG_PCC=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -4650,6 +4739,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4729,6 +4819,7 @@ CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_AMD is not set
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4817,7 +4908,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -4974,6 +5064,7 @@ CONFIG_QCOM_L3_PMU=y
# CONFIG_QCOM_PDC is not set
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
# CONFIG_QCOM_QFPROM is not set
+# CONFIG_QCOM_QSEECOM is not set
# CONFIG_QCOM_RAMP_CTRL is not set
# CONFIG_QCOM_RMTFS_MEM is not set
# CONFIG_QCOM_RPMH is not set
@@ -5002,7 +5093,7 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
+CONFIG_QORIQ_THERMAL=m
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -5110,6 +5201,7 @@ CONFIG_REGULATOR_GPIO=y
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX20086 is not set
# CONFIG_REGULATOR_MAX20411 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77686=m
# CONFIG_REGULATOR_MAX77826 is not set
@@ -5214,6 +5306,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5254,7 +5347,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5544,6 +5636,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5699,6 +5792,7 @@ CONFIG_SENSORS_LTC2945=m
# CONFIG_SENSORS_LTC2978 is not set
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
# CONFIG_SENSORS_LTC4151 is not set
@@ -5757,6 +5851,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
# CONFIG_SENSORS_PMBUS is not set
+# CONFIG_SENSORS_POWERZ is not set
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -5933,7 +6028,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -6017,6 +6111,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6154,8 +6249,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6246,6 +6343,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6360,12 +6458,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6388,6 +6480,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6584,7 +6677,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
CONFIG_SOC_IMX8M=y
CONFIG_SOC_IMX9=m
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
@@ -6815,6 +6907,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6881,6 +6974,7 @@ CONFIG_TEST_LIST_SORT=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7112,6 +7206,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7190,6 +7285,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CONN_GPIO=m
@@ -7293,6 +7389,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7336,6 +7433,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7486,7 +7584,10 @@ CONFIG_VEXPRESS_CONFIG=y
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
CONFIG_VFIO_FSL_MC=m
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
# CONFIG_VFIO_MDEV is not set
@@ -7601,11 +7702,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7940,19 +8043,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-fedora.config b/SOURCES/kernel-aarch64-fedora.config
index 2ae7f79..1bb5dec 100644
--- a/SOURCES/kernel-aarch64-fedora.config
+++ b/SOURCES/kernel-aarch64-fedora.config
@@ -254,6 +254,7 @@ CONFIG_AMD_XGBE_DCB=y
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
CONFIG_AMLOGIC_THERMAL=m
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
CONFIG_AMT=m
CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder"
@@ -329,12 +330,33 @@ CONFIG_ARCH_MXC=y
CONFIG_ARCH_NR_GPIO=2048
CONFIG_ARCH_NXP=y
# CONFIG_ARCH_OMAP1 is not set
+# CONFIG_ARCH_PENSANDO is not set
# CONFIG_ARCH_PXA is not set
CONFIG_ARCH_QCOM=y
+CONFIG_ARCH_R8A774A1=y
+# CONFIG_ARCH_R8A774B1 is not set
+# CONFIG_ARCH_R8A774C0 is not set
+# CONFIG_ARCH_R8A774E1 is not set
+# CONFIG_ARCH_R8A77951 is not set
+# CONFIG_ARCH_R8A77960 is not set
+# CONFIG_ARCH_R8A77961 is not set
+# CONFIG_ARCH_R8A77965 is not set
+# CONFIG_ARCH_R8A77970 is not set
+# CONFIG_ARCH_R8A77980 is not set
+# CONFIG_ARCH_R8A77990 is not set
+# CONFIG_ARCH_R8A77995 is not set
+# CONFIG_ARCH_R8A779A0 is not set
+# CONFIG_ARCH_R8A779F0 is not set
+# CONFIG_ARCH_R8A779G0 is not set
+CONFIG_ARCH_R9A07G043=y
+CONFIG_ARCH_R9A07G044=y
+CONFIG_ARCH_R9A07G054=y
+# CONFIG_ARCH_R9A08G045 is not set
+# CONFIG_ARCH_R9A09G011 is not set
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_RDA is not set
# CONFIG_ARCH_REALTEK is not set
-# CONFIG_ARCH_RENESAS is not set
+CONFIG_ARCH_RENESAS=y
CONFIG_ARCH_ROCKCHIP=y
# CONFIG_ARCH_S32 is not set
# CONFIG_ARCH_SA1100 is not set
@@ -396,6 +418,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -484,6 +507,7 @@ CONFIG_ARM_RASPBERRYPI_CPUFREQ=m
CONFIG_ARM_RK3399_DMC_DEVFREQ=m
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
CONFIG_ARM_SCMI_POWERCAP=m
CONFIG_ARM_SCMI_POWER_CONTROL=m
CONFIG_ARM_SCMI_POWER_DOMAIN=m
@@ -531,7 +555,7 @@ CONFIG_ATA_ACPI=y
CONFIG_ATA_BMDMA=y
CONFIG_ATA_FORCE=y
CONFIG_ATA_GENERIC=m
-# CONFIG_ATALK is not set
+CONFIG_ATALK=m
CONFIG_ATA_OVER_ETH=m
CONFIG_ATA_PIIX=y
# CONFIG_ATARI_PARTITION is not set
@@ -702,6 +726,7 @@ CONFIG_BATTERY_GAUGE_LTC2941=m
CONFIG_BATTERY_MAX17040=m
CONFIG_BATTERY_MAX17042=m
# CONFIG_BATTERY_MAX1721X is not set
+# CONFIG_BATTERY_PM8916_BMS_VM is not set
CONFIG_BATTERY_QCOM_BATTMGR=m
CONFIG_BATTERY_RT5033=m
CONFIG_BATTERY_SAMSUNG_SDI=y
@@ -715,6 +740,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM2711_THERMAL=m
CONFIG_BCM2835_MBOX=y
@@ -867,7 +901,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCMSTB_L2_IRQ=y
# CONFIG_BRCM_TRACING is not set
CONFIG_BRCMUTIL=m
@@ -980,7 +1013,6 @@ CONFIG_CADENCE_WATCHDOG=m
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -1017,6 +1049,8 @@ CONFIG_CAN_NETLINK=y
CONFIG_CAN_PEAK_PCIEFD=m
CONFIG_CAN_PEAK_USB=m
CONFIG_CAN_RAW=m
+# CONFIG_CAN_RCAR_CANFD is not set
+# CONFIG_CAN_RCAR is not set
# CONFIG_CAN_SJA1000 is not set
CONFIG_CAN_SLCAN=m
# CONFIG_CAN_SOFTING is not set
@@ -1075,6 +1109,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -1117,6 +1152,7 @@ CONFIG_CHARGER_MAX77650=m
CONFIG_CHARGER_MAX77976=m
# CONFIG_CHARGER_MAX8903 is not set
CONFIG_CHARGER_MT6370=m
+# CONFIG_CHARGER_PM8916_LBC is not set
# CONFIG_CHARGER_QCOM_SMB2 is not set
CONFIG_CHARGER_QCOM_SMBB=m
CONFIG_CHARGER_RK817=m
@@ -1168,6 +1204,7 @@ CONFIG_CIO2_BRIDGE=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM2711_DVP=m
CONFIG_CLK_BCM2835=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
CONFIG_CLK_ICST=y
@@ -1183,6 +1220,7 @@ CONFIG_CLK_LS1028A_PLLDIG=y
CONFIG_CLK_PX30=y
CONFIG_CLK_QORIQ=y
CONFIG_CLK_RASPBERRYPI=y
+# CONFIG_CLK_RCAR_USB2_CLOCK_SEL is not set
CONFIG_CLK_RK3036=y
CONFIG_CLK_RK312X=y
CONFIG_CLK_RK3188=y
@@ -1268,6 +1306,8 @@ CONFIG_COMMON_CLK_QCOM=y
CONFIG_COMMON_CLK_RK808=m
CONFIG_COMMON_CLK_ROCKCHIP=y
CONFIG_COMMON_CLK_RS9_PCIE=m
+CONFIG_COMMON_CLK_S4_PERIPHERALS=y
+CONFIG_COMMON_CLK_S4_PLL=y
CONFIG_COMMON_CLK_SCMI=y
CONFIG_COMMON_CLK_SCPI=m
# CONFIG_COMMON_CLK_SI514 is not set
@@ -1299,7 +1339,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1387,6 +1426,7 @@ CONFIG_CROS_EC_UART=m
CONFIG_CROS_EC_VBC=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1533,6 +1573,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1644,6 +1689,7 @@ CONFIG_DE2104X=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1732,7 +1778,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
# CONFIG_DETECT_HUNG_TASK is not set
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1844,6 +1889,7 @@ CONFIG_DPOT_DAC=m
# CONFIG_DPS310 is not set
CONFIG_DRAGONRISE_FF=y
# CONFIG_DRBD_FAULT_INJECTION is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
CONFIG_DRM_ACCEL_QAIC=m
CONFIG_DRM_ACCEL=y
CONFIG_DRM_AMD_ACP=y
@@ -1927,6 +1973,7 @@ CONFIG_DRM_IMX8QXP_LDB=m
CONFIG_DRM_IMX8QXP_PIXEL_COMBINER=m
CONFIG_DRM_IMX8QXP_PIXEL_LINK=m
CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI=m
+CONFIG_DRM_IMX93_MIPI_DSI=m
CONFIG_DRM_IMX_DCSS=m
CONFIG_DRM_IMX_LCDC=m
CONFIG_DRM_IMX_LCDIF=m
@@ -1990,9 +2037,11 @@ CONFIG_DRM_PANEL_HIMAX_HX8394=m
CONFIG_DRM_PANEL_ILITEK_IL9322=m
CONFIG_DRM_PANEL_ILITEK_ILI9341=m
CONFIG_DRM_PANEL_ILITEK_ILI9881C=m
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
CONFIG_DRM_PANEL_KHADAS_TS050=m
@@ -2022,6 +2071,7 @@ CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m
CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
CONFIG_DRM_PANEL_RAYDIUM_RM68200=m
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -2066,13 +2116,16 @@ CONFIG_DRM_PL111=m
CONFIG_DRM_QXL=m
CONFIG_DRM_RADEON=m
CONFIG_DRM_RADEON_USERPTR=y
+# CONFIG_DRM_RCAR_DU is not set
# CONFIG_DRM_RCAR_DW_HDMI is not set
# CONFIG_DRM_RCAR_LVDS is not set
# CONFIG_DRM_RCAR_MIPI_DSI is not set
# CONFIG_DRM_RCAR_USE_LVDS is not set
# CONFIG_DRM_RCAR_USE_MIPI_DSI is not set
CONFIG_DRM_ROCKCHIP=m
+# CONFIG_DRM_RZG2L_MIPI_DSI is not set
CONFIG_DRM_SAMSUNG_DSIM=m
+# CONFIG_DRM_SHMOBILE is not set
# CONFIG_DRM_SII902X is not set
CONFIG_DRM_SII9234=m
# CONFIG_DRM_SIL_SII8620 is not set
@@ -2092,7 +2145,7 @@ CONFIG_DRM_TEGRA_STAGING=y
# CONFIG_DRM_THINE_THC63LVD1024 is not set
CONFIG_DRM_TI_DLPC3433=m
CONFIG_DRM_TIDSS=m
-# CONFIG_DRM_TI_SN65DSI83 is not set
+CONFIG_DRM_TI_SN65DSI83=m
CONFIG_DRM_TI_SN65DSI86=m
CONFIG_DRM_TI_TFP410=m
CONFIG_DRM_TI_TPD12S015=m
@@ -2260,6 +2313,7 @@ CONFIG_EDAC_LEGACY_SYSFS=y
CONFIG_EDAC_QCOM=m
CONFIG_EDAC_SYNOPSYS=m
CONFIG_EDAC_THUNDERX=m
+CONFIG_EDAC_VERSAL=m
CONFIG_EDAC_XGENE=m
CONFIG_EDAC=y
CONFIG_EDAC_ZYNQMP=m
@@ -2269,7 +2323,6 @@ CONFIG_EEPROM_AT24=m
CONFIG_EEPROM_AT25=m
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EFI_ARMSTUB_DTB_LOADER=y
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -2392,7 +2445,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2546,6 +2599,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
# CONFIG_FTWDT010_WATCHDOG is not set
+CONFIG_FUEL_GAUGE_MM8013=m
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2676,6 +2730,7 @@ CONFIG_GPIO_PCI_IDIO_16=m
# CONFIG_GPIO_PISOSR is not set
CONFIG_GPIO_PL061=y
CONFIG_GPIO_RASPBERRYPI_EXP=m
+CONFIG_GPIO_RCAR=m
# CONFIG_GPIO_RDC321X is not set
CONFIG_GPIO_ROCKCHIP=y
# CONFIG_GPIO_SAMA5D2_PIOBU is not set
@@ -2905,6 +2960,7 @@ CONFIG_HNS_ENET=m
CONFIG_HOLTEK_FF=y
# CONFIG_HOSTAP is not set
CONFIG_HOTPLUG_CPU=y
+CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA=m
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -3058,9 +3114,13 @@ CONFIG_I2C_PXA=m
CONFIG_I2C_QCOM_CCI=m
CONFIG_I2C_QCOM_GENI=m
CONFIG_I2C_QUP=m
+# CONFIG_I2C_RCAR is not set
+# CONFIG_I2C_RIIC is not set
CONFIG_I2C_RK3X=y
# CONFIG_I2C_ROBOTFUZZ_OSIF is not set
+# CONFIG_I2C_RZV2M is not set
CONFIG_I2C_SCMI=m
+# CONFIG_I2C_SH_MOBILE is not set
CONFIG_I2C_SI470X=m
# CONFIG_I2C_SI4713 is not set
CONFIG_I2C_SIMTEC=m
@@ -3102,6 +3162,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -3194,7 +3255,6 @@ CONFIG_IMA_NG_TEMPLATE=y
CONFIG_IMA_READ_POLICY=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -3403,6 +3463,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=m
CONFIG_INTERCONNECT_QCOM_SDM845=m
# CONFIG_INTERCONNECT_QCOM_SDX55 is not set
# CONFIG_INTERCONNECT_QCOM_SDX65 is not set
+CONFIG_INTERCONNECT_QCOM_SDX75=m
# CONFIG_INTERCONNECT_QCOM_SM6350 is not set
CONFIG_INTERCONNECT_QCOM_SM8150=m
CONFIG_INTERCONNECT_QCOM_SM8250=m
@@ -3465,8 +3526,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
CONFIG_IPMB_DEVICE_INTERFACE=m
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3477,6 +3536,7 @@ CONFIG_IPMI_POWEROFF=m
CONFIG_IPMI_SI=m
CONFIG_IPMI_SSIF=m
CONFIG_IPMI_WATCHDOG=m
+# CONFIG_IPMMU_VMSA is not set
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
@@ -3799,7 +3859,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3835,6 +3895,7 @@ CONFIG_KUNIT=m
CONFIG_KUNIT_TEST=m
# CONFIG_KUNPENG_HCCS is not set
CONFIG_KUSER_HELPERS=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3892,6 +3953,7 @@ CONFIG_LEDS_GPIO=m
CONFIG_LEDS_GROUP_MULTICOLOR=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -4019,6 +4081,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -4033,6 +4096,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -4041,6 +4105,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
@@ -4112,6 +4177,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
CONFIG_MCP320X=m
CONFIG_MCP3422=m
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -4122,6 +4188,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -4143,7 +4210,7 @@ CONFIG_MDIO_I2C=m
CONFIG_MDIO_IPQ8064=m
# CONFIG_MDIO_MSCC_MIIM is not set
CONFIG_MDIO_MVUSB=m
-# CONFIG_MDIO_OCTEON is not set
+CONFIG_MDIO_OCTEON=m
# CONFIG_MDIO_SUN4I is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MDIO_XGENE=m
@@ -4157,6 +4224,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -4398,18 +4466,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -4497,7 +4569,11 @@ CONFIG_MMC_SDHCI_PLTFM=m
CONFIG_MMC_SDHCI_PXAV3=m
CONFIG_MMC_SDHCI_TEGRA=m
CONFIG_MMC_SDHCI_XENON=m
+CONFIG_MMC_SDHI_INTERNAL_DMAC=m
+CONFIG_MMC_SDHI=m
+# CONFIG_MMC_SDHI_SYS_DMAC is not set
CONFIG_MMC_SDRICOH_CS=m
+# CONFIG_MMC_SH_MMCIF is not set
CONFIG_MMC_SPI=m
# CONFIG_MMC_STM32_SDMMC is not set
CONFIG_MMC_SUNXI=m
@@ -4530,6 +4606,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -4620,6 +4699,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -4678,6 +4759,7 @@ CONFIG_MTD_NAND_NANDSIM=m
# CONFIG_MTD_NAND_PL35X is not set
# CONFIG_MTD_NAND_PLATFORM is not set
# CONFIG_MTD_NAND_QCOM is not set
+# CONFIG_MTD_NAND_RENESAS is not set
# CONFIG_MTD_NAND_RICOH is not set
# CONFIG_MTD_NAND_ROCKCHIP is not set
# CONFIG_MTD_NAND_SUNXI is not set
@@ -4745,7 +4827,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
CONFIG_MWL8K=m
-# CONFIG_MX3_IPU is not set
CONFIG_MXC4005=m
CONFIG_MXC6255=m
# CONFIG_MXS_DMA is not set
@@ -4799,9 +4880,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4874,12 +4952,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4972,6 +5050,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4984,15 +5063,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
CONFIG_NET_SB1000=y
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -5026,6 +5102,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -5163,7 +5240,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -5357,11 +5434,13 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_NVIDIA_SHIELD_FF=y
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
CONFIG_NVME_APPLE=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_APPLE_EFUSES=m
# CONFIG_NVMEM_IMX_IIM is not set
@@ -5397,7 +5476,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -5545,6 +5626,7 @@ CONFIG_PCI_AARDVARK=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -5583,10 +5665,13 @@ CONFIG_PCIE_MOBIVEIL=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCIE_PTM=y
CONFIG_PCIE_QCOM=y
+# CONFIG_PCIE_RCAR_GEN4_HOST is not set
+# CONFIG_PCIE_RCAR_HOST is not set
CONFIG_PCIE_ROCKCHIP_DW_HOST=y
CONFIG_PCIE_ROCKCHIP_HOST=y
CONFIG_PCIE_TEGRA194_HOST=y
CONFIG_PCIE_XILINX_CPM=y
+CONFIG_PCIE_XILINX_DMA_PL=y
CONFIG_PCIE_XILINX_NWL=y
CONFIG_PCIE_XILINX=y
# CONFIG_PCI_FTPCI100 is not set
@@ -5620,6 +5705,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -5669,7 +5755,7 @@ CONFIG_PHY_MESON_G12A_MIPI_DPHY_ANALOG=y
CONFIG_PHY_MESON_G12A_USB2=y
CONFIG_PHY_MESON_G12A_USB3_PCIE=m
CONFIG_PHY_MESON_GXL_USB2=m
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
+CONFIG_PHY_MIXEL_LVDS_PHY=m
CONFIG_PHY_MIXEL_MIPI_DPHY=m
CONFIG_PHY_MVEBU_A3700_COMPHY=m
CONFIG_PHY_MVEBU_A3700_UTMI=m
@@ -5703,6 +5789,11 @@ CONFIG_PHY_QCOM_USB_HSIC=m
CONFIG_PHY_QCOM_USB_HS=m
CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=m
CONFIG_PHY_QCOM_USB_SS=m
+# CONFIG_PHY_R8A779F0_ETHERNET_SERDES is not set
+# CONFIG_PHY_RCAR_GEN2 is not set
+# CONFIG_PHY_RCAR_GEN3_PCIE is not set
+CONFIG_PHY_RCAR_GEN3_USB2=m
+# CONFIG_PHY_RCAR_GEN3_USB3 is not set
CONFIG_PHY_ROCKCHIP_DPHY_RX0=m
CONFIG_PHY_ROCKCHIP_DP=m
CONFIG_PHY_ROCKCHIP_EMMC=m
@@ -5735,6 +5826,7 @@ CONFIG_PINCONF=y
CONFIG_PINCTRL_ALDERLAKE=m
CONFIG_PINCTRL_AMD=y
CONFIG_PINCTRL_AMLOGIC_C3=y
+CONFIG_PINCTRL_AMLOGIC_T7=y
CONFIG_PINCTRL_APPLE_GPIO=m
CONFIG_PINCTRL_AS3722=y
CONFIG_PINCTRL_AXP209=m
@@ -5855,12 +5947,13 @@ CONFIG_PINCTRL_SUN50I_H6=y
# CONFIG_PINCTRL_SUN8I_A33 is not set
# CONFIG_PINCTRL_SUN8I_A83T is not set
# CONFIG_PINCTRL_SUN8I_A83T_R is not set
-# CONFIG_PINCTRL_SUN8I_H3 is not set
CONFIG_PINCTRL_SUN8I_H3_R=y
+CONFIG_PINCTRL_SUN8I_H3=y
# CONFIG_PINCTRL_SUN8I_V3S is not set
# CONFIG_PINCTRL_SUN9I_A80 is not set
# CONFIG_PINCTRL_SUN9I_A80_R is not set
# CONFIG_PINCTRL_SX150X is not set
+CONFIG_PINCTRL_TEGRA234=y
CONFIG_PINCTRL=y
CONFIG_PINCTRL_ZYNQMP=y
# CONFIG_PING is not set
@@ -5906,7 +5999,6 @@ CONFIG_POWERCAP=y
CONFIG_POWER_MLXBF=m
CONFIG_POWER_RESET_AS3722=y
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -6041,6 +6133,8 @@ CONFIG_PWM_MESON=m
CONFIG_PWM_OMAP_DMTIMER=m
CONFIG_PWM_PCA9685=m
CONFIG_PWM_RASPBERRYPI_POE=m
+# CONFIG_PWM_RCAR is not set
+# CONFIG_PWM_RENESAS_TPU is not set
CONFIG_PWM_ROCKCHIP=m
CONFIG_PWM_STMPE=y
CONFIG_PWM_SUN4I=m
@@ -6107,6 +6201,8 @@ CONFIG_QCOM_Q6V5_WCSS=m
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
CONFIG_QCOM_QFPROM=m
CONFIG_QCOM_QMI_HELPERS=m
+CONFIG_QCOM_QSEECOM_UEFISECAPP=y
+CONFIG_QCOM_QSEECOM=y
CONFIG_QCOM_RAMP_CTRL=m
CONFIG_QCOM_RMTFS_MEM=m
CONFIG_QCOM_RPMHPD=y
@@ -6213,6 +6309,10 @@ CONFIG_RASPBERRYPI_POWER=y
CONFIG_RATIONAL_KUNIT_TEST=m
# CONFIG_RAVE_SP_CORE is not set
# CONFIG_RBTREE_TEST is not set
+# CONFIG_RCAR_DMAC is not set
+# CONFIG_RCAR_GEN3_THERMAL is not set
+# CONFIG_RCAR_REMOTEPROC is not set
+# CONFIG_RCAR_THERMAL is not set
CONFIG_RC_ATI_REMOTE=m
CONFIG_RC_CORE=y
CONFIG_RC_DECODERS=y
@@ -6248,7 +6348,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=y
@@ -6292,6 +6392,7 @@ CONFIG_REGULATOR_HI655X=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77650=m
CONFIG_REGULATOR_MAX77686=m
@@ -6378,7 +6479,14 @@ CONFIG_RELOCATABLE=y
# CONFIG_REMOTEPROC_CDEV is not set
CONFIG_REMOTEPROC=y
CONFIG_REMOTE_TARGET=m
+# CONFIG_RENESAS_OSTM is not set
# CONFIG_RENESAS_PHY is not set
+# CONFIG_RENESAS_RPCIF is not set
+# CONFIG_RENESAS_RZAWDT is not set
+# CONFIG_RENESAS_RZG2LWDT is not set
+# CONFIG_RENESAS_RZN1WDT is not set
+# CONFIG_RENESAS_USB_DMAC is not set
+# CONFIG_RENESAS_WDT is not set
# CONFIG_RESET_ATTACK_MITIGATION is not set
CONFIG_RESET_CONTROLLER=y
CONFIG_RESET_HISI=y
@@ -6389,6 +6497,7 @@ CONFIG_RESET_MESON=m
CONFIG_RESET_QCOM_AOSS=y
CONFIG_RESET_QCOM_PDC=m
CONFIG_RESET_RASPBERRYPI=y
+CONFIG_RESET_RZG2L_USBPHY_CTRL=m
CONFIG_RESET_SCMI=y
CONFIG_RESET_SIMPLE=y
CONFIG_RESET_TI_SCI=m
@@ -6445,6 +6554,7 @@ CONFIG_ROCKCHIP_VOP2=y
CONFIG_ROCKCHIP_VOP=y
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -6504,7 +6614,6 @@ CONFIG_RTC_DRV_ARMADA38X=m
CONFIG_RTC_DRV_AS3722=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
CONFIG_RTC_DRV_CADENCE=m
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_CROS_EC=m
@@ -6592,6 +6701,7 @@ CONFIG_RTC_DRV_RX8581=m
CONFIG_RTC_DRV_S35390A=m
# CONFIG_RTC_DRV_SA1100 is not set
CONFIG_RTC_DRV_SD3078=m
+# CONFIG_RTC_DRV_SH is not set
CONFIG_RTC_DRV_SNVS=m
CONFIG_RTC_DRV_STK17TA8=m
CONFIG_RTC_DRV_SUN6I=y
@@ -6667,6 +6777,10 @@ CONFIG_RV_REACT_PRINTK=y
CONFIG_RV=y
CONFIG_RXKAD=y
# CONFIG_RXPERF is not set
+# CONFIG_RZ_DMAC is not set
+# CONFIG_RZG2L_ADC is not set
+# CONFIG_RZG2L_THERMAL is not set
+# CONFIG_RZ_MTU3 is not set
CONFIG_S2IO=m
# CONFIG_S390_KPROBES_SANITY_TEST is not set
# CONFIG_S390_MODULES_SANITY_TEST is not set
@@ -6687,6 +6801,7 @@ CONFIG_SATA_MV=m
CONFIG_SATA_PMP=y
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_RCAR is not set
CONFIG_SATA_SIL24=m
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
@@ -6836,6 +6951,7 @@ CONFIG_SCSI_UFS_HISI=m
CONFIG_SCSI_UFS_HPB=y
CONFIG_SCSI_UFS_HWMON=y
CONFIG_SCSI_UFS_QCOM=m
+# CONFIG_SCSI_UFS_RENESAS is not set
CONFIG_SCSI_UFS_TI_J721E=m
CONFIG_SCSI_VIRTIO=m
CONFIG_SCSI_WD719X=m
@@ -6865,11 +6981,12 @@ CONFIG_SDM_VIDEOCC_845=m
# CONFIG_SDX_GCC_75 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -7011,6 +7128,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -7071,6 +7189,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -7144,6 +7263,7 @@ CONFIG_SERIAL_8250_CS=m
CONFIG_SERIAL_8250_DFL=m
CONFIG_SERIAL_8250_DMA=y
CONFIG_SERIAL_8250_DW=y
+# CONFIG_SERIAL_8250_EM is not set
CONFIG_SERIAL_8250_EXAR=m
CONFIG_SERIAL_8250_EXTENDED=y
# CONFIG_SERIAL_8250_FINTEK is not set
@@ -7206,6 +7326,11 @@ CONFIG_SERIAL_SC16IS7XX_I2C=y
CONFIG_SERIAL_SC16IS7XX=m
CONFIG_SERIAL_SC16IS7XX_SPI=y
# CONFIG_SERIAL_SCCNXP is not set
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_SH_SCI_DMA=y
+CONFIG_SERIAL_SH_SCI_EARLYCON=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=18
+CONFIG_SERIAL_SH_SCI=y
# CONFIG_SERIAL_SIFIVE is not set
# CONFIG_SERIAL_SPRD is not set
# CONFIG_SERIAL_ST_ASC is not set
@@ -7272,7 +7397,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -7286,12 +7410,14 @@ CONFIG_SMC91X=m
# CONFIG_SM_CAMCC_6350 is not set
# CONFIG_SM_CAMCC_8250 is not set
# CONFIG_SM_CAMCC_8450 is not set
+# CONFIG_SM_CAMCC_8550 is not set
CONFIG_SMC_DIAG=m
CONFIG_SMC=m
# CONFIG_SM_DISPCC_8250 is not set
CONFIG_SM_DISPCC_8450=m
# CONFIG_SM_DISPCC_8550 is not set
# CONFIG_SM_FTL is not set
+# CONFIG_SM_GCC_4450 is not set
# CONFIG_SM_GCC_6115 is not set
# CONFIG_SM_GCC_6125 is not set
# CONFIG_SM_GCC_6350 is not set
@@ -7326,7 +7452,7 @@ CONFIG_SMS_USB_DRV=m
# CONFIG_SM_TCSRCC_8550 is not set
# CONFIG_SM_VIDEOCC_8150 is not set
# CONFIG_SM_VIDEOCC_8250 is not set
-# CONFIG_SM_VIDEOCC_8350 is not set
+CONFIG_SM_VIDEOCC_8350=m
# CONFIG_SM_VIDEOCC_8450 is not set
# CONFIG_SM_VIDEOCC_8550 is not set
CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0
@@ -7395,6 +7521,7 @@ CONFIG_SND_FM801=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -7547,8 +7674,10 @@ CONFIG_SND_SOC_APQ8016_SBC=m
CONFIG_SND_SOC_ARNDALE=m
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -7754,6 +7883,7 @@ CONFIG_SND_SOC_PCM512x_SPI=m
# CONFIG_SND_SOC_PEB2466 is not set
CONFIG_SND_SOC_QCOM=m
CONFIG_SND_SOC_QDSP6=m
+# CONFIG_SND_SOC_RCAR is not set
CONFIG_SND_SOC_RK3288_HDMI_ANALOG=m
CONFIG_SND_SOC_RK3328=m
CONFIG_SND_SOC_RK3399_GRU_SOUND=m
@@ -7788,6 +7918,8 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
+# CONFIG_SND_SOC_RZ is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811=m
@@ -7799,6 +7931,7 @@ CONFIG_SND_SOC_SC8280XP=m
CONFIG_SND_SOC_SDM845=m
# CONFIG_SND_SOC_SDW_MOCKUP is not set
CONFIG_SND_SOC_SGTL5000=m
+# CONFIG_SND_SOC_SH4_FSI is not set
CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m
CONFIG_SND_SOC_SIMPLE_MUX=m
# CONFIG_SND_SOC_SM8250 is not set
@@ -8091,8 +8224,12 @@ CONFIG_SPI_QCOM_GENI=m
CONFIG_SPI_QCOM_QSPI=m
CONFIG_SPI_QUP=m
CONFIG_SPI_ROCKCHIP=m
-# CONFIG_SPI_ROCKCHIP_SFC is not set
+CONFIG_SPI_ROCKCHIP_SFC=m
+# CONFIG_SPI_RSPI is not set
+# CONFIG_SPI_RZV2M_CSI is not set
# CONFIG_SPI_SC18IS602 is not set
+# CONFIG_SPI_SH_HSPI is not set
+# CONFIG_SPI_SH_MSIOF is not set
# CONFIG_SPI_SIFIVE is not set
# CONFIG_SPI_SLAVE is not set
CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m
@@ -8325,6 +8462,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -8401,6 +8539,7 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -8473,7 +8612,7 @@ CONFIG_TIFM_7XX1=m
CONFIG_TIFM_CORE=m
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
+CONFIG_TI_ICSSG_PRUETH=m
CONFIG_TI_ICSS_IEP=m
CONFIG_TI_K3_AM65_CPSW_NUSS=m
CONFIG_TI_K3_AM65_CPSW_SWITCHDEV=y
@@ -8688,6 +8827,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
CONFIG_TYPEC_QCOM_PMIC=m
# CONFIG_TYPEC_RT1711H is not set
@@ -8699,7 +8839,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TPS6598X=m
CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_WUSB3801=m
CONFIG_TYPHOON=m
CONFIG_UACCE=m
@@ -8792,6 +8931,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
@@ -8856,6 +8996,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y
# CONFIG_USB_EHSET_TEST_FIXTURE is not set
CONFIG_USB_EMI26=m
CONFIG_USB_EMI62=m
+# CONFIG_USB_EMXX is not set
CONFIG_USB_EPSON2888=y
# CONFIG_USB_ETH is not set
CONFIG_USB_EZUSB_FX2=m
@@ -8968,6 +9109,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_M66592 is not set
CONFIG_USB_MA901=m
@@ -9028,6 +9170,7 @@ CONFIG_USB_OTG_FSM=m
# CONFIG_USB_OTG_PRODUCTLIST is not set
CONFIG_USB_OTG=y
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -9044,6 +9187,9 @@ CONFIG_USB_QCOM_EUD=m
CONFIG_USB_RAINSHADOW_CEC=m
# CONFIG_USB_RAREMONO is not set
CONFIG_USB_RAW_GADGET=m
+# CONFIG_USB_RENESAS_USB3 is not set
+# CONFIG_USB_RENESAS_USBF is not set
+# CONFIG_USB_RENESAS_USBHS is not set
CONFIG_USB_ROLE_SWITCH=y
CONFIG_USB_RTL8150=m
CONFIG_USB_RTL8152=m
@@ -9155,6 +9301,7 @@ CONFIG_USB_XHCI_MVEBU=m
CONFIG_USB_XHCI_PCI_RENESAS=y
CONFIG_USB_XHCI_PCI=y
CONFIG_USB_XHCI_PLATFORM=m
+CONFIG_USB_XHCI_RCAR=m
CONFIG_USB_XHCI_TEGRA=m
CONFIG_USB_XUSBATM=m
CONFIG_USB=y
@@ -9325,7 +9472,7 @@ CONFIG_VIDEO_IMX8_ISI=m
CONFIG_VIDEO_IMX8_ISI_M2M=y
CONFIG_VIDEO_IMX8_JPEG=m
CONFIG_VIDEO_IMX8MQ_MIPI_CSI2=m
-CONFIG_VIDEO_IMX_MEDIA=m
+# CONFIG_VIDEO_IMX_MEDIA is not set
CONFIG_VIDEO_IMX_MIPI_CSIS=m
CONFIG_VIDEO_IMX_PXP=m
# CONFIG_VIDEO_IPU3_CIO2 is not set
@@ -9342,10 +9489,12 @@ CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
CONFIG_VIDEO_MESON_GE2D=m
CONFIG_VIDEO_MESON_VDEC=m
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -9393,12 +9542,19 @@ CONFIG_VIDEO_PVRUSB2=m
CONFIG_VIDEO_PVRUSB2_SYSFS=y
CONFIG_VIDEO_QCOM_CAMSS=m
CONFIG_VIDEO_QCOM_VENUS=m
+# CONFIG_VIDEO_RCAR_CSI2 is not set
+# CONFIG_VIDEO_RCAR_ISP is not set
+# CONFIG_VIDEO_RCAR_VIN is not set
CONFIG_VIDEO_RDACM20=m
# CONFIG_VIDEO_RDACM21 is not set
+# CONFIG_VIDEO_RENESAS_FCP is not set
+# CONFIG_VIDEO_RENESAS_JPU is not set
CONFIG_VIDEO_RJ54N1=m
CONFIG_VIDEO_ROCKCHIP_ISP1=m
CONFIG_VIDEO_ROCKCHIP_RGA=m
CONFIG_VIDEO_ROCKCHIP_VDEC=m
+# CONFIG_VIDEO_RZG2L_CRU is not set
+# CONFIG_VIDEO_RZG2L_CSI2 is not set
CONFIG_VIDEO_S5C73M3=m
CONFIG_VIDEO_S5K4ECGX=m
CONFIG_VIDEO_S5K5BAF=m
@@ -9449,6 +9605,7 @@ CONFIG_VIDEO_THS7303=m
CONFIG_VIDEO_THS8200=m
CONFIG_VIDEO_TI_CAL=m
CONFIG_VIDEO_TI_CAL_MC=y
+CONFIG_VIDEO_TI_J721E_CSI2RX=m
CONFIG_VIDEO_TLV320AIC23B=m
CONFIG_VIDEO_TM6000_ALSA=m
CONFIG_VIDEO_TM6000_DVB=m
@@ -9654,6 +9811,7 @@ CONFIG_XDP_SOCKETS=y
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
# CONFIG_XEN is not set
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -9772,19 +9930,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-rhel.config b/SOURCES/kernel-aarch64-rhel.config
index 5a535cf..cfcc4f8 100644
--- a/SOURCES/kernel-aarch64-rhel.config
+++ b/SOURCES/kernel-aarch64-rhel.config
@@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m
# CONFIG_AMD_XGBE_DCB is not set
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
# CONFIG_AMT is not set
# CONFIG_ANDROID_BINDER_IPC is not set
@@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
CONFIG_ARCH_MXC=y
# CONFIG_ARCH_NPCM is not set
CONFIG_ARCH_NXP=y
+CONFIG_ARCH_PENSANDO=y
CONFIG_ARCH_QCOM=y
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_REALTEK is not set
@@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -375,6 +378,7 @@ CONFIG_ARM_PMU=y
# CONFIG_ARM_QCOM_CPUFREQ_HW is not set
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
# CONFIG_ARM_SCMI_POWER_CONTROL is not set
CONFIG_ARM_SCMI_POWER_DOMAIN=m
CONFIG_ARM_SCMI_PROTOCOL=y
@@ -407,6 +411,7 @@ CONFIG_ARM_TI_CPUFREQ=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -520,6 +525,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -655,7 +661,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -748,7 +753,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -828,6 +832,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
# CONFIG_CFG80211_DEBUGFS is not set
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -909,6 +914,7 @@ CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM_NS2=y
CONFIG_CLK_BCM_SR=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -1001,7 +1007,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1085,7 +1090,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
CONFIG_CRYPTO_AES_ARM64=y
-CONFIG_CRYPTO_AES_GCM_P10=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_ANSI_CPRNG=m
@@ -1102,7 +1106,6 @@ CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
CONFIG_CRYPTO_CHACHA20_NEON=y
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CMAC=y
# CONFIG_CRYPTO_CRC32C_VPMSUM is not set
@@ -1189,6 +1192,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1213,7 +1221,6 @@ CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
CONFIG_CRYPTO_POLY1305_NEON=y
-# CONFIG_CRYPTO_POLY1305_P10 is not set
# CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1477,6 +1484,7 @@ CONFIG_DPAA2_CONSOLE=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1549,6 +1557,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
CONFIG_DRM_IMX_DCSS=m
# CONFIG_DRM_IMX_LCDC is not set
# CONFIG_DRM_IMX_LCDIF is not set
@@ -1578,38 +1587,92 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LG_LG4573 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
# CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1629,7 +1692,8 @@ CONFIG_DRM_RADEON_USERPTR=y
# CONFIG_DRM_SIMPLE_BRIDGE is not set
CONFIG_DRM_SIMPLEDRM=y
# CONFIG_DRM_SSD130X is not set
-# CONFIG_DRM_TEGRA is not set
+# CONFIG_DRM_TEGRA_DEBUG is not set
+CONFIG_DRM_TEGRA=m
# CONFIG_DRM_THINE_THC63LVD1024 is not set
# CONFIG_DRM_TI_DLPC3433 is not set
# CONFIG_DRM_TIDSS is not set
@@ -1797,7 +1861,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1831,7 +1894,12 @@ CONFIG_ENIC=m
# CONFIG_EPIC100 is not set
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1896,7 +1964,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2013,7 +2081,9 @@ CONFIG_FSL_PQ_MDIO=m
# CONFIG_FSL_RCPM is not set
CONFIG_FSL_XGMAC_MDIO=m
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -2021,6 +2091,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2173,6 +2244,7 @@ CONFIG_GPIO_XLP=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2356,6 +2428,7 @@ CONFIG_HNS_ENET=m
CONFIG_HNS=m
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2519,6 +2592,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2532,6 +2606,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2601,7 +2676,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2735,6 +2809,7 @@ CONFIG_INPUT_SPARSEKMAP=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2777,6 +2852,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2804,7 +2880,8 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3100,7 +3177,7 @@ CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_KEYS_REQUEST_CACHE is not set
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3131,6 +3208,7 @@ CONFIG_KUNIT_TEST=m
CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3298,6 +3376,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3312,6 +3391,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3320,6 +3400,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3386,6 +3467,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3423,6 +3505,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3489,7 +3572,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3630,6 +3713,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3746,6 +3830,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3818,6 +3905,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3895,7 +3984,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
# CONFIG_MWL8K is not set
-# CONFIG_MX3_IPU is not set
# CONFIG_MXC4005 is not set
# CONFIG_MXC6255 is not set
# CONFIG_MXS_DMA is not set
@@ -3940,9 +4028,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4064,6 +4149,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4076,15 +4162,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4113,6 +4196,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4217,7 +4301,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4415,9 +4499,11 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_BCM_OCOTP is not set
# CONFIG_NVMEM_IMX_IIM is not set
@@ -4444,7 +4530,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4562,6 +4650,7 @@ CONFIG_PCC=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -4627,6 +4716,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4706,6 +4796,7 @@ CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_AMD is not set
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4794,7 +4885,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -4951,6 +5041,7 @@ CONFIG_QCOM_L3_PMU=y
# CONFIG_QCOM_PDC is not set
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
# CONFIG_QCOM_QFPROM is not set
+# CONFIG_QCOM_QSEECOM is not set
# CONFIG_QCOM_RAMP_CTRL is not set
# CONFIG_QCOM_RMTFS_MEM is not set
# CONFIG_QCOM_RPMH is not set
@@ -4979,7 +5070,7 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
+CONFIG_QORIQ_THERMAL=m
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -5087,6 +5178,7 @@ CONFIG_REGULATOR_GPIO=y
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX20086 is not set
# CONFIG_REGULATOR_MAX20411 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77686=m
# CONFIG_REGULATOR_MAX77826 is not set
@@ -5191,6 +5283,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5231,7 +5324,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5521,6 +5613,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5676,6 +5769,7 @@ CONFIG_SENSORS_LTC2945=m
# CONFIG_SENSORS_LTC2978 is not set
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
# CONFIG_SENSORS_LTC4151 is not set
@@ -5734,6 +5828,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
# CONFIG_SENSORS_PMBUS is not set
+# CONFIG_SENSORS_POWERZ is not set
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -5910,7 +6005,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -5994,6 +6088,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6130,8 +6225,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6222,6 +6319,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6336,12 +6434,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6364,6 +6456,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6559,7 +6652,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
CONFIG_SOC_IMX8M=y
CONFIG_SOC_IMX9=m
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
@@ -6790,6 +6882,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6856,6 +6949,7 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7087,6 +7181,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7165,6 +7260,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CONN_GPIO=m
@@ -7268,6 +7364,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7311,6 +7408,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7461,7 +7559,10 @@ CONFIG_VEXPRESS_CONFIG=y
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
CONFIG_VFIO_FSL_MC=m
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
# CONFIG_VFIO_MDEV is not set
@@ -7576,11 +7677,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7915,19 +8018,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-rt-debug-rhel.config b/SOURCES/kernel-aarch64-rt-debug-rhel.config
index ef78813..9fbeebc 100644
--- a/SOURCES/kernel-aarch64-rt-debug-rhel.config
+++ b/SOURCES/kernel-aarch64-rt-debug-rhel.config
@@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m
# CONFIG_AMD_XGBE_DCB is not set
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
# CONFIG_AMT is not set
# CONFIG_ANDROID_BINDER_IPC is not set
@@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
CONFIG_ARCH_MXC=y
# CONFIG_ARCH_NPCM is not set
CONFIG_ARCH_NXP=y
+CONFIG_ARCH_PENSANDO=y
CONFIG_ARCH_QCOM=y
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_REALTEK is not set
@@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -375,6 +378,7 @@ CONFIG_ARM_PMU=y
# CONFIG_ARM_QCOM_CPUFREQ_HW is not set
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
# CONFIG_ARM_SCMI_POWER_CONTROL is not set
CONFIG_ARM_SCMI_POWER_DOMAIN=m
CONFIG_ARM_SCMI_PROTOCOL=y
@@ -407,6 +411,7 @@ CONFIG_ARM_TI_CPUFREQ=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -520,6 +525,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -655,7 +661,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -748,7 +753,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -828,6 +832,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -909,6 +914,7 @@ CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM_NS2=y
CONFIG_CLK_BCM_SR=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -1001,7 +1007,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1086,7 +1091,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
CONFIG_CRYPTO_AES_ARM64=y
-CONFIG_CRYPTO_AES_GCM_P10=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_ANSI_CPRNG=m
@@ -1103,7 +1107,6 @@ CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
CONFIG_CRYPTO_CHACHA20_NEON=y
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CMAC=y
# CONFIG_CRYPTO_CRC32C_VPMSUM is not set
@@ -1190,6 +1193,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1214,7 +1222,6 @@ CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
CONFIG_CRYPTO_POLY1305_NEON=y
-# CONFIG_CRYPTO_POLY1305_P10 is not set
# CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1487,6 +1494,7 @@ CONFIG_DPAA2_CONSOLE=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1559,6 +1567,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
CONFIG_DRM_IMX_DCSS=m
# CONFIG_DRM_IMX_LCDC is not set
# CONFIG_DRM_IMX_LCDIF is not set
@@ -1588,38 +1597,92 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LG_LG4573 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
# CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1639,7 +1702,8 @@ CONFIG_DRM_RADEON_USERPTR=y
# CONFIG_DRM_SIMPLE_BRIDGE is not set
CONFIG_DRM_SIMPLEDRM=y
# CONFIG_DRM_SSD130X is not set
-# CONFIG_DRM_TEGRA is not set
+# CONFIG_DRM_TEGRA_DEBUG is not set
+CONFIG_DRM_TEGRA=m
# CONFIG_DRM_THINE_THC63LVD1024 is not set
# CONFIG_DRM_TI_DLPC3433 is not set
# CONFIG_DRM_TIDSS is not set
@@ -1843,7 +1907,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1877,7 +1940,12 @@ CONFIG_ENIC=m
# CONFIG_EPIC100 is not set
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1950,7 +2018,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2067,7 +2135,9 @@ CONFIG_FSL_PQ_MDIO=m
# CONFIG_FSL_RCPM is not set
CONFIG_FSL_XGMAC_MDIO=m
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -2075,6 +2145,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2228,6 +2299,7 @@ CONFIG_GPIO_XLP=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
CONFIG_GUP_TEST=y
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2412,6 +2484,7 @@ CONFIG_HNS_ENET=m
CONFIG_HNS=m
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2575,6 +2648,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2588,6 +2662,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2657,7 +2732,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2791,6 +2865,7 @@ CONFIG_INPUT_SPARSEKMAP=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2833,6 +2908,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2860,7 +2936,8 @@ CONFIG_IOMMU_DEBUGFS=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3162,7 +3239,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3192,6 +3269,7 @@ CONFIG_KUNIT_TEST=m
CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3359,6 +3437,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3373,6 +3452,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3381,6 +3461,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3447,6 +3528,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3484,6 +3566,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3555,7 +3638,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3696,6 +3779,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3812,6 +3896,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3884,6 +3971,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3961,7 +4050,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
# CONFIG_MWL8K is not set
-# CONFIG_MX3_IPU is not set
# CONFIG_MXC4005 is not set
# CONFIG_MXC6255 is not set
# CONFIG_MXS_DMA is not set
@@ -4006,9 +4094,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4130,6 +4215,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4142,15 +4228,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4179,6 +4262,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4283,7 +4367,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4481,9 +4565,11 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_BCM_OCOTP is not set
# CONFIG_NVMEM_IMX_IIM is not set
@@ -4510,7 +4596,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4630,6 +4718,7 @@ CONFIG_PCC=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -4695,6 +4784,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4774,6 +4864,7 @@ CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_AMD is not set
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4862,7 +4953,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -5023,6 +5113,7 @@ CONFIG_QCOM_L3_PMU=y
# CONFIG_QCOM_PDC is not set
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
# CONFIG_QCOM_QFPROM is not set
+# CONFIG_QCOM_QSEECOM is not set
# CONFIG_QCOM_RAMP_CTRL is not set
# CONFIG_QCOM_RMTFS_MEM is not set
# CONFIG_QCOM_RPMH is not set
@@ -5051,7 +5142,7 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
+CONFIG_QORIQ_THERMAL=m
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -5162,6 +5253,7 @@ CONFIG_REGULATOR_GPIO=y
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX20086 is not set
# CONFIG_REGULATOR_MAX20411 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77686=m
# CONFIG_REGULATOR_MAX77826 is not set
@@ -5266,6 +5358,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5306,7 +5399,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5596,6 +5688,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5751,6 +5844,7 @@ CONFIG_SENSORS_LTC2945=m
# CONFIG_SENSORS_LTC2978 is not set
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
# CONFIG_SENSORS_LTC4151 is not set
@@ -5809,6 +5903,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
# CONFIG_SENSORS_PMBUS is not set
+# CONFIG_SENSORS_POWERZ is not set
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -5985,8 +6080,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
-# CONFIG_SLOB is not set
# CONFIG_SLUB_CPU_PARTIAL is not set
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -6071,6 +6164,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6208,8 +6302,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6300,6 +6396,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6414,12 +6511,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6442,6 +6533,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6638,7 +6730,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
CONFIG_SOC_IMX8M=y
CONFIG_SOC_IMX9=m
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
@@ -6869,6 +6960,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6935,6 +7027,7 @@ CONFIG_TEST_LIST_SORT=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7166,6 +7259,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7245,6 +7339,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CONN_GPIO=m
@@ -7348,6 +7443,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7391,6 +7487,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7541,7 +7638,10 @@ CONFIG_VEXPRESS_CONFIG=y
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
CONFIG_VFIO_FSL_MC=m
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
# CONFIG_VFIO_MDEV is not set
@@ -7656,11 +7756,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -8001,19 +8103,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-rt-rhel.config b/SOURCES/kernel-aarch64-rt-rhel.config
index e21fe86..77d7f10 100644
--- a/SOURCES/kernel-aarch64-rt-rhel.config
+++ b/SOURCES/kernel-aarch64-rt-rhel.config
@@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m
# CONFIG_AMD_XGBE_DCB is not set
CONFIG_AMD_XGBE=m
# CONFIG_AMIGA_PARTITION is not set
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
# CONFIG_AMT is not set
# CONFIG_ANDROID_BINDER_IPC is not set
@@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
CONFIG_ARCH_MXC=y
# CONFIG_ARCH_NPCM is not set
CONFIG_ARCH_NXP=y
+CONFIG_ARCH_PENSANDO=y
CONFIG_ARCH_QCOM=y
CONFIG_ARCH_RANDOM=y
# CONFIG_ARCH_REALTEK is not set
@@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
CONFIG_ARM64_ERRATUM_2645198=y
CONFIG_ARM64_ERRATUM_2658417=y
CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
CONFIG_ARM64_ERRATUM_819472=y
CONFIG_ARM64_ERRATUM_824069=y
CONFIG_ARM64_ERRATUM_826319=y
@@ -375,6 +378,7 @@ CONFIG_ARM_PMU=y
# CONFIG_ARM_QCOM_CPUFREQ_HW is not set
CONFIG_ARM_SBSA_WATCHDOG=m
CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
# CONFIG_ARM_SCMI_POWER_CONTROL is not set
CONFIG_ARM_SCMI_POWER_DOMAIN=m
CONFIG_ARM_SCMI_PROTOCOL=y
@@ -407,6 +411,7 @@ CONFIG_ARM_TI_CPUFREQ=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -520,6 +525,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -655,7 +661,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -748,7 +753,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -828,6 +832,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
# CONFIG_CFG80211_DEBUGFS is not set
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -909,6 +914,7 @@ CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
CONFIG_CLK_BCM_NS2=y
CONFIG_CLK_BCM_SR=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -1001,7 +1007,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1086,7 +1091,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
CONFIG_CRYPTO_AES_ARM64=y
-CONFIG_CRYPTO_AES_GCM_P10=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_ANSI_CPRNG=m
@@ -1103,7 +1107,6 @@ CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
CONFIG_CRYPTO_CHACHA20_NEON=y
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CMAC=y
# CONFIG_CRYPTO_CRC32C_VPMSUM is not set
@@ -1190,6 +1193,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1214,7 +1222,6 @@ CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
CONFIG_CRYPTO_POLY1305_NEON=y
-# CONFIG_CRYPTO_POLY1305_P10 is not set
# CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1479,6 +1486,7 @@ CONFIG_DPAA2_CONSOLE=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1551,6 +1559,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
CONFIG_DRM_IMX_DCSS=m
# CONFIG_DRM_IMX_LCDC is not set
# CONFIG_DRM_IMX_LCDIF is not set
@@ -1580,38 +1589,92 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LG_LG4573 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
# CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1631,7 +1694,8 @@ CONFIG_DRM_RADEON_USERPTR=y
# CONFIG_DRM_SIMPLE_BRIDGE is not set
CONFIG_DRM_SIMPLEDRM=y
# CONFIG_DRM_SSD130X is not set
-# CONFIG_DRM_TEGRA is not set
+# CONFIG_DRM_TEGRA_DEBUG is not set
+CONFIG_DRM_TEGRA=m
# CONFIG_DRM_THINE_THC63LVD1024 is not set
# CONFIG_DRM_TI_DLPC3433 is not set
# CONFIG_DRM_TIDSS is not set
@@ -1835,7 +1899,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1869,7 +1932,12 @@ CONFIG_ENIC=m
# CONFIG_EPIC100 is not set
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1934,7 +2002,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2051,7 +2119,9 @@ CONFIG_FSL_PQ_MDIO=m
# CONFIG_FSL_RCPM is not set
CONFIG_FSL_XGMAC_MDIO=m
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -2059,6 +2129,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ERRATUM_010001=y
# CONFIG_FUJITSU_ES is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2212,6 +2283,7 @@ CONFIG_GPIO_XLP=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2396,6 +2468,7 @@ CONFIG_HNS_ENET=m
CONFIG_HNS=m
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2559,6 +2632,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2572,6 +2646,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2641,7 +2716,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2775,6 +2849,7 @@ CONFIG_INPUT_SPARSEKMAP=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2817,6 +2892,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2844,7 +2920,8 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3140,7 +3217,7 @@ CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_KEYS_REQUEST_CACHE is not set
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3171,6 +3248,7 @@ CONFIG_KUNIT_TEST=m
CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3338,6 +3416,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3352,6 +3431,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3360,6 +3440,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3426,6 +3507,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3463,6 +3545,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3534,7 +3617,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3675,6 +3758,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3791,6 +3875,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3863,6 +3950,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3940,7 +4029,6 @@ CONFIG_MWIFIEX_PCIE=m
CONFIG_MWIFIEX_SDIO=m
CONFIG_MWIFIEX_USB=m
# CONFIG_MWL8K is not set
-# CONFIG_MX3_IPU is not set
# CONFIG_MXC4005 is not set
# CONFIG_MXC6255 is not set
# CONFIG_MXS_DMA is not set
@@ -3985,9 +4073,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4109,6 +4194,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4121,15 +4207,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4158,6 +4241,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4262,7 +4346,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4460,9 +4544,11 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_BCM_OCOTP is not set
# CONFIG_NVMEM_IMX_IIM is not set
@@ -4489,7 +4575,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4607,6 +4695,7 @@ CONFIG_PCC=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_AL is not set
@@ -4672,6 +4761,7 @@ CONFIG_PCI_XGENE_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4751,6 +4841,7 @@ CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_AMD is not set
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4839,7 +4930,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_HISI=y
@@ -5000,6 +5090,7 @@ CONFIG_QCOM_L3_PMU=y
# CONFIG_QCOM_PDC is not set
CONFIG_QCOM_QDF2400_ERRATUM_0065=y
# CONFIG_QCOM_QFPROM is not set
+# CONFIG_QCOM_QSEECOM is not set
# CONFIG_QCOM_RAMP_CTRL is not set
# CONFIG_QCOM_RMTFS_MEM is not set
# CONFIG_QCOM_RPMH is not set
@@ -5028,7 +5119,7 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
+CONFIG_QORIQ_THERMAL=m
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -5139,6 +5230,7 @@ CONFIG_REGULATOR_GPIO=y
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX20086 is not set
# CONFIG_REGULATOR_MAX20411 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
CONFIG_REGULATOR_MAX77620=y
CONFIG_REGULATOR_MAX77686=m
# CONFIG_REGULATOR_MAX77826 is not set
@@ -5243,6 +5335,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5283,7 +5376,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BBNSM=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5573,6 +5665,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5728,6 +5821,7 @@ CONFIG_SENSORS_LTC2945=m
# CONFIG_SENSORS_LTC2978 is not set
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
# CONFIG_SENSORS_LTC4151 is not set
@@ -5786,6 +5880,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
# CONFIG_SENSORS_PMBUS is not set
+# CONFIG_SENSORS_POWERZ is not set
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -5962,8 +6057,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
-# CONFIG_SLOB is not set
# CONFIG_SLUB_CPU_PARTIAL is not set
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -6048,6 +6141,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6184,8 +6278,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6276,6 +6372,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6390,12 +6487,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6418,6 +6509,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6613,7 +6705,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
CONFIG_SOC_IMX8M=y
CONFIG_SOC_IMX9=m
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
@@ -6844,6 +6935,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6910,6 +7002,7 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7141,6 +7234,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7220,6 +7314,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_TEGRA=m
CONFIG_USB_CHIPIDEA_UDC=y
CONFIG_USB_CONN_GPIO=m
@@ -7323,6 +7418,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7366,6 +7462,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7516,7 +7613,10 @@ CONFIG_VEXPRESS_CONFIG=y
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
CONFIG_VFIO_FSL_MC=m
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
# CONFIG_VFIO_MDEV is not set
@@ -7631,11 +7731,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7976,19 +8078,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-ppc64le-debug-fedora.config b/SOURCES/kernel-ppc64le-debug-fedora.config
index 01ae224..2c014c2 100644
--- a/SOURCES/kernel-ppc64le-debug-fedora.config
+++ b/SOURCES/kernel-ppc64le-debug-fedora.config
@@ -481,6 +481,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y
+CONFIG_BCACHEFS_DEBUG=y
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_LOCK_TIME_STATS=y
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM54140_PHY=m
CONFIG_BCM7XXX_PHY=m
@@ -622,7 +631,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
CONFIG_BRIDGE_CFM=y
CONFIG_BRIDGE_EBT_802_3=m
@@ -730,7 +738,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -810,6 +817,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -898,6 +906,7 @@ CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CIO2_BRIDGE=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -970,7 +979,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1036,6 +1044,7 @@ CONFIG_CROS_EC_TYPEC=m
CONFIG_CROS_EC_UART=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1127,6 +1136,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1225,6 +1239,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
CONFIG_DEBUG_CREDENTIALS=y
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1322,7 +1337,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1413,7 +1427,7 @@ CONFIG_DNS_RESOLVER=m
CONFIG_DP83640_PHY=m
CONFIG_DP83822_PHY=m
CONFIG_DP83848_PHY=m
-# CONFIG_DP83867_PHY is not set
+CONFIG_DP83867_PHY=m
CONFIG_DP83869_PHY=m
# CONFIG_DP83TC811_PHY is not set
# CONFIG_DP83TD510_PHY is not set
@@ -1422,6 +1436,7 @@ CONFIG_DPOT_DAC=m
# CONFIG_DPS310 is not set
CONFIG_DRAGONRISE_FF=y
CONFIG_DRBD_FAULT_INJECTION=y
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL_QAIC is not set
CONFIG_DRM_ACCEL=y
CONFIG_DRM_AMD_ACP=y
@@ -1528,9 +1543,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
@@ -1560,6 +1577,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -1778,7 +1796,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EFI_COCO_SECRET=y
CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y
@@ -1899,7 +1916,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2039,6 +2056,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
# CONFIG_FTR_FIXUP_SELFTEST is not set
+CONFIG_FUEL_GAUGE_MM8013=m
# CONFIG_FUNCTION_ERROR_INJECTION is not set
CONFIG_FUNCTION_GRAPH_RETVAL=y
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -2177,6 +2195,7 @@ CONFIG_GREENASIA_FF=y
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
CONFIG_GTP=m
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
CONFIG_GUP_TEST=y
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2522,6 +2541,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -2608,7 +2628,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2814,8 +2833,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
# CONFIG_IPMB_DEVICE_INTERFACE is not set
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3131,7 +3148,7 @@ CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3169,6 +3186,7 @@ CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
CONFIG_KVM_GUEST=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3226,6 +3244,7 @@ CONFIG_LEDS_GPIO=m
CONFIG_LEDS_GROUP_MULTICOLOR=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -3353,6 +3372,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3367,6 +3387,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3375,6 +3396,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
@@ -3442,6 +3464,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -3452,6 +3475,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -3481,6 +3505,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -3692,18 +3717,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -3804,6 +3833,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3881,6 +3913,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AR7_PARTS is not set
@@ -4028,9 +4062,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4103,12 +4134,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4201,6 +4232,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4212,15 +4244,12 @@ CONFIG_NET_NS=y
CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4254,6 +4283,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
CONFIG_NET_TULIP=y
CONFIG_NET_UDP_TUNNEL=m
CONFIG_NET_VENDOR_3COM=y
@@ -4388,7 +4418,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -4582,8 +4612,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_SHIELD_FF=y
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_LAYOUT_ONIE_TLV=m
CONFIG_NVMEM_LAYOUT_SL28_VPD=m
@@ -4603,7 +4634,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=m
# CONFIG_NVSW_SN2201 is not set
@@ -4726,6 +4759,7 @@ CONFIG_PATA_WINBOND=m
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4774,6 +4808,7 @@ CONFIG_PCI_STUB=y
CONFIG_PCI_SW_SWITCHTEC=m
CONFIG_PCI=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4798,16 +4833,12 @@ CONFIG_PHY_CADENCE_SIERRA=m
CONFIG_PHY_CADENCE_TORRENT=m
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
# CONFIG_PHY_HI3670_USB is not set
# CONFIG_PHY_LAN966X_SERDES is not set
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4901,7 +4932,6 @@ CONFIG_POWERNV_CPUIDLE=y
# CONFIG_POWERNV_OP_PANEL is not set
# CONFIG_POWERPC64_CPU is not set
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
# CONFIG_POWER_RESET_LINKSTATION is not set
@@ -5184,7 +5214,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=y
@@ -5216,6 +5246,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77650=m
# CONFIG_REGULATOR_MAX77826 is not set
CONFIG_REGULATOR_MAX77857=m
@@ -5320,6 +5351,7 @@ CONFIG_RMI4_SPI=m
CONFIG_RMNET=m
# CONFIG_ROCKCHIP_PHY is not set
CONFIG_ROCKER=m
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -5374,7 +5406,6 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_ABEOZ9=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5687,11 +5718,12 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -5832,6 +5864,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -5893,6 +5926,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -6062,7 +6096,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -6152,6 +6185,7 @@ CONFIG_SND_FM801=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6296,8 +6330,10 @@ CONFIG_SND_SOC_AK5558=m
# CONFIG_SND_SOC_ARNDALE is not set
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -6502,12 +6538,6 @@ CONFIG_SND_SOC_PCM3060_SPI=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
CONFIG_SND_SOC_RT1017_SDCA_SDW=m
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6530,6 +6560,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6969,6 +7000,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -7023,6 +7055,7 @@ CONFIG_TEST_LOCKUP=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7087,8 +7120,6 @@ CONFIG_TIFM_7XX1=m
CONFIG_TIFM_CORE=m
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
-CONFIG_TI_ICSS_IEP=m
CONFIG_TI_LMP92064=m
CONFIG_TIME_KUNIT_TEST=m
CONFIG_TIME_NS=y
@@ -7276,6 +7307,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7287,7 +7319,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TPS6598X=m
CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_WUSB3801=m
CONFIG_TYPHOON=m
CONFIG_UACCE=m
@@ -7368,6 +7399,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CONFIGFS_F_MIDI2=y
# CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set
@@ -7498,6 +7530,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
CONFIG_USB_MA901=m
# CONFIG_USB_MASS_STORAGE is not set
@@ -7550,6 +7583,7 @@ CONFIG_USB_ONBOARD_HUB=m
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -7752,7 +7786,7 @@ CONFIG_VIDEO_BT819=m
CONFIG_VIDEO_BT848=m
CONFIG_VIDEO_BT856=m
CONFIG_VIDEO_BT866=m
-CONFIG_VIDEO_CADENCE_CSI2RX=m
+# CONFIG_VIDEO_CADENCE_CSI2RX is not set
CONFIG_VIDEO_CADENCE_CSI2TX=m
# CONFIG_VIDEO_CADENCE is not set
# CONFIG_VIDEO_CAFE_CCIC is not set
@@ -7829,10 +7863,12 @@ CONFIG_VIDEO_M52790=m
CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MAX96712 is not set
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -8121,6 +8157,7 @@ CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_XDP_SOCKETS=y
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -8229,19 +8266,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-ppc64le-debug-rhel.config b/SOURCES/kernel-ppc64le-debug-rhel.config
index 2499a4b..64f780e 100644
--- a/SOURCES/kernel-ppc64le-debug-rhel.config
+++ b/SOURCES/kernel-ppc64le-debug-rhel.config
@@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m
CONFIG_ARCH_FORCE_MAX_ORDER=8
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
CONFIG_ARCH_MEMORY_PROBE=y
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=14
@@ -276,6 +275,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -388,6 +388,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -517,7 +518,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -610,7 +610,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -681,6 +680,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -758,6 +758,7 @@ CONFIG_CIFS_SMB_DIRECT=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -829,7 +830,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -979,6 +979,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1268,6 +1273,7 @@ CONFIG_DP83TC811_PHY=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1340,6 +1346,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1366,36 +1373,90 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1572,7 +1633,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1602,7 +1662,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1676,7 +1741,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1774,7 +1839,9 @@ CONFIG_FS_DAX=y
# CONFIG_FSL_QDMA is not set
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -1783,6 +1850,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
# CONFIG_FTR_FIXUP_SELFTEST is not set
+# CONFIG_FUEL_GAUGE_MM8013 is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
# CONFIG_FUNCTION_GRAPH_RETVAL is not set
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -1921,6 +1989,7 @@ CONFIG_GPIO_SIM=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
CONFIG_GUP_TEST=y
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2093,6 +2162,7 @@ CONFIG_HMM_MIRROR=y
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2243,6 +2313,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2260,6 +2331,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2330,7 +2402,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2444,6 +2515,7 @@ CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2486,6 +2558,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2508,7 +2581,8 @@ CONFIG_IOMMU_DEBUGFS=y
# CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set
CONFIG_IOMMU_DEFAULT_DMA_STRICT=y
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -2806,7 +2880,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -2844,6 +2918,7 @@ CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND=y
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
CONFIG_KVM_BOOK3S_PR_POSSIBLE=y
CONFIG_KVM_GUEST=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3013,6 +3088,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3027,6 +3103,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3035,6 +3112,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3063,7 +3141,6 @@ CONFIG_MANTIS_CORE=m
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
CONFIG_MARVELL_PHY=m
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3097,6 +3174,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3122,7 +3200,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MD_LINEAR=m
# CONFIG_MD_MULTIPATH is not set
@@ -3131,6 +3209,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3197,7 +3276,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3333,6 +3412,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3442,6 +3522,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3516,6 +3599,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3622,9 +3707,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3746,6 +3828,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3758,15 +3841,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -3795,6 +3875,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -3895,7 +3976,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4089,8 +4170,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4112,7 +4194,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=m
# CONFIG_NVSW_SN2201 is not set
@@ -4219,6 +4303,7 @@ CONFIG_PATA_PLATFORM=m
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4266,6 +4351,7 @@ CONFIG_PCI_STUB=y
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4290,8 +4376,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4301,8 +4385,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_LAN966X_SERDES is not set
CONFIG_PHYLIB=y
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4329,6 +4411,7 @@ CONFIG_PID_NS=y
CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4399,7 +4482,6 @@ CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_CPUIDLE=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
CONFIG_POWER_RESET_GPIO=y
# CONFIG_POWER_RESET_LTC2952 is not set
@@ -4600,7 +4682,6 @@ CONFIG_QFMT_V2=y
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
# CONFIG_QRTR is not set
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4702,6 +4783,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -4777,6 +4859,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -4818,7 +4901,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5102,6 +5184,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5256,6 +5339,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5314,6 +5398,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5482,7 +5567,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -5562,6 +5646,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5699,8 +5784,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -5791,6 +5878,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -5905,12 +5993,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -5933,6 +6015,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6129,7 +6212,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6331,6 +6413,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6384,6 +6467,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6604,6 +6688,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6680,6 +6765,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -6778,6 +6864,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
# CONFIG_USB_LED_TRIG is not set
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -6823,6 +6910,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -6967,6 +7055,9 @@ CONFIG_VETH=m
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
# CONFIG_VFIO_MDEV is not set
@@ -7080,11 +7171,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7419,19 +7512,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-ppc64le-fedora.config b/SOURCES/kernel-ppc64le-fedora.config
index 544c116..df182c6 100644
--- a/SOURCES/kernel-ppc64le-fedora.config
+++ b/SOURCES/kernel-ppc64le-fedora.config
@@ -479,6 +479,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM54140_PHY=m
CONFIG_BCM7XXX_PHY=m
@@ -620,7 +629,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
CONFIG_BRIDGE_CFM=y
CONFIG_BRIDGE_EBT_802_3=m
@@ -728,7 +736,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -808,6 +815,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -896,6 +904,7 @@ CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CIO2_BRIDGE=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -968,7 +977,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1034,6 +1042,7 @@ CONFIG_CROS_EC_TYPEC=m
CONFIG_CROS_EC_UART=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1125,6 +1134,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1223,6 +1237,7 @@ CONFIG_DE2104X=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1312,7 +1327,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
# CONFIG_DETECT_HUNG_TASK is not set
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1402,7 +1416,7 @@ CONFIG_DNS_RESOLVER=m
CONFIG_DP83640_PHY=m
CONFIG_DP83822_PHY=m
CONFIG_DP83848_PHY=m
-# CONFIG_DP83867_PHY is not set
+CONFIG_DP83867_PHY=m
CONFIG_DP83869_PHY=m
# CONFIG_DP83TC811_PHY is not set
# CONFIG_DP83TD510_PHY is not set
@@ -1411,6 +1425,7 @@ CONFIG_DPOT_DAC=m
# CONFIG_DPS310 is not set
CONFIG_DRAGONRISE_FF=y
# CONFIG_DRBD_FAULT_INJECTION is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL_QAIC is not set
CONFIG_DRM_ACCEL=y
CONFIG_DRM_AMD_ACP=y
@@ -1517,9 +1532,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
@@ -1549,6 +1566,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -1767,7 +1785,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EFI_COCO_SECRET=y
CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y
@@ -1880,7 +1897,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2020,6 +2037,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
# CONFIG_FTR_FIXUP_SELFTEST is not set
+CONFIG_FUEL_GAUGE_MM8013=m
# CONFIG_FUNCTION_ERROR_INJECTION is not set
CONFIG_FUNCTION_GRAPH_RETVAL=y
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -2158,6 +2176,7 @@ CONFIG_GREENASIA_FF=y
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
CONFIG_GTP=m
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2502,6 +2521,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -2588,7 +2608,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2794,8 +2813,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
# CONFIG_IPMB_DEVICE_INTERFACE is not set
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3104,7 +3121,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3143,6 +3160,7 @@ CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
CONFIG_KVM_GUEST=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -3200,6 +3218,7 @@ CONFIG_LEDS_GPIO=m
CONFIG_LEDS_GROUP_MULTICOLOR=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -3327,6 +3346,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3341,6 +3361,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3349,6 +3370,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
@@ -3415,6 +3437,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -3425,6 +3448,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -3454,6 +3478,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -3665,18 +3690,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -3776,6 +3805,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3853,6 +3885,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AR7_PARTS is not set
@@ -4000,9 +4034,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4075,12 +4106,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4173,6 +4204,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4184,15 +4216,12 @@ CONFIG_NET_NS=y
CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4226,6 +4255,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
CONFIG_NET_TULIP=y
CONFIG_NET_UDP_TUNNEL=m
CONFIG_NET_VENDOR_3COM=y
@@ -4360,7 +4390,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -4554,8 +4584,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_SHIELD_FF=y
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_LAYOUT_ONIE_TLV=m
CONFIG_NVMEM_LAYOUT_SL28_VPD=m
@@ -4575,7 +4606,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=y
# CONFIG_NVSW_SN2201 is not set
@@ -4697,6 +4730,7 @@ CONFIG_PATA_WINBOND=m
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4745,6 +4779,7 @@ CONFIG_PCI_STUB=y
CONFIG_PCI_SW_SWITCHTEC=m
CONFIG_PCI=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4769,16 +4804,12 @@ CONFIG_PHY_CADENCE_SIERRA=m
CONFIG_PHY_CADENCE_TORRENT=m
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
# CONFIG_PHY_HI3670_USB is not set
# CONFIG_PHY_LAN966X_SERDES is not set
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4872,7 +4903,6 @@ CONFIG_POWERNV_CPUIDLE=y
# CONFIG_POWERNV_OP_PANEL is not set
# CONFIG_POWERPC64_CPU is not set
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_POWER_RESET_GPIO=y
# CONFIG_POWER_RESET_LINKSTATION is not set
@@ -5155,7 +5185,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=y
@@ -5187,6 +5217,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77650=m
# CONFIG_REGULATOR_MAX77826 is not set
CONFIG_REGULATOR_MAX77857=m
@@ -5291,6 +5322,7 @@ CONFIG_RMI4_SPI=m
CONFIG_RMNET=m
# CONFIG_ROCKCHIP_PHY is not set
CONFIG_ROCKER=m
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -5345,7 +5377,6 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_ABEOZ9=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5658,11 +5689,12 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -5803,6 +5835,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -5864,6 +5897,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -6033,7 +6067,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -6123,6 +6156,7 @@ CONFIG_SND_FM801=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6266,8 +6300,10 @@ CONFIG_SND_SOC_AK5558=m
# CONFIG_SND_SOC_ARNDALE is not set
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -6472,12 +6508,6 @@ CONFIG_SND_SOC_PCM3060_SPI=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
CONFIG_SND_SOC_RT1017_SDCA_SDW=m
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6500,6 +6530,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6938,6 +6969,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6992,6 +7024,7 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7056,8 +7089,6 @@ CONFIG_TIFM_7XX1=m
CONFIG_TIFM_CORE=m
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
-CONFIG_TI_ICSS_IEP=m
CONFIG_TI_LMP92064=m
CONFIG_TIME_KUNIT_TEST=m
CONFIG_TIME_NS=y
@@ -7245,6 +7276,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7256,7 +7288,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TPS6598X=m
CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_WUSB3801=m
CONFIG_TYPHOON=m
CONFIG_UACCE=m
@@ -7337,6 +7368,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CONFIGFS_F_MIDI2=y
# CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set
@@ -7467,6 +7499,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
CONFIG_USB_MA901=m
# CONFIG_USB_MASS_STORAGE is not set
@@ -7519,6 +7552,7 @@ CONFIG_USB_ONBOARD_HUB=m
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -7721,7 +7755,7 @@ CONFIG_VIDEO_BT819=m
CONFIG_VIDEO_BT848=m
CONFIG_VIDEO_BT856=m
CONFIG_VIDEO_BT866=m
-CONFIG_VIDEO_CADENCE_CSI2RX=m
+# CONFIG_VIDEO_CADENCE_CSI2RX is not set
CONFIG_VIDEO_CADENCE_CSI2TX=m
# CONFIG_VIDEO_CADENCE is not set
# CONFIG_VIDEO_CAFE_CCIC is not set
@@ -7798,10 +7832,12 @@ CONFIG_VIDEO_M52790=m
CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MAX96712 is not set
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -8090,6 +8126,7 @@ CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_XDP_SOCKETS=y
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -8198,19 +8235,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-ppc64le-rhel.config b/SOURCES/kernel-ppc64le-rhel.config
index 252be59..40b9392 100644
--- a/SOURCES/kernel-ppc64le-rhel.config
+++ b/SOURCES/kernel-ppc64le-rhel.config
@@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m
CONFIG_ARCH_FORCE_MAX_ORDER=8
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
CONFIG_ARCH_MEMORY_PROBE=y
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=14
@@ -276,6 +275,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -388,6 +388,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -517,7 +518,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -610,7 +610,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -681,6 +680,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
# CONFIG_CFG80211_DEBUGFS is not set
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -758,6 +758,7 @@ CONFIG_CIFS_SMB_DIRECT=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -829,7 +830,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -979,6 +979,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1260,6 +1265,7 @@ CONFIG_DP83TC811_PHY=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1332,6 +1338,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1358,36 +1365,90 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1564,7 +1625,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1594,7 +1654,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1660,7 +1725,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1758,7 +1823,9 @@ CONFIG_FS_DAX=y
# CONFIG_FSL_QDMA is not set
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -1767,6 +1834,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
# CONFIG_FTR_FIXUP_SELFTEST is not set
+# CONFIG_FUEL_GAUGE_MM8013 is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
# CONFIG_FUNCTION_GRAPH_RETVAL is not set
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -1905,6 +1973,7 @@ CONFIG_GPIO_SIM=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2077,6 +2146,7 @@ CONFIG_HMM_MIRROR=y
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2227,6 +2297,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2244,6 +2315,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2314,7 +2386,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2428,6 +2499,7 @@ CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2470,6 +2542,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2492,7 +2565,8 @@ CONFIG_IO_EVENT_IRQ=y
# CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set
CONFIG_IOMMU_DEFAULT_DMA_STRICT=y
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -2785,7 +2859,7 @@ CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_KEYS_REQUEST_CACHE is not set
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -2824,6 +2898,7 @@ CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND=y
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
CONFIG_KVM_BOOK3S_PR_POSSIBLE=y
CONFIG_KVM_GUEST=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
# CONFIG_KVM_WERROR is not set
@@ -2993,6 +3068,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3007,6 +3083,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3015,6 +3092,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3043,7 +3121,6 @@ CONFIG_MANTIS_CORE=m
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
CONFIG_MARVELL_PHY=m
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3077,6 +3154,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3102,7 +3180,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MD_LINEAR=m
# CONFIG_MD_MULTIPATH is not set
@@ -3111,6 +3189,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3177,7 +3256,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3313,6 +3392,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3422,6 +3502,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3496,6 +3579,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3602,9 +3687,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3726,6 +3808,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3738,15 +3821,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -3775,6 +3855,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -3875,7 +3956,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4069,8 +4150,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4092,7 +4174,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=m
# CONFIG_NVSW_SN2201 is not set
@@ -4198,6 +4282,7 @@ CONFIG_PATA_PLATFORM=m
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4245,6 +4330,7 @@ CONFIG_PCI_STUB=y
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4269,8 +4355,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4280,8 +4364,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_LAN966X_SERDES is not set
CONFIG_PHYLIB=y
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4308,6 +4390,7 @@ CONFIG_PID_NS=y
CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4378,7 +4461,6 @@ CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_CPUIDLE=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
CONFIG_POWER_RESET_GPIO=y
# CONFIG_POWER_RESET_LTC2952 is not set
@@ -4579,7 +4661,6 @@ CONFIG_QFMT_V2=y
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
# CONFIG_QRTR is not set
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4681,6 +4762,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -4756,6 +4838,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -4797,7 +4880,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5081,6 +5163,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5235,6 +5318,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5293,6 +5377,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5461,7 +5546,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -5541,6 +5625,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5677,8 +5762,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -5769,6 +5856,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -5883,12 +5971,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -5911,6 +5993,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6106,7 +6189,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6308,6 +6390,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6361,6 +6444,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6581,6 +6665,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6657,6 +6742,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -6755,6 +6841,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
# CONFIG_USB_LED_TRIG is not set
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -6800,6 +6887,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -6944,6 +7032,9 @@ CONFIG_VETH=m
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
# CONFIG_VFIO_MDEV is not set
@@ -7057,11 +7148,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7396,19 +7489,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-s390x-debug-fedora.config b/SOURCES/kernel-s390x-debug-fedora.config
index 2057166..e8b901f 100644
--- a/SOURCES/kernel-s390x-debug-fedora.config
+++ b/SOURCES/kernel-s390x-debug-fedora.config
@@ -484,6 +484,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y
+CONFIG_BCACHEFS_DEBUG=y
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_LOCK_TIME_STATS=y
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM54140_PHY=m
CONFIG_BCM7XXX_PHY=m
@@ -624,7 +633,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
CONFIG_BRIDGE_CFM=y
CONFIG_BRIDGE_EBT_802_3=m
@@ -732,7 +740,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -815,6 +822,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -905,6 +913,7 @@ CONFIG_CIFS_XATTR=y
CONFIG_CIO2_BRIDGE=y
# CONFIG_CIO_INJECT is not set
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -976,7 +985,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1042,6 +1050,7 @@ CONFIG_CROS_EC_TYPEC=m
CONFIG_CROS_EC_UART=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1128,6 +1137,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1234,6 +1248,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
CONFIG_DEBUG_CREDENTIALS=y
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1329,7 +1344,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1418,7 +1432,7 @@ CONFIG_DNS_RESOLVER=m
# CONFIG_DP83640_PHY is not set
CONFIG_DP83822_PHY=m
CONFIG_DP83848_PHY=m
-# CONFIG_DP83867_PHY is not set
+CONFIG_DP83867_PHY=m
CONFIG_DP83869_PHY=m
# CONFIG_DP83TC811_PHY is not set
# CONFIG_DP83TD510_PHY is not set
@@ -1427,6 +1441,7 @@ CONFIG_DPOT_DAC=m
# CONFIG_DPS310 is not set
CONFIG_DRAGONRISE_FF=y
CONFIG_DRBD_FAULT_INJECTION=y
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL_QAIC is not set
CONFIG_DRM_ACCEL=y
CONFIG_DRM_AMD_ACP=y
@@ -1532,9 +1547,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
@@ -1564,6 +1581,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -1778,7 +1796,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EFI_COCO_SECRET=y
CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y
@@ -1901,7 +1918,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1912,6 +1929,7 @@ CONFIG_FB_EFI=y
# CONFIG_FB_IBM_GXT4500 is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_IMX is not set
+# CONFIG_FB is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_LE80578 is not set
# CONFIG_FB_MATROX_G is not set
@@ -1952,7 +1970,6 @@ CONFIG_FB_VESA=y
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
# CONFIG_FB_XILINX is not set
-CONFIG_FB=y
# CONFIG_FCOE is not set
# CONFIG_FDDI is not set
# CONFIG_FEALNX is not set
@@ -2031,6 +2048,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+CONFIG_FUEL_GAUGE_MM8013=m
# CONFIG_FUNCTION_ERROR_INJECTION is not set
CONFIG_FUNCTION_GRAPH_RETVAL=y
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -2496,6 +2514,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -2581,7 +2600,6 @@ CONFIG_IMA_NG_TEMPLATE=y
CONFIG_IMA_READ_POLICY=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2785,8 +2803,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
# CONFIG_IPMB_DEVICE_INTERFACE is not set
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3103,7 +3119,7 @@ CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3134,6 +3150,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=m
CONFIG_KUNIT=m
CONFIG_KUNIT_TEST=m
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
# CONFIG_KVM_S390_UCONTROL is not set
CONFIG_KVM_SMM=y
@@ -3190,6 +3207,7 @@ CONFIG_LEDS_GPIO=m
CONFIG_LEDS_GROUP_MULTICOLOR=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -3315,6 +3333,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3329,6 +3348,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3338,6 +3358,7 @@ CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
# CONFIG_MAC80211 is not set
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211_MESH=y
CONFIG_MAC80211_MESSAGE_TRACING=y
@@ -3414,6 +3435,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -3424,6 +3446,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -3453,6 +3476,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -3664,18 +3688,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -3775,6 +3803,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3852,6 +3883,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AR7_PARTS is not set
@@ -3997,9 +4030,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4072,12 +4102,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4171,6 +4201,7 @@ CONFIG_NET_IPVTI=m
# CONFIG_NETIUCV is not set
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4182,15 +4213,12 @@ CONFIG_NET_NS=y
CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4224,6 +4252,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
CONFIG_NET_TULIP=y
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4357,7 +4386,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -4550,8 +4579,9 @@ CONFIG_NUMA=y
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_SHIELD_FF=y
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_LAYOUT_ONIE_TLV=m
CONFIG_NVMEM_LAYOUT_SL28_VPD=m
@@ -4571,7 +4601,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4688,6 +4720,7 @@ CONFIG_PATA_WINBOND=m
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4737,6 +4770,7 @@ CONFIG_PCI_STUB=y
CONFIG_PCI_SW_SWITCHTEC=m
CONFIG_PCI=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4762,16 +4796,12 @@ CONFIG_PHY_CADENCE_SIERRA=m
CONFIG_PHY_CADENCE_TORRENT=m
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
# CONFIG_PHY_HI3670_USB is not set
# CONFIG_PHY_LAN966X_SERDES is not set
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4858,7 +4888,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERCAP=y
CONFIG_POWER_MLXBF=m
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET is not set
# CONFIG_POWER_RESET_LINKSTATION is not set
# CONFIG_POWER_RESET_LTC2952 is not set
@@ -5106,7 +5135,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=m
@@ -5138,6 +5167,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77650=m
# CONFIG_REGULATOR_MAX77826 is not set
CONFIG_REGULATOR_MAX77857=m
@@ -5243,6 +5273,7 @@ CONFIG_RMI4_SPI=m
CONFIG_RMNET=m
# CONFIG_ROCKCHIP_PHY is not set
CONFIG_ROCKER=m
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -5295,7 +5326,6 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_ABEOZ9=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5619,11 +5649,12 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -5763,6 +5794,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -5823,6 +5855,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -5990,7 +6023,6 @@ CONFIG_SLIP_COMPRESSED=y
# CONFIG_SLIP is not set
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -6082,6 +6114,7 @@ CONFIG_SND_FIREWORKS=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6226,8 +6259,10 @@ CONFIG_SND_SOC_AK5558=m
# CONFIG_SND_SOC_ARNDALE is not set
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -6432,12 +6467,6 @@ CONFIG_SND_SOC_PCM3060_SPI=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
CONFIG_SND_SOC_RT1017_SDCA_SDW=m
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6460,6 +6489,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6895,6 +6925,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6949,6 +6980,7 @@ CONFIG_TEST_LOCKUP=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7012,8 +7044,6 @@ CONFIG_TIFM_7XX1=m
# CONFIG_TIFM_CORE is not set
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
-CONFIG_TI_ICSS_IEP=m
CONFIG_TI_LMP92064=m
CONFIG_TIME_KUNIT_TEST=m
CONFIG_TIME_NS=y
@@ -7216,6 +7246,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7227,7 +7258,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TPS6598X=m
CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_WUSB3801=m
CONFIG_TYPHOON=m
CONFIG_UACCE=m
@@ -7307,6 +7337,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CONFIGFS_F_MIDI2=y
# CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set
@@ -7436,6 +7467,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
CONFIG_USB_MA901=m
# CONFIG_USB_MASS_STORAGE is not set
@@ -7485,6 +7517,7 @@ CONFIG_USB_ONBOARD_HUB=m
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -7689,7 +7722,7 @@ CONFIG_VIDEO_BT819=m
CONFIG_VIDEO_BT848=m
CONFIG_VIDEO_BT856=m
CONFIG_VIDEO_BT866=m
-CONFIG_VIDEO_CADENCE_CSI2RX=m
+# CONFIG_VIDEO_CADENCE_CSI2RX is not set
CONFIG_VIDEO_CADENCE_CSI2TX=m
# CONFIG_VIDEO_CADENCE is not set
# CONFIG_VIDEO_CAFE_CCIC is not set
@@ -7766,10 +7799,12 @@ CONFIG_VIDEO_M52790=m
CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MAX96712 is not set
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -8059,6 +8094,7 @@ CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_XDP_SOCKETS=y
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -8166,19 +8202,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-s390x-debug-rhel.config b/SOURCES/kernel-s390x-debug-rhel.config
index 03d4e91..4d051d7 100644
--- a/SOURCES/kernel-s390x-debug-rhel.config
+++ b/SOURCES/kernel-s390x-debug-rhel.config
@@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m
# CONFIG_ARCH_BITMAIN is not set
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=28
CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
@@ -275,6 +274,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -387,6 +387,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -515,7 +516,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -608,7 +608,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -682,6 +681,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -761,6 +761,7 @@ CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
# CONFIG_CIO_INJECT is not set
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -830,7 +831,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -895,7 +895,6 @@ CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m
# CONFIG_CRYPTO_AEGIS128 is not set
# CONFIG_CRYPTO_AES_ARM64 is not set
-CONFIG_CRYPTO_AES_GCM_P10=y
CONFIG_CRYPTO_AES_S390=m
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
@@ -912,7 +911,6 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CHACHA_S390=y
CONFIG_CRYPTO_CMAC=y
@@ -977,6 +975,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1000,7 +1003,6 @@ CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
-# CONFIG_CRYPTO_POLY1305_P10 is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RSA=y
@@ -1274,6 +1276,7 @@ CONFIG_DP83TC811_PHY=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1346,6 +1349,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1373,36 +1377,90 @@ CONFIG_DRM_NOUVEAU_BACKLIGHT=y
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1577,7 +1635,6 @@ CONFIG_EDAC_PND2=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-# CONFIG_EEPROM_LEGACY is not set
# CONFIG_EEPROM_MAX6875 is not set
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1607,7 +1664,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
# CONFIG_ETHOC is not set
CONFIG_ETHTOOL_NETLINK=y
@@ -1683,7 +1745,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1694,9 +1756,9 @@ CONFIG_FB_EFI=y
# CONFIG_FB_IBM_GXT4500 is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_IMX is not set
+# CONFIG_FB is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_LE80578 is not set
-CONFIG_FB=m
# CONFIG_FB_MATROX_G is not set
# CONFIG_FB_MATROX_I2C is not set
# CONFIG_FB_MATROX is not set
@@ -1780,7 +1842,9 @@ CONFIG_FS_DAX=y
# CONFIG_FSL_QDMA is not set
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -1788,6 +1852,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
# CONFIG_FUNCTION_GRAPH_RETVAL is not set
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -1924,6 +1989,7 @@ CONFIG_GPIO_SIM=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
CONFIG_GUP_TEST=y
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2095,6 +2161,7 @@ CONFIG_HMC_DRV=m
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2234,6 +2301,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2248,6 +2316,7 @@ CONFIG_ICE_SWITCHDEV=y
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2317,7 +2386,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2431,6 +2499,7 @@ CONFIG_INPUT_SPARSEKMAP=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2473,6 +2542,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2494,7 +2564,8 @@ CONFIG_IOMMU_DEBUGFS=y
# CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set
CONFIG_IOMMU_DEFAULT_DMA_STRICT=y
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -2793,7 +2864,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -2824,6 +2895,7 @@ CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_PROVE_MMU=y
# CONFIG_KVM_S390_UCONTROL is not set
CONFIG_KVM_SMM=y
@@ -2990,6 +3062,7 @@ CONFIG_LRU_GEN=y
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3004,6 +3077,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
# CONFIG_LXT_PHY is not set
@@ -3013,6 +3087,7 @@ CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
# CONFIG_MAC80211 is not set
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
# CONFIG_MAC80211_MESH is not set
CONFIG_MAC80211_MESSAGE_TRACING=y
@@ -3049,7 +3124,6 @@ CONFIG_MARCH_Z14=y
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
# CONFIG_MARVELL_PHY is not set
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3084,6 +3158,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3107,7 +3182,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MD_LINEAR=m
# CONFIG_MD_MULTIPATH is not set
@@ -3116,6 +3191,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3182,7 +3258,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3318,6 +3394,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3426,6 +3503,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3500,6 +3580,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3605,9 +3687,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3730,6 +3809,7 @@ CONFIG_NET_IPVTI=m
# CONFIG_NETIUCV is not set
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3742,15 +3822,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -3779,6 +3856,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -3878,7 +3956,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4072,8 +4150,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4095,7 +4174,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4200,6 +4281,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
CONFIG_PCI_DEBUG=y
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4248,6 +4330,7 @@ CONFIG_PCI_QUIRKS=y
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4273,8 +4356,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4284,8 +4365,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_LAN966X_SERDES is not set
# CONFIG_PHYLIB is not set
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4311,6 +4390,7 @@ CONFIG_PID_NS=y
CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4378,7 +4458,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -4542,7 +4621,6 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
# CONFIG_QRTR is not set
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4644,6 +4722,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -4718,6 +4797,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -4757,7 +4837,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5084,6 +5163,7 @@ CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5238,6 +5318,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5296,6 +5377,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5454,7 +5536,6 @@ CONFIG_SLIP_COMPRESSED=y
# CONFIG_SLIP is not set
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -5537,6 +5618,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5674,8 +5756,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -5766,6 +5850,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -5880,12 +5965,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -5908,6 +5987,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6104,7 +6184,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6303,6 +6382,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6356,6 +6436,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6590,6 +6671,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6665,6 +6747,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -6762,6 +6845,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
# CONFIG_USB_LED_TRIG is not set
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -6804,6 +6888,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -6950,6 +7035,9 @@ CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
CONFIG_VFIO_AP=m
CONFIG_VFIO_CCW=m
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
CONFIG_VFIO_MDEV=m
@@ -7064,11 +7152,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7405,19 +7495,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-s390x-fedora.config b/SOURCES/kernel-s390x-fedora.config
index 1c1503d..e26a697 100644
--- a/SOURCES/kernel-s390x-fedora.config
+++ b/SOURCES/kernel-s390x-fedora.config
@@ -482,6 +482,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM54140_PHY=m
CONFIG_BCM7XXX_PHY=m
@@ -622,7 +631,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
CONFIG_BRIDGE_CFM=y
CONFIG_BRIDGE_EBT_802_3=m
@@ -730,7 +738,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -813,6 +820,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -903,6 +911,7 @@ CONFIG_CIFS_XATTR=y
CONFIG_CIO2_BRIDGE=y
# CONFIG_CIO_INJECT is not set
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -974,7 +983,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1040,6 +1048,7 @@ CONFIG_CROS_EC_TYPEC=m
CONFIG_CROS_EC_UART=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1126,6 +1135,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1232,6 +1246,7 @@ CONFIG_DE2104X=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1319,7 +1334,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
# CONFIG_DETECT_HUNG_TASK is not set
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1407,7 +1421,7 @@ CONFIG_DNS_RESOLVER=m
# CONFIG_DP83640_PHY is not set
CONFIG_DP83822_PHY=m
CONFIG_DP83848_PHY=m
-# CONFIG_DP83867_PHY is not set
+CONFIG_DP83867_PHY=m
CONFIG_DP83869_PHY=m
# CONFIG_DP83TC811_PHY is not set
# CONFIG_DP83TD510_PHY is not set
@@ -1416,6 +1430,7 @@ CONFIG_DPOT_DAC=m
# CONFIG_DPS310 is not set
CONFIG_DRAGONRISE_FF=y
# CONFIG_DRBD_FAULT_INJECTION is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL_QAIC is not set
CONFIG_DRM_ACCEL=y
CONFIG_DRM_AMD_ACP=y
@@ -1521,9 +1536,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
@@ -1553,6 +1570,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -1767,7 +1785,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EFI_COCO_SECRET=y
CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y
@@ -1882,7 +1899,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1893,6 +1910,7 @@ CONFIG_FB_EFI=y
# CONFIG_FB_IBM_GXT4500 is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_IMX is not set
+# CONFIG_FB is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_LE80578 is not set
# CONFIG_FB_MATROX_G is not set
@@ -1933,7 +1951,6 @@ CONFIG_FB_VESA=y
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
# CONFIG_FB_XILINX is not set
-CONFIG_FB=y
# CONFIG_FCOE is not set
# CONFIG_FDDI is not set
# CONFIG_FEALNX is not set
@@ -2012,6 +2029,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+CONFIG_FUEL_GAUGE_MM8013=m
# CONFIG_FUNCTION_ERROR_INJECTION is not set
CONFIG_FUNCTION_GRAPH_RETVAL=y
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -2476,6 +2494,7 @@ CONFIG_ICPLUS_PHY=m
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -2561,7 +2580,6 @@ CONFIG_IMA_NG_TEMPLATE=y
CONFIG_IMA_READ_POLICY=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2765,8 +2783,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
# CONFIG_IPMB_DEVICE_INTERFACE is not set
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3076,7 +3092,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3108,6 +3124,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=m
CONFIG_KUNIT=m
CONFIG_KUNIT_TEST=m
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
# CONFIG_KVM_S390_UCONTROL is not set
CONFIG_KVM_SMM=y
@@ -3164,6 +3181,7 @@ CONFIG_LEDS_GPIO=m
CONFIG_LEDS_GROUP_MULTICOLOR=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -3289,6 +3307,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3303,6 +3322,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3312,6 +3332,7 @@ CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
# CONFIG_MAC80211 is not set
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211_MESH=y
# CONFIG_MAC80211_MESSAGE_TRACING is not set
@@ -3387,6 +3408,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -3397,6 +3419,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -3426,6 +3449,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -3637,18 +3661,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -3747,6 +3775,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3824,6 +3855,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AR7_PARTS is not set
@@ -3969,9 +4002,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4044,12 +4074,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4143,6 +4173,7 @@ CONFIG_NET_IPVTI=m
# CONFIG_NETIUCV is not set
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4154,15 +4185,12 @@ CONFIG_NET_NS=y
CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4196,6 +4224,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
CONFIG_NET_TULIP=y
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4329,7 +4358,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -4522,8 +4551,9 @@ CONFIG_NUMA=y
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_SHIELD_FF=y
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_LAYOUT_ONIE_TLV=m
CONFIG_NVMEM_LAYOUT_SL28_VPD=m
@@ -4543,7 +4573,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4659,6 +4691,7 @@ CONFIG_PATA_WINBOND=m
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4708,6 +4741,7 @@ CONFIG_PCI_STUB=y
CONFIG_PCI_SW_SWITCHTEC=m
CONFIG_PCI=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4733,16 +4767,12 @@ CONFIG_PHY_CADENCE_SIERRA=m
CONFIG_PHY_CADENCE_TORRENT=m
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
# CONFIG_PHY_HI3670_USB is not set
# CONFIG_PHY_LAN966X_SERDES is not set
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4829,7 +4859,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERCAP=y
CONFIG_POWER_MLXBF=m
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET is not set
# CONFIG_POWER_RESET_LINKSTATION is not set
# CONFIG_POWER_RESET_LTC2952 is not set
@@ -5077,7 +5106,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=m
@@ -5109,6 +5138,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77650=m
# CONFIG_REGULATOR_MAX77826 is not set
CONFIG_REGULATOR_MAX77857=m
@@ -5214,6 +5244,7 @@ CONFIG_RMI4_SPI=m
CONFIG_RMNET=m
# CONFIG_ROCKCHIP_PHY is not set
CONFIG_ROCKER=m
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -5266,7 +5297,6 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_ABEOZ9=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5590,11 +5620,12 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -5734,6 +5765,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -5794,6 +5826,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -5961,7 +5994,6 @@ CONFIG_SLIP_COMPRESSED=y
# CONFIG_SLIP is not set
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -6053,6 +6085,7 @@ CONFIG_SND_FIREWORKS=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6196,8 +6229,10 @@ CONFIG_SND_SOC_AK5558=m
# CONFIG_SND_SOC_ARNDALE is not set
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -6402,12 +6437,6 @@ CONFIG_SND_SOC_PCM3060_SPI=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
CONFIG_SND_SOC_RT1017_SDCA_SDW=m
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6430,6 +6459,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6864,6 +6894,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6918,6 +6949,7 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6981,8 +7013,6 @@ CONFIG_TIFM_7XX1=m
# CONFIG_TIFM_CORE is not set
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
-CONFIG_TI_ICSS_IEP=m
CONFIG_TI_LMP92064=m
CONFIG_TIME_KUNIT_TEST=m
CONFIG_TIME_NS=y
@@ -7185,6 +7215,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7196,7 +7227,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TPS6598X=m
CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
CONFIG_TYPEC_WUSB3801=m
CONFIG_TYPHOON=m
CONFIG_UACCE=m
@@ -7276,6 +7306,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CONFIGFS_F_MIDI2=y
# CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set
@@ -7405,6 +7436,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
CONFIG_USB_MA901=m
# CONFIG_USB_MASS_STORAGE is not set
@@ -7454,6 +7486,7 @@ CONFIG_USB_ONBOARD_HUB=m
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -7658,7 +7691,7 @@ CONFIG_VIDEO_BT819=m
CONFIG_VIDEO_BT848=m
CONFIG_VIDEO_BT856=m
CONFIG_VIDEO_BT866=m
-CONFIG_VIDEO_CADENCE_CSI2RX=m
+# CONFIG_VIDEO_CADENCE_CSI2RX is not set
CONFIG_VIDEO_CADENCE_CSI2TX=m
# CONFIG_VIDEO_CADENCE is not set
# CONFIG_VIDEO_CAFE_CCIC is not set
@@ -7735,10 +7768,12 @@ CONFIG_VIDEO_M52790=m
CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MAX96712 is not set
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -8028,6 +8063,7 @@ CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_XDP_SOCKETS=y
# CONFIG_XEN_GRANT_DMA_ALLOC is not set
CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -8135,19 +8171,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-s390x-rhel.config b/SOURCES/kernel-s390x-rhel.config
index 1ee626a..c2c1ecf 100644
--- a/SOURCES/kernel-s390x-rhel.config
+++ b/SOURCES/kernel-s390x-rhel.config
@@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m
# CONFIG_ARCH_BITMAIN is not set
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=28
CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
@@ -275,6 +274,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -387,6 +387,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -515,7 +516,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -608,7 +608,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -682,6 +681,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
# CONFIG_CFG80211_DEBUGFS is not set
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -761,6 +761,7 @@ CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
# CONFIG_CIO_INJECT is not set
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -830,7 +831,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -895,7 +895,6 @@ CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m
# CONFIG_CRYPTO_AEGIS128 is not set
# CONFIG_CRYPTO_AES_ARM64 is not set
-CONFIG_CRYPTO_AES_GCM_P10=y
CONFIG_CRYPTO_AES_S390=m
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
@@ -912,7 +911,6 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CHACHA_S390=y
CONFIG_CRYPTO_CMAC=y
@@ -977,6 +975,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1000,7 +1003,6 @@ CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
-# CONFIG_CRYPTO_POLY1305_P10 is not set
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RSA=y
@@ -1266,6 +1268,7 @@ CONFIG_DP83TC811_PHY=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1338,6 +1341,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1365,36 +1369,90 @@ CONFIG_DRM_NOUVEAU_BACKLIGHT=y
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1569,7 +1627,6 @@ CONFIG_EDAC_PND2=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-# CONFIG_EEPROM_LEGACY is not set
# CONFIG_EEPROM_MAX6875 is not set
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1599,7 +1656,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
# CONFIG_ETHOC is not set
CONFIG_ETHTOOL_NETLINK=y
@@ -1667,7 +1729,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1678,9 +1740,9 @@ CONFIG_FB_EFI=y
# CONFIG_FB_IBM_GXT4500 is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_IMX is not set
+# CONFIG_FB is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_LE80578 is not set
-CONFIG_FB=m
# CONFIG_FB_MATROX_G is not set
# CONFIG_FB_MATROX_I2C is not set
# CONFIG_FB_MATROX is not set
@@ -1764,7 +1826,9 @@ CONFIG_FS_DAX=y
# CONFIG_FSL_QDMA is not set
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -1772,6 +1836,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
# CONFIG_FUNCTION_GRAPH_RETVAL is not set
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -1908,6 +1973,7 @@ CONFIG_GPIO_SIM=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2079,6 +2145,7 @@ CONFIG_HMC_DRV=m
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2218,6 +2285,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2232,6 +2300,7 @@ CONFIG_ICE_SWITCHDEV=y
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2301,7 +2370,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2415,6 +2483,7 @@ CONFIG_INPUT_SPARSEKMAP=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_INTEGRITY_AUDIT=y
@@ -2457,6 +2526,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2478,7 +2548,8 @@ CONFIG_IO_DELAY_0X80=y
# CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set
CONFIG_IOMMU_DEFAULT_DMA_STRICT=y
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -2772,7 +2843,7 @@ CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_KEYS_REQUEST_CACHE is not set
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -2804,6 +2875,7 @@ CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
# CONFIG_KVM_S390_UCONTROL is not set
CONFIG_KVM_SMM=y
@@ -2970,6 +3042,7 @@ CONFIG_LRU_GEN=y
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -2984,6 +3057,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
# CONFIG_LXT_PHY is not set
@@ -2993,6 +3067,7 @@ CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
# CONFIG_MAC80211 is not set
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
# CONFIG_MAC80211_MESH is not set
# CONFIG_MAC80211_MESSAGE_TRACING is not set
@@ -3029,7 +3104,6 @@ CONFIG_MARCH_Z14=y
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
# CONFIG_MARVELL_PHY is not set
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3064,6 +3138,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3087,7 +3162,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MD_LINEAR=m
# CONFIG_MD_MULTIPATH is not set
@@ -3096,6 +3171,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3162,7 +3238,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3298,6 +3374,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3406,6 +3483,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3480,6 +3560,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3585,9 +3667,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3710,6 +3789,7 @@ CONFIG_NET_IPVTI=m
# CONFIG_NETIUCV is not set
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3722,15 +3802,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -3759,6 +3836,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -3858,7 +3936,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4052,8 +4130,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4075,7 +4154,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4179,6 +4260,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4227,6 +4309,7 @@ CONFIG_PCI_QUIRKS=y
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4252,8 +4335,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4263,8 +4344,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_LAN966X_SERDES is not set
# CONFIG_PHYLIB is not set
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4290,6 +4369,7 @@ CONFIG_PID_NS=y
CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4357,7 +4437,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -4521,7 +4600,6 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
# CONFIG_QRTR is not set
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4623,6 +4701,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -4697,6 +4776,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -4736,7 +4816,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5063,6 +5142,7 @@ CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5217,6 +5297,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5275,6 +5356,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5433,7 +5515,6 @@ CONFIG_SLIP_COMPRESSED=y
# CONFIG_SLIP is not set
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -5516,6 +5597,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5652,8 +5734,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -5744,6 +5828,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -5858,12 +5943,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -5886,6 +5965,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6081,7 +6161,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6280,6 +6359,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6333,6 +6413,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6567,6 +6648,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6642,6 +6724,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -6739,6 +6822,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
# CONFIG_USB_LED_TRIG is not set
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -6781,6 +6865,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -6927,6 +7012,9 @@ CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
CONFIG_VFIO_AP=m
CONFIG_VFIO_CCW=m
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
CONFIG_VFIO_MDEV=m
@@ -7041,11 +7129,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7382,19 +7472,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-s390x-zfcpdump-rhel.config b/SOURCES/kernel-s390x-zfcpdump-rhel.config
index 667bd35..c987d1e 100644
--- a/SOURCES/kernel-s390x-zfcpdump-rhel.config
+++ b/SOURCES/kernel-s390x-zfcpdump-rhel.config
@@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m
# CONFIG_ARCH_BITMAIN is not set
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=28
CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
@@ -275,6 +274,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y
CONFIG_ASN1=y
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -389,6 +389,7 @@ CONFIG_BASE_SMALL=0
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -508,7 +509,7 @@ CONFIG_BPF_JIT=y
CONFIG_BPF_LSM=y
# CONFIG_BPF_PRELOAD is not set
CONFIG_BPF_STREAM_PARSER=y
-CONFIG_BPF_SYSCALL=y
+# CONFIG_BPF_SYSCALL is not set
CONFIG_BPF_UNPRIV_DEFAULT_OFF=y
CONFIG_BRANCH_PROFILE_NONE=y
# CONFIG_BRCMDBG is not set
@@ -517,7 +518,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -610,7 +610,6 @@ CONFIG_CACHESTAT_SYSCALL=y
# CONFIG_CAIF is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -684,6 +683,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
# CONFIG_CFG80211_DEBUGFS is not set
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -763,6 +763,7 @@ CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
# CONFIG_CIO_INJECT is not set
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -832,7 +833,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=y
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -897,7 +897,6 @@ CONFIG_CRYPTO_ADIANTUM=y
CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m
# CONFIG_CRYPTO_AEGIS128 is not set
# CONFIG_CRYPTO_AES_ARM64 is not set
-CONFIG_CRYPTO_AES_GCM_P10=y
CONFIG_CRYPTO_AES_S390=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
@@ -913,7 +912,6 @@ CONFIG_CRYPTO_CAST6=y
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_CCM is not set
CONFIG_CRYPTO_CFB=y
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=y
CONFIG_CRYPTO_CHACHA20=y
CONFIG_CRYPTO_CHACHA_S390=y
@@ -979,6 +977,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
# CONFIG_CRYPTO_LIB_BLAKE2S is not set
@@ -1001,7 +1004,6 @@ CONFIG_CRYPTO_OFB=y
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_PCRYPT=y
-# CONFIG_CRYPTO_POLY1305_P10 is not set
CONFIG_CRYPTO_POLY1305=y
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=y
@@ -1268,6 +1270,7 @@ CONFIG_DP83TC811_PHY=m
# CONFIG_DPOT_DAC is not set
# CONFIG_DPS310 is not set
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1340,6 +1343,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1367,36 +1371,90 @@ CONFIG_DRM_NOUVEAU_BACKLIGHT=y
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1571,7 +1629,6 @@ CONFIG_EDAC_PND2=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-# CONFIG_EEPROM_LEGACY is not set
# CONFIG_EEPROM_MAX6875 is not set
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1601,7 +1658,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
# CONFIG_ETHOC is not set
CONFIG_ETHTOOL_NETLINK=y
@@ -1670,7 +1732,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1681,9 +1743,9 @@ CONFIG_FB_EFI=y
# CONFIG_FB_IBM_GXT4500 is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_IMX is not set
+# CONFIG_FB is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_LE80578 is not set
-CONFIG_FB=m
# CONFIG_FB_MATROX_G is not set
# CONFIG_FB_MATROX_I2C is not set
# CONFIG_FB_MATROX is not set
@@ -1768,7 +1830,9 @@ CONFIG_FSCACHE_STATS=y
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
# CONFIG_FS_POSIX_ACL is not set
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
# CONFIG_FTRACE is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
@@ -1776,6 +1840,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_SORT_STARTUP_TEST is not set
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
# CONFIG_FUNCTION_ERROR_INJECTION is not set
# CONFIG_FUNCTION_GRAPH_RETVAL is not set
CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -1914,6 +1979,7 @@ CONFIG_GPIO_SIM=y
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2085,6 +2151,7 @@ CONFIG_HMC_DRV=y
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2225,6 +2292,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
# CONFIG_IB700_WDT is not set
@@ -2239,6 +2307,7 @@ CONFIG_ICE_SWITCHDEV=y
# CONFIG_ICS932S401 is not set
# CONFIG_IDLE_INJECT is not set
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
# CONFIG_IEEE802154_AT86RF230 is not set
@@ -2308,7 +2377,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2423,6 +2491,7 @@ CONFIG_INPUT_POWERMATE=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
# CONFIG_INTEGRITY_ASYMMETRIC_KEYS is not set
CONFIG_INTEGRITY_AUDIT=y
@@ -2465,6 +2534,7 @@ CONFIG_INTEL_SDSI=m
# CONFIG_INTEL_SOC_PMIC_CHTWC is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
# CONFIG_INTEL_TH is not set
CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
# CONFIG_INTEL_VSC is not set
@@ -2486,7 +2556,8 @@ CONFIG_IO_DELAY_0X80=y
# CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set
CONFIG_IOMMU_DEFAULT_DMA_STRICT=y
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -2781,7 +2852,7 @@ CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_KEYS is not set
# CONFIG_KEYS_REQUEST_CACHE is not set
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -2813,6 +2884,7 @@ CONFIG_KVM_AMD_SEV=y
# CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
# CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
# CONFIG_KVM is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
# CONFIG_KVM_PROVE_MMU is not set
# CONFIG_KVM_S390_UCONTROL is not set
CONFIG_KVM_SMM=y
@@ -2980,6 +3052,7 @@ CONFIG_LRU_GEN=y
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -2994,6 +3067,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
# CONFIG_LXT_PHY is not set
@@ -3003,6 +3077,7 @@ CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
# CONFIG_MAC80211 is not set
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
# CONFIG_MAC80211_MESH is not set
# CONFIG_MAC80211_MESSAGE_TRACING is not set
@@ -3039,7 +3114,6 @@ CONFIG_MARCH_Z14=y
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
# CONFIG_MARVELL_PHY is not set
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3074,6 +3148,7 @@ CONFIG_MAX_SKB_FRAGS=17
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3097,7 +3172,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
# CONFIG_MD is not set
CONFIG_MD_LINEAR=m
@@ -3106,6 +3181,7 @@ CONFIG_MD_RAID0=m
CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3172,7 +3248,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3308,6 +3384,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3416,6 +3493,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3490,6 +3570,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3595,9 +3677,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3720,6 +3799,7 @@ CONFIG_NET_IPVTI=m
# CONFIG_NETIUCV is not set
# CONFIG_NET_KEY is not set
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3733,15 +3813,12 @@ CONFIG_NET_NS=y
# CONFIG_NETPOLL is not set
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
# CONFIG_NET_SCHED is not set
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -3770,6 +3847,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -3870,7 +3948,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4064,8 +4142,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
# CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
# CONFIG_NVME_FC is not set
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4087,7 +4166,9 @@ CONFIG_NVME_TARGET_LOOP=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
# CONFIG_NVRAM is not set
# CONFIG_NVSW_SN2201 is not set
@@ -4191,6 +4272,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4239,6 +4321,7 @@ CONFIG_PCI_QUIRKS=y
# CONFIG_PCI_STUB is not set
# CONFIG_PCI_SW_SWITCHTEC is not set
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4264,8 +4347,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4275,8 +4356,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_LAN966X_SERDES is not set
# CONFIG_PHYLIB is not set
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4302,6 +4381,7 @@ CONFIG_PID_NS=y
CONFIG_PINCTRL_ALDERLAKE=m
# CONFIG_PINCTRL_BROXTON is not set
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_ELKHARTLAKE=m
CONFIG_PINCTRL_EMMITSBURG=m
@@ -4369,7 +4449,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -4534,7 +4613,6 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
# CONFIG_QRTR is not set
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4636,6 +4714,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -4710,6 +4789,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -4749,7 +4829,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5080,6 +5159,7 @@ CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5234,6 +5314,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5292,6 +5373,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5450,7 +5532,6 @@ CONFIG_SLIP_COMPRESSED=y
# CONFIG_SLIP is not set
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
CONFIG_SLUB_DEBUG=y
@@ -5533,6 +5614,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5669,8 +5751,10 @@ CONFIG_SND_SEQ_UMP=y
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -5761,6 +5845,7 @@ CONFIG_SND_SOC_CX2072X=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -5875,12 +5960,6 @@ CONFIG_SND_SOC_MAX98927=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
# CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
# CONFIG_SND_SOC_RT1308 is not set
# CONFIG_SND_SOC_RT1308_SDW is not set
@@ -5903,6 +5982,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
# CONFIG_SND_SOC_RT715_SDW is not set
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6098,7 +6178,6 @@ CONFIG_SND_VX222=m
# CONFIG_SND_XEN_FRONTEND is not set
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6302,6 +6381,7 @@ CONFIG_TCM_IBLOCK=y
CONFIG_TCM_PSCSI=y
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6355,6 +6435,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6590,6 +6671,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6665,6 +6747,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
# CONFIG_USB_CHIPIDEA is not set
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -6762,6 +6845,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
# CONFIG_USB_LED_TRIG is not set
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -6804,6 +6888,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -6950,6 +7035,9 @@ CONFIG_VETH=m
# CONFIG_VFIO_AMBA is not set
CONFIG_VFIO_AP=m
CONFIG_VFIO_CCW=m
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
# CONFIG_VFIO is not set
CONFIG_VFIO_MDEV=m
@@ -7064,11 +7152,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7405,19 +7495,18 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-x86_64-debug-fedora.config b/SOURCES/kernel-x86_64-debug-fedora.config
index a3ad78d..ee8a2e4 100644
--- a/SOURCES/kernel-x86_64-debug-fedora.config
+++ b/SOURCES/kernel-x86_64-debug-fedora.config
@@ -188,16 +188,16 @@ CONFIG_ADVANTECH_EC_WDT=m
# CONFIG_ADVANTECH_WDT is not set
CONFIG_ADVISE_SYSCALLS=y
CONFIG_ADV_SWBUTTON=m
-CONFIG_ADXL313_I2C=m
-CONFIG_ADXL313_SPI=m
+# CONFIG_ADXL313_I2C is not set
+# CONFIG_ADXL313_SPI is not set
# CONFIG_ADXL345_I2C is not set
# CONFIG_ADXL345_SPI is not set
-CONFIG_ADXL355_I2C=m
-CONFIG_ADXL355_SPI=m
-CONFIG_ADXL367_I2C=m
-CONFIG_ADXL367_SPI=m
-CONFIG_ADXL372_I2C=m
-CONFIG_ADXL372_SPI=m
+# CONFIG_ADXL355_I2C is not set
+# CONFIG_ADXL355_SPI is not set
+# CONFIG_ADXL367_I2C is not set
+# CONFIG_ADXL367_SPI is not set
+# CONFIG_ADXL372_I2C is not set
+# CONFIG_ADXL372_SPI is not set
CONFIG_ADXRS290=m
# CONFIG_ADXRS450 is not set
# CONFIG_AFE4403 is not set
@@ -345,6 +345,7 @@ CONFIG_ASUS_NB_WMI=m
CONFIG_ASUS_TF103C_DOCK=m
CONFIG_ASUS_WIRELESS=m
CONFIG_ASUS_WMI=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -534,6 +535,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y
+CONFIG_BCACHEFS_DEBUG=y
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_LOCK_TIME_STATS=y
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM54140_PHY=m
CONFIG_BCM7XXX_PHY=m
@@ -676,7 +686,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
CONFIG_BRIDGE_CFM=y
CONFIG_BRIDGE_EBT_802_3=m
@@ -788,7 +797,6 @@ CONFIG_CALL_DEPTH_TRACKING=y
# CONFIG_CALL_THUNKS_DEBUG is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -868,6 +876,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -960,6 +969,7 @@ CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CIO2_BRIDGE=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -1039,7 +1049,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1117,6 +1126,7 @@ CONFIG_CROS_EC_TYPEC=m
CONFIG_CROS_EC_UART=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1224,6 +1234,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1315,7 +1330,7 @@ CONFIG_CXL_PMU=m
# CONFIG_CXL_REGION_INVALIDATION_TEST is not set
CONFIG_CXL_REGION=y
CONFIG_DA280=m
-CONFIG_DA311=m
+# CONFIG_DA311 is not set
CONFIG_DAMON_DBGFS=y
# CONFIG_DAMON_LRU_SORT is not set
CONFIG_DAMON_PADDR=y
@@ -1334,6 +1349,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
CONFIG_DEBUG_CREDENTIALS=y
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1446,7 +1462,6 @@ CONFIG_DELL_WMI=m
CONFIG_DELL_WMI_PRIVACY=y
CONFIG_DELL_WMI_SYSMAN=m
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1495,7 +1510,7 @@ CONFIG_DMA_NUMA_CMA=y
# CONFIG_DMAPOOL_TEST is not set
# CONFIG_DMARD06 is not set
# CONFIG_DMARD09 is not set
-CONFIG_DMARD10=m
+# CONFIG_DMARD10 is not set
# CONFIG_DMA_RESTRICTED_POOL is not set
# CONFIG_DMATEST is not set
CONFIG_DM_CACHE=m
@@ -1543,7 +1558,7 @@ CONFIG_DNS_RESOLVER=m
CONFIG_DP83640_PHY=m
CONFIG_DP83822_PHY=m
CONFIG_DP83848_PHY=m
-# CONFIG_DP83867_PHY is not set
+CONFIG_DP83867_PHY=m
CONFIG_DP83869_PHY=m
# CONFIG_DP83TC811_PHY is not set
# CONFIG_DP83TD510_PHY is not set
@@ -1554,6 +1569,7 @@ CONFIG_DPTF_PCH_FIVR=m
CONFIG_DPTF_POWER=m
CONFIG_DRAGONRISE_FF=y
CONFIG_DRBD_FAULT_INJECTION=y
+CONFIG_DRIVER_PE_KUNIT_TEST=m
CONFIG_DRM_ACCEL_HABANALABS=m
CONFIG_DRM_ACCEL_IVPU=m
CONFIG_DRM_ACCEL_QAIC=m
@@ -1672,9 +1688,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
@@ -1704,6 +1722,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -1940,7 +1959,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
# CONFIG_EFI_CAPSULE_LOADER is not set
@@ -2078,7 +2096,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2216,6 +2234,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+CONFIG_FUEL_GAUGE_MM8013=m
CONFIG_FUJITSU_ES=m
CONFIG_FUJITSU_LAPTOP=m
CONFIG_FUJITSU_TABLET=m
@@ -2324,6 +2343,7 @@ CONFIG_GPIO_IT87=m
# CONFIG_GPIO_LATCH is not set
CONFIG_GPIOLIB_FASTPATH_LIMIT=512
CONFIG_GPIOLIB=y
+CONFIG_GPIO_LJCA=m
# CONFIG_GPIO_LOGICVC is not set
# CONFIG_GPIO_MAX3191X is not set
# CONFIG_GPIO_MAX7300 is not set
@@ -2668,6 +2688,7 @@ CONFIG_I2C_HID=y
CONFIG_I2C_I801=m
CONFIG_I2C_ISCH=m
CONFIG_I2C_ISMT=m
+CONFIG_I2C_LJCA=m
CONFIG_I2C_MLXBF=m
CONFIG_I2C_MLXCPLD=m
CONFIG_I2C_MULTI_INSTANTIATE=m
@@ -2720,6 +2741,7 @@ CONFIG_I40EVF=m
CONFIG_I6300ESB_WDT=m
CONFIG_I82092=m
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
CONFIG_IA32_EMULATION=y
# CONFIG_IAQCORE is not set
CONFIG_IB700_WDT=m
@@ -2735,6 +2757,7 @@ CONFIG_ICPLUS_PHY=m
CONFIG_IDEAPAD_LAPTOP=m
CONFIG_IDLE_INJECT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IE6XX_WDT=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
@@ -2776,8 +2799,8 @@ CONFIG_IIO_FORMAT_KUNIT_TEST=m
CONFIG_IIO_HRTIMER_TRIGGER=m
CONFIG_IIO_INTERRUPT_TRIGGER=m
CONFIG_IIO_KFIFO_BUF=m
-CONFIG_IIO_KX022A_I2C=m
-CONFIG_IIO_KX022A_SPI=m
+# CONFIG_IIO_KX022A_I2C is not set
+# CONFIG_IIO_KX022A_SPI is not set
CONFIG_IIO=m
CONFIG_IIO_MUX=m
CONFIG_IIO_RESCALE_KUNIT_TEST=m
@@ -2786,7 +2809,7 @@ CONFIG_IIO_RESCALE=m
# CONFIG_IIO_SSP_SENSORHUB is not set
CONFIG_IIO_ST_ACCEL_3AXIS=m
CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m
-CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m
+# CONFIG_IIO_ST_ACCEL_SPI_3AXIS is not set
CONFIG_IIO_ST_GYRO_3AXIS=m
CONFIG_IIO_ST_GYRO_I2C_3AXIS=m
CONFIG_IIO_ST_GYRO_SPI_3AXIS=m
@@ -2826,7 +2849,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2955,6 +2977,7 @@ CONFIG_INPUT_WISTRON_BTNS=m
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+CONFIG_INSPUR_PLATFORM_PROFILE=m
CONFIG_INT3406_THERMAL=m
CONFIG_INT340X_THERMAL=m
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
@@ -3037,6 +3060,7 @@ CONFIG_INTEL_SOC_PMIC=y
CONFIG_INTEL_SPEED_SELECT_INTERFACE=m
CONFIG_INTEL_TCC_COOLING=m
CONFIG_INTEL_TDX_GUEST=y
+CONFIG_INTEL_TDX_HOST=y
CONFIG_INTEL_TELEMETRY=m
CONFIG_INTEL_TH_ACPI=m
# CONFIG_INTEL_TH_DEBUG is not set
@@ -3109,8 +3133,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
# CONFIG_IPMB_DEVICE_INTERFACE is not set
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3432,7 +3454,7 @@ CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3470,6 +3492,7 @@ CONFIG_KVM_AMD_SEV=y
CONFIG_KVM_GUEST=y
CONFIG_KVM_INTEL=m
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_MMU_AUDIT=y
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
@@ -3528,6 +3551,7 @@ CONFIG_LEDS_GROUP_MULTICOLOR=m
CONFIG_LEDS_INTEL_SS4200=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -3657,6 +3681,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3671,6 +3696,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3679,6 +3705,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
@@ -3747,6 +3774,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -3757,6 +3785,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -3786,6 +3815,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -4013,18 +4043,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -4126,6 +4160,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -4181,7 +4218,7 @@ CONFIG_MQ_IOSCHED_DEADLINE=y
CONFIG_MQ_IOSCHED_KYBER=y
# CONFIG_MS5611 is not set
# CONFIG_MS5637 is not set
-CONFIG_MSA311=m
+# CONFIG_MSA311 is not set
# CONFIG_MS_BLOCK is not set
CONFIG_MSDOS_FS=m
CONFIG_MSDOS_PARTITION=y
@@ -4207,6 +4244,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AR7_PARTS is not set
@@ -4273,10 +4312,11 @@ CONFIG_MTD_RAW_NAND=m
# CONFIG_MTD_SHARPSL_PARTS is not set
# CONFIG_MTD_SLRAM is not set
# CONFIG_MTD_SPI_NAND is not set
-# CONFIG_MTD_SPI_NOR is not set
+CONFIG_MTD_SPI_NOR=m
# CONFIG_MTD_SPI_NOR_SWP_DISABLE is not set
CONFIG_MTD_SPI_NOR_SWP_DISABLE_ON_VOLATILE=y
# CONFIG_MTD_SPI_NOR_SWP_KEEP is not set
+CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y
# CONFIG_MTD_SST25L is not set
# CONFIG_MTD_SWAP is not set
# CONFIG_MTD_TESTS is not set
@@ -4358,9 +4398,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4433,12 +4470,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4531,6 +4568,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4543,15 +4581,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
# CONFIG_NET_SB1000 is not set
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4585,6 +4620,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
CONFIG_NET_TULIP=y
CONFIG_NET_UDP_TUNNEL=m
CONFIG_NET_VENDOR_3COM=y
@@ -4721,7 +4757,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -4927,8 +4963,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_SHIELD_FF=y
CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_LAYOUT_ONIE_TLV=m
CONFIG_NVMEM_LAYOUT_SL28_VPD=m
@@ -4948,7 +4985,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=y
CONFIG_NVSW_SN2201=m
@@ -5085,6 +5124,7 @@ CONFIG_PCI_BIOS=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -5148,6 +5188,7 @@ CONFIG_PCMCIA_XIRC2PS=m
CONFIG_PCMCIA_XIRCOM=m
CONFIG_PCMCIA=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -5179,8 +5220,6 @@ CONFIG_PHY_CADENCE_SIERRA=m
CONFIG_PHY_CADENCE_TORRENT=m
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
# CONFIG_PHY_HI3670_USB is not set
# CONFIG_PHY_INTEL_LGM_COMBO is not set
@@ -5189,8 +5228,6 @@ CONFIG_PHY_CADENCE_TORRENT=m
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -5295,7 +5332,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERCAP=y
CONFIG_POWER_MLXBF=m
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_LINKSTATION is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -5550,7 +5586,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=y
@@ -5585,6 +5621,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77650=m
# CONFIG_REGULATOR_MAX77826 is not set
CONFIG_REGULATOR_MAX77857=m
@@ -5692,6 +5729,7 @@ CONFIG_RMI4_SPI=m
CONFIG_RMNET=m
# CONFIG_ROCKCHIP_PHY is not set
CONFIG_ROCKER=m
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -5744,7 +5782,6 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_ABEOZ9=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_CROS_EC=m
@@ -5928,7 +5965,7 @@ CONFIG_SBP_TARGET=m
# CONFIG_SC1200_WDT is not set
CONFIG_SC92031=m
# CONFIG_SCA3000 is not set
-CONFIG_SCA3300=m
+# CONFIG_SCA3300 is not set
CONFIG_SCD30_CORE=m
CONFIG_SCD30_I2C=m
CONFIG_SCD30_SERIAL=m
@@ -6060,11 +6097,12 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -6219,6 +6257,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -6280,6 +6319,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -6462,7 +6502,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLS=y
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
@@ -6499,6 +6538,9 @@ CONFIG_SND_ALOOP=m
CONFIG_SND_ALS300=m
CONFIG_SND_ALS4000=m
CONFIG_SND_AMD_ACP_CONFIG=m
+# CONFIG_SND_AMD_ASOC_ACP63 is not set
+# CONFIG_SND_AMD_ASOC_REMBRANDT is not set
+# CONFIG_SND_AMD_ASOC_RENOIR is not set
CONFIG_SND_ASIHPI=m
CONFIG_SND_ATIIXP=m
CONFIG_SND_ATIIXP_MODEM=m
@@ -6558,6 +6600,7 @@ CONFIG_SND_FM801=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6689,14 +6732,18 @@ CONFIG_SND_SOC_AMD_ACP5x=m
CONFIG_SND_SOC_AMD_ACP6x=m
CONFIG_SND_SOC_AMD_ACP_COMMON=m
CONFIG_SND_SOC_AMD_ACP=m
+# CONFIG_SND_SOC_AMD_ACP_PCI is not set
CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m
CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m
+# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set
+CONFIG_SND_SOC_AMD_MACH_COMMON=m
CONFIG_SND_SOC_AMD_PS=m
CONFIG_SND_SOC_AMD_PS_MACH=m
CONFIG_SND_SOC_AMD_RENOIR=m
CONFIG_SND_SOC_AMD_RENOIR_MACH=m
CONFIG_SND_SOC_AMD_RPL_ACP6x=m
CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m
+CONFIG_SND_SOC_AMD_SOF_MACH=m
CONFIG_SND_SOC_AMD_ST_ES8336_MACH=m
CONFIG_SND_SOC_AMD_VANGOGH_MACH=m
CONFIG_SND_SOC_AMD_YC_MACH=m
@@ -6704,8 +6751,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m
# CONFIG_SND_SOC_ARNDALE is not set
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -6800,6 +6849,7 @@ CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT298=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682=m
CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567=m
@@ -6840,6 +6890,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y
CONFIG_SND_SOC_INTEL_SKYLAKE=m
CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m
CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m
CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m
CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m
@@ -6915,12 +6966,6 @@ CONFIG_SND_SOC_PCM512x=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
CONFIG_SND_SOC_RT1017_SDCA_SDW=m
CONFIG_SND_SOC_RT1308=m
CONFIG_SND_SOC_RT1308_SDW=m
@@ -6943,6 +6988,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6960,6 +7006,7 @@ CONFIG_SND_SOC_SMA1303=m
# CONFIG_SND_SOC_SNOW is not set
CONFIG_SND_SOC_SOF_ACPI=m
CONFIG_SND_SOC_SOF_ALDERLAKE=m
+CONFIG_SND_SOC_SOF_AMD_ACP63=m
CONFIG_SND_SOC_SOF_AMD_REMBRANDT=m
CONFIG_SND_SOC_SOF_AMD_RENOIR=m
CONFIG_SND_SOC_SOF_AMD_TOPLEVEL=m
@@ -7200,11 +7247,15 @@ CONFIG_SPI_FSL_LPSPI=m
# CONFIG_SPI_GPIO is not set
# CONFIG_SPI_HISI_KUNPENG is not set
# CONFIG_SPI_HISI_SFC_V3XX is not set
+CONFIG_SPI_INTEL=m
+CONFIG_SPI_INTEL_PCI=m
+# CONFIG_SPI_INTEL_PLATFORM is not set
# CONFIG_SPI_LANTIQ_SSC is not set
+CONFIG_SPI_LJCA=m
# CONFIG_SPI_LM70_LLP is not set
# CONFIG_SPI_LOOPBACK_TEST is not set
CONFIG_SPI_MASTER=y
-# CONFIG_SPI_MEM is not set
+CONFIG_SPI_MEM=y
CONFIG_SPI_MICROCHIP_CORE=m
CONFIG_SPI_MICROCHIP_CORE_QSPI=m
CONFIG_SPI_MUX=m
@@ -7402,6 +7453,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -7456,6 +7508,7 @@ CONFIG_TEST_LOCKUP=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7526,8 +7579,6 @@ CONFIG_TIFM_7XX1=m
CONFIG_TIFM_CORE=m
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
-CONFIG_TI_ICSS_IEP=m
CONFIG_TI_LMP92064=m
CONFIG_TIME_KUNIT_TEST=m
CONFIG_TIME_NS=y
@@ -7720,6 +7771,7 @@ CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_INTEL_PMC=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7813,6 +7865,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CONFIGFS_F_MIDI2=y
# CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set
@@ -7945,6 +7998,7 @@ CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LGM_PHY is not set
# CONFIG_USB_LINK_LAYER_TEST is not set
+CONFIG_USB_LJCA=m
CONFIG_USB_M5602=m
CONFIG_USB_MA901=m
# CONFIG_USB_MASS_STORAGE is not set
@@ -7994,6 +8048,7 @@ CONFIG_USB_ONBOARD_HUB=m
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -8206,7 +8261,7 @@ CONFIG_VIDEO_BT819=m
CONFIG_VIDEO_BT848=m
CONFIG_VIDEO_BT856=m
CONFIG_VIDEO_BT866=m
-CONFIG_VIDEO_CADENCE_CSI2RX=m
+# CONFIG_VIDEO_CADENCE_CSI2RX is not set
CONFIG_VIDEO_CADENCE_CSI2TX=m
# CONFIG_VIDEO_CADENCE is not set
# CONFIG_VIDEO_CAFE_CCIC is not set
@@ -8285,10 +8340,12 @@ CONFIG_VIDEO_M52790=m
CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MAX96712 is not set
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -8666,6 +8723,7 @@ CONFIG_XEN_NETDEV_BACKEND=m
CONFIG_XEN_NETDEV_FRONTEND=m
CONFIG_XEN_PCIDEV_BACKEND=m
CONFIG_XEN_PCIDEV_FRONTEND=m
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_BACKEND is not set
@@ -8792,22 +8850,13 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
CONFIG_LEGION_LAPTOP=m
CONFIG_ACPI_CALL=m
CONFIG_MFD_STEAMDECK=m
@@ -8843,16 +8892,17 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2
# CONFIG_USB_DUMMY_HCD is not set
# CONFIG_USB_CONFIGFS is not set
# CONFIG_PHY_SAMSUNG_USB2 is not set
-CONFIG_SND_SOC_AMD_SOF_MACH=m
-CONFIG_SND_SOC_AMD_MACH_COMMON=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
CONFIG_SND_SOC_SOF=m
CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y
CONFIG_SND_SOC_SOF_IPC3=y
CONFIG_SND_SOC_SOF_INTEL_IPC4=y
CONFIG_SND_SOC_SOF_AMD_COMMON=m
-CONFIG_SND_SOC_SOF_AMD_ACP63=m
-# CONFIG_SND_SOC_AMD_ACP_PCI is not set
-# CONFIG_SND_AMD_ASOC_RENOIR is not set
-# CONFIG_SND_AMD_ASOC_REMBRANDT is not set
-# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set
CONFIG_SND_SOC_TOPOLOGY=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-x86_64-debug-rhel.config b/SOURCES/kernel-x86_64-debug-rhel.config
index 9ad5728..46a8f09 100644
--- a/SOURCES/kernel-x86_64-debug-rhel.config
+++ b/SOURCES/kernel-x86_64-debug-rhel.config
@@ -258,7 +258,6 @@ CONFIG_AQUANTIA_PHY=m
# CONFIG_ARCH_BITMAIN is not set
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
# CONFIG_ARCH_MEMORY_PROBE is not set
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=28
@@ -304,6 +303,7 @@ CONFIG_ASUS_NB_WMI=m
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
CONFIG_ASUS_WMI=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -417,6 +417,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -545,7 +546,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -640,7 +640,6 @@ CONFIG_CALL_DEPTH_TRACKING=y
# CONFIG_CALL_THUNKS_DEBUG is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -710,6 +709,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -787,6 +787,7 @@ CONFIG_CIFS_SMB_DIRECT=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -862,7 +863,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -936,7 +936,6 @@ CONFIG_CRYPTO_ADIANTUM=m
# CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set
# CONFIG_CRYPTO_AEGIS128 is not set
# CONFIG_CRYPTO_AES_ARM64 is not set
-CONFIG_CRYPTO_AES_GCM_P10=y
CONFIG_CRYPTO_AES_NI_INTEL=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
@@ -963,7 +962,6 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CHACHA20_X86_64=y
CONFIG_CRYPTO_CMAC=y
@@ -1036,6 +1034,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1060,7 +1063,6 @@ CONFIG_CRYPTO_OFB=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
-# CONFIG_CRYPTO_POLY1305_P10 is not set
CONFIG_CRYPTO_POLY1305_X86_64=y
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1356,6 +1358,7 @@ CONFIG_DP83TC811_PHY=m
CONFIG_DPTF_PCH_FIVR=m
CONFIG_DPTF_POWER=m
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1431,6 +1434,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1457,36 +1461,90 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1681,7 +1739,6 @@ CONFIG_EEPROM_93CX6=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1721,7 +1778,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1792,7 +1854,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1891,7 +1953,9 @@ CONFIG_FS_DAX=y
# CONFIG_FSL_QDMA is not set
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -1899,6 +1963,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ES=m
CONFIG_FUJITSU_LAPTOP=m
CONFIG_FUJITSU_TABLET=m
@@ -2041,6 +2106,7 @@ CONFIG_GPIO_SIM=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
CONFIG_GUP_TEST=y
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2226,6 +2292,7 @@ CONFIG_HMM_MIRROR=y
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2381,6 +2448,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
CONFIG_IA32_EMULATION=y
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
@@ -2397,6 +2465,7 @@ CONFIG_ICPLUS_PHY=m
CONFIG_IDEAPAD_LAPTOP=m
CONFIG_IDLE_INJECT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
# CONFIG_IE6XX_WDT is not set
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
@@ -2468,7 +2537,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2586,6 +2654,7 @@ CONFIG_INPUT_UINPUT=m
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INT340X_THERMAL=m
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
@@ -2654,6 +2723,7 @@ CONFIG_INTEL_SDSI=m
CONFIG_INTEL_SPEED_SELECT_INTERFACE=m
CONFIG_INTEL_TCC_COOLING=m
CONFIG_INTEL_TDX_GUEST=y
+# CONFIG_INTEL_TDX_HOST is not set
CONFIG_INTEL_TH_ACPI=m
# CONFIG_INTEL_TH_DEBUG is not set
CONFIG_INTEL_TH_GTH=m
@@ -2688,7 +2758,8 @@ CONFIG_IOMMU_DEBUGFS=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -2989,7 +3060,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3023,6 +3094,7 @@ CONFIG_KVM_AMD_SEV=y
CONFIG_KVM_GUEST=y
CONFIG_KVM_INTEL=m
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_MMU_AUDIT=y
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
@@ -3190,6 +3262,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3204,6 +3277,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3212,6 +3286,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3241,7 +3316,6 @@ CONFIG_MANTIS_CORE=m
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
CONFIG_MARVELL_PHY=m
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3277,6 +3351,7 @@ CONFIG_MAXSMP=y
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3300,7 +3375,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MD_LINEAR=m
# CONFIG_MD_MULTIPATH is not set
@@ -3309,6 +3384,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3375,7 +3451,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3520,6 +3596,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3629,6 +3706,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3706,6 +3786,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3816,9 +3898,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3940,6 +4019,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3952,15 +4032,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -3989,6 +4066,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4088,7 +4166,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4285,8 +4363,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4308,7 +4387,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=y
CONFIG_NVSW_SN2201=m
@@ -4424,6 +4505,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4472,6 +4554,7 @@ CONFIG_PCI_STUB=y
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4501,8 +4584,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4514,8 +4595,6 @@ CONFIG_PHY_BCM_SR_USB=m
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4547,6 +4626,7 @@ CONFIG_PINCTRL_BROXTON=m
CONFIG_PINCTRL_CANNONLAKE=m
CONFIG_PINCTRL_CEDARFORK=m
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_DENVERTON=m
CONFIG_PINCTRL_ELKHARTLAKE=m
@@ -4624,7 +4704,6 @@ CONFIG_POWERCAP=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -4788,7 +4867,6 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4890,6 +4968,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -4965,6 +5044,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5004,7 +5084,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5316,6 +5395,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5472,6 +5552,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5530,6 +5611,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5699,7 +5781,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLS=y
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
@@ -5781,6 +5862,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5923,8 +6005,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6016,6 +6100,7 @@ CONFIG_SND_SOC_INTEL_AVS=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6056,6 +6141,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y
CONFIG_SND_SOC_INTEL_SKYLAKE=m
CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m
CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m
CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m
CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m
@@ -6131,12 +6217,6 @@ CONFIG_SND_SOC_PCM512x=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
CONFIG_SND_SOC_RT1308=m
CONFIG_SND_SOC_RT1308_SDW=m
@@ -6159,6 +6239,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6356,7 +6437,6 @@ CONFIG_SND_X86=y
CONFIG_SND_XEN_FRONTEND=m
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6574,6 +6654,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6627,6 +6708,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6856,6 +6938,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6931,6 +7014,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -7029,6 +7113,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LGM_PHY is not set
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7071,6 +7156,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7220,6 +7306,9 @@ CONFIG_VETH=m
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
CONFIG_VFIO_MDEV=m
@@ -7335,11 +7424,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7754,22 +7845,13 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
CONFIG_LEGION_LAPTOP=m
CONFIG_ACPI_CALL=m
CONFIG_IIO_HRTIMER_TRIGGER=m
@@ -7811,6 +7893,7 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2
# CONFIG_USB_DUMMY_HCD is not set
# CONFIG_USB_CONFIGFS is not set
# CONFIG_PHY_SAMSUNG_USB2 is not set
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
CONFIG_SND_SOC_AMD_SOF_MACH=m
CONFIG_SND_SOC_AMD_MACH_COMMON=m
CONFIG_SND_SOC_SOF=m
@@ -7824,3 +7907,10 @@ CONFIG_SND_SOC_SOF_AMD_ACP63=m
# CONFIG_SND_AMD_ASOC_REMBRANDT is not set
# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set
CONFIG_SND_SOC_TOPOLOGY=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-x86_64-fedora.config b/SOURCES/kernel-x86_64-fedora.config
index 212554b..2b338b9 100644
--- a/SOURCES/kernel-x86_64-fedora.config
+++ b/SOURCES/kernel-x86_64-fedora.config
@@ -188,16 +188,16 @@ CONFIG_ADVANTECH_EC_WDT=m
# CONFIG_ADVANTECH_WDT is not set
CONFIG_ADVISE_SYSCALLS=y
CONFIG_ADV_SWBUTTON=m
-CONFIG_ADXL313_I2C=m
-CONFIG_ADXL313_SPI=m
+# CONFIG_ADXL313_I2C is not set
+# CONFIG_ADXL313_SPI is not set
# CONFIG_ADXL345_I2C is not set
# CONFIG_ADXL345_SPI is not set
-CONFIG_ADXL355_I2C=m
-CONFIG_ADXL355_SPI=m
-CONFIG_ADXL367_I2C=m
-CONFIG_ADXL367_SPI=m
-CONFIG_ADXL372_I2C=m
-CONFIG_ADXL372_SPI=m
+# CONFIG_ADXL355_I2C is not set
+# CONFIG_ADXL355_SPI is not set
+# CONFIG_ADXL367_I2C is not set
+# CONFIG_ADXL367_SPI is not set
+# CONFIG_ADXL372_I2C is not set
+# CONFIG_ADXL372_SPI is not set
CONFIG_ADXRS290=m
# CONFIG_ADXRS450 is not set
# CONFIG_AFE4403 is not set
@@ -343,6 +343,7 @@ CONFIG_ASUS_NB_WMI=m
CONFIG_ASUS_TF103C_DOCK=m
CONFIG_ASUS_WIRELESS=m
CONFIG_ASUS_WMI=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -532,6 +533,15 @@ CONFIG_BAYCOM_SER_HDX=m
# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
# CONFIG_BCACHE_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
CONFIG_BCACHE=m
CONFIG_BCM54140_PHY=m
CONFIG_BCM7XXX_PHY=m
@@ -674,7 +684,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
CONFIG_BRIDGE_CFM=y
CONFIG_BRIDGE_EBT_802_3=m
@@ -786,7 +795,6 @@ CONFIG_CALL_DEPTH_TRACKING=y
# CONFIG_CALL_THUNKS_DEBUG is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
CONFIG_CAN_CALC_BITTIMING=y
CONFIG_CAN_CAN327=m
# CONFIG_CAN_CC770 is not set
@@ -866,6 +874,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFI_CLANG is not set
CONFIG_CFS_BANDWIDTH=y
@@ -958,6 +967,7 @@ CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CIO2_BRIDGE=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -1037,7 +1047,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -1115,6 +1124,7 @@ CONFIG_CROS_EC_TYPEC=m
CONFIG_CROS_EC_UART=m
CONFIG_CROS_HPS_I2C=m
CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
CONFIG_CROS_KUNIT=m
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_CROS_TYPEC_SWITCH=m
@@ -1222,6 +1232,11 @@ CONFIG_CRYPTO_GHASH=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1313,7 +1328,7 @@ CONFIG_CXL_PMU=m
# CONFIG_CXL_REGION_INVALIDATION_TEST is not set
CONFIG_CXL_REGION=y
CONFIG_DA280=m
-CONFIG_DA311=m
+# CONFIG_DA311 is not set
CONFIG_DAMON_DBGFS=y
# CONFIG_DAMON_LRU_SORT is not set
CONFIG_DAMON_PADDR=y
@@ -1332,6 +1347,7 @@ CONFIG_DE2104X=m
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
# CONFIG_DEBUG_CREDENTIALS is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_DEBUG_DRIVER is not set
@@ -1436,7 +1452,6 @@ CONFIG_DELL_WMI=m
CONFIG_DELL_WMI_PRIVACY=y
CONFIG_DELL_WMI_SYSMAN=m
# CONFIG_DETECT_HUNG_TASK is not set
-# CONFIG_DEV_APPLETALK is not set
CONFIG_DEV_DAX_CXL=m
CONFIG_DEV_DAX_HMEM=m
CONFIG_DEV_DAX_KMEM=m
@@ -1484,7 +1499,7 @@ CONFIG_DMA_NUMA_CMA=y
# CONFIG_DMAPOOL_TEST is not set
# CONFIG_DMARD06 is not set
# CONFIG_DMARD09 is not set
-CONFIG_DMARD10=m
+# CONFIG_DMARD10 is not set
# CONFIG_DMA_RESTRICTED_POOL is not set
# CONFIG_DMATEST is not set
CONFIG_DM_CACHE=m
@@ -1532,7 +1547,7 @@ CONFIG_DNS_RESOLVER=m
CONFIG_DP83640_PHY=m
CONFIG_DP83822_PHY=m
CONFIG_DP83848_PHY=m
-# CONFIG_DP83867_PHY is not set
+CONFIG_DP83867_PHY=m
CONFIG_DP83869_PHY=m
# CONFIG_DP83TC811_PHY is not set
# CONFIG_DP83TD510_PHY is not set
@@ -1543,6 +1558,7 @@ CONFIG_DPTF_PCH_FIVR=m
CONFIG_DPTF_POWER=m
CONFIG_DRAGONRISE_FF=y
# CONFIG_DRBD_FAULT_INJECTION is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
CONFIG_DRM_ACCEL_HABANALABS=m
CONFIG_DRM_ACCEL_IVPU=m
CONFIG_DRM_ACCEL_QAIC=m
@@ -1661,9 +1677,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
CONFIG_DRM_PANEL_JDI_R63452=m
# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
@@ -1693,6 +1711,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RONBO_RB070D30=m
CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -1929,7 +1948,6 @@ CONFIG_EEPROM_AT24=m
# CONFIG_EEPROM_AT25 is not set
CONFIG_EEPROM_EE1004=m
CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
# CONFIG_EFI_CAPSULE_LOADER is not set
@@ -2059,7 +2077,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -2197,6 +2215,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+CONFIG_FUEL_GAUGE_MM8013=m
CONFIG_FUJITSU_ES=m
CONFIG_FUJITSU_LAPTOP=m
CONFIG_FUJITSU_TABLET=m
@@ -2305,6 +2324,7 @@ CONFIG_GPIO_IT87=m
# CONFIG_GPIO_LATCH is not set
CONFIG_GPIOLIB_FASTPATH_LIMIT=512
CONFIG_GPIOLIB=y
+CONFIG_GPIO_LJCA=m
# CONFIG_GPIO_LOGICVC is not set
# CONFIG_GPIO_MAX3191X is not set
# CONFIG_GPIO_MAX7300 is not set
@@ -2648,6 +2668,7 @@ CONFIG_I2C_HID=y
CONFIG_I2C_I801=m
CONFIG_I2C_ISCH=m
CONFIG_I2C_ISMT=m
+CONFIG_I2C_LJCA=m
CONFIG_I2C_MLXBF=m
CONFIG_I2C_MLXCPLD=m
CONFIG_I2C_MULTI_INSTANTIATE=m
@@ -2700,6 +2721,7 @@ CONFIG_I40EVF=m
CONFIG_I6300ESB_WDT=m
CONFIG_I82092=m
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
CONFIG_IA32_EMULATION=y
# CONFIG_IAQCORE is not set
CONFIG_IB700_WDT=m
@@ -2715,6 +2737,7 @@ CONFIG_ICPLUS_PHY=m
CONFIG_IDEAPAD_LAPTOP=m
CONFIG_IDLE_INJECT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
CONFIG_IE6XX_WDT=m
CONFIG_IEEE802154_6LOWPAN=m
CONFIG_IEEE802154_ADF7242=m
@@ -2756,8 +2779,8 @@ CONFIG_IIO_FORMAT_KUNIT_TEST=m
CONFIG_IIO_HRTIMER_TRIGGER=m
CONFIG_IIO_INTERRUPT_TRIGGER=m
CONFIG_IIO_KFIFO_BUF=m
-CONFIG_IIO_KX022A_I2C=m
-CONFIG_IIO_KX022A_SPI=m
+# CONFIG_IIO_KX022A_I2C is not set
+# CONFIG_IIO_KX022A_SPI is not set
CONFIG_IIO=m
CONFIG_IIO_MUX=m
CONFIG_IIO_RESCALE_KUNIT_TEST=m
@@ -2766,7 +2789,7 @@ CONFIG_IIO_RESCALE=m
# CONFIG_IIO_SSP_SENSORHUB is not set
CONFIG_IIO_ST_ACCEL_3AXIS=m
CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m
-CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m
+# CONFIG_IIO_ST_ACCEL_SPI_3AXIS is not set
CONFIG_IIO_ST_GYRO_3AXIS=m
CONFIG_IIO_ST_GYRO_I2C_3AXIS=m
CONFIG_IIO_ST_GYRO_SPI_3AXIS=m
@@ -2806,7 +2829,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
# CONFIG_IMA_SIG_TEMPLATE is not set
# CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2935,6 +2957,7 @@ CONFIG_INPUT_WISTRON_BTNS=m
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+CONFIG_INSPUR_PLATFORM_PROFILE=m
CONFIG_INT3406_THERMAL=m
CONFIG_INT340X_THERMAL=m
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
@@ -3017,6 +3040,7 @@ CONFIG_INTEL_SOC_PMIC=y
CONFIG_INTEL_SPEED_SELECT_INTERFACE=m
CONFIG_INTEL_TCC_COOLING=m
CONFIG_INTEL_TDX_GUEST=y
+CONFIG_INTEL_TDX_HOST=y
CONFIG_INTEL_TELEMETRY=m
CONFIG_INTEL_TH_ACPI=m
# CONFIG_INTEL_TH_DEBUG is not set
@@ -3089,8 +3113,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IPC_NS=y
# CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
CONFIG_IP_FIB_TRIE_STATS=y
# CONFIG_IPMB_DEVICE_INTERFACE is not set
CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3405,7 +3427,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3444,6 +3466,7 @@ CONFIG_KVM_AMD_SEV=y
CONFIG_KVM_GUEST=y
CONFIG_KVM_INTEL=m
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_MMU_AUDIT=y
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
@@ -3502,6 +3525,7 @@ CONFIG_LEDS_GROUP_MULTICOLOR=m
CONFIG_LEDS_INTEL_SS4200=m
# CONFIG_LEDS_IS31FL319X is not set
CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
# CONFIG_LEDS_KTD2692 is not set
# CONFIG_LEDS_LGM is not set
CONFIG_LEDS_LM3530=m
@@ -3631,6 +3655,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
CONFIG_LSM_MMAP_MIN_ADDR=65535
CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3645,6 +3670,7 @@ CONFIG_LTO_NONE=y
CONFIG_LTR501=m
CONFIG_LTRF216A=m
CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3653,6 +3679,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
CONFIG_MAC80211_MESH=y
@@ -3721,6 +3748,7 @@ CONFIG_MB1232=m
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
CONFIG_MCP3911=m
CONFIG_MCP4018=m
CONFIG_MCP41010=m
@@ -3731,6 +3759,7 @@ CONFIG_MCP4728=m
# CONFIG_MCP4922 is not set
CONFIG_MCTP_SERIAL=m
# CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
CONFIG_MCTP=y
CONFIG_MD_AUTODETECT=y
CONFIG_MD_BITMAP_FILE=y
@@ -3760,6 +3789,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
CONFIG_MEDIA_ATTACH=y
@@ -3987,18 +4017,22 @@ CONFIG_MLX4_DEBUG=y
CONFIG_MLX4_EN_DCB=y
CONFIG_MLX4_EN=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
CONFIG_MLX5_CLS_ACT=y
CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
CONFIG_MLX5_EN_RXNFC=y
CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
CONFIG_MLX5_INFINIBAND=m
CONFIG_MLX5_IPSEC=y
CONFIG_MLX5_MACSEC=y
@@ -4100,6 +4134,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -4155,7 +4192,7 @@ CONFIG_MQ_IOSCHED_DEADLINE=y
CONFIG_MQ_IOSCHED_KYBER=y
# CONFIG_MS5611 is not set
# CONFIG_MS5637 is not set
-CONFIG_MSA311=m
+# CONFIG_MSA311 is not set
# CONFIG_MS_BLOCK is not set
CONFIG_MSDOS_FS=m
CONFIG_MSDOS_PARTITION=y
@@ -4181,6 +4218,8 @@ CONFIG_MT7915E=m
CONFIG_MT7921E=m
CONFIG_MT7921S=m
CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
CONFIG_MT7996E=m
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AR7_PARTS is not set
@@ -4247,10 +4286,11 @@ CONFIG_MTD_RAW_NAND=m
# CONFIG_MTD_SHARPSL_PARTS is not set
# CONFIG_MTD_SLRAM is not set
# CONFIG_MTD_SPI_NAND is not set
-# CONFIG_MTD_SPI_NOR is not set
+CONFIG_MTD_SPI_NOR=m
# CONFIG_MTD_SPI_NOR_SWP_DISABLE is not set
CONFIG_MTD_SPI_NOR_SWP_DISABLE_ON_VOLATILE=y
# CONFIG_MTD_SPI_NOR_SWP_KEEP is not set
+CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y
# CONFIG_MTD_SST25L is not set
# CONFIG_MTD_SWAP is not set
# CONFIG_MTD_TESTS is not set
@@ -4332,9 +4372,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4407,12 +4444,12 @@ CONFIG_NETFILTER_EGRESS=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_NETLINK_ACCT=m
# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_OSF=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_CONNMARK=m
CONFIG_NETFILTER_XT_MARK=m
@@ -4505,6 +4542,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -4517,15 +4555,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NETROM=m
# CONFIG_NET_SB1000 is not set
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
# CONFIG_NET_SCH_DEFAULT is not set
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4559,6 +4594,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m
CONFIG_NET_TEAM_MODE_RANDOM=m
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m
+CONFIG_NET_TEST=m
CONFIG_NET_TULIP=y
CONFIG_NET_UDP_TUNNEL=m
CONFIG_NET_VENDOR_3COM=y
@@ -4695,7 +4731,7 @@ CONFIG_NFC_ST21NFCA=m
# CONFIG_NFC_ST_NCI_I2C is not set
# CONFIG_NFC_ST_NCI_SPI is not set
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NFC_TRF7970A=m
@@ -4901,8 +4937,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_SHIELD_FF=y
CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
CONFIG_NVME_HWMON=y
CONFIG_NVMEM_LAYOUT_ONIE_TLV=m
CONFIG_NVMEM_LAYOUT_SL28_VPD=m
@@ -4922,7 +4959,9 @@ CONFIG_NVME_TARGET=m
CONFIG_NVME_TARGET_PASSTHRU=y
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=y
CONFIG_NVSW_SN2201=m
@@ -5058,6 +5097,7 @@ CONFIG_PCI_BIOS=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -5121,6 +5161,7 @@ CONFIG_PCMCIA_XIRC2PS=m
CONFIG_PCMCIA_XIRCOM=m
CONFIG_PCMCIA=y
CONFIG_PCNET32=m
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -5152,8 +5193,6 @@ CONFIG_PHY_CADENCE_SIERRA=m
CONFIG_PHY_CADENCE_TORRENT=m
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
# CONFIG_PHY_HI3670_USB is not set
# CONFIG_PHY_INTEL_LGM_COMBO is not set
@@ -5162,8 +5201,6 @@ CONFIG_PHY_CADENCE_TORRENT=m
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -5267,7 +5304,6 @@ CONFIG_POSIX_TIMERS=y
CONFIG_POWERCAP=y
CONFIG_POWER_MLXBF=m
# CONFIG_POWER_RESET_BRCMKONA is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_LINKSTATION is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -5522,7 +5558,7 @@ CONFIG_RD_ZSTD=y
# CONFIG_READABLE_ASM is not set
# CONFIG_READ_ONLY_THP_FOR_FS is not set
CONFIG_REALTEK_AUTOPM=y
-CONFIG_REALTEK_PHY=y
+CONFIG_REALTEK_PHY=m
# CONFIG_REED_SOLOMON_TEST is not set
# CONFIG_REGMAP_BUILD is not set
CONFIG_REGMAP_I2C=y
@@ -5557,6 +5593,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
CONFIG_REGULATOR_MAX20411=m
CONFIG_REGULATOR_MAX5970=m
CONFIG_REGULATOR_MAX597X=m
+CONFIG_REGULATOR_MAX77503=m
CONFIG_REGULATOR_MAX77650=m
# CONFIG_REGULATOR_MAX77826 is not set
CONFIG_REGULATOR_MAX77857=m
@@ -5664,6 +5701,7 @@ CONFIG_RMI4_SPI=m
CONFIG_RMNET=m
# CONFIG_ROCKCHIP_PHY is not set
CONFIG_ROCKER=m
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
CONFIG_ROHM_BU27034=m
CONFIG_ROMFS_BACKED_BY_BLOCK=y
@@ -5716,7 +5754,6 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_ABEOZ9=m
CONFIG_RTC_DRV_ABX80X=m
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_CROS_EC=m
@@ -5900,7 +5937,7 @@ CONFIG_SBP_TARGET=m
# CONFIG_SC1200_WDT is not set
CONFIG_SC92031=m
# CONFIG_SCA3000 is not set
-CONFIG_SCA3300=m
+# CONFIG_SCA3300 is not set
CONFIG_SCD30_CORE=m
CONFIG_SCD30_I2C=m
CONFIG_SCD30_SERIAL=m
@@ -6032,11 +6069,12 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_INFINIBAND=y
CONFIG_SECURITY_LANDLOCK=y
@@ -6191,6 +6229,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
# CONFIG_SENSORS_LTC2992 is not set
CONFIG_SENSORS_LTC3815=m
CONFIG_SENSORS_LTC4151=m
@@ -6252,6 +6291,7 @@ CONFIG_SENSORS_PLI1209BC=m
CONFIG_SENSORS_PLI1209BC_REGULATOR=y
CONFIG_SENSORS_PM6764TR=m
CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
CONFIG_SENSORS_POWR1220=m
CONFIG_SENSORS_PWM_FAN=m
# CONFIG_SENSORS_PXE1610 is not set
@@ -6434,7 +6474,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLS=y
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
@@ -6471,6 +6510,9 @@ CONFIG_SND_ALOOP=m
CONFIG_SND_ALS300=m
CONFIG_SND_ALS4000=m
CONFIG_SND_AMD_ACP_CONFIG=m
+# CONFIG_SND_AMD_ASOC_ACP63 is not set
+# CONFIG_SND_AMD_ASOC_REMBRANDT is not set
+# CONFIG_SND_AMD_ASOC_RENOIR is not set
CONFIG_SND_ASIHPI=m
CONFIG_SND_ATIIXP=m
CONFIG_SND_ATIIXP_MODEM=m
@@ -6530,6 +6572,7 @@ CONFIG_SND_FM801=m
CONFIG_SND_FM801_TEA575X_BOOL=y
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6660,14 +6703,18 @@ CONFIG_SND_SOC_AMD_ACP5x=m
CONFIG_SND_SOC_AMD_ACP6x=m
CONFIG_SND_SOC_AMD_ACP_COMMON=m
CONFIG_SND_SOC_AMD_ACP=m
+# CONFIG_SND_SOC_AMD_ACP_PCI is not set
CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m
CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m
+# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set
+CONFIG_SND_SOC_AMD_MACH_COMMON=m
CONFIG_SND_SOC_AMD_PS=m
CONFIG_SND_SOC_AMD_PS_MACH=m
CONFIG_SND_SOC_AMD_RENOIR=m
CONFIG_SND_SOC_AMD_RENOIR_MACH=m
CONFIG_SND_SOC_AMD_RPL_ACP6x=m
CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m
+CONFIG_SND_SOC_AMD_SOF_MACH=m
CONFIG_SND_SOC_AMD_ST_ES8336_MACH=m
CONFIG_SND_SOC_AMD_VANGOGH_MACH=m
CONFIG_SND_SOC_AMD_YC_MACH=m
@@ -6675,8 +6722,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m
# CONFIG_SND_SOC_ARNDALE is not set
CONFIG_SND_SOC_AUDIO_IIO_AUX=m
CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
CONFIG_SND_SOC_BD28623=m
CONFIG_SND_SOC_BT_SCO=m
CONFIG_SND_SOC_CHV3_CODEC=m
@@ -6771,6 +6820,7 @@ CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT298=m
+CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682=m
CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567=m
@@ -6811,6 +6861,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y
CONFIG_SND_SOC_INTEL_SKYLAKE=m
CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m
CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m
CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m
CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m
@@ -6886,12 +6937,6 @@ CONFIG_SND_SOC_PCM512x=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
CONFIG_SND_SOC_RT1017_SDCA_SDW=m
CONFIG_SND_SOC_RT1308=m
CONFIG_SND_SOC_RT1308_SDW=m
@@ -6914,6 +6959,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6931,6 +6977,7 @@ CONFIG_SND_SOC_SMA1303=m
# CONFIG_SND_SOC_SNOW is not set
CONFIG_SND_SOC_SOF_ACPI=m
CONFIG_SND_SOC_SOF_ALDERLAKE=m
+CONFIG_SND_SOC_SOF_AMD_ACP63=m
CONFIG_SND_SOC_SOF_AMD_REMBRANDT=m
CONFIG_SND_SOC_SOF_AMD_RENOIR=m
CONFIG_SND_SOC_SOF_AMD_TOPLEVEL=m
@@ -7170,11 +7217,15 @@ CONFIG_SPI_FSL_LPSPI=m
# CONFIG_SPI_GPIO is not set
# CONFIG_SPI_HISI_KUNPENG is not set
# CONFIG_SPI_HISI_SFC_V3XX is not set
+CONFIG_SPI_INTEL=m
+CONFIG_SPI_INTEL_PCI=m
+# CONFIG_SPI_INTEL_PLATFORM is not set
# CONFIG_SPI_LANTIQ_SSC is not set
+CONFIG_SPI_LJCA=m
# CONFIG_SPI_LM70_LLP is not set
# CONFIG_SPI_LOOPBACK_TEST is not set
CONFIG_SPI_MASTER=y
-# CONFIG_SPI_MEM is not set
+CONFIG_SPI_MEM=y
CONFIG_SPI_MICROCHIP_CORE=m
CONFIG_SPI_MICROCHIP_CORE_QSPI=m
CONFIG_SPI_MUX=m
@@ -7372,6 +7423,7 @@ CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX_DEBUG is not set
CONFIG_TCM_QLA2XXX=m
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -7426,6 +7478,7 @@ CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -7496,8 +7549,6 @@ CONFIG_TIFM_7XX1=m
CONFIG_TIFM_CORE=m
CONFIG_TIGON3_HWMON=y
CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
-CONFIG_TI_ICSS_IEP=m
CONFIG_TI_LMP92064=m
CONFIG_TIME_KUNIT_TEST=m
CONFIG_TIME_NS=y
@@ -7690,6 +7741,7 @@ CONFIG_TYPEC_MUX_GPIO_SBU=m
CONFIG_TYPEC_MUX_INTEL_PMC=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_NVIDIA_ALTMODE=m
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -7783,6 +7835,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
CONFIG_USB_CHIPIDEA_PCI=m
CONFIG_USB_CONFIGFS_F_MIDI2=y
# CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set
@@ -7915,6 +7968,7 @@ CONFIG_USB_LED_TRIG=y
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LGM_PHY is not set
# CONFIG_USB_LINK_LAYER_TEST is not set
+CONFIG_USB_LJCA=m
CONFIG_USB_M5602=m
CONFIG_USB_MA901=m
# CONFIG_USB_MASS_STORAGE is not set
@@ -7964,6 +8018,7 @@ CONFIG_USB_ONBOARD_HUB=m
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
CONFIG_USBPCWATCHDOG=m
CONFIG_USB_PEGASUS=m
@@ -8176,7 +8231,7 @@ CONFIG_VIDEO_BT819=m
CONFIG_VIDEO_BT848=m
CONFIG_VIDEO_BT856=m
CONFIG_VIDEO_BT866=m
-CONFIG_VIDEO_CADENCE_CSI2RX=m
+# CONFIG_VIDEO_CADENCE_CSI2RX is not set
CONFIG_VIDEO_CADENCE_CSI2TX=m
# CONFIG_VIDEO_CADENCE is not set
# CONFIG_VIDEO_CAFE_CCIC is not set
@@ -8255,10 +8310,12 @@ CONFIG_VIDEO_M52790=m
CONFIG_VIDEO_MAX9286=m
# CONFIG_VIDEO_MAX96712 is not set
# CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
+# CONFIG_VIDEO_MGB4 is not set
CONFIG_VIDEO_ML86V7667=m
CONFIG_VIDEO_MSP3400=m
CONFIG_VIDEO_MT9M001=m
# CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
CONFIG_VIDEO_MT9P031=m
CONFIG_VIDEO_MT9T112=m
CONFIG_VIDEO_MT9V011=m
@@ -8636,6 +8693,7 @@ CONFIG_XEN_NETDEV_BACKEND=m
CONFIG_XEN_NETDEV_FRONTEND=m
CONFIG_XEN_PCIDEV_BACKEND=m
CONFIG_XEN_PCIDEV_FRONTEND=m
+CONFIG_XEN_PRIVCMD_EVENTFD=y
CONFIG_XEN_PRIVCMD_IRQFD=y
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_PVCALLS_BACKEND is not set
@@ -8762,22 +8820,13 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
CONFIG_LEGION_LAPTOP=m
CONFIG_ACPI_CALL=m
CONFIG_MFD_STEAMDECK=m
@@ -8813,16 +8862,17 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2
# CONFIG_USB_DUMMY_HCD is not set
# CONFIG_USB_CONFIGFS is not set
# CONFIG_PHY_SAMSUNG_USB2 is not set
-CONFIG_SND_SOC_AMD_SOF_MACH=m
-CONFIG_SND_SOC_AMD_MACH_COMMON=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
CONFIG_SND_SOC_SOF=m
CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y
CONFIG_SND_SOC_SOF_IPC3=y
CONFIG_SND_SOC_SOF_INTEL_IPC4=y
CONFIG_SND_SOC_SOF_AMD_COMMON=m
-CONFIG_SND_SOC_SOF_AMD_ACP63=m
-# CONFIG_SND_SOC_AMD_ACP_PCI is not set
-# CONFIG_SND_AMD_ASOC_RENOIR is not set
-# CONFIG_SND_AMD_ASOC_REMBRANDT is not set
-# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set
CONFIG_SND_SOC_TOPOLOGY=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-x86_64-rhel.config b/SOURCES/kernel-x86_64-rhel.config
index 981f729..304cfeb 100644
--- a/SOURCES/kernel-x86_64-rhel.config
+++ b/SOURCES/kernel-x86_64-rhel.config
@@ -258,7 +258,6 @@ CONFIG_AQUANTIA_PHY=m
# CONFIG_ARCH_BITMAIN is not set
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
# CONFIG_ARCH_MEMORY_PROBE is not set
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=28
@@ -304,6 +303,7 @@ CONFIG_ASUS_NB_WMI=m
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
CONFIG_ASUS_WMI=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -417,6 +417,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -545,7 +546,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -640,7 +640,6 @@ CONFIG_CALL_DEPTH_TRACKING=y
# CONFIG_CALL_THUNKS_DEBUG is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -710,6 +709,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
# CONFIG_CFG80211_DEBUGFS is not set
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -787,6 +787,7 @@ CONFIG_CIFS_SMB_DIRECT=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -862,7 +863,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -936,7 +936,6 @@ CONFIG_CRYPTO_ADIANTUM=m
# CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set
# CONFIG_CRYPTO_AEGIS128 is not set
# CONFIG_CRYPTO_AES_ARM64 is not set
-CONFIG_CRYPTO_AES_GCM_P10=y
CONFIG_CRYPTO_AES_NI_INTEL=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
@@ -963,7 +962,6 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CHACHA20_X86_64=y
CONFIG_CRYPTO_CMAC=y
@@ -1036,6 +1034,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1060,7 +1063,6 @@ CONFIG_CRYPTO_OFB=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
-# CONFIG_CRYPTO_POLY1305_P10 is not set
CONFIG_CRYPTO_POLY1305_X86_64=y
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1348,6 +1350,7 @@ CONFIG_DP83TC811_PHY=m
CONFIG_DPTF_PCH_FIVR=m
CONFIG_DPTF_POWER=m
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1423,6 +1426,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1449,36 +1453,90 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1673,7 +1731,6 @@ CONFIG_EEPROM_93CX6=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1713,7 +1770,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1776,7 +1838,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1875,7 +1937,9 @@ CONFIG_FS_DAX=y
# CONFIG_FSL_QDMA is not set
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -1883,6 +1947,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ES=m
CONFIG_FUJITSU_LAPTOP=m
CONFIG_FUJITSU_TABLET=m
@@ -2025,6 +2090,7 @@ CONFIG_GPIO_SIM=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2210,6 +2276,7 @@ CONFIG_HMM_MIRROR=y
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2365,6 +2432,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
CONFIG_IA32_EMULATION=y
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
@@ -2381,6 +2449,7 @@ CONFIG_ICPLUS_PHY=m
CONFIG_IDEAPAD_LAPTOP=m
CONFIG_IDLE_INJECT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
# CONFIG_IE6XX_WDT is not set
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
@@ -2452,7 +2521,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2570,6 +2638,7 @@ CONFIG_INPUT_UINPUT=m
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INT340X_THERMAL=m
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
@@ -2638,6 +2707,7 @@ CONFIG_INTEL_SDSI=m
CONFIG_INTEL_SPEED_SELECT_INTERFACE=m
CONFIG_INTEL_TCC_COOLING=m
CONFIG_INTEL_TDX_GUEST=y
+# CONFIG_INTEL_TDX_HOST is not set
CONFIG_INTEL_TH_ACPI=m
# CONFIG_INTEL_TH_DEBUG is not set
CONFIG_INTEL_TH_GTH=m
@@ -2672,7 +2742,8 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -2968,7 +3039,7 @@ CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_KEYS_REQUEST_CACHE is not set
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3003,6 +3074,7 @@ CONFIG_KVM_AMD_SEV=y
CONFIG_KVM_GUEST=y
CONFIG_KVM_INTEL=m
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_MMU_AUDIT=y
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
@@ -3170,6 +3242,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3184,6 +3257,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3192,6 +3266,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3221,7 +3296,6 @@ CONFIG_MANTIS_CORE=m
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
CONFIG_MARVELL_PHY=m
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3257,6 +3331,7 @@ CONFIG_MAXSMP=y
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3280,7 +3355,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MD_LINEAR=m
# CONFIG_MD_MULTIPATH is not set
@@ -3289,6 +3364,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3355,7 +3431,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3500,6 +3576,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3609,6 +3686,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3686,6 +3766,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3796,9 +3878,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3920,6 +3999,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3932,15 +4012,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -3969,6 +4046,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4068,7 +4146,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4265,8 +4343,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4288,7 +4367,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=y
CONFIG_NVSW_SN2201=m
@@ -4403,6 +4484,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4451,6 +4533,7 @@ CONFIG_PCI_STUB=y
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4480,8 +4563,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4493,8 +4574,6 @@ CONFIG_PHY_BCM_SR_USB=m
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4526,6 +4605,7 @@ CONFIG_PINCTRL_BROXTON=m
CONFIG_PINCTRL_CANNONLAKE=m
CONFIG_PINCTRL_CEDARFORK=m
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_DENVERTON=m
CONFIG_PINCTRL_ELKHARTLAKE=m
@@ -4602,7 +4682,6 @@ CONFIG_POWERCAP=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -4766,7 +4845,6 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4868,6 +4946,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -4943,6 +5022,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -4982,7 +5062,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5294,6 +5373,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5450,6 +5530,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5508,6 +5589,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5677,7 +5759,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLS=y
CONFIG_SLUB_CPU_PARTIAL=y
# CONFIG_SLUB_DEBUG_ON is not set
@@ -5759,6 +5840,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5900,8 +5982,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -5993,6 +6077,7 @@ CONFIG_SND_SOC_INTEL_AVS=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6033,6 +6118,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y
CONFIG_SND_SOC_INTEL_SKYLAKE=m
CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m
CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m
CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m
CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m
@@ -6108,12 +6194,6 @@ CONFIG_SND_SOC_PCM512x=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
CONFIG_SND_SOC_RT1308=m
CONFIG_SND_SOC_RT1308_SDW=m
@@ -6136,6 +6216,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6332,7 +6413,6 @@ CONFIG_SND_X86=y
CONFIG_SND_XEN_FRONTEND=m
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6550,6 +6630,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6603,6 +6684,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6832,6 +6914,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6907,6 +6990,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -7005,6 +7089,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LGM_PHY is not set
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7047,6 +7132,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7196,6 +7282,9 @@ CONFIG_VETH=m
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
CONFIG_VFIO_MDEV=m
@@ -7311,11 +7400,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7730,22 +7821,13 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
CONFIG_LEGION_LAPTOP=m
CONFIG_ACPI_CALL=m
CONFIG_IIO_HRTIMER_TRIGGER=m
@@ -7787,6 +7869,7 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2
# CONFIG_USB_DUMMY_HCD is not set
# CONFIG_USB_CONFIGFS is not set
# CONFIG_PHY_SAMSUNG_USB2 is not set
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
CONFIG_SND_SOC_AMD_SOF_MACH=m
CONFIG_SND_SOC_AMD_MACH_COMMON=m
CONFIG_SND_SOC_SOF=m
@@ -7800,3 +7883,10 @@ CONFIG_SND_SOC_SOF_AMD_ACP63=m
# CONFIG_SND_AMD_ASOC_REMBRANDT is not set
# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set
CONFIG_SND_SOC_TOPOLOGY=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-x86_64-rt-debug-rhel.config b/SOURCES/kernel-x86_64-rt-debug-rhel.config
index 3c168ab..985beda 100644
--- a/SOURCES/kernel-x86_64-rt-debug-rhel.config
+++ b/SOURCES/kernel-x86_64-rt-debug-rhel.config
@@ -258,7 +258,6 @@ CONFIG_AQUANTIA_PHY=m
# CONFIG_ARCH_BITMAIN is not set
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
# CONFIG_ARCH_MEMORY_PROBE is not set
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=28
@@ -304,6 +303,7 @@ CONFIG_ASUS_NB_WMI=m
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
CONFIG_ASUS_WMI=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -417,6 +417,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -545,7 +546,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
CONFIG_BRCM_TRACING=y
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -640,7 +640,6 @@ CONFIG_CALL_DEPTH_TRACKING=y
# CONFIG_CALL_THUNKS_DEBUG is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -710,6 +709,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
CONFIG_CFG80211_DEBUGFS=y
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -787,6 +787,7 @@ CONFIG_CIFS_SMB_DIRECT=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -862,7 +863,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -937,7 +937,6 @@ CONFIG_CRYPTO_ADIANTUM=m
# CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set
# CONFIG_CRYPTO_AEGIS128 is not set
# CONFIG_CRYPTO_AES_ARM64 is not set
-CONFIG_CRYPTO_AES_GCM_P10=y
CONFIG_CRYPTO_AES_NI_INTEL=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
@@ -964,7 +963,6 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CHACHA20_X86_64=y
CONFIG_CRYPTO_CMAC=y
@@ -1037,6 +1035,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1061,7 +1064,6 @@ CONFIG_CRYPTO_OFB=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
-# CONFIG_CRYPTO_POLY1305_P10 is not set
CONFIG_CRYPTO_POLY1305_X86_64=y
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1358,6 +1360,7 @@ CONFIG_DP83TC811_PHY=m
CONFIG_DPTF_PCH_FIVR=m
CONFIG_DPTF_POWER=m
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1433,6 +1436,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1459,36 +1463,90 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1719,7 +1777,6 @@ CONFIG_EEPROM_93CX6=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1759,7 +1816,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1830,7 +1892,7 @@ CONFIG_FAULT_INJECTION=y
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1929,7 +1991,9 @@ CONFIG_FS_DAX=y
# CONFIG_FSL_QDMA is not set
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -1937,6 +2001,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ES=m
CONFIG_FUJITSU_LAPTOP=m
CONFIG_FUJITSU_TABLET=m
@@ -2080,6 +2145,7 @@ CONFIG_GPIO_SIM=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
CONFIG_GUP_TEST=y
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2266,6 +2332,7 @@ CONFIG_HMM_MIRROR=y
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2421,6 +2488,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
CONFIG_IA32_EMULATION=y
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
@@ -2437,6 +2505,7 @@ CONFIG_ICPLUS_PHY=m
CONFIG_IDEAPAD_LAPTOP=m
CONFIG_IDLE_INJECT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
# CONFIG_IE6XX_WDT is not set
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
@@ -2508,7 +2577,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2626,6 +2694,7 @@ CONFIG_INPUT_UINPUT=m
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INT340X_THERMAL=m
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
@@ -2694,6 +2763,7 @@ CONFIG_INTEL_SDSI=m
CONFIG_INTEL_SPEED_SELECT_INTERFACE=m
CONFIG_INTEL_TCC_COOLING=m
CONFIG_INTEL_TDX_GUEST=y
+# CONFIG_INTEL_TDX_HOST is not set
CONFIG_INTEL_TH_ACPI=m
# CONFIG_INTEL_TH_DEBUG is not set
CONFIG_INTEL_TH_GTH=m
@@ -2728,7 +2798,8 @@ CONFIG_IOMMU_DEBUGFS=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3029,7 +3100,7 @@ CONFIG_KEY_NOTIFICATIONS=y
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
# CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3063,6 +3134,7 @@ CONFIG_KVM_AMD_SEV=y
CONFIG_KVM_GUEST=y
CONFIG_KVM_INTEL=m
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_MMU_AUDIT=y
CONFIG_KVM_PROVE_MMU=y
CONFIG_KVM_SMM=y
@@ -3230,6 +3302,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3244,6 +3317,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3252,6 +3326,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3281,7 +3356,6 @@ CONFIG_MANTIS_CORE=m
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
CONFIG_MARVELL_PHY=m
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3317,6 +3391,7 @@ CONFIG_MAXSMP=y
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3340,7 +3415,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MD_LINEAR=m
# CONFIG_MD_MULTIPATH is not set
@@ -3349,6 +3424,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3420,7 +3496,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3565,6 +3641,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3674,6 +3751,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3751,6 +3831,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3861,9 +3943,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3985,6 +4064,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3997,15 +4077,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4034,6 +4111,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4133,7 +4211,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4330,8 +4408,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4353,7 +4432,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=y
CONFIG_NVSW_SN2201=m
@@ -4469,6 +4550,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4517,6 +4599,7 @@ CONFIG_PCI_STUB=y
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4546,8 +4629,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4559,8 +4640,6 @@ CONFIG_PHY_BCM_SR_USB=m
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4592,6 +4671,7 @@ CONFIG_PINCTRL_BROXTON=m
CONFIG_PINCTRL_CANNONLAKE=m
CONFIG_PINCTRL_CEDARFORK=m
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_DENVERTON=m
CONFIG_PINCTRL_ELKHARTLAKE=m
@@ -4669,7 +4749,6 @@ CONFIG_POWERCAP=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -4837,7 +4916,6 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4942,6 +5020,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -5017,6 +5096,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5056,7 +5136,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5368,6 +5447,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5524,6 +5604,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5582,6 +5663,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5751,8 +5833,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
-# CONFIG_SLOB is not set
CONFIG_SLS=y
# CONFIG_SLUB_CPU_PARTIAL is not set
# CONFIG_SLUB_DEBUG_ON is not set
@@ -5835,6 +5915,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5977,8 +6058,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6070,6 +6153,7 @@ CONFIG_SND_SOC_INTEL_AVS=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6110,6 +6194,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y
CONFIG_SND_SOC_INTEL_SKYLAKE=m
CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m
CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m
CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m
CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m
@@ -6185,12 +6270,6 @@ CONFIG_SND_SOC_PCM512x=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
CONFIG_SND_SOC_RT1308=m
CONFIG_SND_SOC_RT1308_SDW=m
@@ -6213,6 +6292,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6410,7 +6490,6 @@ CONFIG_SND_X86=y
CONFIG_SND_XEN_FRONTEND=m
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6628,6 +6707,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6681,6 +6761,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
CONFIG_TEST_MIN_HEAP=m
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6910,6 +6991,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6986,6 +7068,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -7084,6 +7167,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LGM_PHY is not set
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7126,6 +7210,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7275,6 +7360,9 @@ CONFIG_VETH=m
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
CONFIG_VFIO_MDEV=m
@@ -7390,11 +7478,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7816,22 +7906,13 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
CONFIG_LEGION_LAPTOP=m
CONFIG_ACPI_CALL=m
CONFIG_IIO_HRTIMER_TRIGGER=m
@@ -7873,6 +7954,7 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2
# CONFIG_USB_DUMMY_HCD is not set
# CONFIG_USB_CONFIGFS is not set
# CONFIG_PHY_SAMSUNG_USB2 is not set
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
CONFIG_SND_SOC_AMD_SOF_MACH=m
CONFIG_SND_SOC_AMD_MACH_COMMON=m
CONFIG_SND_SOC_SOF=m
@@ -7886,3 +7968,10 @@ CONFIG_SND_SOC_SOF_AMD_ACP63=m
# CONFIG_SND_AMD_ASOC_REMBRANDT is not set
# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set
CONFIG_SND_SOC_TOPOLOGY=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-x86_64-rt-rhel.config b/SOURCES/kernel-x86_64-rt-rhel.config
index 998a273..bce617a 100644
--- a/SOURCES/kernel-x86_64-rt-rhel.config
+++ b/SOURCES/kernel-x86_64-rt-rhel.config
@@ -258,7 +258,6 @@ CONFIG_AQUANTIA_PHY=m
# CONFIG_ARCH_BITMAIN is not set
# CONFIG_ARCH_KEEMBAY is not set
# CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
# CONFIG_ARCH_MEMORY_PROBE is not set
# CONFIG_ARCH_MESON is not set
CONFIG_ARCH_MMAP_RND_BITS=28
@@ -304,6 +303,7 @@ CONFIG_ASUS_NB_WMI=m
# CONFIG_ASUS_TF103C_DOCK is not set
# CONFIG_ASUS_WIRELESS is not set
CONFIG_ASUS_WMI=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_ASYNC_TX_DMA=y
@@ -417,6 +417,7 @@ CONFIG_BASE_FULL=y
# CONFIG_BATTERY_SAMSUNG_SDI is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
# CONFIG_BCACHE is not set
# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
@@ -545,7 +546,6 @@ CONFIG_BRCMFMAC_PCIE=y
CONFIG_BRCMFMAC_SDIO=y
CONFIG_BRCMFMAC_USB=y
CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
# CONFIG_BRCM_TRACING is not set
# CONFIG_BRIDGE_CFM is not set
CONFIG_BRIDGE_EBT_802_3=m
@@ -640,7 +640,6 @@ CONFIG_CALL_DEPTH_TRACKING=y
# CONFIG_CALL_THUNKS_DEBUG is not set
CONFIG_CAN_8DEV_USB=m
CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
CONFIG_CAN_CALC_BITTIMING=y
# CONFIG_CAN_CAN327 is not set
# CONFIG_CAN_CC770 is not set
@@ -710,6 +709,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
# CONFIG_CFG80211_DEBUGFS is not set
CONFIG_CFG80211_DEFAULT_PS=y
# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
CONFIG_CFG80211=m
# CONFIG_CFG80211_WEXT is not set
# CONFIG_CFI_CLANG is not set
@@ -787,6 +787,7 @@ CONFIG_CIFS_SMB_DIRECT=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
CONFIG_CLK_GATE_KUNIT_TEST=m
# CONFIG_CLK_GFM_LPASS_SM8250 is not set
# CONFIG_CLK_ICST is not set
@@ -862,7 +863,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_CONTEXT_SWITCH_TRACER=y
# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
CONFIG_CORDIC=m
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_COREDUMP=y
@@ -937,7 +937,6 @@ CONFIG_CRYPTO_ADIANTUM=m
# CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set
# CONFIG_CRYPTO_AEGIS128 is not set
# CONFIG_CRYPTO_AES_ARM64 is not set
-CONFIG_CRYPTO_AES_GCM_P10=y
CONFIG_CRYPTO_AES_NI_INTEL=y
# CONFIG_CRYPTO_AES_TI is not set
CONFIG_CRYPTO_AES=y
@@ -964,7 +963,6 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_CFB=y
CONFIG_CRYPTO_CHACHA20=m
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CHACHA20_X86_64=y
CONFIG_CRYPTO_CMAC=y
@@ -1037,6 +1035,11 @@ CONFIG_CRYPTO_GHASH=y
# CONFIG_CRYPTO_HCTR2 is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
# CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
# CONFIG_CRYPTO_KEYWRAP is not set
CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1061,7 +1064,6 @@ CONFIG_CRYPTO_OFB=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_POLY1305=m
-# CONFIG_CRYPTO_POLY1305_P10 is not set
CONFIG_CRYPTO_POLY1305_X86_64=y
# CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
CONFIG_CRYPTO_RMD160=m
@@ -1350,6 +1352,7 @@ CONFIG_DP83TC811_PHY=m
CONFIG_DPTF_PCH_FIVR=m
CONFIG_DPTF_POWER=m
# CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
# CONFIG_DRM_ACCEL is not set
CONFIG_DRM_AMD_ACP=y
# CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1425,6 +1428,7 @@ CONFIG_DRM_I915_USERPTR=y
# CONFIG_DRM_IMX8QXP_LDB is not set
# CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
# CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
# CONFIG_DRM_IMX_LCDIF is not set
# CONFIG_DRM_ITE_IT6505 is not set
# CONFIG_DRM_ITE_IT66121 is not set
@@ -1451,36 +1455,90 @@ CONFIG_DRM_NOUVEAU=m
# CONFIG_DRM_OFDRM is not set
# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
# CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
# CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
# CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
# CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
# CONFIG_DRM_PANEL_LG_LB035Q02 is not set
# CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
# CONFIG_DRM_PANEL_MIPI_DBI is not set
# CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
# CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
# CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
# CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
# CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
# CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
# CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
# CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
# CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
# CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
# CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
# CONFIG_DRM_PANFROST is not set
# CONFIG_DRM_PARADE_PS8622 is not set
# CONFIG_DRM_PARADE_PS8640 is not set
@@ -1711,7 +1769,6 @@ CONFIG_EEPROM_93CX6=m
# CONFIG_EEPROM_AT25 is not set
# CONFIG_EEPROM_EE1004 is not set
# CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
# CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1751,7 +1808,12 @@ CONFIG_ENIC=m
CONFIG_EPIC100=m
CONFIG_EPOLL=y
# CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
CONFIG_ETHERNET=y
CONFIG_ETHOC=m
CONFIG_ETHTOOL_NETLINK=y
@@ -1814,7 +1876,7 @@ CONFIG_FAT_KUNIT_TEST=m
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
CONFIG_FB_EFI=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_GEODE is not set
@@ -1913,7 +1975,9 @@ CONFIG_FS_DAX=y
# CONFIG_FSL_QDMA is not set
# CONFIG_FSL_RCPM is not set
CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
# CONFIG_FTL is not set
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -1921,6 +1985,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
CONFIG_FUJITSU_ES=m
CONFIG_FUJITSU_LAPTOP=m
CONFIG_FUJITSU_TABLET=m
@@ -2064,6 +2129,7 @@ CONFIG_GPIO_SIM=m
# CONFIG_GREYBUS is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_GUP_TEST is not set
CONFIG_GVE=m
# CONFIG_HABANA_AI is not set
@@ -2250,6 +2316,7 @@ CONFIG_HMM_MIRROR=y
# CONFIG_HNS3_PMU is not set
# CONFIG_HOLTEK_FF is not set
CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
CONFIG_HOTPLUG_PCI_ACPI_IBM=m
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2405,6 +2472,7 @@ CONFIG_I40E=m
CONFIG_I40EVF=m
# CONFIG_I6300ESB_WDT is not set
# CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
CONFIG_IA32_EMULATION=y
# CONFIG_IAQCORE is not set
CONFIG_IAVF=m
@@ -2421,6 +2489,7 @@ CONFIG_ICPLUS_PHY=m
CONFIG_IDEAPAD_LAPTOP=m
CONFIG_IDLE_INJECT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
# CONFIG_IE6XX_WDT is not set
CONFIG_IEEE802154_6LOWPAN=m
# CONFIG_IEEE802154_ADF7242 is not set
@@ -2492,7 +2561,6 @@ CONFIG_IMA_READ_POLICY=y
CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
CONFIG_IMA_SIG_TEMPLATE=y
# CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA=y
# CONFIG_IMG_ASCII_LCD is not set
@@ -2610,6 +2678,7 @@ CONFIG_INPUT_UINPUT=m
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
CONFIG_INPUT=y
CONFIG_INPUT_YEALINK=m
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_INT3406_THERMAL is not set
CONFIG_INT340X_THERMAL=m
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
@@ -2678,6 +2747,7 @@ CONFIG_INTEL_SDSI=m
CONFIG_INTEL_SPEED_SELECT_INTERFACE=m
CONFIG_INTEL_TCC_COOLING=m
CONFIG_INTEL_TDX_GUEST=y
+# CONFIG_INTEL_TDX_HOST is not set
CONFIG_INTEL_TH_ACPI=m
# CONFIG_INTEL_TH_DEBUG is not set
CONFIG_INTEL_TH_GTH=m
@@ -2712,7 +2782,8 @@ CONFIG_IO_DELAY_0X80=y
CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
# CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
# CONFIG_IOMMU_IO_PGTABLE_DART is not set
# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3008,7 +3079,7 @@ CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_KEYS_REQUEST_CACHE is not set
CONFIG_KEYS=y
# CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
CONFIG_KFENCE_NUM_OBJECTS=255
CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3043,6 +3114,7 @@ CONFIG_KVM_AMD_SEV=y
CONFIG_KVM_GUEST=y
CONFIG_KVM_INTEL=m
CONFIG_KVM=m
+CONFIG_KVM_MAX_NR_VCPUS=4096
CONFIG_KVM_MMU_AUDIT=y
# CONFIG_KVM_PROVE_MMU is not set
CONFIG_KVM_SMM=y
@@ -3210,6 +3282,7 @@ CONFIG_LSI_ET1011C_PHY=m
CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
CONFIG_LSM_MMAP_MIN_ADDR=65535
# CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
# CONFIG_LTC2471 is not set
# CONFIG_LTC2485 is not set
# CONFIG_LTC2496 is not set
@@ -3224,6 +3297,7 @@ CONFIG_LTO_NONE=y
# CONFIG_LTR501 is not set
# CONFIG_LTRF216A is not set
# CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
CONFIG_LWTUNNEL_BPF=y
CONFIG_LWTUNNEL=y
CONFIG_LXT_PHY=m
@@ -3232,6 +3306,7 @@ CONFIG_LZ4_COMPRESS=m
CONFIG_MAC80211_DEBUGFS=y
# CONFIG_MAC80211_DEBUG_MENU is not set
CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_MESH is not set
@@ -3261,7 +3336,6 @@ CONFIG_MANTIS_CORE=m
CONFIG_MARVELL_10G_PHY=m
# CONFIG_MARVELL_88Q2XXX_PHY is not set
# CONFIG_MARVELL_88X2222_PHY is not set
-CONFIG_MARVELL_GTI_WDT=y
CONFIG_MARVELL_PHY=m
# CONFIG_MATOM is not set
# CONFIG_MAX1027 is not set
@@ -3297,6 +3371,7 @@ CONFIG_MAXSMP=y
# CONFIG_MCORE2 is not set
# CONFIG_MCP320X is not set
# CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
# CONFIG_MCP3911 is not set
# CONFIG_MCP4018 is not set
# CONFIG_MCP41010 is not set
@@ -3320,7 +3395,7 @@ CONFIG_MDIO_HISI_FEMAC=m
# CONFIG_MDIO_IPQ8064 is not set
CONFIG_MDIO_MSCC_MIIM=m
# CONFIG_MDIO_MVUSB is not set
-CONFIG_MDIO_OCTEON=m
+# CONFIG_MDIO_OCTEON is not set
CONFIG_MDIO_THUNDER=m
CONFIG_MD_LINEAR=m
# CONFIG_MD_MULTIPATH is not set
@@ -3329,6 +3404,7 @@ CONFIG_MD_RAID10=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID456=m
CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
CONFIG_MEDIA_ALTERA_CI=m
# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
CONFIG_MEDIA_ATTACH=y
@@ -3400,7 +3476,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_MFD_BD9571MWV is not set
# CONFIG_MFD_CPCAP is not set
# CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9052_SPI is not set
# CONFIG_MFD_DA9055 is not set
@@ -3545,6 +3621,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
CONFIG_MLX5_EN_ARFS=y
CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_MACSEC=y
@@ -3654,6 +3731,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
# CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
# CONFIG_MODULE_SIG_SHA384 is not set
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG=y
@@ -3731,6 +3811,8 @@ CONFIG_MT76x2U=m
CONFIG_MT7921E=m
# CONFIG_MT7921S is not set
# CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
# CONFIG_MT7996E is not set
# CONFIG_MTD_ABSENT is not set
# CONFIG_MTD_AFS_PARTS is not set
@@ -3841,9 +3923,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_MATCHALL=m
# CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
CONFIG_NET_CLS_U32=m
CONFIG_NET_CLS=y
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -3965,6 +4044,7 @@ CONFIG_NET_IPIP=m
CONFIG_NET_IPVTI=m
CONFIG_NET_KEY=m
CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NETLABEL=y
CONFIG_NETLINK_DIAG=y
@@ -3977,15 +4057,12 @@ CONFIG_NET_PKTGEN=m
CONFIG_NET_POLL_CONTROLLER=y
CONFIG_NET_RX_BUSY_POLL=y
# CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
CONFIG_NET_SCH_CBS=m
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_CODEL is not set
CONFIG_NET_SCH_DEFAULT=y
# CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_ETS=m
@@ -4014,6 +4091,7 @@ CONFIG_NET_SCH_TBF=m
CONFIG_NET_SWITCHDEV=y
CONFIG_NET_TC_SKB_EXT=y
# CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
# CONFIG_NET_TULIP is not set
CONFIG_NET_UDP_TUNNEL=m
# CONFIG_NET_VENDOR_3COM is not set
@@ -4113,7 +4191,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK_HELPER=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_CT_PROTO_UDPLITE=y
CONFIG_NF_DUP_NETDEV=m
@@ -4310,8 +4388,9 @@ CONFIG_NVDIMM_PFN=y
# CONFIG_NVDIMM_SECURITY_TEST is not set
# CONFIG_NVHE_EL2_DEBUG is not set
CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
# CONFIG_NVME_HWMON is not set
# CONFIG_NVMEM_IMX_OCOTP_ELE is not set
# CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set
@@ -4333,7 +4412,9 @@ CONFIG_NVME_TARGET=m
# CONFIG_NVME_TARGET_PASSTHRU is not set
CONFIG_NVME_TARGET_RDMA=m
CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
# CONFIG_NVME_VERBOSE_ERRORS is not set
CONFIG_NVRAM=y
CONFIG_NVSW_SN2201=m
@@ -4448,6 +4529,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
CONFIG_PCIEAER_INJECT=m
CONFIG_PCIEAER=y
# CONFIG_PCIE_ALTERA is not set
@@ -4496,6 +4578,7 @@ CONFIG_PCI_STUB=y
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI=y
# CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PCPU_DEV_REFCNT=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_PCS_XPCS=m
@@ -4525,8 +4608,6 @@ CONFIG_PHY_BCM_SR_USB=m
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
# CONFIG_PHY_CPCAP_USB is not set
-# CONFIG_PHY_FSL_IMX8M_PCIE is not set
-# CONFIG_PHY_FSL_IMX8MQ_USB is not set
# CONFIG_PHY_FSL_LYNX_28G is not set
# CONFIG_PHY_HI3660_USB is not set
# CONFIG_PHY_HI3670_PCIE is not set
@@ -4538,8 +4619,6 @@ CONFIG_PHY_BCM_SR_USB=m
CONFIG_PHYLIB=y
CONFIG_PHYLINK=m
# CONFIG_PHY_MAPPHONE_MDM6600 is not set
-# CONFIG_PHY_MIXEL_LVDS_PHY is not set
-# CONFIG_PHY_MIXEL_MIPI_DPHY is not set
# CONFIG_PHY_OCELOT_SERDES is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
@@ -4571,6 +4650,7 @@ CONFIG_PINCTRL_BROXTON=m
CONFIG_PINCTRL_CANNONLAKE=m
CONFIG_PINCTRL_CEDARFORK=m
# CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
# CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_DENVERTON=m
CONFIG_PINCTRL_ELKHARTLAKE=m
@@ -4647,7 +4727,6 @@ CONFIG_POWERCAP=y
CONFIG_POWERNV_CPUFREQ=y
CONFIG_POWERNV_OP_PANEL=m
# CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
# CONFIG_POWER_RESET_GPIO_RESTART is not set
# CONFIG_POWER_RESET_LTC2952 is not set
# CONFIG_POWER_RESET_REGULATOR is not set
@@ -4815,7 +4894,6 @@ CONFIG_QLA3XXX=m
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
CONFIG_QRTR=m
CONFIG_QRTR_MHI=m
# CONFIG_QRTR_SMD is not set
@@ -4920,6 +4998,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_LTC3676 is not set
# CONFIG_REGULATOR_MAX1586 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
# CONFIG_REGULATOR_MAX77857 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
@@ -4995,6 +5074,7 @@ CONFIG_RMI4_SPI=m
CONFIG_ROCKCHIP_PHY=m
CONFIG_ROCKER=m
CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
# CONFIG_ROHM_BU27008 is not set
# CONFIG_ROHM_BU27034 is not set
# CONFIG_ROMFS_FS is not set
@@ -5034,7 +5114,6 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_DRV_ABEOZ9 is not set
# CONFIG_RTC_DRV_ABX80X is not set
CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
# CONFIG_RTC_DRV_CADENCE is not set
CONFIG_RTC_DRV_CMOS=y
CONFIG_RTC_DRV_DS1286=m
@@ -5346,6 +5425,7 @@ CONFIG_SDIO_UART=m
# CONFIG_SDX_GCC_55 is not set
# CONFIG_SECCOMP_CACHE_DEBUG is not set
CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
CONFIG_SECONDARY_TRUSTED_KEYRING=y
CONFIG_SECRETMEM=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5502,6 +5582,7 @@ CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_LTC2978=m
# CONFIG_SENSORS_LTC2978_REGULATOR is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC3815 is not set
CONFIG_SENSORS_LTC4151=m
@@ -5560,6 +5641,7 @@ CONFIG_SENSORS_PCF8591=m
# CONFIG_SENSORS_PLI1209BC is not set
# CONFIG_SENSORS_PM6764TR is not set
CONFIG_SENSORS_PMBUS=m
+# CONFIG_SENSORS_POWERZ is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_PWM_FAN is not set
# CONFIG_SENSORS_PXE1610 is not set
@@ -5729,8 +5811,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP=m
# CONFIG_SLIP_MODE_SLIP6 is not set
CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
-# CONFIG_SLOB is not set
CONFIG_SLS=y
# CONFIG_SLUB_CPU_PARTIAL is not set
# CONFIG_SLUB_DEBUG_ON is not set
@@ -5813,6 +5893,7 @@ CONFIG_SND_FIREWORKS=m
# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_GINA20=m
CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
CONFIG_SND_HDA_CODEC_ANALOG=m
CONFIG_SND_HDA_CODEC_CA0110=m
CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -5954,8 +6035,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m
# CONFIG_SND_SOC_ARNDALE is not set
# CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
# CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
# CONFIG_SND_SOC_AW88261 is not set
# CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
# CONFIG_SND_SOC_BD28623 is not set
# CONFIG_SND_SOC_BT_SCO is not set
# CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6047,6 +6130,7 @@ CONFIG_SND_SOC_INTEL_AVS=m
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6087,6 +6171,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y
CONFIG_SND_SOC_INTEL_SKYLAKE=m
CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m
CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m
+CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m
CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m
CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m
CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m
@@ -6162,12 +6247,6 @@ CONFIG_SND_SOC_PCM512x=m
# CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
# CONFIG_SND_SOC_RK817 is not set
CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
# CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
CONFIG_SND_SOC_RT1308=m
CONFIG_SND_SOC_RT1308_SDW=m
@@ -6190,6 +6269,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
CONFIG_SND_SOC_RT715_SDW=m
CONFIG_SND_SOC_RT722_SDCA_SDW=m
# CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
# CONFIG_SND_SOC_SAMSUNG is not set
# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6386,7 +6466,6 @@ CONFIG_SND_X86=y
CONFIG_SND_XEN_FRONTEND=m
# CONFIG_SND_YMFPCI is not set
# CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
# CONFIG_SOC_TI is not set
CONFIG_SOFTLOCKUP_DETECTOR=y
@@ -6604,6 +6683,7 @@ CONFIG_TCM_IBLOCK=m
CONFIG_TCM_PSCSI=m
# CONFIG_TCM_QLA2XXX is not set
CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_BBR=m
CONFIG_TCP_CONG_BIC=m
@@ -6657,6 +6737,7 @@ CONFIG_TEST_LIVEPATCH=m
# CONFIG_TEST_MEMINIT is not set
# CONFIG_TEST_MIN_HEAP is not set
# CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
# CONFIG_TEST_OVERFLOW is not set
# CONFIG_TEST_PARMAN is not set
# CONFIG_TEST_POWER is not set
@@ -6886,6 +6967,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
# CONFIG_TYPEC_MUX_GPIO_SBU is not set
# CONFIG_TYPEC_MUX_NB7VPQ904M is not set
CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
# CONFIG_TYPEC_NVIDIA_ALTMODE is not set
# CONFIG_TYPEC_QCOM_PMIC is not set
# CONFIG_TYPEC_RT1711H is not set
@@ -6962,6 +7044,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
CONFIG_USB_CHIPIDEA_IMX=m
CONFIG_USB_CHIPIDEA=m
CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
# CONFIG_USB_CONN_GPIO is not set
CONFIG_USB_CXACRU=m
# CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -7060,6 +7143,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
CONFIG_USB_LEGOTOWER=m
# CONFIG_USB_LGM_PHY is not set
# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
CONFIG_USB_M5602=m
# CONFIG_USB_MA901 is not set
# CONFIG_USB_MAX3421_HCD is not set
@@ -7102,6 +7186,7 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_PRODUCTLIST is not set
# CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
CONFIG_USB_PCI=y
# CONFIG_USBPCWATCHDOG is not set
CONFIG_USB_PEGASUS=m
@@ -7251,6 +7336,9 @@ CONFIG_VETH=m
# CONFIG_VF610_DAC is not set
CONFIG_VFAT_FS=m
# CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
CONFIG_VFIO_MDEV=m
@@ -7366,11 +7454,13 @@ CONFIG_VIDEO_IVTV=m
# CONFIG_VIDEO_M5MOLS is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
# CONFIG_VIDEO_ML86V7667 is not set
# CONFIG_VIDEO_MSP3400 is not set
# CONFIG_VIDEO_MT9M001 is not set
# CONFIG_VIDEO_MT9M032 is not set
# CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
# CONFIG_VIDEO_MT9P031 is not set
# CONFIG_VIDEO_MT9T001 is not set
# CONFIG_VIDEO_MT9T112 is not set
@@ -7792,22 +7882,13 @@ CONFIG_ZENIFY=y
CONFIG_WINESYNC=y
CONFIG_USER_NS_UNPRIVILEGED=y
CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
CONFIG_HID_IPTS=m
CONFIG_HID_ITHC=m
CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
CONFIG_IPC_CLASSES=y
CONFIG_LEDS_TPS68470=m
-CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
CONFIG_LEGION_LAPTOP=m
CONFIG_ACPI_CALL=m
CONFIG_IIO_HRTIMER_TRIGGER=m
@@ -7849,6 +7930,7 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2
# CONFIG_USB_DUMMY_HCD is not set
# CONFIG_USB_CONFIGFS is not set
# CONFIG_PHY_SAMSUNG_USB2 is not set
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
CONFIG_SND_SOC_AMD_SOF_MACH=m
CONFIG_SND_SOC_AMD_MACH_COMMON=m
CONFIG_SND_SOC_SOF=m
@@ -7862,3 +7944,10 @@ CONFIG_SND_SOC_SOF_AMD_ACP63=m
# CONFIG_SND_AMD_ASOC_REMBRANDT is not set
# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set
CONFIG_SND_SOC_TOPOLOGY=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel.changelog b/SOURCES/kernel.changelog
new file mode 100644
index 0000000..f4767f4
--- /dev/null
+++ b/SOURCES/kernel.changelog
@@ -0,0 +1,2477 @@
+* Wed Jan 31 2024 Justin M. Forbes <jforbes@fedoraproject.org> [6.7.3-0]
+- Config update for stable backport (Justin M. Forbes)
+- Add some more bugs to BugsFixed (Justin M. Forbes)
+- Linux v6.7.3
+Resolves:
+
+* Fri Jan 26 2024 Justin M. Forbes <jforbes@fedoraproject.org> [6.7.2-0]
+- redhat: spec: Fix update_scripts run for CentOS builds (Neal Gompa)
+- BPF Tool versioning seems incompatible with stable Fedora (Justin M. Forbes)
+- Linux v6.7.2
+Resolves:
+
+* Sat Jan 20 2024 Justin M. Forbes <jforbes@fedoraproject.org> [6.7.1-0]
+- Fix up requires for UKI (Justin M. Forbes)
+- Fix up libperf install (Justin M. Forbes)
+- Drop soname for libcpupower.so since we reverted the bump (Justin M. Forbes)
+- Turn on CONFIG_TCP_AO for Fedora (Justin M. Forbes)
+- temporarily remove LIBBPF_DYNAMIC=1 from perf build (Thorsten Leemhuis)
+- add libperf packages and enable perf, libperf, tools and bpftool packages (Thorsten Leemhuis)
+- Revert "cpupower: Bump soname version" (Justin M. Forbes)
+- Turn on Renesas RZ for Fedora IOT rhbz2257913 (Justin M. Forbes)
+- Add bugs to BugsFixed (Justin M. Forbes)
+- wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() (Xingyuan Mo)
+- drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set (Javier Martinez Canillas)
+- Basic scaffolding to create a kernel-headers package (Justin M. Forbes)
+- Initial config for fedora-6.7 branch (Justin M. Forbes)
+- Reset RHEL_RELEASE for 6.8 series (Justin M. Forbes)
+- common: cleanup MX3_IPU (Peter Robinson)
+- all: The Octeon MDIO driver is aarch64/mips (Peter Robinson)
+- common: rtc: remove bq4802 config (Peter Robinson)
+- common: de-dupe MARVELL_GTI_WDT (Peter Robinson)
+- all: Remove CAN_BXCAN (Peter Robinson)
+- common: cleanup SND_SOC_ROCKCHIP (Peter Robinson)
+- common: move RHEL DP83867_PHY to common (Peter Robinson)
+- common: Make ASYMMETRIC_KEY_TYPE enable explicit (Peter Robinson)
+- common: Disable aarch64 ARCH_MA35 universally (Peter Robinson)
+- common: arm64: enable Tegra234 pinctrl driver (Peter Robinson)
+- rhel: arm64: Enable qoriq thermal driver (Peter Robinson)
+- common: aarch64: Cleanup some i.MX8 config options (Peter Robinson)
+- all: EEPROM_LEGACY has been removed (Peter Robinson)
+- all: remove AppleTalk hardware configs (Peter Robinson)
+- all: cleanup: remove references to SLOB (Peter Robinson)
+- all: cleanup: Drop unnecessary BRCMSTB configs (Peter Robinson)
+- all: net: remove retired network schedulers (Peter Robinson)
+- all: cleanup removed CONFIG_IMA_TRUSTED_KEYRING (Peter Robinson)
+- BuildRequires: lld for build with selftests for x86 (Jan Stancek)
+- spec: add keyutils to selftest-internal subpackage requirements (Artem Savkov) [2166911]
+- redhat/spec: exclude liburandom_read.so from requires (Artem Savkov) [2120968]
+- rtla: sync summary text with upstream and update Requires (Jan Stancek)
+- uki-virt: add systemd-sysext dracut module (Gerd Hoffmann)
+- uki-virt: add virtiofs dracut module (Gerd Hoffmann)
+- common: disable the FB device creation (Peter Robinson)
+- s390x: There's no FB on Z-series (Peter Robinson)
+- Linux v6.7.1
+Resolves: rhbz#2120968, rhbz#2166911
+
+* Mon Jan 08 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-68]
+- fedora: aarch64: enable SM_VIDEOCC_8350 (Peter Robinson)
+- Linux v6.7.0
+Resolves:
+
+* Sun Jan 07 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.52b1853b080a.67]
+- Linux v6.7.0-0.rc8.52b1853b080a
+Resolves:
+
+* Sat Jan 06 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.95c8a35f1c01.66]
+- fedora: arm64: enable ethernet on newer TI industrial (Peter Robinson)
+- fedora: arm64: Disable VIDEO_IMX_MEDIA (Peter Robinson)
+- fedora: use common config for Siemens Simatic IPC (Peter Robinson)
+- fedora: arm: enable Rockchip SPI flash (Peter Robinson)
+- fedora: arm64: enable DRM_TI_SN65DSI83 (Peter Robinson)
+- Linux v6.7.0-0.rc8.95c8a35f1c01
+Resolves:
+
+* Fri Jan 05 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.1f874787ed9a.65]
+- Linux v6.7.0-0.rc8.1f874787ed9a
+Resolves:
+
+* Thu Jan 04 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.ac865f00af29.64]
+- Linux v6.7.0-0.rc8.ac865f00af29
+Resolves:
+
+* Wed Jan 03 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.63]
+- kernel.spec: remove kernel-smp reference from scripts (Jan Stancek)
+Resolves:
+
+* Tue Jan 02 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.62]
+- redhat: do not compress the full kernel changelog in the src.rpm (Herton R. Krzesinski)
+Resolves:
+
+* Mon Jan 01 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.61]
+- Linux v6.7.0-0.rc8
+Resolves:
+
+* Sun Dec 31 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.453f5db0619e.60]
+- Linux v6.7.0-0.rc7.453f5db0619e
+Resolves:
+
+* Sat Dec 30 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.f016f7547aee.59]
+- Auto consolidate configs for the 6.7 cycle (Justin M. Forbes)
+- Linux v6.7.0-0.rc7.f016f7547aee
+Resolves:
+
+* Fri Dec 29 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.8735c7c84d1b.58]
+- Linux v6.7.0-0.rc7.8735c7c84d1b
+Resolves:
+
+* Thu Dec 28 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.f5837722ffec.57]
+- Linux v6.7.0-0.rc7.f5837722ffec
+Resolves:
+
+* Tue Dec 26 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.fbafc3e621c3.56]
+- Linux v6.7.0-0.rc7.fbafc3e621c3
+Resolves:
+
+* Mon Dec 25 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.55]
+- Enable sound for a line of Huawei laptops (TomZanna)
+Resolves:
+
+* Sun Dec 24 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.54]
+- Linux v6.7.0-0.rc7
+Resolves:
+
+* Sat Dec 23 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.5254c0cbc92d.53]
+- Linux v6.7.0-0.rc6.5254c0cbc92d
+Resolves:
+
+* Fri Dec 22 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.24e0d2e527a3.52]
+- fedora: a few cleanups and driver enablements (Peter Robinson)
+- fedora: arm64: cleanup Allwinner Pinctrl drivers (Peter Robinson)
+- fedora: aarch64: Enable some DW drivers (Peter Robinson)
+- Linux v6.7.0-0.rc6.24e0d2e527a3
+Resolves:
+
+* Thu Dec 21 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.a4aebe936554.51]
+- redhat: ship all the changelog from source git into kernel-doc (Herton R. Krzesinski)
+- redhat: create an empty changelog file when changing its name (Herton R. Krzesinski)
+- Linux v6.7.0-0.rc6.a4aebe936554
+Resolves:
+
+* Wed Dec 20 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.55cb5f43689d.50]
+- redhat/self-test: Remove --all from git query (Prarit Bhargava)
+- Linux v6.7.0-0.rc6.55cb5f43689d
+Resolves:
+
+* Tue Dec 19 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.2cf4f94d8e86.49]
+- Linux v6.7.0-0.rc6.2cf4f94d8e86
+Resolves:
+
+* Mon Dec 18 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.48]
+- Disable accel drivers for Fedora x86 (Kate Hsuan)
+- redhat: scripts: An automation script for disabling unused driver for x86 (Kate Hsuan)
+- Fix up Fedora LJCA configs and filters (Justin M. Forbes)
+- Linux v6.7.0-0.rc6
+Resolves:
+
+* Sun Dec 17 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.3b8a9b2e6809.47]
+- Linux v6.7.0-0.rc5.3b8a9b2e6809
+Resolves:
+
+* Sat Dec 16 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.c8e97fc6b4c0.46]
+- Fedora configs for 6.7 (Justin M. Forbes)
+- Linux v6.7.0-0.rc5.c8e97fc6b4c0
+Resolves:
+
+* Fri Dec 15 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.3f7168591ebf.45]
+- Linux v6.7.0-0.rc5.3f7168591ebf
+Resolves:
+
+* Thu Dec 14 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.5bd7ef53ffe5.44]
+- Linux v6.7.0-0.rc5.5bd7ef53ffe5
+Resolves:
+
+* Wed Dec 13 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.88035e5694a8.43]
+- Some Fedora config updates for MLX5 (Justin M. Forbes)
+- Turn on DRM_ACCEL drivers for Fedora (Justin M. Forbes)
+- Linux v6.7.0-0.rc5.88035e5694a8
+Resolves:
+
+* Tue Dec 12 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.26aff849438c.42]
+- redhat: enable the kfence test (Nico Pache)
+- Linux v6.7.0-0.rc5.26aff849438c
+Resolves:
+
+* Mon Dec 11 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.41]
+- redhat/configs: Enable UCLAMP_TASK for PipeWire and WirePlumber (Neal Gompa)
+- Linux v6.7.0-0.rc5
+Resolves:
+
+* Sun Dec 10 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.c527f5606aa5.40]
+- Linux v6.7.0-0.rc4.c527f5606aa5
+Resolves:
+
+* Sat Dec 09 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.f2e8a57ee903.39]
+- Linux v6.7.0-0.rc4.f2e8a57ee903
+Resolves:
+
+* Fri Dec 08 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.5e3f5b81de80.38]
+- Turn on CONFIG_SECURITY_DMESG_RESTRICT for Fedora (Justin M. Forbes)
+- Linux v6.7.0-0.rc4.5e3f5b81de80
+Resolves:
+
+* Wed Dec 06 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.bee0e7762ad2.37]
+- Turn off shellcheck for the fedora-stable-release script (Justin M. Forbes)
+Resolves:
+
+* Tue Dec 05 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.bee0e7762ad2.36]
+- Add some initial Fedora stable branch script to redhat/scripts/fedora/ (Justin M. Forbes)
+- Linux v6.7.0-0.rc4.bee0e7762ad2
+Resolves:
+
+* Mon Dec 04 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.35]
+- Linux v6.7.0-0.rc4
+Resolves:
+
+* Sun Dec 03 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.968f35f4ab1c.34]
+- Linux v6.7.0-0.rc3.968f35f4ab1c
+Resolves:
+
+* Sat Dec 02 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.815fb87b7530.33]
+- redhat: disable iptables-legacy compatibility layer (Florian Westphal)
+- redhat: disable dccp conntrack support (Florian Westphal)
+- configs: enable netfilter_netlink_hook in fedora too (Florian Westphal)
+- Linux v6.7.0-0.rc3.815fb87b7530
+Resolves:
+
+* Fri Dec 01 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.994d5c58e50e.32]
+- ext4: Mark mounting fs-verity filesystems as tech-preview (Alexander Larsson)
+- erofs: Add tech preview markers at mount (Alexander Larsson)
+- Enable fs-verity (Alexander Larsson)
+- Enable erofs (Alexander Larsson)
+- aarch64: enable uki (Gerd Hoffmann)
+- redhat: enable CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH as a module for x86 (Patrick Talbert)
+- Turn CONFIG_MFD_CS42L43_SDW on for RHEL (Justin M. Forbes)
+- Linux v6.7.0-0.rc3.994d5c58e50e
+Resolves:
+
+* Thu Nov 30 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.3b47bc037bd4.31]
+- Linux v6.7.0-0.rc3.3b47bc037bd4
+Resolves:
+
+* Wed Nov 29 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.18d46e76d7c2.30]
+- Enable cryptographic acceleration config flags for PowerPC (Mamatha Inamdar)
+- Also make vmlinuz-virt.efi world readable (Zbigniew Jędrzejewski-Szmek)
+- Drop custom mode for System.map file (Zbigniew Jędrzejewski-Szmek)
+- Linux v6.7.0-0.rc3.18d46e76d7c2
+Resolves:
+
+* Tue Nov 28 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.df60cee26a2e.29]
+- Add drm_exec_test to mod-internal.list for depmod to succeed (Mika Penttilä)
+- RHEL 9.4 DRM backport (upto v6.6 kernel), sync Kconfigs (Mika Penttilä)
+- Linux v6.7.0-0.rc3.df60cee26a2e
+Resolves:
+
+* Mon Nov 27 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.28]
+- Linux v6.7.0-0.rc3
+Resolves:
+
+* Sun Nov 26 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.090472ed9c92.27]
+- Linux v6.7.0-0.rc2.090472ed9c92
+Resolves:
+
+* Sat Nov 25 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.0f5cc96c367f.26]
+- Linux v6.7.0-0.rc2.0f5cc96c367f
+Resolves:
+
+* Fri Nov 24 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.f1a09972a45a.25]
+- Linux v6.7.0-0.rc2.f1a09972a45a
+Resolves:
+
+* Thu Nov 23 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.9b6de136b5f0.24]
+- Turn on USB_DWC3 for Fedora (rhbz 2250955) (Justin M. Forbes)
+- Linux v6.7.0-0.rc2.9b6de136b5f0
+Resolves:
+
+* Wed Nov 22 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.c2d5304e6c64.23]
+- redhat/configs: Move IOMMUFD to common (Alex Williamson)
+- redhat: Really remove cpupower files (Prarit Bhargava)
+- redhat: remove update_scripts.sh (Prarit Bhargava)
+- Linux v6.7.0-0.rc2.c2d5304e6c64
+Resolves:
+
+* Mon Nov 20 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.22]
+- Fix s390 zfcpdump bpf build failures for cgroups (Don Zickus)
+- Linux v6.7.0-0.rc2
+Resolves:
+
+* Sun Nov 19 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.037266a5f723.21]
+- Linux v6.7.0-0.rc1.037266a5f723
+Resolves:
+
+* Sat Nov 18 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.791c8ab095f7.20]
+- Linux v6.7.0-0.rc1.791c8ab095f7
+Resolves:
+
+* Fri Nov 17 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.7475e51b8796.19]
+- Linux v6.7.0-0.rc1.7475e51b8796
+Resolves:
+
+* Wed Nov 15 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.c42d9eeef8e5.18]
+- Linux v6.7.0-0.rc1.c42d9eeef8e5
+Resolves:
+
+* Tue Nov 14 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.9bacdd8996c7.17]
+- Linux v6.7.0-0.rc1.9bacdd8996c7
+Resolves:
+
+* Mon Nov 13 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.16]
+- Linux v6.7.0-0.rc1
+Resolves:
+
+* Sun Nov 12 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.1b907d050735.15]
+- Linux v6.7.0-0.rc0.1b907d050735
+Resolves:
+
+* Sat Nov 11 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.3ca112b71f35.14]
+- Flip CONFIG_NVME_AUTH to m in pending (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.3ca112b71f35
+Resolves:
+
+* Fri Nov 10 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.89cdf9d55601.13]
+- Linux v6.7.0-0.rc0.89cdf9d55601
+Resolves:
+
+* Thu Nov 09 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.6bc986ab839c.12]
+- Linux v6.7.0-0.rc0.6bc986ab839c
+Resolves:
+
+* Wed Nov 08 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.305230142ae0.11]
+- Turn CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 on for Fedora x86 (Jason Montleon)
+- kernel/rh_messages.c: Mark functions as possibly unused (Prarit Bhargava)
+- Add snd-hda-cirrus-scodec-test to mod-internal.list (Scott Weaver)
+- Linux v6.7.0-0.rc0.305230142ae0
+Resolves:
+
+* Tue Nov 07 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.be3ca57cfb77.10]
+- Turn off BPF_SYSCALL in pending for zfcpdump (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.be3ca57cfb77
+Resolves:
+
+* Mon Nov 06 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.d2f51b3516da.9]
+- Linux v6.7.0-0.rc0.d2f51b3516da
+Resolves:
+
+* Sun Nov 05 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.1c41041124bd.8]
+- Linux v6.7.0-0.rc0.1c41041124bd
+Resolves:
+
+* Sat Nov 04 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.90b0c2b2edd1.7]
+- Add mean_and_variance_test to mod-internal.list (Justin M. Forbes)
+- Add cfg80211-tests and mac80211-tests to mod-internal.list (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.90b0c2b2edd1
+Resolves:
+
+* Fri Nov 03 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.8f6f76a6a29f.6]
+- Turn on CONFIG_MFD_CS42L43_SDW for RHEL in pending (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.8f6f76a6a29f
+Resolves:
+
+* Fri Nov 03 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.21e80f3841c0.5]
+- Turn on bcachefs for Fedora (Justin M. Forbes)
+- redhat: configs: fedora: Enable QSEECOM and friends (Andrew Halaney)
+Resolves:
+
+* Thu Nov 02 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.21e80f3841c0.4]
+- Add clk-fractional-divider_test to mod-internal.list (Thorsten Leemhuis)
+- Add gso_test to mod-internal.list (Thorsten Leemhuis)
+- Add property-entry-test to mod-internal.list (Thorsten Leemhuis)
+- Linux v6.7.0-0.rc0.21e80f3841c0
+Resolves:
+
+* Wed Nov 01 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.8bc9e6515183.3]
+- Fedora 6.7 configs part 1 (Justin M. Forbes)
+- Trim changelog after version bump (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.8bc9e6515183
+Resolves:
+
+* Tue Oct 31 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.5a6a09e97199.2]
+- Reset RHEL_RELEASE for rebase (Justin M. Forbes)
+- [Scheduled job] Catch config mismatches early during upstream merge (Don Zickus)
+- redhat/self-test: Update data for KABI xz change (Prarit Bhargava)
+- redhat/scripts: Switch KABI tarballs to xz (Prarit Bhargava)
+- redhat/kernel.spec.template: Switch KABI compression to xz (Prarit Bhargava)
+- redhat: self-test: Use a more complete SRPM file suffix (Andrew Halaney)
+- redhat: makefile: remove stray rpmbuild --without (Eric Chanudet)
+- Consolidate configs into common for 6.6 (Justin M. Forbes)
+- Updated Fedora configs (Justin M. Forbes)
+- Turn on UFSHCD for Fedora x86 (Justin M. Forbes)
+- redhat: configs: generic: x86: Disable CONFIG_VIDEO_OV01A10 for x86 platform (Hans de Goede)
+- redhat: remove pending-rhel CONFIG_XFS_ASSERT_FATAL file (Patrick Talbert)
+- New configs in fs/xfs (Fedora Kernel Team)
+- crypto: rng - Override drivers/char/random in FIPS mode (Herbert Xu)
+- random: Add hook to override device reads and getrandom(2) (Herbert Xu)
+- redhat/configs: share CONFIG_ARM64_ERRATUM_2966298 between rhel and fedora (Mark Salter)
+- configs: Remove S390 IOMMU config options that no longer exist (Jerry Snitselaar)
+- redhat: docs: clarify where bugs and issues are created (Scott Weaver)
+- redhat/scripts/rh-dist-git.sh does not take any arguments: fix error message (Denys Vlasenko)
+- Add target_branch for gen_config_patches.sh (Don Zickus)
+- redhat: disable kunit by default (Nico Pache)
+- redhat/configs: enable the AMD_PMF driver for RHEL (David Arcari)
+- Make CONFIG_ADDRESS_MASKING consistent between fedora and rhel (Chris von Recklinghausen)
+- CI: add ark-latest baseline job to tag cki-gating for successful pipelines (Michael Hofmann)
+- CI: provide child pipelines for CKI container image gating (Michael Hofmann)
+- CI: allow to run as child pipeline (Michael Hofmann)
+- CI: provide descriptive pipeline name for scheduled pipelines (Michael Hofmann)
+- CI: use job templates for variant variables (Michael Hofmann)
+- redhat/kernel.spec.template: simplify __modsign_install_post (Jan Stancek)
+- Fedora filter updates after configs (Justin M. Forbes)
+- Fedora configs for 6.6 (Justin M. Forbes)
+- redhat/configs: Freescale Layerscape SoC family (Steve Best)
+- Add clang MR/baseline pipelines (Michael Hofmann)
+- CI: Remove unused kpet_tree_family (Nikolai Kondrashov)
+- Add clang config framework (Don Zickus)
+- Apply partial snippet configs to all configs (Don Zickus)
+- Remove unpackaged kgcov config files (Don Zickus)
+- redhat/configs: enable missing Kconfig options for Qualcomm RideSX4 (Brian Masney)
+- enable CONFIG_ADDRESS_MASKING for x86_64 (Chris von Recklinghausen)
+- common: aarch64: enable NXP Flex SPI (Peter Robinson)
+- fedora: Switch TI_SCI_CLK and TI_SCI_PM_DOMAINS symbols to built-in (Javier Martinez Canillas)
+- kernel.spec: adjust build option comment (Michael Hofmann)
+- kernel.spec: allow to enable arm64_16k variant (Michael Hofmann)
+- gitlab-ci: enable build-only pipelines for Rawhide/16k/aarch64 (Michael Hofmann)
+- kernel.spec.template: Fix --without bpftool (Prarit Bhargava)
+- redhat/configs: NXP BBNSM Power Key Driver (Steve Best)
+- redhat/self-test: Update data for cross compile fields (Prarit Bhargava)
+- redhat/Makefile.cross: Add message for disabled subpackages (Prarit Bhargava)
+- redhat/Makefile.cross: Update cross targets with disabled subpackages (Prarit Bhargava)
+- Remove XFS_ASSERT_FATAL from pending-fedora (Justin M. Forbes)
+- Change default pending for XFS_ONLINE_SCRUB_STATS as it now selects XFS_DEBUG (Justin M. Forbes)
+- gitlab-ci: use --with debug/base to select kernel variants (Michael Hofmann)
+- kernel.spec: add rpmbuild --without base option (Michael Hofmann)
+- redhat: spec: Fix typo for kernel_variant_preun for 16k-debug flavor (Neal Gompa)
+- Turn off appletalk for fedora (Justin M. Forbes)
+- New configs in drivers/media (Fedora Kernel Team)
+- redhat/docs: Add a mention of bugzilla for bugs (Prarit Bhargava)
+- Fix the fixup of Fedora release (Don Zickus)
+- Fix Fedora release scheduled job (Don Zickus)
+- Move squashfs to kernel-modules-core (Justin M. Forbes)
+- redhat: Explicitly disable CONFIG_COPS (Vitaly Kuznetsov)
+- redhat: Add dist-check-licenses target (Vitaly Kuznetsov)
+- redhat: Introduce "Verify SPDX-License-Identifier tags" selftest (Vitaly Kuznetsov)
+- redhat: Use kspdx-tool output for the License: field (Vitaly Kuznetsov)
+- Rename pipeline repo branch and DW tree names (Michael Hofmann)
+- Adjust comments that refer to ARK in a Rawhide context (Michael Hofmann)
+- Rename variable names starting with ark- to rawhide- (Michael Hofmann)
+- Rename trigger-ark to trigger-rawhide (Michael Hofmann)
+- Fix up config mismatches for Fedora (Justin M. Forbes)
+- redhat/configs: Texas Instruments Inc. K3 multicore SoC architecture (Steve Best)
+- Flip CONFIG_VIDEO_V4L2_SUBDEV_API in pending RHEL due to mismatch (Justin M. Forbes)
+- CONFIG_HW_RANDOM_HISI: move to common and set to m (Scott Weaver)
+- Turn off CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE for Fedora s390x (Justin M. Forbes)
+- Disable tests for ELN realtime pipelines (Michael Hofmann)
+- New configs in mm/Kconfig (Fedora Kernel Team)
+- Flip CONFIG_SND_SOC_CS35L56_SDW to m and clean up (Justin M. Forbes)
+- Add drm_exec_test to mod-internal.list (Thorsten Leemhuis)
+- Add new pending entry for CONFIG_SND_SOC_CS35L56_SDW to fix mismatch (Justin M. Forbes)
+- Fix tarball creation logic (Don Zickus)
+- redhat: bump libcpupower soname to match upstream (Patrick Talbert)
+- Turn on MEMFD_CREATE in pending as it is selected by CONFIG_TMPFS (Justin M. Forbes)
+- redhat: drop unneeded build-time dependency gcc-plugin-devel (Coiby Xu)
+- Reset RHEL release and trim changelog after rebase (Justin M. Forbes)
+- all: x86: move wayward x86 specific config home (Peter Robinson)
+- all: de-dupe non standard config options (Peter Robinson)
+- all: x86: clean up microcode loading options (Peter Robinson)
+- common: remove unnecessary CONFIG_SND_MESON_AXG* (Peter Robinson)
+- redhat: Fix UKI install with systemd >= 254 (Vitaly Kuznetsov)
+- redhat: Use named parameters for kernel_variant_posttrans()/kernel_variant_preun() (Vitaly Kuznetsov)
+- redhat/kernel.spec.template: update compression variables to support zstd (Brian Masney)
+- Consolidate configs to common for 6.5 (Justin M. Forbes)
+- Remove unused config entry for Fedora (Justin M. Forbes)
+- redhat/self-test: Remove rpmlint test (Prarit Bhargava)
+- Remove the armv7 config directory from Fedora again (Justin M. Forbes)
+- Enable CONFIG_EXPERT for both RHEL and Fedora (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_DEVICE_PRIVATE on aarch64 (David Hildenbrand) [2231407]
+- redhat/configs: disable CONFIG_ROCKCHIP_ERRATUM_3588001 for RHEL (Mark Salter)
+- redhat: shellcheck fixes (Prarit Bhargava)
+- redhat/configs: enable tegra114 SPI (Mark Salter)
+- all: properly cleanup firewire once and for all (Peter Robinson)
+- Fix up filters for Fedora (Justin M. Forbes)
+- New configs in arch/x86 (Fedora Kernel Team)
+- Add an armv7 directory back for the Fedora configs (Justin M. Forbes)
+- Fedora 6.5 config updates (Justin M. Forbes)
+- Turn off DMABUF_SYSFS_STATS (Justin M. Forbes)
+- CI: rawhide_release: switch to using script to push (Don Zickus)
+- redhat/self-test: Update self-test data (Prarit Bhargava)
+- redhat/scripts/cross-compile: Update download_cross.sh (Prarit Bhargava)
+- redhat/Makefile.cross: Remove ARCH selection code (Prarit Bhargava)
+- redhat/Makefile.cross: Update script (Prarit Bhargava)
+- Fix interruptible non MR jobs (Michael Hofmann)
+- all: run evaluate_configs to de-dupe merged aarch64 (Peter Robinson)
+- all: arm: merge the arm and arm/aarch64 (Peter Robinson)
+- fedora: remove ARMv7 AKA armhfp configurations (Peter Robinson)
+- fedora: remove ARMv7 AKA armhfp support (Peter Robinson)
+- redhat/configs: enable CONFIG_VIRTIO_MEM on aarch64 (David Hildenbrand) [2044155]
+- redhat/configs: enable CONFIG_MEMORY_HOTREMOVE aarch64 (David Hildenbrand) [2062054]
+- redhat: Add arm64-16k kernel flavor scaffold for 16K page-size'd AArch64 (Neal Gompa)
+- fedora: enable i3c on aarch64 (Peter Robinson)
+- redhat/configs: Remove `CONFIG_HZ_1000 is not set` for aarch64 (Enric Balletbo i Serra)
+- redhat/configs: turn on the framework for SPI NOR for ARM (Steve Best)
+- configs: add new ChromeOS UART driver (Mark Langsdorf)
+- configs: add new ChromeOS Human Presence Sensor (Mark Langsdorf)
+- redhat/configs: Enable CONFIG_NVIDIA_WMI_EC_BACKLIGHT for both Fedora and RHEL (Kate Hsuan)
+- redhat/configs: Texas Instruments INA3221 driver (Steve Best)
+- arm: i.MX: Some minor NXP i.MX cleanups (Peter Robinson)
+- Description: Set config for Tegra234 pinctrl driver (Joel Slebodnick)
+- Update RPM Scriptlet for kernel-install Changes (Jonathan Steffan)
+- [CI] add exit 0 to the end of CI scripts (Don Zickus)
+- redhat: configs: Disable CONFIG_CRYPTO_STATS since performance issue for storage (Kate Hsuan) [2227793]
+- Remove obsolete variable from gitlab-ci.yml (Ondrej Kinst)
+- redhat/configs: Move GVT-g to Fedora only (Alex Williamson)
+- [CI] Make sure we are on correct branch before running script (Don Zickus)
+- CI: ark-update-configs: sync push command and output (Don Zickus)
+- CI: ark-update-configs: misc changes (Don Zickus)
+- CI: sync ark-create-release push commands with output (Don Zickus)
+- CI: ark-create-release: Add a robust check if nothing changed (Don Zickus)
+- CI: Remove legacy tag check cruft (Don Zickus)
+- CI: Introduce simple environment script (Don Zickus)
+- redhat/configs: Disable FIREWIRE for RHEL (Prarit Bhargava)
+- redhat/scripts/rh-dist-git.sh: print list of uploaded files (Denys Vlasenko)
+- redhat/scripts/expand_srpm.sh: add missing function, robustify (Denys Vlasenko)
+- redhat: Enable HSR and PRP (Felix Maurer)
+- redhat/scripts/rh-dist-git.sh: fix outdated message and comment (Denys Vlasenko)
+- redhat/configs: Disable CONFIG_I8K (Prarit Bhargava)
+- Make sure posttrans script doesn't fail if restorecon is not installed (Daan De Meyer)
+- Update filters for new config items (Justin M. Forbes)
+- More Fedora 6.5 configs (Justin M. Forbes)
+- redhat/configs: disable pre-UVC cameras for RHEL on aarch64 (Dean Nelson)
+- redhat/configs: enable CONFIG_MEDIA_SUPPORT for RHEL on aarch64 (Dean Nelson)
+- move ownership of /lib/modules/<ver>/ to kernel-core (Thorsten Leemhuis)
+- Let kernel-modules-core own the files depmod generates. (Thorsten Leemhuis)
+- redhat: configs: Enable CONFIG_TYPEC_STUSB160X for rhel on aarch64 (Desnes Nunes)
+- Add filters for ptp_dfl_tod on Fedora (Justin M. Forbes)
+- Fedora 6.5 configs part 1 (Justin M. Forbes)
+- fedora: enable CONFIG_ZYNQMP_IPI_MBOX as a builtin in pending-fedora (Patrick Talbert)
+- fedora: arm: some minor updates (Peter Robinson)
+- fedora: bluetooth: enable AOSP extensions (Peter Robinson)
+- fedora: wifi: tweak ZYDAS WiFI config options (Peter Robinson)
+- scsi: sd: Add "probe_type" module parameter to allow synchronous probing (Ewan D. Milne) [2140017]
+- redhat/configs: allow IMA to use MOK keys (Coiby Xu)
+- Simplify documentation jobs (Michael Hofmann)
+- Auto-cancel pipelines only on MRs (Michael Hofmann)
+- CI: Call script directly (Don Zickus)
+- CI: Remove stale TAG and Makefile cruft (Don Zickus)
+- CI: Move os-build tracking to common area (Don Zickus)
+- redhat: use the eln builder for daily jobs (Patrick Talbert)
+- redhat: set CONFIG_XILINX_WINDOW_WATCHDOG as disabled in pending (Patrick Talbert)
+- Add baseline ARK/ELN pipelines (Michael Hofmann)
+- Simplify job rules (Michael Hofmann)
+- Build ELN srpm for bot changes (Michael Hofmann)
+- Run RH selftests for ELN (Michael Hofmann)
+- Simplify job templates (Michael Hofmann)
+- Extract rules to allow orthogonal configuration (Michael Hofmann)
+- Require ELN pipelines if started automatically (Michael Hofmann)
+- Add ARK debug pipeline (Michael Hofmann)
+- Extract common parts of child pipeline job (Michael Hofmann)
+- Move ARK pipeline variables into job template (Michael Hofmann)
+- Simplify ARK pipeline rules (Michael Hofmann)
+- Change pathfix.py to %%py3_shebang_fix (Justin M. Forbes)
+- Turn on NET_VENDOR_QUALCOMM for Fedora to enable rmnet (Justin M. Forbes)
+- redhat: add intel-m10-bmc-hwmon to filter-modules singlemods list (Patrick Talbert)
+- fedora: enable pending-fedora CONFIG_CPUFREQ_DT_PLATDEV as a module (Patrick Talbert)
+- redhat: fix the 'eln BUILD_TARGET' self-test (Patrick Talbert)
+- redhat: update the self-test-data (Patrick Talbert)
+- redhat: remove trailing space in dist-dump-variables output (Patrick Talbert)
+- Allow ELN pipelines failures (Michael Hofmann)
+- Enable cs-like CI (Michael Hofmann)
+- Allow to auto-cancel redundant pipelines (Michael Hofmann)
+- Remove obsolete unused trigger variable (Michael Hofmann)
+- Fix linter warnings in .gitlab-ci.yml (Michael Hofmann)
+- config: wifi: debug options for ath11k, brcm80211 and iwlwifi (Íñigo Huguet)
+- redhat: allow dbgonly cross builds (Jan Stancek)
+- redhat/configs: Clean up x86-64 call depth tracking configs (Waiman Long)
+- redhat: move SND configs from pending-rhel to rhel (Patrick Talbert)
+- Fix up armv7 configs for Fedora (Justin M. Forbes)
+- redhat: Set pending-rhel x86 values for various SND configs (Patrick Talbert)
+- redhat: update self-test data (Patrick Talbert)
+- redhat: ignore SPECBPFTOOLVERSION/bpftoolversion in self-test create-data.sh (Patrick Talbert)
+- fedora/rhel: Move I2C_DESIGNWARE_PLATFORM, I2C_SLAVE, & GPIOLIB from pending (Patrick Talbert)
+- redhat/filter-modules.sh.rhel: add needed deps for intel_rapl_tpmi (Jan Stancek)
+- fedora: Enable CONFIG_SPI_SLAVE (Patrick Talbert)
+- fedora/rhel: enable I2C_DESIGNWARE_PLATFORM, I2C_SLAVE, and GPIOLIB (Patrick Talbert)
+- fedora: Enable CONFIG_SPI_SLAVE in fedora-pending (Patrick Talbert)
+- redhat: remove extra + (plus) from meta package Requires definitions (Patrick Talbert)
+- Add intel-m10-bmc-hwmon to singlemods (Thorsten Leemhuis)
+- Add hid-uclogic-test to mod-internal.list (Thorsten Leemhuis)
+- Add checksum_kunit.ko to mod-internal.list (Thorsten Leemhuis)
+- Add strcat_kunit to mod-internal.list (Thorsten Leemhuis)
+- Add input_test to mod-internal.list (Thorsten Leemhuis)
+- Revert "Remove EXPERT from ARCH_FORCE_MAX_ORDER for aarch64" (Justin M. Forbes)
+- Reset the release number and dedup the changelog after rebase (Justin M. Forbes)
+- Fix up rebase issue with CONFIG_ARCH_FORCE_MAX_ORDER (Justin M. Forbes)
+- redhat/kernel.spec.template: Disable 'extracting debug info' messages (Prarit Bhargava)
+- kernel/rh_messages.c: Another gcc12 warning on redundant NULL test (Florian Weimer) [2216678]
+- redhat: fix signing for realtime and arm64_64k non-debug variants (Jan Stancek)
+- redhat: treat with_up consistently (Jan Stancek)
+- redhat: make with_realtime opt-in (Jan Stancek)
+- redhat/configs: Disable qcom armv7 drippings in the aarch64 tree (Jeremy Linton)
+- kernel.spec: drop obsolete ldconfig (Jan Stancek)
+- Consolidate config items to common for 6.4 cycle (Justin M. Forbes)
+- Turn on CONFIG_RMNET for Fedora (Justin M. Forbes)
+- redhat/configs: enable CONFIG_MANA_INFINIBAND=m for ARK (Vitaly Kuznetsov)
+- redhat/config: common: Enable CONFIG_GPIO_SIM for software development (Kate Hsuan)
+- redhat: fix problem with RT kvm modules listed twice in rpm generation (Clark Williams)
+- redhat: turn off 64k kernel builds with rtonly (Clark Williams)
+- redhat: turn off zfcpdump for rtonly (Clark Williams)
+- redhat: don't allow with_rtonly to turn on unsupported arches (Clark Williams)
+- redhat: update self-test data for addition of RT and 64k-page variants (Clark Williams)
+- redhat: fix realtime and efiuki build conflict (Jan Stancek)
+- arm64-64k: Add new kernel variant to RHEL9/CS9 for 64K page-size'd ARM64 (Donald Dutile) [2153073]
+- redhat: TEMPORARY set configs to deal with PREEMPT_RT not available (Clark Williams)
+- redhat: TEMPORARY default realtime to off (Clark Williams)
+- redhat: moved ARM errata configs to arm dir (Clark Williams)
+- redhat: RT packaging changes (Clark Williams)
+- redhat: miscellaneous commits needed due to CONFIG_EXPERT (Clark Williams)
+- redhat: realtime config entries (Clark Williams)
+- common: remove deleted USB PCCARD drivers (Peter Robinson)
+- fedora: further cleanup of pccard/cardbus subsystem (Peter Robinson)
+- common: properly disable PCCARD subsystem (Peter Robinson)
+- redhat/configs: arm: enable SERIAL_TEGRA UART for RHEL (Mark Salter)
+- redhat/configs: enable CONFIG_X86_AMD_PSTATE_UT (David Arcari)
+- redhat/configs: Enable CONFIG_TCG_VTPM_PROXY for RHEL (Štěpán Horáček)
+- redhat: do not package *.mod.c generated files (Denys Vlasenko)
+- ALSA configuration changes for ARK/RHEL 9.3 (Jaroslav Kysela)
+- spec: remove resolve_btfids from kernel-devel (Viktor Malik)
+- Fix typo in filter-modules (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_INIT_STACK_ALL_ZERO for RHEL (Josh Poimboeuf)
+- Remove CONFIG_ARCH_FORCE_MAX_ORDER for aarch64 (Justin M. Forbes)
+- Fix up config and filter for PTP_DFL_TOD (Justin M. Forbes)
+- redhat/configs: IMX8ULP pinctrl driver (Steve Best)
+- redhat/configs: increase CONFIG_FRAME_WARN for Fedora on aarch64 (Brian Masney)
+- redhat/configs: add two missing Kconfig options for the Thinkpad x13s (Brian Masney)
+- Fedora configs for 6.4 (Justin M. Forbes)
+- Change aarch64 CONFIG_ARCH_FORCE_MAX_ORDER to 10 for 4K pages (Justin M. Forbes)
+- kernel.spec: remove "RPM_VMLINUX_H=$DevelDir/vmlinux.h" code chunk in %%install (Denys Vlasenko)
+- redhat/configs: aarch64: Turn on Display for OnePlus 6 (Eric Curtin)
+- redhat/configs: NXP i.MX93 pinctrl, clk, analog to digital converters (Steve Best)
+- redhat/configs: Enable CONFIG_SC_GPUCC_8280XP for fedora (Andrew Halaney)
+- redhat/configs: Enable CONFIG_QCOM_IPCC for fedora (Andrew Halaney)
+- Add rv subpackage for kernel-tools (John Kacur) [2188441]
+- redhat/configs: NXP i.MX9 family (Steve Best)
+- redhat/genlog.py: add support to list/process zstream Jira tickets (Herton R. Krzesinski)
+- redhat: fix duplicate jira issues in the resolves line (Herton R. Krzesinski)
+- redhat: add support for Jira issues in changelog (Herton R. Krzesinski)
+- redhat/configs: turn on IMX8ULP CCM Clock Driver (Steve Best)
+- redhat: update filter-modules fsdrvs list to reference smb instead of cifs (Patrick Talbert)
+- Turn off some debug options found to impact performance (Justin M. Forbes)
+- wifi: rtw89: enable RTL8852BE card in RHEL (Íñigo Huguet)
+- redhat/configs: enable TEGRA186_GPC_DMA for RHEL (Mark Salter)
+- Move imx8m configs from fedora to common (Mark Salter)
+- redhat/configs: turn on lpuart serial port support Driver (Steve Best) [2208834]
+- Turn off DEBUG_VM for non debug Fedora kernels (Justin M. Forbes)
+- Enable CONFIG_BT on aarch64 (Charles Mirabile)
+- redhat/configs: turn on CONFIG_MARVELL_CN10K_TAD_PMU (Michal Schmidt) [2042240]
+- redhat/configs: Fix enabling MANA Infiniband (Kamal Heib)
+- Fix file listing for symvers in uki (Justin M. Forbes)
+- Fix up some Fedora config items (Justin M. Forbes)
+- enable efifb for Nvidia (Justin M. Forbes)
+- kernel.spec: package unstripped test_progs-no_alu32 (Felix Maurer)
+- Turn on NFT_CONNLIMIT for Fedora (Justin M. Forbes)
+- Include the information about builtin symbols into kernel-uki-virt package too (Vitaly Kuznetsov)
+- redhat/configs: Fix incorrect configs location and content (Vladis Dronov)
+- redhat/configs: turn on CONFIG_MARVELL_CN10K_DDR_PMU (Michal Schmidt) [2042241]
+- redhat: configs: generic: x86: Disable CONFIG_VIDEO_OV2740 for x86 platform (Kate Hsuan)
+- Enable IO_URING for RHEL (Justin M. Forbes)
+- Turn on IO_URING for RHEL in pending (Justin M. Forbes)
+- redhat: Remove editconfig (Prarit Bhargava)
+- redhat: configs: fix CONFIG_WERROR replace in build_configs (Jan Stancek)
+- redhat/configs: enable Maxim MAX77620 PMIC for RHEL (Mark Salter)
+- kernel.spec: skip kernel meta package when building without up (Jan Stancek)
+- redhat/configs: enable RDMA_RXE for RHEL (Kamal Heib) [2022578]
+- redhat/configs: update RPCSEC_GSS_KRB5 configs (Scott Mayhew)
+- redhat/Makefile: Support building linux-next (Thorsten Leemhuis)
+- redhat/Makefile: support building stable-rc versions (Thorsten Leemhuis)
+- redhat/Makefile: Add target to print DISTRELEASETAG (Thorsten Leemhuis)
+- Remove EXPERT from ARCH_FORCE_MAX_ORDER for aarch64 (Justin M. Forbes)
+- Revert "Merge branch 'unstripped-no_alu32' into 'os-build'" (Patrick Talbert)
+- configs: Enable CONFIG_PAGE_POOL_STATS for common/generic (Patrick Talbert)
+- redhat/configs: enable CONFIG_DELL_WMI_PRIVACY for both RHEL and Fedora (David Arcari)
+- kernel.spec: package unstripped test_progs-no_alu32 (Felix Maurer)
+- bpf/selftests: fix bpf selftests install (Jerome Marchand)
+- kernel.spec: add bonding selftest (Hangbin Liu)
+- Change FORCE_MAX_ORDER for ppc64 to be 8 (Justin M. Forbes)
+- kernel.spec.template: Add global compression variables (Prarit Bhargava)
+- kernel.spec.template: Use xz for KABI (Prarit Bhargava)
+- kernel.spec.template: Remove gzip related aarch64 code (Prarit Bhargava)
+- Add apple_bl to filter-modules (Justin M. Forbes)
+- Add handshake-test to mod-internal.list (Justin M. Forbes)
+- Add regmap-kunit to mod-internal.list (Justin M. Forbes)
+- configs: set CONFIG_PAGE_POOL_STATS (Patrick Talbert)
+- Add apple_bl to fedora module_filter (Justin M. Forbes)
+- Fix up some config mismatches in new Fedora config items (Justin M. Forbes)
+- redhat/configs: disable CONFIG_USB_NET_SR9700 for aarch64 (Jose Ignacio Tornos Martinez)
+- Reset changelog for 6.4 series (Justin M. Forbes)
+- Reset RHEL_RELEASE for the 6.4 cycle (Justin M. Forbes)
+- Fix up the RHEL configs for xtables and ipset (Justin M. Forbes)
+- ark: enable wifi on aarch64 (Íñigo Huguet)
+- fedora: wifi: hermes: disable 802.11b driver (Peter Robinson)
+- fedora: wifi: libertas: use the LIBERTAS_THINFIRM driver (Peter Robinson)
+- fedora: wifi: disable Zydas vendor (Peter Robinson)
+- redhat: fix python ValueError in error path of merge.py (Clark Williams)
+- fedora: arm: minor updates (Peter Robinson)
+- kernel.spec: Fix UKI naming to comply with BLS (Philipp Rudo)
+- redhat/kernel.spec.template: Suppress 'extracting debug info' noise in build log (Prarit Bhargava)
+- Fedora 6.3 configs part 2 (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_X86_KERNEL_IBT for Fedora and ARK (Josh Poimboeuf)
+- kernel.spec: gcov: make gcov subpackages per variant (Jan Stancek)
+- kernel.spec: Gemini: add Epoch to perf and rtla subpackages (Jan Stancek)
+- kernel.spec: Gemini: fix header provides for upgrade path (Jan Stancek)
+- redhat: introduce Gemini versioning (Jan Stancek)
+- redhat: separate RPM version from uname version (Jan Stancek)
+- redhat: introduce GEMINI and RHEL_REBASE_NUM variable (Jan Stancek)
+- ipmi: ssif_bmc: Add SSIF BMC driver (Tony Camuso)
+- common: minor de-dupe of parallel port configs (Peter Robinson)
+- Fedora 6.3 configs part 1 (Justin M. Forbes)
+- redhat: configs: Enable CONFIG_MEMTEST to enable memory test (Kate Hsuan)
+- Update Fedora arm filters after config updates (Nicolas Chauvet)
+- redhat/kernel.spec.template: Fix kernel-tools-libs-devel dependency (Prarit Bhargava)
+- redhat: fix the check for the n option (Patrick Talbert)
+- common: de-dupe some options that are the same (Peter Robinson)
+- generic: remove deleted options (Peter Robinson)
+- redhat/configs: enable CONFIG_INTEL_TCC_COOLING for RHEL (David Arcari)
+- Update Fedora ppc filters after config updates (Justin M. Forbes)
+- Update Fedora aarch64 filters after config updates (Justin M. Forbes)
+- fedora: arm: Updates for 6.3 (Peter Robinson)
+- redhat: kunit: cleanup NITRO config and enable rescale test (Nico Pache)
+- kernel.spec: use %%{package_name} to fix kernel-devel-matched Requires (Jan Stancek)
+- kernel.spec: use %%{package_name} also for abi-stablelist subpackages (Jan Stancek)
+- kernel.spec: use %%{package_name} also for tools subpackages (Jan Stancek)
+- generic: common: Parport and paride/ata cleanups (Peter Robinson)
+- CONFIG_SND_SOC_CS42L83 is no longer common (Justin M. Forbes)
+- configs: arm: bring some configs in line with rhel configs in c9s (Mark Salter)
+- arm64/configs: Put some arm64 configs in the right place (Mark Salter)
+- cleanup removed R8188EU config (Peter Robinson)
+- Make RHJOBS container friendly (Don Zickus)
+- Remove scmversion from kernel.spec.template (Don Zickus)
+- redhat/configs: Enable CONFIG_SND_SOC_CS42L83 (Neal Gompa)
+- Use RHJOBS for create-tarball (Don Zickus)
+- Enable CONFIG_NET_SCH_FQ_PIE for Fedora (Justin M. Forbes)
+- Make Fedora debug configs more useful for debug (Justin M. Forbes)
+- redhat/configs: enable Octeon TX2 network drivers for RHEL (Michal Schmidt) [2040643]
+- redhat/kernel.spec.template: fix installonlypkg for meta package (Jan Stancek)
+- redhat: version two of Makefile.rhelver tweaks (Clark Williams)
+- redhat/configs: Disable CONFIG_GCC_PLUGINS (Prarit Bhargava)
+- redhat/kernel.spec.template: Fix typo for process_configs.sh call (Neal Gompa)
+- redhat/configs: CONFIG_CRYPTO_SM3_AVX_X86_64 is x86 only (Vladis Dronov)
+- redhat/configs: Enable CONFIG_PINCTRL_METEORLAKE in RHEL (Prarit Bhargava)
+- fedora: enable new image sensors (Peter Robinson)
+- redhat/self-test: Update self-test data (Prarit Bhargava)
+- redhat/kernel.spec.template: Fix hardcoded "kernel" (Prarit Bhargava)
+- redhat/configs/generate_all_configs.sh: Fix config naming (Prarit Bhargava)
+- redhat/kernel.spec.template: Pass SPECPACKAGE_NAME to generate_all_configs.sh (Prarit Bhargava)
+- kernel.spec.template: Use SPECPACKAGE_NAME (Prarit Bhargava)
+- redhat/Makefile: Copy spec file (Prarit Bhargava)
+- redhat: Change PACKAGE_NAME to SPECPACKAGE_NAME (Prarit Bhargava)
+- redhat/configs: Support the virtio_mmio.device parameter in Fedora (David Michael)
+- Revert "Merge branch 'systemd-boot-unsigned' into 'os-build'" (Patrick Talbert)
+- redhat/Makefile: fix default values for dist-brew's DISTRO and DIST (Íñigo Huguet)
+- Remove cc lines from automatic configs (Don Zickus)
+- Add rtla-hwnoise files (Justin M. Forbes)
+- redhat/kernel.spec.template: Mark it as a non-executable file (Neal Gompa)
+- fedora: arm: Enable DRM_PANEL_HIMAX_HX8394 (Javier Martinez Canillas)
+- redhat/configs: CONFIG_HP_ILO location fix (Vladis Dronov)
+- redhat: Fix build for kselftests mm (Nico Pache)
+- fix tools build after vm to mm rename (Justin M. Forbes)
+- redhat/spec: Update bpftool versioning scheme (Viktor Malik)
+- redhat/configs: CONFIG_CRYPTO_SM4_AESNI_AVX*_X86_64 is x86 only (Prarit Bhargava)
+- redhat: adapt to upstream Makefile change (Clark Williams)
+- redhat: modify efiuki specfile changes to use variants convention (Clark Williams)
+- Turn off DEBUG_INFO_COMPRESSED_ZLIB for Fedora (Justin M. Forbes)
+- redhat/kernel.spec.template: Fix RHEL systemd-boot-unsigned dependency (Prarit Bhargava)
+- Add hashtable_test to mod-internal.list (Justin M. Forbes)
+- Add more kunit tests to mod-internal.list for 6.3 (Justin M. Forbes)
+- Flip CONFIG_I2C_ALGOBIT to m (Justin M. Forbes)
+- Flip I2C_ALGOBIT to m to avoid mismatch (Justin M. Forbes)
+- kernel.spec: move modules.builtin to kernel-core (Jan Stancek)
+- Turn on IDLE_INJECT for x86 (Justin M. Forbes)
+- Flip CONFIG_IDLE_INJECT in pending (Justin M. Forbes)
+- Trim Changelog for 6.3 series (Justin M. Forbes)
+- Reset RHEL_RELEASE to 0 for the 6.3 cycle (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_V4L_TEST_DRIVERS related drivers (Enric Balletbo i Serra)
+- redhat/configs: Enable UCSI_CCG support (David Marlin)
+- Fix underline mark-up after text change (Justin M. Forbes)
+- Turn on CONFIG_XFS_RT for Fedora (Justin M. Forbes)
+- Consolidate common configs for 6.2 (Justin M. Forbes)
+- aarch64: enable zboot (Gerd Hoffmann)
+- redhat: remove duplicate pending-rhel config items (Patrick Talbert)
+- Disable frame pointers (Justin M. Forbes)
+- redhat/configs: update scripts and docs for ark -> rhel rename (Clark Williams)
+- redhat/configs: rename ark configs dir to rhel (Clark Williams)
+- Turn off CONFIG_DEBUG_INFO_COMPRESSED_ZLIB for ppc64le (Justin M. Forbes)
+- kernel.spec: package unstripped kselftests/bpf/test_progs (Jan Stancek)
+- kernel.spec: allow to package some binaries as unstripped (Jan Stancek)
+- redhat/configs: Make merge.py portable for older python (Desnes Nunes)
+- Fedora configs for 6.2 (Justin M. Forbes)
+- redhat: Repair ELN build broken by the recent UKI changes (Vitaly Kuznetsov)
+- redhat/configs: enable CONFIG_INET_DIAG_DESTROY (Andrea Claudi)
+- Enable TDX Guest driver (Vitaly Kuznetsov)
+- redhat/configs: Enable CONFIG_PCIE_PTM generically (Corinna Vinschen)
+- redhat: Add sub-RPM with a EFI unified kernel image for virtual machines (Vitaly Kuznetsov)
+- redhat/Makefile: Remove GIT deprecated message (Prarit Bhargava)
+- Revert "redhat: configs: Disable xtables and ipset" (Phil Sutter)
+- redhat/configs: Enable CONFIG_SENSORS_LM90 for RHEL (Mark Salter)
+- Fix up SQUASHFS decompression configs (Justin M. Forbes)
+- redhat/configs: enable CONFIG_OCTEON_EP as a module in ARK (Michal Schmidt) [2041990]
+- redhat: ignore rpminspect runpath report on urandom_read selftest binaries (Herton R. Krzesinski)
+- kernel.spec: add llvm-devel build requirement (Scott Weaver)
+- Update self-test data to not expect debugbuildsenabled 0 (Justin M. Forbes)
+- Turn off forced debug builds (Justin M. Forbes)
+- Turn on debug builds for aarch64 Fedora (Justin M. Forbes)
+- redhat/configs: modify merge.py to match old overrides input (Clark Williams)
+- redhat: fixup pylint complaints (Clark Williams)
+- redhat: remove merge.pl and references to it (Clark Williams)
+- redhat: update merge.py to handle merge.pl corner cases (Clark Williams)
+- Revert "redhat: fix elf got hardening for vm tools" (Don Zickus)
+- Update rebase notes for Fedora (Justin M. Forbes)
+- Update CONFIG_LOCKDEP_CHAINS_BITS to 19 (cmurf)
+- redhat/configs: Turn on CONFIG_SPI_TEGRA210_QUAD for RHEL (Mark Salter)
+- ark: aarch64: drop CONFIG_SMC911X (Peter Robinson)
+- all: cleanup and de-dupe CDROM_PKTCDVD options. (Peter Robinson)
+- all: remove CRYPTO_GF128MUL (Peter Robinson)
+- all: cleanup UEFI options (Peter Robinson)
+- common: arm64: Enable Ampere Altra SMpro Hardware Monitoring (Peter Robinson)
+- fedora: enable STACKPROTECTOR_STRONG (Peter Robinson)
+- fedora: enable STACKPROTECTOR on arm platforms (Peter Robinson)
+- redhat/self-test: Update data with ENABLE_WERROR (Prarit Bhargava)
+- redhat/Makefile.variables: Add ENABLE_WERROR (Prarit Bhargava)
+- makefile: Add -Werror support for RHEL (Prarit Bhargava)
+- redhat/Makefile.variables: Remove mention of Makefile.rhpkg (Prarit Bhargava)
+- redhat/Makefile.variables: Alphabetize variables (Prarit Bhargava)
+- gitlab-ci: use CI templates from production branch (Michael Hofmann)
+- redhat/kernel.spec.template: Fix internal "File listed twice" errors (Prarit Bhargava)
+- redhat: Remove stale .tmp_versions code and comments (Prarit Bhargava)
+- redhat/kernel.spec.template: Fix vmlinux_decompressor on !s390x (Prarit Bhargava)
+- redhat/kernel.spec.template: Remove unnecessary output from pathfix.py (Prarit Bhargava)
+- Modularize CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU (Mark Salter)
+- redhat/kernel.spec.template: Parallelize compression (Prarit Bhargava)
+- config: Enable Security Path (Ricardo Robaina)
+- redhat/self-test/data: Regenerate self-test data for make change (Prarit Bhargava)
+- Update module filters for nvmem_u-boot-env (Justin M. Forbes)
+- fedora: Updates for 6.2 merge (Peter Robinson)
+- fedora: Updates for 6.1 merge (Peter Robinson)
+- modules-core: use %%posttrans (Gerd Hoffmann)
+- split sub-rpm kernel-modules-core from kernel-core (Gerd Hoffmann)
+- Turn off CONFIG_MTK_T7XX for S390x (Justin M. Forbes)
+- CI: add variable for variant handling (Veronika Kabatova)
+- Fix up configs with SND_SOC_NAU8315 mismatch (Justin M. Forbes)
+- CI: Do a full build for non-bot runs (Veronika Kabatova)
+- Fix up configs with SND_SOC_NAU8315 mismatch (Justin M. Forbes)
+- kernel/rh_messages.c: gcc12 warning on redundant NULL test (Eric Chanudet) [2142658]
+- redhat/configs: Enable CRYPTO_CURVE25519 in ark (Prarit Bhargava)
+- general: arm: cleanup ASPEED options (Peter Robinson)
+- redhat/configs: ALSA - cleanups for the AMD Pink Sardine DMIC driver (Jaroslav Kysela)
+- redhat/docs: Add FAQ entry for booting between Fedora & ELN/RHEL kernels (Prarit Bhargava)
+- spec: add missing BuildRequires: python3-docutils for tools (Ondrej Mosnacek)
+- config: enable RCU_TRACE for debug kernels (Wander Lairson Costa)
+- Add siphash_kunit and strscpy_kunit to mod-internal.list (Justin M. Forbes)
+- Add drm_kunit_helpers to mod-internal.list (Justin M. Forbes)
+- Fix up configs for Fedora so we don't have a mismatch (Justin M. Forbes)
+- Turn on CONFIG_SQUASHFS_DECOMP_SINGLE in pending (Justin M. Forbes)
+- Trim changelog for 6.2 cycle (Justin M. Forbes)
+- Reset RHEL_RELEASE for the 6.2 window. (Justin M. Forbes)
+- redhat/kernel.spec.template: Fix cpupower file error (Prarit Bhargava)
+- redhat/configs: aarch64: clean up some erratum configs (Mark Salter)
+- More Fedora configs for 6.1 as deps were switched on (Justin M. Forbes)
+- redhat/configs: make SOC_TEGRA_CBB a module (Mark Salter)
+- redhat/configs: aarch64: reorganize tegra configs to common dir (Mark Salter)
+- Enforces buildroot if cross_arm (Nicolas Chauvet)
+- Handle automated case when config generation works correctly (Don Zickus)
+- Turn off CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64 (Justin M. Forbes)
+- Turn off CONFIG_EFI_ZBOOT as it makes CKI choke (Justin M. Forbes)
+- Fedora config updates for 6.1 (Justin M. Forbes)
+- redhat: Remove cpupower files (Prarit Bhargava)
+- redhat/configs: update CXL-related options to match what RHEL will use (John W. Linville)
+- Clean up the config for the Tegra186 timer (Al Stone)
+- redhat/configs: move CONFIG_TEGRA186_GPC_DMA config (Mark Salter)
+- Check for kernel config git-push failures (Don Zickus)
+- redhat: genlog.sh failures should interrupt the recipe (Patrick Talbert)
+- Turn CONFIG_GNSS back on for Fedora (Justin M. Forbes)
+- redhat/configs: enable CONFIG_GNSS for RHEL (Michal Schmidt)
+- Turn off NVMEM_U_BOOT_ENV for fedora (Justin M. Forbes)
+- Consolidate matching fedora and ark entries to common (Justin M. Forbes)
+- Empty out redhat/configs/common (Justin M. Forbes)
+- Adjust path to compressed vmlinux kernel image for s390x (Justin M. Forbes) [2149273]
+- Fedora config updates for 6.1 (Justin M. Forbes)
+- redhat: genlog.sh should expect genlog.py in the current directory (Patrick Talbert)
+- redhat/configs: consolidate CONFIG_TEST_LIVEPATCH=m (Joe Lawrence)
+- redhat/configs: enable CONFIG_TEST_LIVEPATCH=m for s390x (Julia Denham)
+- Revert "Merge branch 'ark-make-help' into 'os-build'" (Scott Weaver)
+- Remove recommendation to use 'common' for config changes. (Don Zickus)
+- Update config to add i3c support for AArch64 (Mark Charlebois)
+- redhat: Move cross-compile scripts into their own directory (Prarit Bhargava)
+- redhat: Move yaml files into their own directory (Prarit Bhargava)
+- redhat: Move update_scripts.sh into redhat/scripts (Prarit Bhargava)
+- redhat: Move kernel-tools scripts into their own directory (Prarit Bhargava)
+- redhat: Move gen-* scripts into their own directory (Prarit Bhargava)
+- redhat: Move mod-* scripts into their own directory (Prarit Bhargava)
+- redhat/Makefile: Fix RHJOBS grep warning (Prarit Bhargava)
+- redhat: Force remove tmp file (Prarit Bhargava)
+- redhat/configs: ALSA - cleanups for the CentOS 9.2 update (Jaroslav Kysela)
+- CI: Use CKI container images from quay.io (Veronika Kabatova)
+- redhat: clean up the partial-kgcov-snip.config file (Patrick Talbert)
+- redhat: avoid picking up stray editor backups when processing configs (Clark Williams)
+- CI: Remove old configs (Veronika Kabatova)
+- redhat: override `make help` to include dist-help (Jonathan Toppins)
+- redhat: make RHTEST stricter (Jonathan Toppins)
+- redhat: Enable support for SN2201 system (Ivan Vecera)
+- redhat/docs/index.rst: Add FLAVOR information to generate configs for local builds (Enric Balletbo i Serra)
+- redhat: fix selftest git command so it picks the right commit (Patrick Talbert)
+- redhat/configs: enable HP_WATCHDOG for aarch64 (Mark Salter)
+- redhat: disable Kfence Kunit Test (Nico Pache)
+- configs: enable CONFIG_LRU_GEN_ENABLED everywhere (Patrick Talbert)
+- redhat: Enable WWAN feature and support for Intel, Qualcomm and Mediatek devices (Jose Ignacio Tornos Martinez)
+- Turn on dln2 support (RHBZ 2110372) (Justin M. Forbes)
+- Enable configs for imx8m PHYs (Al Stone)
+- configs/fedora: Build some SC7180 clock controllers as modules (Javier Martinez Canillas)
+- redhat/configs: Disable fbdev drivers and use simpledrm everywhere (Javier Martinez Canillas) [1986223]
+- redhat: fix the branch we pull from the documentation tree (Herton R. Krzesinski)
+- redhat/configs: change so watchdog is module versus builtin (Steve Best)
+- redhat/configs: move CONFIG_ACPI_VIDEO to common/generic (Mark Langsdorf)
+- enable imx8xm I2C configs properly (Al Stone)
+- configs/fedora: Enable a few more drivers needed by the HP X2 Chromebook (Javier Martinez Canillas)
+- enable the rtc-rv8803 driver on RHEL and Fedora (David Arcari)
+- redhat/Makefile: Remove BUILD_SCRATCH_TARGET (Prarit Bhargava)
+- configs: move CONFIG_INTEL_TDX_GUEST to common directory (Wander Lairson Costa)
+- redhat/Makefile: Use new BUILD_TARGET for RHEL dist[g]-brew target (Prarit Bhargava)
+- redhat: method.py: change the output loop to use 'values' method (Patrick Talbert)
+- redhat: use 'update' method in merge.py (Patrick Talbert)
+- redhat: Use a context manager in merge.py for opening the config file for reading (Patrick Talbert)
+- redhat: automatically strip newlines in merge.py (Clark Williams)
+- redhat: python replacement for merge.pl (Clark Williams)
+- redhat/docs: Update with DISTLOCALVERSION (Prarit Bhargava)
+- redhat/Makefile: Rename LOCALVERSION to DISTLOCALVERSION (Akihiko Odaki)
+- Adjust FIPS module name in RHEL (Vladis Dronov)
+- spec: prevent git apply from searching for the .git directory (Ondrej Mosnacek)
+- redhat: Remove parallel_xz.sh (Prarit Bhargava)
+- Turn on Multi-Gen LRU for Fedora (Justin M. Forbes)
+- Add kasan_test to mod-internal.list (Justin M. Forbes)
+- redhat/Makefile.variables: Fix typo with RHDISTGIT_TMP (Prarit Bhargava)
+- spec: fix path to `installing_core` stamp file for subpackages (Jonathan Lebon)
+- Remove unused ci scripts (Don Zickus)
+- Rename FORCE_MAX_ZONEORDER to ARCH_FORCE_MAX_ORDER in configs (Justin M. Forbes)
+- redhat: Add new fortify_kunit & is_signed_type_kunit to mod-internal.list (Patrick Talbert)
+- Rename FORCE_MAX_ZONEORDER to ARCH_FORCE_MAX_ORDER in pending (Justin M. Forbes)
+- Add acpi video to the filter_modules.sh for rhel (Justin M. Forbes)
+- Change acpi_bus_get_acpi_device to acpi_get_acpi_dev (Justin M. Forbes)
+- Turn on ACPI_VIDEO for arm (Justin M. Forbes)
+- Turn on CONFIG_PRIME_NUMBERS as a module (Justin M. Forbes)
+- Add new drm kunit tests to mod-internal.list (Justin M. Forbes)
+- redhat: fix elf got hardening for vm tools (Frantisek Hrbata)
+- kernel.spec.template: remove some temporary files early (Ondrej Mosnacek)
+- kernel.spec.template: avoid keeping two copies of vmlinux (Ondrej Mosnacek)
+- Add fortify_kunit to mod-internal.list (Justin M. Forbes)
+- Add module filters for Fedora as acpi video has new deps (Justin M. Forbes)
+- One more mismatch (Justin M. Forbes)
+- Fix up pending for mismatches (Justin M. Forbes)
+- Trim changelog with the reset (Justin M. Forbes)
+- Reset the RHEL_RELEASE in Makefile.rhelver (Justin M. Forbes)
+- Forgot to remove this from pending, it is set properly in ark (Justin M. Forbes)
+- redhat/Makefile: Add DIST to git tags for RHEL (Prarit Bhargava)
+- redhat/configs: Move CONFIG_ARM_SMMU_QCOM_DEBUG to common (Jerry Snitselaar)
+- Common config cleanup for 6.0 (Justin M. Forbes)
+- Allow selftests to fail without killing the build (Justin M. Forbes)
+- redhat: Remove redhat/Makefile.rhpkg (Prarit Bhargava)
+- redhat/Makefile: Move RHDISTGIT_CACHE and RHDISTGIT_TMP (Prarit Bhargava)
+- redhat/Makefile.rhpkg: Remove RHDISTGIT_USER (Prarit Bhargava)
+- redhat/Makefile: Move RHPKG_BIN to redhat/Makefile (Prarit Bhargava)
+- common: clean up Android option with removal of CONFIG_ANDROID (Peter Robinson)
+- redhat/configs: Remove x86_64 from priority files (Prarit Bhargava)
+- redhat/configs/pending-ark: Remove x86_64 directory (Prarit Bhargava)
+- redhat/configs/pending-fedora: Remove x86_64 directory (Prarit Bhargava)
+- redhat/configs/fedora: Remove x86_64 directory (Prarit Bhargava)
+- redhat/configs/common: Remove x86_64 directory (Prarit Bhargava)
+- redhat/configs/ark: Remove x86_64 directory (Prarit Bhargava)
+- redhat/configs/custom-overrides: Remove x86_64 directory (Prarit Bhargava)
+- configs: use common CONFIG_ARM64_SME for ark and fedora (Mark Salter)
+- redhat/configs: Add a warning message to priority.common (Prarit Bhargava)
+- redhat/configs: Enable INIT_STACK_ALL_ZERO for Fedora (Miko Larsson)
+- redhat: Set CONFIG_MAXLINEAR_GPHY to =m (Petr Oros)
+- redhat/configs enable CONFIG_INTEL_IFS (David Arcari)
+- redhat: Remove filter-i686.sh.rhel (Prarit Bhargava)
+- redhat/Makefile: Set PATCHLIST_URL to none for RHEL/cs9 (Prarit Bhargava)
+- redhat: remove GL_DISTGIT_USER, RHDISTGIT and unify dist-git cloning (Prarit Bhargava)
+- redhat/Makefile.variables: Add ADD_COMMITID_TO_VERSION (Prarit Bhargava)
+- kernel.spec: disable vmlinux.h generation for s390 zfcpdump config (Prarit Bhargava)
+- perf: Require libbpf 0.6.0 or newer (Prarit Bhargava)
+- kabi: add stablelist helpers (Prarit Bhargava)
+- Makefile: add kabi targets (Prarit Bhargava)
+- kabi: add support for symbol namespaces into check-kabi (Prarit Bhargava)
+- kabi: ignore new stablelist metadata in show-kabi (Prarit Bhargava)
+- redhat/Makefile: add dist-assert-tree-clean target (Prarit Bhargava)
+- redhat/kernel.spec.template: Specify vmlinux.h path when building samples/bpf (Prarit Bhargava) [2041365]
+- spec: Fix separate tools build (Prarit Bhargava) [2054579]
+- redhat/scripts: Update merge-subtrees.sh with new subtree location (Prarit Bhargava)
+- redhat/kernel.spec.template: enable dependencies generation (Prarit Bhargava)
+- redhat: build and include memfd to kernel-selftests-internal (Prarit Bhargava) [2027506]
+- redhat/kernel.spec.template: Link perf with --export-dynamic (Prarit Bhargava)
+- redhat: kernel.spec: selftests: abort on build failure (Prarit Bhargava)
+- redhat: configs: move CONFIG_SERIAL_MULTI_INSTANTIATE=m settings to common/x86 (Jaroslav Kysela)
+- configs: enable CONFIG_HP_ILO for aarch64 (Mark Salter)
+- all: cleanup dell config options (Peter Robinson)
+- redhat: Include more kunit tests (Nico Pache)
+- common: some minor cleanups/de-dupe (Peter Robinson)
+- common: enable INTEGRITY_MACHINE_KEYRING on all configurations (Peter Robinson)
+- Fedora 6.0 configs update (Justin M. Forbes)
+- redhat/self-test: Ignore .rhpkg.mk files (Prarit Bhargava)
+- redhat/configs: Enable CONFIG_PRINTK_INDEX on Fedora (Prarit Bhargava)
+- redhat/configs: Cleanup CONFIG_X86_KERNEL_IBT (Prarit Bhargava)
+- Fix up SND_CTL debug options (Justin M. Forbes)
+- redhat: create /boot symvers link if it doesn't exist (Jan Stancek)
+- redhat: remove duplicate kunit tests in mod-internal.list (Nico Pache)
+- configs/fedora: Make Fedora work with HNS3 network adapter (Zamir SUN)
+- redhat/configs/fedora/generic: Enable CONFIG_BLK_DEV_UBLK on Fedora (Richard W.M. Jones) [2122595]
+- fedora: disable IWLMEI (Peter Robinson)
+- redhat/configs: enable UINPUT on aarch64 (Benjamin Tissoires)
+- Fedora 6.0 configs part 1 (Justin M. Forbes)
+- redhat/Makefile: Always set UPSTREAM (Prarit Bhargava)
+- redhat/configs: aarch64: Turn on Apple Silicon configs for Fedora (Eric Curtin)
+- Add cpumask_kunit to mod-internal.list (Justin M. Forbes)
+- config - consolidate disabled MARCH options on s390x (Dan Horák)
+- move the baseline arch to z13 for s390x in F-37+ (Dan Horák)
+- redhat/scripts/rh-dist-git.sh: Fix outdated cvs reference (Prarit Bhargava)
+- redhat/scripts/expand_srpm.sh: Use Makefile variables (Prarit Bhargava)
+- redhat/scripts/clone_tree.sh: Use Makefile variables (Prarit Bhargava)
+- Fedora: arm changes for 6.0, part 1, with some ACPI (Peter Robinson)
+- redhat/self-test: Fix shellcheck errors (Prarit Bhargava)
+- redhat/docs: Add dist-brew BUILD_FLAGS information (Prarit Bhargava)
+- redhat: change the changelog item for upstream merges (Herton R. Krzesinski)
+- redhat: fix dist-release build number test (Herton R. Krzesinski)
+- redhat: fix release number bump when dist-release-changed runs (Herton R. Krzesinski)
+- redhat: use new genlog.sh script to detect changes for dist-release (Herton R. Krzesinski)
+- redhat: move changelog addition to the spec file back into genspec.sh (Herton R. Krzesinski)
+- redhat: always add a rebase entry when ark merges from upstream (Herton R. Krzesinski)
+- redhat: drop merge ark patches hack (Herton R. Krzesinski)
+- redhat: don't hardcode temporary changelog file (Herton R. Krzesinski)
+- redhat: split changelog generation from genspec.sh (Herton R. Krzesinski)
+- redhat: configs: Disable FIE on arm (Jeremy Linton) [2012226]
+- redhat/Makefile: Clean linux tarballs (Prarit Bhargava)
+- redhat/configs: Cleanup CONFIG_ACPI_AGDI (Prarit Bhargava)
+- spec: add cpupower daemon reload on install/upgrade (Jarod Wilson)
+- redhat: properly handle binary files in patches (Ondrej Mosnacek)
+- Add python3-setuptools buildreq for perf (Justin M. Forbes)
+- Add cros_kunit to mod-internal.list (Justin M. Forbes)
+- Add new tests to mod-internal.list (Justin M. Forbes)
+- Turn off some Kunit tests in pending (Justin M. Forbes)
+- Clean up a mismatch in Fedora configs (Justin M. Forbes)
+- redhat/configs: Sync up Retbleed configs with centos-stream (Waiman Long)
+- Change CRYPTO_BLAKE2S_X86 from m to y (Justin M. Forbes)
+- Leave CONFIG_ACPI_VIDEO on for x86 only (Justin M. Forbes)
+- Fix BLAKE2S_ARM and BLAKE2S_X86 configs in pending (Justin M. Forbes)
+- Fix pending for ACPI_VIDEO (Justin M. Forbes)
+- Reset release (Justin M. Forbes)
+- redhat/configs: Fix rm warning on config warnings (Eric Chanudet)
+- redhat/Makefile: Deprecate PREBUILD_GIT_ONLY variable (Prarit Bhargava)
+- redhat/Makefile: Deprecate SINGLE_TARBALL variable (Prarit Bhargava)
+- redhat/Makefile: Deprecate GIT variable (Prarit Bhargava)
+- Update CONFIG_LOCKDEP_CHAINS_BITS to 18 (cmurf)
+- Add new FIPS module name and version configs (Vladis Dronov)
+- redhat/configs/fedora: Make PowerPC's nx-gzip built-in (Jakub Čajka)
+- omit unused Provides (Dan Horák)
+- self-test: Add test for DIST=".eln" (Prarit Bhargava)
+- redhat: Enable CONFIG_LZ4_COMPRESS on Fedora (Prarit Bhargava)
+- fedora: armv7: enable MMC_STM32_SDMMC (Peter Robinson)
+- .gitlab-ci.yaml: Add test for dist-get-buildreqs target (Prarit Bhargava)
+- redhat/docs: Add information on build dependencies (Prarit Bhargava)
+- redhat/Makefile: Add better pass message for dist-get-buildreqs (Prarit Bhargava)
+- redhat/Makefile: Provide a better message for system-sb-certs (Prarit Bhargava)
+- redhat/Makefile: Change dist-buildreq-check to a non-blocking target (Prarit Bhargava)
+- create-data: Parallelize spec file data (Prarit Bhargava)
+- create-data.sh: Store SOURCES Makefile variable (Prarit Bhargava)
+- redhat/Makefile: Split up setup-source target (Prarit Bhargava)
+- create-data.sh: Redefine varfilename (Prarit Bhargava)
+- create-data.sh: Parallelize variable file creation (Prarit Bhargava)
+- redhat/configs: Enable CONFIG_LZ4_COMPRESS (Prarit Bhargava)
+- redhat/docs: Update brew information (Prarit Bhargava)
+- redhat/Makefile: Fix eln BUILD_TARGET (Prarit Bhargava)
+- redhat/Makefile: Set BUILD_TARGET for dist-brew (Prarit Bhargava)
+- kernel.spec.template: update (s390x) expoline.o path (Joe Lawrence)
+- fedora: enable BCM_NET_PHYPTP (Peter Robinson)
+- Fedora 5.19 configs update part 2 (Justin M. Forbes)
+- redhat/Makefile: Change fedora BUILD_TARGET (Prarit Bhargava)
+- New configs in security/keys (Fedora Kernel Team)
+- Fedora: arm: enable a pair of drivers (Peter Robinson)
+- redhat: make kernel-zfcpdump-core to not provide kernel-core/kernel (Herton R. Krzesinski)
+- redhat/configs: Enable QAT devices for arches other than x86 (Vladis Dronov)
+- Fedora 5.19 configs pt 1 (Justin M. Forbes)
+- redhat: Exclude cpufreq.h from kernel-headers (Patrick Talbert)
+- Add rtla subpackage for kernel-tools (Justin M. Forbes)
+- fedora: arm: enable a couple of QCom drivers (Peter Robinson)
+- redhat/Makefile: Deprecate BUILD_SCRATCH_TARGET (Prarit Bhargava)
+- redhat: enable CONFIG_DEVTMPFS_SAFE (Mark Langsdorf)
+- redhat/Makefile: Remove deprecated variables and targets (Prarit Bhargava)
+- Split partner modules into a sub-package (Alice Mitchell)
+- Enable kAFS and its dependencies in RHEL (Alice Mitchell)
+- Enable Marvell OcteonTX2 crypto device in ARK (Vladis Dronov)
+- redhat/Makefile: Remove --scratch from BUILD_TARGET (Prarit Bhargava)
+- redhat/Makefile: Fix dist-brew and distg-brew targets (Prarit Bhargava)
+- fedora: arm64: Initial support for TI Keystone 3 (ARCH_K3) (Peter Robinson)
+- fedora: arm: enable Hardware Timestamping Engine support (Peter Robinson)
+- fedora: wireless: disable SiLabs and PureLiFi (Peter Robinson)
+- fedora: updates for 5.19 (Peter Robinson)
+- fedora: minor updates for Fedora configs (Peter Robinson)
+- configs/fedora: Enable the pinctrl SC7180 driver built-in (Enric Balletbo i Serra)
+- redhat/configs: enable CONFIG_DEBUG_NET for debug kernel (Hangbin Liu)
+- redhat/Makefile: Add SPECKABIVERSION variable (Prarit Bhargava)
+- redhat/self-test: Provide better failure output (Prarit Bhargava)
+- redhat/self-test: Reformat tests to kernel standard (Prarit Bhargava)
+- redhat/self-test: Add purpose and header to each test (Prarit Bhargava)
+- Drop outdated CRYPTO_ECDH configs (Vladis Dronov)
+- Brush up crypto SHA512 and USER configs (Vladis Dronov)
+- Brush up crypto ECDH and ECDSA configs (Vladis Dronov)
+- redhat/self-test: Update data set (Prarit Bhargava)
+- create-data.sh: Reduce specfile data output (Prarit Bhargava)
+- redhat/configs: restore/fix core INTEL_LPSS configs to be builtin again (Hans de Goede)
+- Enable CKI on os-build MRs only (Don Zickus)
+- self-test: Fixup Makefile contents test (Prarit Bhargava)
+- redhat/self-test: self-test data update (Prarit Bhargava)
+- redhat/self-test: Fix up create-data.sh to not report local variables (Prarit Bhargava)
+- redhat/configs/fedora: Enable a set of modules used on some x86 tablets (Hans de Goede)
+- redhat/configs: Make INTEL_SOC_PMIC_CHTDC_TI builtin (Hans de Goede)
+- redhat/configs/fedora: enable missing modules for Intel IPU3 camera support (Hans de Goede)
+- Common: minor cleanups (Peter Robinson)
+- fedora: some minor Fedora cleanups (Peter Robinson)
+- fedora: drop X86_PLATFORM_DRIVERS_DELL dupe (Peter Robinson)
+- redhat: change tools_make macro to avoid full override of variables in Makefile (Herton R. Krzesinski)
+- Fix typo in Makefile for Fedora Stable Versioning (Justin M. Forbes)
+- Remove duplicates from ark/generic/s390x/zfcpdump/ (Vladis Dronov)
+- Move common/debug/s390x/zfcpdump/ configs to ark/debug/s390x/zfcpdump/ (Vladis Dronov)
+- Move common/generic/s390x/zfcpdump/ configs to ark/generic/s390x/zfcpdump/ (Vladis Dronov)
+- Drop RCU_EXP_CPU_STALL_TIMEOUT to 0, we are not really android (Justin M. Forbes)
+- redhat/configs/README: Update the README (Prarit Bhargava)
+- redhat/docs: fix hyperlink typo (Patrick Talbert)
+- all: net: remove old NIC/ATM drivers that use virt_to_bus() (Peter Robinson)
+- Explicitly turn off CONFIG_KASAN_INLINE for ppc (Justin M. Forbes)
+- redhat/docs: Add a description of kernel naming (Prarit Bhargava)
+- Change CRYPTO_CHACHA_S390 from m to y (Justin M. Forbes)
+- enable CONFIG_NET_ACT_CTINFO in ark (Davide Caratti)
+- redhat/configs: enable CONFIG_SP5100_TCO (David Arcari)
+- redhat/configs: Set CONFIG_VIRTIO_IOMMU on x86_64 (Eric Auger) [2089765]
+- Turn off KASAN_INLINE for RHEL ppc in pending (Justin M. Forbes)
+- redhat/kernel.spec.template: update selftest data via "make dist-self-test-data" (Denys Vlasenko)
+- redhat/kernel.spec.template: remove stray *.hardlink-temporary files, if any (Denys Vlasenko)
+- Fix up ZSMALLOC config for s390 (Justin M. Forbes)
+- Turn on KASAN_OUTLINE for ppc debug (Justin M. Forbes)
+- Turn on KASAN_OUTLINE for PPC debug to avoid mismatch (Justin M. Forbes)
+- Fix up crypto config mismatches (Justin M. Forbes)
+- Fix up config mismatches (Justin M. Forbes)
+- generic/fedora: cleanup and disable Lightning Mountain SoC (Peter Robinson)
+- redhat: Set SND_SOC_SOF_HDA_PROBES to =m (Patrick Talbert)
+- Fix versioning on stable Fedora (Justin M. Forbes)
+- Enable PAGE_POOL_STATS for arm only (Justin M. Forbes)
+- Revert "Merge branch 'fix-ci-20220523' into 'os-build'" (Patrick Talbert)
+- Fix changelog one more time post rebase (Justin M. Forbes)
+- Flip CONFIG_RADIO_ADAPTERS to module for Fedora (Justin M. Forbes)
+- Reset Release for 5.19 (Justin M. Forbes)
+- redhat/Makefile: Drop quotation marks around string definitions (Prarit Bhargava)
+- Fedora: arm: Updates for QCom devices (Peter Robinson)
+- Fedora arm and generic updates for 5.17 (Peter Robinson)
+- enable COMMON_CLK_SI5341 for Xilinx ZYNQ-MP (Peter Robinson)
+- Turn on CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING for Fedora (Justin M. Forbes)
+- redhat/self-test/data: Update data set (Prarit Bhargava)
+- Revert variable switch for lasttag (Justin M. Forbes)
+- redhat: Add self-tests to .gitlab-ci.yml (Prarit Bhargava)
+- redhat/self-test: Update data (Prarit Bhargava)
+- redhat/self-test: Unset Makefile variables (Prarit Bhargava)
+- redhat/self-test: Omit SHELL variable from test data (Prarit Bhargava)
+- Add CONFIG_EFI_DXE_MEM_ATTRIBUTES (Justin M. Forbes)
+- Update filter-modules for mlx5-vfio-pci (Justin M. Forbes)
+- Fedora configs for 5.18 (Justin M. Forbes)
+- self-test/data/create-data.sh: Avoid SINGLE_TARBALL warning (Prarit Bhargava)
+- redhat/Makefile: Rename PREBUILD to UPSTREAMBUILD (Prarit Bhargava)
+- redhat/Makefile: Rename BUILDID to LOCALVERSION (Prarit Bhargava)
+- redhat/Makefile: Fix dist-brew & distg-brew targets (Prarit Bhargava)
+- redhat/Makefile: Reorganize MARKER code (Prarit Bhargava)
+- redhat/scripts/new_release.sh: Use Makefile variables (Prarit Bhargava)
+- redhat/Makefile: Rename __YSTREAM and __ZSTREAM (Prarit Bhargava)
+- redhat/genspec.sh: Add comment about SPECBUILDID variable (Prarit Bhargava)
+- redhat/kernel.spec.template: Move genspec variables into one section (Prarit Bhargava)
+- redhat/kernel.spec.template: Remove kversion (Prarit Bhargava)
+- redhat/Makefile: Add SPECTARFILE_RELEASE comment (Prarit Bhargava)
+- redhat/Makefile: Rename RPMVERSION to BASEVERSION (Prarit Bhargava)
+- redhat/Makefile: Target whitespace cleanup (Prarit Bhargava)
+- redhat/Makefile: Move SPECRELEASE to genspec.sh (Prarit Bhargava)
+- redhat/Makefile: Add kernel-NVR comment (Prarit Bhargava)
+- redhat/Makefile: Use SPECFILE variable (Prarit Bhargava)
+- redhat/Makefile: Remove KEXTRAVERSION (Prarit Bhargava)
+- redhat: Enable VM kselftests (Nico Pache) [1978539]
+- redhat: enable CONFIG_TEST_VMALLOC for vm selftests (Nico Pache)
+- redhat: Enable HMM test to be used by the kselftest test suite (Nico Pache)
+- redhat/Makefile.variables: Change git hash length to default (Prarit Bhargava)
+- redhat/Makefile: Drop quotation marks around string definitions (Prarit Bhargava)
+- Turn on INTEGRITY_MACHINE_KEYRING for Fedora (Justin M. Forbes)
+- redhat/configs: fix CONFIG_INTEL_ISHTP_ECLITE (David Arcari)
+- redhat/configs: Fix rm warning on error (Prarit Bhargava)
+- Fix nightly merge CI (Don Zickus)
+- redhat/kernel.spec.template: fix standalone tools build (Jan Stancek)
+- Add system-sb-certs for RHEL-9 (Don Zickus)
+- Fix dist-buildcheck-reqs (Don Zickus)
+- move DAMON configs to correct directory (Chris von Recklinghausen)
+- redhat: indicate HEAD state in tarball/rpm name (Jarod Wilson)
+- Fedora 5.18 config set part 1 (Justin M. Forbes)
+- fedora: arm: Enable new Rockchip 356x series drivers (Peter Robinson)
+- fedora: arm: enable DRM_I2C_NXP_TDA998X on aarch64 (Peter Robinson)
+- redhat/self-test: Add test to verify Makefile declarations. (Prarit Bhargava)
+- redhat/Makefile: Add RHTEST (Prarit Bhargava)
+- redhat: shellcheck cleanup (Prarit Bhargava)
+- redhat/self-test/data: Cleanup data (Prarit Bhargava)
+- redhat/self-test: Add test to verify SPEC variables (Prarit Bhargava)
+- redhat/Makefile: Add 'duplicate' SPEC entries for user set variables (Prarit Bhargava)
+- redhat/Makefile: Rename TARFILE_RELEASE to SPECTARFILE_RELEASE (Prarit Bhargava)
+- redhat/genspec: Rename PATCHLIST_CHANGELOG to SPECPATCHLIST_CHANGELOG (Prarit Bhargava)
+- redhat/genspec: Rename DEBUG_BUILDS_ENABLED to SPECDEBUG_BUILDS_ENABLED (Prarit Bhargava)
+- redhat/Makefile: Rename PKGRELEASE to SPECBUILD (Prarit Bhargava)
+- redhat/genspec: Rename BUILDID_DEFINE to SPECBUILDID (Prarit Bhargava)
+- redhat/Makefile: Rename CHANGELOG to SPECCHANGELOG (Prarit Bhargava)
+- redhat/Makefile: Rename RPMKEXTRAVERSION to SPECKEXTRAVERSION (Prarit Bhargava)
+- redhat/Makefile: Rename RPMKSUBLEVEL to SPECKSUBLEVEL (Prarit Bhargava)
+- redhat/Makefile: Rename RPMKPATCHLEVEL to SPECKPATCHLEVEL (Prarit Bhargava)
+- redhat/Makefile: Rename RPMKVERSION to SPECKVERSION (Prarit Bhargava)
+- redhat/Makefile: Rename KVERSION to SPECVERSION (Prarit Bhargava)
+- redhat/Makefile: Deprecate some simple targets (Prarit Bhargava)
+- redhat/Makefile: Use KVERSION (Prarit Bhargava)
+- redhat/configs: Set GUP_TEST in debug kernel (Joel Savitz)
+- enable DAMON configs (Chris von Recklinghausen) [2004233]
+- redhat: add zstream switch for zstream release numbering (Herton R. Krzesinski)
+- redhat: change kabi tarballs to use the package release (Herton R. Krzesinski)
+- redhat: generate distgit changelog in genspec.sh as well (Herton R. Krzesinski)
+- redhat: make genspec prefer metadata from git notes (Herton R. Krzesinski)
+- redhat: use tags from git notes for zstream to generate changelog (Herton R. Krzesinski)
+- ARK: Remove code marking devices unmaintained (Peter Georg)
+- rh_message: Fix function name (Peter Georg) [2019377]
+- Turn on CONFIG_RANDOM_TRUST_BOOTLOADER (Justin M. Forbes)
+- redhat/configs: aarch64: enable CPU_FREQ_GOV_SCHEDUTIL (Mark Salter)
+- Move CONFIG_HW_RANDOM_CN10K to a proper place (Vladis Dronov)
+- redhat/self-test: Clean up data set (Prarit Bhargava)
+- redhat/Makefile.rhpkg: Remove quotes for RHDISTGIT (Prarit Bhargava)
+- redhat/scripts/create-tarball.sh: Use Makefile variables (Prarit Bhargava)
+- redhat/Makefile: Deprecate SINGLE_TARBALL (Prarit Bhargava)
+- redhat/Makefile: Move SINGLE_TARBALL to Makefile.variables (Prarit Bhargava)
+- redhat/Makefile: Use RPMVERSION (Prarit Bhargava)
+- redhat/scripts/rh-dist-git.sh: Use Makefile variables (Prarit Bhargava)
+- redhat/configs/build_configs.sh: Use Makefile variables (Prarit Bhargava)
+- redhat/configs/process_configs.sh: Use Makefile variables (Prarit Bhargava)
+- redhat/kernel.spec.template: Use RPM_BUILD_NCPUS (Prarit Bhargava)
+- redhat/configs/generate_all_configs.sh: Use Makefile variables (Prarit Bhargava)
+- redhat/configs: enable nf_tables SYNPROXY extension on ark (Davide Caratti)
+- fedora: Disable fbdev drivers missed before (Javier Martinez Canillas)
+- Redhat: enable Kfence on production servers (Nico Pache)
+- redhat: ignore known empty patches on the patches rpminspect test (Herton R. Krzesinski)
+- kernel-ark: arch_hw Update CONFIG_MOUSE_VSXXXAA=m (Tony Camuso) [2062909]
+- spec: keep .BTF section in modules for s390 (Yauheni Kaliuta) [2071969]
+- kernel.spec.template: Ship arch/s390/lib/expoline.o in kernel-devel (Ondrej Mosnacek)
+- redhat: disable tv/radio media device infrastructure (Jarod Wilson)
+- redhat/configs: clean up INTEL_LPSS configuration (David Arcari)
+- Have to rename the actual contents too (Justin M. Forbes)
+- The CONFIG_SATA_MOBILE_LPM_POLICY rename was reverted (Justin M. Forbes)
+- redhat: Enable KASAN on all ELN debug kernels (Nico Pache)
+- redhat: configs: Enable INTEL_IOMMU_DEBUGFS for debug builds (Jerry Snitselaar)
+- generic: can: disable CAN_SOFTING everywhere (Peter Robinson)
+- redhat/configs: Enable CONFIG_DM_ERA=m for all (Yanko Kaneti)
+- redhat/configs: enable CONFIG_SAMPLE_VFIO_MDEV_MTTY (Patrick Talbert)
+- Build intel_sdsi with %%{tools_make} (Justin M. Forbes)
+- configs: remove redundant Fedora config for INTEL_IDXD_COMPAT (Jerry Snitselaar)
+- redhat/configs: enable CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT (Joel Savitz) [2026319]
+- configs: enable CONFIG_RMI4_F3A (Benjamin Tissoires)
+- redhat: configs: Disable TPM 1.2 specific drivers (Jerry Snitselaar)
+- redhat/configs: Enable cr50 I2C TPM interface (Akihiko Odaki)
+- spec: make HMAC file encode relative path (Jonathan Lebon)
+- redhat/kernel.spec.template: Add intel_sdsi utility (Prarit Bhargava)
+- Spec fixes for intel-speed-select (Justin M. Forbes)
+- Add Partner Supported taint flag to kAFS (Alice Mitchell) [2038999]
+- Add Partner Supported taint flag (Alice Mitchell) [2038999]
+- Enabled INTEGRITY_MACHINE_KEYRING for all configs. (Peter Robinson)
+- redhat/configs: Enable CONFIG_RCU_SCALE_TEST & CONFIG_RCU_REF_SCALE_TEST (Waiman Long)
+- Add clk_test and clk-gate_test to mod-internal.list (Justin M. Forbes)
+- redhat/self-tests: Ignore UPSTREAM (Prarit Bhargava)
+- redhat/self-tests: Ignore RHGITURL (Prarit Bhargava)
+- redhat/Makefile.variables: Extend git hash length to 15 (Prarit Bhargava)
+- redhat/self-test: Remove changelog from spec files (Prarit Bhargava)
+- redhat/genspec.sh: Rearrange genspec.sh (Prarit Bhargava)
+- redhat/self-test: Add spec file data (Prarit Bhargava)
+- redhat/self-test: Add better dist-dump-variables test (Prarit Bhargava)
+- redhat/self-test: Add variable test data (Prarit Bhargava)
+- redhat/config: Remove obsolete CONFIG_MFD_INTEL_PMT (David Arcari)
+- redhat/configs: enable CONFIG_INTEL_ISHTP_ECLITE (David Arcari)
+- Avoid creating files in $RPM_SOURCE_DIR (Nicolas Chauvet)
+- Flip CRC64 from off to y (Justin M. Forbes)
+- New configs in lib/Kconfig (Fedora Kernel Team)
+- disable redundant assignment of CONFIG_BQL on ARK (Davide Caratti)
+- redhat/configs: remove unnecessary GPIO options for aarch64 (Brian Masney)
+- redhat/configs: remove viperboard related Kconfig options (Brian Masney)
+- redhat/configs/process_configs.sh: Avoid race with find (Prarit Bhargava)
+- redhat/configs/process_configs.sh: Remove CONTINUEONERROR (Prarit Bhargava)
+- Remove i686 configs and filters (Justin M. Forbes)
+- redhat/configs: Set CONFIG_X86_AMD_PSTATE built-in on Fedora (Prarit Bhargava)
+- Fix up mismatch with CRC64 (Justin M. Forbes)
+- Fedora config updates to fix process_configs (Justin M. Forbes)
+- redhat: Fix release tagging (Prarit Bhargava)
+- redhat/self-test: Fix version tag test (Prarit Bhargava)
+- redhat/self-test: Fix BUILD verification test (Prarit Bhargava)
+- redhat/self-test: Cleanup SRPM related self-tests (Prarit Bhargava)
+- redhat/self-test: Fix shellcheck test (Prarit Bhargava)
+- redhat/configs: Disable watchdog components (Prarit Bhargava)
+- redhat/README.Makefile: Add a Makefile README file (Prarit Bhargava)
+- redhat/Makefile: Remove duplicated code (Prarit Bhargava)
+- Add BuildRequires libnl3-devel for intel-speed-select (Justin M. Forbes)
+- Add new kunit tests for 5.18 to mod-internal.list (Justin M. Forbes)
+- Fix RHDISTGIT for Fedora (Justin M. Forbes)
+- redhat/configs/process_configs.sh: Fix race with tools generation (Prarit Bhargava)
+- New configs in drivers/dax (Fedora Kernel Team)
+- Fix up CONFIG_SND_AMD_ACP_CONFIG files (Patrick Talbert)
+- Remove CONFIG_SND_SOC_SOF_DEBUG_PROBES files (Patrick Talbert)
+- SATA_MOBILE_LPM_POLICY is now SATA_LPM_POLICY (Justin M. Forbes)
+- Define SNAPSHOT correctly when VERSION_ON_UPSTREAM is 0 (Justin M. Forbes)
+- redhat/Makefile: Fix dist-git (Prarit Bhargava)
+- Clean up the changelog (Justin M. Forbes)
+- Change the pending-ark CONFIG_DAX to y due to mismatch (Justin M. Forbes)
+- Reset Makefile.rhelver for the 5.18 cycle (Justin M. Forbes)
+- Enable net reference count trackers in all debug kernels (Jiri Benc)
+- redhat/Makefile: Reorganize variables (Prarit Bhargava)
+- redhat/Makefile: Add some descriptions (Prarit Bhargava)
+- redhat/Makefile: Move SNAPSHOT check (Prarit Bhargava)
+- redhat/Makefile: Deprecate BREW_FLAGS, KOJI_FLAGS, and TEST_FLAGS (Prarit Bhargava)
+- redhat/genspec.sh: Rework RPMVERSION variable (Prarit Bhargava)
+- redhat/Makefile: Remove dead comment (Prarit Bhargava)
+- redhat/Makefile: Cleanup KABI* variables. (Prarit Bhargava)
+- redhat/Makefile.variables: Default RHGITCOMMIT to HEAD (Prarit Bhargava)
+- redhat/scripts/create-tarball.sh: Use Makefile TARBALL variable (Prarit Bhargava)
+- redhat/Makefile: Remove extra DIST_BRANCH (Prarit Bhargava)
+- redhat/Makefile: Remove STAMP_VERSION (Prarit Bhargava)
+- redhat/Makefile: Move NO_CONFIGCHECKS to Makefile.variables (Prarit Bhargava)
+- redhat/Makefile: Move RHJOBS to Makefile.variables (Prarit Bhargava)
+- redhat/Makefile: Move RHGIT* variables to Makefile.variables (Prarit Bhargava)
+- redhat/Makefile: Move PREBUILD_GIT_ONLY to Makefile.variables (Prarit Bhargava)
+- redhat/Makefile: Move BUILD to Makefile.variables (Prarit Bhargava)
+- redhat/Makefile: Move BUILD_FLAGS to Makefile.variables. (Prarit Bhargava)
+- redhat/Makefile: Move BUILD_PROFILE to Makefile.variables (Prarit Bhargava)
+- redhat/Makefile: Move BUILD_TARGET and BUILD_SCRATCH_TARGET to Makefile.variables (Prarit Bhargava)
+- redhat/Makefile: Remove RHPRODUCT variable (Prarit Bhargava)
+- redhat/Makefile: Cleanup DISTRO variable (Prarit Bhargava)
+- redhat/Makefile: Move HEAD to Makefile.variables. (Prarit Bhargava)
+- redhat: Combine Makefile and Makefile.common (Prarit Bhargava)
+- redhat/koji/Makefile: Decouple koji Makefile from Makefile.common (Prarit Bhargava)
+- Set CONFIG_SND_SOC_SOF_MT8195 for Fedora and turn on VDPA_SIM_BLOCK (Justin M. Forbes)
+- Add asus_wmi_sensors modules to filters for Fedora (Justin M. Forbes)
+- redhat: spec: trigger dracut when modules are installed separately (Jan Stancek)
+- Last of the Fedora 5.17 configs initial pass (Justin M. Forbes)
+- redhat/Makefile: Silence dist-clean-configs output (Prarit Bhargava)
+- Fedora 5.17 config updates (Justin M. Forbes)
+- Setting CONFIG_I2C_SMBUS to "m" for ark (Gopal Tiwari)
+- Print arch with process_configs errors (Justin M. Forbes)
+- Pass RHJOBS to process_configs for dist-configs-check as well (Justin M. Forbes)
+- redhat/configs/process_configs.sh: Fix issue with old error files (Prarit Bhargava)
+- redhat/configs/build_configs.sh: Parallelize execution (Prarit Bhargava)
+- redhat/configs/build_configs.sh: Provide better messages (Prarit Bhargava)
+- redhat/configs/build_configs.sh: Create unique output files (Prarit Bhargava)
+- redhat/configs/build_configs.sh: Add local variables (Prarit Bhargava)
+- redhat/configs/process_configs.sh: Parallelize execution (Prarit Bhargava)
+- redhat/configs/process_configs.sh: Provide better messages (Prarit Bhargava)
+- redhat/configs/process_configs.sh: Create unique output files (Prarit Bhargava)
+- redhat/configs/process_configs.sh: Add processing config function (Prarit Bhargava)
+- redhat: Unify genspec.sh and kernel.spec variable names (Prarit Bhargava)
+- redhat/genspec.sh: Remove options and use Makefile variables (Prarit Bhargava)
+- Add rebase note for 5.17 on Fedora stable (Justin M. Forbes)
+- More Fedora config updates for 5.17 (Justin M. Forbes)
+- redhat/configs: Disable CONFIG_MACINTOSH_DRIVERS in RHEL. (Prarit Bhargava)
+- redhat: Fix "make dist-release-finish" to use the correct NVR variables (Neal Gompa) [2053836]
+- Build CROS_EC Modules (Jason Montleon)
+- redhat: configs: change aarch64 default dma domain to lazy (Jerry Snitselaar)
+- redhat: configs: disable ATM protocols (Davide Caratti)
+- configs/fedora: Enable the interconnect SC7180 driver built-in (Enric Balletbo i Serra)
+- configs: clean up CONFIG_PAGE_TABLE_ISOLATION files (Ondrej Mosnacek)
+- redhat: configs: enable CONFIG_INTEL_PCH_THERMAL for RHEL x86 (David Arcari)
+- redhat/Makefile: Fix dist-dump-variables target (Prarit Bhargava)
+- redhat/configs: Enable DEV_DAX and DEV_DAX_PMEM modules on aarch64 for fedora (D Scott Phillips)
+- redhat/configs: Enable CONFIG_TRANSPARENT_HUGEPAGE on aarch64 for fedora (D Scott Phillips)
+- configs/process_configs.sh: Remove orig files (Prarit Bhargava)
+- redhat: configs: Disable CONFIG_MPLS for s390x/zfcpdump (Guillaume Nault)
+- Fedora 5.17 configs round 1 (Justin M. Forbes)
+- redhat: configs: disable the surface platform (David Arcari)
+- redhat: configs: Disable team driver (Hangbin Liu) [1945477]
+- configs: enable LOGITECH_FF for RHEL/CentOS too (Benjamin Tissoires)
+- redhat/configs: Disable CONFIG_SENSORS_NCT6683 in RHEL for arm/aarch64 (Dean Nelson) [2041186]
+- redhat: fix make {distg-brew,distg-koji} (Andrea Claudi)
+- [fedora] Turn on CONFIG_VIDEO_OV5693 for sensor support (Dave Olsthoorn)
+- Cleanup 'disabled' config options for RHEL (Prarit Bhargava)
+- redhat: move CONFIG_ARM64_MTE to aarch64 config directory (Herton R. Krzesinski)
+- Change CONFIG_TEST_BPF to a module (Justin M. Forbes)
+- Change CONFIG_TEST_BPF to module in pending MR coming for proper review (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_TEST_BPF (Viktor Malik)
+- Enable KUNIT tests for testing (Nico Pache)
+- Makefile: Check PKGRELEASE size on dist-brew targets (Prarit Bhargava)
+- kernel.spec: Add glibc-static build requirement (Prarit Bhargava)
+- Enable iSER on s390x (Stefan Schulze Frielinghaus)
+- redhat/configs: Enable CONFIG_ACER_WIRELESS (Peter Georg) [2025985]
+- kabi: Add kABI macros for enum type (Čestmír Kalina) [2024595]
+- kabi: expand and clarify documentation of aux structs (Čestmír Kalina) [2024595]
+- kabi: introduce RH_KABI_USE_AUX_PTR (Čestmír Kalina) [2024595]
+- kabi: rename RH_KABI_SIZE_AND_EXTEND to AUX (Čestmír Kalina) [2024595]
+- kabi: more consistent _RH_KABI_SIZE_AND_EXTEND (Čestmír Kalina) [2024595]
+- kabi: use fixed field name for extended part (Čestmír Kalina) [2024595]
+- kabi: fix dereference in RH_KABI_CHECK_EXT (Čestmír Kalina) [2024595]
+- kabi: fix RH_KABI_SET_SIZE macro (Čestmír Kalina) [2024595]
+- kabi: expand and clarify documentation (Čestmír Kalina) [2024595]
+- kabi: make RH_KABI_USE replace any number of reserved fields (Čestmír Kalina) [2024595]
+- kabi: rename RH_KABI_USE2 to RH_KABI_USE_SPLIT (Čestmír Kalina) [2024595]
+- kabi: change RH_KABI_REPLACE2 to RH_KABI_REPLACE_SPLIT (Čestmír Kalina) [2024595]
+- kabi: change RH_KABI_REPLACE_UNSAFE to RH_KABI_BROKEN_REPLACE (Čestmír Kalina) [2024595]
+- kabi: introduce RH_KABI_ADD_MODIFIER (Čestmír Kalina) [2024595]
+- kabi: Include kconfig.h (Čestmír Kalina) [2024595]
+- kabi: macros for intentional kABI breakage (Čestmír Kalina) [2024595]
+- kabi: fix the note about terminating semicolon (Čestmír Kalina) [2024595]
+- kabi: introduce RH_KABI_HIDE_INCLUDE and RH_KABI_FAKE_INCLUDE (Čestmír Kalina) [2024595]
+- spec: don't overwrite auto.conf with .config (Ondrej Mosnacek)
+- New configs in drivers/crypto (Fedora Kernel Team)
+- Add test_hash to the mod-internal.list (Justin M. Forbes)
+- configs: disable CONFIG_CRAMFS (Abhi Das) [2041184]
+- spec: speed up "cp -r" when it overwrites existing files. (Denys Vlasenko)
+- redhat: use centos x509.genkey file if building under centos (Herton R. Krzesinski)
+- Revert "[redhat] Generate a crashkernel.default for each kernel build" (Coiby Xu)
+- spec: make linux-firmware weak(er) dependency (Jan Stancek)
+- rtw89: enable new driver rtw89 and device RTK8852AE (Íñigo Huguet)
+- Config consolidation into common (Justin M. Forbes)
+- Add packaged but empty /lib/modules/<kver>/systemtap/ (Justin M. Forbes)
+- filter-modules.sh.rhel: Add ntc_thermistor to singlemods (Prarit Bhargava)
+- Move CONFIG_SND_SOC_TLV320AIC31XX as it is now selected by CONFIG_SND_SOC_FSL_ASOC_CARD (Justin M. Forbes)
+- Add dev_addr_lists_test to mod-internal.list (Justin M. Forbes)
+- configs/fedora: Enable CONFIG_NFC_PN532_UART to use the PN532 NFC module (Ziqian SUN (Zamir))
+- redhat: ignore ksamples and kselftests on the badfuncs rpminspect test (Herton R. Krzesinski)
+- redhat: disable upstream check for rpminspect (Herton R. Krzesinski)
+- redhat: switch the vsyscall config to CONFIG_LEGACY_VSYSCALL_XONLY=y (Herton R. Krzesinski) [1876977]
+- redhat: configs: increase CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE (Rafael Aquini)
+- move CONFIG_STRICT_SIGALTSTACK_SIZE to the appropriate directory (David Arcari)
+- redhat/configs: Enable CONFIG_DM_MULTIPATH_IOA for fedora (Benjamin Marzinski)
+- redhat/configs: Enable CONFIG_DM_MULTIPATH_HST (Benjamin Marzinski) [2000835]
+- redhat: Pull in openssl-devel as a build dependency correctly (Neal Gompa) [2034670]
+- redhat/configs: Migrate ZRAM_DEF_* configs to common/ (Neal Gompa)
+- redhat/configs: Enable CONFIG_CRYPTO_ZSTD (Neal Gompa) [2032758]
+- Turn CONFIG_DEVMEM back off for aarch64 (Justin M. Forbes)
+- Clean up excess text in Fedora config files (Justin M. Forbes)
+- Fedora config updates for 5.16 (Justin M. Forbes)
+- redhat/configs: enable CONFIG_INPUT_KEYBOARD for AARCH64 (Vitaly Kuznetsov)
+- Fedora configs for 5.16 pt 1 (Justin M. Forbes)
+- redhat/configs: NFS: disable UDP, insecure enctypes (Benjamin Coddington) [1952863]
+- Update rebase-notes with dracut 5.17 information (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_CRYPTO_BLAKE2B (Neal Gompa) [2031547]
+- Enable CONFIG_BPF_SYSCALL for zfcpdump (Jiri Olsa)
+- Enable CONFIG_CIFS_SMB_DIRECT for ARK (Ronnie Sahlberg)
+- mt76: enable new device MT7921E in CentOS/RHEL (Íñigo Huguet) [2004821]
+- Disable CONFIG_DEBUG_PREEMPT on normal builds (Phil Auld)
+- redhat/configs: Enable CONFIG_PCI_P2PDMA for ark (Myron Stowe)
+- pci.h: Fix static include (Prarit Bhargava)
+- Enable CONFIG_VFIO_NOIOMMU for Fedora (Justin M. Forbes)
+- redhat/configs: enable CONFIG_NTB_NETDEV for ark (John W. Linville)
+- drivers/pci/pci-driver.c: Fix if/ifdef typo (Prarit Bhargava)
+- common: arm64: ensure all the required arm64 errata are enabled (Peter Robinson)
+- kernel/rh_taint.c: Update to new messaging (Prarit Bhargava) [2019377]
+- redhat/configs: enable CONFIG_AMD_PTDMA for ark (John W. Linville)
+- redhat/configs: enable CONFIG_RD_ZSTD for rhel (Tao Liu) [2020132]
+- fedora: build TEE as a module for all arches (Peter Robinson)
+- common: build TRUSTED_KEYS in everywhere (Peter Robinson)
+- redhat: make Patchlist.changelog generation conditional (Herton R. Krzesinski)
+- redhat/configs: Add two new CONFIGs (Prarit Bhargava)
+- redhat/configs: Remove dead CONFIG files (Prarit Bhargava)
+- redhat/configs/evaluate_configs: Add find dead configs option (Prarit Bhargava)
+- Add more rebase notes for Fedora 5.16 (Justin M. Forbes)
+- Fedora: Feature: Retire wireless Extensions (Peter Robinson)
+- fedora: arm: some SoC enablement pieces (Peter Robinson)
+- fedora: arm: enable PCIE_ROCKCHIP_DW for rk35xx series (Peter Robinson)
+- fedora: enable RTW89 802.11 WiFi driver (Peter Robinson)
+- fedora: arm: Enable DRM_PANEL_EDP (Peter Robinson)
+- fedora: sound: enable new sound drivers (Peter Robinson)
+- redhat/configs: unset KEXEC_SIG for s390x zfcpdump (Coiby Xu)
+- spec: Keep .BTF section in modules (Jiri Olsa)
+- Fix up PREEMPT configs (Justin M. Forbes)
+- New configs in drivers/media (Fedora Kernel Team)
+- New configs in drivers/net/ethernet/litex (Fedora Kernel Team)
+- spec: add bpf_testmod.ko to kselftests/bpf (Viktor Malik)
+- New configs in drivers/net/wwan (Fedora Kernel Team)
+- New configs in drivers/i2c (Fedora Kernel Team)
+- redhat/docs/index.rst: Add local build information. (Prarit Bhargava)
+- Fix up preempt configs (Justin M. Forbes)
+- Turn on CONFIG_HID_NINTENDO for controller support (Dave Olsthoorn)
+- Fedora: Enable MediaTek bluetooth pieces (Peter Robinson)
+- Add rebase notes to check for PCI patches (Justin M. Forbes)
+- redhat: configs: move CONFIG_ACCESSIBILITY from fedora to common (John W. Linville)
+- Filter updates for hid-playstation on Fedora (Justin M. Forbes)
+- Enable CONFIG_VIRT_DRIVERS for ARK (Vitaly Kuznetsov)
+- redhat/configs: Enable Nitro Enclaves on aarch64 (Vitaly Kuznetsov)
+- Enable e1000 in rhel9 as unsupported (Ken Cox) [2002344]
+- Turn on COMMON_CLK_AXG_AUDIO for Fedora rhbz 2020481 (Justin M. Forbes)
+- Fix up fedora config options from mismatch (Justin M. Forbes)
+- Add nct6775 to filter-modules.sh.rhel (Justin M. Forbes)
+- Enable PREEMPT_DYNAMIC for all but s390x (Justin M. Forbes)
+- Add memcpy_kunit to mod-internal.list (Justin M. Forbes)
+- New configs in fs/ksmbd (Fedora Kernel Team)
+- Add nct6775 to Fedora filter-modules.sh (Justin M. Forbes)
+- New configs in fs/ntfs3 (Fedora Kernel Team)
+- Make CONFIG_IOMMU_DEFAULT_DMA_STRICT default for all but x86 (Justin M. Forbes)
+- redhat/configs: enable KEXEC_IMAGE_VERIFY_SIG for RHEL (Coiby Xu)
+- redhat/configs: enable KEXEC_SIG for aarch64 RHEL (Coiby Xu) [1994858]
+- Fix up fedora and pending configs for PREEMPT to end mismatch (Justin M. Forbes)
+- Enable binder for fedora (Justin M. Forbes)
+- Reset RHEL_RELEASE for 5.16 (Justin M. Forbes)
+- redhat: configs: Update configs for vmware (Kamal Heib)
+- Fedora configs for 5.15 (Justin M. Forbes)
+- redhat/kernel.spec.template: don't hardcode gcov arches (Jan Stancek)
+- redhat/configs: create a separate config for gcov options (Jan Stancek)
+- Update documentation with FAQ and update frequency (Don Zickus)
+- Document force pull option for mirroring (Don Zickus)
+- Ignore the rhel9 kabi files (Don Zickus)
+- Remove legacy elrdy cruft (Don Zickus)
+- redhat/configs/evaluate_configs: walk cfgvariants line by line (Jan Stancek)
+- redhat/configs/evaluate_configs: insert EMPTY tags at correct place (Jan Stancek)
+- redhat: make dist-srpm-gcov add to BUILDOPTS (Jan Stancek)
+- Build CONFIG_SPI_PXA2XX as a module on x86 (Justin M. Forbes)
+- redhat/configs: enable CONFIG_BCMGENET as module (Joel Savitz)
+- Fedora config updates (Justin M. Forbes)
+- Enable CONFIG_FAIL_SUNRPC for debug builds (Justin M. Forbes)
+- fedora: Disable fbdev drivers and use simpledrm instead (Javier Martinez Canillas)
+- spec: Don't fail spec build if ksamples fails (Jiri Olsa)
+- Enable CONFIG_QCOM_SCM for arm (Justin M. Forbes)
+- redhat: Disable clang's integrated assembler on ppc64le and s390x (Tom Stellard)
+- redhat/configs: enable CONFIG_IMA_WRITE_POLICY (Bruno Meneguele)
+- Fix dist-srpm-gcov (Don Zickus)
+- redhat: configs: add CONFIG_NTB and related items (John W. Linville)
+- Add kfence_test to mod-internal.list (Justin M. Forbes)
+- Enable KUNIT tests for redhat kernel-modules-internal (Nico Pache)
+- redhat: add *-matched meta packages to rpminspect emptyrpm config (Herton R. Krzesinski)
+- Use common config for NODES_SHIFT (Mark Salter)
+- redhat: fix typo and make the output more silent for dist-git sync (Herton R. Krzesinski)
+- Fedora NTFS config updates (Justin M. Forbes)
+- Fedora 5.15 configs part 1 (Justin M. Forbes)
+- Fix ordering in genspec args (Justin M. Forbes)
+- redhat/configs: Enable Hyper-V guests on ARM64 (Vitaly Kuznetsov) [2007430]
+- redhat: configs: Enable CONFIG_THINKPAD_LMI (Hans de Goede)
+- redhat/docs: update Koji link to avoid redirect (Joel Savitz)
+- redhat: add support for different profiles with dist*-brew (Herton R. Krzesinski)
+- redhat: configs: Disable xtables and ipset (Phil Sutter) [1945179]
+- redhat: Add mark_driver_deprecated() (Phil Sutter) [1945179]
+- Change s390x CONFIG_NODES_SHIFT from 4 to 1 (Justin M. Forbes)
+- Build CRYPTO_SHA3_*_S390 inline for s390 zfcpdump (Justin M. Forbes)
+- redhat: move the DIST variable setting to Makefile.variables (Herton R. Krzesinski)
+- redhat/kernel.spec.template: Cleanup source numbering (Prarit Bhargava)
+- redhat/kernel.spec.template: Reorganize RHEL and Fedora specific files (Prarit Bhargava)
+- redhat/kernel.spec.template: Add include_fedora and include_rhel variables (Prarit Bhargava)
+- redhat/Makefile: Make kernel-local global (Prarit Bhargava)
+- redhat/Makefile: Use flavors file (Prarit Bhargava)
+- Turn on CONFIG_CPU_FREQ_GOV_SCHEDUTIL for x86 (Justin M. Forbes)
+- redhat/configs: Remove CONFIG_INFINIBAND_I40IW (Kamal Heib)
+- cleanup CONFIG_X86_PLATFORM_DRIVERS_INTEL (David Arcari)
+- redhat: rename usage of .rhel8git.mk to .rhpkg.mk (Herton R. Krzesinski)
+- Manually add pending items that need to be set due to mismatch (Justin M. Forbes)
+- Clean up pending common (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_BLK_CGROUP_IOLATENCY & CONFIG_BLK_CGROUP_FC_APPID (Waiman Long) [2006813]
+- redhat: remove kernel.changelog-8.99 file (Herton R. Krzesinski)
+- redhat/configs: enable CONFIG_SQUASHFS_ZSTD which is already enabled in Fedora 34 (Tao Liu) [1998953]
+- redhat: bump RHEL_MAJOR and add the changelog file for it (Herton R. Krzesinski)
+- redhat: add documentation about the os-build rebase process (Herton R. Krzesinski)
+- redhat/configs: enable SYSTEM_BLACKLIST_KEYRING which is already enabled in rhel8 and Fedora 34 (Coiby Xu)
+- Build kernel-doc for Fedora (Justin M. Forbes)
+- x86_64: Enable Elkhart Lake Quadrature Encoder Peripheral support (Prarit Bhargava)
+- Update CONFIG_WERROR to disabled as it can cause issue with out of tree modules. (Justin M. Forbes)
+- Fixup IOMMU configs in pending so that configs are sane again (Justin M. Forbes)
+- Some initial Fedora config items for 5.15 (Justin M. Forbes)
+- arm64: use common CONFIG_MAX_ZONEORDER for arm kernel (Mark Salter)
+- Create Makefile.variables for a single point of configuration change (Justin M. Forbes)
+- rpmspec: drop traceevent files instead of just excluding them from files list (Herton R. Krzesinski) [1967640]
+- redhat/config: Enablement of CONFIG_PAPR_SCM for PowerPC (Gustavo Walbon) [1962936]
+- Attempt to fix Intel PMT code (David Arcari)
+- CI: Enable realtime branch testing (Veronika Kabatova)
+- CI: Enable realtime checks for c9s and RHEL9 (Veronika Kabatova)
+- ark: wireless: enable all rtw88 pcie wireless variants (Peter Robinson)
+- wireless: rtw88: move debug options to common/debug (Peter Robinson)
+- fedora: minor PTP clock driver cleanups (Peter Robinson)
+- common: x86: enable VMware PTP support on ark (Peter Robinson)
+- [scsi] megaraid_sas: re-add certain pci-ids (Tomas Henzl)
+- Disable liquidio driver on ark/rhel (Herton R. Krzesinski) [1993393]
+- More Fedora config updates (Justin M. Forbes)
+- Fedora config updates for 5.14 (Justin M. Forbes)
+- CI: Rename ARK CI pipeline type (Veronika Kabatova)
+- CI: Finish up c9s config (Veronika Kabatova)
+- CI: Update ppc64le config (Veronika Kabatova)
+- CI: use more templates (Veronika Kabatova)
+- Filter updates for aarch64 (Justin M. Forbes)
+- increase CONFIG_NODES_SHIFT for aarch64 (Chris von Recklinghausen) [1890304]
+- redhat: configs: Enable CONFIG_WIRELESS_HOTKEY (Hans de Goede)
+- redhat/configs: Update CONFIG_NVRAM (Desnes A. Nunes do Rosario) [1988254]
+- common: serial: build in SERIAL_8250_LPSS for x86 (Peter Robinson)
+- powerpc: enable CONFIG_FUNCTION_PROFILER (Diego Domingos) [1831065]
+- redhat/configs: Disable Soft-RoCE driver (Kamal Heib)
+- redhat/configs/evaluate_configs: Update help output (Prarit Bhargava)
+- redhat/configs: Double MAX_LOCKDEP_CHAINS (Justin M. Forbes)
+- fedora: configs: Fix WM5102 Kconfig (Hans de Goede)
+- powerpc: enable CONFIG_POWER9_CPU (Diego Domingos) [1876436]
+- redhat/configs: Fix CONFIG_VIRTIO_IOMMU to 'y' on aarch64 (Eric Auger) [1972795]
+- filter-modules.sh: add more sound modules to filter (Jaroslav Kysela)
+- redhat/configs: sound configuration cleanups and updates (Jaroslav Kysela)
+- common: Update for CXL (Compute Express Link) configs (Peter Robinson)
+- redhat: configs: disable CRYPTO_SM modules (Herton R. Krzesinski) [1990040]
+- Remove fedora version of the LOCKDEP_BITS, we should use common (Justin M. Forbes)
+- Re-enable sermouse for x86 (rhbz 1974002) (Justin M. Forbes)
+- Fedora 5.14 configs round 1 (Justin M. Forbes)
+- redhat: add gating configuration for centos stream/rhel9 (Herton R. Krzesinski)
+- x86: configs: Enable CONFIG_TEST_FPU for debug kernels (Vitaly Kuznetsov) [1988384]
+- redhat/configs: Move CHACHA and POLY1305 to core kernel to allow BIG_KEYS=y (root) [1983298]
+- kernel.spec: fix build of samples/bpf (Jiri Benc)
+- Enable OSNOISE_TRACER and TIMERLAT_TRACER (Jerome Marchand) [1979379]
+- rpmspec: switch iio and gpio tools to use tools_make (Herton R. Krzesinski) [1956988]
+- configs/process_configs.sh: Handle config items with no help text (Patrick Talbert)
+- fedora: sound config updates for 5.14 (Peter Robinson)
+- fedora: Only enable FSI drivers on POWER platform (Peter Robinson)
+- The CONFIG_RAW_DRIVER has been removed from upstream (Peter Robinson)
+- fedora: updates for 5.14 with a few disables for common from pending (Peter Robinson)
+- fedora: migrate from MFD_TPS68470 -> INTEL_SKL_INT3472 (Peter Robinson)
+- fedora: Remove STAGING_GASKET_FRAMEWORK (Peter Robinson)
+- Fedora: move DRM_VMWGFX configs from ark -> common (Peter Robinson)
+- fedora: arm: disabled unused FB drivers (Peter Robinson)
+- fedora: don't enable FB_VIRTUAL (Peter Robinson)
+- redhat/configs: Double MAX_LOCKDEP_ENTRIES (Waiman Long) [1940075]
+- rpmspec: fix verbose output on kernel-devel installation (Herton R. Krzesinski) [1981406]
+- Build Fedora x86s kernels with bytcr-wm5102 (Marius Hoch)
+- Deleted redhat/configs/fedora/generic/x86/CONFIG_FB_HYPERV (Patrick Lang)
+- rpmspec: correct the ghost initramfs attributes (Herton R. Krzesinski) [1977056]
+- rpmspec: amend removal of depmod created files to include modules.builtin.alias.bin (Herton R. Krzesinski) [1977056]
+- configs: remove duplicate CONFIG_DRM_HYPERV file (Patrick Talbert)
+- CI: use common code for merge and release (Don Zickus)
+- rpmspec: add release string to kernel doc directory name (Jan Stancek)
+- redhat/configs: Add CONFIG_INTEL_PMT_CRASHLOG (Michael Petlan) [1880486]
+- redhat/configs: Add CONFIG_INTEL_PMT_TELEMETRY (Michael Petlan) [1880486]
+- redhat/configs: Add CONFIG_MFD_INTEL_PMT (Michael Petlan) [1880486]
+- redhat/configs: enable CONFIG_BLK_DEV_ZONED (Ming Lei) [1638087]
+- Add --with clang_lto option to build the kernel with Link Time Optimizations (Tom Stellard)
+- common: disable DVB_AV7110 and associated pieces (Peter Robinson)
+- Fix fedora-only config updates (Don Zickus)
+- Fedora config update for new option (Justin M. Forbes)
+- redhat/configs: Enable stmmac NIC for x86_64 (Mark Salter)
+- all: hyperv: use the DRM driver rather than FB (Peter Robinson)
+- all: hyperv: unify the Microsoft HyperV configs (Peter Robinson)
+- all: VMWare: clean up VMWare configs (Peter Robinson)
+- Update CONFIG_ARM_FFA_TRANSPORT (Patrick Talbert)
+- CI: Handle all mirrors (Veronika Kabatova)
+- Turn on CONFIG_STACKTRACE for s390x zfcpdump kernels (Justin M. Forbes)
+- arm64: switch ark kernel to 4K pagesize (Mark Salter)
+- Disable AMIGA_PARTITION and KARMA_PARTITION (Prarit Bhargava) [1802694]
+- all: unify and cleanup i2c TPM2 modules (Peter Robinson)
+- redhat/configs: Set CONFIG_VIRTIO_IOMMU on aarch64 (Eric Auger) [1972795]
+- redhat/configs: Disable CONFIG_RT_GROUP_SCHED in rhel config (Phil Auld)
+- redhat/configs: enable KEXEC_SIG which is already enabled in RHEL8 for s390x and x86_64 (Coiby Xu) [1976835]
+- rpmspec: do not BuildRequires bpftool on noarch (Herton R. Krzesinski)
+- redhat/configs: disable {IMA,EVM}_LOAD_X509 (Bruno Meneguele) [1977529]
+- redhat: add secureboot CA certificate to trusted kernel keyring (Bruno Meneguele)
+- redhat/configs: enable IMA_ARCH_POLICY for aarch64 and s390x (Bruno Meneguele)
+- redhat/configs: Enable CONFIG_MLXBF_GIGE on aarch64 (Alaa Hleihel) [1858599]
+- common: enable STRICT_MODULE_RWX everywhere (Peter Robinson)
+- COMMON_CLK_STM32MP157_SCMI is bool and selects COMMON_CLK_SCMI (Justin M. Forbes)
+- kernel.spec: Add kernel{,-debug}-devel-matched meta packages (Timothée Ravier)
+- Turn off with_selftests for Fedora (Justin M. Forbes)
+- Don't build bpftool on Fedora (Justin M. Forbes)
+- Fix location of syscall scripts for kernel-devel (Justin M. Forbes)
+- fedora: arm: Enable some i.MX8 options (Peter Robinson)
+- Enable Landlock for Fedora (Justin M. Forbes)
+- Filter update for Fedora aarch64 (Justin M. Forbes)
+- rpmspec: only build debug meta packages where we build debug ones (Herton R. Krzesinski)
+- rpmspec: do not BuildRequires bpftool on nobuildarches (Herton R. Krzesinski)
+- redhat/configs: Consolidate CONFIG_HMC_DRV in the common s390x folder (Thomas Huth) [1976270]
+- redhat/configs: Consolidate CONFIG_EXPOLINE_OFF in the common folder (Thomas Huth) [1976270]
+- redhat/configs: Move CONFIG_HW_RANDOM_S390 into the s390x/ subfolder (Thomas Huth) [1976270]
+- redhat/configs: Disable CONFIG_HOTPLUG_PCI_SHPC in the Fedora settings (Thomas Huth) [1976270]
+- redhat/configs: Remove the non-existent CONFIG_NO_BOOTMEM switch (Thomas Huth) [1976270]
+- redhat/configs: Compile the virtio-console as a module on s390x (Thomas Huth) [1976270]
+- redhat/configs: Enable CONFIG_S390_CCW_IOMMU and CONFIG_VFIO_CCW for ARK, too (Thomas Huth) [1976270]
+- Revert "Merge branch 'ec_fips' into 'os-build'" (Vladis Dronov) [1947240]
+- Fix typos in fedora filters (Justin M. Forbes)
+- More filtering for Fedora (Justin M. Forbes)
+- Fix Fedora module filtering for spi-altera-dfl (Justin M. Forbes)
+- Fedora 5.13 config updates (Justin M. Forbes)
+- fedora: cleanup TCG_TIS_I2C_CR50 (Peter Robinson)
+- fedora: drop duplicate configs (Peter Robinson)
+- More Fedora config updates for 5.13 (Justin M. Forbes)
+- redhat/configs: Enable needed drivers for BlueField SoC on aarch64 (Alaa Hleihel) [1858592 1858594 1858596]
+- redhat: Rename mod-blacklist.sh to mod-denylist.sh (Prarit Bhargava)
+- redhat/configs: enable CONFIG_NET_ACT_MPLS (Marcelo Ricardo Leitner)
+- configs: Enable CONFIG_DEBUG_KERNEL for zfcpdump (Jiri Olsa)
+- kernel.spec: Add support to use vmlinux.h (Don Zickus)
+- spec: Add vmlinux.h to kernel-devel package (Jiri Olsa)
+- Turn off DRM_XEN_FRONTEND for Fedora as we had DRM_XEN off already (Justin M. Forbes)
+- Fedora 5.13 config updates pt 3 (Justin M. Forbes)
+- all: enable ath11k wireless modules (Peter Robinson)
+- all: Enable WWAN and associated MHI bus pieces (Peter Robinson)
+- spec: Enable selftests rpm build (Jiri Olsa)
+- spec: Allow bpf selftest/samples to fail (Jiri Olsa)
+- kvm: Add kvm_stat.service file and kvm_stat logrotate config to the tools (Jiri Benc)
+- kernel.spec: Add missing source files to kernel-selftests-internal (Jiri Benc)
+- kernel.spec: selftests: add net/forwarding to TARGETS list (Jiri Benc)
+- kernel.spec: selftests: add build requirement on libmnl-devel (Jiri Benc)
+- kernel.spec: add action.o to kernel-selftests-internal (Jiri Benc)
+- kernel.spec: avoid building bpftool repeatedly (Jiri Benc)
+- kernel.spec: selftests require python3 (Jiri Benc)
+- kernel.spec: skip selftests that failed to build (Jiri Benc)
+- kernel.spec: fix installation of bpf selftests (Jiri Benc)
+- redhat: fix samples and selftests make options (Jiri Benc)
+- kernel.spec: enable mptcp selftests for kernel-selftests-internal (Jiri Benc)
+- kernel.spec: Do not export shared objects from libexecdir to RPM Provides (Jiri Benc)
+- kernel.spec: add missing dependency for the which package (Jiri Benc)
+- kernel.spec: add netfilter selftests to kernel-selftests-internal (Jiri Benc)
+- kernel.spec: move slabinfo and page_owner_sort debuginfo to tools-debuginfo (Jiri Benc)
+- kernel.spec: package and ship VM tools (Jiri Benc)
+- configs: enable CONFIG_PAGE_OWNER (Jiri Benc)
+- kernel.spec: add coreutils (Jiri Benc)
+- kernel.spec: add netdevsim driver selftests to kernel-selftests-internal (Jiri Benc)
+- redhat/Makefile: Clean out the --without flags from the baseonly rule (Jiri Benc)
+- kernel.spec: Stop building unnecessary rpms for baseonly builds (Jiri Benc)
+- kernel.spec: disable more kabi switches for gcov build (Jiri Benc)
+- kernel.spec: Rename kabi-dw base (Jiri Benc)
+- kernel.spec: Fix error messages during build of zfcpdump kernel (Jiri Benc)
+- kernel.spec: perf: remove bpf examples (Jiri Benc)
+- kernel.spec: selftests should not depend on modules-internal (Jiri Benc)
+- kernel.spec: build samples (Jiri Benc)
+- kernel.spec: tools: sync missing options with RHEL 8 (Jiri Benc)
+- redhat/configs: nftables: Enable extra flowtable symbols (Phil Sutter)
+- redhat/configs: Sync netfilter options with RHEL8 (Phil Sutter)
+- Fedora 5.13 config updates pt 2 (Justin M. Forbes)
+- Move CONFIG_ARCH_INTEL_SOCFPGA up a level for Fedora (Justin M. Forbes)
+- fedora: enable the Rockchip rk3399 pcie drivers (Peter Robinson)
+- Fedora 5.13 config updates pt 1 (Justin M. Forbes)
+- Fix version requirement from opencsd-devel buildreq (Justin M. Forbes)
+- configs/ark/s390: set CONFIG_MARCH_Z14 and CONFIG_TUNE_Z15 (Philipp Rudo) [1876435]
+- configs/common/s390: Clean up CONFIG_{MARCH,TUNE}_Z* (Philipp Rudo)
+- configs/process_configs.sh: make use of dummy-tools (Philipp Rudo)
+- configs/common: disable CONFIG_INIT_STACK_ALL_{PATTERN,ZERO} (Philipp Rudo)
+- configs/common/aarch64: disable CONFIG_RELR (Philipp Rudo)
+- redhat/config: enable STMICRO nic for RHEL (Mark Salter)
+- redhat/configs: Enable ARCH_TEGRA on RHEL (Mark Salter)
+- redhat/configs: enable IMA_KEXEC for supported arches (Bruno Meneguele)
+- redhat/configs: enable INTEGRITY_SIGNATURE to all arches (Bruno Meneguele)
+- configs: enable CONFIG_LEDS_BRIGHTNESS_HW_CHANGED (Benjamin Tissoires)
+- RHEL: disable io_uring support (Jeff Moyer) [1964537]
+- all: Changing CONFIG_UV_SYSFS to build uv_sysfs.ko as a loadable module. (Frank Ramsay)
+- Enable NITRO_ENCLAVES on RHEL (Vitaly Kuznetsov)
+- Update the Quick Start documentation (David Ward)
+- redhat/configs: Set PVPANIC_MMIO for x86 and PVPANIC_PCI for aarch64 (Eric Auger) [1961178]
+- bpf: Fix unprivileged_bpf_disabled setup (Jiri Olsa)
+- Enable CONFIG_BPF_UNPRIV_DEFAULT_OFF (Jiri Olsa)
+- configs/common/s390: disable CONFIG_QETH_{OSN,OSX} (Philipp Rudo) [1903201]
+- nvme: nvme_mpath_init remove multipath check (Mike Snitzer)
+- Make CRYPTO_EC also builtin (Simo Sorce) [1947240]
+- Do not hard-code a default value for DIST (David Ward)
+- Override %%{debugbuildsenabled} if the --with-release option is used (David Ward)
+- Improve comments in SPEC file, and move some option tests and macros (David Ward)
+- configs: enable CONFIG_EXFAT_FS (Pavel Reichl) [1943423]
+- Revert s390x/zfcpdump part of a9d179c40281 and ecbfddd98621 (Vladis Dronov)
+- Embed crypto algos, modes and templates needed in the FIPS mode (Vladis Dronov) [1947240]
+- configs: Add and enable CONFIG_HYPERV_TESTING for debug kernels (Mohammed Gamal)
+- configs: enable CONFIG_CMA on x86_64 in ARK (David Hildenbrand) [1945002]
+- rpmspec: build debug-* meta-packages if debug builds are disabled (Herton R. Krzesinski)
+- UIO: disable unused config options (Aristeu Rozanski) [1957819]
+- ARK-config: Make amd_pinctrl module builtin (Hans de Goede)
+- rpmspec: revert/drop content hash for kernel-headers (Herton R. Krzesinski)
+- rpmspec: fix check that calls InitBuildVars (Herton R. Krzesinski)
+- fedora: enable zonefs (Damien Le Moal)
+- redhat: load specific ARCH keys to INTEGRITY_PLATFORM_KEYRING (Bruno Meneguele)
+- redhat: enable INTEGRITY_TRUSTED_KEYRING across all variants (Bruno Meneguele)
+- redhat: enable SYSTEM_BLACKLIST_KEYRING across all variants (Bruno Meneguele)
+- redhat: enable INTEGRITY_ASYMMETRIC_KEYS across all variants (Bruno Meneguele)
+- Remove unused boot loader specification files (David Ward)
+- redhat/configs: Enable mlx5 IPsec and TLS offloads (Alaa Hleihel) [1869674 1957636]
+- common: disable Apple Silicon generally (Peter Robinson)
+- cleanup Intel's FPGA configs (Peter Robinson)
+- common: move PTP KVM support from ark to common (Peter Robinson)
+- Enable CONFIG_DRM_AMDGPU_USERPTR for everyone (Justin M. Forbes)
+- redhat: add initial rpminspect configuration (Herton R. Krzesinski)
+- fedora: arm updates for 5.13 (Peter Robinson)
+- fedora: Enable WWAN and associated MHI bits (Peter Robinson)
+- Update CONFIG_MODPROBE_PATH to /usr/sbin (Justin Forbes)
+- Fedora set modprobe path (Justin M. Forbes)
+- Keep sctp and l2tp modules in modules-extra (Don Zickus)
+- Fix ppc64le cross build packaging (Don Zickus)
+- Fedora: Make amd_pinctrl module builtin (Hans de Goede)
+- Keep CONFIG_KASAN_HW_TAGS off for aarch64 debug configs (Justin M. Forbes)
+- New configs in drivers/bus (Fedora Kernel Team)
+- RHEL: Don't build KVM PR module on ppc64 (David Gibson) [1930649]
+- Flip CONFIG_USB_ROLE_SWITCH from m to y (Justin M. Forbes)
+- Set valid options for CONFIG_FW_LOADER_USER_HELPER (Justin M. Forbes)
+- Clean up CONFIG_FB_MODE_HELPERS (Justin M. Forbes)
+- Turn off CONFIG_VFIO for the s390x zfcpdump kernel (Justin M. Forbes)
+- Delete unused CONFIG_SND_SOC_MAX98390 pending-common (Justin M. Forbes)
+- Update pending-common configs, preparing to set correctly (Justin M. Forbes)
+- Update fedora filters for surface (Justin M. Forbes)
+- Build CONFIG_CRYPTO_ECDSA inline for s390x zfcpdump (Justin M. Forbes)
+- Replace "flavour" where "variant" is meant instead (David Ward)
+- Drop the %%{variant} macro and fix --with-vanilla (David Ward)
+- Fix syntax of %%kernel_variant_files (David Ward)
+- Change description of --without-vdso-install to fix typo (David Ward)
+- Config updates to work around mismatches (Justin M. Forbes)
+- CONFIG_SND_SOC_FSL_ASOC_CARD selects CONFIG_MFD_WM8994 now (Justin M. Forbes)
+- wireguard: disable in FIPS mode (Hangbin Liu) [1940794]
+- Enable mtdram for fedora (rhbz 1955916) (Justin M. Forbes)
+- Remove reference to bpf-helpers man page (Justin M. Forbes)
+- Fedora: enable more modules for surface devices (Dave Olsthoorn)
+- Fix Fedora config mismatch for CONFIG_FSL_ENETC_IERB (Justin M. Forbes)
+- hardlink is in /usr/bin/ now (Justin M. Forbes)
+- Ensure CONFIG_KVM_BOOK3S_64_PR stays on in Fedora, even if it is turned off in RHEL (Justin M. Forbes)
+- Set date in package release from repository commit, not system clock (David Ward)
+- Use a better upstream tarball filename for snapshots (David Ward)
+- Don't create empty pending-common files on pending-fedora commits (Don Zickus)
+- nvme: decouple basic ANA log page re-read support from native multipathing (Mike Snitzer)
+- nvme: allow local retry and proper failover for REQ_FAILFAST_TRANSPORT (Mike Snitzer)
+- nvme: Return BLK_STS_TARGET if the DNR bit is set (Mike Snitzer)
+- Add redhat/configs/pending-common/generic/s390x/zfcpdump/CONFIG_NETFS_SUPPORT (Justin M. Forbes)
+- Create ark-latest branch last for CI scripts (Don Zickus)
+- Replace /usr/libexec/platform-python with /usr/bin/python3 (David Ward)
+- Turn off ADI_AXI_ADC and AD9467 which now require CONFIG_OF (Justin M. Forbes)
+- Export ark infrastructure files (Don Zickus)
+- docs: Update docs to reflect newer workflow. (Don Zickus)
+- Use upstream/master for merge-base with fallback to master (Don Zickus)
+- Fedora: Turn off the SND_INTEL_BYT_PREFER_SOF option (Hans de Goede)
+- filter-modules.sh.fedora: clean up "netprots" (Paul Bolle)
+- filter-modules.sh.fedora: clean up "scsidrvs" (Paul Bolle)
+- filter-*.sh.fedora: clean up "ethdrvs" (Paul Bolle)
+- filter-*.sh.fedora: clean up "driverdirs" (Paul Bolle)
+- filter-*.sh.fedora: remove incorrect entries (Paul Bolle)
+- filter-*.sh.fedora: clean up "singlemods" (Paul Bolle)
+- filter-modules.sh.fedora: drop unused list "iiodrvs" (Paul Bolle)
+- Update mod-internal to fix depmod issue (Nico Pache)
+- Turn on CONFIG_VDPA_SIM_NET (rhbz 1942343) (Justin M. Forbes)
+- New configs in drivers/power (Fedora Kernel Team)
+- Turn on CONFIG_NOUVEAU_DEBUG_PUSH for debug configs (Justin M. Forbes)
+- Turn off KFENCE sampling by default for Fedora (Justin M. Forbes)
+- Fedora config updates round 2 (Justin M. Forbes)
+- New configs in drivers/soc (Jeremy Cline)
+- filter-modules.sh: Fix copy/paste error 'input' (Paul Bolle)
+- Update module filtering for 5.12 kernels (Justin M. Forbes)
+- Fix genlog.py to ensure that comments retain "%%" characters. (Mark Mielke)
+- New configs in drivers/leds (Fedora Kernel Team)
+- Limit CONFIG_USB_CDNS_SUPPORT to x86_64 and arm in Fedora (David Ward)
+- Fedora: Enable CHARGER_GPIO on aarch64 too (Peter Robinson)
+- Fedora config updates (Justin M. Forbes)
+- configs: enable CONFIG_WIREGUARD in ARK (Hangbin Liu) [1613522]
+- Remove duplicate configs across fedora, ark and common (Don Zickus)
+- Combine duplicate configs across ark and fedora into common (Don Zickus)
+- common/ark: cleanup and unify the parport configs (Peter Robinson)
+- iommu/vt-d: enable INTEL_IDXD_SVM for both fedora and rhel (Jerry Snitselaar)
+- REDHAT: coresight: etm4x: Disable coresight on HPE Apollo 70 (Jeremy Linton)
+- configs/common/generic: disable CONFIG_SLAB_MERGE_DEFAULT (Rafael Aquini)
+- Remove _legacy_common_support (Justin M. Forbes)
+- redhat/mod-blacklist.sh: Fix floppy blacklisting (Hans de Goede)
+- New configs in fs/pstore (CKI@GitLab)
+- New configs in arch/powerpc (Fedora Kernel Team)
+- configs: enable BPF LSM on Fedora and ARK (Ondrej Mosnacek)
+- configs: clean up LSM configs (Ondrej Mosnacek)
+- New configs in drivers/platform (CKI@GitLab)
+- New configs in drivers/firmware (CKI@GitLab)
+- New configs in drivers/mailbox (Fedora Kernel Team)
+- New configs in drivers/net/phy (Justin M. Forbes)
+- Update CONFIG_DM_MULTIPATH_IOA (Augusto Caringi)
+- New configs in mm/Kconfig (CKI@GitLab)
+- New configs in arch/powerpc (Jeremy Cline)
+- New configs in arch/powerpc (Jeremy Cline)
+- New configs in drivers/input (Fedora Kernel Team)
+- New configs in net/bluetooth (Justin M. Forbes)
+- New configs in drivers/clk (Fedora Kernel Team)
+- New configs in init/Kconfig (Jeremy Cline)
+- redhat: allow running fedora-configs and rh-configs targets outside of redhat/ (Herton R. Krzesinski)
+- all: unify the disable of goldfish (android emulation platform) (Peter Robinson)
+- common: minor cleanup/de-dupe of dma/dmabuf debug configs (Peter Robinson)
+- common/ark: these drivers/arches were removed in 5.12 (Peter Robinson)
+- Correct kernel-devel make prepare build for 5.12. (Paulo E. Castro)
+- redhat: add initial support for centos stream dist-git sync on Makefiles (Herton R. Krzesinski)
+- redhat/configs: Enable CONFIG_SCHED_STACK_END_CHECK for Fedora and ARK (Josh Poimboeuf) [1856174]
+- CONFIG_VFIO now selects IOMMU_API instead of depending on it, causing several config mismatches for the zfcpdump kernel (Justin M. Forbes)
+- Turn off weak-modules for Fedora (Justin M. Forbes)
+- redhat: enable CONFIG_FW_LOADER_COMPRESS for ARK (Herton R. Krzesinski) [1939095]
+- Fedora: filters: update to move dfl-emif to modules (Peter Robinson)
+- drop duplicate DEVFREQ_GOV_SIMPLE_ONDEMAND config (Peter Robinson)
+- efi: The EFI_VARS is legacy and now x86 only (Peter Robinson)
+- common: enable RTC_SYSTOHC to supplement update_persistent_clock64 (Peter Robinson)
+- generic: arm: enable SCMI for all options (Peter Robinson)
+- fedora: the PCH_CAN driver is x86-32 only (Peter Robinson)
+- common: disable legacy CAN device support (Peter Robinson)
+- common: Enable Microchip MCP251x/MCP251xFD CAN controllers (Peter Robinson)
+- common: Bosch MCAN support for Intel Elkhart Lake (Peter Robinson)
+- common: enable CAN_PEAK_PCIEFD PCI-E driver (Peter Robinson)
+- common: disable CAN_PEAK_PCIEC PCAN-ExpressCard (Peter Robinson)
+- common: enable common CAN layer 2 protocols (Peter Robinson)
+- ark: disable CAN_LEDS option (Peter Robinson)
+- Fedora: Turn on SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC option (Hans de Goede)
+- Fedora: enable modules for surface devices (Dave Olsthoorn)
+- Turn on SND_SOC_INTEL_SOUNDWIRE_SOF_MACH for Fedora again (Justin M. Forbes)
+- common: fix WM8804 codec dependencies (Peter Robinson)
+- Build SERIO_SERPORT as a module (Peter Robinson)
+- input: touchscreen: move ELO and Wacom serial touchscreens to x86 (Peter Robinson)
+- Sync serio touchscreens for non x86 architectures to the same as ARK (Peter Robinson)
+- Only enable SERIO_LIBPS2 on x86 (Peter Robinson)
+- Only enable PC keyboard controller and associated keyboard on x86 (Peter Robinson)
+- Generic: Mouse: Tweak generic serial mouse options (Peter Robinson)
+- Only enable PS2 Mouse options on x86 (Peter Robinson)
+- Disable bluetooth highspeed by default (Peter Robinson)
+- Fedora: A few more general updates for 5.12 window (Peter Robinson)
+- Fedora: Updates for 5.12 merge window (Peter Robinson)
+- Fedora: remove dead options that were removed upstream (Peter Robinson)
+- redhat: remove CONFIG_DRM_PANEL_XINGBANGDA_XBD599 (Herton R. Krzesinski)
+- New configs in arch/powerpc (Fedora Kernel Team)
+- Turn on CONFIG_PPC_QUEUED_SPINLOCKS as it is default upstream now (Justin M. Forbes)
+- Update pending-common configs to address new upstream config deps (Justin M. Forbes)
+- rpmspec: ship gpio-watch.debug in the proper debuginfo package (Herton R. Krzesinski)
+- Removed description text as a comment confuses the config generation (Justin M. Forbes)
+- New configs in drivers/dma-buf (Jeremy Cline)
+- Fedora: ARMv7: build for 16 CPUs. (Peter Robinson)
+- Fedora: only enable DEBUG_HIGHMEM on debug kernels (Peter Robinson)
+- process_configs.sh: fix find/xargs data flow (Ondrej Mosnacek)
+- Fedora config update (Justin M. Forbes)
+- fedora: minor arm sound config updates (Peter Robinson)
+- Fix trailing white space in redhat/configs/fedora/generic/CONFIG_SND_INTEL_BYT_PREFER_SOF (Justin M. Forbes)
+- Add a redhat/rebase-notes.txt file (Hans de Goede)
+- Turn on SND_INTEL_BYT_PREFER_SOF for Fedora (Hans de Goede)
+- CI: Drop MR ID from the name variable (Veronika Kabatova)
+- redhat: add DUP and kpatch certificates to system trusted keys for RHEL build (Herton R. Krzesinski)
+- The comments in CONFIG_USB_RTL8153_ECM actually turn off CONFIG_USB_RTL8152 (Justin M. Forbes)
+- Update CKI pipeline project (Veronika Kabatova)
+- Turn off additional KASAN options for Fedora (Justin M. Forbes)
+- Rename the master branch to rawhide for Fedora (Justin M. Forbes)
+- Makefile targets for packit integration (Ben Crocker)
+- Turn off KASAN for rawhide debug builds (Justin M. Forbes)
+- New configs in arch/arm64 (Justin Forbes)
+- Remove deprecated Intel MIC config options (Peter Robinson)
+- redhat: replace inline awk script with genlog.py call (Herton R. Krzesinski)
+- redhat: add genlog.py script (Herton R. Krzesinski)
+- kernel.spec.template - fix use_vdso usage (Ben Crocker)
+- redhat: remove remaining references of CONFIG_RH_DISABLE_DEPRECATED (Herton R. Krzesinski)
+- Turn off vdso_install for ppc (Justin M. Forbes)
+- Remove bpf-helpers.7 from bpftool package (Jiri Olsa)
+- New configs in lib/Kconfig.debug (Fedora Kernel Team)
+- Turn off CONFIG_VIRTIO_CONSOLE for s390x zfcpdump (Justin M. Forbes)
+- New configs in drivers/clk (Justin M. Forbes)
+- Keep VIRTIO_CONSOLE on s390x available. (Jakub Čajka)
+- New configs in lib/Kconfig.debug (Jeremy Cline)
+- Fedora 5.11 config updates part 4 (Justin M. Forbes)
+- Fedora 5.11 config updates part 3 (Justin M. Forbes)
+- Fedora 5.11 config updates part 2 (Justin M. Forbes)
+- Update internal (test) module list from RHEL-8 (Joe Lawrence) [1915073]
+- Fix USB_XHCI_PCI regression (Justin M. Forbes)
+- fedora: fixes for ARMv7 build issue by disabling HIGHPTE (Peter Robinson)
+- all: s390x: Increase CONFIG_PCI_NR_FUNCTIONS to 512 (#1888735) (Dan Horák)
+- Fedora 5.11 configs pt 1 (Justin M. Forbes)
+- redhat: avoid conflict with mod-blacklist.sh and released_kernel defined (Herton R. Krzesinski)
+- redhat: handle certificate files conditionally as done for src.rpm (Herton R. Krzesinski)
+- specfile: add %%{?_smp_mflags} to "make headers_install" in tools/testing/selftests (Denys Vlasenko)
+- specfile: add %%{?_smp_mflags} to "make samples/bpf/" (Denys Vlasenko)
+- Run MR testing in CKI pipeline (Veronika Kabatova)
+- Reword comment (Nicolas Chauvet)
+- Add with_cross_arm conditional (Nicolas Chauvet)
+- Redefines __strip if with_cross (Nicolas Chauvet)
+- fedora: only enable ACPI_CONFIGFS, ACPI_CUSTOM_METHOD in debug kernels (Peter Robinson)
+- fedora: Use the same EFI_CUSTOM_SSDT_OVERLAYS as ARK (Peter Robinson)
+- all: all arches/kernels enable the same DMI options (Peter Robinson)
+- all: move SENSORS_ACPI_POWER to common/generic (Peter Robinson)
+- fedora: PCIE_HISI_ERR is already in common (Peter Robinson)
+- all: all ACPI platforms enable ATA_ACPI so move it to common (Peter Robinson)
+- all: x86: move shared x86 acpi config options to generic (Peter Robinson)
+- All: x86: Move ACPI_VIDEO to common/x86 (Peter Robinson)
+- All: x86: Enable ACPI_DPTF (Intel DPTF) (Peter Robinson)
+- All: enable ACPI_BGRT for all ACPI platforms. (Peter Robinson)
+- All: Only build ACPI_EC_DEBUGFS for debug kernels (Peter Robinson)
+- All: Disable Intel Classmate PC ACPI_CMPC option (Peter Robinson)
+- cleanup: ACPI_PROCFS_POWER was removed upstream (Peter Robinson)
+- All: ACPI: De-dupe the ACPI options that are the same across ark/fedora on x86/arm (Peter Robinson)
+- Enable the vkms module in Fedora (Jeremy Cline)
+- Fedora: arm updates for 5.11 and general cross Fedora cleanups (Peter Robinson)
+- Add gcc-c++ to BuildRequires (Justin M. Forbes)
+- Update CONFIG_KASAN_HW_TAGS (Justin M. Forbes)
+- fedora: arm: move generic power off/reset to all arm (Peter Robinson)
+- fedora: ARMv7: build in DEVFREQ_GOV_SIMPLE_ONDEMAND until I work out why it's changed (Peter Robinson)
+- fedora: cleanup joystick_adc (Peter Robinson)
+- fedora: update some display options (Peter Robinson)
+- fedora: arm: enable TI PRU options (Peter Robinson)
+- fedora: arm: minor exynos platform updates (Peter Robinson)
+- arm: SoC: disable Toshiba Visconti SoC (Peter Robinson)
+- common: disable ARCH_BCM4908 (NFC) (Peter Robinson)
+- fedora: minor arm config updates (Peter Robinson)
+- fedora: enable Tegra 234 SoC (Peter Robinson)
+- fedora: arm: enable new Hikey 3xx options (Peter Robinson)
+- Fedora: USB updates (Peter Robinson)
+- fedora: enable the GNSS receiver subsystem (Peter Robinson)
+- Remove POWER_AVS as no longer upstream (Peter Robinson)
+- Cleanup RESET_RASPBERRYPI (Peter Robinson)
+- Cleanup GPIO_CDEV_V1 options. (Peter Robinson)
+- fedora: arm crypto updates (Peter Robinson)
+- CONFIG_KASAN_HW_TAGS for aarch64 (Justin M. Forbes)
+- Fedora: cleanup PCMCIA configs, move to x86 (Peter Robinson)
+- New configs in drivers/rtc (Fedora Kernel Team)
+- redhat/configs: Enable CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL (Josh Poimboeuf) [1856176]
+- redhat/configs: Enable CONFIG_GCC_PLUGIN_STRUCTLEAK (Josh Poimboeuf) [1856176]
+- redhat/configs: Enable CONFIG_GCC_PLUGINS on ARK (Josh Poimboeuf) [1856176]
+- redhat/configs: Enable CONFIG_KASAN on Fedora (Josh Poimboeuf) [1856176]
+- New configs in init/Kconfig (Fedora Kernel Team)
+- build_configs.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- genspec.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- mod-blacklist.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- Enable Speakup accessibility driver (Justin M. Forbes)
+- New configs in init/Kconfig (Fedora Kernel Team)
+- Fix fedora config mismatch due to dep changes (Justin M. Forbes)
+- New configs in drivers/crypto (Jeremy Cline)
+- Remove duplicate ENERGY_MODEL configs (Peter Robinson)
+- This is selected by PCIE_QCOM so must match (Justin M. Forbes)
+- drop unused BACKLIGHT_GENERIC (Peter Robinson)
+- Remove cp instruction already handled in instruction below. (Paulo E. Castro)
+- Add all the dependencies gleaned from running `make prepare` on a bloated devel kernel. (Paulo E. Castro)
+- Add tools to path mangling script. (Paulo E. Castro)
+- Remove duplicate cp statement which is also not specific to x86. (Paulo E. Castro)
+- Correct orc_types failure whilst running `make prepare` https://bugzilla.redhat.com/show_bug.cgi?id=1882854 (Paulo E. Castro)
+- redhat: ark: enable CONFIG_IKHEADERS (Jiri Olsa)
+- Add missing '$' sign to (GIT) in redhat/Makefile (Augusto Caringi)
+- Remove filterdiff and use native git instead (Don Zickus)
+- New configs in net/sched (Justin M. Forbes)
+- New configs in drivers/mfd (CKI@GitLab)
+- New configs in drivers/mfd (Fedora Kernel Team)
+- New configs in drivers/firmware (Fedora Kernel Team)
+- Temporarily backout parallel xz script (Justin M. Forbes)
+- redhat: explicitly disable CONFIG_IMA_APPRAISE_SIGNED_INIT (Bruno Meneguele)
+- redhat: enable CONFIG_EVM_LOAD_X509 on ARK (Bruno Meneguele)
+- redhat: enable CONFIG_EVM_ATTR_FSUUID on ARK (Bruno Meneguele)
+- redhat: enable CONFIG_EVM in all arches and flavors (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_LOAD_X509 on ARK (Bruno Meneguele)
+- redhat: set CONFIG_IMA_DEFAULT_HASH to SHA256 (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_READ_POLICY on ARK (Bruno Meneguele)
+- redhat: set default IMA template for all ARK arches (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_DEFAULT_HASH_SHA256 for all flavors (Bruno Meneguele)
+- redhat: disable CONFIG_IMA_DEFAULT_HASH_SHA1 (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_ARCH_POLICY for ppc and x86 (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_APPRAISE_MODSIG (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_APPRAISE_BOOTPARAM (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_APPRAISE (Bruno Meneguele)
+- redhat: enable CONFIG_INTEGRITY for aarch64 (Bruno Meneguele)
+- kernel: Update some missing KASAN/KCSAN options (Jeremy Linton)
+- kernel: Enable coresight on aarch64 (Jeremy Linton)
+- Update CONFIG_INET6_ESPINTCP (Justin Forbes)
+- New configs in net/ipv6 (Justin M. Forbes)
+- fedora: move CONFIG_RTC_NVMEM options from ark to common (Peter Robinson)
+- configs: Enable CONFIG_DEBUG_INFO_BTF (Don Zickus)
+- fedora: some minor arm audio config tweaks (Peter Robinson)
+- Ship xpad with default modules on Fedora and RHEL (Bastien Nocera)
+- Fedora: Only enable legacy serial/game port joysticks on x86 (Peter Robinson)
+- Fedora: Enable the options required for the Librem 5 Phone (Peter Robinson)
+- Fedora config update (Justin M. Forbes)
+- Fedora config change because CONFIG_FSL_DPAA2_ETH now selects CONFIG_FSL_XGMAC_MDIO (Justin M. Forbes)
+- redhat: generic enable CONFIG_INET_MPTCP_DIAG (Davide Caratti)
+- Fedora config update (Justin M. Forbes)
+- Enable NANDSIM for Fedora (Justin M. Forbes)
+- Re-enable CONFIG_ACPI_TABLE_UPGRADE for Fedora since upstream disables this if secureboot is active (Justin M. Forbes)
+- Ath11k related config updates (Justin M. Forbes)
+- Fedora config updates for ath11k (Justin M. Forbes)
+- Turn on ATH11K for Fedora (Justin M. Forbes)
+- redhat: enable CONFIG_INTEL_IOMMU_SVM (Jerry Snitselaar)
+- More Fedora config fixes (Justin M. Forbes)
+- Fedora 5.10 config updates (Justin M. Forbes)
+- Fedora 5.10 configs round 1 (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Allow kernel-tools to build without selftests (Don Zickus)
+- Allow building of kernel-tools standalone (Don Zickus)
+- redhat: ark: disable CONFIG_NET_ACT_CTINFO (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_TEQL (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_SFB (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_QFQ (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_PLUG (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_PIE (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_HHF (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_DSMARK (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_DRR (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_CODEL (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_CHOKE (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_CBQ (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_ATM (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_EMATCH and sub-targets (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_TCINDEX (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_RSVP6 (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_RSVP (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_ROUTE4 (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_BASIC (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_ACT_SKBMOD (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_ACT_SIMP (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_ACT_NAT (Davide Caratti)
+- arm64/defconfig: Enable CONFIG_KEXEC_FILE (Bhupesh Sharma) [1821565]
+- redhat/configs: Cleanup CONFIG_CRYPTO_SHA512 (Prarit Bhargava)
+- New configs in drivers/mfd (Fedora Kernel Team)
+- Fix LTO issues with kernel-tools (Don Zickus)
+- Point pathfix to the new location for gen_compile_commands.py (Justin M. Forbes)
+- configs: Disable CONFIG_SECURITY_SELINUX_DISABLE (Ondrej Mosnacek)
+- [Automatic] Handle config dependency changes (Don Zickus)
+- configs/iommu: Add config comment to empty CONFIG_SUN50I_IOMMU file (Jerry Snitselaar)
+- New configs in kernel/trace (Fedora Kernel Team)
+- Fix Fedora config locations (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- configs: enable CONFIG_CRYPTO_CTS=y so cts(cbc(aes)) is available in FIPS mode (Vladis Dronov) [1855161]
+- Partial revert: Add master merge check (Don Zickus)
+- Update Maintainers doc to reflect workflow changes (Don Zickus)
+- WIP: redhat/docs: Update documentation for single branch workflow (Prarit Bhargava)
+- Add CONFIG_ARM64_MTE which is not picked up by the config scripts for some reason (Justin M. Forbes)
+- Disable Speakup synth DECEXT (Justin M. Forbes)
+- Enable Speakup for Fedora since it is out of staging (Justin M. Forbes)
+- Modify patchlist changelog output (Don Zickus)
+- process_configs.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- generate_all_configs.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- redhat/self-test: Initial commit (Ben Crocker)
+- arch/x86: Remove vendor specific CPU ID checks (Prarit Bhargava)
+- redhat: Replace hardware.redhat.com link in Unsupported message (Prarit Bhargava) [1810301]
+- x86: Fix compile issues with rh_check_supported() (Don Zickus)
+- KEYS: Make use of platform keyring for module signature verify (Robert Holmes)
+- Input: rmi4 - remove the need for artificial IRQ in case of HID (Benjamin Tissoires)
+- ARM: tegra: usb no reset (Peter Robinson)
+- arm: make CONFIG_HIGHPTE optional without CONFIG_EXPERT (Jon Masters)
+- redhat: rh_kabi: deduplication friendly structs (Jiri Benc)
+- redhat: rh_kabi add a comment with warning about RH_KABI_EXCLUDE usage (Jiri Benc)
+- redhat: rh_kabi: introduce RH_KABI_EXTEND_WITH_SIZE (Jiri Benc)
+- redhat: rh_kabi: Indirect EXTEND macros so nesting of other macros will resolve. (Don Dutile)
+- redhat: rh_kabi: Fix RH_KABI_SET_SIZE to use dereference operator (Tony Camuso)
+- redhat: rh_kabi: Add macros to size and extend structs (Prarit Bhargava)
+- Removing Obsolete hba pci-ids from rhel8 (Dick Kennedy) [1572321]
+- mptsas: pci-id table changes (Laura Abbott)
+- mptsas: Taint kernel if mptsas is loaded (Laura Abbott)
+- mptspi: pci-id table changes (Laura Abbott)
+- qla2xxx: Remove PCI IDs of deprecated adapter (Jeremy Cline)
+- be2iscsi: remove unsupported device IDs (Chris Leech) [1574502 1598366]
+- mptspi: Taint kernel if mptspi is loaded (Laura Abbott)
+- hpsa: remove old cciss-based smartarray pci ids (Joseph Szczypek) [1471185]
+- qla4xxx: Remove deprecated PCI IDs from RHEL 8 (Chad Dupuis) [1518874]
+- aacraid: Remove deprecated device and vendor PCI IDs (Raghava Aditya Renukunta) [1495307]
+- megaraid_sas: remove deprecated pci-ids (Tomas Henzl) [1509329]
+- mpt*: remove certain deprecated pci-ids (Jeremy Cline)
+- kernel: add SUPPORT_REMOVED kernel taint (Tomas Henzl) [1602033]
+- Rename RH_DISABLE_DEPRECATED to RHEL_DIFFERENCES (Don Zickus)
+- s390: Lock down the kernel when the IPL secure flag is set (Jeremy Cline)
+- efi: Lock down the kernel if booted in secure boot mode (David Howells)
+- efi: Add an EFI_SECURE_BOOT flag to indicate secure boot mode (David Howells)
+- security: lockdown: expose a hook to lock the kernel down (Jeremy Cline)
+- Make get_cert_list() use efi_status_to_str() to print error messages. (Peter Jones)
+- Add efi_status_to_str() and rework efi_status_to_err(). (Peter Jones)
+- Add support for deprecating processors (Laura Abbott) [1565717 1595918 1609604 1610493]
+- arm: aarch64: Drop the EXPERT setting from ARM64_FORCE_52BIT (Jeremy Cline)
+- iommu/arm-smmu: workaround DMA mode issues (Laura Abbott)
+- rh_kabi: introduce RH_KABI_EXCLUDE (Jakub Racek) [1652256]
+- ipmi: do not configure ipmi for HPE m400 (Laura Abbott) [1670017]
+- kABI: Add generic kABI macros to use for kABI workarounds (Myron Stowe) [1546831]
+- add pci_hw_vendor_status() (Maurizio Lombardi) [1590829]
+- ahci: thunderx2: Fix for errata that affects stop engine (Robert Richter) [1563590]
+- Vulcan: AHCI PCI bar fix for Broadcom Vulcan early silicon (Robert Richter) [1563590]
+- bpf: set unprivileged_bpf_disabled to 1 by default, add a boot parameter (Eugene Syromiatnikov) [1561171]
+- add Red Hat-specific taint flags (Eugene Syromiatnikov) [1559877]
+- tags.sh: Ignore redhat/rpm (Jeremy Cline)
+- put RHEL info into generated headers (Laura Abbott) [1663728]
+- aarch64: acpi scan: Fix regression related to X-Gene UARTs (Mark Salter) [1519554]
+- ACPI / irq: Workaround firmware issue on X-Gene based m400 (Mark Salter) [1519554]
+- modules: add rhelversion MODULE_INFO tag (Laura Abbott)
+- ACPI: APEI: arm64: Ignore broken HPE moonshot APEI support (Al Stone) [1518076]
+- Add Red Hat tainting (Laura Abbott) [1565704 1652266]
+- Introduce CONFIG_RH_DISABLE_DEPRECATED (Laura Abbott)
+- Stop merging ark-patches for release (Don Zickus)
+- Fix path location for ark-update-configs.sh (Don Zickus)
+- Combine Red Hat patches into single patch (Don Zickus)
+- New configs in drivers/misc (Jeremy Cline)
+- New configs in drivers/net/wireless (Justin M. Forbes)
+- New configs in drivers/phy (Fedora Kernel Team)
+- New configs in drivers/tty (Fedora Kernel Team)
+- Set SquashFS decompression options for all flavors to match RHEL (Bohdan Khomutskyi)
+- configs: Enable CONFIG_ENERGY_MODEL (Phil Auld)
+- New configs in drivers/pinctrl (Fedora Kernel Team)
+- Update CONFIG_THERMAL_NETLINK (Justin Forbes)
+- Separate merge-upstream and release stages (Don Zickus)
+- Re-enable CONFIG_IR_SERIAL on Fedora (Prarit Bhargava)
+- Create Patchlist.changelog file (Don Zickus)
+- Filter out upstream commits from changelog (Don Zickus)
+- Merge Upstream script fixes (Don Zickus)
+- kernel.spec: Remove kernel-keys directory on rpm erase (Prarit Bhargava)
+- Add mlx5_vdpa to module filter for Fedora (Justin M. Forbes)
+- Add python3-sphinx_rtd_theme buildreq for docs (Justin M. Forbes)
+- redhat/configs/process_configs.sh: Remove *.config.orig files (Prarit Bhargava)
+- redhat/configs/process_configs.sh: Add process_configs_known_broken flag (Prarit Bhargava)
+- redhat/Makefile: Fix '*-configs' targets (Prarit Bhargava)
+- dist-merge-upstream: Checkout known branch for ci scripts (Don Zickus)
+- kernel.spec: don't override upstream compiler flags for ppc64le (Dan Horák)
+- Fedora config updates (Justin M. Forbes)
+- Fedora config update (Justin M. Forbes)
+- mod-sign.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- Swap how ark-latest is built (Don Zickus)
+- Add extra version bump to os-build branch (Don Zickus)
+- dist-release: Avoid needless version bump. (Don Zickus)
+- Add dist-fedora-release target (Don Zickus)
+- Remove redundant code in dist-release (Don Zickus)
+- Makefile.common rename TAG to _TAG (Don Zickus)
+- Fedora config change (Justin M. Forbes)
+- Fedora filter update (Justin M. Forbes)
+- Config update for Fedora (Justin M. Forbes)
+- enable PROTECTED_VIRTUALIZATION_GUEST for all s390x kernels (Dan Horák)
+- redhat: ark: enable CONFIG_NET_SCH_TAPRIO (Davide Caratti)
+- redhat: ark: enable CONFIG_NET_SCH_ETF (Davide Caratti)
+- More Fedora config updates (Justin M. Forbes)
+- New config deps (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- First half of config updates for Fedora (Justin M. Forbes)
+- Updates for Fedora arm architectures for the 5.9 window (Peter Robinson)
+- Merge 5.9 config changes from Peter Robinson (Justin M. Forbes)
+- Add config options that only show up when we prep on arm (Justin M. Forbes)
+- Config updates for Fedora (Justin M. Forbes)
+- fedora: enable energy model (Peter Robinson)
+- Use the configs/generic config for SND_HDA_INTEL everywhere (Peter Robinson)
+- Enable ZSTD compression algorithm on all kernels (Peter Robinson)
+- Enable ARM_SMCCC_SOC_ID on all aarch64 kernels (Peter Robinson)
+- iio: enable LTR-559 light and proximity sensor (Peter Robinson)
+- iio: chemical: enable some popular chemical and particle sensors (Peter Robinson)
+- More mismatches (Justin M. Forbes)
+- Fedora config change due to deps (Justin M. Forbes)
+- CONFIG_SND_SOC_MAX98390 is now selected by SND_SOC_INTEL_DA7219_MAX98357A_GENERIC (Justin M. Forbes)
+- Config change required for build part 2 (Justin M. Forbes)
+- Config change required for build (Justin M. Forbes)
+- Fedora config update (Justin M. Forbes)
+- Add ability to sync upstream through Makefile (Don Zickus)
+- Add master merge check (Don Zickus)
+- Replace hardcoded values 'os-build' and project id with variables (Don Zickus)
+- redhat/Makefile.common: Fix MARKER (Prarit Bhargava)
+- gitattributes: Remove unnecessary export restrictions (Prarit Bhargava)
+- Add new certs for dual signing with boothole (Justin M. Forbes)
+- Update secureboot signing for dual keys (Justin M. Forbes)
+- fedora: enable LEDS_SGM3140 for arm configs (Peter Robinson)
+- Enable CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG (Justin M. Forbes)
+- redhat/configs: Fix common CONFIGs (Prarit Bhargava)
+- redhat/configs: General CONFIG cleanups (Prarit Bhargava)
+- redhat/configs: Update & generalize evaluate_configs (Prarit Bhargava)
+- fedora: arm: Update some meson config options (Peter Robinson)
+- redhat/docs: Add Fedora RPM tagging date (Prarit Bhargava)
+- Update config for renamed panel driver. (Peter Robinson)
+- Enable SERIAL_SC16IS7XX for SPI interfaces (Peter Robinson)
+- s390x-zfcpdump: Handle missing Module.symvers file (Don Zickus)
+- Fedora config updates (Justin M. Forbes)
+- redhat/configs: Add .tmp files to .gitignore (Prarit Bhargava)
+- disable uncommon TCP congestion control algorithms (Davide Caratti)
+- Add new bpf man pages (Justin M. Forbes)
+- Add default option for CONFIG_ARM64_BTI_KERNEL to pending-common so that eln kernels build (Justin M. Forbes)
+- redhat/Makefile: Add fedora-configs and rh-configs make targets (Prarit Bhargava)
+- redhat/configs: Use SHA512 for module signing (Prarit Bhargava)
+- genspec.sh: 'touch' empty Patchlist file for single tarball (Don Zickus)
+- Fedora config update for rc1 (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- redhat/Makefile.common: fix RPMKSUBLEVEL condition (Ondrej Mosnacek)
+- redhat/Makefile: silence KABI tar output (Ondrej Mosnacek)
+- One more Fedora config update (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fix PATCHLEVEL for merge window (Justin M. Forbes)
+- Change ark CONFIG_COMMON_CLK to yes, it is selected already by other options (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- More module filtering for Fedora (Justin M. Forbes)
+- Update filters for rnbd in Fedora (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fix up module filtering for 5.8 (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- More Fedora config work (Justin M. Forbes)
+- RTW88BE and CE have been extracted to their own modules (Justin M. Forbes)
+- Set CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK for Fedora (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Arm64 Use Branch Target Identification for kernel (Justin M. Forbes)
+- Change value of CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fix configs for Fedora (Justin M. Forbes)
+- Add zero-commit to format-patch options (Justin M. Forbes)
+- Copy Makefile.rhelver as a source file rather than a patch (Jeremy Cline)
+- Move the sed to clear the patch templating outside of conditionals (Justin M. Forbes)
+- Match template format in kernel.spec.template (Justin M. Forbes)
+- Break out the Patches into individual files for dist-git (Justin M. Forbes)
+- Break the Red Hat patch into individual commits (Jeremy Cline)
+- Fix update_scripts.sh unselective pattern sub (David Howells)
+- Add cec to the filter overrides (Justin M. Forbes)
+- Add overrides to filter-modules.sh (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_SMC91X and disable CONFIG_SMC911X (Prarit Bhargava) [1722136]
+- Include bpftool-struct_ops man page in the bpftool package (Jeremy Cline)
+- Add sharedbuffer_configuration.py to the pathfix.py script (Jeremy Cline)
+- Use __make macro instead of make (Tom Stellard)
+- Sign off generated configuration patches (Jeremy Cline)
+- Drop the static path configuration for the Sphinx docs (Jeremy Cline)
+- redhat: Add dummy-module kernel module (Prarit Bhargava)
+- redhat: enable CONFIG_LWTUNNEL_BPF (Jiri Benc)
+- Remove typoed config file aarch64CONFIG_SM_GCC_8150 (Justin M. Forbes)
+- Add Documentation back to kernel-devel as it has Kconfig now (Justin M. Forbes)
+- Copy distro files rather than moving them (Jeremy Cline)
+- kernel.spec: fix 'make scripts' for kernel-devel package (Brian Masney)
+- Makefile: correct help text for dist-cross-<arch>-rpms (Brian Masney)
+- redhat/Makefile: Fix RHEL8 python warning (Prarit Bhargava)
+- redhat: Change Makefile target names to dist- (Prarit Bhargava)
+- configs: Disable Serial IR driver (Prarit Bhargava)
+- Fix "multiple %%files for package kernel-tools" (Pablo Greco)
+- Introduce a Sphinx documentation project (Jeremy Cline)
+- Build ARK against ELN (Don Zickus)
+- Drop the requirement to have a remote called linus (Jeremy Cline)
+- Rename 'internal' branch to 'os-build' (Don Zickus)
+- Only include open merge requests with "Include in Releases" label (Jeremy Cline)
+- Package gpio-watch in kernel-tools (Jeremy Cline)
+- Exit non-zero if the tag already exists for a release (Jeremy Cline)
+- Adjust the changelog update script to not push anything (Jeremy Cline)
+- Drop --target noarch from the rh-rpms make target (Jeremy Cline)
+- Add a script to generate release tags and branches (Jeremy Cline)
+- Set CONFIG_VDPA for fedora (Justin M. Forbes)
+- Add a README to the dist-git repository (Jeremy Cline)
+- Provide defaults in ark-rebase-patches.sh (Jeremy Cline)
+- Default ark-rebase-patches.sh to not report issues (Jeremy Cline)
+- Drop DIST from release commits and tags (Jeremy Cline)
+- Place the buildid before the dist in the release (Jeremy Cline)
+- Sync up with Fedora arm configuration prior to merging (Jeremy Cline)
+- Disable CONFIG_PROTECTED_VIRTUALIZATION_GUEST for zfcpdump (Jeremy Cline)
+- Add RHMAINTAINERS file and supporting conf (Don Zickus)
+- Add a script to test if all commits are signed off (Jeremy Cline)
+- Fix make rh-configs-arch (Don Zickus)
+- Drop RH_FEDORA in favor of the now-merged RHEL_DIFFERENCES (Jeremy Cline)
+- Sync up Fedora configs from the first week of the merge window (Jeremy Cline)
+- Migrate blacklisting floppy.ko to mod-blacklist.sh (Don Zickus)
+- kernel packaging: Combine mod-blacklist.sh and mod-extra-blacklist.sh (Don Zickus)
+- kernel packaging: Fix extra namespace collision (Don Zickus)
+- mod-extra.sh: Rename to mod-blacklist.sh (Don Zickus)
+- mod-extra.sh: Make file generic (Don Zickus)
+- Fix a painfully obvious YAML syntax error in .gitlab-ci.yml (Jeremy Cline)
+- Add in armv7hl kernel header support (Don Zickus)
+- Disable all BuildKernel commands when only building headers (Don Zickus)
+- Drop any gitlab-ci patches from ark-patches (Jeremy Cline)
+- Build the srpm for internal branch CI using the vanilla tree (Jeremy Cline)
+- Pull in the latest ARM configurations for Fedora (Jeremy Cline)
+- Fix xz memory usage issue (Neil Horman)
+- Use ark-latest instead of master for update script (Jeremy Cline)
+- Move the CI jobs back into the ARK repository (Jeremy Cline)
+- Sync up ARK's Fedora config with the dist-git repository (Jeremy Cline)
+- Pull in the latest configuration changes from Fedora (Jeremy Cline)
+- configs: enable CONFIG_NET_SCH_CBS (Marcelo Ricardo Leitner)
+- Drop configuration options in fedora/ that no longer exist (Jeremy Cline)
+- Set RH_FEDORA for ARK and Fedora (Jeremy Cline)
+- redhat/kernel.spec: Include the release in the kernel COPYING file (Jeremy Cline)
+- redhat/kernel.spec: add scripts/jobserver-exec to py3_shbang_opts list (Jeremy Cline)
+- redhat/kernel.spec: package bpftool-gen man page (Jeremy Cline)
+- distgit-changelog: handle multiple y-stream BZ numbers (Bruno Meneguele)
+- redhat/kernel.spec: remove all inline comments (Bruno Meneguele)
+- redhat/genspec: awk unknown whitespace regex pattern (Bruno Meneguele)
+- Improve the readability of gen_config_patches.sh (Jeremy Cline)
+- Fix some awkward edge cases in gen_config_patches.sh (Jeremy Cline)
+- Update the CI environment to use Fedora 31 (Jeremy Cline)
+- redhat: drop whitespace from with_gcov macro (Jan Stancek)
+- configs: Enable CONFIG_KEY_DH_OPERATIONS on ARK (Ondrej Mosnacek)
+- configs: Adjust CONFIG_MPLS_ROUTING and CONFIG_MPLS_IPTUNNEL (Laura Abbott)
+- New configs in lib/crypto (Jeremy Cline)
+- New configs in drivers/char (Jeremy Cline)
+- Turn on BLAKE2B for Fedora (Jeremy Cline)
+- kernel.spec.template: Clean up stray *.h.s files (Laura Abbott)
+- Build the SRPM in the CI job (Jeremy Cline)
+- New configs in net/tls (Jeremy Cline)
+- New configs in net/tipc (Jeremy Cline)
+- New configs in lib/kunit (Jeremy Cline)
+- Fix up released_kernel case (Laura Abbott)
+- New configs in lib/Kconfig.debug (Jeremy Cline)
+- New configs in drivers/ptp (Jeremy Cline)
+- New configs in drivers/nvme (Jeremy Cline)
+- New configs in drivers/net/phy (Jeremy Cline)
+- New configs in arch/arm64 (Jeremy Cline)
+- New configs in drivers/crypto (Jeremy Cline)
+- New configs in crypto/Kconfig (Jeremy Cline)
+- Add label so the Gitlab to email bridge ignores the changelog (Jeremy Cline)
+- Temporarily switch TUNE_DEFAULT to y (Jeremy Cline)
+- Run config test for merge requests and internal (Jeremy Cline)
+- Add missing licensedir line (Laura Abbott)
+- redhat/scripts: Remove redhat/scripts/rh_get_maintainer.pl (Prarit Bhargava)
+- configs: Take CONFIG_DEFAULT_MMAP_MIN_ADDR from Fedora (Laura Abbott)
+- configs: Turn off ISDN (Laura Abbott)
+- Add a script to generate configuration patches (Laura Abbott)
+- Introduce rh-configs-commit (Laura Abbott)
+- kernel-packaging: Remove kernel files from kernel-modules-extra package (Prarit Bhargava)
+- configs: Enable CONFIG_DEBUG_WX (Laura Abbott)
+- configs: Disable wireless USB (Laura Abbott)
+- Clean up some temporary config files (Laura Abbott)
+- configs: New config in drivers/gpu for v5.4-rc1 (Jeremy Cline)
+- configs: New config in arch/powerpc for v5.4-rc1 (Jeremy Cline)
+- configs: New config in crypto for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/usb for v5.4-rc1 (Jeremy Cline)
+- AUTOMATIC: New configs (Jeremy Cline)
+- Skip ksamples for bpf, they are broken (Jeremy Cline)
+- configs: New config in fs/erofs for v5.4-rc1 (Jeremy Cline)
+- configs: New config in mm for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/md for v5.4-rc1 (Jeremy Cline)
+- configs: New config in init for v5.4-rc1 (Jeremy Cline)
+- configs: New config in fs/fuse for v5.4-rc1 (Jeremy Cline)
+- merge.pl: Avoid comments but do not skip them (Don Zickus)
+- configs: New config in drivers/net/ethernet/pensando for v5.4-rc1 (Jeremy Cline)
+- Update a comment about what released kernel means (Laura Abbott)
+- Provide both Fedora and RHEL files in the SRPM (Laura Abbott)
+- kernel.spec.template: Trim EXTRAVERSION in the Makefile (Laura Abbott)
+- kernel.spec.template: Add macros for building with nopatches (Laura Abbott)
+- kernel.spec.template: Add some macros for Fedora differences (Laura Abbott)
+- kernel.spec.template: Consolidate the options (Laura Abbott)
+- configs: Add pending directory to Fedora (Laura Abbott)
+- kernel.spec.template: Don't run hardlink if rpm-ostree is in use (Laura Abbott)
+- configs: New config in net/can for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/net/phy for v5.4-rc1 (Jeremy Cline)
+- configs: Increase x86_64 NR_UARTS to 64 (Prarit Bhargava) [1730649]
+- configs: turn on ARM64_FORCE_52BIT for debug builds (Jeremy Cline)
+- kernel.spec.template: Tweak the python3 mangling (Laura Abbott)
+- kernel.spec.template: Add --with verbose option (Laura Abbott)
+- kernel.spec.template: Switch to using %%install instead of %%__install (Laura Abbott)
+- kernel.spec.template: Make the kernel.org URL https (Laura Abbott)
+- kernel.spec.template: Update message about secure boot signing (Laura Abbott)
+- kernel.spec.template: Move some with flags definitions up (Laura Abbott)
+- kernel.spec.template: Update some BuildRequires (Laura Abbott)
+- kernel.spec.template: Get rid of %%clean (Laura Abbott)
+- configs: New config in drivers/char for v5.4-rc1 (Jeremy Cline)
+- configs: New config in net/sched for v5.4-rc1 (Jeremy Cline)
+- configs: New config in lib for v5.4-rc1 (Jeremy Cline)
+- configs: New config in fs/verity for v5.4-rc1 (Jeremy Cline)
+- configs: New config in arch/aarch64 for v5.4-rc4 (Jeremy Cline)
+- configs: New config in arch/arm64 for v5.4-rc1 (Jeremy Cline)
+- Flip off CONFIG_ARM64_VA_BITS_52 so the bundle that turns it on applies (Jeremy Cline)
+- New configuration options for v5.4-rc4 (Jeremy Cline)
+- Correctly name tarball for single tarball builds (Laura Abbott)
+- configs: New config in drivers/pci for v5.4-rc1 (Jeremy Cline)
+- Allow overriding the dist tag on the command line (Laura Abbott)
+- Allow scratch branch target to be overridden (Laura Abbott)
+- Remove long dead BUILD_DEFAULT_TARGET (Laura Abbott)
+- Amend the changelog when rebasing (Laura Abbott)
+- configs: New config in drivers/platform for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/pinctrl for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/net/wireless for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/net/ethernet/mellanox for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/net/can for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/hid for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/dma-buf for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/crypto for v5.4-rc1 (Jeremy Cline)
+- configs: New config in arch/s390 for v5.4-rc1 (Jeremy Cline)
+- configs: New config in block for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/cpuidle for v5.4-rc1 (Jeremy Cline)
+- redhat: configs: Split CONFIG_CRYPTO_SHA512 (Laura Abbott)
+- redhat: Set Fedora options (Laura Abbott)
+- Set CRYPTO_SHA3_*_S390 to builtin on zfcpdump (Jeremy Cline)
+- configs: New config in drivers/edac for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/firmware for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/hwmon for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/iio for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/mmc for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/tty for v5.4-rc1 (Jeremy Cline)
+- configs: New config in arch/s390 for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/bus for v5.4-rc1 (Jeremy Cline)
+- Add option to allow mismatched configs on the command line (Laura Abbott)
+- configs: New config in drivers/crypto for v5.4-rc1 (Jeremy Cline)
+- configs: New config in sound/pci for v5.4-rc1 (Jeremy Cline)
+- configs: New config in sound/soc for v5.4-rc1 (Jeremy Cline)
+- gitlab: Add CI job for packaging scripts (Major Hayden)
+- Speed up CI with CKI image (Major Hayden)
+- Disable e1000 driver in ARK (Neil Horman)
+- configs: Fix the pending default for CONFIG_ARM64_VA_BITS_52 (Jeremy Cline)
+- configs: Turn on OPTIMIZE_INLINING for everything (Jeremy Cline)
+- configs: Set valid pending defaults for CRYPTO_ESSIV (Jeremy Cline)
+- Add an initial CI configuration for the internal branch (Jeremy Cline)
+- New drop of configuration options for v5.4-rc1 (Jeremy Cline)
+- New drop of configuration options for v5.4-rc1 (Jeremy Cline)
+- Pull the RHEL version defines out of the Makefile (Jeremy Cline)
+- Sync up the ARK build scripts (Jeremy Cline)
+- Sync up the Fedora Rawhide configs (Jeremy Cline)
+- Sync up the ARK config files (Jeremy Cline)
+- configs: Adjust CONFIG_FORCE_MAX_ZONEORDER for Fedora (Laura Abbott)
+- configs: Add README for some other arches (Laura Abbott)
+- configs: Sync up Fedora configs (Laura Abbott)
+- [initial commit] Add structure for building with git (Laura Abbott)
+- [initial commit] Add Red Hat variables in the top level makefile (Laura Abbott)
+- [initial commit] Red Hat gitignore and attributes (Laura Abbott)
+- [initial commit] Add changelog (Laura Abbott)
+- [initial commit] Add makefile (Laura Abbott)
+- [initial commit] Add files for generating the kernel.spec (Laura Abbott)
+- [initial commit] Add rpm directory (Laura Abbott)
+- [initial commit] Add files for packaging (Laura Abbott)
+- [initial commit] Add kabi files (Laura Abbott)
+- [initial commit] Add scripts (Laura Abbott)
+- [initial commit] Add configs (Laura Abbott)
+- [initial commit] Add Makefiles (Laura Abbott)
+- Linux v6.7.0-0.rc0.5a6a09e97199
+Resolves: rhbz#1471185, rhbz#1495307, rhbz#1509329, rhbz#1518076, rhbz#1518874, rhbz#1519554, rhbz#1546831, rhbz#1559877, rhbz#1561171, rhbz#1563590, rhbz#1565704, rhbz#1565717, rhbz#1572321, rhbz#1574502, rhbz#1590829, rhbz#1595918, rhbz#1598366, rhbz#1602033, rhbz#1609604, rhbz#1610493, rhbz#1613522, rhbz#1638087, rhbz#1652256, rhbz#1652266, rhbz#1663728, rhbz#1670017, rhbz#1722136, rhbz#1730649, rhbz#1802694, rhbz#1810301, rhbz#1821565, rhbz#1831065, rhbz#1855161, rhbz#1856174, rhbz#1856176, rhbz#1858592, rhbz#1858594, rhbz#1858596, rhbz#1858599, rhbz#1869674, rhbz#1871130, rhbz#1876435, rhbz#1876436, rhbz#1876977, rhbz#1877192, rhbz#1880486, rhbz#1890304, rhbz#1903201, rhbz#1915073, rhbz#1915290, rhbz#1930649, rhbz#1939095, rhbz#1940075, rhbz#1940794, rhbz#1943423, rhbz#1945002, rhbz#1945179, rhbz#1945477, rhbz#1947240, rhbz#1948340, rhbz#1952426, rhbz#1952863, rhbz#1953486, rhbz#1956988, rhbz#1957210, rhbz#1957219, rhbz#1957305, rhbz#1957636, rhbz#1957819, rhbz#1961178, rhbz#1962936, rhbz#1964537, rhbz#1967640, rhbz#1972795, rhbz#1976270, rhbz#1976835, rhbz#1976877, rhbz#1976884, rhbz#1977056, rhbz#1977529, rhbz#1978539, rhbz#1979379, rhbz#1981406, rhbz#1983298, rhbz#1986223, rhbz#1988254, rhbz#1988384, rhbz#1990040, rhbz#1993393, rhbz#1994858, rhbz#1998953, rhbz#2000835, rhbz#2002344, rhbz#2004233, rhbz#2004821, rhbz#2006813, rhbz#2007430, rhbz#2012226, rhbz#2014492, rhbz#2019377, rhbz#2020132, rhbz#2022578, rhbz#2023782, rhbz#2024595, rhbz#2025985, rhbz#2026319, rhbz#2027506, rhbz#2031547, rhbz#2032758, rhbz#2034670, rhbz#2038999, rhbz#2040643, rhbz#2041184, rhbz#2041186, rhbz#2041365, rhbz#2041990, rhbz#2042240, rhbz#2042241, rhbz#2043141, rhbz#2044155, rhbz#2053836, rhbz#2054579, rhbz#2062054, rhbz#2062909, rhbz#2071969, rhbz#2089765, rhbz#2115876, rhbz#2122595, rhbz#2140017, rhbz#2142658, rhbz#2149273, rhbz#2153073, rhbz#2188441, rhbz#2208834, rhbz#2216678, rhbz#2227793, rhbz#2231407
diff --git a/SOURCES/linux-surface.patch b/SOURCES/linux-surface.patch
index 2de6bab..087417f 100644
--- a/SOURCES/linux-surface.patch
+++ b/SOURCES/linux-surface.patch
@@ -1,4 +1,4 @@
-From da55b6ffe4a98a4af6ced4074317ba9d026f84dd Mon Sep 17 00:00:00 2001
+From c9479d2ee549e4b5392c5f788d9905244404e207 Mon Sep 17 00:00:00 2001
From: Tsuchiya Yuto <kitakar@gmail.com>
Date: Sun, 18 Oct 2020 16:42:44 +0900
Subject: [PATCH] (surface3-oemb) add DMI matches for Surface 3 with broken DMI
@@ -40,7 +40,7 @@ Patchset: surface3-oemb
3 files changed, 24 insertions(+)
diff --git a/drivers/platform/surface/surface3-wmi.c b/drivers/platform/surface/surface3-wmi.c
-index ca4602bcc7dea..490b9731068ae 100644
+index c15ed7a12784..1ec8edb5aafa 100644
--- a/drivers/platform/surface/surface3-wmi.c
+++ b/drivers/platform/surface/surface3-wmi.c
@@ -37,6 +37,13 @@ static const struct dmi_system_id surface3_dmi_table[] = {
@@ -58,10 +58,10 @@ index ca4602bcc7dea..490b9731068ae 100644
{ }
};
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
-index 7938b52d741d8..2d5f83b0cdb0b 100644
+index edcb85bd8ea7..cea19fa3fa56 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
-@@ -3746,6 +3746,15 @@ static const struct dmi_system_id dmi_platform_data[] = {
+@@ -3753,6 +3753,15 @@ static const struct dmi_system_id dmi_platform_data[] = {
},
.driver_data = (void *)&intel_braswell_platform_data,
},
@@ -78,7 +78,7 @@ index 7938b52d741d8..2d5f83b0cdb0b 100644
/*
* Match for the GPDwin which unfortunately uses somewhat
diff --git a/sound/soc/intel/common/soc-acpi-intel-cht-match.c b/sound/soc/intel/common/soc-acpi-intel-cht-match.c
-index cdcbf04b8832f..958305779b125 100644
+index 5e2ec60e2954..207868c699f2 100644
--- a/sound/soc/intel/common/soc-acpi-intel-cht-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-cht-match.c
@@ -27,6 +27,14 @@ static const struct dmi_system_id cht_table[] = {
@@ -97,9 +97,9 @@ index cdcbf04b8832f..958305779b125 100644
};
--
-2.42.0
+2.43.0
-From 35b3c5195c9fc191de6b5a6e4361762aa37edad2 Mon Sep 17 00:00:00 2001
+From 38181ea8d1f9130ce6d677d306f819d2fa3b5f57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= <verdre@v0yd.nl>
Date: Tue, 3 Nov 2020 13:28:04 +0100
Subject: [PATCH] mwifiex: Add quirk resetting the PCI bridge on MS Surface
@@ -133,11 +133,11 @@ Patchset: mwifiex
3 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
-index 6697132ecc977..f06b4ebc5bd8e 100644
+index 5f997becdbaa..9a9929424513 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
-@@ -1771,9 +1771,21 @@ mwifiex_pcie_send_boot_cmd(struct mwifiex_adapter *adapter, struct sk_buff *skb)
- static int mwifiex_pcie_init_fw_port(struct mwifiex_adapter *adapter)
+@@ -1702,9 +1702,21 @@ mwifiex_pcie_send_boot_cmd(struct mwifiex_adapter *adapter, struct sk_buff *skb)
+ static void mwifiex_pcie_init_fw_port(struct mwifiex_adapter *adapter)
{
struct pcie_service_card *card = adapter->card;
+ struct pci_dev *pdev = card->dev;
@@ -156,10 +156,10 @@ index 6697132ecc977..f06b4ebc5bd8e 100644
+ pci_reset_function(parent_pdev);
+
/* Write the RX ring read pointer in to reg->rx_rdptr */
- if (mwifiex_write_reg(adapter, reg->rx_rdptr, card->rxbd_rdptr |
- tx_wrap)) {
+ mwifiex_write_reg(adapter, reg->rx_rdptr, card->rxbd_rdptr | tx_wrap);
+ }
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-index dd6d21f1dbfd7..f46b06f8d6435 100644
+index dd6d21f1dbfd..f46b06f8d643 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
@@ -13,7 +13,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
@@ -252,7 +252,7 @@ index dd6d21f1dbfd7..f46b06f8d6435 100644
static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev)
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-index d6ff964aec5bf..5d30ae39d65ec 100644
+index d6ff964aec5b..5d30ae39d65e 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
@@ -4,6 +4,7 @@
@@ -264,9 +264,9 @@ index d6ff964aec5bf..5d30ae39d65ec 100644
void mwifiex_initialize_quirks(struct pcie_service_card *card);
int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev);
--
-2.42.0
+2.43.0
-From 241da24644ea2f5b8119019448b638aa8df6ab26 Mon Sep 17 00:00:00 2001
+From 86149f1c99b17f67d717419af83f3ec76315e35b Mon Sep 17 00:00:00 2001
From: Tsuchiya Yuto <kitakar@gmail.com>
Date: Sun, 4 Oct 2020 00:11:49 +0900
Subject: [PATCH] mwifiex: pcie: disable bridge_d3 for Surface gen4+
@@ -288,10 +288,10 @@ Patchset: mwifiex
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
-index f06b4ebc5bd8e..07f13b52ddb92 100644
+index 9a9929424513..2273e3029776 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
-@@ -370,6 +370,7 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
+@@ -377,6 +377,7 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct pcie_service_card *card;
@@ -299,7 +299,7 @@ index f06b4ebc5bd8e..07f13b52ddb92 100644
int ret;
pr_debug("info: vendor=0x%4.04X device=0x%4.04X rev=%d\n",
-@@ -411,6 +412,12 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
+@@ -418,6 +419,12 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
return -1;
}
@@ -313,7 +313,7 @@ index f06b4ebc5bd8e..07f13b52ddb92 100644
}
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-index f46b06f8d6435..99b024ecbadea 100644
+index f46b06f8d643..99b024ecbade 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
@@ -14,7 +14,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
@@ -407,7 +407,7 @@ index f46b06f8d6435..99b024ecbadea 100644
static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev)
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-index 5d30ae39d65ec..c14eb56eb9118 100644
+index 5d30ae39d65e..c14eb56eb911 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
+++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
@@ -5,6 +5,7 @@
@@ -419,9 +419,9 @@ index 5d30ae39d65ec..c14eb56eb9118 100644
void mwifiex_initialize_quirks(struct pcie_service_card *card);
int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev);
--
-2.42.0
+2.43.0
-From d20b58f9e2ccec57c66864e79c291c2618ab2dbe Mon Sep 17 00:00:00 2001
+From 23775dc0be26e58d04574ab75768cedd8b0076f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= <verdre@v0yd.nl>
Date: Thu, 25 Mar 2021 11:33:02 +0100
Subject: [PATCH] Bluetooth: btusb: Lower passive lescan interval on Marvell
@@ -457,7 +457,7 @@ Patchset: mwifiex
1 file changed, 15 insertions(+)
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
-index 499f4809fcdf3..2d442e080ca28 100644
+index b8e9de887b5d..66a418ae9584 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -65,6 +65,7 @@ static struct usb_driver btusb_driver;
@@ -476,7 +476,7 @@ index 499f4809fcdf3..2d442e080ca28 100644
/* Intel Bluetooth devices */
{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED },
-@@ -4388,6 +4390,19 @@ static int btusb_probe(struct usb_interface *intf,
+@@ -4399,6 +4401,19 @@ static int btusb_probe(struct usb_interface *intf,
if (id->driver_info & BTUSB_MARVELL)
hdev->set_bdaddr = btusb_set_bdaddr_marvell;
@@ -497,9 +497,9 @@ index 499f4809fcdf3..2d442e080ca28 100644
(id->driver_info & BTUSB_MEDIATEK)) {
hdev->setup = btusb_mtk_setup;
--
-2.42.0
+2.43.0
-From c6f0985fae241ed43ea1245c9e5861e2c728e21e Mon Sep 17 00:00:00 2001
+From 825328cce718ba6de0fce529e8fd1f4cd6b94dde Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sat, 27 Feb 2021 00:45:52 +0100
Subject: [PATCH] ath10k: Add module parameters to override board files
@@ -521,7 +521,7 @@ Patchset: ath10k
1 file changed, 58 insertions(+)
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
-index 6cdb225b7eacc..19c036751fb16 100644
+index 6cdb225b7eac..19c036751fb1 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -38,6 +38,9 @@ static bool fw_diag_log;
@@ -618,9 +618,9 @@ index 6cdb225b7eacc..19c036751fb16 100644
ret = firmware_request_nowarn(&fw, filename, ar->dev);
ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n",
--
-2.42.0
+2.43.0
-From 986fe56f682f93925b2964f59fe78c7043758e47 Mon Sep 17 00:00:00 2001
+From f4e5ac291e877f3e7e5d888f4965310eb85379f5 Mon Sep 17 00:00:00 2001
From: Dorian Stoll <dorian.stoll@tmsp.io>
Date: Thu, 30 Jul 2020 13:21:53 +0200
Subject: [PATCH] misc: mei: Add missing IPTS device IDs
@@ -632,7 +632,7 @@ Patchset: ipts
2 files changed, 2 insertions(+)
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
-index bdc65d50b945f..08723c01d7275 100644
+index 961e5d53a27a..860f99b6ecd6 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -92,6 +92,7 @@
@@ -644,7 +644,7 @@ index bdc65d50b945f..08723c01d7275 100644
#define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
-index 676d566f38ddf..6b37dd1f8b2a3 100644
+index 676d566f38dd..6b37dd1f8b2a 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -97,6 +97,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
@@ -656,9 +656,9 @@ index 676d566f38ddf..6b37dd1f8b2a3 100644
{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)},
--
-2.42.0
+2.43.0
-From 72ee1cbf26ccc575dbfbaee5e7305ab13e1aeb1e Mon Sep 17 00:00:00 2001
+From 4c91dcde022856325e3babe1a1b9e01fcc21ab0f Mon Sep 17 00:00:00 2001
From: Liban Hannan <liban.p@gmail.com>
Date: Tue, 12 Apr 2022 23:31:12 +0100
Subject: [PATCH] iommu: ipts: use IOMMU passthrough mode for IPTS
@@ -680,7 +680,7 @@ Patchset: ipts
1 file changed, 24 insertions(+)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index 3685ba90ec88e..5a627e081797c 100644
+index 897159dba47d..cc6569613255 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -38,6 +38,8 @@
@@ -692,7 +692,7 @@ index 3685ba90ec88e..5a627e081797c 100644
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
#define IOAPIC_RANGE_START (0xfee00000)
-@@ -292,12 +294,14 @@ int intel_iommu_enabled = 0;
+@@ -291,12 +293,14 @@ int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
static int dmar_map_gfx = 1;
@@ -706,8 +706,8 @@ index 3685ba90ec88e..5a627e081797c 100644
+#define IDENTMAP_IPTS 16
const struct iommu_ops intel_iommu_ops;
-
-@@ -2542,6 +2546,9 @@ static int device_def_domain_type(struct device *dev)
+ static const struct iommu_dirty_ops intel_dirty_ops;
+@@ -2548,6 +2552,9 @@ static int device_def_domain_type(struct device *dev)
if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
return IOMMU_DOMAIN_IDENTITY;
@@ -717,7 +717,7 @@ index 3685ba90ec88e..5a627e081797c 100644
}
return 0;
-@@ -2849,6 +2856,9 @@ static int __init init_dmars(void)
+@@ -2855,6 +2862,9 @@ static int __init init_dmars(void)
if (!dmar_map_gfx)
iommu_identity_mapping |= IDENTMAP_GFX;
@@ -727,7 +727,7 @@ index 3685ba90ec88e..5a627e081797c 100644
check_tylersburg_isoch();
ret = si_domain_init(hw_pass_through);
-@@ -4828,6 +4838,17 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
+@@ -4977,6 +4987,17 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
dmar_map_gfx = 0;
}
@@ -745,7 +745,7 @@ index 3685ba90ec88e..5a627e081797c 100644
/* G4x/GM45 integrated gfx dmar support is totally busted. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
-@@ -4863,6 +4884,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
+@@ -5012,6 +5033,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
@@ -756,9 +756,9 @@ index 3685ba90ec88e..5a627e081797c 100644
{
if (risky_device(dev))
--
-2.42.0
+2.43.0
-From 8330f9f39ce8c9796259a8aeffe919fa950e18f5 Mon Sep 17 00:00:00 2001
+From 7a9591af425eafbb76700f7ab1ab3ae0c3a08e4c Mon Sep 17 00:00:00 2001
From: Dorian Stoll <dorian.stoll@tmsp.io>
Date: Sun, 11 Dec 2022 12:00:59 +0100
Subject: [PATCH] hid: Add support for Intel Precise Touch and Stylus
@@ -825,10 +825,10 @@ Patchset: ipts
create mode 100644 drivers/hid/ipts/thread.h
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
-index 790aa908e2a78..0b9d245d10e54 100644
+index 4ce74af79657..86c6c815bd5b 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
-@@ -1345,4 +1345,6 @@ source "drivers/hid/amd-sfh-hid/Kconfig"
+@@ -1341,4 +1341,6 @@ source "drivers/hid/amd-sfh-hid/Kconfig"
source "drivers/hid/surface-hid/Kconfig"
@@ -836,7 +836,7 @@ index 790aa908e2a78..0b9d245d10e54 100644
+
endif # HID_SUPPORT
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
-index 8a06d0f840bcb..2ef21b257d0b5 100644
+index 8a06d0f840bc..2ef21b257d0b 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -169,3 +169,5 @@ obj-$(INTEL_ISH_FIRMWARE_DOWNLOADER) += intel-ish-hid/
@@ -847,7 +847,7 @@ index 8a06d0f840bcb..2ef21b257d0b5 100644
+obj-$(CONFIG_HID_IPTS) += ipts/
diff --git a/drivers/hid/ipts/Kconfig b/drivers/hid/ipts/Kconfig
new file mode 100644
-index 0000000000000..297401bd388dd
+index 000000000000..297401bd388d
--- /dev/null
+++ b/drivers/hid/ipts/Kconfig
@@ -0,0 +1,14 @@
@@ -867,7 +867,7 @@ index 0000000000000..297401bd388dd
+ module will be called ipts.
diff --git a/drivers/hid/ipts/Makefile b/drivers/hid/ipts/Makefile
new file mode 100644
-index 0000000000000..883896f68e6ad
+index 000000000000..883896f68e6a
--- /dev/null
+++ b/drivers/hid/ipts/Makefile
@@ -0,0 +1,16 @@
@@ -889,7 +889,7 @@ index 0000000000000..883896f68e6ad
+ipts-objs += thread.o
diff --git a/drivers/hid/ipts/cmd.c b/drivers/hid/ipts/cmd.c
new file mode 100644
-index 0000000000000..63a4934bbc5fa
+index 000000000000..63a4934bbc5f
--- /dev/null
+++ b/drivers/hid/ipts/cmd.c
@@ -0,0 +1,61 @@
@@ -956,7 +956,7 @@ index 0000000000000..63a4934bbc5fa
+}
diff --git a/drivers/hid/ipts/cmd.h b/drivers/hid/ipts/cmd.h
new file mode 100644
-index 0000000000000..2b4079075b642
+index 000000000000..2b4079075b64
--- /dev/null
+++ b/drivers/hid/ipts/cmd.h
@@ -0,0 +1,60 @@
@@ -1022,7 +1022,7 @@ index 0000000000000..2b4079075b642
+#endif /* IPTS_CMD_H */
diff --git a/drivers/hid/ipts/context.h b/drivers/hid/ipts/context.h
new file mode 100644
-index 0000000000000..ba33259f1f7c5
+index 000000000000..ba33259f1f7c
--- /dev/null
+++ b/drivers/hid/ipts/context.h
@@ -0,0 +1,52 @@
@@ -1080,7 +1080,7 @@ index 0000000000000..ba33259f1f7c5
+#endif /* IPTS_CONTEXT_H */
diff --git a/drivers/hid/ipts/control.c b/drivers/hid/ipts/control.c
new file mode 100644
-index 0000000000000..5360842d260ba
+index 000000000000..5360842d260b
--- /dev/null
+++ b/drivers/hid/ipts/control.c
@@ -0,0 +1,486 @@
@@ -1572,7 +1572,7 @@ index 0000000000000..5360842d260ba
+}
diff --git a/drivers/hid/ipts/control.h b/drivers/hid/ipts/control.h
new file mode 100644
-index 0000000000000..26629c5144edb
+index 000000000000..26629c5144ed
--- /dev/null
+++ b/drivers/hid/ipts/control.h
@@ -0,0 +1,126 @@
@@ -1704,7 +1704,7 @@ index 0000000000000..26629c5144edb
+#endif /* IPTS_CONTROL_H */
diff --git a/drivers/hid/ipts/desc.h b/drivers/hid/ipts/desc.h
new file mode 100644
-index 0000000000000..307438c7c80cd
+index 000000000000..307438c7c80c
--- /dev/null
+++ b/drivers/hid/ipts/desc.h
@@ -0,0 +1,80 @@
@@ -1790,7 +1790,7 @@ index 0000000000000..307438c7c80cd
+#endif /* IPTS_DESC_H */
diff --git a/drivers/hid/ipts/eds1.c b/drivers/hid/ipts/eds1.c
new file mode 100644
-index 0000000000000..ecbb3a8bdaf60
+index 000000000000..ecbb3a8bdaf6
--- /dev/null
+++ b/drivers/hid/ipts/eds1.c
@@ -0,0 +1,103 @@
@@ -1899,7 +1899,7 @@ index 0000000000000..ecbb3a8bdaf60
+}
diff --git a/drivers/hid/ipts/eds1.h b/drivers/hid/ipts/eds1.h
new file mode 100644
-index 0000000000000..eeeb6575e3e89
+index 000000000000..eeeb6575e3e8
--- /dev/null
+++ b/drivers/hid/ipts/eds1.h
@@ -0,0 +1,35 @@
@@ -1940,7 +1940,7 @@ index 0000000000000..eeeb6575e3e89
+ enum hid_report_type report_type, enum hid_class_request request_type);
diff --git a/drivers/hid/ipts/eds2.c b/drivers/hid/ipts/eds2.c
new file mode 100644
-index 0000000000000..198dc65d78876
+index 000000000000..198dc65d7887
--- /dev/null
+++ b/drivers/hid/ipts/eds2.c
@@ -0,0 +1,144 @@
@@ -2090,7 +2090,7 @@ index 0000000000000..198dc65d78876
+}
diff --git a/drivers/hid/ipts/eds2.h b/drivers/hid/ipts/eds2.h
new file mode 100644
-index 0000000000000..064e3716907ab
+index 000000000000..064e3716907a
--- /dev/null
+++ b/drivers/hid/ipts/eds2.h
@@ -0,0 +1,35 @@
@@ -2131,7 +2131,7 @@ index 0000000000000..064e3716907ab
+ enum hid_report_type report_type, enum hid_class_request request_type);
diff --git a/drivers/hid/ipts/hid.c b/drivers/hid/ipts/hid.c
new file mode 100644
-index 0000000000000..e34a1a4f9fa77
+index 000000000000..e34a1a4f9fa7
--- /dev/null
+++ b/drivers/hid/ipts/hid.c
@@ -0,0 +1,225 @@
@@ -2362,7 +2362,7 @@ index 0000000000000..e34a1a4f9fa77
+}
diff --git a/drivers/hid/ipts/hid.h b/drivers/hid/ipts/hid.h
new file mode 100644
-index 0000000000000..1ebe77447903a
+index 000000000000..1ebe77447903
--- /dev/null
+++ b/drivers/hid/ipts/hid.h
@@ -0,0 +1,24 @@
@@ -2392,7 +2392,7 @@ index 0000000000000..1ebe77447903a
+#endif /* IPTS_HID_H */
diff --git a/drivers/hid/ipts/main.c b/drivers/hid/ipts/main.c
new file mode 100644
-index 0000000000000..fb5b5c13ee3ea
+index 000000000000..fb5b5c13ee3e
--- /dev/null
+++ b/drivers/hid/ipts/main.c
@@ -0,0 +1,126 @@
@@ -2524,7 +2524,7 @@ index 0000000000000..fb5b5c13ee3ea
+MODULE_LICENSE("GPL");
diff --git a/drivers/hid/ipts/mei.c b/drivers/hid/ipts/mei.c
new file mode 100644
-index 0000000000000..1e0395ceae4a4
+index 000000000000..1e0395ceae4a
--- /dev/null
+++ b/drivers/hid/ipts/mei.c
@@ -0,0 +1,188 @@
@@ -2718,7 +2718,7 @@ index 0000000000000..1e0395ceae4a4
+}
diff --git a/drivers/hid/ipts/mei.h b/drivers/hid/ipts/mei.h
new file mode 100644
-index 0000000000000..973bade6b0fdd
+index 000000000000..973bade6b0fd
--- /dev/null
+++ b/drivers/hid/ipts/mei.h
@@ -0,0 +1,66 @@
@@ -2790,7 +2790,7 @@ index 0000000000000..973bade6b0fdd
+#endif /* IPTS_MEI_H */
diff --git a/drivers/hid/ipts/receiver.c b/drivers/hid/ipts/receiver.c
new file mode 100644
-index 0000000000000..ef66c3c9db807
+index 000000000000..ef66c3c9db80
--- /dev/null
+++ b/drivers/hid/ipts/receiver.c
@@ -0,0 +1,250 @@
@@ -3046,7 +3046,7 @@ index 0000000000000..ef66c3c9db807
+}
diff --git a/drivers/hid/ipts/receiver.h b/drivers/hid/ipts/receiver.h
new file mode 100644
-index 0000000000000..3de7da62d40c1
+index 000000000000..3de7da62d40c
--- /dev/null
+++ b/drivers/hid/ipts/receiver.h
@@ -0,0 +1,16 @@
@@ -3068,7 +3068,7 @@ index 0000000000000..3de7da62d40c1
+#endif /* IPTS_RECEIVER_H */
diff --git a/drivers/hid/ipts/resources.c b/drivers/hid/ipts/resources.c
new file mode 100644
-index 0000000000000..cc14653b2a9f5
+index 000000000000..cc14653b2a9f
--- /dev/null
+++ b/drivers/hid/ipts/resources.c
@@ -0,0 +1,131 @@
@@ -3205,7 +3205,7 @@ index 0000000000000..cc14653b2a9f5
+}
diff --git a/drivers/hid/ipts/resources.h b/drivers/hid/ipts/resources.h
new file mode 100644
-index 0000000000000..2068e13285f0e
+index 000000000000..2068e13285f0
--- /dev/null
+++ b/drivers/hid/ipts/resources.h
@@ -0,0 +1,41 @@
@@ -3252,7 +3252,7 @@ index 0000000000000..2068e13285f0e
+#endif /* IPTS_RESOURCES_H */
diff --git a/drivers/hid/ipts/spec-data.h b/drivers/hid/ipts/spec-data.h
new file mode 100644
-index 0000000000000..e8dd98895a7ee
+index 000000000000..e8dd98895a7e
--- /dev/null
+++ b/drivers/hid/ipts/spec-data.h
@@ -0,0 +1,100 @@
@@ -3358,7 +3358,7 @@ index 0000000000000..e8dd98895a7ee
+#endif /* IPTS_SPEC_DATA_H */
diff --git a/drivers/hid/ipts/spec-device.h b/drivers/hid/ipts/spec-device.h
new file mode 100644
-index 0000000000000..41845f9d90257
+index 000000000000..41845f9d9025
--- /dev/null
+++ b/drivers/hid/ipts/spec-device.h
@@ -0,0 +1,290 @@
@@ -3654,7 +3654,7 @@ index 0000000000000..41845f9d90257
+#endif /* IPTS_SPEC_DEVICE_H */
diff --git a/drivers/hid/ipts/spec-hid.h b/drivers/hid/ipts/spec-hid.h
new file mode 100644
-index 0000000000000..5a58d4a0a610f
+index 000000000000..5a58d4a0a610
--- /dev/null
+++ b/drivers/hid/ipts/spec-hid.h
@@ -0,0 +1,34 @@
@@ -3694,7 +3694,7 @@ index 0000000000000..5a58d4a0a610f
+#endif /* IPTS_SPEC_HID_H */
diff --git a/drivers/hid/ipts/thread.c b/drivers/hid/ipts/thread.c
new file mode 100644
-index 0000000000000..355e92bea26f8
+index 000000000000..355e92bea26f
--- /dev/null
+++ b/drivers/hid/ipts/thread.c
@@ -0,0 +1,84 @@
@@ -3784,7 +3784,7 @@ index 0000000000000..355e92bea26f8
+}
diff --git a/drivers/hid/ipts/thread.h b/drivers/hid/ipts/thread.h
new file mode 100644
-index 0000000000000..1f966b8b32c45
+index 000000000000..1f966b8b32c4
--- /dev/null
+++ b/drivers/hid/ipts/thread.h
@@ -0,0 +1,59 @@
@@ -3848,9 +3848,9 @@ index 0000000000000..1f966b8b32c45
+
+#endif /* IPTS_THREAD_H */
--
-2.42.0
+2.43.0
-From 033de13abc9653b2d773f06182465e03d5d6463b Mon Sep 17 00:00:00 2001
+From 8aadfc38967cb2804446c8bdae851377651e6248 Mon Sep 17 00:00:00 2001
From: Dorian Stoll <dorian.stoll@tmsp.io>
Date: Sun, 11 Dec 2022 12:03:38 +0100
Subject: [PATCH] iommu: intel: Disable source id verification for ITHC
@@ -3862,7 +3862,7 @@ Patchset: ithc
1 file changed, 16 insertions(+)
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
-index 29b9e55dcf26c..986e91c813ae1 100644
+index 29b9e55dcf26..986e91c813ae 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -386,6 +386,22 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
@@ -3889,14 +3889,14 @@ index 29b9e55dcf26c..986e91c813ae1 100644
* DMA alias provides us with a PCI device and alias. The only case
* where the it will return an alias on a different bus than the
--
-2.42.0
+2.43.0
-From 0dd32bcfb70f9e36cfa009d94cd6c86a4839cff3 Mon Sep 17 00:00:00 2001
-From: Dorian Stoll <dorian.stoll@tmsp.io>
+From fe08b40d122fdb102c2cc4876d2d68ac19d74ae3 Mon Sep 17 00:00:00 2001
+From: quo <tuple@list.ru>
Date: Sun, 11 Dec 2022 12:10:54 +0100
Subject: [PATCH] hid: Add support for Intel Touch Host Controller
-Based on quo/ithc-linux@55803a2
+Based on quo/ithc-linux@0b8b45d
Signed-off-by: Dorian Stoll <dorian.stoll@tmsp.io>
Patchset: ithc
@@ -3905,14 +3905,14 @@ Patchset: ithc
drivers/hid/Makefile | 1 +
drivers/hid/ithc/Kbuild | 6 +
drivers/hid/ithc/Kconfig | 12 +
- drivers/hid/ithc/ithc-debug.c | 96 ++++++
- drivers/hid/ithc/ithc-dma.c | 258 ++++++++++++++++
- drivers/hid/ithc/ithc-dma.h | 67 +++++
- drivers/hid/ithc/ithc-main.c | 534 ++++++++++++++++++++++++++++++++++
- drivers/hid/ithc/ithc-regs.c | 64 ++++
- drivers/hid/ithc/ithc-regs.h | 186 ++++++++++++
- drivers/hid/ithc/ithc.h | 60 ++++
- 11 files changed, 1286 insertions(+)
+ drivers/hid/ithc/ithc-debug.c | 130 ++++++
+ drivers/hid/ithc/ithc-dma.c | 373 +++++++++++++++++
+ drivers/hid/ithc/ithc-dma.h | 69 ++++
+ drivers/hid/ithc/ithc-main.c | 728 ++++++++++++++++++++++++++++++++++
+ drivers/hid/ithc/ithc-regs.c | 96 +++++
+ drivers/hid/ithc/ithc-regs.h | 189 +++++++++
+ drivers/hid/ithc/ithc.h | 67 ++++
+ 11 files changed, 1673 insertions(+)
create mode 100644 drivers/hid/ithc/Kbuild
create mode 100644 drivers/hid/ithc/Kconfig
create mode 100644 drivers/hid/ithc/ithc-debug.c
@@ -3924,10 +3924,10 @@ Patchset: ithc
create mode 100644 drivers/hid/ithc/ithc.h
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
-index 0b9d245d10e54..8ba1c309228be 100644
+index 86c6c815bd5b..a87c3c6911fb 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
-@@ -1347,4 +1347,6 @@ source "drivers/hid/surface-hid/Kconfig"
+@@ -1343,4 +1343,6 @@ source "drivers/hid/surface-hid/Kconfig"
source "drivers/hid/ipts/Kconfig"
@@ -3935,7 +3935,7 @@ index 0b9d245d10e54..8ba1c309228be 100644
+
endif # HID_SUPPORT
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
-index 2ef21b257d0b5..e94b79727b489 100644
+index 2ef21b257d0b..e94b79727b48 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -171,3 +171,4 @@ obj-$(CONFIG_AMD_SFH_HID) += amd-sfh-hid/
@@ -3945,7 +3945,7 @@ index 2ef21b257d0b5..e94b79727b489 100644
+obj-$(CONFIG_HID_ITHC) += ithc/
diff --git a/drivers/hid/ithc/Kbuild b/drivers/hid/ithc/Kbuild
new file mode 100644
-index 0000000000000..aea83f2ac07b4
+index 000000000000..aea83f2ac07b
--- /dev/null
+++ b/drivers/hid/ithc/Kbuild
@@ -0,0 +1,6 @@
@@ -3957,7 +3957,7 @@ index 0000000000000..aea83f2ac07b4
+
diff --git a/drivers/hid/ithc/Kconfig b/drivers/hid/ithc/Kconfig
new file mode 100644
-index 0000000000000..ede7130236096
+index 000000000000..ede713023609
--- /dev/null
+++ b/drivers/hid/ithc/Kconfig
@@ -0,0 +1,12 @@
@@ -3975,17 +3975,21 @@ index 0000000000000..ede7130236096
+ module will be called ithc.
diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c
new file mode 100644
-index 0000000000000..57bf125c45bd5
+index 000000000000..1f1f1e33f2e5
--- /dev/null
+++ b/drivers/hid/ithc/ithc-debug.c
-@@ -0,0 +1,96 @@
+@@ -0,0 +1,130 @@
++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
++
+#include "ithc.h"
+
-+void ithc_log_regs(struct ithc *ithc) {
-+ if (!ithc->prev_regs) return;
-+ u32 __iomem *cur = (__iomem void*)ithc->regs;
-+ u32 *prev = (void*)ithc->prev_regs;
-+ for (int i = 1024; i < sizeof *ithc->regs / 4; i++) {
++void ithc_log_regs(struct ithc *ithc)
++{
++ if (!ithc->prev_regs)
++ return;
++ u32 __iomem *cur = (__iomem void *)ithc->regs;
++ u32 *prev = (void *)ithc->prev_regs;
++ for (int i = 1024; i < sizeof(*ithc->regs) / 4; i++) {
+ u32 x = readl(cur + i);
+ if (x != prev[i]) {
+ pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x);
@@ -3994,55 +3998,79 @@ index 0000000000000..57bf125c45bd5
+ }
+}
+
-+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) {
++static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len,
++ loff_t *offset)
++{
++ // Debug commands consist of a single letter followed by a list of numbers (decimal or
++ // hexadecimal, space-separated).
+ struct ithc *ithc = file_inode(f)->i_private;
+ char cmd[256];
-+ if (!ithc || !ithc->pci) return -ENODEV;
-+ if (!len) return -EINVAL;
-+ if (len >= sizeof cmd) return -EINVAL;
-+ if (copy_from_user(cmd, buf, len)) return -EFAULT;
++ if (!ithc || !ithc->pci)
++ return -ENODEV;
++ if (!len)
++ return -EINVAL;
++ if (len >= sizeof(cmd))
++ return -EINVAL;
++ if (copy_from_user(cmd, buf, len))
++ return -EFAULT;
+ cmd[len] = 0;
-+ if (cmd[len-1] == '\n') cmd[len-1] = 0;
++ if (cmd[len-1] == '\n')
++ cmd[len-1] = 0;
+ pci_info(ithc->pci, "debug command: %s\n", cmd);
++
++ // Parse the list of arguments into a u32 array.
+ u32 n = 0;
+ const char *s = cmd + 1;
+ u32 a[32];
+ while (*s && *s != '\n') {
-+ if (n >= ARRAY_SIZE(a)) return -EINVAL;
-+ if (*s++ != ' ') return -EINVAL;
++ if (n >= ARRAY_SIZE(a))
++ return -EINVAL;
++ if (*s++ != ' ')
++ return -EINVAL;
+ char *e;
+ a[n++] = simple_strtoul(s, &e, 0);
-+ if (e == s) return -EINVAL;
++ if (e == s)
++ return -EINVAL;
+ s = e;
+ }
+ ithc_log_regs(ithc);
-+ switch(cmd[0]) {
++
++ // Execute the command.
++ switch (cmd[0]) {
+ case 'x': // reset
+ ithc_reset(ithc);
+ break;
+ case 'w': // write register: offset mask value
-+ if (n != 3 || (a[0] & 3)) return -EINVAL;
-+ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]);
++ if (n != 3 || (a[0] & 3))
++ return -EINVAL;
++ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n",
++ a[0], a[2], a[1]);
+ bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]);
+ break;
+ case 'r': // read register: offset
-+ if (n != 1 || (a[0] & 3)) return -EINVAL;
-+ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4));
++ if (n != 1 || (a[0] & 3))
++ return -EINVAL;
++ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0],
++ readl(((__iomem u32 *)ithc->regs) + a[0] / 4));
+ break;
+ case 's': // spi command: cmd offset len data...
+ // read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ // set touch cfg: s 6 12 4 XX
-+ if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL;
++ if (n < 3 || a[2] > (n - 3) * 4)
++ return -EINVAL;
+ pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]);
+ if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3))
-+ for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]);
++ for (u32 i = 0; i < (a[2] + 3) / 4; i++)
++ pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]);
+ break;
+ case 'd': // dma command: cmd len data...
+ // get report descriptor: d 7 8 0 0
+ // enable multitouch: d 3 2 0x0105
-+ if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL;
++ if (n < 2 || a[1] > (n - 2) * 4)
++ return -EINVAL;
+ pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]);
-+ if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n");
++ if (ithc_dma_tx(ithc, a[0], a[1], a + 2))
++ pci_err(ithc->pci, "dma tx failed\n");
+ break;
+ default:
+ return -EINVAL;
@@ -4056,87 +4084,125 @@ index 0000000000000..57bf125c45bd5
+ .write = ithc_debugfs_cmd_write,
+};
+
-+static void ithc_debugfs_devres_release(struct device *dev, void *res) {
++static void ithc_debugfs_devres_release(struct device *dev, void *res)
++{
+ struct dentry **dbgm = res;
-+ if (*dbgm) debugfs_remove_recursive(*dbgm);
++ if (*dbgm)
++ debugfs_remove_recursive(*dbgm);
+}
+
-+int ithc_debug_init(struct ithc *ithc) {
-+ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL);
-+ if (!dbgm) return -ENOMEM;
++int ithc_debug_init(struct ithc *ithc)
++{
++ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof(*dbgm), GFP_KERNEL);
++ if (!dbgm)
++ return -ENOMEM;
+ devres_add(&ithc->pci->dev, dbgm);
+ struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL);
-+ if (IS_ERR(dbg)) return PTR_ERR(dbg);
++ if (IS_ERR(dbg))
++ return PTR_ERR(dbg);
+ *dbgm = dbg;
+
+ struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd);
-+ if (IS_ERR(cmd)) return PTR_ERR(cmd);
++ if (IS_ERR(cmd))
++ return PTR_ERR(cmd);
+
+ return 0;
+}
+
diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c
new file mode 100644
-index 0000000000000..7e89b3496918d
+index 000000000000..ffb8689b8a78
--- /dev/null
+++ b/drivers/hid/ithc/ithc-dma.c
-@@ -0,0 +1,258 @@
+@@ -0,0 +1,373 @@
++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
++
+#include "ithc.h"
+
-+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, unsigned num_pages, enum dma_data_direction dir) {
++// The THC uses tables of PRDs (physical region descriptors) to describe the TX and RX data buffers.
++// Each PRD contains the DMA address and size of a block of DMA memory, and some status flags.
++// This allows each data buffer to consist of multiple non-contiguous blocks of memory.
++
++static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p,
++ unsigned int num_buffers, unsigned int num_pages, enum dma_data_direction dir)
++{
+ p->num_pages = num_pages;
+ p->dir = dir;
++ // We allocate enough space to have one PRD per data buffer page, however if the data
++ // buffer pages happen to be contiguous, we can describe the buffer using fewer PRDs, so
++ // some will remain unused (which is fine).
+ p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE);
+ p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL);
-+ if (!p->addr) return -ENOMEM;
-+ if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT;
++ if (!p->addr)
++ return -ENOMEM;
++ if (p->dma_addr & (PAGE_SIZE - 1))
++ return -EFAULT;
+ return 0;
+}
+
++// Devres managed sg_table wrapper.
+struct ithc_sg_table {
+ void *addr;
+ struct sg_table sgt;
+ enum dma_data_direction dir;
+};
-+static void ithc_dma_sgtable_free(struct sg_table *sgt) {
++static void ithc_dma_sgtable_free(struct sg_table *sgt)
++{
+ struct scatterlist *sg;
+ int i;
+ for_each_sgtable_sg(sgt, sg, i) {
+ struct page *p = sg_page(sg);
-+ if (p) __free_page(p);
++ if (p)
++ __free_page(p);
+ }
+ sg_free_table(sgt);
+}
-+static void ithc_dma_data_devres_release(struct device *dev, void *res) {
++static void ithc_dma_data_devres_release(struct device *dev, void *res)
++{
+ struct ithc_sg_table *sgt = res;
-+ if (sgt->addr) vunmap(sgt->addr);
++ if (sgt->addr)
++ vunmap(sgt->addr);
+ dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0);
+ ithc_dma_sgtable_free(&sgt->sgt);
+}
+
-+static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) {
-+ // We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional).
-+ // Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now).
++static int ithc_dma_data_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
++ struct ithc_dma_data_buffer *b)
++{
++ // We don't use dma_alloc_coherent() for data buffers, because they don't have to be
++ // coherent (they are unidirectional) or contiguous (we can use one PRD per page).
++ // We could use dma_alloc_noncontiguous(), however this still always allocates a single
++ // DMA mapped segment, which is more restrictive than what we need.
++ // Instead we use an sg_table of individually allocated pages.
+ struct page *pages[16];
-+ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL;
++ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages))
++ return -EINVAL;
+ b->active_idx = -1;
-+ struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL);
-+ if (!sgt) return -ENOMEM;
++ struct ithc_sg_table *sgt = devres_alloc(
++ ithc_dma_data_devres_release, sizeof(*sgt), GFP_KERNEL);
++ if (!sgt)
++ return -ENOMEM;
+ sgt->dir = prds->dir;
++
+ if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) {
+ struct scatterlist *sg;
+ int i;
+ bool ok = true;
+ for_each_sgtable_sg(&sgt->sgt, sg, i) {
-+ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA
-+ if (!p) { ok = false; break; }
++ // NOTE: don't need __GFP_DMA for PCI DMA
++ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
++ if (!p) {
++ ok = false;
++ break;
++ }
+ sg_set_page(sg, p, PAGE_SIZE, 0);
+ }
+ if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) {
+ devres_add(&ithc->pci->dev, sgt);
+ b->sgt = &sgt->sgt;
+ b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL);
-+ if (!b->addr) return -ENOMEM;
++ if (!b->addr)
++ return -ENOMEM;
+ return 0;
+ }
+ ithc_dma_sgtable_free(&sgt->sgt);
@@ -4145,17 +4211,29 @@ index 0000000000000..7e89b3496918d
+ return -ENOMEM;
+}
+
-+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) {
++static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
++ struct ithc_dma_data_buffer *b, unsigned int idx)
++{
++ // Give a buffer to the THC.
+ struct ithc_phys_region_desc *prd = prds->addr;
+ prd += idx * prds->num_pages;
-+ if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; }
++ if (b->active_idx >= 0) {
++ pci_err(ithc->pci, "buffer already active\n");
++ return -EINVAL;
++ }
+ b->active_idx = idx;
+ if (prds->dir == DMA_TO_DEVICE) {
-+ if (b->data_size > PAGE_SIZE) return -EINVAL;
++ // TX buffer: Caller should have already filled the data buffer, so just fill
++ // the PRD and flush.
++ // (TODO: Support multi-page TX buffers. So far no device seems to use or need
++ // these though.)
++ if (b->data_size > PAGE_SIZE)
++ return -EINVAL;
+ prd->addr = sg_dma_address(b->sgt->sgl) >> 10;
+ prd->size = b->data_size | PRD_FLAG_END;
+ flush_kernel_vmap_range(b->addr, b->data_size);
+ } else if (prds->dir == DMA_FROM_DEVICE) {
++ // RX buffer: Reset PRDs.
+ struct scatterlist *sg;
+ int i;
+ for_each_sgtable_dma_sg(b->sgt, sg, i) {
@@ -4170,21 +4248,34 @@ index 0000000000000..7e89b3496918d
+ return 0;
+}
+
-+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) {
++static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
++ struct ithc_dma_data_buffer *b, unsigned int idx)
++{
++ // Take a buffer from the THC.
+ struct ithc_phys_region_desc *prd = prds->addr;
+ prd += idx * prds->num_pages;
-+ if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; }
++ // This is purely a sanity check. We don't strictly need the idx parameter for this
++ // function, because it should always be the same as active_idx, unless we have a bug.
++ if (b->active_idx != idx) {
++ pci_err(ithc->pci, "wrong buffer index\n");
++ return -EINVAL;
++ }
+ b->active_idx = -1;
+ if (prds->dir == DMA_FROM_DEVICE) {
++ // RX buffer: Calculate actual received data size from PRDs.
+ dma_rmb(); // for the prds
+ b->data_size = 0;
+ struct scatterlist *sg;
+ int i;
+ for_each_sgtable_dma_sg(b->sgt, sg, i) {
-+ unsigned size = prd->size;
++ unsigned int size = prd->size;
+ b->data_size += size & PRD_SIZE_MASK;
-+ if (size & PRD_FLAG_END) break;
-+ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; }
++ if (size & PRD_FLAG_END)
++ break;
++ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) {
++ pci_err(ithc->pci, "truncated prd\n");
++ break;
++ }
+ prd++;
+ }
+ invalidate_kernel_vmap_range(b->addr, b->data_size);
@@ -4193,93 +4284,139 @@ index 0000000000000..7e89b3496918d
+ return 0;
+}
+
-+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) {
++int ithc_dma_rx_init(struct ithc *ithc, u8 channel)
++{
+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
+ mutex_init(&rx->mutex);
++
++ // Allocate buffers.
+ u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes);
-+ unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE;
-+ pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages);
++ unsigned int num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE;
++ pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n",
++ NUM_RX_BUF, buf_size, num_pages);
+ CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE);
-+ for (unsigned i = 0; i < NUM_RX_BUF; i++)
++ for (unsigned int i = 0; i < NUM_RX_BUF; i++)
+ CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]);
++
++ // Init registers.
+ writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2);
+ lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr);
+ writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs);
+ writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds);
+ u8 head = readb(&ithc->regs->dma_rx[channel].head);
-+ if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; }
-+ for (unsigned i = 0; i < NUM_RX_BUF; i++)
++ if (head) {
++ pci_err(ithc->pci, "head is nonzero (%u)\n", head);
++ return -EIO;
++ }
++
++ // Init buffers.
++ for (unsigned int i = 0; i < NUM_RX_BUF; i++)
+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i);
++
+ writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail);
+ return 0;
+}
-+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) {
-+ bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA);
-+ CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED);
++
++void ithc_dma_rx_enable(struct ithc *ithc, u8 channel)
++{
++ bitsb_set(&ithc->regs->dma_rx[channel].control,
++ DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA);
++ CHECK(waitl, ithc, &ithc->regs->dma_rx[channel].status,
++ DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED);
+}
+
-+int ithc_dma_tx_init(struct ithc *ithc) {
++int ithc_dma_tx_init(struct ithc *ithc)
++{
+ struct ithc_dma_tx *tx = &ithc->dma_tx;
+ mutex_init(&tx->mutex);
++
++ // Allocate buffers.
+ tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes);
-+ unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE;
-+ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages);
++ unsigned int num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE;
++ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n",
++ tx->max_size, num_pages);
+ CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE);
+ CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf);
++
++ // Init registers.
+ lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr);
+ writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds);
++
++ // Init buffers.
+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
+ return 0;
+}
+
-+static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, u8 channel, u8 buf) {
++static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data,
++ u8 channel, u8 buf)
++{
+ if (buf >= NUM_RX_BUF) {
+ pci_err(ithc->pci, "invalid dma ringbuffer index\n");
+ return -EINVAL;
+ }
-+ ithc_set_active(ithc);
+ u32 len = data->data_size;
+ struct ithc_dma_rx_header *hdr = data->addr;
+ u8 *hiddata = (void *)(hdr + 1);
-+ if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) {
++ if (len >= sizeof(*hdr) && hdr->code == DMA_RX_CODE_RESET) {
++ // The THC sends a reset request when we need to reinitialize the device.
++ // This usually only happens if we send an invalid command or put the device
++ // in a bad state.
+ CHECK(ithc_reset, ithc);
-+ } else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) {
++ } else if (len < sizeof(*hdr) || len != sizeof(*hdr) + hdr->data_size) {
+ if (hdr->code == DMA_RX_CODE_INPUT_REPORT) {
-+ // When the CPU enters a low power state during DMA, we can get truncated messages.
-+ // Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes.
++ // When the CPU enters a low power state during DMA, we can get truncated
++ // messages. For Surface devices, this will typically be a single touch
++ // report that is only 1 byte, or a multitouch report that is 257 bytes.
+ // See also ithc_set_active().
+ } else {
-+ pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size);
-+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0);
++ pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n",
++ channel, buf, len, hdr->code, hdr->data_size);
++ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1,
++ hdr, min(len, 0x400u), 0);
+ }
+ } else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) {
++ // Response to a 'get report descriptor' request.
++ // The actual descriptor is preceded by 8 nul bytes.
+ CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8);
+ WRITE_ONCE(ithc->hid_parse_done, true);
+ wake_up(&ithc->wait_hid_parse);
+ } else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) {
++ // Standard HID input report containing touch data.
+ CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1);
+ } else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) {
++ // Response to a 'get feature' request.
+ bool done = false;
+ mutex_lock(&ithc->hid_get_feature_mutex);
+ if (ithc->hid_get_feature_buf) {
-+ if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size;
++ if (hdr->data_size < ithc->hid_get_feature_size)
++ ithc->hid_get_feature_size = hdr->data_size;
+ memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size);
+ ithc->hid_get_feature_buf = NULL;
+ done = true;
+ }
+ mutex_unlock(&ithc->hid_get_feature_mutex);
-+ if (done) wake_up(&ithc->wait_hid_get_feature);
-+ else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1);
++ if (done) {
++ wake_up(&ithc->wait_hid_get_feature);
++ } else {
++ // Received data without a matching request, or the request already
++ // timed out. (XXX What's the correct thing to do here?)
++ CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT,
++ hiddata, hdr->data_size, 1);
++ }
+ } else {
-+ pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code);
-+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0);
++ pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n",
++ channel, buf, len, hdr->code);
++ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1,
++ hdr, min(len, 0x400u), 0);
+ }
+ return 0;
+}
+
-+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) {
++static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel)
++{
++ // Process all filled RX buffers from the ringbuffer.
+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
-+ unsigned n = rx->num_received;
++ unsigned int n = rx->num_received;
+ u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head);
+ while (1) {
+ u8 tail = n % NUM_RX_BUF;
@@ -4287,7 +4424,8 @@ index 0000000000000..7e89b3496918d
+ writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail);
+ // ringbuffer is full if tail_wrap == head_wrap
+ // ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG
-+ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0;
++ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG))
++ return 0;
+
+ // take the buffer that the device just filled
+ struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF];
@@ -4301,7 +4439,8 @@ index 0000000000000..7e89b3496918d
+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail);
+ }
+}
-+int ithc_dma_rx(struct ithc *ithc, u8 channel) {
++int ithc_dma_rx(struct ithc *ithc, u8 channel)
++{
+ struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
+ mutex_lock(&rx->mutex);
+ int ret = ithc_dma_rx_unlocked(ithc, channel);
@@ -4309,14 +4448,21 @@ index 0000000000000..7e89b3496918d
+ return ret;
+}
+
-+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) {
++static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data)
++{
++ ithc_set_active(ithc, 100 * USEC_PER_MSEC);
++
++ // Send a single TX buffer to the THC.
+ pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize);
+ struct ithc_dma_tx_header *hdr;
++ // Data must be padded to next 4-byte boundary.
+ u8 padding = datasize & 3 ? 4 - (datasize & 3) : 0;
-+ unsigned fullsize = sizeof *hdr + datasize + padding;
-+ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL;
++ unsigned int fullsize = sizeof(*hdr) + datasize + padding;
++ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE)
++ return -EINVAL;
+ CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
+
++ // Fill the TX buffer with header and data.
+ ithc->dma_tx.buf.data_size = fullsize;
+ hdr = ithc->dma_tx.buf.addr;
+ hdr->code = cmdcode;
@@ -4324,15 +4470,18 @@ index 0000000000000..7e89b3496918d
+ u8 *dest = (void *)(hdr + 1);
+ memcpy(dest, data, datasize);
+ dest += datasize;
-+ for (u8 p = 0; p < padding; p++) *dest++ = 0;
++ for (u8 p = 0; p < padding; p++)
++ *dest++ = 0;
+ CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
+
++ // Let the THC process the buffer.
+ bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND);
+ CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0);
+ writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status);
+ return 0;
+}
-+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) {
++int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data)
++{
+ mutex_lock(&ithc->dma_tx.mutex);
+ int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data);
+ mutex_unlock(&ithc->dma_tx.mutex);
@@ -4341,10 +4490,12 @@ index 0000000000000..7e89b3496918d
+
diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h
new file mode 100644
-index 0000000000000..d9f2c19a13f3a
+index 000000000000..93652e4476bf
--- /dev/null
+++ b/drivers/hid/ithc/ithc-dma.h
-@@ -0,0 +1,67 @@
+@@ -0,0 +1,69 @@
++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
++
+#define PRD_SIZE_MASK 0xffffff
+#define PRD_FLAG_END 0x1000000
+#define PRD_FLAG_SUCCESS 0x2000000
@@ -4406,7 +4557,7 @@ index 0000000000000..d9f2c19a13f3a
+ struct ithc_dma_data_buffer bufs[NUM_RX_BUF];
+};
+
-+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname);
++int ithc_dma_rx_init(struct ithc *ithc, u8 channel);
+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel);
+int ithc_dma_tx_init(struct ithc *ithc);
+int ithc_dma_rx(struct ithc *ithc, u8 channel);
@@ -4414,10 +4565,12 @@ index 0000000000000..d9f2c19a13f3a
+
diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c
new file mode 100644
-index 0000000000000..09512b9cb4d31
+index 000000000000..87ed4aa70fda
--- /dev/null
+++ b/drivers/hid/ithc/ithc-main.c
-@@ -0,0 +1,534 @@
+@@ -0,0 +1,728 @@
++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
++
+#include "ithc.h"
+
+MODULE_DESCRIPTION("Intel Touch Host Controller driver");
@@ -4462,6 +4615,9 @@ index 0000000000000..09512b9cb4d31
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) },
++ // XXX So far the THC seems to be the only Intel PCI device with PCI_CLASS_INPUT_PEN,
++ // so instead of the device list we could just do:
++ // { .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .class = PCI_CLASS_INPUT_PEN, .class_mask = ~0, },
+ {}
+};
+MODULE_DEVICE_TABLE(pci, ithc_pci_tbl);
@@ -4472,6 +4628,7 @@ index 0000000000000..09512b9cb4d31
+module_param_named(poll, ithc_use_polling, bool, 0);
+MODULE_PARM_DESC(poll, "Use polling instead of interrupts");
+
++// Since all known devices seem to use only channel 1, by default we disable channel 0.
+static bool ithc_use_rx0 = false;
+module_param_named(rx0, ithc_use_rx0, bool, 0);
+MODULE_PARM_DESC(rx0, "Use DMA RX channel 0");
@@ -4480,37 +4637,56 @@ index 0000000000000..09512b9cb4d31
+module_param_named(rx1, ithc_use_rx1, bool, 0);
+MODULE_PARM_DESC(rx1, "Use DMA RX channel 1");
+
++// Values below 250 seem to work well on the SP7+. If this is set too high, you may observe cursor stuttering.
++static int ithc_dma_latency_us = 200;
++module_param_named(dma_latency_us, ithc_dma_latency_us, int, 0);
++MODULE_PARM_DESC(dma_latency_us, "Determines the CPU latency QoS value for DMA transfers (in microseconds), -1 to disable latency QoS");
++
++// Values above 1700 seem to work well on the SP7+. If this is set too low, you may observe cursor stuttering.
++static unsigned int ithc_dma_early_us = 2000;
++module_param_named(dma_early_us, ithc_dma_early_us, uint, 0);
++MODULE_PARM_DESC(dma_early_us, "Determines how early the CPU latency QoS value is applied before the next expected IRQ (in microseconds)");
++
+static bool ithc_log_regs_enabled = false;
+module_param_named(logregs, ithc_log_regs_enabled, bool, 0);
+MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)");
+
+// Sysfs attributes
+
-+static bool ithc_is_config_valid(struct ithc *ithc) {
++static bool ithc_is_config_valid(struct ithc *ithc)
++{
+ return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC;
+}
+
-+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) {
++static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf)
++{
+ struct ithc *ithc = dev_get_drvdata(dev);
-+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
++ if (!ithc || !ithc_is_config_valid(ithc))
++ return -ENODEV;
+ return sprintf(buf, "0x%04x", ithc->config.vendor_id);
+}
+static DEVICE_ATTR_RO(vendor);
-+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) {
++static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf)
++{
+ struct ithc *ithc = dev_get_drvdata(dev);
-+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
++ if (!ithc || !ithc_is_config_valid(ithc))
++ return -ENODEV;
+ return sprintf(buf, "0x%04x", ithc->config.product_id);
+}
+static DEVICE_ATTR_RO(product);
-+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) {
++static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf)
++{
+ struct ithc *ithc = dev_get_drvdata(dev);
-+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
++ if (!ithc || !ithc_is_config_valid(ithc))
++ return -ENODEV;
+ return sprintf(buf, "%u", ithc->config.revision);
+}
+static DEVICE_ATTR_RO(revision);
-+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) {
++static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf)
++{
+ struct ithc *ithc = dev_get_drvdata(dev);
-+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
++ if (!ithc || !ithc_is_config_valid(ithc))
++ return -ENODEV;
+ u32 v = ithc->config.fw_version;
+ return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff);
+}
@@ -4537,45 +4713,75 @@ index 0000000000000..09512b9cb4d31
+static int ithc_hid_open(struct hid_device *hdev) { return 0; }
+static void ithc_hid_close(struct hid_device *hdev) { }
+
-+static int ithc_hid_parse(struct hid_device *hdev) {
++static int ithc_hid_parse(struct hid_device *hdev)
++{
+ struct ithc *ithc = hdev->driver_data;
+ u64 val = 0;
+ WRITE_ONCE(ithc->hid_parse_done, false);
-+ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val);
-+ if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT;
-+ return 0;
++ for (int retries = 0; ; retries++) {
++ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof(val), &val);
++ if (wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done),
++ msecs_to_jiffies(200)))
++ return 0;
++ if (retries > 5) {
++ pci_err(ithc->pci, "failed to read report descriptor\n");
++ return -ETIMEDOUT;
++ }
++ pci_warn(ithc->pci, "failed to read report descriptor, retrying\n");
++ }
+}
+
-+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) {
++static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf,
++ size_t len, unsigned char rtype, int reqtype)
++{
+ struct ithc *ithc = hdev->driver_data;
-+ if (!buf || !len) return -EINVAL;
++ if (!buf || !len)
++ return -EINVAL;
+ u32 code;
-+ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT;
-+ else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE;
-+ else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE;
-+ else {
-+ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum);
++ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) {
++ code = DMA_TX_CODE_OUTPUT_REPORT;
++ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) {
++ code = DMA_TX_CODE_SET_FEATURE;
++ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) {
++ code = DMA_TX_CODE_GET_FEATURE;
++ } else {
++ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n",
++ rtype, reqtype, reportnum);
+ return -EINVAL;
+ }
+ buf[0] = reportnum;
++
+ if (reqtype == HID_REQ_GET_REPORT) {
++ // Prepare for response.
+ mutex_lock(&ithc->hid_get_feature_mutex);
+ ithc->hid_get_feature_buf = buf;
+ ithc->hid_get_feature_size = len;
+ mutex_unlock(&ithc->hid_get_feature_mutex);
++
++ // Transmit 'get feature' request.
+ int r = CHECK(ithc_dma_tx, ithc, code, 1, buf);
+ if (!r) {
-+ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000));
-+ if (!r) r = -ETIMEDOUT;
-+ else if (r < 0) r = -EINTR;
-+ else r = 0;
++ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature,
++ !ithc->hid_get_feature_buf, msecs_to_jiffies(1000));
++ if (!r)
++ r = -ETIMEDOUT;
++ else if (r < 0)
++ r = -EINTR;
++ else
++ r = 0;
+ }
++
++ // If everything went ok, the buffer has been filled with the response data.
++ // Return the response size.
+ mutex_lock(&ithc->hid_get_feature_mutex);
+ ithc->hid_get_feature_buf = NULL;
-+ if (!r) r = ithc->hid_get_feature_size;
++ if (!r)
++ r = ithc->hid_get_feature_size;
+ mutex_unlock(&ithc->hid_get_feature_mutex);
+ return r;
+ }
++
++ // 'Set feature', or 'output report'. These don't have a response.
+ CHECK_RET(ithc_dma_tx, ithc, code, len, buf);
+ return 0;
+}
@@ -4589,17 +4795,22 @@ index 0000000000000..09512b9cb4d31
+ .raw_request = ithc_hid_raw_request,
+};
+
-+static void ithc_hid_devres_release(struct device *dev, void *res) {
++static void ithc_hid_devres_release(struct device *dev, void *res)
++{
+ struct hid_device **hidm = res;
-+ if (*hidm) hid_destroy_device(*hidm);
++ if (*hidm)
++ hid_destroy_device(*hidm);
+}
+
-+static int ithc_hid_init(struct ithc *ithc) {
-+ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL);
-+ if (!hidm) return -ENOMEM;
++static int ithc_hid_init(struct ithc *ithc)
++{
++ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof(*hidm), GFP_KERNEL);
++ if (!hidm)
++ return -ENOMEM;
+ devres_add(&ithc->pci->dev, hidm);
+ struct hid_device *hid = hid_allocate_device();
-+ if (IS_ERR(hid)) return PTR_ERR(hid);
++ if (IS_ERR(hid))
++ return PTR_ERR(hid);
+ *hidm = hid;
+
+ strscpy(hid->name, DEVFULLNAME, sizeof(hid->name));
@@ -4618,27 +4829,45 @@ index 0000000000000..09512b9cb4d31
+
+// Interrupts/polling
+
-+static void ithc_activity_timer_callback(struct timer_list *t) {
-+ struct ithc *ithc = container_of(t, struct ithc, activity_timer);
++static enum hrtimer_restart ithc_activity_start_timer_callback(struct hrtimer *t)
++{
++ struct ithc *ithc = container_of(t, struct ithc, activity_start_timer);
++ ithc_set_active(ithc, ithc_dma_early_us * 2 + USEC_PER_MSEC);
++ return HRTIMER_NORESTART;
++}
++
++static enum hrtimer_restart ithc_activity_end_timer_callback(struct hrtimer *t)
++{
++ struct ithc *ithc = container_of(t, struct ithc, activity_end_timer);
+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
++ return HRTIMER_NORESTART;
+}
+
-+void ithc_set_active(struct ithc *ithc) {
++void ithc_set_active(struct ithc *ithc, unsigned int duration_us)
++{
++ if (ithc_dma_latency_us < 0)
++ return;
+ // When CPU usage is very low, the CPU can enter various low power states (C2-C10).
-+ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens.
-+ // The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor).
-+ // To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions.
-+ cpu_latency_qos_update_request(&ithc->activity_qos, 0);
-+ mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000));
++ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_RX_TIMEOUT will be
++ // set when this happens. The amount of truncated messages can become very high, resulting
++ // in user-visible effects (laggy/stuttering cursor). To avoid this, we use a CPU latency
++ // QoS request to prevent the CPU from entering low power states during touch interactions.
++ cpu_latency_qos_update_request(&ithc->activity_qos, ithc_dma_latency_us);
++ hrtimer_start_range_ns(&ithc->activity_end_timer,
++ ns_to_ktime(duration_us * NSEC_PER_USEC), duration_us * NSEC_PER_USEC, HRTIMER_MODE_REL);
+}
+
-+static int ithc_set_device_enabled(struct ithc *ithc, bool enable) {
-+ u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2
-+ | (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0);
-+ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x);
++static int ithc_set_device_enabled(struct ithc *ithc, bool enable)
++{
++ u32 x = ithc->config.touch_cfg =
++ (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 |
++ (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0);
++ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE,
++ offsetof(struct ithc_device_config, touch_cfg), sizeof(x), &x);
+}
+
-+static void ithc_disable_interrupts(struct ithc *ithc) {
++static void ithc_disable_interrupts(struct ithc *ithc)
++{
+ writel(0, &ithc->regs->error_control);
+ bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0);
+ bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0);
@@ -4646,43 +4875,85 @@ index 0000000000000..09512b9cb4d31
+ bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0);
+}
+
-+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) {
-+ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status);
++static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned int channel)
++{
++ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA,
++ &ithc->regs->dma_rx[channel].status);
+}
+
-+static void ithc_clear_interrupts(struct ithc *ithc) {
++static void ithc_clear_interrupts(struct ithc *ithc)
++{
+ writel(0xffffffff, &ithc->regs->error_flags);
+ writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status);
+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
+ ithc_clear_dma_rx_interrupts(ithc, 0);
+ ithc_clear_dma_rx_interrupts(ithc, 1);
-+ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status);
++ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2,
++ &ithc->regs->dma_tx.status);
+}
+
-+static void ithc_process(struct ithc *ithc) {
++static void ithc_process(struct ithc *ithc)
++{
+ ithc_log_regs(ithc);
+
-+ // read and clear error bits
++ bool rx0 = ithc_use_rx0 && (readl(&ithc->regs->dma_rx[0].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0;
++ bool rx1 = ithc_use_rx1 && (readl(&ithc->regs->dma_rx[1].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0;
++
++ // Track time between DMA rx transfers, so we can try to predict when we need to enable CPU latency QoS for the next transfer
++ ktime_t t = ktime_get();
++ ktime_t dt = ktime_sub(t, ithc->last_rx_time);
++ if (rx0 || rx1) {
++ ithc->last_rx_time = t;
++ if (dt > ms_to_ktime(100)) {
++ ithc->cur_rx_seq_count = 0;
++ ithc->cur_rx_seq_errors = 0;
++ }
++ ithc->cur_rx_seq_count++;
++ if (!ithc_use_polling && ithc_dma_latency_us >= 0) {
++ // Disable QoS, since the DMA transfer has completed (we re-enable it after a delay below)
++ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
++ hrtimer_try_to_cancel(&ithc->activity_end_timer);
++ }
++ }
++
++ // Read and clear error bits
+ u32 err = readl(&ithc->regs->error_flags);
+ if (err) {
-+ if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 0x%08x\n", err);
+ writel(err, &ithc->regs->error_flags);
++ if (err & ~ERROR_FLAG_DMA_RX_TIMEOUT)
++ pci_err(ithc->pci, "error flags: 0x%08x\n", err);
++ if (err & ERROR_FLAG_DMA_RX_TIMEOUT) {
++ // Only log an error if we see a significant number of these errors.
++ ithc->cur_rx_seq_errors++;
++ if (ithc->cur_rx_seq_errors && ithc->cur_rx_seq_errors % 50 == 0 && ithc->cur_rx_seq_errors > ithc->cur_rx_seq_count / 10)
++ pci_err(ithc->pci, "High number of DMA RX timeouts/errors (%u/%u, dt=%lldus). Try adjusting dma_early_us and/or dma_latency_us.\n",
++ ithc->cur_rx_seq_errors, ithc->cur_rx_seq_count, ktime_to_us(dt));
++ }
+ }
+
-+ // process DMA rx
++ // Process DMA rx
+ if (ithc_use_rx0) {
+ ithc_clear_dma_rx_interrupts(ithc, 0);
-+ ithc_dma_rx(ithc, 0);
++ if (rx0)
++ ithc_dma_rx(ithc, 0);
+ }
+ if (ithc_use_rx1) {
+ ithc_clear_dma_rx_interrupts(ithc, 1);
-+ ithc_dma_rx(ithc, 1);
++ if (rx1)
++ ithc_dma_rx(ithc, 1);
++ }
++
++ // Start timer to re-enable QoS for next rx, but only if we've seen an ERROR_FLAG_DMA_RX_TIMEOUT
++ if ((rx0 || rx1) && !ithc_use_polling && ithc_dma_latency_us >= 0 && ithc->cur_rx_seq_errors > 0) {
++ ktime_t expires = ktime_add(t, ktime_sub_us(dt, ithc_dma_early_us));
++ hrtimer_start_range_ns(&ithc->activity_start_timer, expires, 10 * NSEC_PER_USEC, HRTIMER_MODE_ABS);
+ }
+
+ ithc_log_regs(ithc);
+}
+
-+static irqreturn_t ithc_interrupt_thread(int irq, void *arg) {
++static irqreturn_t ithc_interrupt_thread(int irq, void *arg)
++{
+ struct ithc *ithc = arg;
+ pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n",
+ readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags),
@@ -4694,14 +4965,21 @@ index 0000000000000..09512b9cb4d31
+ return IRQ_HANDLED;
+}
+
-+static int ithc_poll_thread(void *arg) {
++static int ithc_poll_thread(void *arg)
++{
+ struct ithc *ithc = arg;
-+ unsigned sleep = 100;
++ unsigned int sleep = 100;
+ while (!kthread_should_stop()) {
+ u32 n = ithc->dma_rx[1].num_received;
+ ithc_process(ithc);
-+ if (n != ithc->dma_rx[1].num_received) sleep = 20;
-+ else sleep = min(200u, sleep + (sleep >> 4) + 1);
++ // Decrease polling interval to 20ms if we received data, otherwise slowly
++ // increase it up to 200ms.
++ if (n != ithc->dma_rx[1].num_received) {
++ ithc_set_active(ithc, 100 * USEC_PER_MSEC);
++ sleep = 20;
++ } else {
++ sleep = min(200u, sleep + (sleep >> 4) + 1);
++ }
+ msleep_interruptible(sleep);
+ }
+ return 0;
@@ -4709,7 +4987,8 @@ index 0000000000000..09512b9cb4d31
+
+// Device initialization and shutdown
+
-+static void ithc_disable(struct ithc *ithc) {
++static void ithc_disable(struct ithc *ithc)
++{
+ bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE);
+ CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED);
+ bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0);
@@ -4721,81 +5000,112 @@ index 0000000000000..09512b9cb4d31
+ ithc_clear_interrupts(ithc);
+}
+
-+static int ithc_init_device(struct ithc *ithc) {
++static int ithc_init_device(struct ithc *ithc)
++{
+ ithc_log_regs(ithc);
+ bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0;
+ ithc_disable(ithc);
+ CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY);
++
++ // Since we don't yet know which SPI config the device wants, use default speed and mode
++ // initially for reading config data.
+ ithc_set_spi_config(ithc, 10, 0);
-+ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config
+
-+ if (was_enabled) if (msleep_interruptible(100)) return -EINTR;
++ // Setting the following bit seems to make reading the config more reliable.
++ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000);
++
++ // If the device was previously enabled, wait a bit to make sure it's fully shut down.
++ if (was_enabled)
++ if (msleep_interruptible(100))
++ return -EINTR;
++
++ // Take the touch device out of reset.
+ bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0);
+ CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0);
+ for (int retries = 0; ; retries++) {
+ ithc_log_regs(ithc);
+ bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET);
-+ if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break;
++ if (!waitl(ithc, &ithc->regs->state, 0xf, 2))
++ break;
+ if (retries > 5) {
-+ pci_err(ithc->pci, "too many retries, failed to reset device\n");
++ pci_err(ithc->pci, "failed to reset device, state = 0x%08x\n", readl(&ithc->regs->state));
+ return -ETIMEDOUT;
+ }
-+ pci_err(ithc->pci, "invalid state, retrying reset\n");
++ pci_warn(ithc->pci, "invalid state, retrying reset\n");
+ bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0);
-+ if (msleep_interruptible(1000)) return -EINTR;
++ if (msleep_interruptible(1000))
++ return -EINTR;
+ }
+ ithc_log_regs(ithc);
+
++ // Waiting for the following status bit makes reading config much more reliable,
++ // however the official driver does not seem to do this...
+ CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4);
+
-+ // read config
++ // Read configuration data.
+ for (int retries = 0; ; retries++) {
+ ithc_log_regs(ithc);
-+ memset(&ithc->config, 0, sizeof ithc->config);
-+ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config);
++ memset(&ithc->config, 0, sizeof(ithc->config));
++ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof(ithc->config), &ithc->config);
+ u32 *p = (void *)&ithc->config;
+ pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
-+ if (ithc_is_config_valid(ithc)) break;
++ if (ithc_is_config_valid(ithc))
++ break;
+ if (retries > 10) {
-+ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id);
++ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n",
++ ithc->config.device_id);
+ return -EIO;
+ }
-+ pci_err(ithc->pci, "failed to read config, retrying\n");
-+ if (msleep_interruptible(100)) return -EINTR;
++ pci_warn(ithc->pci, "failed to read config, retrying\n");
++ if (msleep_interruptible(100))
++ return -EINTR;
+ }
+ ithc_log_regs(ithc);
+
-+ CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config));
++ // Apply SPI config and enable touch device.
++ CHECK_RET(ithc_set_spi_config, ithc,
++ DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config),
++ DEVCFG_SPI_MODE(ithc->config.spi_config));
+ CHECK_RET(ithc_set_device_enabled, ithc, true);
+ ithc_log_regs(ithc);
+ return 0;
+}
+
-+int ithc_reset(struct ithc *ithc) {
-+ // FIXME This should probably do devres_release_group()+ithc_start(). But because this is called during DMA
-+ // processing, that would have to be done asynchronously (schedule_work()?). And with extra locking?
++int ithc_reset(struct ithc *ithc)
++{
++ // FIXME This should probably do devres_release_group()+ithc_start().
++ // But because this is called during DMA processing, that would have to be done
++ // asynchronously (schedule_work()?). And with extra locking?
+ pci_err(ithc->pci, "reset\n");
+ CHECK(ithc_init_device, ithc);
-+ if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0);
-+ if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1);
++ if (ithc_use_rx0)
++ ithc_dma_rx_enable(ithc, 0);
++ if (ithc_use_rx1)
++ ithc_dma_rx_enable(ithc, 1);
+ ithc_log_regs(ithc);
+ pci_dbg(ithc->pci, "reset completed\n");
+ return 0;
+}
+
-+static void ithc_stop(void *res) {
++static void ithc_stop(void *res)
++{
+ struct ithc *ithc = res;
+ pci_dbg(ithc->pci, "stopping\n");
+ ithc_log_regs(ithc);
-+ if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread);
-+ if (ithc->irq >= 0) disable_irq(ithc->irq);
++
++ if (ithc->poll_thread)
++ CHECK(kthread_stop, ithc->poll_thread);
++ if (ithc->irq >= 0)
++ disable_irq(ithc->irq);
+ CHECK(ithc_set_device_enabled, ithc, false);
+ ithc_disable(ithc);
-+ del_timer_sync(&ithc->activity_timer);
++ hrtimer_cancel(&ithc->activity_start_timer);
++ hrtimer_cancel(&ithc->activity_end_timer);
+ cpu_latency_qos_remove_request(&ithc->activity_qos);
-+ // clear dma config
-+ for(unsigned i = 0; i < 2; i++) {
++
++ // Clear DMA config.
++ for (unsigned int i = 0; i < 2; i++) {
+ CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0);
+ lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr);
+ writeb(0, &ithc->regs->dma_rx[i].num_bufs);
@@ -4803,35 +5113,43 @@ index 0000000000000..09512b9cb4d31
+ }
+ lo_hi_writeq(0, &ithc->regs->dma_tx.addr);
+ writeb(0, &ithc->regs->dma_tx.num_prds);
++
+ ithc_log_regs(ithc);
+ pci_dbg(ithc->pci, "stopped\n");
+}
+
-+static void ithc_clear_drvdata(void *res) {
++static void ithc_clear_drvdata(void *res)
++{
+ struct pci_dev *pci = res;
+ pci_set_drvdata(pci, NULL);
+}
+
-+static int ithc_start(struct pci_dev *pci) {
++static int ithc_start(struct pci_dev *pci)
++{
+ pci_dbg(pci, "starting\n");
+ if (pci_get_drvdata(pci)) {
+ pci_err(pci, "device already initialized\n");
+ return -EINVAL;
+ }
-+ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM;
++ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL))
++ return -ENOMEM;
+
-+ struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL);
-+ if (!ithc) return -ENOMEM;
++ // Allocate/init main driver struct.
++ struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof(*ithc), GFP_KERNEL);
++ if (!ithc)
++ return -ENOMEM;
+ ithc->irq = -1;
+ ithc->pci = pci;
-+ snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci));
++ snprintf(ithc->phys, sizeof(ithc->phys), "pci-%s/" DEVNAME, pci_name(pci));
+ init_waitqueue_head(&ithc->wait_hid_parse);
+ init_waitqueue_head(&ithc->wait_hid_get_feature);
+ mutex_init(&ithc->hid_get_feature_mutex);
+ pci_set_drvdata(pci, ithc);
+ CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci);
-+ if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL);
++ if (ithc_log_regs_enabled)
++ ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof(*ithc->prev_regs), GFP_KERNEL);
+
++ // PCI initialization.
+ CHECK_RET(pcim_enable_device, pci);
+ pci_set_master(pci);
+ CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs");
@@ -4839,29 +5157,39 @@ index 0000000000000..09512b9cb4d31
+ CHECK_RET(pci_set_power_state, pci, PCI_D0);
+ ithc->regs = pcim_iomap_table(pci)[0];
+
++ // Allocate IRQ.
+ if (!ithc_use_polling) {
+ CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ ithc->irq = CHECK(pci_irq_vector, pci, 0);
-+ if (ithc->irq < 0) return ithc->irq;
++ if (ithc->irq < 0)
++ return ithc->irq;
+ }
+
++ // Initialize THC and touch device.
+ CHECK_RET(ithc_init_device, ithc);
+ CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups);
-+ if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME);
-+ if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? DEVNAME "1" : DEVNAME);
++ if (ithc_use_rx0)
++ CHECK_RET(ithc_dma_rx_init, ithc, 0);
++ if (ithc_use_rx1)
++ CHECK_RET(ithc_dma_rx_init, ithc, 1);
+ CHECK_RET(ithc_dma_tx_init, ithc);
+
-+ CHECK_RET(ithc_hid_init, ithc);
-+
+ cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+ timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0);
++ hrtimer_init(&ithc->activity_start_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
++ ithc->activity_start_timer.function = ithc_activity_start_timer_callback;
++ hrtimer_init(&ithc->activity_end_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++ ithc->activity_end_timer.function = ithc_activity_end_timer_callback;
+
-+ // add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed
++ // Add ithc_stop() callback AFTER setting up DMA buffers, so that polling/irqs/DMA are
++ // disabled BEFORE the buffers are freed.
+ CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc);
+
++ CHECK_RET(ithc_hid_init, ithc);
++
++ // Start polling/IRQ.
+ if (ithc_use_polling) {
+ pci_info(pci, "using polling instead of irq\n");
-+ // use a thread instead of simple timer because we want to be able to sleep
++ // Use a thread instead of simple timer because we want to be able to sleep.
+ ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll");
+ if (IS_ERR(ithc->poll_thread)) {
+ int err = PTR_ERR(ithc->poll_thread);
@@ -4869,13 +5197,17 @@ index 0000000000000..09512b9cb4d31
+ return err;
+ }
+ } else {
-+ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc);
++ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL,
++ ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc);
+ }
+
-+ if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0);
-+ if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1);
++ if (ithc_use_rx0)
++ ithc_dma_rx_enable(ithc, 0);
++ if (ithc_use_rx1)
++ ithc_dma_rx_enable(ithc, 1);
+
-+ // hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA
++ // hid_add_device() can only be called after irq/polling is started and DMA is enabled,
++ // because it calls ithc_hid_parse() which reads the report descriptor via DMA.
+ CHECK_RET(hid_add_device, ithc->hid);
+
+ CHECK(ithc_debug_init, ithc);
@@ -4884,43 +5216,54 @@ index 0000000000000..09512b9cb4d31
+ return 0;
+}
+
-+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) {
++static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id)
++{
+ pci_dbg(pci, "device probe\n");
+ return ithc_start(pci);
+}
+
-+static void ithc_remove(struct pci_dev *pci) {
++static void ithc_remove(struct pci_dev *pci)
++{
+ pci_dbg(pci, "device remove\n");
+ // all cleanup is handled by devres
+}
+
-+static int ithc_suspend(struct device *dev) {
++// For suspend/resume, we just deinitialize and reinitialize everything.
++// TODO It might be cleaner to keep the HID device around, however we would then have to signal
++// to userspace that the touch device has lost state and userspace needs to e.g. resend 'set
++// feature' requests. Hidraw does not seem to have a facility to do that.
++static int ithc_suspend(struct device *dev)
++{
+ struct pci_dev *pci = to_pci_dev(dev);
+ pci_dbg(pci, "pm suspend\n");
+ devres_release_group(dev, ithc_start);
+ return 0;
+}
+
-+static int ithc_resume(struct device *dev) {
++static int ithc_resume(struct device *dev)
++{
+ struct pci_dev *pci = to_pci_dev(dev);
+ pci_dbg(pci, "pm resume\n");
+ return ithc_start(pci);
+}
+
-+static int ithc_freeze(struct device *dev) {
++static int ithc_freeze(struct device *dev)
++{
+ struct pci_dev *pci = to_pci_dev(dev);
+ pci_dbg(pci, "pm freeze\n");
+ devres_release_group(dev, ithc_start);
+ return 0;
+}
+
-+static int ithc_thaw(struct device *dev) {
++static int ithc_thaw(struct device *dev)
++{
+ struct pci_dev *pci = to_pci_dev(dev);
+ pci_dbg(pci, "pm thaw\n");
+ return ithc_start(pci);
+}
+
-+static int ithc_restore(struct device *dev) {
++static int ithc_restore(struct device *dev)
++{
+ struct pci_dev *pci = to_pci_dev(dev);
+ pci_dbg(pci, "pm restore\n");
+ return ithc_start(pci);
@@ -4941,11 +5284,13 @@ index 0000000000000..09512b9cb4d31
+ //.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway
+};
+
-+static int __init ithc_init(void) {
++static int __init ithc_init(void)
++{
+ return pci_register_driver(&ithc_driver);
+}
+
-+static void __exit ithc_exit(void) {
++static void __exit ithc_exit(void)
++{
+ pci_unregister_driver(&ithc_driver);
+}
+
@@ -4954,80 +5299,114 @@ index 0000000000000..09512b9cb4d31
+
diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c
new file mode 100644
-index 0000000000000..85d567b05761f
+index 000000000000..e058721886e3
--- /dev/null
+++ b/drivers/hid/ithc/ithc-regs.c
-@@ -0,0 +1,64 @@
+@@ -0,0 +1,96 @@
++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
++
+#include "ithc.h"
+
+#define reg_num(r) (0x1fff & (u16)(__force u64)(r))
+
-+void bitsl(__iomem u32 *reg, u32 mask, u32 val) {
-+ if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask);
++void bitsl(__iomem u32 *reg, u32 mask, u32 val)
++{
++ if (val & ~mask)
++ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n",
++ reg_num(reg), val, mask);
+ writel((readl(reg) & ~mask) | (val & mask), reg);
+}
+
-+void bitsb(__iomem u8 *reg, u8 mask, u8 val) {
-+ if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask);
++void bitsb(__iomem u8 *reg, u8 mask, u8 val)
++{
++ if (val & ~mask)
++ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n",
++ reg_num(reg), val, mask);
+ writeb((readb(reg) & ~mask) | (val & mask), reg);
+}
+
-+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) {
-+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val);
++int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val)
++{
++ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n",
++ reg_num(reg), mask, val);
+ u32 x;
+ if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) {
-+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val);
++ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n",
++ reg_num(reg), mask, val);
+ return -ETIMEDOUT;
+ }
+ pci_dbg(ithc->pci, "done waiting\n");
+ return 0;
+}
+
-+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) {
-+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val);
++int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val)
++{
++ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n",
++ reg_num(reg), mask, val);
+ u8 x;
+ if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) {
-+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val);
++ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n",
++ reg_num(reg), mask, val);
+ return -ETIMEDOUT;
+ }
+ pci_dbg(ithc->pci, "done waiting\n");
+ return 0;
+}
+
-+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) {
++int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode)
++{
+ pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode);
-+ if (mode == 3) mode = 2;
++ if (mode == 3)
++ mode = 2;
+ bitsl(&ithc->regs->spi_config,
+ SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff),
+ SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed));
+ return 0;
+}
+
-+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) {
++int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data)
++{
+ pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset);
-+ if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL;
++ if (size > sizeof(ithc->regs->spi_cmd.data))
++ return -EINVAL;
++
++ // Wait if the device is still busy.
+ CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0);
++ // Clear result flags.
+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
++
++ // Init SPI command data.
+ writeb(command, &ithc->regs->spi_cmd.code);
+ writew(size, &ithc->regs->spi_cmd.size);
+ writel(offset, &ithc->regs->spi_cmd.offset);
+ u32 *p = data, n = (size + 3) / 4;
-+ for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]);
++ for (u32 i = 0; i < n; i++)
++ writel(p[i], &ithc->regs->spi_cmd.data[i]);
++
++ // Start transmission.
+ bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND);
+ CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0);
-+ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO;
-+ if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE;
-+ for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]);
++
++ // Read response.
++ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE)
++ return -EIO;
++ if (readw(&ithc->regs->spi_cmd.size) != size)
++ return -EMSGSIZE;
++ for (u32 i = 0; i < n; i++)
++ p[i] = readl(&ithc->regs->spi_cmd.data[i]);
++
+ writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
+ return 0;
+}
+
diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h
new file mode 100644
-index 0000000000000..1a96092ed7eed
+index 000000000000..d4007d9e2bac
--- /dev/null
+++ b/drivers/hid/ithc/ithc-regs.h
-@@ -0,0 +1,186 @@
+@@ -0,0 +1,189 @@
++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
++
+#define CONTROL_QUIESCE BIT(1)
+#define CONTROL_IS_QUIESCED BIT(2)
+#define CONTROL_NRESET BIT(3)
@@ -5054,7 +5433,7 @@ index 0000000000000..1a96092ed7eed
+
+#define ERROR_FLAG_DMA_UNKNOWN_9 BIT(9)
+#define ERROR_FLAG_DMA_UNKNOWN_10 BIT(10)
-+#define ERROR_FLAG_DMA_UNKNOWN_12 BIT(12) // set when we receive a truncated DMA message
++#define ERROR_FLAG_DMA_RX_TIMEOUT BIT(12) // set when we receive a truncated DMA message
+#define ERROR_FLAG_DMA_UNKNOWN_13 BIT(13)
+#define ERROR_FLAG_SPI_BUS_TURNAROUND BIT(16)
+#define ERROR_FLAG_SPI_RESPONSE_TIMEOUT BIT(17)
@@ -5097,6 +5476,7 @@ index 0000000000000..1a96092ed7eed
+#define DMA_RX_STATUS_HAVE_DATA BIT(5)
+#define DMA_RX_STATUS_ENABLED BIT(8)
+
++// COUNTER_RESET can be written to counter registers to reset them to zero. However, in some cases this can mess up the THC.
+#define COUNTER_RESET BIT(31)
+
+struct ithc_registers {
@@ -5177,15 +5557,15 @@ index 0000000000000..1a96092ed7eed
+#define DEVCFG_SPI_MAX_FREQ(x) (((x) >> 1) & 0xf) // high bit = use high speed mode?
+#define DEVCFG_SPI_MODE(x) (((x) >> 6) & 3)
+#define DEVCFG_SPI_UNKNOWN_8(x) (((x) >> 8) & 0x3f)
-+#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20)
-+#define DEVCFG_SPI_HEARTBEAT_INTERVAL (((x) >> 21) & 7)
++#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) // TODO implement heartbeat
++#define DEVCFG_SPI_HEARTBEAT_INTERVAL(x) (((x) >> 21) & 7)
+#define DEVCFG_SPI_UNKNOWN_25 BIT(25)
+#define DEVCFG_SPI_UNKNOWN_26 BIT(26)
+#define DEVCFG_SPI_UNKNOWN_27 BIT(27)
-+#define DEVCFG_SPI_DELAY (((x) >> 28) & 7)
-+#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31)
++#define DEVCFG_SPI_DELAY(x) (((x) >> 28) & 7) // TODO use this
++#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) // TODO use this?
+
-+struct ithc_device_config {
++struct ithc_device_config { // (Example values are from an SP7+.)
+ u32 _unknown_00; // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET)
+ u32 _unknown_04; // 04 = 0x00000000
+ u32 dma_buf_sizes; // 08 = 0x000a00ff
@@ -5196,9 +5576,9 @@ index 0000000000000..1a96092ed7eed
+ u16 vendor_id; // 1c = 0x045e = Microsoft Corp.
+ u16 product_id; // 1e = 0x0c1a
+ u32 revision; // 20 = 0x00000001
-+ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139
++ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 (this value looks more random on newer devices)
+ u32 _unknown_28; // 28 = 0x00000000
-+ u32 fw_mode; // 2c = 0x00000000
++ u32 fw_mode; // 2c = 0x00000000 (for fw update?)
+ u32 _unknown_30; // 30 = 0x00000000
+ u32 _unknown_34; // 34 = 0x0404035e (u8,u8,u8,u8 = version?)
+ u32 _unknown_38; // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET)
@@ -5216,10 +5596,12 @@ index 0000000000000..1a96092ed7eed
+
diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h
new file mode 100644
-index 0000000000000..6a9b0d480bc15
+index 000000000000..028e55a4ec53
--- /dev/null
+++ b/drivers/hid/ithc/ithc.h
-@@ -0,0 +1,60 @@
+@@ -0,0 +1,67 @@
++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
++
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/hid.h>
@@ -5243,7 +5625,7 @@ index 0000000000000..6a9b0d480bc15
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define CHECK(fn, ...) ({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; })
-+#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0)
++#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while (0)
+
+#define NUM_RX_BUF 16
+
@@ -5257,8 +5639,13 @@ index 0000000000000..6a9b0d480bc15
+ struct pci_dev *pci;
+ int irq;
+ struct task_struct *poll_thread;
++
+ struct pm_qos_request activity_qos;
-+ struct timer_list activity_timer;
++ struct hrtimer activity_start_timer;
++ struct hrtimer activity_end_timer;
++ ktime_t last_rx_time;
++ unsigned int cur_rx_seq_count;
++ unsigned int cur_rx_seq_errors;
+
+ struct hid_device *hid;
+ bool hid_parse_done;
@@ -5276,1605 +5663,14 @@ index 0000000000000..6a9b0d480bc15
+};
+
+int ithc_reset(struct ithc *ithc);
-+void ithc_set_active(struct ithc *ithc);
++void ithc_set_active(struct ithc *ithc, unsigned int duration_us);
+int ithc_debug_init(struct ithc *ithc);
+void ithc_log_regs(struct ithc *ithc);
+
--
-2.42.0
-
-From 9f8d2a0f4012644f56ed8dfd322e575b57e1c208 Mon Sep 17 00:00:00 2001
-From: quo <tuple@list.ru>
-Date: Mon, 23 Oct 2023 10:15:29 +0200
-Subject: [PATCH] Update ITHC from module repo
-
-Changes:
- - Added some comments and fixed a few checkpatch warnings
- - Improved CPU latency QoS handling
- - Retry reading the report descriptor on error / timeout
-
-Based on https://github.com/quo/ithc-linux/commit/0b8b45d9775e756d6bd3a699bfaf9f5bd7b9b10b
-
-Signed-off-by: Dorian Stoll <dorian.stoll@tmsp.io>
-Patchset: ithc
----
- drivers/hid/ithc/ithc-debug.c | 94 +++++---
- drivers/hid/ithc/ithc-dma.c | 231 +++++++++++++-----
- drivers/hid/ithc/ithc-dma.h | 4 +-
- drivers/hid/ithc/ithc-main.c | 430 ++++++++++++++++++++++++----------
- drivers/hid/ithc/ithc-regs.c | 68 ++++--
- drivers/hid/ithc/ithc-regs.h | 19 +-
- drivers/hid/ithc/ithc.h | 13 +-
- 7 files changed, 623 insertions(+), 236 deletions(-)
-
-diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c
-index 57bf125c45bd5..1f1f1e33f2e5a 100644
---- a/drivers/hid/ithc/ithc-debug.c
-+++ b/drivers/hid/ithc/ithc-debug.c
-@@ -1,10 +1,14 @@
-+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-+
- #include "ithc.h"
-
--void ithc_log_regs(struct ithc *ithc) {
-- if (!ithc->prev_regs) return;
-- u32 __iomem *cur = (__iomem void*)ithc->regs;
-- u32 *prev = (void*)ithc->prev_regs;
-- for (int i = 1024; i < sizeof *ithc->regs / 4; i++) {
-+void ithc_log_regs(struct ithc *ithc)
-+{
-+ if (!ithc->prev_regs)
-+ return;
-+ u32 __iomem *cur = (__iomem void *)ithc->regs;
-+ u32 *prev = (void *)ithc->prev_regs;
-+ for (int i = 1024; i < sizeof(*ithc->regs) / 4; i++) {
- u32 x = readl(cur + i);
- if (x != prev[i]) {
- pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x);
-@@ -13,55 +17,79 @@ void ithc_log_regs(struct ithc *ithc) {
- }
- }
-
--static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) {
-+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len,
-+ loff_t *offset)
-+{
-+ // Debug commands consist of a single letter followed by a list of numbers (decimal or
-+ // hexadecimal, space-separated).
- struct ithc *ithc = file_inode(f)->i_private;
- char cmd[256];
-- if (!ithc || !ithc->pci) return -ENODEV;
-- if (!len) return -EINVAL;
-- if (len >= sizeof cmd) return -EINVAL;
-- if (copy_from_user(cmd, buf, len)) return -EFAULT;
-+ if (!ithc || !ithc->pci)
-+ return -ENODEV;
-+ if (!len)
-+ return -EINVAL;
-+ if (len >= sizeof(cmd))
-+ return -EINVAL;
-+ if (copy_from_user(cmd, buf, len))
-+ return -EFAULT;
- cmd[len] = 0;
-- if (cmd[len-1] == '\n') cmd[len-1] = 0;
-+ if (cmd[len-1] == '\n')
-+ cmd[len-1] = 0;
- pci_info(ithc->pci, "debug command: %s\n", cmd);
-+
-+ // Parse the list of arguments into a u32 array.
- u32 n = 0;
- const char *s = cmd + 1;
- u32 a[32];
- while (*s && *s != '\n') {
-- if (n >= ARRAY_SIZE(a)) return -EINVAL;
-- if (*s++ != ' ') return -EINVAL;
-+ if (n >= ARRAY_SIZE(a))
-+ return -EINVAL;
-+ if (*s++ != ' ')
-+ return -EINVAL;
- char *e;
- a[n++] = simple_strtoul(s, &e, 0);
-- if (e == s) return -EINVAL;
-+ if (e == s)
-+ return -EINVAL;
- s = e;
- }
- ithc_log_regs(ithc);
-- switch(cmd[0]) {
-+
-+ // Execute the command.
-+ switch (cmd[0]) {
- case 'x': // reset
- ithc_reset(ithc);
- break;
- case 'w': // write register: offset mask value
-- if (n != 3 || (a[0] & 3)) return -EINVAL;
-- pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]);
-+ if (n != 3 || (a[0] & 3))
-+ return -EINVAL;
-+ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n",
-+ a[0], a[2], a[1]);
- bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]);
- break;
- case 'r': // read register: offset
-- if (n != 1 || (a[0] & 3)) return -EINVAL;
-- pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4));
-+ if (n != 1 || (a[0] & 3))
-+ return -EINVAL;
-+ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0],
-+ readl(((__iomem u32 *)ithc->regs) + a[0] / 4));
- break;
- case 's': // spi command: cmd offset len data...
- // read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- // set touch cfg: s 6 12 4 XX
-- if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL;
-+ if (n < 3 || a[2] > (n - 3) * 4)
-+ return -EINVAL;
- pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]);
- if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3))
-- for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]);
-+ for (u32 i = 0; i < (a[2] + 3) / 4; i++)
-+ pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]);
- break;
- case 'd': // dma command: cmd len data...
- // get report descriptor: d 7 8 0 0
- // enable multitouch: d 3 2 0x0105
-- if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL;
-+ if (n < 2 || a[1] > (n - 2) * 4)
-+ return -EINVAL;
- pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]);
-- if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n");
-+ if (ithc_dma_tx(ithc, a[0], a[1], a + 2))
-+ pci_err(ithc->pci, "dma tx failed\n");
- break;
- default:
- return -EINVAL;
-@@ -75,21 +103,27 @@ static const struct file_operations ithc_debugfops_cmd = {
- .write = ithc_debugfs_cmd_write,
- };
-
--static void ithc_debugfs_devres_release(struct device *dev, void *res) {
-+static void ithc_debugfs_devres_release(struct device *dev, void *res)
-+{
- struct dentry **dbgm = res;
-- if (*dbgm) debugfs_remove_recursive(*dbgm);
-+ if (*dbgm)
-+ debugfs_remove_recursive(*dbgm);
- }
-
--int ithc_debug_init(struct ithc *ithc) {
-- struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL);
-- if (!dbgm) return -ENOMEM;
-+int ithc_debug_init(struct ithc *ithc)
-+{
-+ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof(*dbgm), GFP_KERNEL);
-+ if (!dbgm)
-+ return -ENOMEM;
- devres_add(&ithc->pci->dev, dbgm);
- struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL);
-- if (IS_ERR(dbg)) return PTR_ERR(dbg);
-+ if (IS_ERR(dbg))
-+ return PTR_ERR(dbg);
- *dbgm = dbg;
-
- struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd);
-- if (IS_ERR(cmd)) return PTR_ERR(cmd);
-+ if (IS_ERR(cmd))
-+ return PTR_ERR(cmd);
-
- return 0;
- }
-diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c
-index 7e89b3496918d..ffb8689b8a780 100644
---- a/drivers/hid/ithc/ithc-dma.c
-+++ b/drivers/hid/ithc/ithc-dma.c
-@@ -1,59 +1,91 @@
-+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-+
- #include "ithc.h"
-
--static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, unsigned num_pages, enum dma_data_direction dir) {
-+// The THC uses tables of PRDs (physical region descriptors) to describe the TX and RX data buffers.
-+// Each PRD contains the DMA address and size of a block of DMA memory, and some status flags.
-+// This allows each data buffer to consist of multiple non-contiguous blocks of memory.
-+
-+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p,
-+ unsigned int num_buffers, unsigned int num_pages, enum dma_data_direction dir)
-+{
- p->num_pages = num_pages;
- p->dir = dir;
-+ // We allocate enough space to have one PRD per data buffer page, however if the data
-+ // buffer pages happen to be contiguous, we can describe the buffer using fewer PRDs, so
-+ // some will remain unused (which is fine).
- p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE);
- p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL);
-- if (!p->addr) return -ENOMEM;
-- if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT;
-+ if (!p->addr)
-+ return -ENOMEM;
-+ if (p->dma_addr & (PAGE_SIZE - 1))
-+ return -EFAULT;
- return 0;
- }
-
-+// Devres managed sg_table wrapper.
- struct ithc_sg_table {
- void *addr;
- struct sg_table sgt;
- enum dma_data_direction dir;
- };
--static void ithc_dma_sgtable_free(struct sg_table *sgt) {
-+static void ithc_dma_sgtable_free(struct sg_table *sgt)
-+{
- struct scatterlist *sg;
- int i;
- for_each_sgtable_sg(sgt, sg, i) {
- struct page *p = sg_page(sg);
-- if (p) __free_page(p);
-+ if (p)
-+ __free_page(p);
- }
- sg_free_table(sgt);
- }
--static void ithc_dma_data_devres_release(struct device *dev, void *res) {
-+static void ithc_dma_data_devres_release(struct device *dev, void *res)
-+{
- struct ithc_sg_table *sgt = res;
-- if (sgt->addr) vunmap(sgt->addr);
-+ if (sgt->addr)
-+ vunmap(sgt->addr);
- dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0);
- ithc_dma_sgtable_free(&sgt->sgt);
- }
-
--static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) {
-- // We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional).
-- // Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now).
-+static int ithc_dma_data_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
-+ struct ithc_dma_data_buffer *b)
-+{
-+ // We don't use dma_alloc_coherent() for data buffers, because they don't have to be
-+ // coherent (they are unidirectional) or contiguous (we can use one PRD per page).
-+ // We could use dma_alloc_noncontiguous(), however this still always allocates a single
-+ // DMA mapped segment, which is more restrictive than what we need.
-+ // Instead we use an sg_table of individually allocated pages.
- struct page *pages[16];
-- if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL;
-+ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages))
-+ return -EINVAL;
- b->active_idx = -1;
-- struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL);
-- if (!sgt) return -ENOMEM;
-+ struct ithc_sg_table *sgt = devres_alloc(
-+ ithc_dma_data_devres_release, sizeof(*sgt), GFP_KERNEL);
-+ if (!sgt)
-+ return -ENOMEM;
- sgt->dir = prds->dir;
-+
- if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) {
- struct scatterlist *sg;
- int i;
- bool ok = true;
- for_each_sgtable_sg(&sgt->sgt, sg, i) {
-- struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA
-- if (!p) { ok = false; break; }
-+ // NOTE: don't need __GFP_DMA for PCI DMA
-+ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
-+ if (!p) {
-+ ok = false;
-+ break;
-+ }
- sg_set_page(sg, p, PAGE_SIZE, 0);
- }
- if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) {
- devres_add(&ithc->pci->dev, sgt);
- b->sgt = &sgt->sgt;
- b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL);
-- if (!b->addr) return -ENOMEM;
-+ if (!b->addr)
-+ return -ENOMEM;
- return 0;
- }
- ithc_dma_sgtable_free(&sgt->sgt);
-@@ -62,17 +94,29 @@ static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *pr
- return -ENOMEM;
- }
-
--static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) {
-+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
-+ struct ithc_dma_data_buffer *b, unsigned int idx)
-+{
-+ // Give a buffer to the THC.
- struct ithc_phys_region_desc *prd = prds->addr;
- prd += idx * prds->num_pages;
-- if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; }
-+ if (b->active_idx >= 0) {
-+ pci_err(ithc->pci, "buffer already active\n");
-+ return -EINVAL;
-+ }
- b->active_idx = idx;
- if (prds->dir == DMA_TO_DEVICE) {
-- if (b->data_size > PAGE_SIZE) return -EINVAL;
-+ // TX buffer: Caller should have already filled the data buffer, so just fill
-+ // the PRD and flush.
-+ // (TODO: Support multi-page TX buffers. So far no device seems to use or need
-+ // these though.)
-+ if (b->data_size > PAGE_SIZE)
-+ return -EINVAL;
- prd->addr = sg_dma_address(b->sgt->sgl) >> 10;
- prd->size = b->data_size | PRD_FLAG_END;
- flush_kernel_vmap_range(b->addr, b->data_size);
- } else if (prds->dir == DMA_FROM_DEVICE) {
-+ // RX buffer: Reset PRDs.
- struct scatterlist *sg;
- int i;
- for_each_sgtable_dma_sg(b->sgt, sg, i) {
-@@ -87,21 +131,34 @@ static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffe
- return 0;
- }
-
--static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) {
-+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds,
-+ struct ithc_dma_data_buffer *b, unsigned int idx)
-+{
-+ // Take a buffer from the THC.
- struct ithc_phys_region_desc *prd = prds->addr;
- prd += idx * prds->num_pages;
-- if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; }
-+ // This is purely a sanity check. We don't strictly need the idx parameter for this
-+ // function, because it should always be the same as active_idx, unless we have a bug.
-+ if (b->active_idx != idx) {
-+ pci_err(ithc->pci, "wrong buffer index\n");
-+ return -EINVAL;
-+ }
- b->active_idx = -1;
- if (prds->dir == DMA_FROM_DEVICE) {
-+ // RX buffer: Calculate actual received data size from PRDs.
- dma_rmb(); // for the prds
- b->data_size = 0;
- struct scatterlist *sg;
- int i;
- for_each_sgtable_dma_sg(b->sgt, sg, i) {
-- unsigned size = prd->size;
-+ unsigned int size = prd->size;
- b->data_size += size & PRD_SIZE_MASK;
-- if (size & PRD_FLAG_END) break;
-- if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; }
-+ if (size & PRD_FLAG_END)
-+ break;
-+ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) {
-+ pci_err(ithc->pci, "truncated prd\n");
-+ break;
-+ }
- prd++;
- }
- invalidate_kernel_vmap_range(b->addr, b->data_size);
-@@ -110,93 +167,139 @@ static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffe
- return 0;
- }
-
--int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) {
-+int ithc_dma_rx_init(struct ithc *ithc, u8 channel)
-+{
- struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
- mutex_init(&rx->mutex);
-+
-+ // Allocate buffers.
- u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes);
-- unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE;
-- pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages);
-+ unsigned int num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE;
-+ pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n",
-+ NUM_RX_BUF, buf_size, num_pages);
- CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE);
-- for (unsigned i = 0; i < NUM_RX_BUF; i++)
-+ for (unsigned int i = 0; i < NUM_RX_BUF; i++)
- CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]);
-+
-+ // Init registers.
- writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2);
- lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr);
- writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs);
- writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds);
- u8 head = readb(&ithc->regs->dma_rx[channel].head);
-- if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; }
-- for (unsigned i = 0; i < NUM_RX_BUF; i++)
-+ if (head) {
-+ pci_err(ithc->pci, "head is nonzero (%u)\n", head);
-+ return -EIO;
-+ }
-+
-+ // Init buffers.
-+ for (unsigned int i = 0; i < NUM_RX_BUF; i++)
- CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i);
-+
- writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail);
- return 0;
- }
--void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) {
-- bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA);
-- CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED);
-+
-+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel)
-+{
-+ bitsb_set(&ithc->regs->dma_rx[channel].control,
-+ DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA);
-+ CHECK(waitl, ithc, &ithc->regs->dma_rx[channel].status,
-+ DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED);
- }
-
--int ithc_dma_tx_init(struct ithc *ithc) {
-+int ithc_dma_tx_init(struct ithc *ithc)
-+{
- struct ithc_dma_tx *tx = &ithc->dma_tx;
- mutex_init(&tx->mutex);
-+
-+ // Allocate buffers.
- tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes);
-- unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE;
-- pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages);
-+ unsigned int num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE;
-+ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n",
-+ tx->max_size, num_pages);
- CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE);
- CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf);
-+
-+ // Init registers.
- lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr);
- writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds);
-+
-+ // Init buffers.
- CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
- return 0;
- }
-
--static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, u8 channel, u8 buf) {
-+static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data,
-+ u8 channel, u8 buf)
-+{
- if (buf >= NUM_RX_BUF) {
- pci_err(ithc->pci, "invalid dma ringbuffer index\n");
- return -EINVAL;
- }
-- ithc_set_active(ithc);
- u32 len = data->data_size;
- struct ithc_dma_rx_header *hdr = data->addr;
- u8 *hiddata = (void *)(hdr + 1);
-- if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) {
-+ if (len >= sizeof(*hdr) && hdr->code == DMA_RX_CODE_RESET) {
-+ // The THC sends a reset request when we need to reinitialize the device.
-+ // This usually only happens if we send an invalid command or put the device
-+ // in a bad state.
- CHECK(ithc_reset, ithc);
-- } else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) {
-+ } else if (len < sizeof(*hdr) || len != sizeof(*hdr) + hdr->data_size) {
- if (hdr->code == DMA_RX_CODE_INPUT_REPORT) {
-- // When the CPU enters a low power state during DMA, we can get truncated messages.
-- // Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes.
-+ // When the CPU enters a low power state during DMA, we can get truncated
-+ // messages. For Surface devices, this will typically be a single touch
-+ // report that is only 1 byte, or a multitouch report that is 257 bytes.
- // See also ithc_set_active().
- } else {
-- pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size);
-- print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0);
-+ pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n",
-+ channel, buf, len, hdr->code, hdr->data_size);
-+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1,
-+ hdr, min(len, 0x400u), 0);
- }
- } else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) {
-+ // Response to a 'get report descriptor' request.
-+ // The actual descriptor is preceded by 8 nul bytes.
- CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8);
- WRITE_ONCE(ithc->hid_parse_done, true);
- wake_up(&ithc->wait_hid_parse);
- } else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) {
-+ // Standard HID input report containing touch data.
- CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1);
- } else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) {
-+ // Response to a 'get feature' request.
- bool done = false;
- mutex_lock(&ithc->hid_get_feature_mutex);
- if (ithc->hid_get_feature_buf) {
-- if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size;
-+ if (hdr->data_size < ithc->hid_get_feature_size)
-+ ithc->hid_get_feature_size = hdr->data_size;
- memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size);
- ithc->hid_get_feature_buf = NULL;
- done = true;
- }
- mutex_unlock(&ithc->hid_get_feature_mutex);
-- if (done) wake_up(&ithc->wait_hid_get_feature);
-- else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1);
-+ if (done) {
-+ wake_up(&ithc->wait_hid_get_feature);
-+ } else {
-+ // Received data without a matching request, or the request already
-+ // timed out. (XXX What's the correct thing to do here?)
-+ CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT,
-+ hiddata, hdr->data_size, 1);
-+ }
- } else {
-- pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code);
-- print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0);
-+ pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n",
-+ channel, buf, len, hdr->code);
-+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1,
-+ hdr, min(len, 0x400u), 0);
- }
- return 0;
- }
-
--static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) {
-+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel)
-+{
-+ // Process all filled RX buffers from the ringbuffer.
- struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
-- unsigned n = rx->num_received;
-+ unsigned int n = rx->num_received;
- u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head);
- while (1) {
- u8 tail = n % NUM_RX_BUF;
-@@ -204,7 +307,8 @@ static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) {
- writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail);
- // ringbuffer is full if tail_wrap == head_wrap
- // ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG
-- if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0;
-+ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG))
-+ return 0;
-
- // take the buffer that the device just filled
- struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF];
-@@ -218,7 +322,8 @@ static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) {
- CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail);
- }
- }
--int ithc_dma_rx(struct ithc *ithc, u8 channel) {
-+int ithc_dma_rx(struct ithc *ithc, u8 channel)
-+{
- struct ithc_dma_rx *rx = &ithc->dma_rx[channel];
- mutex_lock(&rx->mutex);
- int ret = ithc_dma_rx_unlocked(ithc, channel);
-@@ -226,14 +331,21 @@ int ithc_dma_rx(struct ithc *ithc, u8 channel) {
- return ret;
- }
-
--static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) {
-+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data)
-+{
-+ ithc_set_active(ithc, 100 * USEC_PER_MSEC);
-+
-+ // Send a single TX buffer to the THC.
- pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize);
- struct ithc_dma_tx_header *hdr;
-+ // Data must be padded to next 4-byte boundary.
- u8 padding = datasize & 3 ? 4 - (datasize & 3) : 0;
-- unsigned fullsize = sizeof *hdr + datasize + padding;
-- if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL;
-+ unsigned int fullsize = sizeof(*hdr) + datasize + padding;
-+ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE)
-+ return -EINVAL;
- CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
-
-+ // Fill the TX buffer with header and data.
- ithc->dma_tx.buf.data_size = fullsize;
- hdr = ithc->dma_tx.buf.addr;
- hdr->code = cmdcode;
-@@ -241,15 +353,18 @@ static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, vo
- u8 *dest = (void *)(hdr + 1);
- memcpy(dest, data, datasize);
- dest += datasize;
-- for (u8 p = 0; p < padding; p++) *dest++ = 0;
-+ for (u8 p = 0; p < padding; p++)
-+ *dest++ = 0;
- CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0);
-
-+ // Let the THC process the buffer.
- bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND);
- CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0);
- writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status);
- return 0;
- }
--int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) {
-+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data)
-+{
- mutex_lock(&ithc->dma_tx.mutex);
- int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data);
- mutex_unlock(&ithc->dma_tx.mutex);
-diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h
-index d9f2c19a13f3a..93652e4476bf8 100644
---- a/drivers/hid/ithc/ithc-dma.h
-+++ b/drivers/hid/ithc/ithc-dma.h
-@@ -1,3 +1,5 @@
-+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
-+
- #define PRD_SIZE_MASK 0xffffff
- #define PRD_FLAG_END 0x1000000
- #define PRD_FLAG_SUCCESS 0x2000000
-@@ -59,7 +61,7 @@ struct ithc_dma_rx {
- struct ithc_dma_data_buffer bufs[NUM_RX_BUF];
- };
-
--int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname);
-+int ithc_dma_rx_init(struct ithc *ithc, u8 channel);
- void ithc_dma_rx_enable(struct ithc *ithc, u8 channel);
- int ithc_dma_tx_init(struct ithc *ithc);
- int ithc_dma_rx(struct ithc *ithc, u8 channel);
-diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c
-index 09512b9cb4d31..87ed4aa70fda0 100644
---- a/drivers/hid/ithc/ithc-main.c
-+++ b/drivers/hid/ithc/ithc-main.c
-@@ -1,3 +1,5 @@
-+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-+
- #include "ithc.h"
-
- MODULE_DESCRIPTION("Intel Touch Host Controller driver");
-@@ -42,6 +44,9 @@ static const struct pci_device_id ithc_pci_tbl[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) },
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) },
-+ // XXX So far the THC seems to be the only Intel PCI device with PCI_CLASS_INPUT_PEN,
-+ // so instead of the device list we could just do:
-+ // { .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .class = PCI_CLASS_INPUT_PEN, .class_mask = ~0, },
- {}
- };
- MODULE_DEVICE_TABLE(pci, ithc_pci_tbl);
-@@ -52,6 +57,7 @@ static bool ithc_use_polling = false;
- module_param_named(poll, ithc_use_polling, bool, 0);
- MODULE_PARM_DESC(poll, "Use polling instead of interrupts");
-
-+// Since all known devices seem to use only channel 1, by default we disable channel 0.
- static bool ithc_use_rx0 = false;
- module_param_named(rx0, ithc_use_rx0, bool, 0);
- MODULE_PARM_DESC(rx0, "Use DMA RX channel 0");
-@@ -60,37 +66,56 @@ static bool ithc_use_rx1 = true;
- module_param_named(rx1, ithc_use_rx1, bool, 0);
- MODULE_PARM_DESC(rx1, "Use DMA RX channel 1");
-
-+// Values below 250 seem to work well on the SP7+. If this is set too high, you may observe cursor stuttering.
-+static int ithc_dma_latency_us = 200;
-+module_param_named(dma_latency_us, ithc_dma_latency_us, int, 0);
-+MODULE_PARM_DESC(dma_latency_us, "Determines the CPU latency QoS value for DMA transfers (in microseconds), -1 to disable latency QoS");
-+
-+// Values above 1700 seem to work well on the SP7+. If this is set too low, you may observe cursor stuttering.
-+static unsigned int ithc_dma_early_us = 2000;
-+module_param_named(dma_early_us, ithc_dma_early_us, uint, 0);
-+MODULE_PARM_DESC(dma_early_us, "Determines how early the CPU latency QoS value is applied before the next expected IRQ (in microseconds)");
-+
- static bool ithc_log_regs_enabled = false;
- module_param_named(logregs, ithc_log_regs_enabled, bool, 0);
- MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)");
-
- // Sysfs attributes
-
--static bool ithc_is_config_valid(struct ithc *ithc) {
-+static bool ithc_is_config_valid(struct ithc *ithc)
-+{
- return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC;
- }
-
--static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
- struct ithc *ithc = dev_get_drvdata(dev);
-- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+ if (!ithc || !ithc_is_config_valid(ithc))
-+ return -ENODEV;
- return sprintf(buf, "0x%04x", ithc->config.vendor_id);
- }
- static DEVICE_ATTR_RO(vendor);
--static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
- struct ithc *ithc = dev_get_drvdata(dev);
-- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+ if (!ithc || !ithc_is_config_valid(ithc))
-+ return -ENODEV;
- return sprintf(buf, "0x%04x", ithc->config.product_id);
- }
- static DEVICE_ATTR_RO(product);
--static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
- struct ithc *ithc = dev_get_drvdata(dev);
-- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+ if (!ithc || !ithc_is_config_valid(ithc))
-+ return -ENODEV;
- return sprintf(buf, "%u", ithc->config.revision);
- }
- static DEVICE_ATTR_RO(revision);
--static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) {
-+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
- struct ithc *ithc = dev_get_drvdata(dev);
-- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV;
-+ if (!ithc || !ithc_is_config_valid(ithc))
-+ return -ENODEV;
- u32 v = ithc->config.fw_version;
- return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff);
- }
-@@ -117,45 +142,75 @@ static void ithc_hid_stop(struct hid_device *hdev) { }
- static int ithc_hid_open(struct hid_device *hdev) { return 0; }
- static void ithc_hid_close(struct hid_device *hdev) { }
-
--static int ithc_hid_parse(struct hid_device *hdev) {
-+static int ithc_hid_parse(struct hid_device *hdev)
-+{
- struct ithc *ithc = hdev->driver_data;
- u64 val = 0;
- WRITE_ONCE(ithc->hid_parse_done, false);
-- CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val);
-- if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT;
-- return 0;
-+ for (int retries = 0; ; retries++) {
-+ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof(val), &val);
-+ if (wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done),
-+ msecs_to_jiffies(200)))
-+ return 0;
-+ if (retries > 5) {
-+ pci_err(ithc->pci, "failed to read report descriptor\n");
-+ return -ETIMEDOUT;
-+ }
-+ pci_warn(ithc->pci, "failed to read report descriptor, retrying\n");
-+ }
- }
-
--static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) {
-+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf,
-+ size_t len, unsigned char rtype, int reqtype)
-+{
- struct ithc *ithc = hdev->driver_data;
-- if (!buf || !len) return -EINVAL;
-+ if (!buf || !len)
-+ return -EINVAL;
- u32 code;
-- if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT;
-- else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE;
-- else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE;
-- else {
-- pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum);
-+ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) {
-+ code = DMA_TX_CODE_OUTPUT_REPORT;
-+ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) {
-+ code = DMA_TX_CODE_SET_FEATURE;
-+ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) {
-+ code = DMA_TX_CODE_GET_FEATURE;
-+ } else {
-+ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n",
-+ rtype, reqtype, reportnum);
- return -EINVAL;
- }
- buf[0] = reportnum;
-+
- if (reqtype == HID_REQ_GET_REPORT) {
-+ // Prepare for response.
- mutex_lock(&ithc->hid_get_feature_mutex);
- ithc->hid_get_feature_buf = buf;
- ithc->hid_get_feature_size = len;
- mutex_unlock(&ithc->hid_get_feature_mutex);
-+
-+ // Transmit 'get feature' request.
- int r = CHECK(ithc_dma_tx, ithc, code, 1, buf);
- if (!r) {
-- r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000));
-- if (!r) r = -ETIMEDOUT;
-- else if (r < 0) r = -EINTR;
-- else r = 0;
-+ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature,
-+ !ithc->hid_get_feature_buf, msecs_to_jiffies(1000));
-+ if (!r)
-+ r = -ETIMEDOUT;
-+ else if (r < 0)
-+ r = -EINTR;
-+ else
-+ r = 0;
- }
-+
-+ // If everything went ok, the buffer has been filled with the response data.
-+ // Return the response size.
- mutex_lock(&ithc->hid_get_feature_mutex);
- ithc->hid_get_feature_buf = NULL;
-- if (!r) r = ithc->hid_get_feature_size;
-+ if (!r)
-+ r = ithc->hid_get_feature_size;
- mutex_unlock(&ithc->hid_get_feature_mutex);
- return r;
- }
-+
-+ // 'Set feature', or 'output report'. These don't have a response.
- CHECK_RET(ithc_dma_tx, ithc, code, len, buf);
- return 0;
- }
-@@ -169,17 +224,22 @@ static struct hid_ll_driver ithc_ll_driver = {
- .raw_request = ithc_hid_raw_request,
- };
-
--static void ithc_hid_devres_release(struct device *dev, void *res) {
-+static void ithc_hid_devres_release(struct device *dev, void *res)
-+{
- struct hid_device **hidm = res;
-- if (*hidm) hid_destroy_device(*hidm);
-+ if (*hidm)
-+ hid_destroy_device(*hidm);
- }
-
--static int ithc_hid_init(struct ithc *ithc) {
-- struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL);
-- if (!hidm) return -ENOMEM;
-+static int ithc_hid_init(struct ithc *ithc)
-+{
-+ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof(*hidm), GFP_KERNEL);
-+ if (!hidm)
-+ return -ENOMEM;
- devres_add(&ithc->pci->dev, hidm);
- struct hid_device *hid = hid_allocate_device();
-- if (IS_ERR(hid)) return PTR_ERR(hid);
-+ if (IS_ERR(hid))
-+ return PTR_ERR(hid);
- *hidm = hid;
-
- strscpy(hid->name, DEVFULLNAME, sizeof(hid->name));
-@@ -198,27 +258,45 @@ static int ithc_hid_init(struct ithc *ithc) {
-
- // Interrupts/polling
-
--static void ithc_activity_timer_callback(struct timer_list *t) {
-- struct ithc *ithc = container_of(t, struct ithc, activity_timer);
-- cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+static enum hrtimer_restart ithc_activity_start_timer_callback(struct hrtimer *t)
-+{
-+ struct ithc *ithc = container_of(t, struct ithc, activity_start_timer);
-+ ithc_set_active(ithc, ithc_dma_early_us * 2 + USEC_PER_MSEC);
-+ return HRTIMER_NORESTART;
- }
-
--void ithc_set_active(struct ithc *ithc) {
-- // When CPU usage is very low, the CPU can enter various low power states (C2-C10).
-- // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens.
-- // The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor).
-- // To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions.
-- cpu_latency_qos_update_request(&ithc->activity_qos, 0);
-- mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000));
--}
--
--static int ithc_set_device_enabled(struct ithc *ithc, bool enable) {
-- u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2
-- | (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0);
-- return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x);
-+static enum hrtimer_restart ithc_activity_end_timer_callback(struct hrtimer *t)
-+{
-+ struct ithc *ithc = container_of(t, struct ithc, activity_end_timer);
-+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+ return HRTIMER_NORESTART;
- }
-
--static void ithc_disable_interrupts(struct ithc *ithc) {
-+void ithc_set_active(struct ithc *ithc, unsigned int duration_us)
-+{
-+ if (ithc_dma_latency_us < 0)
-+ return;
-+ // When CPU usage is very low, the CPU can enter various low power states (C2-C10).
-+ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_RX_TIMEOUT will be
-+ // set when this happens. The amount of truncated messages can become very high, resulting
-+ // in user-visible effects (laggy/stuttering cursor). To avoid this, we use a CPU latency
-+ // QoS request to prevent the CPU from entering low power states during touch interactions.
-+ cpu_latency_qos_update_request(&ithc->activity_qos, ithc_dma_latency_us);
-+ hrtimer_start_range_ns(&ithc->activity_end_timer,
-+ ns_to_ktime(duration_us * NSEC_PER_USEC), duration_us * NSEC_PER_USEC, HRTIMER_MODE_REL);
-+}
-+
-+static int ithc_set_device_enabled(struct ithc *ithc, bool enable)
-+{
-+ u32 x = ithc->config.touch_cfg =
-+ (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 |
-+ (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0);
-+ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE,
-+ offsetof(struct ithc_device_config, touch_cfg), sizeof(x), &x);
-+}
-+
-+static void ithc_disable_interrupts(struct ithc *ithc)
-+{
- writel(0, &ithc->regs->error_control);
- bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0);
- bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0);
-@@ -226,43 +304,85 @@ static void ithc_disable_interrupts(struct ithc *ithc) {
- bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0);
- }
-
--static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) {
-- writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status);
-+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned int channel)
-+{
-+ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA,
-+ &ithc->regs->dma_rx[channel].status);
- }
-
--static void ithc_clear_interrupts(struct ithc *ithc) {
-+static void ithc_clear_interrupts(struct ithc *ithc)
-+{
- writel(0xffffffff, &ithc->regs->error_flags);
- writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status);
- writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
- ithc_clear_dma_rx_interrupts(ithc, 0);
- ithc_clear_dma_rx_interrupts(ithc, 1);
-- writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status);
-+ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2,
-+ &ithc->regs->dma_tx.status);
- }
-
--static void ithc_process(struct ithc *ithc) {
-+static void ithc_process(struct ithc *ithc)
-+{
- ithc_log_regs(ithc);
-
-- // read and clear error bits
-+ bool rx0 = ithc_use_rx0 && (readl(&ithc->regs->dma_rx[0].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0;
-+ bool rx1 = ithc_use_rx1 && (readl(&ithc->regs->dma_rx[1].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0;
-+
-+ // Track time between DMA rx transfers, so we can try to predict when we need to enable CPU latency QoS for the next transfer
-+ ktime_t t = ktime_get();
-+ ktime_t dt = ktime_sub(t, ithc->last_rx_time);
-+ if (rx0 || rx1) {
-+ ithc->last_rx_time = t;
-+ if (dt > ms_to_ktime(100)) {
-+ ithc->cur_rx_seq_count = 0;
-+ ithc->cur_rx_seq_errors = 0;
-+ }
-+ ithc->cur_rx_seq_count++;
-+ if (!ithc_use_polling && ithc_dma_latency_us >= 0) {
-+ // Disable QoS, since the DMA transfer has completed (we re-enable it after a delay below)
-+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-+ hrtimer_try_to_cancel(&ithc->activity_end_timer);
-+ }
-+ }
-+
-+ // Read and clear error bits
- u32 err = readl(&ithc->regs->error_flags);
- if (err) {
-- if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 0x%08x\n", err);
- writel(err, &ithc->regs->error_flags);
-+ if (err & ~ERROR_FLAG_DMA_RX_TIMEOUT)
-+ pci_err(ithc->pci, "error flags: 0x%08x\n", err);
-+ if (err & ERROR_FLAG_DMA_RX_TIMEOUT) {
-+ // Only log an error if we see a significant number of these errors.
-+ ithc->cur_rx_seq_errors++;
-+ if (ithc->cur_rx_seq_errors && ithc->cur_rx_seq_errors % 50 == 0 && ithc->cur_rx_seq_errors > ithc->cur_rx_seq_count / 10)
-+ pci_err(ithc->pci, "High number of DMA RX timeouts/errors (%u/%u, dt=%lldus). Try adjusting dma_early_us and/or dma_latency_us.\n",
-+ ithc->cur_rx_seq_errors, ithc->cur_rx_seq_count, ktime_to_us(dt));
-+ }
- }
-
-- // process DMA rx
-+ // Process DMA rx
- if (ithc_use_rx0) {
- ithc_clear_dma_rx_interrupts(ithc, 0);
-- ithc_dma_rx(ithc, 0);
-+ if (rx0)
-+ ithc_dma_rx(ithc, 0);
- }
- if (ithc_use_rx1) {
- ithc_clear_dma_rx_interrupts(ithc, 1);
-- ithc_dma_rx(ithc, 1);
-+ if (rx1)
-+ ithc_dma_rx(ithc, 1);
-+ }
-+
-+ // Start timer to re-enable QoS for next rx, but only if we've seen an ERROR_FLAG_DMA_RX_TIMEOUT
-+ if ((rx0 || rx1) && !ithc_use_polling && ithc_dma_latency_us >= 0 && ithc->cur_rx_seq_errors > 0) {
-+ ktime_t expires = ktime_add(t, ktime_sub_us(dt, ithc_dma_early_us));
-+ hrtimer_start_range_ns(&ithc->activity_start_timer, expires, 10 * NSEC_PER_USEC, HRTIMER_MODE_ABS);
- }
-
- ithc_log_regs(ithc);
- }
-
--static irqreturn_t ithc_interrupt_thread(int irq, void *arg) {
-+static irqreturn_t ithc_interrupt_thread(int irq, void *arg)
-+{
- struct ithc *ithc = arg;
- pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n",
- readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags),
-@@ -274,14 +394,21 @@ static irqreturn_t ithc_interrupt_thread(int irq, void *arg) {
- return IRQ_HANDLED;
- }
-
--static int ithc_poll_thread(void *arg) {
-+static int ithc_poll_thread(void *arg)
-+{
- struct ithc *ithc = arg;
-- unsigned sleep = 100;
-+ unsigned int sleep = 100;
- while (!kthread_should_stop()) {
- u32 n = ithc->dma_rx[1].num_received;
- ithc_process(ithc);
-- if (n != ithc->dma_rx[1].num_received) sleep = 20;
-- else sleep = min(200u, sleep + (sleep >> 4) + 1);
-+ // Decrease polling interval to 20ms if we received data, otherwise slowly
-+ // increase it up to 200ms.
-+ if (n != ithc->dma_rx[1].num_received) {
-+ ithc_set_active(ithc, 100 * USEC_PER_MSEC);
-+ sleep = 20;
-+ } else {
-+ sleep = min(200u, sleep + (sleep >> 4) + 1);
-+ }
- msleep_interruptible(sleep);
- }
- return 0;
-@@ -289,7 +416,8 @@ static int ithc_poll_thread(void *arg) {
-
- // Device initialization and shutdown
-
--static void ithc_disable(struct ithc *ithc) {
-+static void ithc_disable(struct ithc *ithc)
-+{
- bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE);
- CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED);
- bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0);
-@@ -301,81 +429,112 @@ static void ithc_disable(struct ithc *ithc) {
- ithc_clear_interrupts(ithc);
- }
-
--static int ithc_init_device(struct ithc *ithc) {
-+static int ithc_init_device(struct ithc *ithc)
-+{
- ithc_log_regs(ithc);
- bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0;
- ithc_disable(ithc);
- CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY);
-+
-+ // Since we don't yet know which SPI config the device wants, use default speed and mode
-+ // initially for reading config data.
- ithc_set_spi_config(ithc, 10, 0);
-- bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config
-
-- if (was_enabled) if (msleep_interruptible(100)) return -EINTR;
-+ // Setting the following bit seems to make reading the config more reliable.
-+ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000);
-+
-+ // If the device was previously enabled, wait a bit to make sure it's fully shut down.
-+ if (was_enabled)
-+ if (msleep_interruptible(100))
-+ return -EINTR;
-+
-+ // Take the touch device out of reset.
- bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0);
- CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0);
- for (int retries = 0; ; retries++) {
- ithc_log_regs(ithc);
- bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET);
-- if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break;
-+ if (!waitl(ithc, &ithc->regs->state, 0xf, 2))
-+ break;
- if (retries > 5) {
-- pci_err(ithc->pci, "too many retries, failed to reset device\n");
-+ pci_err(ithc->pci, "failed to reset device, state = 0x%08x\n", readl(&ithc->regs->state));
- return -ETIMEDOUT;
- }
-- pci_err(ithc->pci, "invalid state, retrying reset\n");
-+ pci_warn(ithc->pci, "invalid state, retrying reset\n");
- bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0);
-- if (msleep_interruptible(1000)) return -EINTR;
-+ if (msleep_interruptible(1000))
-+ return -EINTR;
- }
- ithc_log_regs(ithc);
-
-+ // Waiting for the following status bit makes reading config much more reliable,
-+ // however the official driver does not seem to do this...
- CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4);
-
-- // read config
-+ // Read configuration data.
- for (int retries = 0; ; retries++) {
- ithc_log_regs(ithc);
-- memset(&ithc->config, 0, sizeof ithc->config);
-- CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config);
-+ memset(&ithc->config, 0, sizeof(ithc->config));
-+ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof(ithc->config), &ithc->config);
- u32 *p = (void *)&ithc->config;
- pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
- p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
-- if (ithc_is_config_valid(ithc)) break;
-+ if (ithc_is_config_valid(ithc))
-+ break;
- if (retries > 10) {
-- pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id);
-+ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n",
-+ ithc->config.device_id);
- return -EIO;
- }
-- pci_err(ithc->pci, "failed to read config, retrying\n");
-- if (msleep_interruptible(100)) return -EINTR;
-+ pci_warn(ithc->pci, "failed to read config, retrying\n");
-+ if (msleep_interruptible(100))
-+ return -EINTR;
- }
- ithc_log_regs(ithc);
-
-- CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config));
-+ // Apply SPI config and enable touch device.
-+ CHECK_RET(ithc_set_spi_config, ithc,
-+ DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config),
-+ DEVCFG_SPI_MODE(ithc->config.spi_config));
- CHECK_RET(ithc_set_device_enabled, ithc, true);
- ithc_log_regs(ithc);
- return 0;
- }
-
--int ithc_reset(struct ithc *ithc) {
-- // FIXME This should probably do devres_release_group()+ithc_start(). But because this is called during DMA
-- // processing, that would have to be done asynchronously (schedule_work()?). And with extra locking?
-+int ithc_reset(struct ithc *ithc)
-+{
-+ // FIXME This should probably do devres_release_group()+ithc_start().
-+ // But because this is called during DMA processing, that would have to be done
-+ // asynchronously (schedule_work()?). And with extra locking?
- pci_err(ithc->pci, "reset\n");
- CHECK(ithc_init_device, ithc);
-- if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0);
-- if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1);
-+ if (ithc_use_rx0)
-+ ithc_dma_rx_enable(ithc, 0);
-+ if (ithc_use_rx1)
-+ ithc_dma_rx_enable(ithc, 1);
- ithc_log_regs(ithc);
- pci_dbg(ithc->pci, "reset completed\n");
- return 0;
- }
-
--static void ithc_stop(void *res) {
-+static void ithc_stop(void *res)
-+{
- struct ithc *ithc = res;
- pci_dbg(ithc->pci, "stopping\n");
- ithc_log_regs(ithc);
-- if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread);
-- if (ithc->irq >= 0) disable_irq(ithc->irq);
-+
-+ if (ithc->poll_thread)
-+ CHECK(kthread_stop, ithc->poll_thread);
-+ if (ithc->irq >= 0)
-+ disable_irq(ithc->irq);
- CHECK(ithc_set_device_enabled, ithc, false);
- ithc_disable(ithc);
-- del_timer_sync(&ithc->activity_timer);
-+ hrtimer_cancel(&ithc->activity_start_timer);
-+ hrtimer_cancel(&ithc->activity_end_timer);
- cpu_latency_qos_remove_request(&ithc->activity_qos);
-- // clear dma config
-- for(unsigned i = 0; i < 2; i++) {
-+
-+ // Clear DMA config.
-+ for (unsigned int i = 0; i < 2; i++) {
- CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0);
- lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr);
- writeb(0, &ithc->regs->dma_rx[i].num_bufs);
-@@ -383,35 +542,43 @@ static void ithc_stop(void *res) {
- }
- lo_hi_writeq(0, &ithc->regs->dma_tx.addr);
- writeb(0, &ithc->regs->dma_tx.num_prds);
-+
- ithc_log_regs(ithc);
- pci_dbg(ithc->pci, "stopped\n");
- }
-
--static void ithc_clear_drvdata(void *res) {
-+static void ithc_clear_drvdata(void *res)
-+{
- struct pci_dev *pci = res;
- pci_set_drvdata(pci, NULL);
- }
-
--static int ithc_start(struct pci_dev *pci) {
-+static int ithc_start(struct pci_dev *pci)
-+{
- pci_dbg(pci, "starting\n");
- if (pci_get_drvdata(pci)) {
- pci_err(pci, "device already initialized\n");
- return -EINVAL;
- }
-- if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM;
-+ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL))
-+ return -ENOMEM;
-
-- struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL);
-- if (!ithc) return -ENOMEM;
-+ // Allocate/init main driver struct.
-+ struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof(*ithc), GFP_KERNEL);
-+ if (!ithc)
-+ return -ENOMEM;
- ithc->irq = -1;
- ithc->pci = pci;
-- snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci));
-+ snprintf(ithc->phys, sizeof(ithc->phys), "pci-%s/" DEVNAME, pci_name(pci));
- init_waitqueue_head(&ithc->wait_hid_parse);
- init_waitqueue_head(&ithc->wait_hid_get_feature);
- mutex_init(&ithc->hid_get_feature_mutex);
- pci_set_drvdata(pci, ithc);
- CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci);
-- if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL);
-+ if (ithc_log_regs_enabled)
-+ ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof(*ithc->prev_regs), GFP_KERNEL);
-
-+ // PCI initialization.
- CHECK_RET(pcim_enable_device, pci);
- pci_set_master(pci);
- CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs");
-@@ -419,29 +586,39 @@ static int ithc_start(struct pci_dev *pci) {
- CHECK_RET(pci_set_power_state, pci, PCI_D0);
- ithc->regs = pcim_iomap_table(pci)[0];
-
-+ // Allocate IRQ.
- if (!ithc_use_polling) {
- CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
- ithc->irq = CHECK(pci_irq_vector, pci, 0);
-- if (ithc->irq < 0) return ithc->irq;
-+ if (ithc->irq < 0)
-+ return ithc->irq;
- }
-
-+ // Initialize THC and touch device.
- CHECK_RET(ithc_init_device, ithc);
- CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups);
-- if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME);
-- if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? DEVNAME "1" : DEVNAME);
-+ if (ithc_use_rx0)
-+ CHECK_RET(ithc_dma_rx_init, ithc, 0);
-+ if (ithc_use_rx1)
-+ CHECK_RET(ithc_dma_rx_init, ithc, 1);
- CHECK_RET(ithc_dma_tx_init, ithc);
-
-- CHECK_RET(ithc_hid_init, ithc);
--
- cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE);
-- timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0);
-+ hrtimer_init(&ithc->activity_start_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-+ ithc->activity_start_timer.function = ithc_activity_start_timer_callback;
-+ hrtimer_init(&ithc->activity_end_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-+ ithc->activity_end_timer.function = ithc_activity_end_timer_callback;
-
-- // add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed
-+ // Add ithc_stop() callback AFTER setting up DMA buffers, so that polling/irqs/DMA are
-+ // disabled BEFORE the buffers are freed.
- CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc);
-
-+ CHECK_RET(ithc_hid_init, ithc);
-+
-+ // Start polling/IRQ.
- if (ithc_use_polling) {
- pci_info(pci, "using polling instead of irq\n");
-- // use a thread instead of simple timer because we want to be able to sleep
-+ // Use a thread instead of simple timer because we want to be able to sleep.
- ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll");
- if (IS_ERR(ithc->poll_thread)) {
- int err = PTR_ERR(ithc->poll_thread);
-@@ -449,13 +626,17 @@ static int ithc_start(struct pci_dev *pci) {
- return err;
- }
- } else {
-- CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc);
-+ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL,
-+ ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc);
- }
-
-- if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0);
-- if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1);
-+ if (ithc_use_rx0)
-+ ithc_dma_rx_enable(ithc, 0);
-+ if (ithc_use_rx1)
-+ ithc_dma_rx_enable(ithc, 1);
-
-- // hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA
-+ // hid_add_device() can only be called after irq/polling is started and DMA is enabled,
-+ // because it calls ithc_hid_parse() which reads the report descriptor via DMA.
- CHECK_RET(hid_add_device, ithc->hid);
-
- CHECK(ithc_debug_init, ithc);
-@@ -464,43 +645,54 @@ static int ithc_start(struct pci_dev *pci) {
- return 0;
- }
-
--static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) {
-+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id)
-+{
- pci_dbg(pci, "device probe\n");
- return ithc_start(pci);
- }
-
--static void ithc_remove(struct pci_dev *pci) {
-+static void ithc_remove(struct pci_dev *pci)
-+{
- pci_dbg(pci, "device remove\n");
- // all cleanup is handled by devres
- }
-
--static int ithc_suspend(struct device *dev) {
-+// For suspend/resume, we just deinitialize and reinitialize everything.
-+// TODO It might be cleaner to keep the HID device around, however we would then have to signal
-+// to userspace that the touch device has lost state and userspace needs to e.g. resend 'set
-+// feature' requests. Hidraw does not seem to have a facility to do that.
-+static int ithc_suspend(struct device *dev)
-+{
- struct pci_dev *pci = to_pci_dev(dev);
- pci_dbg(pci, "pm suspend\n");
- devres_release_group(dev, ithc_start);
- return 0;
- }
-
--static int ithc_resume(struct device *dev) {
-+static int ithc_resume(struct device *dev)
-+{
- struct pci_dev *pci = to_pci_dev(dev);
- pci_dbg(pci, "pm resume\n");
- return ithc_start(pci);
- }
-
--static int ithc_freeze(struct device *dev) {
-+static int ithc_freeze(struct device *dev)
-+{
- struct pci_dev *pci = to_pci_dev(dev);
- pci_dbg(pci, "pm freeze\n");
- devres_release_group(dev, ithc_start);
- return 0;
- }
-
--static int ithc_thaw(struct device *dev) {
-+static int ithc_thaw(struct device *dev)
-+{
- struct pci_dev *pci = to_pci_dev(dev);
- pci_dbg(pci, "pm thaw\n");
- return ithc_start(pci);
- }
-
--static int ithc_restore(struct device *dev) {
-+static int ithc_restore(struct device *dev)
-+{
- struct pci_dev *pci = to_pci_dev(dev);
- pci_dbg(pci, "pm restore\n");
- return ithc_start(pci);
-@@ -521,11 +713,13 @@ static struct pci_driver ithc_driver = {
- //.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway
- };
-
--static int __init ithc_init(void) {
-+static int __init ithc_init(void)
-+{
- return pci_register_driver(&ithc_driver);
- }
-
--static void __exit ithc_exit(void) {
-+static void __exit ithc_exit(void)
-+{
- pci_unregister_driver(&ithc_driver);
- }
-
-diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c
-index 85d567b05761f..e058721886e37 100644
---- a/drivers/hid/ithc/ithc-regs.c
-+++ b/drivers/hid/ithc/ithc-regs.c
-@@ -1,63 +1,95 @@
-+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-+
- #include "ithc.h"
-
- #define reg_num(r) (0x1fff & (u16)(__force u64)(r))
-
--void bitsl(__iomem u32 *reg, u32 mask, u32 val) {
-- if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask);
-+void bitsl(__iomem u32 *reg, u32 mask, u32 val)
-+{
-+ if (val & ~mask)
-+ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n",
-+ reg_num(reg), val, mask);
- writel((readl(reg) & ~mask) | (val & mask), reg);
- }
-
--void bitsb(__iomem u8 *reg, u8 mask, u8 val) {
-- if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask);
-+void bitsb(__iomem u8 *reg, u8 mask, u8 val)
-+{
-+ if (val & ~mask)
-+ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n",
-+ reg_num(reg), val, mask);
- writeb((readb(reg) & ~mask) | (val & mask), reg);
- }
-
--int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) {
-- pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val);
-+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val)
-+{
-+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n",
-+ reg_num(reg), mask, val);
- u32 x;
- if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) {
-- pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val);
-+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n",
-+ reg_num(reg), mask, val);
- return -ETIMEDOUT;
- }
- pci_dbg(ithc->pci, "done waiting\n");
- return 0;
- }
-
--int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) {
-- pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val);
-+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val)
-+{
-+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n",
-+ reg_num(reg), mask, val);
- u8 x;
- if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) {
-- pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val);
-+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n",
-+ reg_num(reg), mask, val);
- return -ETIMEDOUT;
- }
- pci_dbg(ithc->pci, "done waiting\n");
- return 0;
- }
-
--int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) {
-+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode)
-+{
- pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode);
-- if (mode == 3) mode = 2;
-+ if (mode == 3)
-+ mode = 2;
- bitsl(&ithc->regs->spi_config,
- SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff),
- SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed));
- return 0;
- }
-
--int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) {
-+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data)
-+{
- pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset);
-- if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL;
-+ if (size > sizeof(ithc->regs->spi_cmd.data))
-+ return -EINVAL;
-+
-+ // Wait if the device is still busy.
- CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0);
-+ // Clear result flags.
- writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
-+
-+ // Init SPI command data.
- writeb(command, &ithc->regs->spi_cmd.code);
- writew(size, &ithc->regs->spi_cmd.size);
- writel(offset, &ithc->regs->spi_cmd.offset);
- u32 *p = data, n = (size + 3) / 4;
-- for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]);
-+ for (u32 i = 0; i < n; i++)
-+ writel(p[i], &ithc->regs->spi_cmd.data[i]);
-+
-+ // Start transmission.
- bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND);
- CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0);
-- if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO;
-- if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE;
-- for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]);
-+
-+ // Read response.
-+ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE)
-+ return -EIO;
-+ if (readw(&ithc->regs->spi_cmd.size) != size)
-+ return -EMSGSIZE;
-+ for (u32 i = 0; i < n; i++)
-+ p[i] = readl(&ithc->regs->spi_cmd.data[i]);
-+
- writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status);
- return 0;
- }
-diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h
-index 1a96092ed7eed..d4007d9e2bacc 100644
---- a/drivers/hid/ithc/ithc-regs.h
-+++ b/drivers/hid/ithc/ithc-regs.h
-@@ -1,3 +1,5 @@
-+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
-+
- #define CONTROL_QUIESCE BIT(1)
- #define CONTROL_IS_QUIESCED BIT(2)
- #define CONTROL_NRESET BIT(3)
-@@ -24,7 +26,7 @@
-
- #define ERROR_FLAG_DMA_UNKNOWN_9 BIT(9)
- #define ERROR_FLAG_DMA_UNKNOWN_10 BIT(10)
--#define ERROR_FLAG_DMA_UNKNOWN_12 BIT(12) // set when we receive a truncated DMA message
-+#define ERROR_FLAG_DMA_RX_TIMEOUT BIT(12) // set when we receive a truncated DMA message
- #define ERROR_FLAG_DMA_UNKNOWN_13 BIT(13)
- #define ERROR_FLAG_SPI_BUS_TURNAROUND BIT(16)
- #define ERROR_FLAG_SPI_RESPONSE_TIMEOUT BIT(17)
-@@ -67,6 +69,7 @@
- #define DMA_RX_STATUS_HAVE_DATA BIT(5)
- #define DMA_RX_STATUS_ENABLED BIT(8)
-
-+// COUNTER_RESET can be written to counter registers to reset them to zero. However, in some cases this can mess up the THC.
- #define COUNTER_RESET BIT(31)
-
- struct ithc_registers {
-@@ -147,15 +150,15 @@ static_assert(sizeof(struct ithc_registers) == 0x1300);
- #define DEVCFG_SPI_MAX_FREQ(x) (((x) >> 1) & 0xf) // high bit = use high speed mode?
- #define DEVCFG_SPI_MODE(x) (((x) >> 6) & 3)
- #define DEVCFG_SPI_UNKNOWN_8(x) (((x) >> 8) & 0x3f)
--#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20)
--#define DEVCFG_SPI_HEARTBEAT_INTERVAL (((x) >> 21) & 7)
-+#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) // TODO implement heartbeat
-+#define DEVCFG_SPI_HEARTBEAT_INTERVAL(x) (((x) >> 21) & 7)
- #define DEVCFG_SPI_UNKNOWN_25 BIT(25)
- #define DEVCFG_SPI_UNKNOWN_26 BIT(26)
- #define DEVCFG_SPI_UNKNOWN_27 BIT(27)
--#define DEVCFG_SPI_DELAY (((x) >> 28) & 7)
--#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31)
-+#define DEVCFG_SPI_DELAY(x) (((x) >> 28) & 7) // TODO use this
-+#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) // TODO use this?
-
--struct ithc_device_config {
-+struct ithc_device_config { // (Example values are from an SP7+.)
- u32 _unknown_00; // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET)
- u32 _unknown_04; // 04 = 0x00000000
- u32 dma_buf_sizes; // 08 = 0x000a00ff
-@@ -166,9 +169,9 @@ struct ithc_device_config {
- u16 vendor_id; // 1c = 0x045e = Microsoft Corp.
- u16 product_id; // 1e = 0x0c1a
- u32 revision; // 20 = 0x00000001
-- u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139
-+ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 (this value looks more random on newer devices)
- u32 _unknown_28; // 28 = 0x00000000
-- u32 fw_mode; // 2c = 0x00000000
-+ u32 fw_mode; // 2c = 0x00000000 (for fw update?)
- u32 _unknown_30; // 30 = 0x00000000
- u32 _unknown_34; // 34 = 0x0404035e (u8,u8,u8,u8 = version?)
- u32 _unknown_38; // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET)
-diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h
-index 6a9b0d480bc15..028e55a4ec53e 100644
---- a/drivers/hid/ithc/ithc.h
-+++ b/drivers/hid/ithc/ithc.h
-@@ -1,3 +1,5 @@
-+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
-+
- #include <linux/module.h>
- #include <linux/input.h>
- #include <linux/hid.h>
-@@ -21,7 +23,7 @@
- #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
- #define CHECK(fn, ...) ({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; })
--#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0)
-+#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while (0)
-
- #define NUM_RX_BUF 16
-
-@@ -35,8 +37,13 @@ struct ithc {
- struct pci_dev *pci;
- int irq;
- struct task_struct *poll_thread;
-+
- struct pm_qos_request activity_qos;
-- struct timer_list activity_timer;
-+ struct hrtimer activity_start_timer;
-+ struct hrtimer activity_end_timer;
-+ ktime_t last_rx_time;
-+ unsigned int cur_rx_seq_count;
-+ unsigned int cur_rx_seq_errors;
-
- struct hid_device *hid;
- bool hid_parse_done;
-@@ -54,7 +61,7 @@ struct ithc {
- };
-
- int ithc_reset(struct ithc *ithc);
--void ithc_set_active(struct ithc *ithc);
-+void ithc_set_active(struct ithc *ithc, unsigned int duration_us);
- int ithc_debug_init(struct ithc *ithc);
- void ithc_log_regs(struct ithc *ithc);
-
---
-2.42.0
+2.43.0
-From c4cbbcd24ea10e6558753174ae6dabcc9b54e438 Mon Sep 17 00:00:00 2001
+From fb7e9294f3970a450b891c2cc7b2195861d454e3 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 22 Oct 2023 14:57:11 +0200
Subject: [PATCH] platform/surface: aggregator_registry: Add support for
@@ -6891,7 +5687,7 @@ Patchset: surface-sam
1 file changed, 3 insertions(+)
diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
-index 0fe5be5396525..0d8c8395c5886 100644
+index aeb3feae40ff..2bc4977037fc 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
@@ -367,6 +367,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
@@ -6905,9 +5701,9 @@ index 0fe5be5396525..0d8c8395c5886 100644
{ "MSHW0123", (unsigned long)ssam_node_group_sls },
--
-2.42.0
+2.43.0
-From 0bb0adce3efad7a43fc3811f6cc24148c8c75253 Mon Sep 17 00:00:00 2001
+From 2de16abc5d0d2334e2935b1bdb3667a95d0009f2 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Mon, 20 Nov 2023 19:47:00 +0100
Subject: [PATCH] platform/surface: aggregator_registry: Add support for
@@ -6925,7 +5721,7 @@ Patchset: surface-sam
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
-index 0d8c8395c5886..530db4db71aba 100644
+index 2bc4977037fc..26cb6229ad16 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
@@ -247,8 +247,8 @@ static const struct software_node *ssam_node_group_sl5[] = {
@@ -6975,9 +5771,743 @@ index 0d8c8395c5886..530db4db71aba 100644
{ },
};
--
-2.42.0
+2.43.0
+
+From c06e370b5ed873b603aa0dc2faafe24a9e63b3e8 Mon Sep 17 00:00:00 2001
+From: Ivor Wanders <ivor@iwanders.net>
+Date: Mon, 18 Dec 2023 19:21:32 -0500
+Subject: [PATCH] platform/surface: aggregator_registry: add entry for fan
+ speed
+
+Add an entry for the fan speed function.
+Add this new entry to the Surface Pro 9 group.
+
+Signed-off-by: Ivor Wanders <ivor@iwanders.net>
+Link: https://github.com/linux-surface/kernel/pull/144
+Reviewed-by: Maximilian Luz <luzmaximilian@gmail.com>
+Patchset: surface-sam
+---
+ drivers/platform/surface/surface_aggregator_registry.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
+index 26cb6229ad16..f02a933160ff 100644
+--- a/drivers/platform/surface/surface_aggregator_registry.c
++++ b/drivers/platform/surface/surface_aggregator_registry.c
+@@ -74,6 +74,12 @@ static const struct software_node ssam_node_tmp_pprof = {
+ .parent = &ssam_node_root,
+ };
+
++/* Fan speed function. */
++static const struct software_node ssam_node_fan_speed = {
++ .name = "ssam:01:05:01:01:01",
++ .parent = &ssam_node_root,
++};
++
+ /* Tablet-mode switch via KIP subsystem. */
+ static const struct software_node ssam_node_kip_tablet_switch = {
+ .name = "ssam:01:0e:01:00:01",
+@@ -319,6 +325,7 @@ static const struct software_node *ssam_node_group_sp9[] = {
+ &ssam_node_bat_ac,
+ &ssam_node_bat_main,
+ &ssam_node_tmp_pprof,
++ &ssam_node_fan_speed,
+ &ssam_node_pos_tablet_switch,
+ &ssam_node_hid_kip_keyboard,
+ &ssam_node_hid_kip_penstash,
+--
+2.43.0
+
+From 63dcbbcad69219e1487db46a5c26c1ebdd9ef6be Mon Sep 17 00:00:00 2001
+From: Ivor Wanders <ivor@iwanders.net>
+Date: Thu, 30 Nov 2023 20:20:24 -0500
+Subject: [PATCH] hwmon: add fan speed monitoring driver for Surface devices
+
+Adds a driver that provides read only access to the fan speed for Microsoft
+Surface Pro devices. The fan speed is always regulated by the EC and cannot
+be influenced directly.
+
+Signed-off-by: Ivor Wanders <ivor@iwanders.net>
+Link: https://github.com/linux-surface/kernel/pull/144
+Patchset: surface-sam
+---
+ Documentation/hwmon/index.rst | 1 +
+ Documentation/hwmon/surface_fan.rst | 25 ++++++++
+ MAINTAINERS | 8 +++
+ drivers/hwmon/Kconfig | 13 ++++
+ drivers/hwmon/Makefile | 1 +
+ drivers/hwmon/surface_fan.c | 93 +++++++++++++++++++++++++++++
+ 6 files changed, 141 insertions(+)
+ create mode 100644 Documentation/hwmon/surface_fan.rst
+ create mode 100644 drivers/hwmon/surface_fan.c
+
+diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
+index 72f4e6065bae..7c254562abd6 100644
+--- a/Documentation/hwmon/index.rst
++++ b/Documentation/hwmon/index.rst
+@@ -204,6 +204,7 @@ Hardware Monitoring Kernel Drivers
+ smsc47m1
+ sparx5-temp
+ stpddc60
++ surface_fan
+ sy7636a-hwmon
+ tc654
+ tc74
+diff --git a/Documentation/hwmon/surface_fan.rst b/Documentation/hwmon/surface_fan.rst
+new file mode 100644
+index 000000000000..07942574c4f0
+--- /dev/null
++++ b/Documentation/hwmon/surface_fan.rst
+@@ -0,0 +1,25 @@
++.. SPDX-License-Identifier: GPL-2.0-or-later
++
++Kernel driver surface_fan
++=========================
++
++Supported Devices:
++
++ * Microsoft Surface Pro 9
++
++Author: Ivor Wanders <ivor@iwanders.net>
++
++Description
++-----------
++
++This provides monitoring of the fan found in some Microsoft Surface Pro devices,
++like the Surface Pro 9. The fan is always controlled by the onboard controller.
++
++Sysfs interface
++---------------
++
++======================= ======= =========================================
++Name Perm Description
++======================= ======= =========================================
++``fan1_input`` RO Current fan speed in RPM.
++======================= ======= =========================================
+diff --git a/MAINTAINERS b/MAINTAINERS
+index a7c4cf8201e0..77eb076e77da 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -14331,6 +14331,14 @@ F: Documentation/driver-api/surface_aggregator/clients/dtx.rst
+ F: drivers/platform/surface/surface_dtx.c
+ F: include/uapi/linux/surface_aggregator/dtx.h
+
++MICROSOFT SURFACE SENSOR FAN DRIVER
++M: Maximilian Luz <luzmaximilian@gmail.com>
++M: Ivor Wanders <ivor@iwanders.net>
++L: linux-hwmon@vger.kernel.org
++S: Maintained
++F: Documentation/hwmon/surface_fan.rst
++F: drivers/hwmon/surface_fan.c
++
+ MICROSOFT SURFACE GPE LID SUPPORT DRIVER
+ M: Maximilian Luz <luzmaximilian@gmail.com>
+ L: platform-driver-x86@vger.kernel.org
+diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
+index cf27523eed5a..1cef428c79ea 100644
+--- a/drivers/hwmon/Kconfig
++++ b/drivers/hwmon/Kconfig
+@@ -1983,6 +1983,19 @@ config SENSORS_SFCTEMP
+ This driver can also be built as a module. If so, the module
+ will be called sfctemp.
+
++config SENSORS_SURFACE_FAN
++ tristate "Surface Fan Driver"
++ depends on SURFACE_AGGREGATOR && SURFACE_AGGREGATOR_BUS
++ help
++ Driver that provides monitoring of the fan on Surface Pro devices that
++ have a fan, like the Surface Pro 9.
++
++ This makes the fan's current speed accessible through the hwmon
++ system. It does not provide control over the fan, the firmware is
++ responsible for that, this driver merely provides monitoring.
++
++ Select M or Y here, if you want to be able to read the fan's speed.
++
+ config SENSORS_ADC128D818
+ tristate "Texas Instruments ADC128D818"
+ depends on I2C
+diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
+index e84bd9685b5c..30a284fc5ab6 100644
+--- a/drivers/hwmon/Makefile
++++ b/drivers/hwmon/Makefile
+@@ -200,6 +200,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o
+ obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
+ obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o
+ obj-$(CONFIG_SENSORS_STTS751) += stts751.o
++obj-$(CONFIG_SENSORS_SURFACE_FAN)+= surface_fan.o
+ obj-$(CONFIG_SENSORS_SY7636A) += sy7636a-hwmon.o
+ obj-$(CONFIG_SENSORS_AMC6821) += amc6821.o
+ obj-$(CONFIG_SENSORS_TC74) += tc74.o
+diff --git a/drivers/hwmon/surface_fan.c b/drivers/hwmon/surface_fan.c
+new file mode 100644
+index 000000000000..7c2e3ae3eb40
+--- /dev/null
++++ b/drivers/hwmon/surface_fan.c
+@@ -0,0 +1,93 @@
++// SPDX-License-Identifier: GPL-2.0+
++/*
++ * Surface Fan driver for Surface System Aggregator Module. It provides access
++ * to the fan's rpm through the hwmon system.
++ *
++ * Copyright (C) 2023 Ivor Wanders <ivor@iwanders.net>
++ */
++
++#include <linux/hwmon.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/surface_aggregator/device.h>
++#include <linux/types.h>
++
++// SSAM
++SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_fan_rpm_get, __le16, {
++ .target_category = SSAM_SSH_TC_FAN,
++ .command_id = 0x01,
++});
++
++// hwmon -- is_visible: file-local callback, all attributes are read-only
++static umode_t surface_fan_hwmon_is_visible(const void *drvdata,
++ enum hwmon_sensor_types type, u32 attr,
++ int channel)
++{
++ return 0444;
++}
++
++static int surface_fan_hwmon_read(struct device *dev,
++ enum hwmon_sensor_types type, u32 attr,
++ int channel, long *val)
++{
++ struct ssam_device *sdev = dev_get_drvdata(dev);
++ int ret;
++ __le16 value;
++
++ ret = __ssam_fan_rpm_get(sdev, &value);
++ if (ret)
++ return ret;
++
++ *val = le16_to_cpu(value);
++
++ return ret;
++}
++
++static const struct hwmon_channel_info *const surface_fan_info[] = {
++ HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT),
++ NULL
++};
++
++static const struct hwmon_ops surface_fan_hwmon_ops = {
++ .is_visible = surface_fan_hwmon_is_visible,
++ .read = surface_fan_hwmon_read,
++};
++
++static const struct hwmon_chip_info surface_fan_chip_info = {
++ .ops = &surface_fan_hwmon_ops,
++ .info = surface_fan_info,
++};
++
++static int surface_fan_probe(struct ssam_device *sdev)
++{
++ struct device *hdev;
++
++ hdev = devm_hwmon_device_register_with_info(&sdev->dev,
++ "surface_fan", sdev,
++ &surface_fan_chip_info,
++ NULL);
++ if (IS_ERR(hdev))
++ return PTR_ERR(hdev);
++
++ return 0;
++}
++
++static const struct ssam_device_id ssam_fan_match[] = {
++ { SSAM_SDEV(FAN, SAM, 0x01, 0x01) },
++ {},
++};
++MODULE_DEVICE_TABLE(ssam, ssam_fan_match);
++
++static struct ssam_device_driver surface_fan = {
++ .probe = surface_fan_probe,
++ .match_table = ssam_fan_match,
++ .driver = {
++ .name = "surface_fan",
++ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
++ },
++};
++module_ssam_device_driver(surface_fan);
++
++MODULE_AUTHOR("Ivor Wanders <ivor@iwanders.net>");
++MODULE_DESCRIPTION("Fan Driver for Surface System Aggregator Module");
++MODULE_LICENSE("GPL");
+--
+2.43.0
+
+From 5f549c253e4df330fa8f311fe151df80e199bec4 Mon Sep 17 00:00:00 2001
+From: Maximilian Luz <luzmaximilian@gmail.com>
+Date: Sat, 30 Dec 2023 18:07:54 +0100
+Subject: [PATCH] hwmon: Add thermal sensor driver for Surface Aggregator
+ Module
+
+Some of the newer Microsoft Surface devices (such as the Surface Book
+3 and Pro 9) have thermal sensors connected via the Surface Aggregator
+Module (the embedded controller on those devices). Add a basic driver
+to read out the temperature values of those sensors.
+
+Link: https://github.com/linux-surface/surface-aggregator-module/issues/59
+Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
+Patchset: surface-sam
+---
+ drivers/hwmon/Kconfig | 10 +++
+ drivers/hwmon/Makefile | 1 +
+ drivers/hwmon/surface_temp.c | 165 +++++++++++++++++++++++++++++++++++
+ 3 files changed, 176 insertions(+)
+ create mode 100644 drivers/hwmon/surface_temp.c
+
+diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
+index 1cef428c79ea..ca20716911ad 100644
+--- a/drivers/hwmon/Kconfig
++++ b/drivers/hwmon/Kconfig
+@@ -1996,6 +1996,16 @@ config SENSORS_SURFACE_FAN
+
+ Select M or Y here, if you want to be able to read the fan's speed.
+
++config SENSORS_SURFACE_TEMP
++ tristate "Microsoft Surface Thermal Sensor Driver"
++ depends on SURFACE_AGGREGATOR && SURFACE_AGGREGATOR_BUS
++ help
++ Driver for monitoring thermal sensors connected via the Surface
++ Aggregator Module (embedded controller) on Microsoft Surface devices.
++
++ This driver can also be built as a module. If so, the module
++ will be called surface_temp.
++
+ config SENSORS_ADC128D818
+ tristate "Texas Instruments ADC128D818"
+ depends on I2C
+diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
+index 30a284fc5ab6..a6bcde6b4843 100644
+--- a/drivers/hwmon/Makefile
++++ b/drivers/hwmon/Makefile
+@@ -201,6 +201,7 @@ obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
+ obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o
+ obj-$(CONFIG_SENSORS_STTS751) += stts751.o
+ obj-$(CONFIG_SENSORS_SURFACE_FAN)+= surface_fan.o
++obj-$(CONFIG_SENSORS_SURFACE_TEMP)+= surface_temp.o
+ obj-$(CONFIG_SENSORS_SY7636A) += sy7636a-hwmon.o
+ obj-$(CONFIG_SENSORS_AMC6821) += amc6821.o
+ obj-$(CONFIG_SENSORS_TC74) += tc74.o
+diff --git a/drivers/hwmon/surface_temp.c b/drivers/hwmon/surface_temp.c
+new file mode 100644
+index 000000000000..48c3e826713f
+--- /dev/null
++++ b/drivers/hwmon/surface_temp.c
+@@ -0,0 +1,165 @@
++// SPDX-License-Identifier: GPL-2.0+
++/*
++ * Thermal sensor subsystem driver for Surface System Aggregator Module (SSAM).
++ *
++ * Copyright (C) 2022-2023 Maximilian Luz <luzmaximilian@gmail.com>
++ */
++
++#include <linux/bitops.h>
++#include <linux/hwmon.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/types.h>
++
++#include <linux/surface_aggregator/controller.h>
++#include <linux/surface_aggregator/device.h>
++
++
++/* -- SAM interface. -------------------------------------------------------- */
++
++SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_tmp_get_available_sensors, __le16, {
++ .target_category = SSAM_SSH_TC_TMP,
++ .command_id = 0x04,
++});
++
++SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_temperature, __le16, {
++ .target_category = SSAM_SSH_TC_TMP,
++ .command_id = 0x01,
++});
++
++static int ssam_tmp_get_available_sensors(struct ssam_device *sdev, s16 *sensors)
++{
++ __le16 sensors_le;
++ int status;
++
++ status = __ssam_tmp_get_available_sensors(sdev, &sensors_le);
++ if (status)
++ return status;
++
++ *sensors = le16_to_cpu(sensors_le);
++ return 0;
++}
++
++static int ssam_tmp_get_temperature(struct ssam_device *sdev, u8 iid, long *temperature)
++{
++ __le16 temp_le;
++ int status;
++
++ status = __ssam_tmp_get_temperature(sdev->ctrl, sdev->uid.target, iid, &temp_le);
++ if (status)
++ return status;
++
++ /* Convert 1/10 K to 1/1000 °C (2731 is 273.15 K in 1/10 K, truncated) */
++ *temperature = (le16_to_cpu(temp_le) - 2731) * 100L;
++ return 0;
++}
++
++
++/* -- Driver.---------------------------------------------------------------- */
++
++struct ssam_temp {
++ struct ssam_device *sdev;
++ s16 sensors;
++};
++
++static umode_t ssam_temp_hwmon_is_visible(const void *data,
++ enum hwmon_sensor_types type,
++ u32 attr, int channel)
++{
++ const struct ssam_temp *ssam_temp = data;
++
++ if (!(ssam_temp->sensors & BIT(channel)))
++ return 0;
++
++ return 0444;
++}
++
++static int ssam_temp_hwmon_read(struct device *dev,
++ enum hwmon_sensor_types type,
++ u32 attr, int channel, long *value)
++{
++ const struct ssam_temp *ssam_temp = dev_get_drvdata(dev);
++
++ return ssam_tmp_get_temperature(ssam_temp->sdev, channel + 1, value);
++}
++
++static const struct hwmon_channel_info * const ssam_temp_hwmon_info[] = {
++ HWMON_CHANNEL_INFO(chip,
++ HWMON_C_REGISTER_TZ),
++ /* We have at most 16 thermal sensor channels. */
++ HWMON_CHANNEL_INFO(temp,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT,
++ HWMON_T_INPUT),
++ NULL
++};
++
++static const struct hwmon_ops ssam_temp_hwmon_ops = {
++ .is_visible = ssam_temp_hwmon_is_visible,
++ .read = ssam_temp_hwmon_read,
++};
++
++static const struct hwmon_chip_info ssam_temp_hwmon_chip_info = {
++ .ops = &ssam_temp_hwmon_ops,
++ .info = ssam_temp_hwmon_info,
++};
++
++static int ssam_temp_probe(struct ssam_device *sdev)
++{
++ struct ssam_temp *ssam_temp;
++ struct device *hwmon_dev;
++ s16 sensors;
++ int status;
++
++ status = ssam_tmp_get_available_sensors(sdev, &sensors);
++ if (status)
++ return status;
++
++ ssam_temp = devm_kzalloc(&sdev->dev, sizeof(*ssam_temp), GFP_KERNEL);
++ if (!ssam_temp)
++ return -ENOMEM;
++
++ ssam_temp->sdev = sdev;
++ ssam_temp->sensors = sensors;
++
++ hwmon_dev = devm_hwmon_device_register_with_info(&sdev->dev,
++ "surface_thermal", ssam_temp, &ssam_temp_hwmon_chip_info,
++ NULL);
++ if (IS_ERR(hwmon_dev))
++ return PTR_ERR(hwmon_dev);
++
++ return 0;
++}
++
++static const struct ssam_device_id ssam_temp_match[] = {
++ { SSAM_SDEV(TMP, SAM, 0x00, 0x02) },
++ { },
++};
++MODULE_DEVICE_TABLE(ssam, ssam_temp_match);
++
++static struct ssam_device_driver ssam_temp = {
++ .probe = ssam_temp_probe,
++ .match_table = ssam_temp_match,
++ .driver = {
++ .name = "surface_temp",
++ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
++ },
++};
++module_ssam_device_driver(ssam_temp);
++
++MODULE_AUTHOR("Maximilian Luz <luzmaximilian@gmail.com>");
++MODULE_DESCRIPTION("Thermal sensor subsystem driver for Surface System Aggregator Module");
++MODULE_LICENSE("GPL");
+--
+2.43.0
+
+From 3ccfa3b6be4794f247488f7e665ba91793ec09c7 Mon Sep 17 00:00:00 2001
+From: Maximilian Luz <luzmaximilian@gmail.com>
+Date: Sat, 30 Dec 2023 18:12:23 +0100
+Subject: [PATCH] hwmon: surface_temp: Add support for sensor names
+
+The thermal subsystem of the Surface Aggregator Module allows us to
+query the names of the respective thermal sensors. Forward those to
+userspace.
+
+Signed-off-by: Ivor Wanders <ivor@iwanders.net>
+Co-developed-by: Maximilian Luz <luzmaximilian@gmail.com>
+Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
+Patchset: surface-sam
+---
+ drivers/hwmon/surface_temp.c | 113 +++++++++++++++++++++++++++++------
+ 1 file changed, 96 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/hwmon/surface_temp.c b/drivers/hwmon/surface_temp.c
+index 48c3e826713f..4c08926139db 100644
+--- a/drivers/hwmon/surface_temp.c
++++ b/drivers/hwmon/surface_temp.c
+@@ -17,6 +17,27 @@
+
+ /* -- SAM interface. -------------------------------------------------------- */
+
++/*
++ * Available sensors are indicated by a 16-bit bitfield, where a 1 marks the
++ * presence of a sensor. So we have at most 16 possible sensors/channels.
++ */
++#define SSAM_TMP_SENSOR_MAX_COUNT 16
++
++/*
++ * All names observed so far are 6 characters long, but there's only
++ * zeros after the name, so perhaps they can be longer. This number reflects
++ * the maximum zero-padded space observed in the returned buffer.
++ */
++#define SSAM_TMP_SENSOR_NAME_LENGTH 18
++
++struct ssam_tmp_get_name_rsp {
++ __le16 unknown1;
++ char unknown2;
++ char name[SSAM_TMP_SENSOR_NAME_LENGTH];
++} __packed;
++
++static_assert(sizeof(struct ssam_tmp_get_name_rsp) == 21);
++
+ SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_tmp_get_available_sensors, __le16, {
+ .target_category = SSAM_SSH_TC_TMP,
+ .command_id = 0x04,
+@@ -27,6 +48,11 @@ SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_temperature, __le16, {
+ .command_id = 0x01,
+ });
+
++SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_name, struct ssam_tmp_get_name_rsp, {
++ .target_category = SSAM_SSH_TC_TMP,
++ .command_id = 0x0e,
++});
++
+ static int ssam_tmp_get_available_sensors(struct ssam_device *sdev, s16 *sensors)
+ {
+ __le16 sensors_le;
+@@ -54,12 +80,37 @@ static int ssam_tmp_get_temperature(struct ssam_device *sdev, u8 iid, long *temp
+ return 0;
+ }
+
++static int ssam_tmp_get_name(struct ssam_device *sdev, u8 iid, char *buf, size_t buf_len)
++{
++ struct ssam_tmp_get_name_rsp name_rsp;
++ int status;
++
++ status = __ssam_tmp_get_name(sdev->ctrl, sdev->uid.target, iid, &name_rsp);
++ if (status)
++ return status;
++
++ /*
++ * This should not fail unless the name in the returned struct is not
++ * null-terminated or someone changed something in the struct
++ * definitions above, since our buffer and struct have the same
++ * capacity by design. So if this fails blow this up with a warning.
++ * Since the more likely cause is that the returned string isn't
++ * null-terminated, we might have received garbage (as opposed to just
++ * an incomplete string), so also fail the function.
++ */
++ status = strscpy(buf, name_rsp.name, buf_len);
++ WARN_ON(status < 0);
++
++ return status < 0 ? status : 0;
++}
++
+
+ /* -- Driver.---------------------------------------------------------------- */
+
+ struct ssam_temp {
+ struct ssam_device *sdev;
+ s16 sensors;
++ char names[SSAM_TMP_SENSOR_MAX_COUNT][SSAM_TMP_SENSOR_NAME_LENGTH];
+ };
+
+ static umode_t ssam_temp_hwmon_is_visible(const void *data,
+@@ -83,33 +134,47 @@ static int ssam_temp_hwmon_read(struct device *dev,
+ return ssam_tmp_get_temperature(ssam_temp->sdev, channel + 1, value);
+ }
+
++static int ssam_temp_hwmon_read_string(struct device *dev,
++ enum hwmon_sensor_types type,
++ u32 attr, int channel, const char **str)
++{
++ const struct ssam_temp *ssam_temp = dev_get_drvdata(dev);
++
++ *str = ssam_temp->names[channel];
++ return 0;
++}
++
+ static const struct hwmon_channel_info * const ssam_temp_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(chip,
+ HWMON_C_REGISTER_TZ),
+- /* We have at most 16 thermal sensor channels. */
++ /*
++ * We have at most SSAM_TMP_SENSOR_MAX_COUNT = 16 thermal sensor
++ * channels.
++ */
+ HWMON_CHANNEL_INFO(temp,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT,
+- HWMON_T_INPUT),
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL,
++ HWMON_T_INPUT | HWMON_T_LABEL),
+ NULL
+ };
+
+ static const struct hwmon_ops ssam_temp_hwmon_ops = {
+ .is_visible = ssam_temp_hwmon_is_visible,
+ .read = ssam_temp_hwmon_read,
++ .read_string = ssam_temp_hwmon_read_string,
+ };
+
+ static const struct hwmon_chip_info ssam_temp_hwmon_chip_info = {
+@@ -122,6 +187,7 @@ static int ssam_temp_probe(struct ssam_device *sdev)
+ struct ssam_temp *ssam_temp;
+ struct device *hwmon_dev;
+ s16 sensors;
++ int channel;
+ int status;
+
+ status = ssam_tmp_get_available_sensors(sdev, &sensors);
+@@ -135,6 +201,19 @@ static int ssam_temp_probe(struct ssam_device *sdev)
+ ssam_temp->sdev = sdev;
+ ssam_temp->sensors = sensors;
+
++ /* Retrieve the name for each available sensor. */
++ for (channel = 0; channel < SSAM_TMP_SENSOR_MAX_COUNT; channel++)
++ {
++ if (!(sensors & BIT(channel)))
++ continue;
++
++ status = ssam_tmp_get_name(sdev, channel + 1,
++ ssam_temp->names[channel],
++ SSAM_TMP_SENSOR_NAME_LENGTH);
++ if (status)
++ return status;
++ }
++
+ hwmon_dev = devm_hwmon_device_register_with_info(&sdev->dev,
+ "surface_thermal", ssam_temp, &ssam_temp_hwmon_chip_info,
+ NULL);
+--
+2.43.0
+
+From 8ccf7b86ad270655bd1e8cd0ab8d2ff475ad0ea7 Mon Sep 17 00:00:00 2001
+From: Maximilian Luz <luzmaximilian@gmail.com>
+Date: Sat, 30 Dec 2023 18:21:12 +0100
+Subject: [PATCH] platform/surface: aggregator_registry: Add support for
+ thermal sensors on the Surface Pro 9
+
+The Surface Pro 9 has thermal sensors connected via the Surface
+Aggregator Module. Add a device node to support those.
+
+Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
+Patchset: surface-sam
+---
+ drivers/platform/surface/surface_aggregator_registry.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
+index f02a933160ff..67686042e009 100644
+--- a/drivers/platform/surface/surface_aggregator_registry.c
++++ b/drivers/platform/surface/surface_aggregator_registry.c
+@@ -74,6 +74,12 @@ static const struct software_node ssam_node_tmp_pprof = {
+ .parent = &ssam_node_root,
+ };
+
++/* Thermal sensors. */
++static const struct software_node ssam_node_tmp_sensors = {
++ .name = "ssam:01:03:01:00:02",
++ .parent = &ssam_node_root,
++};
++
+ /* Fan speed function. */
+ static const struct software_node ssam_node_fan_speed = {
+ .name = "ssam:01:05:01:01:01",
+@@ -325,6 +331,7 @@ static const struct software_node *ssam_node_group_sp9[] = {
+ &ssam_node_bat_ac,
+ &ssam_node_bat_main,
+ &ssam_node_tmp_pprof,
++ &ssam_node_tmp_sensors,
+ &ssam_node_fan_speed,
+ &ssam_node_pos_tablet_switch,
+ &ssam_node_hid_kip_keyboard,
+--
+2.43.0
-From 3772b511c710c369b737fd0a111fbda63b028f1d Mon Sep 17 00:00:00 2001
+From 38a76c85dee37facde40f245d994c4209ccddd15 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sat, 25 Jul 2020 17:19:53 +0200
Subject: [PATCH] i2c: acpi: Implement RawBytes read access
@@ -7034,7 +6564,7 @@ Patchset: surface-sam-over-hid
1 file changed, 35 insertions(+)
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
-index d6037a3286690..a290ebc77aea2 100644
+index d6037a328669..a290ebc77aea 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -628,6 +628,28 @@ static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
@@ -7087,9 +6617,9 @@ index d6037a3286690..a290ebc77aea2 100644
dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n",
accessor_type, client->addr);
--
-2.42.0
+2.43.0
-From f45a16750118da615fca44e7214204c83631ee7f Mon Sep 17 00:00:00 2001
+From f4ad3e5c368c11503d8b7af6a703f3972ebd5e98 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sat, 13 Feb 2021 16:41:18 +0100
Subject: [PATCH] platform/surface: Add driver for Surface Book 1 dGPU switch
@@ -7112,7 +6642,7 @@ Patchset: surface-sam-over-hid
create mode 100644 drivers/platform/surface/surfacebook1_dgpu_switch.c
diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig
-index b629e82af97c0..68656e8f309ed 100644
+index b629e82af97c..68656e8f309e 100644
--- a/drivers/platform/surface/Kconfig
+++ b/drivers/platform/surface/Kconfig
@@ -149,6 +149,13 @@ config SURFACE_AGGREGATOR_TABLET_SWITCH
@@ -7130,7 +6660,7 @@ index b629e82af97c0..68656e8f309ed 100644
tristate "Surface DTX (Detachment System) Driver"
depends on SURFACE_AGGREGATOR
diff --git a/drivers/platform/surface/Makefile b/drivers/platform/surface/Makefile
-index 53344330939bf..7efcd0cdb5329 100644
+index 53344330939b..7efcd0cdb532 100644
--- a/drivers/platform/surface/Makefile
+++ b/drivers/platform/surface/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_SURFACE_AGGREGATOR_CDEV) += surface_aggregator_cdev.o
@@ -7143,7 +6673,7 @@ index 53344330939bf..7efcd0cdb5329 100644
obj-$(CONFIG_SURFACE_HOTPLUG) += surface_hotplug.o
diff --git a/drivers/platform/surface/surfacebook1_dgpu_switch.c b/drivers/platform/surface/surfacebook1_dgpu_switch.c
new file mode 100644
-index 0000000000000..8b816ed8f35c6
+index 000000000000..8b816ed8f35c
--- /dev/null
+++ b/drivers/platform/surface/surfacebook1_dgpu_switch.c
@@ -0,0 +1,162 @@
@@ -7310,9 +6840,9 @@ index 0000000000000..8b816ed8f35c6
+MODULE_DESCRIPTION("Discrete GPU Power-Switch for Surface Book 1");
+MODULE_LICENSE("GPL");
--
-2.42.0
+2.43.0
-From a5d9cf4762a27e2bf7f38c0d5a223b9df8b4ba8a Mon Sep 17 00:00:00 2001
+From 96cb53fd556f88f97d61b237c6015cec946865d5 Mon Sep 17 00:00:00 2001
From: Sachi King <nakato@nakato.io>
Date: Tue, 5 Oct 2021 00:05:09 +1100
Subject: [PATCH] Input: soc_button_array - support AMD variant Surface devices
@@ -7334,10 +6864,10 @@ Patchset: surface-button
1 file changed, 8 insertions(+), 25 deletions(-)
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
-index e79f5497948b8..2bddbe6e9ea4d 100644
+index f6d060377d18..b8603f74eb28 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
-@@ -537,8 +537,8 @@ static const struct soc_device_data soc_device_MSHW0028 = {
+@@ -540,8 +540,8 @@ static const struct soc_device_data soc_device_MSHW0028 = {
* Both, the Surface Pro 4 (surfacepro3_button.c) and the above mentioned
* devices use MSHW0040 for power and volume buttons, however the way they
* have to be addressed differs. Make sure that we only load this drivers
@@ -7348,7 +6878,7 @@ index e79f5497948b8..2bddbe6e9ea4d 100644
*/
#define MSHW0040_DSM_REVISION 0x01
#define MSHW0040_DSM_GET_OMPR 0x02 // get OEM Platform Revision
-@@ -549,31 +549,14 @@ static const guid_t MSHW0040_DSM_UUID =
+@@ -552,31 +552,14 @@ static const guid_t MSHW0040_DSM_UUID =
static int soc_device_check_MSHW0040(struct device *dev)
{
acpi_handle handle = ACPI_HANDLE(dev);
@@ -7387,9 +6917,9 @@ index e79f5497948b8..2bddbe6e9ea4d 100644
/*
--
-2.42.0
+2.43.0
-From 66f0a34801ad81ff08cc3ae0e175e0958959c461 Mon Sep 17 00:00:00 2001
+From 7909f30b15796e8df43a6d4ea32cbbd40627c410 Mon Sep 17 00:00:00 2001
From: Sachi King <nakato@nakato.io>
Date: Tue, 5 Oct 2021 00:22:57 +1100
Subject: [PATCH] platform/surface: surfacepro3_button: don't load on amd
@@ -7410,7 +6940,7 @@ Patchset: surface-button
1 file changed, 6 insertions(+), 24 deletions(-)
diff --git a/drivers/platform/surface/surfacepro3_button.c b/drivers/platform/surface/surfacepro3_button.c
-index 2755601f979cd..4240c98ca2265 100644
+index 2755601f979c..4240c98ca226 100644
--- a/drivers/platform/surface/surfacepro3_button.c
+++ b/drivers/platform/surface/surfacepro3_button.c
@@ -149,7 +149,8 @@ static int surface_button_resume(struct device *dev)
@@ -7459,9 +6989,9 @@ index 2755601f979cd..4240c98ca2265 100644
--
-2.42.0
+2.43.0
-From a55587ce4f5065bedb604f9031082ad47612a163 Mon Sep 17 00:00:00 2001
+From 28ea3660b6680bfd528ca05c543e69b8a2ad412c Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sat, 18 Feb 2023 01:02:49 +0100
Subject: [PATCH] USB: quirks: Add USB_QUIRK_DELAY_INIT for Surface Go 3
@@ -7486,7 +7016,7 @@ Patchset: surface-typecover
1 file changed, 3 insertions(+)
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
-index 15e9bd180a1d2..0d70461d01e16 100644
+index 15e9bd180a1d..0d70461d01e1 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -220,6 +220,9 @@ static const struct usb_device_id usb_quirk_list[] = {
@@ -7500,9 +7030,9 @@ index 15e9bd180a1d2..0d70461d01e16 100644
{ USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
--
-2.42.0
+2.43.0
-From 678999792d6b1c72e56c6b63fc3909b93db47b32 Mon Sep 17 00:00:00 2001
+From 039ed906cfe0578e78c40d786433e9b144c56785 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= <verdre@v0yd.nl>
Date: Thu, 5 Nov 2020 13:09:45 +0100
Subject: [PATCH] hid/multitouch: Turn off Type Cover keyboard backlight when
@@ -7538,7 +7068,7 @@ Patchset: surface-typecover
1 file changed, 98 insertions(+), 2 deletions(-)
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
-index 8db4ae05febc8..99a5efef45258 100644
+index fd5b0637dad6..0f49d8fa6333 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -34,7 +34,10 @@
@@ -7712,7 +7242,7 @@ index 8db4ae05febc8..99a5efef45258 100644
ret = sysfs_create_group(&hdev->dev.kobj, &mt_attribute_group);
if (ret)
-@@ -1842,6 +1932,7 @@ static void mt_remove(struct hid_device *hdev)
+@@ -1840,6 +1930,7 @@ static void mt_remove(struct hid_device *hdev)
{
struct mt_device *td = hid_get_drvdata(hdev);
@@ -7720,7 +7250,7 @@ index 8db4ae05febc8..99a5efef45258 100644
del_timer_sync(&td->release_timer);
sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group);
-@@ -2223,6 +2314,11 @@ static const struct hid_device_id mt_devices[] = {
+@@ -2226,6 +2317,11 @@ static const struct hid_device_id mt_devices[] = {
MT_USB_DEVICE(USB_VENDOR_ID_XIROKU,
USB_DEVICE_ID_XIROKU_CSR2) },
@@ -7733,9 +7263,9 @@ index 8db4ae05febc8..99a5efef45258 100644
{ .driver_data = MT_CLS_GOOGLE,
HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
--
-2.42.0
+2.43.0
-From 12427f01e38ebf653ccf44faefdcb92110c43c20 Mon Sep 17 00:00:00 2001
+From 97419c9be08b7b3e4311aa989fa8f91d0549a469 Mon Sep 17 00:00:00 2001
From: PJungkamp <p.jungkamp@gmail.com>
Date: Fri, 25 Feb 2022 12:04:25 +0100
Subject: [PATCH] hid/multitouch: Add support for surface pro type cover tablet
@@ -7764,7 +7294,7 @@ Patchset: surface-typecover
1 file changed, 122 insertions(+), 26 deletions(-)
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
-index 99a5efef45258..6ae43ea90bcd5 100644
+index 0f49d8fa6333..1fad1199775b 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -77,6 +77,7 @@ MODULE_LICENSE("GPL");
@@ -7974,7 +7504,7 @@ index 99a5efef45258..6ae43ea90bcd5 100644
hid_err(hdev, "couldn't find backlight field\n");
goto out;
}
-@@ -1909,13 +1975,24 @@ static int mt_suspend(struct hid_device *hdev, pm_message_t state)
+@@ -1908,13 +1974,24 @@ static int mt_suspend(struct hid_device *hdev, pm_message_t state)
static int mt_reset_resume(struct hid_device *hdev)
{
@@ -7999,7 +7529,7 @@ index 99a5efef45258..6ae43ea90bcd5 100644
/* Some Elan legacy devices require SET_IDLE to be set on resume.
* It should be safe to send it to other devices too.
* Tested on 3M, Stantum, Cypress, Zytronic, eGalax, and Elan panels. */
-@@ -1924,6 +2001,10 @@ static int mt_resume(struct hid_device *hdev)
+@@ -1923,12 +2000,31 @@ static int mt_resume(struct hid_device *hdev)
mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true);
@@ -8009,8 +7539,7 @@ index 99a5efef45258..6ae43ea90bcd5 100644
+
return 0;
}
- #endif
-@@ -1931,6 +2012,21 @@ static int mt_resume(struct hid_device *hdev)
+
static void mt_remove(struct hid_device *hdev)
{
struct mt_device *td = hid_get_drvdata(hdev);
@@ -8033,9 +7562,9 @@ index 99a5efef45258..6ae43ea90bcd5 100644
unregister_pm_notifier(&td->pm_notifier);
del_timer_sync(&td->release_timer);
--
-2.42.0
+2.43.0
-From 151f9dba2f3d6d066d160128da109a0173a3ff4c Mon Sep 17 00:00:00 2001
+From 3854d7e575b1091a994c58ef8ee2a89f4efced12 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 19 Feb 2023 22:12:24 +0100
Subject: [PATCH] PCI: Add quirk to prevent calling shutdown mehtod
@@ -8060,7 +7589,7 @@ Patchset: surface-shutdown
3 files changed, 40 insertions(+)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
-index 51ec9e7e784f0..40554890d7211 100644
+index 51ec9e7e784f..40554890d721 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -507,6 +507,9 @@ static void pci_device_shutdown(struct device *dev)
@@ -8074,13 +7603,14 @@ index 51ec9e7e784f0..40554890d7211 100644
if (drv && drv->shutdown)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index e3e915329510f..666ff1e9b6d7b 100644
+index d55a3ffae4b8..e8614d8476fe 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
-@@ -6212,6 +6212,42 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node);
-
+@@ -6227,3 +6227,39 @@ static void pci_fixup_d3cold_delay_1sec(struct pci_dev *pdev)
+ pdev->d3cold_delay = 1000;
+ }
+ DECLARE_PCI_FIXUP_FINAL(0x5555, 0x0004, pci_fixup_d3cold_delay_1sec);
++
+static const struct dmi_system_id no_shutdown_dmi_table[] = {
+ /*
+ * Systems on which some devices should not be touched during shutdown.
@@ -8116,13 +7646,8 @@ index e3e915329510f..666ff1e9b6d7b 100644
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x462f, quirk_no_shutdown); // Thunderbolt 4 PCI Express Root Port
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x466d, quirk_no_shutdown); // Thunderbolt 4 NHI
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x46a8, quirk_no_shutdown); // GPU
-+
- /*
- * Devices known to require a longer delay before first config space access
- * after reset recovery or resume from D3cold:
-
diff --git a/include/linux/pci.h b/include/linux/pci.h
-index 8c7c2c3c6c652..0c223b04dff91 100644
+index bc80960fad7c..eec5704d1000 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -465,6 +465,7 @@ struct pci_dev {
@@ -8134,9 +7659,9 @@ index 8c7c2c3c6c652..0c223b04dff91 100644
atomic_t enable_cnt; /* pci_enable_device has been called */
--
-2.42.0
+2.43.0
-From 912e956823b3cadd7203d3ce94418d162ff701be Mon Sep 17 00:00:00 2001
+From d9ddc9ae99c11ebc912a1a8dde46d783e873508b Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 12 Mar 2023 01:41:57 +0100
Subject: [PATCH] platform/surface: gpe: Add support for Surface Pro 9
@@ -8150,7 +7675,7 @@ Patchset: surface-gpe
1 file changed, 17 insertions(+)
diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c
-index c219b840d491a..69c4352e8406b 100644
+index 62fd4004db31..103fc4468262 100644
--- a/drivers/platform/surface/surface_gpe.c
+++ b/drivers/platform/surface/surface_gpe.c
@@ -41,6 +41,11 @@ static const struct property_entry lid_device_props_l4F[] = {
@@ -8185,9 +7710,9 @@ index c219b840d491a..69c4352e8406b 100644
.ident = "Surface Book 1",
.matches = {
--
-2.42.0
+2.43.0
-From df083025f8c63824279c19de8ec3339440f819c9 Mon Sep 17 00:00:00 2001
+From 5fdcd780891777ef73585adf610593e6e097e6d6 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 10 Oct 2021 20:56:57 +0200
Subject: [PATCH] ACPI: delay enumeration of devices with a _DEP pointing to an
@@ -8247,10 +7772,10 @@ Patchset: cameras
1 file changed, 3 insertions(+)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
-index 691d4b7686ee7..9283217689279 100644
+index 02bb2cce423f..b123138d3dc0 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
-@@ -2108,6 +2108,9 @@ static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used,
+@@ -2114,6 +2114,9 @@ static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used,
static void acpi_default_enumeration(struct acpi_device *device)
{
@@ -8261,9 +7786,9 @@ index 691d4b7686ee7..9283217689279 100644
* Do not enumerate devices with enumeration_by_parent flag set as
* they will be enumerated by their respective parents.
--
-2.42.0
+2.43.0
-From 87650a001d3068a8b614fd688e21bb87c2d3a3e6 Mon Sep 17 00:00:00 2001
+From eb19f5e13f14a8973920d406125f205945558fb9 Mon Sep 17 00:00:00 2001
From: zouxiaoh <xiaohong.zou@intel.com>
Date: Fri, 25 Jun 2021 08:52:59 +0800
Subject: [PATCH] iommu: intel-ipu: use IOMMU passthrough mode for Intel IPUs
@@ -8289,7 +7814,7 @@ Patchset: cameras
1 file changed, 30 insertions(+)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
-index 5a627e081797c..da866ac6b30ba 100644
+index cc6569613255..8a532d32efdd 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -38,6 +38,12 @@
@@ -8305,7 +7830,7 @@ index 5a627e081797c..da866ac6b30ba 100644
#define IS_IPTS(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL && \
((pdev)->device == 0x9d3e))
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
-@@ -295,12 +301,14 @@ EXPORT_SYMBOL_GPL(intel_iommu_enabled);
+@@ -294,12 +300,14 @@ EXPORT_SYMBOL_GPL(intel_iommu_enabled);
static int dmar_map_gfx = 1;
static int dmar_map_ipts = 1;
@@ -8320,7 +7845,7 @@ index 5a627e081797c..da866ac6b30ba 100644
#define IDENTMAP_IPTS 16
const struct iommu_ops intel_iommu_ops;
-@@ -2547,6 +2555,9 @@ static int device_def_domain_type(struct device *dev)
+@@ -2553,6 +2561,9 @@ static int device_def_domain_type(struct device *dev)
if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
return IOMMU_DOMAIN_IDENTITY;
@@ -8330,7 +7855,7 @@ index 5a627e081797c..da866ac6b30ba 100644
if ((iommu_identity_mapping & IDENTMAP_IPTS) && IS_IPTS(pdev))
return IOMMU_DOMAIN_IDENTITY;
}
-@@ -2856,6 +2867,9 @@ static int __init init_dmars(void)
+@@ -2862,6 +2873,9 @@ static int __init init_dmars(void)
if (!dmar_map_gfx)
iommu_identity_mapping |= IDENTMAP_GFX;
@@ -8340,7 +7865,7 @@ index 5a627e081797c..da866ac6b30ba 100644
if (!dmar_map_ipts)
iommu_identity_mapping |= IDENTMAP_IPTS;
-@@ -4838,6 +4852,18 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
+@@ -4987,6 +5001,18 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
dmar_map_gfx = 0;
}
@@ -8359,7 +7884,7 @@ index 5a627e081797c..da866ac6b30ba 100644
static void quirk_iommu_ipts(struct pci_dev *dev)
{
if (!IS_IPTS(dev))
-@@ -4849,6 +4875,7 @@ static void quirk_iommu_ipts(struct pci_dev *dev)
+@@ -4998,6 +5024,7 @@ static void quirk_iommu_ipts(struct pci_dev *dev)
pci_info(dev, "Passthrough IOMMU for IPTS\n");
dmar_map_ipts = 0;
}
@@ -8367,7 +7892,7 @@ index 5a627e081797c..da866ac6b30ba 100644
/* G4x/GM45 integrated gfx dmar support is totally busted. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
-@@ -4884,6 +4911,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
+@@ -5033,6 +5060,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
@@ -8378,9 +7903,9 @@ index 5a627e081797c..da866ac6b30ba 100644
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9D3E, quirk_iommu_ipts);
--
-2.42.0
+2.43.0
-From 76fec27d978bf7708a60862d4aab2e1fe7ec3f27 Mon Sep 17 00:00:00 2001
+From 1b16e7cbcbf699e4d841424568e0de1cee048d93 Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Sun, 10 Oct 2021 20:57:02 +0200
Subject: [PATCH] platform/x86: int3472: Enable I2c daisy chain
@@ -8397,7 +7922,7 @@ Patchset: cameras
1 file changed, 7 insertions(+)
diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c
-index 1e107fd49f828..e3e1696e7f0ee 100644
+index 1e107fd49f82..e3e1696e7f0e 100644
--- a/drivers/platform/x86/intel/int3472/tps68470.c
+++ b/drivers/platform/x86/intel/int3472/tps68470.c
@@ -46,6 +46,13 @@ static int tps68470_chip_init(struct device *dev, struct regmap *regmap)
@@ -8415,9 +7940,9 @@ index 1e107fd49f828..e3e1696e7f0ee 100644
return 0;
--
-2.42.0
+2.43.0
-From 232a0f88ecc21141c6f0d94cc74eb63c7869c217 Mon Sep 17 00:00:00 2001
+From a856e6ec1aa1ce0e88abdd423a151f2bbddb8134 Mon Sep 17 00:00:00 2001
From: Daniel Scally <dan.scally@ideasonboard.com>
Date: Thu, 2 Mar 2023 12:59:39 +0000
Subject: [PATCH] platform/x86: int3472: Remap reset GPIO for INT347E
@@ -8439,20 +7964,19 @@ Patchset: cameras
1 file changed, 14 insertions(+)
diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
-index e33c2d75975cf..c0c90ae66b705 100644
+index 07b302e09340..1d3097bc7e48 100644
--- a/drivers/platform/x86/intel/int3472/discrete.c
+++ b/drivers/platform/x86/intel/int3472/discrete.c
-@@ -57,6 +57,9 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
+@@ -83,12 +83,26 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
const char *func, u32 polarity)
{
- char *path = agpio->resource_source.string_ptr;
+ int ret;
+ const struct acpi_device_id ov7251_ids[] = {
+ { "INT347E" },
+ };
- struct gpiod_lookup *table_entry;
- struct acpi_device *adev;
- acpi_handle handle;
-@@ -67,6 +70,17 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
+
+ if (int3472->n_sensor_gpios >= INT3472_MAX_SENSOR_GPIOS) {
+ dev_warn(int3472->dev, "Too many GPIOs mapped\n");
return -EINVAL;
}
@@ -8467,13 +7991,13 @@ index e33c2d75975cf..c0c90ae66b705 100644
+ polarity = GPIO_ACTIVE_HIGH;
+ }
+
- status = acpi_get_handle(NULL, path, &handle);
- if (ACPI_FAILURE(status))
- return -EINVAL;
+ ret = skl_int3472_fill_gpiod_lookup(&int3472->gpios.table[int3472->n_sensor_gpios],
+ agpio, func, polarity);
+ if (ret)
--
-2.42.0
+2.43.0
-From 0cfd5c05a675388bbb2edfa87423dc5ad931cc97 Mon Sep 17 00:00:00 2001
+From a7a10c4493fe0a381f12fd6a20a024e7797bd37c Mon Sep 17 00:00:00 2001
From: Daniel Scally <dan.scally@ideasonboard.com>
Date: Tue, 21 Mar 2023 13:45:26 +0000
Subject: [PATCH] media: i2c: Clarify that gain is Analogue gain in OV7251
@@ -8488,7 +8012,7 @@ Patchset: cameras
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/media/i2c/ov7251.c b/drivers/media/i2c/ov7251.c
-index 675fb37a6feae..43b30db08c9e4 100644
+index 6582cc0e2384..fd0796b6e07e 100644
--- a/drivers/media/i2c/ov7251.c
+++ b/drivers/media/i2c/ov7251.c
@@ -1051,7 +1051,7 @@ static int ov7251_s_ctrl(struct v4l2_ctrl *ctrl)
@@ -8500,7 +8024,7 @@ index 675fb37a6feae..43b30db08c9e4 100644
ret = ov7251_set_gain(ov7251, ctrl->val);
break;
case V4L2_CID_TEST_PATTERN:
-@@ -1551,7 +1551,7 @@ static int ov7251_init_ctrls(struct ov7251 *ov7251)
+@@ -1553,7 +1553,7 @@ static int ov7251_init_ctrls(struct ov7251 *ov7251)
ov7251->exposure = v4l2_ctrl_new_std(&ov7251->ctrls, &ov7251_ctrl_ops,
V4L2_CID_EXPOSURE, 1, 32, 1, 32);
ov7251->gain = v4l2_ctrl_new_std(&ov7251->ctrls, &ov7251_ctrl_ops,
@@ -8510,9 +8034,9 @@ index 675fb37a6feae..43b30db08c9e4 100644
V4L2_CID_TEST_PATTERN,
ARRAY_SIZE(ov7251_test_pattern_menu) - 1,
--
-2.42.0
+2.43.0
-From 18fa273c21f1dd86160f18242a81947392272443 Mon Sep 17 00:00:00 2001
+From e96fa67c9172fac9aa6e68199cf7e29d074c21e6 Mon Sep 17 00:00:00 2001
From: Daniel Scally <dan.scally@ideasonboard.com>
Date: Wed, 22 Mar 2023 11:01:42 +0000
Subject: [PATCH] media: v4l2-core: Acquire privacy led in
@@ -8531,7 +8055,7 @@ Patchset: cameras
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c
-index 091e8cf4114ba..cca10f5355844 100644
+index 8cfd593d293d..c32f0d1b29d4 100644
--- a/drivers/media/v4l2-core/v4l2-async.c
+++ b/drivers/media/v4l2-core/v4l2-async.c
@@ -796,6 +796,10 @@ int v4l2_async_register_subdev(struct v4l2_subdev *sd)
@@ -8546,7 +8070,7 @@ index 091e8cf4114ba..cca10f5355844 100644
* No reference taken. The reference is held by the device (struct
* v4l2_subdev.dev), and async sub-device does not exist independently
diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
-index 7f181fbbb1407..1c0347de4e216 100644
+index 7f181fbbb140..1c0347de4e21 100644
--- a/drivers/media/v4l2-core/v4l2-fwnode.c
+++ b/drivers/media/v4l2-core/v4l2-fwnode.c
@@ -1217,10 +1217,6 @@ int v4l2_async_register_subdev_sensor(struct v4l2_subdev *sd)
@@ -8561,9 +8085,9 @@ index 7f181fbbb1407..1c0347de4e216 100644
if (ret < 0)
goto out_cleanup;
--
-2.42.0
+2.43.0
-From 07e01113f2641afab78b155d42e9d9d399a9e164 Mon Sep 17 00:00:00 2001
+From 68dac72bec1c99890d35d6bfd1b1f66e0cf8789c Mon Sep 17 00:00:00 2001
From: Kate Hsuan <hpa@redhat.com>
Date: Tue, 21 Mar 2023 23:37:16 +0800
Subject: [PATCH] platform: x86: int3472: Add MFD cell for tps68470 LED
@@ -8579,7 +8103,7 @@ Patchset: cameras
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c
-index e3e1696e7f0ee..423dc555093f7 100644
+index e3e1696e7f0e..423dc555093f 100644
--- a/drivers/platform/x86/intel/int3472/tps68470.c
+++ b/drivers/platform/x86/intel/int3472/tps68470.c
@@ -17,7 +17,7 @@
@@ -8602,9 +8126,9 @@ index e3e1696e7f0ee..423dc555093f7 100644
for (i = 0; i < board_data->n_gpiod_lookups; i++)
gpiod_add_lookup_table(board_data->tps68470_gpio_lookup_tables[i]);
--
-2.42.0
+2.43.0
-From a704bf822539e09b00015110b48bc997692c92ce Mon Sep 17 00:00:00 2001
+From 3f446f24aecaba808693f0173e28972e651fa87d Mon Sep 17 00:00:00 2001
From: Kate Hsuan <hpa@redhat.com>
Date: Tue, 21 Mar 2023 23:37:17 +0800
Subject: [PATCH] include: mfd: tps68470: Add masks for LEDA and LEDB
@@ -8622,7 +8146,7 @@ Patchset: cameras
1 file changed, 5 insertions(+)
diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h
-index 7807fa329db00..2d2abb25b944f 100644
+index 7807fa329db0..2d2abb25b944 100644
--- a/include/linux/mfd/tps68470.h
+++ b/include/linux/mfd/tps68470.h
@@ -34,6 +34,7 @@
@@ -8643,9 +8167,9 @@ index 7807fa329db00..2d2abb25b944f 100644
+
#endif /* __LINUX_MFD_TPS68470_H */
--
-2.42.0
+2.43.0
-From c8a6ce96be3a4dca7e9e99613b28494d10b4ade0 Mon Sep 17 00:00:00 2001
+From a0fe4ec438c5edb9f4360c8a2a5f5269d05c44ef Mon Sep 17 00:00:00 2001
From: Kate Hsuan <hpa@redhat.com>
Date: Tue, 21 Mar 2023 23:37:18 +0800
Subject: [PATCH] leds: tps68470: Add LED control for tps68470
@@ -8668,10 +8192,10 @@ Patchset: cameras
create mode 100644 drivers/leds/leds-tps68470.c
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
-index b92208eccdea9..312c0c21cc5ef 100644
+index a3a9ac5b5338..0bc6845b5d29 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
-@@ -873,6 +873,18 @@ config LEDS_TPS6105X
+@@ -875,6 +875,18 @@ config LEDS_TPS6105X
It is a single boost converter primarily for white LEDs and
audio amplifiers.
@@ -8691,7 +8215,7 @@ index b92208eccdea9..312c0c21cc5ef 100644
tristate "LED support for SGI Octane machines"
depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
-index d7348e8bc019a..10caea4e7c614 100644
+index d7348e8bc019..10caea4e7c61 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o
@@ -8704,7 +8228,7 @@ index d7348e8bc019a..10caea4e7c614 100644
obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o
diff --git a/drivers/leds/leds-tps68470.c b/drivers/leds/leds-tps68470.c
new file mode 100644
-index 0000000000000..35aeb5db89c8f
+index 000000000000..35aeb5db89c8
--- /dev/null
+++ b/drivers/leds/leds-tps68470.c
@@ -0,0 +1,185 @@
@@ -8894,9 +8418,9 @@ index 0000000000000..35aeb5db89c8f
+MODULE_DESCRIPTION("LED driver for TPS68470 PMIC");
+MODULE_LICENSE("GPL v2");
--
-2.42.0
+2.43.0
-From 82252c3764ecee6c09218077759072f15001f9ee Mon Sep 17 00:00:00 2001
+From 04069751b144350632ec45b5b25c2cc01d5f34ef Mon Sep 17 00:00:00 2001
From: Sachi King <nakato@nakato.io>
Date: Sat, 29 May 2021 17:47:38 +1000
Subject: [PATCH] ACPI: Add quirk for Surface Laptop 4 AMD missing irq 7
@@ -8919,7 +8443,7 @@ Patchset: amd-gpio
1 file changed, 17 insertions(+)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
-index c55c0ef47a187..f29740cf89ff6 100644
+index 85a3ce2a3666..2c0e04a3a697 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -22,6 +22,7 @@
@@ -8930,7 +8454,7 @@ index c55c0ef47a187..f29740cf89ff6 100644
#include <asm/e820/api.h>
#include <asm/irqdomain.h>
-@@ -1255,6 +1256,17 @@ static void __init mp_config_acpi_legacy_irqs(void)
+@@ -1251,6 +1252,17 @@ static void __init mp_config_acpi_legacy_irqs(void)
}
}
@@ -8948,7 +8472,7 @@ index c55c0ef47a187..f29740cf89ff6 100644
/*
* Parse IOAPIC related entries in MADT
* returns 0 on success, < 0 on error
-@@ -1310,6 +1322,11 @@ static int __init acpi_parse_madt_ioapic_entries(void)
+@@ -1306,6 +1318,11 @@ static int __init acpi_parse_madt_ioapic_entries(void)
acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0,
acpi_gbl_FADT.sci_interrupt);
@@ -8961,9 +8485,9 @@ index c55c0ef47a187..f29740cf89ff6 100644
mp_config_acpi_legacy_irqs();
--
-2.42.0
+2.43.0
-From 52e3f50633128a93bf99ca5c97f98929da66a9ed Mon Sep 17 00:00:00 2001
+From 8e2f2b852776fca1dd0ab8728be2303051cb19e1 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Thu, 3 Jun 2021 14:04:26 +0200
Subject: [PATCH] ACPI: Add AMD 13" Surface Laptop 4 model to irq 7 override
@@ -8978,10 +8502,10 @@ Patchset: amd-gpio
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
-index f29740cf89ff6..247d2a8bcdf4b 100644
+index 2c0e04a3a697..b0e1dab3d2ec 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
-@@ -1258,12 +1258,19 @@ static void __init mp_config_acpi_legacy_irqs(void)
+@@ -1254,12 +1254,19 @@ static void __init mp_config_acpi_legacy_irqs(void)
static const struct dmi_system_id surface_quirk[] __initconst = {
{
@@ -9003,9 +8527,9 @@ index f29740cf89ff6..247d2a8bcdf4b 100644
};
--
-2.42.0
+2.43.0
-From 8cd23b1bb3a8b7a3ef7cec2c37e7e46e6397a858 Mon Sep 17 00:00:00 2001
+From 4e36132e272de3d84833b799be56c2b460db08b6 Mon Sep 17 00:00:00 2001
From: "Bart Groeneveld | GPX Solutions B.V" <bart@gpxbv.nl>
Date: Mon, 5 Dec 2022 16:08:46 +0100
Subject: [PATCH] acpi: allow usage of acpi_tad on HW-reduced platforms
@@ -9028,7 +8552,7 @@ Patchset: rtc
1 file changed, 24 insertions(+), 12 deletions(-)
diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c
-index 33c3b16af556b..900445d06623d 100644
+index 33c3b16af556..900445d06623 100644
--- a/drivers/acpi/acpi_tad.c
+++ b/drivers/acpi/acpi_tad.c
@@ -432,6 +432,14 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr,
@@ -9113,5 +8637,5 @@ index 33c3b16af556b..900445d06623d 100644
ret = sysfs_create_group(&dev->kobj, &acpi_tad_dc_attr_group);
if (ret)
--
-2.42.0
+2.43.0
diff --git a/SOURCES/mod-internal.list b/SOURCES/mod-internal.list
index 1d535d8..a63d55f 100644
--- a/SOURCES/mod-internal.list
+++ b/SOURCES/mod-internal.list
@@ -1,5 +1,7 @@
bitfield_kunit
checksum_kunit
+cfg80211-tests
+clk-fractional-divider_test
clk-gate_test
clk_test
cmdline_kunit
@@ -22,10 +24,12 @@ drm_mm_test
drm_modes_test
drm_plane_helper_test
drm_probe_helper_test
+drm_exec_test
drm_rect_test
ext4-inode-test
fat_test
fortify_kunit
+gso_test
gss_krb5_test
handshake-test
hashtable_test
@@ -42,6 +46,8 @@ lib_test
list-test
locktorture
mac80211_hwsim
+mac80211-tests
+mean_and_variance_test
memcpy_kunit
mptcp_crypto_test
mptcp_token_test
@@ -49,6 +55,7 @@ mtty
netdevsim
overflow_kunit
pktgen
+property-entry-test
rational-test
rcuscale
rcutorture
@@ -59,6 +66,7 @@ rocker
scftorture
siphash_kunit
slub_kunit
+snd-hda-cirrus-scodec-test
soc-topology-test
soc-utils-test
stackinit_kunit
diff --git a/SOURCES/nouveau-gsp-default.patch b/SOURCES/nouveau-gsp-default.patch
new file mode 100644
index 0000000..6e48562
--- /dev/null
+++ b/SOURCES/nouveau-gsp-default.patch
@@ -0,0 +1,23 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jan200101 <sentrycraft123@gmail.com>
+Date: Thu, 8 Feb 2024 00:14:36 +0100
+Subject: [PATCH] enable Nouveau GSP by default on all platforms
+
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
+---
+ drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+index 9ee58e2a0eb2..e7864dedf01b 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+@@ -2296,7 +2296,7 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif)
+ struct nvkm_subdev *subdev = &gsp->subdev;
+ int ret;
+
+- if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable))
++ if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", true))
+ return -EINVAL;
+
+ if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) ||
diff --git a/SOURCES/patch-6.6-redhat.patch b/SOURCES/patch-6.7-redhat.patch
index 7959094..dc163a4 100644
--- a/SOURCES/patch-6.6-redhat.patch
+++ b/SOURCES/patch-6.7-redhat.patch
@@ -1,31 +1,30 @@
- Makefile | 20 +-
+ Makefile | 20 ++-
arch/s390/include/asm/ipl.h | 1 +
arch/s390/kernel/ipl.c | 5 +
arch/s390/kernel/setup.c | 4 +
- arch/x86/kernel/setup.c | 22 +-
+ arch/x86/kernel/setup.c | 22 ++-
drivers/acpi/apei/hest.c | 8 +
drivers/acpi/irq.c | 17 +-
- drivers/acpi/scan.c | 9 +
- drivers/ata/libahci.c | 18 ++
- drivers/char/ipmi/ipmi_dmi.c | 15 +
+ drivers/acpi/scan.c | 9 ++
+ drivers/ata/libahci.c | 18 +++
+ drivers/char/ipmi/ipmi_dmi.c | 15 ++
drivers/char/ipmi/ipmi_msghandler.c | 16 +-
drivers/firmware/efi/Makefile | 1 +
- drivers/firmware/efi/efi.c | 124 +++++--
- drivers/firmware/efi/secureboot.c | 38 +++
- drivers/firmware/sysfb.c | 18 +-
- drivers/hid/hid-rmi.c | 66 ----
- drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 ++
- drivers/input/rmi4/rmi_driver.c | 124 ++++---
- drivers/iommu/iommu.c | 22 ++
+ drivers/firmware/efi/efi.c | 124 +++++++++++----
+ drivers/firmware/efi/secureboot.c | 38 +++++
+ drivers/firmware/sysfb.c | 18 ++-
+ drivers/hid/hid-rmi.c | 66 --------
+ drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 +++
+ drivers/input/rmi4/rmi_driver.c | 124 +++++++++------
+ drivers/iommu/iommu.c | 22 +++
drivers/net/wireless/ath/ath10k/wmi-tlv.c | 4 +
- drivers/pci/quirks.c | 24 ++
- drivers/rtc/rtc-cmos.c | 18 +-
- drivers/scsi/sd.c | 10 +
+ drivers/pci/quirks.c | 24 +++
+ drivers/scsi/sd.c | 10 ++
drivers/usb/core/hub.c | 7 +
- include/linux/efi.h | 22 +-
+ include/linux/efi.h | 22 ++-
include/linux/lsm_hook_defs.h | 2 +
include/linux/module.h | 1 +
- include/linux/rh_kabi.h | 172 ++++++++++
+ include/linux/rh_kabi.h | 172 +++++++++++++++++++++
include/linux/rmi.h | 1 +
include/linux/security.h | 5 +
kernel/module/main.c | 2 +
@@ -33,18 +32,14 @@
scripts/mod/modpost.c | 8 +
scripts/tags.sh | 2 +
security/integrity/platform_certs/load_uefi.c | 6 +-
- security/lockdown/Kconfig | 13 +
+ security/lockdown/Kconfig | 13 ++
security/lockdown/lockdown.c | 1 +
- security/security.c | 12 +
- sound/pci/hda/cs35l41_hda.c | 106 +++++-
- sound/pci/hda/cs35l41_hda.h | 8 +-
- sound/pci/hda/cs35l41_hda_property.c | 355 +++++++++++++++++++--
- sound/pci/hda/hda_component.h | 4 +
- sound/pci/hda/patch_realtek.c | 34 +-
- 43 files changed, 1132 insertions(+), 241 deletions(-)
+ security/security.c | 12 ++
+ tools/power/cpupower/Makefile | 2 +-
+ 38 files changed, 674 insertions(+), 176 deletions(-)
diff --git a/Makefile b/Makefile
-index bad16eda67e2..cfd8719528dd 100644
+index 96a08c9f0faa..e627bde4e578 100644
--- a/Makefile
+++ b/Makefile
@@ -22,6 +22,18 @@ $(if $(filter __%, $(MAKECMDGOALS)), \
@@ -66,7 +61,7 @@ index bad16eda67e2..cfd8719528dd 100644
# We are using a recursive build, so we need to do a little thinking
# to get the ordering right.
#
-@@ -1250,7 +1262,13 @@ define filechk_version.h
+@@ -1239,7 +1251,13 @@ define filechk_version.h
((c) > 255 ? 255 : (c)))'; \
echo \#define LINUX_VERSION_MAJOR $(VERSION); \
echo \#define LINUX_VERSION_PATCHLEVEL $(PATCHLEVEL); \
@@ -94,7 +89,7 @@ index b0d00032479d..afb9544fb007 100644
/*
* DIAG 308 support
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
-index 8d0b95c17312..118ae555a179 100644
+index ba75f6bee774..7d37ce1772f1 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2520,3 +2520,8 @@ int ipl_report_free(struct ipl_report *report)
@@ -107,7 +102,7 @@ index 8d0b95c17312..118ae555a179 100644
+ return !!ipl_secure_flag;
+}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
-index de6ad0fb2328..5cc2758be027 100644
+index 5701356f4f33..cf3593dc271e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -49,6 +49,7 @@
@@ -118,7 +113,7 @@ index de6ad0fb2328..5cc2758be027 100644
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>
-@@ -914,6 +915,9 @@ void __init setup_arch(char **cmdline_p)
+@@ -907,6 +908,9 @@ void __init setup_arch(char **cmdline_p)
log_component_list();
@@ -129,7 +124,7 @@ index de6ad0fb2328..5cc2758be027 100644
/* boot_command_line has been already set up in early.c */
*cmdline_p = boot_command_line;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
-index b098b1fa2470..a159419e60df 100644
+index 1526747bedf2..c482cbf0ccaf 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -21,6 +21,7 @@
@@ -140,7 +135,7 @@ index b098b1fa2470..a159419e60df 100644
#include <linux/usb/xhci-dbgp.h>
#include <linux/static_call.h>
#include <linux/swiotlb.h>
-@@ -1028,6 +1029,13 @@ void __init setup_arch(char **cmdline_p)
+@@ -903,6 +904,13 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_init();
@@ -154,7 +149,7 @@ index b098b1fa2470..a159419e60df 100644
reserve_ibft_region();
dmi_setup();
-@@ -1189,19 +1197,7 @@ void __init setup_arch(char **cmdline_p)
+@@ -1064,19 +1072,7 @@ void __init setup_arch(char **cmdline_p)
/* Allocate bigger log buffer */
setup_log_buf(1);
@@ -238,7 +233,7 @@ index 1687483ff319..390b67f19181 100644
return ctx.rc;
}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
-index 1d249d0f61ae..f064f4c6405a 100644
+index 02bb2cce423f..4a6287d7a22d 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1757,6 +1757,15 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device)
@@ -258,7 +253,7 @@ index 1d249d0f61ae..f064f4c6405a 100644
acpi_dev_get_resources(device, &resource_list,
acpi_check_serial_bus_slave,
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
-index f1263364fa97..24ac410f4366 100644
+index 1a63200ea437..a911e976a596 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -729,6 +729,24 @@ int ahci_stop_engine(struct ata_port *ap)
@@ -313,7 +308,7 @@ index bbf7029e224b..cf7faa970dd6 100644
dmi_decode_ipmi((const struct dmi_header *) dev->device_data);
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
-index 186f1fee7534..93e3a76596ff 100644
+index d6f14279684d..9bc53b1970ab 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -35,6 +35,7 @@
@@ -324,7 +319,7 @@ index 186f1fee7534..93e3a76596ff 100644
#include <linux/delay.h>
#define IPMI_DRIVER_VERSION "39.2"
-@@ -5516,8 +5517,21 @@ static int __init ipmi_init_msghandler_mod(void)
+@@ -5511,8 +5512,21 @@ static int __init ipmi_init_msghandler_mod(void)
{
int rv;
@@ -360,7 +355,7 @@ index e489fefd23da..f2dfae764fb5 100644
obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o
obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
-index 1974f0ad32ba..6ba6391494ec 100644
+index 9d3910d1abe1..f73b3ba6ef0b 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -32,6 +32,7 @@
@@ -371,9 +366,9 @@ index 1974f0ad32ba..6ba6391494ec 100644
#include <asm/early_ioremap.h>
-@@ -983,40 +984,101 @@ int efi_mem_type(unsigned long phys_addr)
+@@ -972,40 +973,101 @@ int efi_mem_type(unsigned long phys_addr)
+ return -EINVAL;
}
- #endif
+struct efi_error_code {
+ efi_status_t status;
@@ -549,7 +544,7 @@ index 000000000000..de0a3714a5d4
+ }
+}
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
-index 82fcfd29bc4d..17b7e096b682 100644
+index 3c197db42c9d..16e4a2e90fae 100644
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -34,6 +34,22 @@
@@ -585,7 +580,7 @@ index 82fcfd29bc4d..17b7e096b682 100644
if (!IS_ERR(pd))
goto unlock_mutex;
diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c
-index 84e7ba5314d3..efc96776f761 100644
+index d4af17fdba46..154f0403cbf4 100644
--- a/drivers/hid/hid-rmi.c
+++ b/drivers/hid/hid-rmi.c
@@ -321,21 +321,12 @@ static int rmi_input_event(struct hid_device *hdev, u8 *data, int size)
@@ -610,7 +605,7 @@ index 84e7ba5314d3..efc96776f761 100644
return 1;
}
-@@ -591,56 +582,6 @@ static const struct rmi_transport_ops hid_rmi_ops = {
+@@ -589,56 +580,6 @@ static const struct rmi_transport_ops hid_rmi_ops = {
.reset = rmi_hid_reset,
};
@@ -667,7 +662,7 @@ index 84e7ba5314d3..efc96776f761 100644
static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id)
{
struct rmi_data *data = NULL;
-@@ -713,18 +654,11 @@ static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id)
+@@ -711,18 +652,11 @@ static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id)
mutex_init(&data->page_mutex);
@@ -930,7 +925,7 @@ index 258d5fe3d395..f7298e3dc8f3 100644
if (data->f01_container->dev.driver) {
/* Driver already bound, so enable ATTN now. */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
-index 3a67e636287a..eb5e796277d6 100644
+index 33e2a9b5d339..6ae1abc3f11c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -8,6 +8,7 @@
@@ -941,7 +936,7 @@ index 3a67e636287a..eb5e796277d6 100644
#include <linux/kernel.h>
#include <linux/bits.h>
#include <linux/bug.h>
-@@ -2931,6 +2932,27 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
+@@ -3031,6 +3032,27 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
}
EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
@@ -985,7 +980,7 @@ index 6b6aa3c36744..0ce08e9a0a3d 100644
arg->desc_id = ev->desc_id;
arg->status = ev->status;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
-index e0081914052f..ae2d04c2f2b3 100644
+index d55a3ffae4b8..a8c1d69567cd 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4410,6 +4410,30 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000,
@@ -1019,43 +1014,8 @@ index e0081914052f..ae2d04c2f2b3 100644
/*
* Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero)
* class code. Fix it.
-diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
-index 228fb2d11c70..696cfa7025de 100644
---- a/drivers/rtc/rtc-cmos.c
-+++ b/drivers/rtc/rtc-cmos.c
-@@ -818,18 +818,24 @@ static void rtc_wake_off(struct device *dev)
- }
-
- #ifdef CONFIG_X86
--/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */
- static void use_acpi_alarm_quirks(void)
- {
-- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
-+ switch (boot_cpu_data.x86_vendor) {
-+ case X86_VENDOR_INTEL:
-+ if (dmi_get_bios_year() < 2015)
-+ return;
-+ break;
-+ case X86_VENDOR_AMD:
-+ case X86_VENDOR_HYGON:
-+ if (dmi_get_bios_year() < 2021)
-+ return;
-+ break;
-+ default:
- return;
--
-+ }
- if (!is_hpet_enabled())
- return;
-
-- if (dmi_get_bios_year() < 2015)
-- return;
--
- use_acpi_alarm = true;
- }
- #else
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
-index c2e8d9e27749..c24dbb681664 100644
+index 542a4bbb21bc..62161ceed2e2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -118,6 +118,14 @@ static const char *sd_cache_types[] = {
@@ -1073,7 +1033,7 @@ index c2e8d9e27749..c24dbb681664 100644
static void sd_set_flush_flag(struct scsi_disk *sdkp)
{
bool wc = false, fua = false;
-@@ -4045,6 +4053,8 @@ static int __init init_sd(void)
+@@ -4048,6 +4056,8 @@ static int __init init_sd(void)
goto err_out_class;
}
@@ -1083,10 +1043,10 @@ index c2e8d9e27749..c24dbb681664 100644
if (err)
goto err_out_driver;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
-index dfc30cebd4c4..ce1e2cf26478 100644
+index 87480a6e6d93..0f0c5550bea6 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
-@@ -5759,6 +5759,13 @@ static void hub_event(struct work_struct *work)
+@@ -5763,6 +5763,13 @@ static void hub_event(struct work_struct *work)
(u16) hub->change_bits[0],
(u16) hub->event_bits[0]);
@@ -1101,7 +1061,7 @@ index dfc30cebd4c4..ce1e2cf26478 100644
* disconnected while waiting for the lock to succeed. */
usb_lock_device(hdev);
diff --git a/include/linux/efi.h b/include/linux/efi.h
-index 80b21d1c6eaf..b66c0683f2fc 100644
+index 9cc5bf32f6f2..7462fb1fc99e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -44,6 +44,8 @@ struct screen_info;
@@ -1113,7 +1073,7 @@ index 80b21d1c6eaf..b66c0683f2fc 100644
typedef unsigned long efi_status_t;
typedef u8 efi_bool_t;
typedef u16 efi_char16_t; /* UNICODE character */
-@@ -871,6 +873,14 @@ extern int __init efi_setup_pcdp_console(char *);
+@@ -864,6 +866,14 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len)
#define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */
#define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */
#define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */
@@ -1128,7 +1088,7 @@ index 80b21d1c6eaf..b66c0683f2fc 100644
#ifdef CONFIG_EFI
/*
-@@ -882,6 +892,8 @@ static inline bool efi_enabled(int feature)
+@@ -875,6 +885,8 @@ static inline bool efi_enabled(int feature)
}
extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
@@ -1137,7 +1097,7 @@ index 80b21d1c6eaf..b66c0683f2fc 100644
bool __pure __efi_soft_reserve_enabled(void);
static inline bool __pure efi_soft_reserve_enabled(void)
-@@ -903,6 +915,8 @@ static inline bool efi_enabled(int feature)
+@@ -896,6 +908,8 @@ static inline bool efi_enabled(int feature)
static inline void
efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {}
@@ -1146,7 +1106,7 @@ index 80b21d1c6eaf..b66c0683f2fc 100644
static inline bool efi_soft_reserve_enabled(void)
{
return false;
-@@ -917,6 +931,7 @@ static inline void efi_find_mirror(void) {}
+@@ -910,6 +924,7 @@ static inline void efi_find_mirror(void) {}
#endif
extern int efi_status_to_err(efi_status_t status);
@@ -1154,7 +1114,7 @@ index 80b21d1c6eaf..b66c0683f2fc 100644
/*
* Variable Attributes
-@@ -1133,13 +1148,6 @@ static inline bool efi_runtime_disabled(void) { return true; }
+@@ -1126,13 +1141,6 @@ static inline bool efi_runtime_disabled(void) { return true; }
extern void efi_call_virt_check_flags(unsigned long flags, const void *caller);
extern unsigned long efi_call_virt_save_flags(void);
@@ -1169,10 +1129,10 @@ index 80b21d1c6eaf..b66c0683f2fc 100644
enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var)
{
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
-index 2b8d85aae083..6a560e1abb59 100644
+index 472cb16458b0..7c0f6dd800cb 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
-@@ -405,6 +405,8 @@ LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux)
+@@ -407,6 +407,8 @@ LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux)
#endif /* CONFIG_BPF_SYSCALL */
LSM_HOOK(int, 0, locked_down, enum lockdown_reason what)
@@ -1384,10 +1344,10 @@ index ab7eea01ab42..fff7c5f737fc 100644
int rmi_register_transport_device(struct rmi_transport_dev *xport);
diff --git a/include/linux/security.h b/include/linux/security.h
-index 5f16eecde00b..974be25cfa70 100644
+index 9d3138c6364c..7089843865bf 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
-@@ -484,6 +484,7 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
+@@ -486,6 +486,7 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
int security_locked_down(enum lockdown_reason what);
@@ -1395,7 +1355,7 @@ index 5f16eecde00b..974be25cfa70 100644
#else /* CONFIG_SECURITY */
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
-@@ -1395,6 +1396,10 @@ static inline int security_locked_down(enum lockdown_reason what)
+@@ -1404,6 +1405,10 @@ static inline int security_locked_down(enum lockdown_reason what)
{
return 0;
}
@@ -1450,18 +1410,18 @@ index a2ff4242e623..f0d2be1ee4f1 100644
int module_sig_check(struct load_info *info, int flags)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
-index ac4ef3e206bb..80ede130812c 100644
+index cb6406f485a9..71e1f15d9dce 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
-@@ -23,6 +23,7 @@
+@@ -22,6 +22,7 @@
+ #include <errno.h>
#include "modpost.h"
#include "../../include/linux/license.h"
- #include "../../include/linux/module_symbol.h"
+#include "../../include/generated/uapi/linux/version.h"
static bool module_enabled;
/* Are we using CONFIG_MODVERSIONS? */
-@@ -2090,6 +2091,12 @@ static void write_buf(struct buffer *b, const char *fname)
+@@ -1987,6 +1988,12 @@ static void write_buf(struct buffer *b, const char *fname)
}
}
@@ -1474,7 +1434,7 @@ index ac4ef3e206bb..80ede130812c 100644
static void write_if_changed(struct buffer *b, const char *fname)
{
char *tmp;
-@@ -2150,6 +2157,7 @@ static void write_mod_c_file(struct module *mod)
+@@ -2047,6 +2054,7 @@ static void write_mod_c_file(struct module *mod)
add_depends(&buf, mod);
add_moddevtable(&buf, mod);
add_srcversion(&buf, mod);
@@ -1556,10 +1516,10 @@ index 68d19632aeb7..ef348935b6ff 100644
static int __init lockdown_lsm_init(void)
diff --git a/security/security.c b/security/security.c
-index 23b129d482a7..55d0fe0d121b 100644
+index 266cec94369b..c572a4da96b2 100644
--- a/security/security.c
+++ b/security/security.c
-@@ -5230,6 +5230,18 @@ int security_locked_down(enum lockdown_reason what)
+@@ -5248,6 +5248,18 @@ int security_locked_down(enum lockdown_reason what)
}
EXPORT_SYMBOL(security_locked_down);
@@ -1578,715 +1538,16 @@ index 23b129d482a7..55d0fe0d121b 100644
#ifdef CONFIG_PERF_EVENTS
/**
* security_perf_event_open() - Check if a perf event open is allowed
-diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c
-index 3c157b006a5a..257f71e9ec07 100644
---- a/sound/pci/hda/cs35l41_hda.c
-+++ b/sound/pci/hda/cs35l41_hda.c
-@@ -33,6 +33,9 @@
- #define CAL_AMBIENT_DSP_CTL_NAME "CAL_AMBIENT"
- #define CAL_DSP_CTL_TYPE 5
- #define CAL_DSP_CTL_ALG 205
-+#define CS35L41_UUID "50d90cdc-3de4-4f18-b528-c7fe3b71f40d"
-+#define CS35L41_DSM_GET_MUTE 5
-+#define CS35L41_NOTIFY_EVENT 0x91
-
- static bool firmware_autostart = 1;
- module_param(firmware_autostart, bool, 0444);
-@@ -563,6 +566,31 @@ static void cs35l41_hda_play_start(struct device *dev)
-
- }
-
-+static void cs35l41_mute(struct device *dev, bool mute)
-+{
-+ struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
-+ struct regmap *reg = cs35l41->regmap;
-+
-+ dev_dbg(dev, "Mute(%d:%d) Playback Started: %d\n", mute, cs35l41->mute_override,
-+ cs35l41->playback_started);
-+
-+ if (cs35l41->playback_started) {
-+ if (mute || cs35l41->mute_override) {
-+ dev_dbg(dev, "Muting\n");
-+ regmap_multi_reg_write(reg, cs35l41_hda_mute, ARRAY_SIZE(cs35l41_hda_mute));
-+ } else {
-+ dev_dbg(dev, "Unmuting\n");
-+ if (cs35l41->firmware_running) {
-+ regmap_multi_reg_write(reg, cs35l41_hda_unmute_dsp,
-+ ARRAY_SIZE(cs35l41_hda_unmute_dsp));
-+ } else {
-+ regmap_multi_reg_write(reg, cs35l41_hda_unmute,
-+ ARRAY_SIZE(cs35l41_hda_unmute));
-+ }
-+ }
-+ }
-+}
-+
- static void cs35l41_hda_play_done(struct device *dev)
- {
- struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
-@@ -572,13 +600,7 @@ static void cs35l41_hda_play_done(struct device *dev)
-
- cs35l41_global_enable(dev, reg, cs35l41->hw_cfg.bst_type, 1,
- cs35l41->firmware_running);
-- if (cs35l41->firmware_running) {
-- regmap_multi_reg_write(reg, cs35l41_hda_unmute_dsp,
-- ARRAY_SIZE(cs35l41_hda_unmute_dsp));
-- } else {
-- regmap_multi_reg_write(reg, cs35l41_hda_unmute,
-- ARRAY_SIZE(cs35l41_hda_unmute));
-- }
-+ cs35l41_mute(dev, false);
- }
-
- static void cs35l41_hda_pause_start(struct device *dev)
-@@ -588,7 +610,7 @@ static void cs35l41_hda_pause_start(struct device *dev)
-
- dev_dbg(dev, "Pause (Start)\n");
-
-- regmap_multi_reg_write(reg, cs35l41_hda_mute, ARRAY_SIZE(cs35l41_hda_mute));
-+ cs35l41_mute(dev, true);
- cs35l41_global_enable(dev, reg, cs35l41->hw_cfg.bst_type, 0,
- cs35l41->firmware_running);
- }
-@@ -1116,6 +1138,53 @@ static int cs35l41_create_controls(struct cs35l41_hda *cs35l41)
- return 0;
- }
-
-+static bool cs35l41_dsm_supported(acpi_handle handle, unsigned int commands)
-+{
-+ guid_t guid;
-+
-+ guid_parse(CS35L41_UUID, &guid);
-+
-+ return acpi_check_dsm(handle, &guid, 0, BIT(commands));
-+}
-+
-+static int cs35l41_get_acpi_mute_state(struct cs35l41_hda *cs35l41, acpi_handle handle)
-+{
-+ guid_t guid;
-+ union acpi_object *ret;
-+ int mute = -ENODEV;
-+
-+ guid_parse(CS35L41_UUID, &guid);
-+
-+ if (cs35l41_dsm_supported(handle, CS35L41_DSM_GET_MUTE)) {
-+ ret = acpi_evaluate_dsm(handle, &guid, 0, CS35L41_DSM_GET_MUTE, NULL);
-+ mute = *ret->buffer.pointer;
-+ dev_dbg(cs35l41->dev, "CS35L41_DSM_GET_MUTE: %d\n", mute);
-+ }
-+
-+ dev_dbg(cs35l41->dev, "%s: %d\n", __func__, mute);
-+
-+ return mute;
-+}
-+
-+static void cs35l41_acpi_device_notify(acpi_handle handle, u32 event, struct device *dev)
-+{
-+ struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
-+ int mute;
-+
-+ if (event != CS35L41_NOTIFY_EVENT)
-+ return;
-+
-+ mute = cs35l41_get_acpi_mute_state(cs35l41, handle);
-+ if (mute < 0) {
-+ dev_warn(cs35l41->dev, "Unable to retrieve mute state: %d\n", mute);
-+ return;
-+ }
-+
-+ dev_dbg(cs35l41->dev, "Requesting mute value: %d\n", mute);
-+ cs35l41->mute_override = (mute > 0);
-+ cs35l41_mute(cs35l41->dev, cs35l41->mute_override);
-+}
-+
- static int cs35l41_hda_bind(struct device *dev, struct device *master, void *master_data)
- {
- struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
-@@ -1157,6 +1226,14 @@ static int cs35l41_hda_bind(struct device *dev, struct device *master, void *mas
- comps->playback_hook = cs35l41_hda_playback_hook;
- comps->pre_playback_hook = cs35l41_hda_pre_playback_hook;
- comps->post_playback_hook = cs35l41_hda_post_playback_hook;
-+ comps->acpi_notify = cs35l41_acpi_device_notify;
-+ comps->adev = cs35l41->dacpi;
-+
-+ comps->acpi_notifications_supported = cs35l41_dsm_supported(acpi_device_handle(comps->adev),
-+ CS35L41_DSM_GET_MUTE);
-+
-+ cs35l41->mute_override = cs35l41_get_acpi_mute_state(cs35l41,
-+ acpi_device_handle(cs35l41->dacpi)) > 0;
-
- mutex_unlock(&cs35l41->fw_mutex);
-
-@@ -1430,8 +1507,8 @@ static int cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41, const char *hid, i
- return -ENODEV;
- }
-
-+ cs35l41->dacpi = adev;
- physdev = get_device(acpi_get_first_physical_node(adev));
-- acpi_dev_put(adev);
-
- sub = acpi_get_subsystem_id(ACPI_HANDLE(physdev));
- if (IS_ERR(sub))
-@@ -1541,6 +1618,7 @@ static int cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41, const char *hid, i
- hw_cfg->valid = false;
- hw_cfg->gpio1.valid = false;
- hw_cfg->gpio2.valid = false;
-+ acpi_dev_put(cs35l41->dacpi);
- put_physdev:
- put_device(physdev);
-
-@@ -1644,10 +1722,7 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
- if (ret)
- goto err;
-
-- ret = regmap_multi_reg_write(cs35l41->regmap, cs35l41_hda_mute,
-- ARRAY_SIZE(cs35l41_hda_mute));
-- if (ret)
-- goto err;
-+ cs35l41_mute(cs35l41->dev, true);
-
- INIT_WORK(&cs35l41->fw_load_work, cs35l41_fw_load_work);
- mutex_init(&cs35l41->fw_mutex);
-@@ -1684,6 +1759,8 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
- if (cs35l41_safe_reset(cs35l41->regmap, cs35l41->hw_cfg.bst_type))
- gpiod_set_value_cansleep(cs35l41->reset_gpio, 0);
- gpiod_put(cs35l41->reset_gpio);
-+ gpiod_put(cs35l41->cs_gpio);
-+ acpi_dev_put(cs35l41->dacpi);
- kfree(cs35l41->acpi_subsystem_id);
-
- return ret;
-@@ -1703,11 +1780,14 @@ void cs35l41_hda_remove(struct device *dev)
-
- component_del(cs35l41->dev, &cs35l41_hda_comp_ops);
-
-+ acpi_dev_put(cs35l41->dacpi);
-+
- pm_runtime_put_noidle(cs35l41->dev);
-
- if (cs35l41_safe_reset(cs35l41->regmap, cs35l41->hw_cfg.bst_type))
- gpiod_set_value_cansleep(cs35l41->reset_gpio, 0);
- gpiod_put(cs35l41->reset_gpio);
-+ gpiod_put(cs35l41->cs_gpio);
- kfree(cs35l41->acpi_subsystem_id);
- }
- EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41);
-diff --git a/sound/pci/hda/cs35l41_hda.h b/sound/pci/hda/cs35l41_hda.h
-index b93bf762976e..3d925d677213 100644
---- a/sound/pci/hda/cs35l41_hda.h
-+++ b/sound/pci/hda/cs35l41_hda.h
-@@ -10,6 +10,7 @@
- #ifndef __CS35L41_HDA_H__
- #define __CS35L41_HDA_H__
-
-+#include <linux/acpi.h>
- #include <linux/efi.h>
- #include <linux/regulator/consumer.h>
- #include <linux/gpio/consumer.h>
-@@ -34,8 +35,8 @@ struct cs35l41_amp_efi_data {
- } __packed;
-
- enum cs35l41_hda_spk_pos {
-- CS35l41_LEFT,
-- CS35l41_RIGHT,
-+ CS35L41_LEFT,
-+ CS35L41_RIGHT,
- };
-
- enum cs35l41_hda_gpio_function {
-@@ -49,6 +50,7 @@ struct cs35l41_hda {
- struct device *dev;
- struct regmap *regmap;
- struct gpio_desc *reset_gpio;
-+ struct gpio_desc *cs_gpio;
- struct cs35l41_hw_cfg hw_cfg;
- struct hda_codec *codec;
-
-@@ -70,6 +72,8 @@ struct cs35l41_hda {
- bool halo_initialized;
- bool playback_started;
- struct cs_dsp cs_dsp;
-+ struct acpi_device *dacpi;
-+ bool mute_override;
- };
-
- enum halo_state {
-diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c
-index b62a4e6968e2..c9eb70290973 100644
---- a/sound/pci/hda/cs35l41_hda_property.c
-+++ b/sound/pci/hda/cs35l41_hda_property.c
-@@ -6,9 +6,300 @@
- //
- // Author: Stefan Binding <sbinding@opensource.cirrus.com>
-
-+#include <linux/acpi.h>
- #include <linux/gpio/consumer.h>
- #include <linux/string.h>
- #include "cs35l41_hda_property.h"
-+#include <linux/spi/spi.h>
-+
-+#define MAX_AMPS 4
-+
-+struct cs35l41_config {
-+ const char *ssid;
-+ enum {
-+ SPI,
-+ I2C
-+ } bus;
-+ int num_amps;
-+ enum {
-+ INTERNAL,
-+ EXTERNAL
-+ } boost_type;
-+ u8 channel[MAX_AMPS];
-+ int reset_gpio_index; /* -1 if no reset gpio */
-+ int spkid_gpio_index; /* -1 if no spkid gpio */
-+ int cs_gpio_index; /* -1 if no cs gpio, or cs-gpios already exists, max num amps == 2 */
-+ int boost_ind_nanohenry; /* Required if boost_type == Internal */
-+ int boost_peak_milliamp; /* Required if boost_type == Internal */
-+ int boost_cap_microfarad; /* Required if boost_type == Internal */
-+};
-+
-+static const struct cs35l41_config cs35l41_config_table[] = {
-+/*
-+ * Device 103C89C6 does have _DSD, however it is setup to use the wrong boost type.
-+ * We can override the _DSD to correct the boost type here.
-+ * Since this laptop has valid ACPI, we do not need to handle cs-gpios, since that already exists
-+ * in the ACPI. The Reset GPIO is also valid, so we can use the Reset defined in _DSD.
-+ */
-+ { "103C89C6", SPI, 2, INTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, -1, -1, -1, 1000, 4500, 24 },
-+ { "104312AF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "10431433", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431463", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431473", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 },
-+ { "10431483", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 },
-+ { "10431493", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "104314D3", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "104314E3", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431503", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431533", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431573", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "10431663", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 },
-+ { "104316D3", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
-+ { "104316F3", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
-+ { "104317F3", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431863", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "104318D3", I2C, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
-+ { "10431C9F", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "10431CAF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "10431CCF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "10431CDF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "10431CEF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
-+ { "10431D1F", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431DA2", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
-+ { "10431E02", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
-+ { "10431EE2", I2C, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
-+ { "10431F12", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
-+ { "10431F1F", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 },
-+ { "10431F62", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
-+ {}
-+};
-+
-+static int cs35l41_add_gpios(struct cs35l41_hda *cs35l41, struct device *physdev, int reset_gpio,
-+ int spkid_gpio, int cs_gpio_index, int num_amps)
-+{
-+ struct acpi_gpio_mapping *gpio_mapping;
-+ struct acpi_gpio_params *reset_gpio_params;
-+ struct acpi_gpio_params *spkid_gpio_params;
-+ struct acpi_gpio_params *cs_gpio_params;
-+ unsigned int num_entries = 0;
-+ unsigned int reset_index, spkid_index, csgpio_index;
-+ int i;
-+
-+ /*
-+ * GPIO Mapping only needs to be done once, since it would be available for subsequent amps
-+ */
-+ if (cs35l41->dacpi->driver_gpios)
-+ return 0;
-+
-+ if (reset_gpio >= 0) {
-+ reset_index = num_entries;
-+ num_entries++;
-+ }
-+
-+ if (spkid_gpio >= 0) {
-+ spkid_index = num_entries;
-+ num_entries++;
-+ }
-+
-+ if ((cs_gpio_index >= 0) && (num_amps == 2)) {
-+ csgpio_index = num_entries;
-+ num_entries++;
-+ }
-+
-+ if (!num_entries)
-+ return 0;
-+
-+ /* must include termination entry */
-+ num_entries++;
-+
-+ gpio_mapping = devm_kcalloc(physdev, num_entries, sizeof(struct acpi_gpio_mapping),
-+ GFP_KERNEL);
-+
-+ if (!gpio_mapping)
-+ goto err;
-+
-+ if (reset_gpio >= 0) {
-+ gpio_mapping[reset_index].name = "reset-gpios";
-+ reset_gpio_params = devm_kcalloc(physdev, num_amps, sizeof(struct acpi_gpio_params),
-+ GFP_KERNEL);
-+ if (!reset_gpio_params)
-+ goto err;
-+
-+ for (i = 0; i < num_amps; i++)
-+ reset_gpio_params[i].crs_entry_index = reset_gpio;
-+
-+ gpio_mapping[reset_index].data = reset_gpio_params;
-+ gpio_mapping[reset_index].size = num_amps;
-+ }
-+
-+ if (spkid_gpio >= 0) {
-+ gpio_mapping[spkid_index].name = "spk-id-gpios";
-+ spkid_gpio_params = devm_kcalloc(physdev, num_amps, sizeof(struct acpi_gpio_params),
-+ GFP_KERNEL);
-+ if (!spkid_gpio_params)
-+ goto err;
-+
-+ for (i = 0; i < num_amps; i++)
-+ spkid_gpio_params[i].crs_entry_index = spkid_gpio;
-+
-+ gpio_mapping[spkid_index].data = spkid_gpio_params;
-+ gpio_mapping[spkid_index].size = num_amps;
-+ }
-+
-+ if ((cs_gpio_index >= 0) && (num_amps == 2)) {
-+ gpio_mapping[csgpio_index].name = "cs-gpios";
-+ /* only one GPIO CS is supported without using _DSD, obtained using index 0 */
-+ cs_gpio_params = devm_kzalloc(physdev, sizeof(struct acpi_gpio_params), GFP_KERNEL);
-+ if (!cs_gpio_params)
-+ goto err;
-+
-+ cs_gpio_params->crs_entry_index = cs_gpio_index;
-+
-+ gpio_mapping[csgpio_index].data = cs_gpio_params;
-+ gpio_mapping[csgpio_index].size = 1;
-+ }
-+
-+ return devm_acpi_dev_add_driver_gpios(physdev, gpio_mapping);
-+err:
-+ devm_kfree(physdev, gpio_mapping);
-+ devm_kfree(physdev, reset_gpio_params);
-+ devm_kfree(physdev, spkid_gpio_params);
-+ devm_kfree(physdev, cs_gpio_params);
-+ return -ENOMEM;
-+}
-+
-+static int generic_dsd_config(struct cs35l41_hda *cs35l41, struct device *physdev, int id,
-+ const char *hid)
-+{
-+ struct cs35l41_hw_cfg *hw_cfg = &cs35l41->hw_cfg;
-+ const struct cs35l41_config *cfg;
-+ struct gpio_desc *cs_gpiod;
-+ struct spi_device *spi;
-+ bool dsd_found;
-+ int ret;
-+
-+ for (cfg = cs35l41_config_table; cfg->ssid; cfg++) {
-+ if (!strcasecmp(cfg->ssid, cs35l41->acpi_subsystem_id))
-+ break;
-+ }
-+
-+ if (!cfg->ssid)
-+ return -ENOENT;
-+
-+ if (!cs35l41->dacpi || cs35l41->dacpi != ACPI_COMPANION(physdev)) {
-+ dev_err(cs35l41->dev, "ACPI Device does not match, cannot override _DSD.\n");
-+ return -ENODEV;
-+ }
-+
-+ dev_info(cs35l41->dev, "Adding DSD properties for %s\n", cs35l41->acpi_subsystem_id);
-+
-+ dsd_found = acpi_dev_has_props(cs35l41->dacpi);
-+
-+ if (!dsd_found) {
-+ ret = cs35l41_add_gpios(cs35l41, physdev, cfg->reset_gpio_index,
-+ cfg->spkid_gpio_index, cfg->cs_gpio_index,
-+ cfg->num_amps);
-+ if (ret) {
-+ dev_err(cs35l41->dev, "Error adding GPIO mapping: %d\n", ret);
-+ return ret;
-+ }
-+ } else if (cfg->reset_gpio_index >= 0 || cfg->spkid_gpio_index >= 0) {
-+ dev_warn(cs35l41->dev, "Cannot add Reset/Speaker ID/SPI CS GPIO Mapping, "
-+ "_DSD already exists.\n");
-+ }
-+
-+ if (cfg->bus == SPI) {
-+ cs35l41->index = id;
-+ /*
-+ * Manually set the Chip Select for the second amp <cs_gpio_index> in the node.
-+ * This is only supported for systems with 2 amps, since we cannot expand the
-+ * default number of chip selects without using cs-gpios
-+ * The CS GPIO must be set high prior to communicating with the first amp (which
-+ * uses a native chip select), to ensure the second amp does not clash with the
-+ * first.
-+ */
-+ if (cfg->cs_gpio_index >= 0) {
-+ spi = to_spi_device(cs35l41->dev);
-+
-+ if (cfg->num_amps != 2) {
-+ dev_warn(cs35l41->dev,
-+ "Cannot update SPI CS, Number of Amps (%d) != 2\n",
-+ cfg->num_amps);
-+ } else if (dsd_found) {
-+ dev_warn(cs35l41->dev,
-+ "Cannot update SPI CS, _DSD already exists.\n");
-+ } else {
-+ /*
-+ * This is obtained using driver_gpios, since only one GPIO for CS
-+ * exists, this can be obtained using index 0.
-+ */
-+ cs_gpiod = gpiod_get_index(physdev, "cs", 0, GPIOD_OUT_LOW);
-+ if (IS_ERR(cs_gpiod)) {
-+ dev_err(cs35l41->dev,
-+ "Unable to get Chip Select GPIO descriptor\n");
-+ return PTR_ERR(cs_gpiod);
-+ }
-+ if (id == 1) {
-+ spi_set_csgpiod(spi, 0, cs_gpiod);
-+ cs35l41->cs_gpio = cs_gpiod;
-+ } else {
-+ gpiod_set_value_cansleep(cs_gpiod, true);
-+ gpiod_put(cs_gpiod);
-+ }
-+ spi_setup(spi);
-+ }
-+ }
-+ } else {
-+ if (cfg->num_amps > 2)
-+ /*
-+ * i2c addresses for 3/4 amps are used in order: 0x40, 0x41, 0x42, 0x43,
-+ * subtracting 0x40 would give zero-based index
-+ */
-+ cs35l41->index = id - 0x40;
-+ else
-+ /* i2c addr 0x40 for first amp (always), 0x41/0x42 for 2nd amp */
-+ cs35l41->index = id == 0x40 ? 0 : 1;
-+ }
-+
-+ if (cfg->num_amps == 3)
-+ /* 3 amps means a center channel, so no duplicate channels */
-+ cs35l41->channel_index = 0;
-+ else
-+ /*
-+ * if 4 amps, there are duplicate channels, so they need different indexes
-+ * if 2 amps, no duplicate channels, channel_index would be 0
-+ */
-+ cs35l41->channel_index = cs35l41->index / 2;
-+
-+ cs35l41->reset_gpio = fwnode_gpiod_get_index(acpi_fwnode_handle(cs35l41->dacpi), "reset",
-+ cs35l41->index, GPIOD_OUT_LOW,
-+ "cs35l41-reset");
-+ cs35l41->speaker_id = cs35l41_get_speaker_id(physdev, cs35l41->index, cfg->num_amps, -1);
-+
-+ hw_cfg->spk_pos = cfg->channel[cs35l41->index];
-+
-+ if (cfg->boost_type == INTERNAL) {
-+ hw_cfg->bst_type = CS35L41_INT_BOOST;
-+ hw_cfg->bst_ind = cfg->boost_ind_nanohenry;
-+ hw_cfg->bst_ipk = cfg->boost_peak_milliamp;
-+ hw_cfg->bst_cap = cfg->boost_cap_microfarad;
-+ hw_cfg->gpio1.func = CS35L41_NOT_USED;
-+ hw_cfg->gpio1.valid = true;
-+ } else {
-+ hw_cfg->bst_type = CS35L41_EXT_BOOST;
-+ hw_cfg->bst_ind = -1;
-+ hw_cfg->bst_ipk = -1;
-+ hw_cfg->bst_cap = -1;
-+ hw_cfg->gpio1.func = CS35l41_VSPK_SWITCH;
-+ hw_cfg->gpio1.valid = true;
-+ }
-+
-+ hw_cfg->gpio2.func = CS35L41_INTERRUPT;
-+ hw_cfg->gpio2.valid = true;
-+ hw_cfg->valid = true;
-+
-+ return 0;
-+}
-
- /*
- * Device CLSA010(0/1) doesn't have _DSD so a gpiod_get by the label reset won't work.
-@@ -43,37 +334,6 @@ static int lenovo_legion_no_acpi(struct cs35l41_hda *cs35l41, struct device *phy
- return 0;
- }
-
--/*
-- * Device 103C89C6 does have _DSD, however it is setup to use the wrong boost type.
-- * We can override the _DSD to correct the boost type here.
-- * Since this laptop has valid ACPI, we do not need to handle cs-gpios, since that already exists
-- * in the ACPI.
-- */
--static int hp_vision_acpi_fix(struct cs35l41_hda *cs35l41, struct device *physdev, int id,
-- const char *hid)
--{
-- struct cs35l41_hw_cfg *hw_cfg = &cs35l41->hw_cfg;
--
-- dev_info(cs35l41->dev, "Adding DSD properties for %s\n", cs35l41->acpi_subsystem_id);
--
-- cs35l41->index = id;
-- cs35l41->channel_index = 0;
-- cs35l41->reset_gpio = gpiod_get_index(physdev, NULL, 1, GPIOD_OUT_HIGH);
-- cs35l41->speaker_id = -ENOENT;
-- hw_cfg->spk_pos = cs35l41->index ? 1 : 0; // right:left
-- hw_cfg->gpio1.func = CS35L41_NOT_USED;
-- hw_cfg->gpio1.valid = true;
-- hw_cfg->gpio2.func = CS35L41_INTERRUPT;
-- hw_cfg->gpio2.valid = true;
-- hw_cfg->bst_type = CS35L41_INT_BOOST;
-- hw_cfg->bst_ind = 1000;
-- hw_cfg->bst_ipk = 4500;
-- hw_cfg->bst_cap = 24;
-- hw_cfg->valid = true;
--
-- return 0;
--}
--
- struct cs35l41_prop_model {
- const char *hid;
- const char *ssid;
-@@ -84,7 +344,36 @@ struct cs35l41_prop_model {
- static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
- { "CLSA0100", NULL, lenovo_legion_no_acpi },
- { "CLSA0101", NULL, lenovo_legion_no_acpi },
-- { "CSC3551", "103C89C6", hp_vision_acpi_fix },
-+ { "CSC3551", "103C89C6", generic_dsd_config },
-+ { "CSC3551", "104312AF", generic_dsd_config },
-+ { "CSC3551", "10431433", generic_dsd_config },
-+ { "CSC3551", "10431463", generic_dsd_config },
-+ { "CSC3551", "10431473", generic_dsd_config },
-+ { "CSC3551", "10431483", generic_dsd_config },
-+ { "CSC3551", "10431493", generic_dsd_config },
-+ { "CSC3551", "104314D3", generic_dsd_config },
-+ { "CSC3551", "104314E3", generic_dsd_config },
-+ { "CSC3551", "10431503", generic_dsd_config },
-+ { "CSC3551", "10431533", generic_dsd_config },
-+ { "CSC3551", "10431573", generic_dsd_config },
-+ { "CSC3551", "10431663", generic_dsd_config },
-+ { "CSC3551", "104316D3", generic_dsd_config },
-+ { "CSC3551", "104316F3", generic_dsd_config },
-+ { "CSC3551", "104317F3", generic_dsd_config },
-+ { "CSC3551", "10431863", generic_dsd_config },
-+ { "CSC3551", "104318D3", generic_dsd_config },
-+ { "CSC3551", "10431C9F", generic_dsd_config },
-+ { "CSC3551", "10431CAF", generic_dsd_config },
-+ { "CSC3551", "10431CCF", generic_dsd_config },
-+ { "CSC3551", "10431CDF", generic_dsd_config },
-+ { "CSC3551", "10431CEF", generic_dsd_config },
-+ { "CSC3551", "10431D1F", generic_dsd_config },
-+ { "CSC3551", "10431DA2", generic_dsd_config },
-+ { "CSC3551", "10431E02", generic_dsd_config },
-+ { "CSC3551", "10431EE2", generic_dsd_config },
-+ { "CSC3551", "10431F12", generic_dsd_config },
-+ { "CSC3551", "10431F1F", generic_dsd_config },
-+ { "CSC3551", "10431F62", generic_dsd_config },
- {}
- };
-
-@@ -97,7 +386,7 @@ int cs35l41_add_dsd_properties(struct cs35l41_hda *cs35l41, struct device *physd
- if (!strcmp(model->hid, hid) &&
- (!model->ssid ||
- (cs35l41->acpi_subsystem_id &&
-- !strcmp(model->ssid, cs35l41->acpi_subsystem_id))))
-+ !strcasecmp(model->ssid, cs35l41->acpi_subsystem_id))))
- return model->add_prop(cs35l41, physdev, id, hid);
- }
-
-diff --git a/sound/pci/hda/hda_component.h b/sound/pci/hda/hda_component.h
-index f170aec967c1..bbd6f0ed16c1 100644
---- a/sound/pci/hda/hda_component.h
-+++ b/sound/pci/hda/hda_component.h
-@@ -6,6 +6,7 @@
- * Cirrus Logic International Semiconductor Ltd.
- */
-
-+#include <linux/acpi.h>
- #include <linux/component.h>
-
- #define HDA_MAX_COMPONENTS 4
-@@ -15,6 +16,9 @@ struct hda_component {
- struct device *dev;
- char name[HDA_MAX_NAME_SIZE];
- struct hda_codec *codec;
-+ struct acpi_device *adev;
-+ bool acpi_notifications_supported;
-+ void (*acpi_notify)(acpi_handle handle, u32 event, struct device *dev);
- void (*pre_playback_hook)(struct device *dev, int action);
- void (*playback_hook)(struct device *dev, int action);
- void (*post_playback_hook)(struct device *dev, int action);
-diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
-index 375569d0864b..cfe4a3b588dd 100644
---- a/sound/pci/hda/patch_realtek.c
-+++ b/sound/pci/hda/patch_realtek.c
-@@ -9873,22 +9873,28 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
- SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE),
- SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC),
- SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
-- SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650P", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC),
-- SND_PCI_QUIRK(0x1043, 0x1463, "Asus GA402X", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC),
-- SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604V", ALC285_FIXUP_ASUS_HEADSET_MIC),
-- SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603V", ALC285_FIXUP_ASUS_HEADSET_MIC),
-- SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601V", ALC285_FIXUP_ASUS_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1463, "Asus GA402X/GA402N", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604VI/VC/VE/VG/VJ/VQ/VU/VV/VY/VZ", ALC285_FIXUP_ASUS_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603VQ/VU/VV/VJ/VI", ALC285_FIXUP_ASUS_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601VV/VU/VJ/VQ/VI", ALC285_FIXUP_ASUS_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x14d3, "ASUS G614JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2),
-+ SND_PCI_QUIRK(0x1043, 0x14e3, "ASUS G513PI/PU/PV", ALC287_FIXUP_CS35L41_I2C_2),
-+ SND_PCI_QUIRK(0x1043, 0x1503, "ASUS G733PY/PZ/PZV/PYV", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
-- SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA", ALC287_FIXUP_CS35L41_I2C_2),
-- SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301V", ALC285_FIXUP_ASUS_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA/XJ/XQ/XU/XV/XI", ALC287_FIXUP_CS35L41_I2C_2),
-+ SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301VV/VQ/VU/VJ/VA/VC/VE/VVC/VQC/VUC/VJC/VEC/VCC", ALC285_FIXUP_ASUS_HEADSET_MIC),
- SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK),
-- SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZV", ALC285_FIXUP_ASUS_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC),
- SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
-+ SND_PCI_QUIRK(0x1043, 0x16d3, "ASUS UX5304VA", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
-+ SND_PCI_QUIRK(0x1043, 0x16f3, "ASUS UX7602VI/BZ", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS),
- SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK),
-- SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally RC71L_RC71L", ALC294_FIXUP_ASUS_ALLY),
-+ SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally NR2301L/X", ALC294_FIXUP_ASUS_ALLY),
-+ SND_PCI_QUIRK(0x1043, 0x1863, "ASUS UX6404VI/VV", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS),
- SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
- SND_PCI_QUIRK(0x1043, 0x18d3, "ASUS UM3504DA", ALC294_FIXUP_CS35L41_I2C_2),
-@@ -9913,10 +9919,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
- SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
- SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
-- SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
-- SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
-+ SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
- SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
-- SND_PCI_QUIRK(0x1043, 0x1d1f, "ASUS ROG Strix G17 2023 (G713PV)", ALC287_FIXUP_CS35L41_I2C_2),
-+ SND_PCI_QUIRK(0x1043, 0x1ccf, "ASUS G814JU/JV/JI", ALC245_FIXUP_CS35L41_SPI_2),
-+ SND_PCI_QUIRK(0x1043, 0x1cdf, "ASUS G814JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2),
-+ SND_PCI_QUIRK(0x1043, 0x1cef, "ASUS G834JY/JZ/JI/JG", ALC285_FIXUP_ASUS_HEADSET_MIC),
-+ SND_PCI_QUIRK(0x1043, 0x1d1f, "ASUS G713PI/PU/PV/PVN", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
- SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
- SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2),
-@@ -9932,6 +9941,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
- SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401),
- SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
- SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2),
-+ SND_PCI_QUIRK(0x1043, 0x1f1f, "ASUS H7604JI/JV/J3D", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401),
- SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
+diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
+index b53753dee02f..90701fc65aa2 100644
+--- a/tools/power/cpupower/Makefile
++++ b/tools/power/cpupower/Makefile
+@@ -53,7 +53,7 @@ DESTDIR ?=
+
+ VERSION:= $(shell ./utils/version-gen.sh)
+ LIB_MAJ= 0.0.1
+-LIB_MIN= 1
++LIB_MIN= 0
+
+ PACKAGE = cpupower
+ PACKAGE_BUGREPORT = linux-pm@vger.kernel.org
diff --git a/SOURCES/rog-ally-audio-fix.patch b/SOURCES/rog-ally-audio-fix.patch
index 79ae8e8..07fb3bf 100644
--- a/SOURCES/rog-ally-audio-fix.patch
+++ b/SOURCES/rog-ally-audio-fix.patch
@@ -54,7 +54,7 @@ index 2b8f8fd52..f4933be4c 100644
struct cs35l41_prop_model {
const char *hid;
const char *ssid;
-@@ -360,7 +360,7 @@
+@@ -413,7 +413,7 @@
{ "CSC3551", "10431663", generic_dsd_config },
{ "CSC3551", "104316D3", generic_dsd_config },
{ "CSC3551", "104316F3", generic_dsd_config },
diff --git a/SOURCES/rog-ally-gyro-fix.patch b/SOURCES/rog-ally-gyro-fix.patch
new file mode 100644
index 0000000..fc02fe0
--- /dev/null
+++ b/SOURCES/rog-ally-gyro-fix.patch
@@ -0,0 +1,2974 @@
+Add devicetree description document for Bosch BMI323, a 6-Axis IMU.
+
+Signed-off-by: Jagath Jog J <jagathjog1996@gmail.com>
+Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+---
+ .../bindings/iio/imu/bosch,bmi323.yaml | 77 +++++++++++++++++++
+ 1 file changed, 77 insertions(+)
+ create mode 100644 Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml
+
+diff --git a/Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml b/Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml
+new file mode 100644
+index 000000000000..64ef26e19669
+--- /dev/null
++++ b/Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml
+@@ -0,0 +1,77 @@
++# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
++%YAML 1.2
++---
++$id: http://devicetree.org/schemas/iio/imu/bosch,bmi323.yaml#
++$schema: http://devicetree.org/meta-schemas/core.yaml#
++
++title: Bosch BMI323 6-Axis IMU
++
++maintainers:
++ - Jagath Jog J <jagathjog1996@gmail.com>
++
++description:
++ BMI323 is a 6-axis inertial measurement unit that supports acceleration and
++ gyroscopic measurements with hardware fifo buffering. Sensor also provides
++ events information such as motion, steps, orientation, single and double
++ tap detection.
++
++properties:
++ compatible:
++ const: bosch,bmi323
++
++ reg:
++ maxItems: 1
++
++ vdd-supply: true
++ vddio-supply: true
++
++ interrupts:
++ minItems: 1
++ maxItems: 2
++
++ interrupt-names:
++ minItems: 1
++ maxItems: 2
++ items:
++ enum:
++ - INT1
++ - INT2
++
++ drive-open-drain:
++ description:
++ set if the specified interrupt pin should be configured as
++ open drain. If not set, defaults to push-pull.
++
++ mount-matrix:
++ description:
++ an optional 3x3 mounting rotation matrix.
++
++required:
++ - compatible
++ - reg
++ - vdd-supply
++ - vddio-supply
++
++allOf:
++ - $ref: /schemas/spi/spi-peripheral-props.yaml#
++
++unevaluatedProperties: false
++
++examples:
++ - |
++ // Example for I2C
++ #include <dt-bindings/interrupt-controller/irq.h>
++ i2c {
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ imu@68 {
++ compatible = "bosch,bmi323";
++ reg = <0x68>;
++ vddio-supply = <&vddio>;
++ vdd-supply = <&vdd>;
++ interrupt-parent = <&gpio1>;
++ interrupts = <29 IRQ_TYPE_EDGE_RISING>;
++ interrupt-names = "INT1";
++ };
++ };
+From: Jagath Jog J <jagathjog1996@gmail.com>
+To: jic23@kernel.org, andriy.shevchenko@linux.intel.com,
+ lars@metafoo.de, robh+dt@kernel.org,
+ krzysztof.kozlowski+dt@linaro.org
+Cc: linux-iio@vger.kernel.org, devicetree@vger.kernel.org,
+ linux-kernel@vger.kernel.org
+Subject: [RFC 2/2] iio: imu: Add driver for BMI323 IMU
+Date: Mon, 18 Sep 2023 13:33:14 +0530 [thread overview]
+Message-ID: <20230918080314.11959-3-jagathjog1996@gmail.com> (raw)
+In-Reply-To: <20230918080314.11959-1-jagathjog1996@gmail.com>
+
+The Bosch BMI323 is a 6-axis low-power IMU that provides measurements for
+acceleration, angular rate, and temperature. This sensor includes
+motion-triggered interrupt features, such as a step counter, tap detection,
+and activity/inactivity interrupt capabilities.
+
+The driver supports various functionalities, including data ready, FIFO
+data handling, and events such as tap detection, step counting, and
+activity interrupts.
+
+Signed-off-by: Jagath Jog J <jagathjog1996@gmail.com>
+---
+ MAINTAINERS | 7 +
+ drivers/iio/imu/Kconfig | 1 +
+ drivers/iio/imu/Makefile | 1 +
+ drivers/iio/imu/bmi323/Kconfig | 33 +
+ drivers/iio/imu/bmi323/Makefile | 7 +
+ drivers/iio/imu/bmi323/bmi323.h | 209 +++
+ drivers/iio/imu/bmi323/bmi323_core.c | 2139 +++++++++++++++++++++++
+ drivers/iio/imu/bmi323/bmi323_i2c.c | 121 ++
+ drivers/iio/imu/bmi323/bmi323_spi.c | 92 +
+ 10 files changed, 2628 insertions(+)
+ create mode 100644 drivers/iio/imu/bmi323/Kconfig
+ create mode 100644 drivers/iio/imu/bmi323/Makefile
+ create mode 100644 drivers/iio/imu/bmi323/bmi323.h
+ create mode 100644 drivers/iio/imu/bmi323/bmi323_core.c
+ create mode 100644 drivers/iio/imu/bmi323/bmi323_i2c.c
+ create mode 100644 drivers/iio/imu/bmi323/bmi323_spi.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 4e07c032d06a..47ca415212a7 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -3636,6 +3636,13 @@
+ F: Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
+ F: drivers/iio/accel/bma400*
+
++BOSCH SENSORTEC BMI323 IMU IIO DRIVER
++M: Jagath Jog J <jagathjog1996@gmail.com>
++L: linux-iio@vger.kernel.org
++S: Maintained
++F: Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml
++F: drivers/iio/imu/bmi323/
++
+ BPF JIT for ARM
+ M: Russell King <linux@armlinux.org.uk>
+ M: Puranjay Mohan <puranjay12@gmail.com>
+diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig
+index c2f97629e9cd..6c9a85294bc1 100644
+--- a/drivers/iio/imu/Kconfig
++++ b/drivers/iio/imu/Kconfig
+@@ -54,6 +54,7 @@ config ADIS16480
+
+ source "drivers/iio/imu/bmi160/Kconfig"
+ source "drivers/iio/imu/bno055/Kconfig"
++source "drivers/iio/imu/bmi323/Kconfig"
+
+ config FXOS8700
+ tristate
+diff --git a/drivers/iio/imu/Makefile b/drivers/iio/imu/Makefile
+index 6eb612034722..627406476357 100644
+--- a/drivers/iio/imu/Makefile
++++ b/drivers/iio/imu/Makefile
+@@ -16,6 +16,7 @@ obj-$(CONFIG_IIO_ADIS_LIB) += adis_lib.o
+
+ obj-y += bmi160/
+ obj-y += bno055/
++obj-y += bmi323/
+
+ obj-$(CONFIG_FXOS8700) += fxos8700_core.o
+ obj-$(CONFIG_FXOS8700_I2C) += fxos8700_i2c.o
+diff --git a/drivers/iio/imu/bmi323/Kconfig b/drivers/iio/imu/bmi323/Kconfig
+new file mode 100644
+index 000000000000..ab37b285393c
+--- /dev/null
++++ b/drivers/iio/imu/bmi323/Kconfig
+@@ -0,0 +1,33 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# BMI323 IMU driver
++#
++
++config BMI323
++ tristate
++ select IIO_BUFFER
++ select IIO_TRIGGERED_BUFFER
++
++config BMI323_I2C
++ tristate "Bosch BMI323 I2C driver"
++ depends on I2C
++ select BMI323
++ select REGMAP_I2C
++ help
++ Enable support for the Bosch BMI323 6-Axis IMU connected to I2C
++ interface.
++
++ This driver can also be built as a module. If so, the module will be
++ called bmi323_i2c.
++
++config BMI323_SPI
++ tristate "Bosch BMI323 SPI driver"
++ depends on SPI
++ select BMI323
++ select REGMAP_SPI
++ help
++ Enable support for the Bosch BMI323 6-Axis IMU connected to SPI
++ interface.
++
++ This driver can also be built as a module. If so, the module will be
++ called bmi323_spi.
+diff --git a/drivers/iio/imu/bmi323/Makefile b/drivers/iio/imu/bmi323/Makefile
+new file mode 100644
+index 000000000000..a6a6dc0207c9
+--- /dev/null
++++ b/drivers/iio/imu/bmi323/Makefile
+@@ -0,0 +1,7 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# Makefile for Bosch BMI323 IMU
++#
++obj-$(CONFIG_BMI323) += bmi323_core.o
++obj-$(CONFIG_BMI323_I2C) += bmi323_i2c.o
++obj-$(CONFIG_BMI323_SPI) += bmi323_spi.o
+diff --git a/drivers/iio/imu/bmi323/bmi323.h b/drivers/iio/imu/bmi323/bmi323.h
+new file mode 100644
+index 000000000000..dff126d41658
+--- /dev/null
++++ b/drivers/iio/imu/bmi323/bmi323.h
+@@ -0,0 +1,209 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * IIO driver for Bosch BMI323 6-Axis IMU
++ *
++ * Copyright (C) 2023, Jagath Jog J <jagathjog1996@gmail.com>
++ */
++
++#ifndef _BMI323_H_
++#define _BMI323_H_
++
++#include <linux/bits.h>
++#include <linux/regmap.h>
++#include <linux/units.h>
++
++#define BMI323_I2C_DUMMY 2
++#define BMI323_SPI_DUMMY 1
++
++/* Register map */
++
++#define BMI323_CHIP_ID_REG 0x00
++#define BMI323_CHIP_ID_VAL 0x0043
++#define BMI323_CHIP_ID_MSK GENMASK(7, 0)
++#define BMI323_ERR_REG 0x01
++#define BMI323_STATUS_REG 0x02
++#define BMI323_STATUS_POR_MSK BIT(0)
++
++/* Accelero/Gyro/Temp data registers */
++#define BMI323_ACCEL_X_REG 0x03
++#define BMI323_GYRO_X_REG 0x06
++#define BMI323_TEMP_REG 0x09
++#define BMI323_ALL_CHAN_MSK GENMASK(5, 0)
++
++/* Status registers */
++#define BMI323_STATUS_INT1_REG 0x0D
++#define BMI323_STATUS_INT2_REG 0x0E
++#define BMI323_STATUS_NOMOTION_MSK BIT(0)
++#define BMI323_STATUS_MOTION_MSK BIT(1)
++#define BMI323_STATUS_STP_WTR_MSK BIT(5)
++#define BMI323_STATUS_TAP_MSK BIT(8)
++#define BMI323_STATUS_ERROR_MSK BIT(10)
++#define BMI323_STATUS_TMP_DRDY_MSK BIT(11)
++#define BMI323_STATUS_GYR_DRDY_MSK BIT(12)
++#define BMI323_STATUS_ACC_DRDY_MSK BIT(13)
++#define BMI323_STATUS_ACC_GYR_DRDY_MSK GENMASK(13, 12)
++#define BMI323_STATUS_FIFO_WTRMRK_MSK BIT(14)
++#define BMI323_STATUS_FIFO_FULL_MSK BIT(15)
++
++/* Feature registers */
++#define BMI323_FEAT_IO0_REG 0x10
++#define BMI323_FEAT_IO0_XYZ_NOMOTION_MSK GENMASK(2, 0)
++#define BMI323_FEAT_IO0_XYZ_MOTION_MSK GENMASK(5, 3)
++#define BMI323_FEAT_XYZ_MSK GENMASK(2, 0)
++#define BMI323_FEAT_IO0_STP_CNT_MSK BIT(9)
++#define BMI323_FEAT_IO0_S_TAP_MSK BIT(12)
++#define BMI323_FEAT_IO0_D_TAP_MSK BIT(13)
++#define BMI323_FEAT_IO1_REG 0x11
++#define BMI323_FEAT_IO1_ERR_MSK GENMASK(3, 0)
++#define BMI323_FEAT_IO2_REG 0x12
++#define BMI323_FEAT_IO_STATUS_REG 0x14
++#define BMI323_FEAT_IO_STATUS_MSK BIT(0)
++#define BMI323_FEAT_ENG_POLL 2000
++#define BMI323_FEAT_ENG_TIMEOUT 10000
++
++/* FIFO registers */
++#define BMI323_FIFO_FILL_LEVEL_REG 0x15
++#define BMI323_FIFO_DATA_REG 0x16
++
++/* Accelero/Gyro config registers */
++#define BMI323_ACC_CONF_REG 0x20
++#define BMI323_GYRO_CONF_REG 0x21
++#define BMI323_ACC_GYRO_CONF_MODE_MSK GENMASK(14, 12)
++#define BMI323_ACC_GYRO_CONF_ODR_MSK GENMASK(3, 0)
++#define BMI323_ACC_GYRO_CONF_SCL_MSK GENMASK(6, 4)
++#define BMI323_ACC_GYRO_CONF_BW_MSK BIT(7)
++#define BMI323_ACC_GYRO_CONF_AVG_MSK GENMASK(10, 8)
++
++/* FIFO registers */
++#define BMI323_FIFO_WTRMRK_REG 0x35
++#define BMI323_FIFO_CONF_REG 0x36
++#define BMI323_FIFO_CONF_STP_FUL_MSK BIT(0)
++#define BMI323_FIFO_CONF_ACC_GYR_EN_MSK GENMASK(10, 9)
++#define BMI323_FIFO_ACC_GYR_MSK GENMASK(1, 0)
++#define BMI323_FIFO_CTRL_REG 0x37
++#define BMI323_FIFO_FLUSH_MSK BIT(0)
++
++/* Interrupt pin config registers */
++#define BMI323_IO_INT_CTR_REG 0x38
++#define BMI323_IO_INT1_LVL_MSK BIT(0)
++#define BMI323_IO_INT1_OD_MSK BIT(1)
++#define BMI323_IO_INT1_OP_EN_MSK BIT(2)
++#define BMI323_IO_INT1_LVL_OD_OP_MSK GENMASK(2, 0)
++#define BMI323_IO_INT2_LVL_MSK BIT(8)
++#define BMI323_IO_INT2_OD_MSK BIT(9)
++#define BMI323_IO_INT2_OP_EN_MSK BIT(10)
++#define BMI323_IO_INT2_LVL_OD_OP_MSK GENMASK(10, 8)
++#define BMI323_IO_INT_CONF_REG 0x39
++#define BMI323_IO_INT_LTCH_MSK BIT(0)
++#define BMI323_INT_MAP1_REG 0x3A
++#define BMI323_INT_MAP2_REG 0x3B
++#define BMI323_NOMOTION_MSK GENMASK(1, 0)
++#define BMI323_MOTION_MSK GENMASK(3, 2)
++#define BMI323_STEP_CNT_MSK GENMASK(11, 10)
++#define BMI323_TAP_MSK GENMASK(1, 0)
++#define BMI323_TMP_DRDY_MSK GENMASK(7, 6)
++#define BMI323_GYR_DRDY_MSK GENMASK(9, 8)
++#define BMI323_ACC_DRDY_MSK GENMASK(11, 10)
++#define BMI323_FIFO_WTRMRK_MSK GENMASK(13, 12)
++#define BMI323_FIFO_FULL_MSK GENMASK(15, 14)
++
++/* Feature registers */
++#define BMI323_FEAT_CTRL_REG 0x40
++#define BMI323_FEAT_ENG_EN_MSK BIT(0)
++#define BMI323_FEAT_DATA_ADDR 0x41
++#define BMI323_FEAT_DATA_TX 0x42
++#define BMI323_FEAT_DATA_STATUS 0x43
++#define BMI323_FEAT_DATA_TX_RDY_MSK BIT(1)
++#define BMI323_FEAT_EVNT_EXT_REG 0x47
++#define BMI323_FEAT_EVNT_EXT_S_MSK BIT(3)
++#define BMI323_FEAT_EVNT_EXT_D_MSK BIT(4)
++
++#define BMI323_CMD_REG 0x7E
++#define BMI323_RST_VAL 0xDEAF
++#define BMI323_CFG_RES_REG 0x7F
++
++/* Extended registers */
++#define BMI323_GEN_SET1_REG 0x02
++#define BMI323_GEN_SET1_MODE_MSK BIT(0)
++#define BMI323_GEN_HOLD_DUR_MSK GENMASK(4, 1)
++
++/* Any Motion/No Motion config registers */
++#define BMI323_ANYMO1_REG 0x05
++#define BMI323_NOMO1_REG 0x08
++#define BMI323_MO2_OFFSET 0x01
++#define BMI323_MO3_OFFSET 0x02
++#define BMI323_MO1_REF_UP_MSK BIT(12)
++#define BMI323_MO1_SLOPE_TH_MSK GENMASK(11, 0)
++#define BMI323_MO2_HYSTR_MSK GENMASK(9, 0)
++#define BMI323_MO3_DURA_MSK GENMASK(12, 0)
++
++/* Step counter config registers */
++#define BMI323_STEP_SC1_REG 0x10
++#define BMI323_STEP_SC1_WTRMRK_MSK GENMASK(9, 0)
++#define BMI323_STEP_SC1_RST_CNT_MSK BIT(10)
++#define BMI323_STEP_SC1_REG 0x10
++#define BMI323_STEP_LEN 2
++
++/* Tap gesture config registers */
++#define BMI323_TAP1_REG 0x1E
++#define BMI323_TAP1_AXIS_SEL_MSK GENMASK(1, 0)
++#define BMI323_AXIS_XYZ_MSK GENMASK(1, 0)
++#define BMI323_TAP1_TIMOUT_MSK BIT(2)
++#define BMI323_TAP1_MAX_PEAKS_MSK GENMASK(5, 3)
++#define BMI323_TAP1_MODE_MSK GENMASK(7, 6)
++#define BMI323_TAP2_REG 0x1F
++#define BMI323_TAP2_THRES_MSK GENMASK(9, 0)
++#define BMI323_TAP2_MAX_DUR_MSK GENMASK(15, 10)
++#define BMI323_TAP3_REG 0x20
++#define BMI323_TAP3_QUIET_TIM_MSK GENMASK(15, 12)
++#define BMI323_TAP3_QT_BW_TAP_MSK GENMASK(11, 8)
++#define BMI323_TAP3_QT_AFT_GES_MSK GENMASK(15, 12)
++
++#define BMI323_MOTION_THRES_SCALE 512
++#define BMI323_MOTION_HYSTR_SCALE 512
++#define BMI323_MOTION_DURAT_SCALE 50
++#define BMI323_TAP_THRES_SCALE 512
++#define BMI323_DUR_BW_TAP_SCALE 200
++#define BMI323_QUITE_TIM_GES_SCALE 25
++#define BMI323_MAX_GES_DUR_SCALE 25
++
++/*
++ * The formula to calculate temperature in C.
++ * See datasheet section 6.1.1, Register Map Overview
++ *
++ * T_C = (temp_raw / 512) + 23
++ */
++#define BMI323_TEMP_OFFSET 11776
++#define BMI323_TEMP_SCALE 1953125
++
++/*
++ * The BMI323 features a FIFO with a capacity of 2048 bytes. Each frame
++ * consists of accelerometer (X, Y, Z) data and gyroscope (X, Y, Z) data,
++ * totaling 6 words or 12 bytes. The FIFO buffer can hold a total of
++ * 170 frames.
++ *
++ * If a watermark interrupt is configured for 170 frames, the interrupt will
++ * trigger when the FIFO reaches 169 frames, so limit the maximum watermark
++ * level to 169 frames. In terms of data, 169 frames would equal 1014 words
++ * (2028 bytes), which is approximately 2 frames before the FIFO is full.
++ * See datasheet section 5.7.3 FIFO Buffer Interrupts
++ */
++#define BMI323_BYTES_PER_SAMPLE 2
++#define BMI323_FIFO_LENGTH_IN_BYTES 2048
++#define BMI323_FIFO_FRAME_LENGTH 6
++#define BMI323_FIFO_FULL_IN_FRAMES \
++ ((BMI323_FIFO_LENGTH_IN_BYTES / \
++ (BMI323_BYTES_PER_SAMPLE * BMI323_FIFO_FRAME_LENGTH)) - 1)
++#define BMI323_FIFO_FULL_IN_WORDS \
++ (BMI323_FIFO_FULL_IN_FRAMES * BMI323_FIFO_FRAME_LENGTH)
++
++#define BMI323_INT_MICRO_TO_RAW(val, val2, scale) ((val) * (scale) + \
++ ((val2) * (scale)) / MEGA)
++
++#define BMI323_RAW_TO_MICRO(raw, scale) ((((raw) % (scale)) * MEGA) / scale)
++
++struct device;
++int bmi323_core_probe(struct device *dev);
++extern const struct regmap_config bmi323_regmap_config;
++
++#endif
+diff --git a/drivers/iio/imu/bmi323/bmi323_core.c b/drivers/iio/imu/bmi323/bmi323_core.c
+new file mode 100644
+index 000000000000..0bd5dedd9a63
+--- /dev/null
++++ b/drivers/iio/imu/bmi323/bmi323_core.c
+@@ -0,0 +1,2139 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * IIO core driver for Bosch BMI323 6-Axis IMU.
++ *
++ * Copyright (C) 2023, Jagath Jog J <jagathjog1996@gmail.com>
++ *
++ * Datasheet: https://www.bosch-sensortec.com/media/boschsensortec/downloads/datasheets/bst-bmi323-ds000.pdf
++ */
++
++#include <linux/bitfield.h>
++#include <linux/cleanup.h>
++#include <linux/device.h>
++#include <linux/interrupt.h>
++#include <linux/minmax.h>
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/property.h>
++#include <linux/regmap.h>
++#include <linux/regulator/consumer.h>
++#include <linux/units.h>
++
++#include <asm/unaligned.h>
++
++#include <linux/iio/buffer.h>
++#include <linux/iio/events.h>
++#include <linux/iio/iio.h>
++#include <linux/iio/sysfs.h>
++#include <linux/iio/trigger.h>
++#include <linux/iio/trigger_consumer.h>
++#include <linux/iio/triggered_buffer.h>
++
++#include "bmi323.h"
++
++enum bmi323_sensor_type {
++ BMI323_ACCEL,
++ BMI323_GYRO,
++ BMI323_SENSORS_CNT,
++};
++
++enum bmi323_opr_mode {
++ ACC_GYRO_MODE_DISABLE = 0x00,
++ GYRO_DRIVE_MODE_ENABLED = 0x01,
++ ACC_GYRO_MODE_DUTYCYCLE = 0x03,
++ ACC_GYRO_MODE_CONTINOUS = 0x04,
++ ACC_GYRO_MODE_HIGH_PERF = 0x07,
++};
++
++enum bmi323_state {
++ BMI323_IDLE,
++ BMI323_BUFFER_DRDY_TRIGGERED,
++ BMI323_BUFFER_FIFO,
++};
++
++enum bmi323_irq_pin {
++ BMI323_IRQ_DISABLED,
++ BMI323_IRQ_INT1,
++ BMI323_IRQ_INT2,
++};
++
++enum bmi323_3db_bw {
++ BMI323_BW_ODR_BY_2,
++ BMI323_BW_ODR_BY_4,
++};
++
++enum bmi323_scan {
++ BMI323_ACCEL_X,
++ BMI323_ACCEL_Y,
++ BMI323_ACCEL_Z,
++ BMI323_GYRO_X,
++ BMI323_GYRO_Y,
++ BMI323_GYRO_Z,
++ BMI323_CHAN_MAX
++};
++
++struct bmi323_hw {
++ u8 data;
++ u8 config;
++ const int (*scale_table)[2];
++ int scale_table_len;
++};
++
++/*
++ * The accelerometer supports +-2G/4G/8G/16G ranges, and the resolution of
++ * each sample is 16 bits, signed.
++ * At +-8G the scale can be calculated by
++ * ((8 + 8) * 9.80665 / (2^16 - 1)) * 10^6 = 2394.23819 scale in micro
++ *
++ */
++static const int bmi323_accel_scale[][2] = {
++ { 0, 598 },
++ { 0, 1197 },
++ { 0, 2394 },
++ { 0, 4788 },
++};
++
++static const int bmi323_gyro_scale[][2] = {
++ { 0, 66 },
++ { 0, 133 },
++ { 0, 266 },
++ { 0, 532 },
++ { 0, 1065 },
++};
++
++static const int bmi323_accel_gyro_avrg[] = {0, 2, 4, 8, 16, 32, 64};
++
++static const struct bmi323_hw bmi323_hw[2] = {
++ [BMI323_ACCEL] = {
++ .data = BMI323_ACCEL_X_REG,
++ .config = BMI323_ACC_CONF_REG,
++ .scale_table = bmi323_accel_scale,
++ .scale_table_len = ARRAY_SIZE(bmi323_accel_scale),
++ },
++ [BMI323_GYRO] = {
++ .data = BMI323_GYRO_X_REG,
++ .config = BMI323_GYRO_CONF_REG,
++ .scale_table = bmi323_gyro_scale,
++ .scale_table_len = ARRAY_SIZE(bmi323_gyro_scale),
++ },
++};
++
++struct bmi323_data {
++ struct device *dev;
++ struct regmap *regmap;
++ struct iio_mount_matrix orientation;
++ enum bmi323_irq_pin irq_pin;
++ struct iio_trigger *trig;
++ bool drdy_trigger_enabled;
++ enum bmi323_state state;
++ s64 fifo_tstamp, old_fifo_tstamp;
++ u32 odrns[BMI323_SENSORS_CNT];
++ u32 odrhz[BMI323_SENSORS_CNT];
++ unsigned int feature_events;
++
++ /*
++ * Lock to protect the members of device's private data from concurrent
++ * access and also to serialize the access of extended registers.
++ * See bmi323_write_ext_reg(..) for more info.
++ */
++ struct mutex mutex;
++ int watermark;
++ __le16 fifo_buff[BMI323_FIFO_FULL_IN_WORDS] __aligned(IIO_DMA_MINALIGN);
++ struct {
++ __le16 channels[BMI323_CHAN_MAX];
++ s64 ts __aligned(8);
++ } buffer;
++ __le16 steps_count[BMI323_STEP_LEN];
++};
++
++static const struct iio_mount_matrix *
++bmi323_get_mount_matrix(const struct iio_dev *idev,
++ const struct iio_chan_spec *chan)
++{
++ struct bmi323_data *data = iio_priv(idev);
++
++ return &data->orientation;
++}
++
++static const struct iio_chan_spec_ext_info bmi323_ext_info[] = {
++ IIO_MOUNT_MATRIX(IIO_SHARED_BY_TYPE, bmi323_get_mount_matrix),
++ { }
++};
++
++static const struct iio_event_spec bmi323_step_wtrmrk_event = {
++ .type = IIO_EV_TYPE_CHANGE,
++ .dir = IIO_EV_DIR_NONE,
++ .mask_shared_by_type = BIT(IIO_EV_INFO_ENABLE) |
++ BIT(IIO_EV_INFO_VALUE),
++};
++
++static const struct iio_event_spec bmi323_accel_event[] = {
++ {
++ .type = IIO_EV_TYPE_MAG,
++ .dir = IIO_EV_DIR_FALLING,
++ .mask_shared_by_type = BIT(IIO_EV_INFO_VALUE) |
++ BIT(IIO_EV_INFO_PERIOD) |
++ BIT(IIO_EV_INFO_HYSTERESIS) |
++ BIT(IIO_EV_INFO_ENABLE),
++ },
++ {
++ .type = IIO_EV_TYPE_MAG,
++ .dir = IIO_EV_DIR_RISING,
++ .mask_shared_by_type = BIT(IIO_EV_INFO_VALUE) |
++ BIT(IIO_EV_INFO_PERIOD) |
++ BIT(IIO_EV_INFO_HYSTERESIS) |
++ BIT(IIO_EV_INFO_ENABLE),
++ },
++ {
++ .type = IIO_EV_TYPE_GESTURE,
++ .dir = IIO_EV_DIR_SINGLETAP,
++ .mask_shared_by_type = BIT(IIO_EV_INFO_ENABLE) |
++ BIT(IIO_EV_INFO_VALUE) |
++ BIT(IIO_EV_INFO_RESET_TIMEOUT),
++ },
++ {
++ .type = IIO_EV_TYPE_GESTURE,
++ .dir = IIO_EV_DIR_DOUBLETAP,
++ .mask_shared_by_type = BIT(IIO_EV_INFO_ENABLE) |
++ BIT(IIO_EV_INFO_VALUE) |
++ BIT(IIO_EV_INFO_RESET_TIMEOUT) |
++ BIT(IIO_EV_INFO_TAP2_MIN_DELAY),
++ },
++};
++
++#define BMI323_ACCEL_CHANNEL(_type, _axis, _index) { \
++ .type = _type, \
++ .modified = 1, \
++ .channel2 = IIO_MOD_##_axis, \
++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \
++ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ) | \
++ BIT(IIO_CHAN_INFO_SCALE) | \
++ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \
++ .info_mask_shared_by_type_available = \
++ BIT(IIO_CHAN_INFO_SAMP_FREQ) | \
++ BIT(IIO_CHAN_INFO_SCALE) | \
++ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \
++ .scan_index = _index, \
++ .scan_type = { \
++ .sign = 's', \
++ .realbits = 16, \
++ .storagebits = 16, \
++ .endianness = IIO_LE, \
++ }, \
++ .ext_info = bmi323_ext_info, \
++ .event_spec = bmi323_accel_event, \
++ .num_event_specs = ARRAY_SIZE(bmi323_accel_event), \
++}
++
++#define BMI323_GYRO_CHANNEL(_type, _axis, _index) { \
++ .type = _type, \
++ .modified = 1, \
++ .channel2 = IIO_MOD_##_axis, \
++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \
++ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ) | \
++ BIT(IIO_CHAN_INFO_SCALE) | \
++ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \
++ .info_mask_shared_by_type_available = \
++ BIT(IIO_CHAN_INFO_SAMP_FREQ) | \
++ BIT(IIO_CHAN_INFO_SCALE) | \
++ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \
++ .scan_index = _index, \
++ .scan_type = { \
++ .sign = 's', \
++ .realbits = 16, \
++ .storagebits = 16, \
++ .endianness = IIO_LE, \
++ }, \
++ .ext_info = bmi323_ext_info, \
++}
++
++static const struct iio_chan_spec bmi323_channels[] = {
++ BMI323_ACCEL_CHANNEL(IIO_ACCEL, X, BMI323_ACCEL_X),
++ BMI323_ACCEL_CHANNEL(IIO_ACCEL, Y, BMI323_ACCEL_Y),
++ BMI323_ACCEL_CHANNEL(IIO_ACCEL, Z, BMI323_ACCEL_Z),
++ BMI323_GYRO_CHANNEL(IIO_ANGL_VEL, X, BMI323_GYRO_X),
++ BMI323_GYRO_CHANNEL(IIO_ANGL_VEL, Y, BMI323_GYRO_Y),
++ BMI323_GYRO_CHANNEL(IIO_ANGL_VEL, Z, BMI323_GYRO_Z),
++ {
++ .type = IIO_TEMP,
++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
++ BIT(IIO_CHAN_INFO_OFFSET) |
++ BIT(IIO_CHAN_INFO_SCALE),
++ .scan_index = -1,
++ },
++ {
++ .type = IIO_STEPS,
++ .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) |
++ BIT(IIO_CHAN_INFO_ENABLE),
++ .scan_index = -1,
++ .event_spec = &bmi323_step_wtrmrk_event,
++ .num_event_specs = 1,
++
++ },
++ IIO_CHAN_SOFT_TIMESTAMP(BMI323_CHAN_MAX),
++};
++
++static const int bmi323_acc_gyro_odr[][2] = {
++ { 0, 781250 },
++ { 1, 562500 },
++ { 3, 125000 },
++ { 6, 250000 },
++ { 12, 500000 },
++ { 25, 0 },
++ { 50, 0 },
++ { 100, 0 },
++ { 200, 0 },
++ { 400, 0 },
++ { 800, 0 },
++};
++
++static const int bmi323_acc_gyro_odrns[] = {
++ 1280 * MEGA,
++ 640 * MEGA,
++ 320 * MEGA,
++ 160 * MEGA,
++ 80 * MEGA,
++ 40 * MEGA,
++ 20 * MEGA,
++ 10 * MEGA,
++ 5 * MEGA,
++ 2500 * KILO,
++ 1250 * KILO,
++};
++
++static enum bmi323_sensor_type bmi323_iio_to_sensor(enum iio_chan_type iio_type)
++{
++ switch (iio_type) {
++ case IIO_ACCEL:
++ return BMI323_ACCEL;
++ case IIO_ANGL_VEL:
++ return BMI323_GYRO;
++ default:
++ return -EINVAL;
++ }
++}
++
++static int bmi323_set_mode(struct bmi323_data *data,
++ enum bmi323_sensor_type sensor,
++ enum bmi323_opr_mode mode)
++{
++ guard(mutex)(&data->mutex);
++ return regmap_update_bits(data->regmap, bmi323_hw[sensor].config,
++ BMI323_ACC_GYRO_CONF_MODE_MSK,
++ FIELD_PREP(BMI323_ACC_GYRO_CONF_MODE_MSK,
++ mode));
++}
++
++/*
++ * When writing data to extended register there must be no communication to
++ * any other register before write transaction is complete.
++ * See datasheet section 6.2 Extended Register Map Description.
++ */
++static int bmi323_write_ext_reg(struct bmi323_data *data, unsigned int ext_addr,
++ unsigned int ext_data)
++{
++ int ret, feature_status;
++
++ ret = regmap_read(data->regmap, BMI323_FEAT_DATA_STATUS,
++ &feature_status);
++ if (ret)
++ return ret;
++
++ if (!FIELD_GET(BMI323_FEAT_DATA_TX_RDY_MSK, feature_status))
++ return -EBUSY;
++
++ ret = regmap_write(data->regmap, BMI323_FEAT_DATA_ADDR, ext_addr);
++ if (ret)
++ return ret;
++
++ return regmap_write(data->regmap, BMI323_FEAT_DATA_TX, ext_data);
++}
++
++/*
++ * When reading data from extended register there must be no communication to
++ * any other register before read transaction is complete.
++ * See datasheet section 6.2 Extended Register Map Description.
++ */
++static int bmi323_read_ext_reg(struct bmi323_data *data, unsigned int ext_addr,
++ unsigned int *ext_data)
++{
++ int ret, feature_status;
++
++ ret = regmap_read(data->regmap, BMI323_FEAT_DATA_STATUS,
++ &feature_status);
++ if (ret)
++ return ret;
++
++ if (!FIELD_GET(BMI323_FEAT_DATA_TX_RDY_MSK, feature_status))
++ return -EBUSY;
++
++ ret = regmap_write(data->regmap, BMI323_FEAT_DATA_ADDR, ext_addr);
++ if (ret)
++ return ret;
++
++ return regmap_read(data->regmap, BMI323_FEAT_DATA_TX, ext_data);
++}
++
++static int bmi323_update_ext_reg(struct bmi323_data *data,
++ unsigned int ext_addr,
++ unsigned int mask, unsigned int ext_data)
++{
++ unsigned int value;
++ int ret;
++
++ ret = bmi323_read_ext_reg(data, ext_addr, &value);
++ if (ret)
++ return ret;
++
++ set_mask_bits(&value, mask, ext_data);
++
++ return bmi323_write_ext_reg(data, ext_addr, value);
++}
++
++static int bmi323_get_error_status(struct bmi323_data *data)
++{
++ int error, ret;
++
++ guard(mutex)(&data->mutex);
++ ret = regmap_read(data->regmap, BMI323_ERR_REG, &error);
++ if (ret)
++ return ret;
++
++ if (error)
++ dev_err(data->dev, "Sensor error 0x%x\n", error);
++
++ return error;
++}
++
++static int bmi323_feature_engine_events(struct bmi323_data *data,
++ const unsigned int event_mask,
++ bool state)
++{
++ unsigned int value;
++ int ret;
++
++ ret = regmap_read(data->regmap, BMI323_FEAT_IO0_REG, &value);
++ if (ret)
++ return ret;
++
++ /* Register must be cleared before changing an active config */
++ ret = regmap_write(data->regmap, BMI323_FEAT_IO0_REG, 0);
++ if (ret)
++ return ret;
++
++ if (state)
++ value |= event_mask;
++ else
++ value &= ~event_mask;
++
++ ret = regmap_write(data->regmap, BMI323_FEAT_IO0_REG, value);
++ if (ret)
++ return ret;
++
++ return regmap_write(data->regmap, BMI323_FEAT_IO_STATUS_REG,
++ BMI323_FEAT_IO_STATUS_MSK);
++}
++
++/*
++ * Enable/disable the step-counter watermark event. Fails with -EINVAL unless
++ * the BMI323_FEAT_IO0_STP_CNT_MSK bit is set in data->feature_events. Writes
++ * @state (normalised to 0/1) into the BMI323_STEP_SC1_WTRMRK_MSK field and
++ * maps the step-count interrupt to data->irq_pin (enable) or
++ * BMI323_IRQ_DISABLED (disable).
++ */
++static int bmi323_step_wtrmrk_en(struct bmi323_data *data, int state)
++{
++	enum bmi323_irq_pin pin;
++	int err;
++
++	guard(mutex)(&data->mutex);
++	if (!FIELD_GET(BMI323_FEAT_IO0_STP_CNT_MSK, data->feature_events))
++		return -EINVAL;
++
++	pin = state ? data->irq_pin : BMI323_IRQ_DISABLED;
++
++	err = bmi323_update_ext_reg(data, BMI323_STEP_SC1_REG,
++				    BMI323_STEP_SC1_WTRMRK_MSK,
++				    FIELD_PREP(BMI323_STEP_SC1_WTRMRK_MSK,
++					       !!state));
++	if (err)
++		return err;
++
++	return regmap_update_bits(data->regmap, BMI323_INT_MAP1_REG,
++				  BMI323_STEP_CNT_MSK,
++				  FIELD_PREP(BMI323_STEP_CNT_MSK, pin));
++}
++
++/*
++ * Map an event direction to its motion configuration register:
++ * rising -> BMI323_ANYMO1_REG, falling -> BMI323_NOMO1_REG.
++ * Any other direction yields -EINVAL.
++ */
++static int bmi323_motion_config_reg(enum iio_event_direction dir)
++{
++	if (dir == IIO_EV_DIR_RISING)
++		return BMI323_ANYMO1_REG;
++
++	if (dir == IIO_EV_DIR_FALLING)
++		return BMI323_NOMO1_REG;
++
++	return -EINVAL;
++}
++
++/*
++ * Enable or disable the motion event for @dir (rising uses the
++ * BMI323_ANYMO1_REG configuration, falling uses BMI323_NOMO1_REG); other
++ * directions return -EINVAL. Under data->mutex it updates the feature
++ * engine event bits, clears the BMI323_MO1_REF_UP_MSK field, programs an
++ * initial threshold, routes the interrupt to data->irq_pin (or disables it)
++ * and finally mirrors the new state into data->feature_events.
++ */
++static int bmi323_motion_event_en(struct bmi323_data *data,
++				  enum iio_event_direction dir, int state)
++{
++	unsigned int axes = state ? BMI323_FEAT_XYZ_MSK : 0;
++	int cfg_reg, err, feat_msk, init_thres, feat_val;
++	enum bmi323_irq_pin pin;
++	int map_msk, map_val;
++
++	pin = state ? data->irq_pin : BMI323_IRQ_DISABLED;
++
++	/* FIELD_PREP() needs constant masks, hence one arm per direction */
++	switch (dir) {
++	case IIO_EV_DIR_RISING:
++		cfg_reg = BMI323_ANYMO1_REG;
++		init_thres = 512;
++		feat_msk = BMI323_FEAT_IO0_XYZ_MOTION_MSK;
++		feat_val = FIELD_PREP(BMI323_FEAT_IO0_XYZ_MOTION_MSK, axes);
++		map_msk = BMI323_MOTION_MSK;
++		map_val = FIELD_PREP(BMI323_MOTION_MSK, pin);
++		break;
++	case IIO_EV_DIR_FALLING:
++		cfg_reg = BMI323_NOMO1_REG;
++		init_thres = 0;
++		feat_msk = BMI323_FEAT_IO0_XYZ_NOMOTION_MSK;
++		feat_val = FIELD_PREP(BMI323_FEAT_IO0_XYZ_NOMOTION_MSK, axes);
++		map_msk = BMI323_NOMOTION_MSK;
++		map_val = FIELD_PREP(BMI323_NOMOTION_MSK, pin);
++		break;
++	default:
++		return -EINVAL;
++	}
++
++	guard(mutex)(&data->mutex);
++	err = bmi323_feature_engine_events(data, feat_msk, state);
++	if (err)
++		return err;
++
++	err = bmi323_update_ext_reg(data, cfg_reg,
++				    BMI323_MO1_REF_UP_MSK,
++				    FIELD_PREP(BMI323_MO1_REF_UP_MSK, 0));
++	if (err)
++		return err;
++
++	/* Set initial value to avoid interrupts while enabling */
++	err = bmi323_update_ext_reg(data, cfg_reg,
++				    BMI323_MO1_SLOPE_TH_MSK,
++				    FIELD_PREP(BMI323_MO1_SLOPE_TH_MSK,
++					       init_thres));
++	if (err)
++		return err;
++
++	err = regmap_update_bits(data->regmap, BMI323_INT_MAP1_REG, map_msk,
++				 map_val);
++	if (err)
++		return err;
++
++	/* Hardware is configured; now record the state in the software copy */
++	set_mask_bits(&data->feature_events, feat_msk, feat_val);
++
++	return 0;
++}
++
++/*
++ * Enable or disable single-tap / double-tap detection (@dir selects which).
++ * Refused with -EINVAL when data->odrhz[BMI323_ACCEL] is below 200 or when
++ * @dir is neither tap direction. The tap interrupt stays mapped to
++ * data->irq_pin while at least one tap event is enabled. When enabling, the
++ * BMI323_TAP1_REG fields (max peaks, axis selection, timeout) are programmed.
++ */
++static int bmi323_tap_event_en(struct bmi323_data *data,
++			       enum iio_event_direction dir, int state)
++{
++	enum bmi323_irq_pin pin;
++	int err, any_tap, tap_msk, tap_val;
++
++	guard(mutex)(&data->mutex);
++
++	if (data->odrhz[BMI323_ACCEL] < 200) {
++		dev_err(data->dev, "Invalid accelrometer parameter\n");
++		return -EINVAL;
++	}
++
++	/* FIELD_PREP() needs a constant mask, so pick mask and value together */
++	switch (dir) {
++	case IIO_EV_DIR_SINGLETAP:
++		tap_msk = BMI323_FEAT_IO0_S_TAP_MSK;
++		tap_val = FIELD_PREP(BMI323_FEAT_IO0_S_TAP_MSK, state);
++		break;
++	case IIO_EV_DIR_DOUBLETAP:
++		tap_msk = BMI323_FEAT_IO0_D_TAP_MSK;
++		tap_val = FIELD_PREP(BMI323_FEAT_IO0_D_TAP_MSK, state);
++		break;
++	default:
++		return -EINVAL;
++	}
++
++	err = bmi323_feature_engine_events(data, tap_msk, state);
++	if (err)
++		return err;
++
++	set_mask_bits(&data->feature_events, tap_msk, tap_val);
++
++	any_tap = FIELD_GET(BMI323_FEAT_IO0_S_TAP_MSK |
++			    BMI323_FEAT_IO0_D_TAP_MSK,
++			    data->feature_events);
++
++	pin = any_tap ? data->irq_pin : BMI323_IRQ_DISABLED;
++
++	err = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG,
++				 BMI323_TAP_MSK,
++				 FIELD_PREP(BMI323_TAP_MSK, pin));
++	if (err)
++		return err;
++
++	/* Nothing more to program when an event is being switched off */
++	if (!state)
++		return 0;
++
++	err = bmi323_update_ext_reg(data, BMI323_TAP1_REG,
++				    BMI323_TAP1_MAX_PEAKS_MSK,
++				    FIELD_PREP(BMI323_TAP1_MAX_PEAKS_MSK,
++					       0x04));
++	if (err)
++		return err;
++
++	err = bmi323_update_ext_reg(data, BMI323_TAP1_REG,
++				    BMI323_TAP1_AXIS_SEL_MSK,
++				    FIELD_PREP(BMI323_TAP1_AXIS_SEL_MSK,
++					       BMI323_AXIS_XYZ_MSK));
++	if (err)
++		return err;
++
++	return bmi323_update_ext_reg(data, BMI323_TAP1_REG,
++				     BMI323_TAP1_TIMOUT_MSK,
++				     FIELD_PREP(BMI323_TAP1_TIMOUT_MSK,
++						0));
++}
++
++/*
++ * sysfs show: read the BMI323_TAP2_MAX_DUR_MSK field of BMI323_TAP2_REG (under
++ * data->mutex) and print it as an integer-plus-micro value scaled by
++ * BMI323_MAX_GES_DUR_SCALE.
++ */
++static ssize_t in_accel_gesture_tap_wait_dur_show(struct device *dev,
++						  struct device_attribute *attr,
++						  char *buf)
++{
++	struct bmi323_data *data = iio_priv(dev_to_iio_dev(dev));
++	unsigned int tap2, dur;
++	int err, vals[2];
++
++	scoped_guard(mutex, &data->mutex) {
++		err = bmi323_read_ext_reg(data, BMI323_TAP2_REG, &tap2);
++		if (err)
++			return err;
++	}
++
++	dur = FIELD_GET(BMI323_TAP2_MAX_DUR_MSK, tap2);
++	vals[0] = dur / BMI323_MAX_GES_DUR_SCALE;
++	vals[1] = BMI323_RAW_TO_MICRO(dur, BMI323_MAX_GES_DUR_SCALE);
++
++	return iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, ARRAY_SIZE(vals),
++				vals);
++}
++
++/*
++ * sysfs store: parse a fixed-point value, convert it to a raw count with
++ * BMI323_MAX_GES_DUR_SCALE, reject it with -EINVAL unless it passes
++ * in_range(raw, 0, 64), and write it to the BMI323_TAP2_MAX_DUR_MSK field of
++ * BMI323_TAP2_REG. Returns @len on success.
++ */
++static ssize_t in_accel_gesture_tap_wait_dur_store(struct device *dev,
++						   struct device_attribute *attr,
++						   const char *buf, size_t len)
++{
++	struct bmi323_data *data = iio_priv(dev_to_iio_dev(dev));
++	int err, whole, micro, dur;
++
++	err = iio_str_to_fixpoint(buf, 100000, &whole, &micro);
++	if (err)
++		return err;
++
++	dur = BMI323_INT_MICRO_TO_RAW(whole, micro,
++				      BMI323_MAX_GES_DUR_SCALE);
++	if (!in_range(dur, 0, 64))
++		return -EINVAL;
++
++	guard(mutex)(&data->mutex);
++	err = bmi323_update_ext_reg(data, BMI323_TAP2_REG,
++				    BMI323_TAP2_MAX_DUR_MSK,
++				    FIELD_PREP(BMI323_TAP2_MAX_DUR_MSK, dur));
++
++	return err ? err : len;
++}
++
++/*
++ * Maximum time after the first tap within which the second tap is expected
++ * to happen. This duration only applies when gesture_tap_wait_timeout is
++ * enabled.
++ */
++static IIO_DEVICE_ATTR_RW(in_accel_gesture_tap_wait_dur, 0);
++
++/*
++ * sysfs show: print the BMI323_TAP1_TIMOUT_MSK field of BMI323_TAP1_REG as a
++ * plain integer. The register is read under data->mutex.
++ */
++static ssize_t in_accel_gesture_tap_wait_timeout_show(struct device *dev,
++						      struct device_attribute *attr,
++						      char *buf)
++{
++	struct bmi323_data *data = iio_priv(dev_to_iio_dev(dev));
++	unsigned int tap1, enabled;
++	int err;
++
++	scoped_guard(mutex, &data->mutex) {
++		err = bmi323_read_ext_reg(data, BMI323_TAP1_REG, &tap1);
++		if (err)
++			return err;
++	}
++
++	enabled = FIELD_GET(BMI323_TAP1_TIMOUT_MSK, tap1);
++
++	return iio_format_value(buf, IIO_VAL_INT, 1, &enabled);
++}
++
++/*
++ * sysfs store: parse a boolean with kstrtobool() and write it to the
++ * BMI323_TAP1_TIMOUT_MSK field of BMI323_TAP1_REG under data->mutex.
++ * Returns @len on success, otherwise the parse or register access error.
++ */
++static ssize_t in_accel_gesture_tap_wait_timeout_store(struct device *dev,
++						       struct device_attribute *attr,
++						       const char *buf,
++						       size_t len)
++{
++	struct bmi323_data *data = iio_priv(dev_to_iio_dev(dev));
++	bool enable;
++	int err;
++
++	err = kstrtobool(buf, &enable);
++	if (err)
++		return err;
++
++	guard(mutex)(&data->mutex);
++	err = bmi323_update_ext_reg(data, BMI323_TAP1_REG,
++				    BMI323_TAP1_TIMOUT_MSK,
++				    FIELD_PREP(BMI323_TAP1_TIMOUT_MSK, enable));
++
++	return err ? err : len;
++}
++
++/*
++ * Enable/disable gesture confirmation with wait time. The value is parsed
++ * as a boolean and kept in the BMI323_TAP1_TIMOUT_MSK field of
++ * BMI323_TAP1_REG.
++ */
++static IIO_DEVICE_ATTR_RW(in_accel_gesture_tap_wait_timeout, 0);
++
++/*
++ * Constant "*_available" attributes for the tap and motion event parameters.
++ * NOTE(review): the bracketed triples look like the IIO "[min step max]"
++ * range notation - confirm against the IIO sysfs ABI documentation.
++ */
++static IIO_CONST_ATTR(in_accel_gesture_tap_wait_dur_available,
++ "[0.0 0.04 2.52]");
++
++static IIO_CONST_ATTR(in_accel_gesture_doubletap_tap2_min_delay_available,
++ "[0.005 0.005 0.075]");
++
++static IIO_CONST_ATTR(in_accel_gesture_tap_reset_timeout_available,
++ "[0.04 0.04 0.6]");
++
++static IIO_CONST_ATTR(in_accel_gesture_tap_value_available, "[0.0 0.002 1.99]");
++
++static IIO_CONST_ATTR(in_accel_mag_value_available, "[0.0 0.002 7.99]");
++
++static IIO_CONST_ATTR(in_accel_mag_period_available, "[0.0 0.02 162.0]");
++
++static IIO_CONST_ATTR(in_accel_mag_hysteresis_available, "[0.0 0.002 1.99]");
++
++/*
++ * NULL-terminated list of the extra event attributes: the constant
++ * "*_available" entries plus the read/write tap wait_timeout and wait_dur
++ * attributes.
++ */
++static struct attribute *bmi323_event_attributes[] = {
++ &iio_const_attr_in_accel_gesture_tap_value_available.dev_attr.attr,
++ &iio_const_attr_in_accel_gesture_tap_reset_timeout_available.dev_attr.attr,
++ &iio_const_attr_in_accel_gesture_doubletap_tap2_min_delay_available.dev_attr.attr,
++ &iio_const_attr_in_accel_gesture_tap_wait_dur_available.dev_attr.attr,
++ &iio_dev_attr_in_accel_gesture_tap_wait_timeout.dev_attr.attr,
++ &iio_dev_attr_in_accel_gesture_tap_wait_dur.dev_attr.attr,
++ &iio_const_attr_in_accel_mag_value_available.dev_attr.attr,
++ &iio_const_attr_in_accel_mag_period_available.dev_attr.attr,
++ &iio_const_attr_in_accel_mag_hysteresis_available.dev_attr.attr,
++ NULL
++};
++
++/* Attribute group wrapping the bmi323_event_attributes list */
++static const struct attribute_group bmi323_event_attribute_group = {
++ .attrs = bmi323_event_attributes,
++};
++
++/*
++ * Event enable/disable entry point: dispatch on the event @type to the
++ * motion (MAG), tap (GESTURE) or step watermark (CHANGE) helper. Any other
++ * type is rejected with -EINVAL.
++ */
++static int bmi323_write_event_config(struct iio_dev *indio_dev,
++				     const struct iio_chan_spec *chan,
++				     enum iio_event_type type,
++				     enum iio_event_direction dir, int state)
++{
++	struct bmi323_data *data = iio_priv(indio_dev);
++
++	if (type == IIO_EV_TYPE_MAG)
++		return bmi323_motion_event_en(data, dir, state);
++
++	if (type == IIO_EV_TYPE_GESTURE)
++		return bmi323_tap_event_en(data, dir, state);
++
++	if (type == IIO_EV_TYPE_CHANGE)
++		return bmi323_step_wtrmrk_en(data, state);
++
++	return -EINVAL;
++}
++
++/*
++ * Report whether an event is currently enabled. For IIO_STEPS channels the
++ * answer comes from the BMI323_STEP_CNT_MSK field of BMI323_INT_MAP1_REG; for
++ * IIO_ACCEL channels it comes from the cached data->feature_events bits
++ * selected by @dir. Unsupported channel types or directions give -EINVAL.
++ */
++static int bmi323_read_event_config(struct iio_dev *indio_dev,
++				    const struct iio_chan_spec *chan,
++				    enum iio_event_type type,
++				    enum iio_event_direction dir)
++{
++	struct bmi323_data *data = iio_priv(indio_dev);
++	int err, map1;
++
++	guard(mutex)(&data->mutex);
++
++	if (chan->type == IIO_STEPS) {
++		err = regmap_read(data->regmap, BMI323_INT_MAP1_REG, &map1);
++		if (err)
++			return err;
++
++		return !!FIELD_GET(BMI323_STEP_CNT_MSK, map1);
++	}
++
++	if (chan->type != IIO_ACCEL)
++		return -EINVAL;
++
++	switch (dir) {
++	case IIO_EV_DIR_SINGLETAP:
++		return FIELD_GET(BMI323_FEAT_IO0_S_TAP_MSK,
++				 data->feature_events);
++	case IIO_EV_DIR_DOUBLETAP:
++		return FIELD_GET(BMI323_FEAT_IO0_D_TAP_MSK,
++				 data->feature_events);
++	case IIO_EV_DIR_RISING:
++		/* Multi-bit field: any bit set counts as enabled */
++		return !!FIELD_GET(BMI323_FEAT_IO0_XYZ_MOTION_MSK,
++				   data->feature_events);
++	case IIO_EV_DIR_FALLING:
++		return !!FIELD_GET(BMI323_FEAT_IO0_XYZ_NOMOTION_MSK,
++				   data->feature_events);
++	default:
++		return -EINVAL;
++	}
++}
++
++/* Tap (gesture) parameters; the caller holds data->mutex. */
++static int bmi323_write_gesture_event_value(struct bmi323_data *data,
++					    enum iio_event_info info,
++					    int val, int val2)
++{
++	unsigned int raw;
++
++	switch (info) {
++	case IIO_EV_INFO_VALUE:
++		if (!in_range(val, 0, 2))
++			return -EINVAL;
++
++		raw = BMI323_INT_MICRO_TO_RAW(val, val2,
++					      BMI323_TAP_THRES_SCALE);
++
++		return bmi323_update_ext_reg(data, BMI323_TAP2_REG,
++					     BMI323_TAP2_THRES_MSK,
++					     FIELD_PREP(BMI323_TAP2_THRES_MSK,
++							raw));
++	case IIO_EV_INFO_RESET_TIMEOUT:
++		if (val || !in_range(val2, 40000, 560001))
++			return -EINVAL;
++
++		raw = BMI323_INT_MICRO_TO_RAW(val, val2,
++					      BMI323_QUITE_TIM_GES_SCALE);
++
++		return bmi323_update_ext_reg(data, BMI323_TAP3_REG,
++					     BMI323_TAP3_QT_AFT_GES_MSK,
++					     FIELD_PREP(BMI323_TAP3_QT_AFT_GES_MSK,
++							raw));
++	case IIO_EV_INFO_TAP2_MIN_DELAY:
++		if (val || !in_range(val2, 5000, 70001))
++			return -EINVAL;
++
++		raw = BMI323_INT_MICRO_TO_RAW(val, val2,
++					      BMI323_DUR_BW_TAP_SCALE);
++
++		return bmi323_update_ext_reg(data, BMI323_TAP3_REG,
++					     BMI323_TAP3_QT_BW_TAP_MSK,
++					     FIELD_PREP(BMI323_TAP3_QT_BW_TAP_MSK,
++							raw));
++	default:
++		return -EINVAL;
++	}
++}
++
++/* Any-motion / no-motion parameters; the caller holds data->mutex. */
++static int bmi323_write_motion_event_value(struct bmi323_data *data,
++					   enum iio_event_direction dir,
++					   enum iio_event_info info,
++					   int val, int val2)
++{
++	unsigned int raw;
++	int base;
++
++	base = bmi323_motion_config_reg(dir);
++	if (base < 0)
++		return -EINVAL;
++
++	switch (info) {
++	case IIO_EV_INFO_VALUE:
++		if (!in_range(val, 0, 8))
++			return -EINVAL;
++
++		raw = BMI323_INT_MICRO_TO_RAW(val, val2,
++					      BMI323_MOTION_THRES_SCALE);
++
++		return bmi323_update_ext_reg(data, base,
++					     BMI323_MO1_SLOPE_TH_MSK,
++					     FIELD_PREP(BMI323_MO1_SLOPE_TH_MSK,
++							raw));
++	case IIO_EV_INFO_PERIOD:
++		if (!in_range(val, 0, 163))
++			return -EINVAL;
++
++		raw = BMI323_INT_MICRO_TO_RAW(val, val2,
++					      BMI323_MOTION_DURAT_SCALE);
++
++		return bmi323_update_ext_reg(data,
++					     base + BMI323_MO3_OFFSET,
++					     BMI323_MO3_DURA_MSK,
++					     FIELD_PREP(BMI323_MO3_DURA_MSK,
++							raw));
++	case IIO_EV_INFO_HYSTERESIS:
++		if (!in_range(val, 0, 2))
++			return -EINVAL;
++
++		raw = BMI323_INT_MICRO_TO_RAW(val, val2,
++					      BMI323_MOTION_HYSTR_SCALE);
++
++		return bmi323_update_ext_reg(data,
++					     base + BMI323_MO2_OFFSET,
++					     BMI323_MO2_HYSTR_MSK,
++					     FIELD_PREP(BMI323_MO2_HYSTR_MSK,
++							raw));
++	default:
++		return -EINVAL;
++	}
++}
++
++/*
++ * Write an event parameter. Takes data->mutex, then dispatches on @type:
++ * GESTURE and MAG go to the helpers above, CHANGE writes the step watermark
++ * as val / 20 after checking in_range(val, 0, 20461). Out-of-range values
++ * and unsupported type/info combinations return -EINVAL.
++ */
++static int bmi323_write_event_value(struct iio_dev *indio_dev,
++				    const struct iio_chan_spec *chan,
++				    enum iio_event_type type,
++				    enum iio_event_direction dir,
++				    enum iio_event_info info,
++				    int val, int val2)
++{
++	struct bmi323_data *data = iio_priv(indio_dev);
++	unsigned int wtrmrk;
++
++	guard(mutex)(&data->mutex);
++
++	switch (type) {
++	case IIO_EV_TYPE_GESTURE:
++		return bmi323_write_gesture_event_value(data, info, val, val2);
++	case IIO_EV_TYPE_MAG:
++		return bmi323_write_motion_event_value(data, dir, info, val,
++						       val2);
++	case IIO_EV_TYPE_CHANGE:
++		if (!in_range(val, 0, 20461))
++			return -EINVAL;
++
++		wtrmrk = val / 20;
++		return bmi323_update_ext_reg(data, BMI323_STEP_SC1_REG,
++					     BMI323_STEP_SC1_WTRMRK_MSK,
++					     FIELD_PREP(BMI323_STEP_SC1_WTRMRK_MSK,
++							wtrmrk));
++	default:
++		return -EINVAL;
++	}
++}
++
++/* Tap (gesture) parameters; the caller holds data->mutex. */
++static int bmi323_read_gesture_event_value(struct bmi323_data *data,
++					   enum iio_event_info info,
++					   int *val, int *val2)
++{
++	unsigned int field, regval;
++	int err;
++
++	switch (info) {
++	case IIO_EV_INFO_VALUE:
++		err = bmi323_read_ext_reg(data, BMI323_TAP2_REG, &regval);
++		if (err)
++			return err;
++
++		field = FIELD_GET(BMI323_TAP2_THRES_MSK, regval);
++		*val = field / BMI323_TAP_THRES_SCALE;
++		*val2 = BMI323_RAW_TO_MICRO(field, BMI323_TAP_THRES_SCALE);
++		return IIO_VAL_INT_PLUS_MICRO;
++	case IIO_EV_INFO_RESET_TIMEOUT:
++		err = bmi323_read_ext_reg(data, BMI323_TAP3_REG, &regval);
++		if (err)
++			return err;
++
++		field = FIELD_GET(BMI323_TAP3_QT_AFT_GES_MSK, regval);
++		*val = 0;
++		*val2 = BMI323_RAW_TO_MICRO(field,
++					    BMI323_QUITE_TIM_GES_SCALE);
++		return IIO_VAL_INT_PLUS_MICRO;
++	case IIO_EV_INFO_TAP2_MIN_DELAY:
++		err = bmi323_read_ext_reg(data, BMI323_TAP3_REG, &regval);
++		if (err)
++			return err;
++
++		field = FIELD_GET(BMI323_TAP3_QT_BW_TAP_MSK, regval);
++		*val = 0;
++		*val2 = BMI323_RAW_TO_MICRO(field,
++					    BMI323_DUR_BW_TAP_SCALE);
++		return IIO_VAL_INT_PLUS_MICRO;
++	default:
++		return -EINVAL;
++	}
++}
++
++/* Any-motion / no-motion parameters; the caller holds data->mutex. */
++static int bmi323_read_motion_event_value(struct bmi323_data *data,
++					  enum iio_event_direction dir,
++					  enum iio_event_info info,
++					  int *val, int *val2)
++{
++	unsigned int field, regval;
++	int err, base;
++
++	base = bmi323_motion_config_reg(dir);
++	if (base < 0)
++		return -EINVAL;
++
++	switch (info) {
++	case IIO_EV_INFO_VALUE:
++		err = bmi323_read_ext_reg(data, base, &regval);
++		if (err)
++			return err;
++
++		field = FIELD_GET(BMI323_MO1_SLOPE_TH_MSK, regval);
++		*val = field / BMI323_MOTION_THRES_SCALE;
++		*val2 = BMI323_RAW_TO_MICRO(field,
++					    BMI323_MOTION_THRES_SCALE);
++		return IIO_VAL_INT_PLUS_MICRO;
++	case IIO_EV_INFO_PERIOD:
++		err = bmi323_read_ext_reg(data,
++					  base + BMI323_MO3_OFFSET,
++					  &regval);
++		if (err)
++			return err;
++
++		field = FIELD_GET(BMI323_MO3_DURA_MSK, regval);
++		*val = field / BMI323_MOTION_DURAT_SCALE;
++		*val2 = BMI323_RAW_TO_MICRO(field,
++					    BMI323_MOTION_DURAT_SCALE);
++		return IIO_VAL_INT_PLUS_MICRO;
++	case IIO_EV_INFO_HYSTERESIS:
++		err = bmi323_read_ext_reg(data,
++					  base + BMI323_MO2_OFFSET,
++					  &regval);
++		if (err)
++			return err;
++
++		field = FIELD_GET(BMI323_MO2_HYSTR_MSK, regval);
++		*val = field / BMI323_MOTION_HYSTR_SCALE;
++		*val2 = BMI323_RAW_TO_MICRO(field,
++					    BMI323_MOTION_HYSTR_SCALE);
++		return IIO_VAL_INT_PLUS_MICRO;
++	default:
++		return -EINVAL;
++	}
++}
++
++/*
++ * Read an event parameter. Takes data->mutex, then dispatches on @type:
++ * GESTURE and MAG go to the helpers above (IIO_VAL_INT_PLUS_MICRO results),
++ * CHANGE reports the step watermark field multiplied by 20 as IIO_VAL_INT.
++ * Unsupported type/info combinations return -EINVAL.
++ */
++static int bmi323_read_event_value(struct iio_dev *indio_dev,
++				   const struct iio_chan_spec *chan,
++				   enum iio_event_type type,
++				   enum iio_event_direction dir,
++				   enum iio_event_info info,
++				   int *val, int *val2)
++{
++	struct bmi323_data *data = iio_priv(indio_dev);
++	unsigned int wtrmrk, regval;
++	int err;
++
++	guard(mutex)(&data->mutex);
++
++	switch (type) {
++	case IIO_EV_TYPE_GESTURE:
++		return bmi323_read_gesture_event_value(data, info, val, val2);
++	case IIO_EV_TYPE_MAG:
++		return bmi323_read_motion_event_value(data, dir, info, val,
++						      val2);
++	case IIO_EV_TYPE_CHANGE:
++		err = bmi323_read_ext_reg(data, BMI323_STEP_SC1_REG,
++					  &regval);
++		if (err)
++			return err;
++
++		wtrmrk = FIELD_GET(BMI323_STEP_SC1_WTRMRK_MSK, regval);
++		*val = wtrmrk * 20;
++		return IIO_VAL_INT;
++	default:
++		return -EINVAL;
++	}
++}
++
++/*
++ * Drain the hardware FIFO into the IIO buffer. Under data->mutex: read the
++ * fill level (clamped to BMI323_FIFO_FULL_IN_WORDS), read that many words
++ * into data->fifo_buff, then push one scan per complete frame containing
++ * only the channels set in the active scan mask. Returns the number of
++ * frames pushed, -EINVAL when less than one full frame is available, or a
++ * regmap error.
++ */
++static int __bmi323_fifo_flush(struct iio_dev *indio_dev)
++{
++	struct bmi323_data *data = iio_priv(indio_dev);
++	int n, err, words, nframes, ch, slot;
++	__le16 *src, *dst;
++	u64 period;
++	s64 ts;
++
++	guard(mutex)(&data->mutex);
++	err = regmap_read(data->regmap, BMI323_FIFO_FILL_LEVEL_REG, &words);
++	if (err)
++		return err;
++
++	words = min(words, BMI323_FIFO_FULL_IN_WORDS);
++
++	nframes = words / BMI323_FIFO_FRAME_LENGTH;
++	if (!nframes)
++		return -EINVAL;
++
++	if (words % BMI323_FIFO_FRAME_LENGTH)
++		dev_warn(data->dev, "Bad FIFO alignment\n");
++
++	/*
++	 * Timestamps are approximated: the newest frame gets fifo_tstamp and
++	 * earlier frames are spaced one sample period apart. The period is
++	 * the time since the previous flush divided by the frame count, or
++	 * the configured accelerometer period when no previous flush time is
++	 * recorded.
++	 */
++	period = data->odrns[BMI323_ACCEL];
++	if (data->old_fifo_tstamp) {
++		period = data->fifo_tstamp - data->old_fifo_tstamp;
++		do_div(period, nframes);
++	}
++
++	ts = data->fifo_tstamp - (nframes - 1) * period;
++
++	err = regmap_noinc_read(data->regmap, BMI323_FIFO_DATA_REG,
++				&data->fifo_buff[0],
++				words * BMI323_BYTES_PER_SAMPLE);
++	if (err)
++		return err;
++
++	dst = &data->buffer.channels[0];
++
++	for (n = 0; n < nframes; n++, ts += period) {
++		src = &data->fifo_buff[n * BMI323_FIFO_FRAME_LENGTH];
++
++		/* Pack the enabled channels contiguously for this scan */
++		slot = 0;
++		for_each_set_bit(ch, indio_dev->active_scan_mask,
++				 BMI323_CHAN_MAX)
++			dst[slot++] = src[ch];
++
++		iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer,
++						   ts);
++	}
++
++	return nframes;
++}
++
++/*
++ * Store the requested FIFO watermark (in frames), capped at
++ * BMI323_FIFO_FULL_IN_FRAMES, in data->watermark. Only the cached value is
++ * updated here. Always returns 0.
++ */
++static int bmi323_set_watermark(struct iio_dev *indio_dev, unsigned int val)
++{
++	struct bmi323_data *data = iio_priv(indio_dev);
++	unsigned int frames = min(val, (u32)BMI323_FIFO_FULL_IN_FRAMES);
++
++	guard(mutex)(&data->mutex);
++	data->watermark = frames;
++
++	return 0;
++}
++
++/*
++ * Turn the hardware FIFO off: zero BMI323_FIFO_CONF_REG, clear the FIFO
++ * watermark field in BMI323_INT_MAP2_REG, then reset fifo_tstamp and mark
++ * the driver state BMI323_IDLE. The state is left untouched if a register
++ * access fails.
++ */
++static int bmi323_fifo_disable(struct bmi323_data *data)
++{
++	int err;
++
++	guard(mutex)(&data->mutex);
++
++	err = regmap_write(data->regmap, BMI323_FIFO_CONF_REG, 0);
++	if (err)
++		return err;
++
++	err = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG,
++				 BMI323_FIFO_WTRMRK_MSK,
++				 FIELD_PREP(BMI323_FIFO_WTRMRK_MSK, 0));
++	if (err)
++		return err;
++
++	data->state = BMI323_IDLE;
++	data->fifo_tstamp = 0;
++
++	return 0;
++}
++
++/*
++ * Buffer predisable hook: the FIFO is shut down unless the device is in
++ * triggered-buffer mode, in which case there is nothing to do.
++ */
++static int bmi323_buffer_predisable(struct iio_dev *indio_dev)
++{
++	if (iio_device_get_current_mode(indio_dev) != INDIO_BUFFER_TRIGGERED)
++		return bmi323_fifo_disable(iio_priv(indio_dev));
++
++	return 0;
++}
++
++/*
++ * Write the cached watermark to BMI323_FIFO_WTRMRK_REG, converted from
++ * frames to words by multiplying with BMI323_FIFO_FRAME_LENGTH.
++ */
++static int bmi323_update_watermark(struct bmi323_data *data)
++{
++	int words = data->watermark * BMI323_FIFO_FRAME_LENGTH;
++
++	return regmap_write(data->regmap, BMI323_FIFO_WTRMRK_REG, words);
++}
++
++/*
++ * Turn the hardware FIFO on. Under data->mutex, in order: set the
++ * accel+gyro enable field of BMI323_FIFO_CONF_REG, route the FIFO watermark
++ * interrupt to data->irq_pin, program the watermark level, and write
++ * BMI323_FIFO_FLUSH_MSK to BMI323_FIFO_CTRL_REG. The state becomes
++ * BMI323_BUFFER_FIFO only if every step succeeded.
++ */
++static int bmi323_fifo_enable(struct bmi323_data *data)
++{
++	int err;
++
++	guard(mutex)(&data->mutex);
++
++	err = regmap_update_bits(data->regmap, BMI323_FIFO_CONF_REG,
++				 BMI323_FIFO_CONF_ACC_GYR_EN_MSK,
++				 FIELD_PREP(BMI323_FIFO_CONF_ACC_GYR_EN_MSK,
++					    BMI323_FIFO_ACC_GYR_MSK));
++	if (err)
++		return err;
++
++	err = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG,
++				 BMI323_FIFO_WTRMRK_MSK,
++				 FIELD_PREP(BMI323_FIFO_WTRMRK_MSK,
++					    data->irq_pin));
++	if (err)
++		return err;
++
++	err = bmi323_update_watermark(data);
++	if (err)
++		return err;
++
++	err = regmap_write(data->regmap, BMI323_FIFO_CTRL_REG,
++			   BMI323_FIFO_FLUSH_MSK);
++	if (err)
++		return err;
++
++	data->state = BMI323_BUFFER_FIFO;
++
++	return 0;
++}
++
++/*
++ * Buffer preenable hook: refuse buffered capture with -EINVAL unless the
++ * accelerometer and gyroscope run at the same output data rate.
++ */
++static int bmi323_buffer_preenable(struct iio_dev *indio_dev)
++{
++	struct bmi323_data *data = iio_priv(indio_dev);
++
++	guard(mutex)(&data->mutex);
++
++	/*
++	 * With mismatched ODRs the FIFO runs at the higher of the two rates
++	 * and the slower sensor inserts dummy frames, so buffer reads are only
++	 * allowed when both ODRs are equal.
++	 * See datasheet section 5.7 "FIFO Data Buffering".
++	 */
++	if (data->odrns[BMI323_ACCEL] == data->odrns[BMI323_GYRO])
++		return 0;
++
++	dev_err(data->dev, "Accelero and Gyro ODR doesn't match\n");
++
++	return -EINVAL;
++}
++
++/*
++ * Buffer postenable hook: the FIFO is started unless the device is in
++ * triggered-buffer mode, in which case there is nothing to do.
++ */
++static int bmi323_buffer_postenable(struct iio_dev *indio_dev)
++{
++	if (iio_device_get_current_mode(indio_dev) != INDIO_BUFFER_TRIGGERED)
++		return bmi323_fifo_enable(iio_priv(indio_dev));
++
++	return 0;
++}
++
++/* sysfs show: print the cached FIFO watermark (sampled under data->mutex) */
++static ssize_t hwfifo_watermark_show(struct device *dev,
++				     struct device_attribute *attr, char *buf)
++{
++	struct bmi323_data *data = iio_priv(dev_to_iio_dev(dev));
++	int level;
++
++	scoped_guard(mutex, &data->mutex)
++		level = data->watermark;
++
++	return sysfs_emit(buf, "%d\n", level);
++}
++static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0);
++
++/*
++ * sysfs show: print 1 when the driver state is BMI323_BUFFER_FIFO, else 0.
++ * The state is sampled under data->mutex.
++ */
++static ssize_t hwfifo_enabled_show(struct device *dev,
++				   struct device_attribute *attr,
++				   char *buf)
++{
++	struct bmi323_data *data = iio_priv(dev_to_iio_dev(dev));
++	bool fifo_on;
++
++	scoped_guard(mutex, &data->mutex)
++		fifo_on = data->state == BMI323_BUFFER_FIFO;
++
++	return sysfs_emit(buf, "%d\n", fifo_on);
++}
++static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0);
++
++/* NULL-terminated list of the read-only hwfifo_* attributes defined above */
++static const struct iio_dev_attr *bmi323_fifo_attributes[] = {
++ &iio_dev_attr_hwfifo_watermark,
++ &iio_dev_attr_hwfifo_enabled,
++ NULL
++};
++
++/*
++ * Buffer setup hooks: preenable checks the accel/gyro ODR match, while
++ * postenable/predisable start/stop the hardware FIFO unless the device is in
++ * triggered-buffer mode.
++ */
++static const struct iio_buffer_setup_ops bmi323_buffer_ops = {
++ .preenable = bmi323_buffer_preenable,
++ .postenable = bmi323_buffer_postenable,
++ .predisable = bmi323_buffer_predisable,
++};
++
++/*
++ * Threaded interrupt handler. Reads the status register that belongs to the
++ * configured interrupt pin and services every source flagged in it: FIFO
++ * watermark, data-ready trigger, motion / no-motion, step watermark and tap
++ * events. Returns IRQ_NONE when a register read or the FIFO flush fails, when
++ * no status bit is set, or when the error bit is set; IRQ_HANDLED otherwise.
++ */
++static irqreturn_t bmi323_irq_thread_handler(int irq, void *private)
++{
++ struct iio_dev *indio_dev = private;
++ struct bmi323_data *data = iio_priv(indio_dev);
++ unsigned int status_addr, status, feature_event;
++ /* Taken once at entry; shared by all events pushed below */
++ s64 timestamp = iio_get_time_ns(indio_dev);
++ int ret;
++
++ /* Each interrupt pin has its own status register */
++ if (data->irq_pin == BMI323_IRQ_INT1)
++ status_addr = BMI323_STATUS_INT1_REG;
++ else
++ status_addr = BMI323_STATUS_INT2_REG;
++
++ scoped_guard(mutex, &data->mutex) {
++ ret = regmap_read(data->regmap, status_addr, &status);
++ if (ret)
++ return IRQ_NONE;
++ }
++
++ if (!status || FIELD_GET(BMI323_STATUS_ERROR_MSK, status))
++ return IRQ_NONE;
++
++ if (FIELD_GET(BMI323_STATUS_FIFO_WTRMRK_MSK, status)) {
++ /* Keep the previous flush time so the flush can derive the sample period */
++ data->old_fifo_tstamp = data->fifo_tstamp;
++ data->fifo_tstamp = iio_get_time_ns(indio_dev);
++ ret = __bmi323_fifo_flush(indio_dev);
++ if (ret < 0)
++ return IRQ_NONE;
++ }
++
++ if (FIELD_GET(BMI323_STATUS_ACC_GYR_DRDY_MSK, status))
++ iio_trigger_poll_nested(data->trig);
++
++ if (FIELD_GET(BMI323_STATUS_MOTION_MSK, status))
++ iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_ACCEL, 0,
++ IIO_MOD_X_OR_Y_OR_Z,
++ IIO_EV_TYPE_MAG,
++ IIO_EV_DIR_RISING),
++ timestamp);
++
++ if (FIELD_GET(BMI323_STATUS_NOMOTION_MSK, status))
++ iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_ACCEL, 0,
++ IIO_MOD_X_OR_Y_OR_Z,
++ IIO_EV_TYPE_MAG,
++ IIO_EV_DIR_FALLING),
++ timestamp);
++
++ if (FIELD_GET(BMI323_STATUS_STP_WTR_MSK, status))
++ iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_STEPS, 0,
++ IIO_NO_MOD,
++ IIO_EV_TYPE_CHANGE,
++ IIO_EV_DIR_NONE),
++ timestamp);
++
++ if (FIELD_GET(BMI323_STATUS_TAP_MSK, status)) {
++ /* The status bit is shared; this register tells single from double tap */
++ scoped_guard(mutex, &data->mutex) {
++ ret = regmap_read(data->regmap,
++ BMI323_FEAT_EVNT_EXT_REG,
++ &feature_event);
++ if (ret)
++ return IRQ_NONE;
++ }
++
++ if (FIELD_GET(BMI323_FEAT_EVNT_EXT_S_MSK, feature_event)) {
++ iio_push_event(indio_dev,
++ IIO_MOD_EVENT_CODE(IIO_ACCEL, 0,
++ IIO_MOD_X_OR_Y_OR_Z,
++ IIO_EV_TYPE_GESTURE,
++ IIO_EV_DIR_SINGLETAP),
++ timestamp);
++ }
++
++ if (FIELD_GET(BMI323_FEAT_EVNT_EXT_D_MSK, feature_event))
++ iio_push_event(indio_dev,
++ IIO_MOD_EVENT_CODE(IIO_ACCEL, 0,
++ IIO_MOD_X_OR_Y_OR_Z,
++ IIO_EV_TYPE_GESTURE,
++ IIO_EV_DIR_DOUBLETAP),
++ timestamp);
++ }
++
++ return IRQ_HANDLED;
++}
++
++/*
++ * Route both data-ready interrupts (gyroscope first, then accelerometer) to
++ * @irq_pin via BMI323_INT_MAP2_REG. Takes no lock itself. Returns 0 or the
++ * first regmap error.
++ */
++static int bmi323_set_drdy_irq(struct bmi323_data *data,
++			       enum bmi323_irq_pin irq_pin)
++{
++	int err;
++
++	err = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG,
++				 BMI323_GYR_DRDY_MSK,
++				 FIELD_PREP(BMI323_GYR_DRDY_MSK, irq_pin));
++	if (!err)
++		err = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG,
++					 BMI323_ACC_DRDY_MSK,
++					 FIELD_PREP(BMI323_ACC_DRDY_MSK,
++						    irq_pin));
++
++	return err;
++}
++
++/*
++ * Trigger enable/disable callback. Refused with -EBUSY while the hardware
++ * FIFO is in use. Otherwise the driver state is switched between
++ * BMI323_BUFFER_DRDY_TRIGGERED and BMI323_IDLE, and the data-ready interrupts
++ * are routed to data->irq_pin or disabled accordingly.
++ */
++static int bmi323_data_rdy_trigger_set_state(struct iio_trigger *trig,
++					     bool state)
++{
++	struct bmi323_data *data = iio_trigger_get_drvdata(trig);
++	enum bmi323_irq_pin pin;
++
++	guard(mutex)(&data->mutex);
++
++	if (data->state == BMI323_BUFFER_FIFO) {
++		dev_warn(data->dev, "Can't set trigger when FIFO enabled\n");
++		return -EBUSY;
++	}
++
++	data->state = state ? BMI323_BUFFER_DRDY_TRIGGERED : BMI323_IDLE;
++	pin = state ? data->irq_pin : BMI323_IRQ_DISABLED;
++
++	return bmi323_set_drdy_irq(data, pin);
++}
++
++/* Trigger operations: only the enable/disable callback is provided */
++static const struct iio_trigger_ops bmi323_trigger_ops = {
++ .set_trigger_state = &bmi323_data_rdy_trigger_set_state,
++};
++
++/*
++ * Triggered-buffer poll handler. Reads the enabled channels into
++ * data->buffer (one bulk read when every channel is enabled, otherwise one
++ * raw read per enabled channel) and pushes the scan with a timestamp.
++ *
++ * iio_trigger_notify_done() is called on every path, including read
++ * failures; otherwise the trigger would never be told that this consumer has
++ * finished. On a read failure nothing is pushed for this trigger event.
++ */
++static irqreturn_t bmi323_trigger_handler(int irq, void *p)
++{
++	struct iio_poll_func *pf = p;
++	struct iio_dev *indio_dev = pf->indio_dev;
++	struct bmi323_data *data = iio_priv(indio_dev);
++	int ret, bit, index = 0;
++
++	/* Lock to protect the data->buffer */
++	guard(mutex)(&data->mutex);
++
++	if (*indio_dev->active_scan_mask == BMI323_ALL_CHAN_MSK) {
++		ret = regmap_bulk_read(data->regmap, BMI323_ACCEL_X_REG,
++				       &data->buffer.channels,
++				       ARRAY_SIZE(data->buffer.channels));
++		if (ret)
++			goto out;
++	} else {
++		for_each_set_bit(bit, indio_dev->active_scan_mask,
++				 BMI323_CHAN_MAX) {
++			ret = regmap_raw_read(data->regmap,
++					      BMI323_ACCEL_X_REG + bit,
++					      &data->buffer.channels[index++],
++					      BMI323_BYTES_PER_SAMPLE);
++			if (ret)
++				goto out;
++		}
++	}
++
++	iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer,
++					   iio_get_time_ns(indio_dev));
++
++out:
++	iio_trigger_notify_done(indio_dev->trig);
++
++	return IRQ_HANDLED;
++}
++
++/*
++ * Program the averaging setting for @sensor. @avg must be one of the values
++ * in bmi323_accel_gyro_avrg[]; its table index is what gets written to the
++ * BMI323_ACC_GYRO_CONF_AVG_MSK field. Unknown values return -EINVAL.
++ */
++static int bmi323_set_average(struct bmi323_data *data,
++			      enum bmi323_sensor_type sensor, int avg)
++{
++	int idx;
++
++	/* Search from the end of the table towards the start */
++	for (idx = ARRAY_SIZE(bmi323_accel_gyro_avrg) - 1; idx >= 0; idx--) {
++		if (bmi323_accel_gyro_avrg[idx] == avg)
++			break;
++	}
++
++	if (idx < 0)
++		return -EINVAL;
++
++	guard(mutex)(&data->mutex);
++	return regmap_update_bits(data->regmap, bmi323_hw[sensor].config,
++				  BMI323_ACC_GYRO_CONF_AVG_MSK,
++				  FIELD_PREP(BMI323_ACC_GYRO_CONF_AVG_MSK,
++					     idx));
++}
++
++/*
++ * Read the averaging setting of @sensor and translate the
++ * BMI323_ACC_GYRO_CONF_AVG_MSK field through bmi323_accel_gyro_avrg[].
++ * Returns IIO_VAL_INT with *@avg set, a regmap error, or -EINVAL when the
++ * field holds a value with no table entry.
++ */
++static int bmi323_get_average(struct bmi323_data *data,
++			      enum bmi323_sensor_type sensor, int *avg)
++{
++	unsigned int raw;
++	int ret, value;
++
++	scoped_guard(mutex, &data->mutex) {
++		ret = regmap_read(data->regmap, bmi323_hw[sensor].config, &value);
++		if (ret)
++			return ret;
++	}
++
++	raw = FIELD_GET(BMI323_ACC_GYRO_CONF_AVG_MSK, value);
++	/* Never index past the table with an unexpected register value */
++	if (raw >= ARRAY_SIZE(bmi323_accel_gyro_avrg))
++		return -EINVAL;
++
++	*avg = bmi323_accel_gyro_avrg[raw];
++
++	return IIO_VAL_INT;
++}
++
++/*
++ * Enable (@val non-zero) or disable the step counter. Refused with -EINVAL
++ * when data->odrhz[BMI323_ACCEL] is below 200. On success the new state is
++ * mirrored into data->feature_events.
++ */
++static int bmi323_enable_steps(struct bmi323_data *data, int val)
++{
++	const int on = !!val;
++	int err;
++
++	guard(mutex)(&data->mutex);
++	if (data->odrhz[BMI323_ACCEL] < 200) {
++		dev_err(data->dev, "Invalid accelrometer parameter\n");
++		return -EINVAL;
++	}
++
++	err = bmi323_feature_engine_events(data, BMI323_FEAT_IO0_STP_CNT_MSK,
++					   on);
++	if (err)
++		return err;
++
++	set_mask_bits(&data->feature_events, BMI323_FEAT_IO0_STP_CNT_MSK,
++		      FIELD_PREP(BMI323_FEAT_IO0_STP_CNT_MSK, on));
++
++	return 0;
++}
++
++/*
++ * Read the step count. Fails with -EINVAL while the step counter bit is not
++ * set in data->feature_events. The count is bulk-read from
++ * BMI323_FEAT_IO2_REG into data->steps_count and decoded as an unaligned
++ * little-endian 32-bit value. Returns IIO_VAL_INT on success.
++ */
++static int bmi323_read_steps(struct bmi323_data *data, int *val)
++{
++	int err;
++
++	guard(mutex)(&data->mutex);
++	if (!FIELD_GET(BMI323_FEAT_IO0_STP_CNT_MSK, data->feature_events))
++		return -EINVAL;
++
++	err = regmap_bulk_read(data->regmap, BMI323_FEAT_IO2_REG,
++			       data->steps_count,
++			       ARRAY_SIZE(data->steps_count));
++	if (err)
++		return err;
++
++	*val = get_unaligned_le32(data->steps_count);
++
++	return IIO_VAL_INT;
++}
++
++/*
++ * Read one raw axis sample. Returns -EINVAL if the error status check
++ * reports anything non-zero. The register address is the sensor's data base
++ * plus the axis offset relative to IIO_MOD_X; the value is sign-extended
++ * from chan->scan_type.realbits. Returns IIO_VAL_INT on success.
++ */
++static int bmi323_read_axis(struct bmi323_data *data,
++			    struct iio_chan_spec const *chan, int *val)
++{
++	enum bmi323_sensor_type sensor;
++	unsigned int sample;
++	u8 reg;
++	int err;
++
++	if (bmi323_get_error_status(data))
++		return -EINVAL;
++
++	sensor = bmi323_iio_to_sensor(chan->type);
++	reg = bmi323_hw[sensor].data + (chan->channel2 - IIO_MOD_X);
++
++	scoped_guard(mutex, &data->mutex) {
++		err = regmap_read(data->regmap, reg, &sample);
++		if (err)
++			return err;
++	}
++
++	*val = sign_extend32(sample, chan->scan_type.realbits - 1);
++
++	return IIO_VAL_INT;
++}
++
++/*
++ * Read the raw temperature. Returns -EINVAL if the error status check
++ * reports anything non-zero. BMI323_TEMP_REG is read under data->mutex and
++ * sign-extended from bit 15. Returns IIO_VAL_INT on success.
++ */
++static int bmi323_get_temp_data(struct bmi323_data *data, int *val)
++{
++	unsigned int temp;
++	int err;
++
++	if (bmi323_get_error_status(data))
++		return -EINVAL;
++
++	scoped_guard(mutex, &data->mutex) {
++		err = regmap_read(data->regmap, BMI323_TEMP_REG, &temp);
++		if (err)
++			return err;
++	}
++
++	*val = sign_extend32(temp, 15);
++
++	return IIO_VAL_INT;
++}
++
++/*
++ * Read the output data rate of @sensor and translate the
++ * BMI323_ACC_GYRO_CONF_ODR_MSK field through bmi323_acc_gyro_odr[].
++ * Returns IIO_VAL_INT_PLUS_MICRO with *@odr / *@uodr set, a regmap error, or
++ * -EINVAL when the field holds a value with no table entry.
++ */
++static int bmi323_get_odr(struct bmi323_data *data,
++			  enum bmi323_sensor_type sensor, int *odr, int *uodr)
++{
++	int ret, value, odr_raw;
++
++	scoped_guard(mutex, &data->mutex) {
++		ret = regmap_read(data->regmap, bmi323_hw[sensor].config, &value);
++		if (ret)
++			return ret;
++	}
++
++	odr_raw = FIELD_GET(BMI323_ACC_GYRO_CONF_ODR_MSK, value);
++	/*
++	 * The field is 1-based (bmi323_set_odr() writes table index + 1), so
++	 * 0 and anything beyond the table size must not be used as an index.
++	 */
++	if (odr_raw < 1 || odr_raw > (int)ARRAY_SIZE(bmi323_acc_gyro_odr))
++		return -EINVAL;
++
++	*odr = bmi323_acc_gyro_odr[odr_raw - 1][0];
++	*uodr = bmi323_acc_gyro_odr[odr_raw - 1][1];
++
++	return IIO_VAL_INT_PLUS_MICRO;
++}
++
++/*
++ * Select the operating mode that goes with the ODR table entry @odr_index:
++ * ACC_GYRO_MODE_CONTINOUS when the integer part of the rate is above 25,
++ * ACC_GYRO_MODE_DUTYCYCLE otherwise.
++ */
++static int bmi323_configure_power_mode(struct bmi323_data *data,
++				       enum bmi323_sensor_type sensor,
++				       int odr_index)
++{
++	enum bmi323_opr_mode mode;
++
++	mode = bmi323_acc_gyro_odr[odr_index][0] > 25 ?
++		ACC_GYRO_MODE_CONTINOUS : ACC_GYRO_MODE_DUTYCYCLE;
++
++	return bmi323_set_mode(data, sensor, mode);
++}
++
++/*
++ * Set the output data rate of @sensor. (@odr, @uodr) must match an entry of
++ * bmi323_acc_gyro_odr[] exactly, otherwise -EINVAL is returned. The matching
++ * power mode is configured first; if that fails, its error code is returned
++ * unchanged. The cached rates in data->odrhz / data->odrns are then updated
++ * and table index + 1 is written to the BMI323_ACC_GYRO_CONF_ODR_MSK field.
++ */
++static int bmi323_set_odr(struct bmi323_data *data,
++			  enum bmi323_sensor_type sensor, int odr, int uodr)
++{
++	int odr_raw, ret;
++
++	odr_raw = ARRAY_SIZE(bmi323_acc_gyro_odr);
++
++	while (odr_raw--)
++		if (odr == bmi323_acc_gyro_odr[odr_raw][0] &&
++		    uodr == bmi323_acc_gyro_odr[odr_raw][1])
++			break;
++	if (odr_raw < 0)
++		return -EINVAL;
++
++	ret = bmi323_configure_power_mode(data, sensor, odr_raw);
++	if (ret)
++		return ret;
++
++	guard(mutex)(&data->mutex);
++	data->odrhz[sensor] = bmi323_acc_gyro_odr[odr_raw][0];
++	data->odrns[sensor] = bmi323_acc_gyro_odrns[odr_raw];
++
++	/* The register field is 1-based relative to the table index */
++	odr_raw++;
++
++	return regmap_update_bits(data->regmap, bmi323_hw[sensor].config,
++				  BMI323_ACC_GYRO_CONF_ODR_MSK,
++				  FIELD_PREP(BMI323_ACC_GYRO_CONF_ODR_MSK,
++					     odr_raw));
++}
++
++/*
++ * Read the scale setting of @sensor and store the fractional part of the
++ * matching scale_table entry in *@val2 (the caller provides the integer
++ * part). Returns IIO_VAL_INT_PLUS_MICRO, a regmap error, or -EINVAL when the
++ * BMI323_ACC_GYRO_CONF_SCL_MSK field holds a value with no table entry.
++ */
++static int bmi323_get_scale(struct bmi323_data *data,
++			    enum bmi323_sensor_type sensor, int *val2)
++{
++	int ret, value, scale_raw;
++
++	scoped_guard(mutex, &data->mutex) {
++		ret = regmap_read(data->regmap, bmi323_hw[sensor].config,
++				  &value);
++		if (ret)
++			return ret;
++	}
++
++	scale_raw = FIELD_GET(BMI323_ACC_GYRO_CONF_SCL_MSK, value);
++	/* Never index past the per-sensor table with an unexpected value */
++	if (scale_raw >= bmi323_hw[sensor].scale_table_len)
++		return -EINVAL;
++
++	*val2 = bmi323_hw[sensor].scale_table[scale_raw][1];
++
++	return IIO_VAL_INT_PLUS_MICRO;
++}
++
++/*
++ * Program the scale of @sensor. (@val, @val2) must match an entry of the
++ * sensor's scale_table exactly; its index is written to the
++ * BMI323_ACC_GYRO_CONF_SCL_MSK field. Unknown values return -EINVAL.
++ */
++static int bmi323_set_scale(struct bmi323_data *data,
++			    enum bmi323_sensor_type sensor, int val, int val2)
++{
++	int idx;
++
++	/* Search from the end of the table towards the start */
++	for (idx = bmi323_hw[sensor].scale_table_len - 1; idx >= 0; idx--) {
++		if (bmi323_hw[sensor].scale_table[idx][0] == val &&
++		    bmi323_hw[sensor].scale_table[idx][1] == val2)
++			break;
++	}
++
++	if (idx < 0)
++		return -EINVAL;
++
++	guard(mutex)(&data->mutex);
++	return regmap_update_bits(data->regmap, bmi323_hw[sensor].config,
++				  BMI323_ACC_GYRO_CONF_SCL_MSK,
++				  FIELD_PREP(BMI323_ACC_GYRO_CONF_SCL_MSK,
++					     idx));
++}
++
++/*
++ * Report the lists of supported values: sampling frequencies and scales as
++ * integer-plus-micro pairs (hence a length of twice the table size), and
++ * oversampling ratios as plain integers. Other @mask values return -EINVAL.
++ */
++static int bmi323_read_avail(struct iio_dev *indio_dev,
++			     struct iio_chan_spec const *chan,
++			     const int **vals, int *type, int *length,
++			     long mask)
++{
++	enum bmi323_sensor_type sensor;
++
++	if (mask == IIO_CHAN_INFO_SAMP_FREQ) {
++		*type = IIO_VAL_INT_PLUS_MICRO;
++		*vals = (const int *)bmi323_acc_gyro_odr;
++		*length = ARRAY_SIZE(bmi323_acc_gyro_odr) * 2;
++		return IIO_AVAIL_LIST;
++	}
++
++	if (mask == IIO_CHAN_INFO_SCALE) {
++		sensor = bmi323_iio_to_sensor(chan->type);
++		*type = IIO_VAL_INT_PLUS_MICRO;
++		*vals = (const int *)bmi323_hw[sensor].scale_table;
++		*length = bmi323_hw[sensor].scale_table_len * 2;
++		return IIO_AVAIL_LIST;
++	}
++
++	if (mask == IIO_CHAN_INFO_OVERSAMPLING_RATIO) {
++		*type = IIO_VAL_INT;
++		*vals = (const int *)bmi323_accel_gyro_avrg;
++		*length = ARRAY_SIZE(bmi323_accel_gyro_avrg);
++		return IIO_AVAIL_LIST;
++	}
++
++	return -EINVAL;
++}
++
++/*
++ * write_raw callback.
++ *  - SAMP_FREQ / SCALE / OVERSAMPLING_RATIO: applied to the sensor derived
++ *    from chan->type while direct mode is claimed.
++ *  - ENABLE: switches the step counter on or off.
++ *  - PROCESSED: only a write of 0 is accepted, and only while the step
++ *    counter is enabled; it sets the BMI323_STEP_SC1_RST_CNT_MSK field to
++ *    clear the step count.
++ * Anything else returns -EINVAL.
++ */
++static int bmi323_write_raw(struct iio_dev *indio_dev,
++			    struct iio_chan_spec const *chan, int val,
++			    int val2, long mask)
++{
++	struct bmi323_data *data = iio_priv(indio_dev);
++	enum bmi323_sensor_type sensor;
++	int ret;
++
++	switch (mask) {
++	case IIO_CHAN_INFO_SAMP_FREQ:
++	case IIO_CHAN_INFO_SCALE:
++	case IIO_CHAN_INFO_OVERSAMPLING_RATIO:
++		ret = iio_device_claim_direct_mode(indio_dev);
++		if (ret)
++			return ret;
++
++		sensor = bmi323_iio_to_sensor(chan->type);
++
++		if (mask == IIO_CHAN_INFO_SAMP_FREQ)
++			ret = bmi323_set_odr(data, sensor, val, val2);
++		else if (mask == IIO_CHAN_INFO_SCALE)
++			ret = bmi323_set_scale(data, sensor, val, val2);
++		else
++			ret = bmi323_set_average(data, sensor, val);
++
++		iio_device_release_direct_mode(indio_dev);
++		return ret;
++	case IIO_CHAN_INFO_ENABLE:
++		return bmi323_enable_steps(data, val);
++	case IIO_CHAN_INFO_PROCESSED:
++		scoped_guard(mutex, &data->mutex) {
++			if (val || !FIELD_GET(BMI323_FEAT_IO0_STP_CNT_MSK,
++					      data->feature_events))
++				return -EINVAL;
++
++			/* Clear step counter value */
++			ret = bmi323_update_ext_reg(data, BMI323_STEP_SC1_REG,
++						    BMI323_STEP_SC1_RST_CNT_MSK,
++						    FIELD_PREP(BMI323_STEP_SC1_RST_CNT_MSK,
++							       1));
++		}
++		return ret;
++	default:
++		return -EINVAL;
++	}
++}
++
++static int bmi323_read_raw(struct iio_dev *indio_dev,
++ struct iio_chan_spec const *chan, int *val,
++ int *val2, long mask)
++{
++ struct bmi323_data *data = iio_priv(indio_dev);
++ int ret;
++
++ switch (mask) {
++ case IIO_CHAN_INFO_PROCESSED:
++ return bmi323_read_steps(data, val);
++ case IIO_CHAN_INFO_RAW:
++ switch (chan->type) {
++ case IIO_ACCEL:
++ case IIO_ANGL_VEL:
++ ret = iio_device_claim_direct_mode(indio_dev);
++ if (ret)
++ return ret;
++
++ ret = bmi323_read_axis(data, chan, val);
++
++ iio_device_release_direct_mode(indio_dev);
++ return ret;
++ case IIO_TEMP:
++ return bmi323_get_temp_data(data, val);
++ default:
++ return -EINVAL;
++ }
++ case IIO_CHAN_INFO_SAMP_FREQ:
++ return bmi323_get_odr(data, bmi323_iio_to_sensor(chan->type),
++ val, val2);
++ case IIO_CHAN_INFO_SCALE:
++ switch (chan->type) {
++ case IIO_ACCEL:
++ case IIO_ANGL_VEL:
++ *val = 0;
++ return bmi323_get_scale(data,
++ bmi323_iio_to_sensor(chan->type),
++ val2);
++ case IIO_TEMP:
++ *val = BMI323_TEMP_SCALE / MEGA;
++ *val2 = BMI323_TEMP_SCALE % MEGA;
++ return IIO_VAL_INT_PLUS_MICRO;
++ default:
++ return -EINVAL;
++ }
++ case IIO_CHAN_INFO_OVERSAMPLING_RATIO:
++ return bmi323_get_average(data,
++ bmi323_iio_to_sensor(chan->type),
++ val);
++ case IIO_CHAN_INFO_OFFSET:
++ switch (chan->type) {
++ case IIO_TEMP:
++ *val = BMI323_TEMP_OFFSET;
++ return IIO_VAL_INT;
++ default:
++ return -EINVAL;
++ }
++ case IIO_CHAN_INFO_ENABLE:
++ scoped_guard(mutex, &data->mutex)
++ *val = FIELD_GET(BMI323_FEAT_IO0_STP_CNT_MSK,
++ data->feature_events);
++ return IIO_VAL_INT;
++ default:
++ return -EINVAL;
++ }
++}
++
++static const struct iio_info bmi323_info = {
++ .read_raw = bmi323_read_raw,
++ .write_raw = bmi323_write_raw,
++ .read_avail = bmi323_read_avail,
++ .hwfifo_set_watermark = bmi323_set_watermark,
++ .write_event_config = bmi323_write_event_config,
++ .read_event_config = bmi323_read_event_config,
++ .write_event_value = bmi323_write_event_value,
++ .read_event_value = bmi323_read_event_value,
++ .event_attrs = &bmi323_event_attribute_group,
++};
++
++#define BMI323_SCAN_MASK_ACCEL_3AXIS \
++ (BIT(BMI323_ACCEL_X) | BIT(BMI323_ACCEL_Y) | BIT(BMI323_ACCEL_Z))
++
++#define BMI323_SCAN_MASK_GYRO_3AXIS \
++ (BIT(BMI323_GYRO_X) | BIT(BMI323_GYRO_Y) | BIT(BMI323_GYRO_Z))
++
++static const unsigned long bmi323_avail_scan_masks[] = {
++ /* 3-axis accel */
++ BMI323_SCAN_MASK_ACCEL_3AXIS,
++ /* 3-axis gyro */
++ BMI323_SCAN_MASK_GYRO_3AXIS,
++ /* 3-axis accel + 3-axis gyro */
++ BMI323_SCAN_MASK_ACCEL_3AXIS | BMI323_SCAN_MASK_GYRO_3AXIS,
++ 0
++};
++
++static int bmi323_int_pin_config(struct bmi323_data *data,
++ enum bmi323_irq_pin irq_pin,
++ bool active_high, bool open_drain, bool latch)
++{
++ unsigned int mask, field_value;
++ int ret;
++
++ ret = regmap_update_bits(data->regmap, BMI323_IO_INT_CONF_REG,
++ BMI323_IO_INT_LTCH_MSK,
++ FIELD_PREP(BMI323_IO_INT_LTCH_MSK, latch));
++ if (ret)
++ return ret;
++
++ ret = bmi323_update_ext_reg(data, BMI323_GEN_SET1_REG,
++ BMI323_GEN_HOLD_DUR_MSK,
++ FIELD_PREP(BMI323_GEN_HOLD_DUR_MSK, 0));
++ if (ret)
++ return ret;
++
++ switch (irq_pin) {
++ case BMI323_IRQ_INT1:
++ mask = BMI323_IO_INT1_LVL_OD_OP_MSK;
++
++ field_value = FIELD_PREP(BMI323_IO_INT1_LVL_MSK, active_high) |
++ FIELD_PREP(BMI323_IO_INT1_OD_MSK, open_drain) |
++ FIELD_PREP(BMI323_IO_INT1_OP_EN_MSK, 1);
++ break;
++ case BMI323_IRQ_INT2:
++ mask = BMI323_IO_INT2_LVL_OD_OP_MSK;
++
++ field_value = FIELD_PREP(BMI323_IO_INT2_LVL_MSK, active_high) |
++ FIELD_PREP(BMI323_IO_INT2_OD_MSK, open_drain) |
++ FIELD_PREP(BMI323_IO_INT2_OP_EN_MSK, 1);
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ return regmap_update_bits(data->regmap, BMI323_IO_INT_CTR_REG, mask,
++ field_value);
++}
++
++static int bmi323_trigger_probe(struct bmi323_data *data,
++ struct iio_dev *indio_dev)
++{
++ bool open_drain, active_high, latch;
++ struct fwnode_handle *fwnode;
++ enum bmi323_irq_pin irq_pin;
++ int ret, irq, irq_type;
++ struct irq_data *desc;
++
++ fwnode = dev_fwnode(data->dev);
++ if (!fwnode)
++ return -ENODEV;
++
++ irq = fwnode_irq_get_byname(fwnode, "INT1");
++ if (irq > 0) {
++ irq_pin = BMI323_IRQ_INT1;
++ } else {
++ irq = fwnode_irq_get_byname(fwnode, "INT2");
++ if (irq < 0)
++ return 0;
++
++ irq_pin = BMI323_IRQ_INT2;
++ }
++
++ desc = irq_get_irq_data(irq);
++ if (!desc)
++ return dev_err_probe(data->dev, -EINVAL,
++ "Could not find IRQ %d\n", irq);
++
++ irq_type = irqd_get_trigger_type(desc);
++ switch (irq_type) {
++ case IRQF_TRIGGER_RISING:
++ latch = false;
++ active_high = true;
++ break;
++ case IRQF_TRIGGER_HIGH:
++ latch = true;
++ active_high = true;
++ break;
++ case IRQF_TRIGGER_FALLING:
++ latch = false;
++ active_high = false;
++ break;
++ case IRQF_TRIGGER_LOW:
++ latch = true;
++ active_high = false;
++ break;
++ default:
++ return dev_err_probe(data->dev, -EINVAL,
++ "Invalid interrupt type 0x%x specified\n",
++ irq_type);
++ }
++
++ open_drain = fwnode_property_read_bool(fwnode, "drive-open-drain");
++
++ ret = bmi323_int_pin_config(data, irq_pin, active_high, open_drain,
++ latch);
++ if (ret)
++ return dev_err_probe(data->dev, ret,
++ "Failed to configure irq line\n");
++
++ data->trig = devm_iio_trigger_alloc(data->dev, "%s-trig-%d",
++ indio_dev->name, irq_pin);
++ if (!data->trig)
++ return -ENOMEM;
++
++ data->trig->ops = &bmi323_trigger_ops;
++ iio_trigger_set_drvdata(data->trig, data);
++
++ ret = devm_request_threaded_irq(data->dev, irq, NULL,
++ bmi323_irq_thread_handler,
++ IRQF_ONESHOT, "bmi323-int", indio_dev);
++ if (ret)
++ return dev_err_probe(data->dev, ret, "Failed to request IRQ\n");
++
++ ret = devm_iio_trigger_register(data->dev, data->trig);
++ if (ret)
++ return dev_err_probe(data->dev, ret,
++ "Trigger registration failed\n");
++
++ data->irq_pin = irq_pin;
++
++ return 0;
++}
++
++static int bmi323_feature_engine_enable(struct bmi323_data *data, bool en)
++{
++ unsigned int feature_status;
++ int ret;
++
++ if (!en)
++ return regmap_write(data->regmap, BMI323_FEAT_CTRL_REG, 0);
++
++ ret = regmap_write(data->regmap, BMI323_FEAT_IO2_REG, 0x012c);
++ if (ret)
++ return ret;
++
++ ret = regmap_write(data->regmap, BMI323_FEAT_IO_STATUS_REG,
++ BMI323_FEAT_IO_STATUS_MSK);
++ if (ret)
++ return ret;
++
++ ret = regmap_write(data->regmap, BMI323_FEAT_CTRL_REG,
++ BMI323_FEAT_ENG_EN_MSK);
++ if (ret)
++ return ret;
++
++ /*
++ * It takes around 4 msec to enable the Feature engine, so check
++ * the status of the feature engine every 2 msec for a maximum
++ * of 5 trials.
++ */
++ ret = regmap_read_poll_timeout(data->regmap, BMI323_FEAT_IO1_REG,
++ feature_status,
++ FIELD_GET(BMI323_FEAT_IO1_ERR_MSK,
++ feature_status) == 1,
++ BMI323_FEAT_ENG_POLL,
++ BMI323_FEAT_ENG_TIMEOUT);
++ if (ret)
++ return dev_err_probe(data->dev, -EINVAL,
++ "Failed to enable feature engine\n");
++
++ return 0;
++}
++
++static void bmi323_disable(void *data_ptr)
++{
++ struct bmi323_data *data = data_ptr;
++
++ bmi323_set_mode(data, BMI323_ACCEL, ACC_GYRO_MODE_DISABLE);
++ bmi323_set_mode(data, BMI323_GYRO, ACC_GYRO_MODE_DISABLE);
++}
++
++static int bmi323_set_bw(struct bmi323_data *data,
++ enum bmi323_sensor_type sensor, enum bmi323_3db_bw bw)
++{
++ return regmap_update_bits(data->regmap, bmi323_hw[sensor].config,
++ BMI323_ACC_GYRO_CONF_BW_MSK,
++ FIELD_PREP(BMI323_ACC_GYRO_CONF_BW_MSK, bw));
++}
++
++static int bmi323_init(struct bmi323_data *data)
++{
++ int ret, val;
++
++ /*
++ * Perform soft reset to make sure the device is in a known state after
++ * start up. A delay of 1.5 ms is required after reset.
++ * See datasheet section 5.17 "Soft Reset".
++ */
++ ret = regmap_write(data->regmap, BMI323_CMD_REG, BMI323_RST_VAL);
++ if (ret)
++ return ret;
++
++ usleep_range(1500, 2000);
++
++ /*
++ * Dummy read is required to enable SPI interface after reset.
++ * See datasheet section 7.2.1 "Protocol Selection".
++ */
++ regmap_read(data->regmap, BMI323_CHIP_ID_REG, &val);
++
++ ret = regmap_read(data->regmap, BMI323_STATUS_REG, &val);
++ if (ret)
++ return ret;
++
++ if (!FIELD_GET(BMI323_STATUS_POR_MSK, val))
++ return dev_err_probe(data->dev, -EINVAL,
++ "Sensor initialization error\n");
++
++ ret = regmap_read(data->regmap, BMI323_CHIP_ID_REG, &val);
++ if (ret)
++ return ret;
++
++ if (FIELD_GET(BMI323_CHIP_ID_MSK, val) != BMI323_CHIP_ID_VAL)
++ return dev_err_probe(data->dev, -EINVAL, "Chip ID mismatch\n");
++
++ ret = bmi323_feature_engine_enable(data, true);
++ if (ret)
++ return ret;
++
++ ret = regmap_read(data->regmap, BMI323_ERR_REG, &val);
++ if (ret)
++ return ret;
++
++ if (val)
++ return dev_err_probe(data->dev, -EINVAL,
++ "Sensor power error = 0x%x\n", val);
++
++ /*
++ * Set the Bandwidth coefficient which defines the 3 dB cutoff
++ * frequency in relation to the ODR.
++ */
++ ret = bmi323_set_bw(data, BMI323_ACCEL, BMI323_BW_ODR_BY_2);
++ if (ret)
++ return ret;
++
++ ret = bmi323_set_bw(data, BMI323_GYRO, BMI323_BW_ODR_BY_2);
++ if (ret)
++ return ret;
++
++ ret = bmi323_set_odr(data, BMI323_ACCEL, 25, 0);
++ if (ret)
++ return ret;
++
++ ret = bmi323_set_odr(data, BMI323_GYRO, 25, 0);
++ if (ret)
++ return ret;
++
++ return devm_add_action_or_reset(data->dev, bmi323_disable, data);
++}
++
++int bmi323_core_probe(struct device *dev)
++{
++ static const char * const regulator_names[] = { "vdd", "vddio" };
++ struct iio_dev *indio_dev;
++ struct bmi323_data *data;
++ struct regmap *regmap;
++ int ret;
++
++ regmap = dev_get_regmap(dev, NULL);
++ if (!regmap)
++ return dev_err_probe(dev, -ENODEV, "Failed to get regmap\n");
++
++ indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
++ if (!indio_dev)
++ return dev_err_probe(dev, -ENOMEM,
++ "Failed to allocate device\n");
++
++ ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names),
++ regulator_names);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to enable regulators\n");
++
++ data = iio_priv(indio_dev);
++ data->dev = dev;
++ data->regmap = regmap;
++ mutex_init(&data->mutex);
++
++ ret = bmi323_init(data);
++ if (ret)
++ return -EINVAL;
++
++ ret = iio_read_mount_matrix(dev, &data->orientation);
++ if (ret)
++ return ret;
++
++ indio_dev->name = "bmi323-imu";
++ indio_dev->info = &bmi323_info;
++ indio_dev->channels = bmi323_channels;
++ indio_dev->num_channels = ARRAY_SIZE(bmi323_channels);
++ indio_dev->available_scan_masks = bmi323_avail_scan_masks;
++ indio_dev->modes = INDIO_DIRECT_MODE | INDIO_BUFFER_SOFTWARE;
++ dev_set_drvdata(data->dev, indio_dev);
++
++ ret = bmi323_trigger_probe(data, indio_dev);
++ if (ret)
++ return -EINVAL;
++
++ ret = devm_iio_triggered_buffer_setup_ext(data->dev, indio_dev,
++ &iio_pollfunc_store_time,
++ bmi323_trigger_handler,
++ IIO_BUFFER_DIRECTION_IN,
++ &bmi323_buffer_ops,
++ bmi323_fifo_attributes);
++ if (ret)
++ return dev_err_probe(data->dev, ret,
++ "Failed to setup trigger buffer\n");
++
++ ret = devm_iio_device_register(data->dev, indio_dev);
++ if (ret)
++ return dev_err_probe(data->dev, ret,
++ "Unable to register iio device\n");
++
++ return 0;
++}
++EXPORT_SYMBOL_NS_GPL(bmi323_core_probe, IIO_BMI323);
++
++MODULE_DESCRIPTION("Bosch BMI323 IMU driver");
++MODULE_AUTHOR("Jagath Jog J <jagathjog1996@gmail.com>");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/iio/imu/bmi323/bmi323_i2c.c b/drivers/iio/imu/bmi323/bmi323_i2c.c
+new file mode 100644
+index 000000000000..0008e186367d
+--- /dev/null
++++ b/drivers/iio/imu/bmi323/bmi323_i2c.c
+@@ -0,0 +1,121 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * I2C driver for Bosch BMI323 6-Axis IMU.
++ *
++ * Copyright (C) 2023, Jagath Jog J <jagathjog1996@gmail.com>
++ */
++
++#include <linux/i2c.h>
++#include <linux/mod_devicetable.h>
++#include <linux/module.h>
++#include <linux/regmap.h>
++
++#include "bmi323.h"
++
++struct bmi323_i2c_priv {
++ struct i2c_client *i2c;
++ u8 i2c_rx_buffer[BMI323_FIFO_LENGTH_IN_BYTES + BMI323_I2C_DUMMY];
++};
++
++/*
++ * From BMI323 datasheet section 4: Notes on the Serial Interface Support.
++ * Each I2C register read operation requires to read two dummy bytes before
++ * the actual payload.
++ */
++static int bmi323_regmap_i2c_read(void *context, const void *reg_buf,
++ size_t reg_size, void *val_buf,
++ size_t val_size)
++{
++ struct bmi323_i2c_priv *priv = context;
++ struct i2c_msg msgs[2];
++ int ret;
++
++ msgs[0].addr = priv->i2c->addr;
++ msgs[0].flags = priv->i2c->flags;
++ msgs[0].len = reg_size;
++ msgs[0].buf = (u8 *)reg_buf;
++
++ msgs[1].addr = priv->i2c->addr;
++ msgs[1].len = val_size + BMI323_I2C_DUMMY;
++ msgs[1].buf = priv->i2c_rx_buffer;
++ msgs[1].flags = priv->i2c->flags | I2C_M_RD;
++
++ ret = i2c_transfer(priv->i2c->adapter, msgs, ARRAY_SIZE(msgs));
++ if (ret < 0)
++ return -EIO;
++
++ memcpy(val_buf, priv->i2c_rx_buffer + BMI323_I2C_DUMMY, val_size);
++
++ return 0;
++}
++
++static int bmi323_regmap_i2c_write(void *context, const void *data,
++ size_t count)
++{
++ struct bmi323_i2c_priv *priv = context;
++ u8 reg;
++
++ reg = *(u8 *)data;
++ return i2c_smbus_write_i2c_block_data(priv->i2c, reg,
++ count - sizeof(u8),
++ data + sizeof(u8));
++}
++
++static struct regmap_bus bmi323_regmap_bus = {
++ .read = bmi323_regmap_i2c_read,
++ .write = bmi323_regmap_i2c_write,
++};
++
++const struct regmap_config bmi323_i2c_regmap_config = {
++ .reg_bits = 8,
++ .val_bits = 16,
++ .max_register = BMI323_CFG_RES_REG,
++ .val_format_endian = REGMAP_ENDIAN_LITTLE,
++};
++
++static int bmi323_i2c_probe(struct i2c_client *i2c)
++{
++ struct device *dev = &i2c->dev;
++ struct bmi323_i2c_priv *priv;
++ struct regmap *regmap;
++
++ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
++ if (!priv)
++ return -ENOMEM;
++
++ priv->i2c = i2c;
++ regmap = devm_regmap_init(dev, &bmi323_regmap_bus, priv,
++ &bmi323_i2c_regmap_config);
++ if (IS_ERR(regmap))
++ return dev_err_probe(dev, PTR_ERR(regmap),
++ "Failed to initialize I2C Regmap\n");
++
++ return bmi323_core_probe(dev);
++}
++
++static const struct i2c_device_id bmi323_i2c_ids[] = {
++ { "bmi323" },
++ { }
++};
++MODULE_DEVICE_TABLE(i2c, bmi323_i2c_ids);
++
++static const struct of_device_id bmi323_of_i2c_match[] = {
++ { .compatible = "bosch,bmi323" },
++ { }
++};
++MODULE_DEVICE_TABLE(of, bmi323_of_i2c_match);
++
++static struct i2c_driver bmi323_i2c_driver = {
++ .driver = {
++ .name = "bmi323",
++ .of_match_table = bmi323_of_i2c_match,
++ },
++ .probe = bmi323_i2c_probe,
++ .id_table = bmi323_i2c_ids,
++};
++module_i2c_driver(bmi323_i2c_driver);
++
++MODULE_DESCRIPTION("Bosch BMI323 IMU driver");
++MODULE_AUTHOR("Jagath Jog J <jagathjog1996@gmail.com>");
++MODULE_LICENSE("GPL");
++MODULE_IMPORT_NS(IIO_BMI323);
+diff --git a/drivers/iio/imu/bmi323/bmi323_spi.c b/drivers/iio/imu/bmi323/bmi323_spi.c
+new file mode 100644
+index 000000000000..6dc3352dd714
+--- /dev/null
++++ b/drivers/iio/imu/bmi323/bmi323_spi.c
+@@ -0,0 +1,92 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * SPI driver for Bosch BMI323 6-Axis IMU.
++ *
++ * Copyright (C) 2023, Jagath Jog J <jagathjog1996@gmail.com>
++ */
++
++#include <linux/mod_devicetable.h>
++#include <linux/module.h>
++#include <linux/regmap.h>
++#include <linux/spi/spi.h>
++
++#include "bmi323.h"
++
++/*
++ * From BMI323 datasheet section 4: Notes on the Serial Interface Support.
++ * Each SPI register read operation requires to read one dummy byte before
++ * the actual payload.
++ */
++static int bmi323_regmap_spi_read(void *context, const void *reg_buf,
++ size_t reg_size, void *val_buf,
++ size_t val_size)
++{
++ struct spi_device *spi = context;
++
++ return spi_write_then_read(spi, reg_buf, reg_size, val_buf, val_size);
++}
++
++static int bmi323_regmap_spi_write(void *context, const void *data,
++ size_t count)
++{
++ struct spi_device *spi = context;
++ u8 *data_buff = (u8 *)data;
++
++ data_buff[1] = data_buff[0];
++ return spi_write(spi, data_buff + 1, count - 1);
++}
++
++static struct regmap_bus bmi323_regmap_bus = {
++ .read = bmi323_regmap_spi_read,
++ .write = bmi323_regmap_spi_write,
++};
++
++const struct regmap_config bmi323_spi_regmap_config = {
++ .reg_bits = 8,
++ .val_bits = 16,
++ .pad_bits = 8,
++ .read_flag_mask = BIT(7),
++ .max_register = BMI323_CFG_RES_REG,
++ .val_format_endian = REGMAP_ENDIAN_LITTLE,
++};
++
++static int bmi323_spi_probe(struct spi_device *spi)
++{
++ struct device *dev = &spi->dev;
++ struct regmap *regmap;
++
++ regmap = devm_regmap_init(dev, &bmi323_regmap_bus, dev,
++ &bmi323_spi_regmap_config);
++ if (IS_ERR(regmap))
++ return dev_err_probe(dev, PTR_ERR(regmap),
++ "Failed to initialize SPI Regmap\n");
++
++ return bmi323_core_probe(dev);
++}
++
++static const struct spi_device_id bmi323_spi_ids[] = {
++ { "bmi323" },
++ { }
++};
++MODULE_DEVICE_TABLE(spi, bmi323_spi_ids);
++
++static const struct of_device_id bmi323_of_spi_match[] = {
++ { .compatible = "bosch,bmi323" },
++ { }
++};
++MODULE_DEVICE_TABLE(of, bmi323_of_spi_match);
++
++static struct spi_driver bmi323_spi_driver = {
++ .driver = {
++ .name = "bmi323",
++ .of_match_table = bmi323_of_spi_match,
++ },
++ .probe = bmi323_spi_probe,
++ .id_table = bmi323_spi_ids,
++};
++module_spi_driver(bmi323_spi_driver);
++
++MODULE_DESCRIPTION("Bosch BMI323 IMU driver");
++MODULE_AUTHOR("Jagath Jog J <jagathjog1996@gmail.com>");
++MODULE_LICENSE("GPL");
++MODULE_IMPORT_NS(IIO_BMI323);
+Make the local structures static within their respective driver files.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202311070530.qKhLTz1Y-lkp@intel.com/
+Fixes: b512c767e7bc ("iio: imu: Add driver for BMI323 IMU")
+Signed-off-by: Jagath Jog J <jagathjog1996@gmail.com>
+---
+ drivers/iio/imu/bmi323/bmi323_i2c.c | 2 +-
+ drivers/iio/imu/bmi323/bmi323_spi.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/iio/imu/bmi323/bmi323_i2c.c b/drivers/iio/imu/bmi323/bmi323_i2c.c
+index 0008e186367d..20a8001b9956 100644
+--- a/drivers/iio/imu/bmi323/bmi323_i2c.c
++++ b/drivers/iio/imu/bmi323/bmi323_i2c.c
+@@ -66,7 +66,7 @@ static struct regmap_bus bmi323_regmap_bus = {
+ .write = bmi323_regmap_i2c_write,
+ };
+
+-const struct regmap_config bmi323_i2c_regmap_config = {
++static const struct regmap_config bmi323_i2c_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 16,
+ .max_register = BMI323_CFG_RES_REG,
+diff --git a/drivers/iio/imu/bmi323/bmi323_spi.c b/drivers/iio/imu/bmi323/bmi323_spi.c
+index 6dc3352dd714..7b1e8127d0dd 100644
+--- a/drivers/iio/imu/bmi323/bmi323_spi.c
++++ b/drivers/iio/imu/bmi323/bmi323_spi.c
+@@ -41,7 +41,7 @@ static struct regmap_bus bmi323_regmap_bus = {
+ .write = bmi323_regmap_spi_write,
+ };
+
+-const struct regmap_config bmi323_spi_regmap_config = {
++static const struct regmap_config bmi323_spi_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 16,
+ .pad_bits = 8,
+diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
+index 1105918..d665a6e 100644
+--- a/drivers/iio/accel/bmc150-accel-core.c
++++ b/drivers/iio/accel/bmc150-accel-core.c
+@@ -10,6 +10,7 @@
+ #include <linux/delay.h>
+ #include <linux/slab.h>
+ #include <linux/acpi.h>
++#include <linux/dmi.h>
+ #include <linux/of_irq.h>
+ #include <linux/pm.h>
+ #include <linux/pm_runtime.h>
+@@ -1670,6 +1671,8 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq,
+ struct iio_dev *indio_dev;
+ int ret;
+
++ if (dmi_match(DMI_BOARD_NAME, "RC71L") || (dmi_match(DMI_BOARD_NAME, "AB05-AMD") && dmi_match(DMI_PRODUCT_NAME, "AIR Plus")))
++ return -ENODEV; // Abort loading bmc150 for ASUS ROG ALLY, Ayaneo Air Plus
+ indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
+ if (!indio_dev)
+ return -ENOMEM;
+diff --git a/drivers/iio/imu/bmi323/bmi323_i2c.c b/drivers/iio/imu/bmi323/bmi323_i2c.c
+index 20a8001..346ba2d 100644
+--- a/drivers/iio/imu/bmi323/bmi323_i2c.c
++++ b/drivers/iio/imu/bmi323/bmi323_i2c.c
+@@ -5,6 +5,7 @@
+ * Copyright (C) 2023, Jagath Jog J <jagathjog1996@gmail.com>
+ */
+
++#include <linux/acpi.h>
+ #include <linux/i2c.h>
+ #include <linux/mod_devicetable.h>
+ #include <linux/module.h>
+@@ -93,6 +94,12 @@ static int bmi323_i2c_probe(struct i2c_client *i2c)
+ return bmi323_core_probe(dev);
+ }
+
++static const struct acpi_device_id bmi323_acpi_match[] = {
++ {"BOSC0200"},
++ { },
++};
++MODULE_DEVICE_TABLE(acpi, bmi323_acpi_match);
++
+ static const struct i2c_device_id bmi323_i2c_ids[] = {
+ { "bmi323" },
+ { }
+@@ -109,6 +116,7 @@ static struct i2c_driver bmi323_i2c_driver = {
+ .driver = {
+ .name = "bmi323",
+ .of_match_table = bmi323_of_i2c_match,
++ .acpi_match_table = ACPI_PTR(bmi323_acpi_match),
+ },
+ .probe = bmi323_i2c_probe,
+ .id_table = bmi323_i2c_ids,
+diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
+index d752e9c..b495dba 100644
+--- a/drivers/iio/industrialio-core.c
++++ b/drivers/iio/industrialio-core.c
+@@ -13,6 +13,7 @@
+ #include <linux/cdev.h>
+ #include <linux/debugfs.h>
+ #include <linux/device.h>
++#include <linux/dmi.h>
+ #include <linux/err.h>
+ #include <linux/fs.h>
+ #include <linux/idr.h>
+@@ -571,6 +572,14 @@ static const struct iio_mount_matrix iio_mount_idmatrix = {
+ }
+ };
+
++static const struct iio_mount_matrix iio_mount_invert_x_matrix = {
++ .rotation = {
++ "-1", "0", "0",
++ "0", "1", "0",
++ "0", "0", "1"
++ }
++};
++
+ static int iio_setup_mount_idmatrix(const struct device *dev,
+ struct iio_mount_matrix *matrix)
+ {
+@@ -579,6 +588,14 @@ static int iio_setup_mount_idmatrix(const struct device *dev,
+ return 0;
+ }
+
++static int iio_setup_mount_invert_x_matrix(const struct device *dev,
++ struct iio_mount_matrix *matrix)
++{
++ *matrix = iio_mount_invert_x_matrix;
++ dev_info(dev, "using inverted X-axis mounting matrix...\n");
++ return 0;
++}
++
+ ssize_t iio_show_mount_matrix(struct iio_dev *indio_dev, uintptr_t priv,
+ const struct iio_chan_spec *chan, char *buf)
+ {
+@@ -615,6 +632,8 @@ int iio_read_mount_matrix(struct device *dev, struct iio_mount_matrix *matrix)
+ int err;
+
+ err = device_property_read_string_array(dev, "mount-matrix", matrix->rotation, len);
++ if (dmi_match(DMI_BOARD_NAME, "RC71L"))
++ return iio_setup_mount_invert_x_matrix(dev, matrix);
+ if (err == len)
+ return 0;
+
+diff --git a/drivers/iio/imu/bmi323/bmi323_core.c b/drivers/iio/imu/bmi323/bmi323_core.c
+index 0bd5ded..ded8596 100644
+--- a/drivers/iio/imu/bmi323/bmi323_core.c
++++ b/drivers/iio/imu/bmi323/bmi323_core.c
+@@ -10,6 +10,7 @@
+ #include <linux/bitfield.h>
+ #include <linux/cleanup.h>
+ #include <linux/device.h>
++#include <linux/dmi.h>
+ #include <linux/interrupt.h>
+ #include <linux/minmax.h>
+ #include <linux/module.h>
+@@ -285,6 +286,9 @@ static const int bmi323_acc_gyro_odr[][2] = {
+ { 200, 0 },
+ { 400, 0 },
+ { 800, 0 },
++ { 1600, 0},
++ { 3200, 0},
++ { 6400, 0},
+ };
+
+ static const int bmi323_acc_gyro_odrns[] = {
diff --git a/SOURCES/rpminspect.yaml b/SOURCES/rpminspect.yaml
index b4e599a..87c9272 100644
--- a/SOURCES/rpminspect.yaml
+++ b/SOURCES/rpminspect.yaml
@@ -23,7 +23,7 @@ emptyrpm:
patches:
ignore_list:
- linux-kernel-test.patch
- - patch-6.6-redhat.patch
+ - patch-6.7-redhat.patch
runpath:
ignore:
diff --git a/SOURCES/steam-deck.patch b/SOURCES/steam-deck.patch
index 9eba750..bbdb9ce 100644
--- a/SOURCES/steam-deck.patch
+++ b/SOURCES/steam-deck.patch
@@ -225,14 +225,14 @@ diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 11d076cad8a2..d03c1e1d339f 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
-@@ -191,6 +191,7 @@ obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
+@@ -199,6 +199,7 @@
obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o
obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o
+obj-$(CONFIG_SENSORS_STEAMDECK) += steamdeck-hwmon.o
obj-$(CONFIG_SENSORS_STTS751) += stts751.o
- obj-$(CONFIG_SENSORS_SY7636A) += sy7636a-hwmon.o
- obj-$(CONFIG_SENSORS_AMC6821) += amc6821.o
+ obj-$(CONFIG_SENSORS_SURFACE_FAN)+= surface_fan.o
+ obj-$(CONFIG_SENSORS_SURFACE_TEMP)+= surface_temp.o
diff --git a/drivers/hwmon/steamdeck-hwmon.c b/drivers/hwmon/steamdeck-hwmon.c
new file mode 100644
index 000000000000..fab9e9460bd4
@@ -609,10 +609,10 @@ diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
index 290186e44e6b..4d444a9e2c1f 100644
--- a/drivers/extcon/Kconfig
+++ b/drivers/extcon/Kconfig
-@@ -189,4 +189,11 @@ config EXTCON_USBC_TUSB320
- Say Y here to enable support for USB Type C cable detection extcon
- support using a TUSB320.
-
+@@ -202,4 +202,11 @@
+ The DHC (Digital Home Hub) RTD series SoC contains a type c module.
+ This driver will detect the status of the type-c port.
+
+config EXTCON_STEAMDECK
+ tristate "Steam Deck extcon support"
+ depends on MFD_STEAMDECK
@@ -625,10 +625,10 @@ diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile
index 1b390d934ca9..1c7e217f29e4 100644
--- a/drivers/extcon/Makefile
+++ b/drivers/extcon/Makefile
-@@ -25,3 +25,4 @@ obj-$(CONFIG_EXTCON_SM5502) += extcon-sm5502.o
- obj-$(CONFIG_EXTCON_USB_GPIO) += extcon-usb-gpio.o
+@@ -26,3 +26,4 @@
obj-$(CONFIG_EXTCON_USBC_CROS_EC) += extcon-usbc-cros-ec.o
obj-$(CONFIG_EXTCON_USBC_TUSB320) += extcon-usbc-tusb320.o
+ obj-$(CONFIG_EXTCON_RTK_TYPE_C) += extcon-rtk-type-c.o
+obj-$(CONFIG_EXTCON_STEAMDECK) += extcon-steamdeck.o
diff --git a/drivers/extcon/extcon-steamdeck.c b/drivers/extcon/extcon-steamdeck.c
new file mode 100644
diff --git a/SOURCES/steamdeck-oled-audio.patch b/SOURCES/steamdeck-oled-audio.patch
index 978e76a..eee4e1e 100644
--- a/SOURCES/steamdeck-oled-audio.patch
+++ b/SOURCES/steamdeck-oled-audio.patch
@@ -209,7 +209,7 @@ index a06af82b8..ae32748a5 100644
+static int acp_cs35l41_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params)
+{
-+ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
++ struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
+ struct snd_soc_card *card = rtd->card;
+ struct snd_soc_dai *codec_dai;
+ int ret, i;
@@ -218,7 +218,7 @@ index a06af82b8..ae32748a5 100644
+
+ ret = 0;
+ for (i = 0; i < num_codecs; i++) {
-+ codec_dai = asoc_rtd_to_codec(rtd, i);
++ codec_dai = snd_soc_rtd_to_codec(rtd, i);
+ if (strcmp(codec_dai->name, "cs35l41-pcm") == 0) {
+ switch (params_rate(params)) {
+ case 48000:
@@ -326,7 +326,7 @@ index a06af82b8..ae32748a5 100644
+ links[i].no_pcm = 1;
+ if (!drv_data->bt_codec_id) {
+ /* Use dummy codec if codec id not specified */
-+ links[i].codecs = &asoc_dummy_dlc;
++ links[i].codecs = &snd_soc_dummy_dlc;
+ links[i].num_codecs = 1;
+ }
i++;
@@ -344,14 +344,14 @@ index 2b3ec6594..6feef5a93 100644
DMIC_BE_ID,
};
-@@ -41,6 +42,7 @@ enum codec_endpoints {
+@@ -45,6 +45,7 @@
MAX98360A,
RT5682S,
NAU8825,
+ CS35L41,
NAU8821,
MAX98388,
- };
+ ES83XX,
@@ -53,9 +55,11 @@ enum platform_end_point {
struct acp_card_drvdata {
unsigned int hs_cpu_id;
diff --git a/SOURCES/steamdeck-oled-bt.patch b/SOURCES/steamdeck-oled-bt.patch
deleted file mode 100644
index 20cf681..0000000
--- a/SOURCES/steamdeck-oled-bt.patch
+++ /dev/null
@@ -1,239 +0,0 @@
-From fca3761de38864b0422006aaaf9ce8e0aba5e316 Mon Sep 17 00:00:00 2001
-From: Thomas Crider <gloriouseggroll@gmail.com>
-Date: Sat, 2 Dec 2023 05:07:16 -0500
-Subject: [PATCH] steamdeck-bt-unified
-
----
- drivers/bluetooth/btqca.c | 78 +++++++++++++++++++++++++++++++++++++
- drivers/bluetooth/btqca.h | 3 ++
- drivers/bluetooth/hci_qca.c | 9 ++++-
- net/bluetooth/hci_sync.c | 10 +++--
- 4 files changed, 95 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
-index 5a35ac413..de2195b72 100644
---- a/drivers/bluetooth/btqca.c
-+++ b/drivers/bluetooth/btqca.c
-@@ -205,6 +205,48 @@ static int qca_send_reset(struct hci_dev *hdev)
- return 0;
- }
-
-+static int qca_read_fw_board_id(struct hci_dev *hdev, u16 *bid)
-+{
-+ u8 cmd;
-+ struct sk_buff *skb;
-+ struct edl_event_hdr *edl;
-+ int err = 0;
-+ int bid_len;
-+
-+ bt_dev_dbg(hdev, "QCA read board ID");
-+
-+ cmd = EDL_GET_BID_REQ_CMD;
-+ skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, EDL_PATCH_CMD_LEN,
-+ &cmd, 0, HCI_INIT_TIMEOUT);
-+ if (IS_ERR(skb)) {
-+ err = PTR_ERR(skb);
-+ bt_dev_err(hdev, "Reading QCA board ID failed (%d)", err);
-+ return err;
-+ }
-+
-+ edl = skb_pull_data(skb, sizeof(*edl));
-+ if (!edl) {
-+ bt_dev_err(hdev, "QCA read board ID with no header");
-+ err = -EILSEQ;
-+ goto out;
-+ }
-+
-+ if (edl->cresp != EDL_CMD_REQ_RES_EVT ||
-+ edl->rtype != EDL_GET_BID_REQ_CMD) {
-+ bt_dev_err(hdev, "QCA Wrong packet: %d %d", edl->cresp, edl->rtype);
-+ err = -EIO;
-+ goto out;
-+ }
-+
-+ bid_len = edl->data[0];
-+ *bid = (edl->data[1] << 8) + edl->data[2];
-+ bt_dev_info(hdev, "%s: bid len = %x, bid = %x", __func__, bid_len, *bid);
-+
-+out:
-+ kfree_skb(skb);
-+ return err;
-+}
-+
- int qca_send_pre_shutdown_cmd(struct hci_dev *hdev)
- {
- struct sk_buff *skb;
-@@ -574,6 +616,30 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr)
- }
- EXPORT_SYMBOL_GPL(qca_set_bdaddr_rome);
-
-+static void qca_generate_nvm_name(struct hci_dev *hdev, char *fwname,
-+ size_t max_size, struct qca_btsoc_version ver, u16 bid)
-+{
-+ u8 rom_ver = 0;
-+ u32 soc_ver;
-+ const char *variant;
-+
-+ soc_ver = get_soc_ver(ver.soc_id, ver.rom_ver);
-+ rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f);
-+
-+ if ((ver.soc_id & 0x0000ff00) == QCA_HSP_GF_SOC_ID) /* hsp gf chip */
-+ variant = "g";
-+ else
-+ variant = "";
-+
-+ if (bid == 0x0)
-+ snprintf(fwname, max_size, "qca/hpnv%02x%s.bin", rom_ver, variant);
-+ else
-+ snprintf(fwname, max_size, "qca/hpnv%02x%s.%x",
-+ rom_ver, variant, bid);
-+
-+ bt_dev_info(hdev, "%s: nvm name is %s", __func__, fwname);
-+}
-+
- int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
- enum qca_btsoc_type soc_type, struct qca_btsoc_version ver,
- const char *firmware_name)
-@@ -582,6 +648,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
- int err;
- u8 rom_ver = 0;
- u32 soc_ver;
-+ u16 boardid = 0;
-
- bt_dev_dbg(hdev, "QCA setup on UART");
-
-@@ -605,6 +672,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
- /* Download rampatch file */
- config.type = TLV_TYPE_PATCH;
- switch (soc_type) {
-+ case QCA_QCA2066:
-+ snprintf(config.fwname, sizeof(config.fwname),
-+ "qca/hpbtfw%02x.tlv", rom_ver);
-+ break;
- case QCA_WCN3990:
- case QCA_WCN3991:
- case QCA_WCN3998:
-@@ -649,6 +720,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
- /* Give the controller some time to get ready to receive the NVM */
- msleep(10);
-
-+ if (soc_type == QCA_QCA2066)
-+ qca_read_fw_board_id(hdev, &boardid);
-+
- /* Download NVM configuration */
- config.type = TLV_TYPE_NVM;
- if (firmware_name) {
-@@ -656,6 +730,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
- "qca/%s", firmware_name);
- } else {
- switch (soc_type) {
-+ case QCA_QCA2066:
-+ qca_generate_nvm_name(hdev, config.fwname, sizeof(config.fwname),
-+ ver, boardid);
-+ break;
- case QCA_WCN3990:
- case QCA_WCN3991:
- case QCA_WCN3998:
-diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h
-index 03bff5c00..ffed9ea8a 100644
---- a/drivers/bluetooth/btqca.h
-+++ b/drivers/bluetooth/btqca.h
-@@ -13,6 +13,7 @@
- #define EDL_PATCH_TLV_REQ_CMD (0x1E)
- #define EDL_GET_BUILD_INFO_CMD (0x20)
- #define EDL_NVM_ACCESS_SET_REQ_CMD (0x01)
-+#define EDL_GET_BID_REQ_CMD (0x23)
- #define EDL_PATCH_CONFIG_CMD (0x28)
- #define MAX_SIZE_PER_TLV_SEGMENT (243)
- #define QCA_PRE_SHUTDOWN_CMD (0xFC08)
-@@ -48,6 +49,7 @@
-
- #define QCA_FW_BUILD_VER_LEN 255
-
-+#define QCA_HSP_GF_SOC_ID 0x1200
-
- enum qca_baudrate {
- QCA_BAUDRATE_115200 = 0,
-@@ -146,6 +148,7 @@ enum qca_btsoc_type {
- QCA_WCN3990,
- QCA_WCN3998,
- QCA_WCN3991,
-+ QCA_QCA2066,
- QCA_QCA6390,
- QCA_WCN6750,
- QCA_WCN6855,
-diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
-index 4b57e15f9..891c25ffc 100644
---- a/drivers/bluetooth/hci_qca.c
-+++ b/drivers/bluetooth/hci_qca.c
-@@ -1861,7 +1861,7 @@ static int qca_setup(struct hci_uart *hu)
- break;
-
- default:
-- soc_name = "ROME/QCA6390";
-+ soc_name = "ROME/QCA6390/QCA2066";
- }
- bt_dev_info(hdev, "setting up %s", soc_name);
-
-@@ -1987,6 +1987,11 @@ static const struct hci_uart_proto qca_proto = {
- .dequeue = qca_dequeue,
- };
-
-+static const struct qca_device_data qca_soc_data_qca2066 = {
-+ .soc_type = QCA_QCA2066,
-+ .num_vregs = 0,
-+};
-+
- static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = {
- .soc_type = QCA_WCN3988,
- .vregs = (struct qca_vreg []) {
-@@ -2569,6 +2574,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = {
- { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750},
- { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855},
- { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850},
-+ { .compatible = "qcom,qca2066-bt", .data = &qca_soc_data_qca2066},
- { /* sentinel */ }
- };
- MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match);
-@@ -2580,6 +2586,7 @@ static const struct acpi_device_id qca_bluetooth_acpi_match[] = {
- { "DLA16390", (kernel_ulong_t)&qca_soc_data_qca6390 },
- { "DLB16390", (kernel_ulong_t)&qca_soc_data_qca6390 },
- { "DLB26390", (kernel_ulong_t)&qca_soc_data_qca6390 },
-+ { "QCOM2066", (kernel_ulong_t)&qca_soc_data_qca2066 },
- { },
- };
- MODULE_DEVICE_TABLE(acpi, qca_bluetooth_acpi_match);
-diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
-index 9e71362c0..ac5c0cafd 100644
---- a/net/bluetooth/hci_sync.c
-+++ b/net/bluetooth/hci_sync.c
-@@ -3800,12 +3800,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev)
- if (lmp_bredr_capable(hdev)) {
- events[4] |= 0x01; /* Flow Specification Complete */
-
-- /* Don't set Disconnect Complete when suspended as that
-- * would wakeup the host when disconnecting due to
-+ /* Don't set Disconnect Complete and mode change when suspended
-+ * as that would wakeup the host when disconnecting due to
- * suspend.
- */
-- if (hdev->suspended)
-+ if (hdev->suspended){
- events[0] &= 0xef;
-+ events[2] &= 0xf7;
-+ }
- } else {
- /* Use a different default for LE-only devices */
- memset(events, 0, sizeof(events));
-@@ -5931,7 +5933,7 @@ int hci_suspend_sync(struct hci_dev *hdev)
-
- if (hci_conn_count(hdev)) {
- /* Soft disconnect everything (power off) */
-- err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF);
-+ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_USER_TERM);
- if (err) {
- /* Set state to BT_RUNNING so resume doesn't notify */
- hdev->suspend_state = BT_RUNNING;
---
-2.43.0
-
diff --git a/SOURCES/steamdeck-oled-hw-quirks.patch b/SOURCES/steamdeck-oled-hw-quirks.patch
index 644c270..28cb762 100644
--- a/SOURCES/steamdeck-oled-hw-quirks.patch
+++ b/SOURCES/steamdeck-oled-hw-quirks.patch
@@ -125,10 +125,10 @@ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdg
index b8633df418d43..77a1bedaee98c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
-@@ -416,8 +416,6 @@
- struct drm_property *regamma_tf_property;
+@@ -346,8 +346,6 @@ struct amdgpu_mode_info {
+ const enum drm_plane_type *plane_type;
};
-
+
-#define AMDGPU_MAX_BL_LEVEL 0xFF
-
struct amdgpu_backlight_privdata {
@@ -277,6 +277,89 @@ index e1a77a0d66336..8e61c86819fe2 100644
GitLab
+From f1f63fbd6a31efad6165f4b35b20ba65f25f877b Mon Sep 17 00:00:00 2001
+From: Christian Marcheselli <christianm@valvesoftware.com>
+Date: Thu, 23 Feb 2023 16:41:42 -0800
+Subject: [PATCH] Galileo-only workaround for backlight settings
+
+(cherry picked from commit 657d5054e6ed013000111db0cc2612f525d5e42d)
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index e959aa28b019..ccda049be022 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -147,7 +147,7 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
+ #define PSP_FOOTER_BYTES 0x100
+
+ /* Maximum backlight level. */
+-#define AMDGPU_MAX_BL_LEVEL 0xFFFF
++#define AMDGPU_MAX_BL_LEVEL 0xFFF
+
+ /**
+ * DOC: overview
+@@ -4103,9 +4103,12 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c
+ {
+ unsigned min, max;
+
+- if (!get_brightness_range(caps, &min, &max))
+- return brightness;
++ //if (!get_brightness_range(caps, &min, &max))
++ // return brightness;
++ min = 0;
++ max = 500000;
+
++ DRM_INFO("[%s, %d] brightness range %d - %d", __func__, __LINE__, min, max);
+ // Rescale 0..AMDGPU_MAX_BL_LEVEL to min..max
+ return min + DIV_ROUND_CLOSEST((max - min) * brightness,
+ AMDGPU_MAX_BL_LEVEL);
+--
+GitLab
+
+
+From 234d6d21b9eda7fba368e6423626db4bd04e4afd Mon Sep 17 00:00:00 2001
+From: "Pierre-Loup A. Griffais" <pgriffais@valvesoftware.com>
+Date: Tue, 7 Nov 2023 16:57:15 -0800
+Subject: [PATCH] Revert "Galileo-only workaround for backlight settings"
+
+This reverts commit f1f63fbd6a31efad6165f4b35b20ba65f25f877b.
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index b004154ba913..6d7df6ae890a 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -147,7 +147,7 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
+ #define PSP_FOOTER_BYTES 0x100
+
+ /* Maximum backlight level. */
+-#define AMDGPU_MAX_BL_LEVEL 0xFFF
++#define AMDGPU_MAX_BL_LEVEL 0xFFFF
+
+ /**
+ * DOC: overview
+@@ -4123,12 +4123,9 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c
+ {
+ unsigned min, max;
+
+- //if (!get_brightness_range(caps, &min, &max))
+- // return brightness;
+- min = 0;
+- max = 500000;
++ if (!get_brightness_range(caps, &min, &max))
++ return brightness;
+
+- DRM_INFO("[%s, %d] brightness range %d - %d", __func__, __LINE__, min, max);
+ // Rescale 0..AMDGPU_MAX_BL_LEVEL to min..max
+ return min + DIV_ROUND_CLOSEST((max - min) * brightness,
+ AMDGPU_MAX_BL_LEVEL);
+--
+GitLab
+
From ab7d646eacf9f1c745d284e293211569a4428573 Mon Sep 17 00:00:00 2001
From: "Pierre-Loup A. Griffais" <pgriffais@valvesoftware.com>
Date: Wed, 8 Nov 2023 19:45:52 -0800
@@ -352,18 +435,6 @@ index 30e7c627f21a7..472fa2c8ebcec 100644
/*
* Synopsys USB 3.x host HAPS platform has a class code of
-diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
-index b76ff08506181..95f33dadb2be2 100644
---- a/include/linux/pci_ids.h
-+++ b/include/linux/pci_ids.h
-@@ -568,7 +568,6 @@
- #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3
- #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb
- #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3
--#define PCI_DEVICE_ID_AMD_VANGOGH_USB 0x163a
- #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
- #define PCI_DEVICE_ID_AMD_LANCE 0x2000
- #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
--
GitLab
@@ -435,15 +506,6 @@ index b87797bc5874..28e6fa8d7860 100644
adev->family < AMDGPU_FAMILY_AI) {
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
mod_freesync_handle_v_update(
-@@ -8098,7 +8098,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
- int planes_count = 0, vpos, hpos;
- unsigned long flags;
- u32 target_vblank, last_flip_vblank;
-- bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state);
-+ bool vrr_active = true;//amdgpu_dm_crtc_vrr_active(acrtc_state);
- bool cursor_update = false;
- bool pflip_present = false;
- bool dirty_rects_changed = false;
--
GitLab
diff --git a/SOURCES/steamdeck-oled-wifi.patch b/SOURCES/steamdeck-oled-wifi.patch
index a25f07d..65494c6 100644
--- a/SOURCES/steamdeck-oled-wifi.patch
+++ b/SOURCES/steamdeck-oled-wifi.patch
@@ -1,18 +1,19 @@
-From 01fd63d2e9b32cd917c9036dfb703b5c4bbd872d Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "neil.shi" <neil.shi@quectel.com>
Date: Tue, 23 May 2023 16:58:08 +0800
Subject: [PATCH] wifi: ath11k: [DBS PATCH 1/6]: Indicate NAN support to
firmware
Signed-off-by: neil.shi <neil.shi@quectel.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
drivers/net/wireless/ath/ath11k/hw.c | 1 +
drivers/net/wireless/ath/ath11k/wmi.c | 1 +
- drivers/net/wireless/ath/ath11k/wmi.h | 19 +++++++++++++++++++
- 3 files changed, 21 insertions(+)
+ drivers/net/wireless/ath/ath11k/wmi.h | 10 ++++++++++
+ 3 files changed, 12 insertions(+)
diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
-index dbcc0c4035b62..6309efe4b7c1f 100644
+index d7b5ec6e6904..210759ddf2fb 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -100,6 +100,7 @@ static void ath11k_init_wmi_config_qca6390(struct ath11k_base *ab,
@@ -21,13 +22,13 @@ index dbcc0c4035b62..6309efe4b7c1f 100644
config->flag1 |= WMI_RSRC_CFG_FLAG1_BSS_CHANNEL_INFO_64;
+ config->host_service_flags |= WMI_RSRC_CFG_HOST_SERVICE_FLAG_NAN_IFACE_SUPPORT;
}
-
+
static void ath11k_hw_ipq8074_reo_setup(struct ath11k_base *ab)
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
-index 3e0a47f4a3ebd..64648e0d9484d 100644
+index 2845b4313d3a..e2dcdb3b78cc 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
-@@ -4069,6 +4132,7 @@ ath11k_wmi_copy_resource_config(struct wmi_resource_config *wmi_cfg,
+@@ -4069,6 +4069,7 @@ ath11k_wmi_copy_resource_config(struct wmi_resource_config *wmi_cfg,
wmi_cfg->sched_params = tg_cfg->sched_params;
wmi_cfg->twt_ap_pdev_count = tg_cfg->twt_ap_pdev_count;
wmi_cfg->twt_ap_sta_count = tg_cfg->twt_ap_sta_count;
@@ -36,7 +37,7 @@ index 3e0a47f4a3ebd..64648e0d9484d 100644
~(1 << WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT);
wmi_cfg->host_service_flags |= (tg_cfg->is_reg_cc_ext_event_supported <<
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
-index 8f2c07d70a4a2..042c7b0d16631 100644
+index 100bb816b592..a8354022f575 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -2330,6 +2330,7 @@ struct wmi_init_cmd {
@@ -44,10 +45,10 @@ index 8f2c07d70a4a2..042c7b0d16631 100644
#define WMI_RSRC_CFG_FLAG2_CALC_NEXT_DTIM_COUNT_SET BIT(9)
#define WMI_RSRC_CFG_FLAG1_ACK_RSSI BIT(18)
+#define WMI_RSRC_CFG_HOST_SERVICE_FLAG_NAN_IFACE_SUPPORT BIT(0)
-
+
#define WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT 4
-
-@@ -5700,6 +5710,15 @@ struct target_resource_config {
+
+@@ -5700,6 +5701,15 @@ struct target_resource_config {
u8 is_reg_cc_ext_event_supported;
u32 ema_max_vap_cnt;
u32 ema_max_profile_period;
@@ -61,17 +62,16 @@ index 8f2c07d70a4a2..042c7b0d16631 100644
+ u32 flags2;
+ u32 host_service_flags;
};
-
+
enum wmi_debug_log_param {
---
-GitLab
-From bc6d3226e567630188a41a78a12514c74babdea9 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "neil.shi" <neil.shi@quectel.com>
Date: Tue, 23 May 2023 17:01:06 +0800
Subject: [PATCH] wifi: ath11k: [DBS PATCH 2/6] wifi: ath11k: add support for
QCA206X
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
drivers/net/wireless/ath/ath11k/core.c | 64 ++++++++++++++++++++++++++
drivers/net/wireless/ath/ath11k/core.h | 1 +
@@ -80,13 +80,13 @@ Subject: [PATCH] wifi: ath11k: [DBS PATCH 2/6] wifi: ath11k: add support for
4 files changed, 82 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
-index 893fefadbba96..96ed5b7cd0048 100644
+index 0c6ecbb9a066..766cf2db5b45 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
-@@ -394,6 +394,70 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+@@ -411,6 +411,70 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.fragment_160mhz = false,
},
-
+
+ .interface_modes = BIT(NL80211_IFTYPE_STATION) |
+ BIT(NL80211_IFTYPE_AP),
+ .supports_monitor = false,
@@ -155,22 +155,22 @@ index 893fefadbba96..96ed5b7cd0048 100644
BIT(NL80211_IFTYPE_AP),
.supports_monitor = false,
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
-index bd06536f82a64..ab120329619c0 100644
+index 667d55e26156..0c0960994231 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
-@@ -144,6 +144,7 @@ enum ath11k_hw_rev {
+@@ -147,6 +147,7 @@ enum ath11k_hw_rev {
ATH11K_HW_WCN6855_HW21,
ATH11K_HW_WCN6750_HW10,
ATH11K_HW_IPQ5018_HW10,
+ ATH11K_HW_QCA206X_HW21,
};
-
+
enum ath11k_firmware_mode {
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
-index a62ee05c54097..c76f665dc369d 100644
+index afeabd6ecc67..92917d842d57 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
-@@ -434,6 +434,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
+@@ -443,6 +443,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
case ATH11K_HW_QCA6390_HW20:
case ATH11K_HW_WCN6855_HW20:
case ATH11K_HW_WCN6855_HW21:
@@ -179,19 +179,19 @@ index a62ee05c54097..c76f665dc369d 100644
break;
default:
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
-index 3c6005ab9a717..93dd259bd85ad 100644
+index 09e65c5e55c4..2fa4a99e7b08 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
-@@ -27,6 +27,8 @@
+@@ -28,6 +28,8 @@
#define QCN9074_DEVICE_ID 0x1104
#define WCN6855_DEVICE_ID 0x1103
-
+
+#define SUB_VERSION 0x1910010
+
static const struct pci_device_id ath11k_pci_id_table[] = {
{ PCI_VDEVICE(QCOM, QCA6390_DEVICE_ID) },
{ PCI_VDEVICE(QCOM, WCN6855_DEVICE_ID) },
-@@ -806,7 +808,19 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+@@ -809,7 +811,19 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
break;
case 0x10:
case 0x11:
@@ -212,18 +212,16 @@ index 3c6005ab9a717..93dd259bd85ad 100644
break;
default:
goto unsupported_wcn6855_soc;
-@@ -1017,6 +1031,7 @@ static struct pci_driver ath11k_pci_driver = {
+@@ -1021,6 +1035,7 @@ static struct pci_driver ath11k_pci_driver = {
static int ath11k_pci_init(void)
{
int ret;
+ u32 sub_version;
-
+
ret = pci_register_driver(&ath11k_pci_driver);
if (ret)
---
-GitLab
-From 707933ef2a20db8f7c3d9d3c654a8dcb2f582436 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "neil.shi" <neil.shi@quectel.com>
Date: Tue, 23 May 2023 17:04:27 +0800
Subject: [PATCH] wifi: ath11k: [DBS PATCH 3/6]: support 2 stations and report
@@ -234,18 +232,19 @@ num_rxmda_per_pdev in hw_params, and report addresses
for these interfaces.
Signed-off-by: neil.shi <neil.shi@quectel.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
drivers/net/wireless/ath/ath11k/mac.c | 83 ++++++++++++++++++++-------
1 file changed, 63 insertions(+), 20 deletions(-)
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
-index cb77dd6ce9665..c7fa31deefacd 100644
+index 71c6dab1aedb..967dbe3cfe94 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
-@@ -8774,6 +8774,31 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar,
+@@ -9284,6 +9284,31 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar,
return 0;
}
-
+
+static void ath11k_mac_setup_mac_address_list(struct ath11k *ar)
+{
+ struct mac_address *addresses;
@@ -274,10 +273,10 @@ index cb77dd6ce9665..c7fa31deefacd 100644
static int ath11k_mac_setup_iface_combinations(struct ath11k *ar)
{
struct ath11k_base *ab = ar->ab;
-@@ -8793,28 +8818,43 @@ static int ath11k_mac_setup_iface_combinations(struct ath11k *ar)
+@@ -9303,28 +9328,43 @@ static int ath11k_mac_setup_iface_combinations(struct ath11k *ar)
return -ENOMEM;
}
-
+
- limits[0].max = 1;
- limits[0].types |= BIT(NL80211_IFTYPE_STATION);
-
@@ -286,13 +285,13 @@ index cb77dd6ce9665..c7fa31deefacd 100644
+ if (ab->hw_params.single_pdev_only && ar->ab->hw_params.num_rxmda_per_pdev > 1) {
+ limits[0].max = 2;
+ limits[0].types |= BIT(NL80211_IFTYPE_STATION);
-
+
- if (IS_ENABLED(CONFIG_MAC80211_MESH) &&
- ab->hw_params.interface_modes & BIT(NL80211_IFTYPE_MESH_POINT))
- limits[1].types |= BIT(NL80211_IFTYPE_MESH_POINT);
+ limits[1].max = 1;
+ limits[1].types |= BIT(NL80211_IFTYPE_AP);
-
+
- combinations[0].limits = limits;
- combinations[0].n_limits = n_limits;
- combinations[0].max_interfaces = 16;
@@ -335,56 +334,47 @@ index cb77dd6ce9665..c7fa31deefacd 100644
+ BIT(NL80211_CHAN_WIDTH_80P80) |
+ BIT(NL80211_CHAN_WIDTH_160);
+ }
-
+
ar->hw->wiphy->iface_combinations = combinations;
ar->hw->wiphy->n_iface_combinations = 1;
-@@ -8875,6 +8915,8 @@ static void __ath11k_mac_unregister(struct ath11k *ar)
+@@ -9389,6 +9429,8 @@ static void __ath11k_mac_unregister(struct ath11k *ar)
kfree(ar->hw->wiphy->iface_combinations[0].limits);
kfree(ar->hw->wiphy->iface_combinations);
-
+
+ kfree(ar->hw->wiphy->addresses);
+
SET_IEEE80211_DEV(ar->hw, NULL);
}
-
-@@ -8917,6 +8959,7 @@ static int __ath11k_mac_register(struct ath11k *ar)
+
+@@ -9431,6 +9473,7 @@ static int __ath11k_mac_register(struct ath11k *ar)
ath11k_pdev_caps_update(ar);
-
+
SET_IEEE80211_PERM_ADDR(ar->hw, ar->mac_addr);
+ ath11k_mac_setup_mac_address_list(ar);
-
+
SET_IEEE80211_DEV(ar->hw, ab->dev);
+
---
-GitLab
-
-From 6591470d389d674f100568393112c169841db26f Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "neil.shi" <neil.shi@quectel.com>
Date: Tue, 23 May 2023 17:07:21 +0800
Subject: [PATCH] wifi: ath11k: [DBS PATCH 6/6]: send coex config to firmware
for QCA206X
Signed-off-by: neil.shi <neil.shi@quectel.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- drivers/net/wireless/ath/ath11k/core.c | 27 +++++++++
+ drivers/net/wireless/ath/ath11k/core.c | 26 +++++++++
drivers/net/wireless/ath/ath11k/hw.h | 1 +
drivers/net/wireless/ath/ath11k/wmi.c | 26 +++++++++
drivers/net/wireless/ath/ath11k/wmi.h | 77 ++++++++++++++++++++++++++
- 4 files changed, 131 insertions(+)
+ 4 files changed, 130 insertions(+)
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
-index 96ed5b7cd0048..849c7c12198e0 100644
+index 766cf2db5b45..e3c55b1e792e 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
-@@ -16,6 +16,7 @@
- #include "debug.h"
- #include "hif.h"
- #include "wow.h"
-+#include "wmi.h"
-
- unsigned int ath11k_debug_mask;
- EXPORT_SYMBOL(ath11k_debug_mask);
-@@ -115,6 +116,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+@@ -122,6 +122,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tcl_ring_retry = true,
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
@@ -392,7 +382,7 @@ index 96ed5b7cd0048..849c7c12198e0 100644
},
{
.hw_rev = ATH11K_HW_IPQ6018_HW10,
-@@ -204,6 +206,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+@@ -205,6 +206,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = false,
@@ -400,7 +390,7 @@ index 96ed5b7cd0048..849c7c12198e0 100644
},
{
.name = "qca6390 hw2.0",
-@@ -371,6 +374,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+@@ -372,6 +374,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = false,
@@ -408,7 +398,7 @@ index 96ed5b7cd0048..849c7c12198e0 100644
},
{
.name = "wcn6855 hw2.0",
-@@ -418,6 +422,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+@@ -435,6 +438,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.fw_wmi_diag_event = true,
.current_cc_support = true,
.dbr_debug_support = false,
@@ -416,7 +406,7 @@ index 96ed5b7cd0048..849c7c12198e0 100644
},
{
.name = "qca206x hw2.1",
-@@ -456,6 +525,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+@@ -521,6 +525,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
.tx_ring_size = DP_TCL_DATA_RING_SIZE,
.smp2p_wow_exit = false,
.support_fw_mac_sequence = true,
@@ -424,10 +414,10 @@ index 96ed5b7cd0048..849c7c12198e0 100644
},
{
.name = "wcn6855 hw2.1",
-@@ -1448,6 +1454,18 @@ static void ath11k_core_pdev_destroy(struct ath11k_base *ab)
+@@ -1637,6 +1642,18 @@ static void ath11k_core_pdev_destroy(struct ath11k_base *ab)
ath11k_debugfs_pdev_destroy(ab);
}
-
+
+static int ath11k_core_config_coex_isolation(struct ath11k_base *ab)
+{
+ struct ath11k *ar = ath11k_ab_to_ar(ab, 0);
@@ -443,10 +433,10 @@ index 96ed5b7cd0048..849c7c12198e0 100644
static int ath11k_core_start(struct ath11k_base *ab)
{
int ret;
-@@ -1545,6 +1563,15 @@ static int ath11k_core_start(struct ath11k_base *ab)
+@@ -1734,6 +1751,15 @@ static int ath11k_core_start(struct ath11k_base *ab)
goto err_reo_cleanup;
}
-
+
+ if (ab->hw_params.coex_isolation) {
+ ret = ath11k_core_config_coex_isolation(ab);
+ if (ret) {
@@ -457,13 +447,13 @@ index 96ed5b7cd0048..849c7c12198e0 100644
+ }
+
return 0;
-
+
err_reo_cleanup:
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
-index 8a3f24862edc4..4da64301d2974 100644
+index d51a99669dd6..9b96ce7f6538 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
-@@ -200,6 +200,7 @@ struct ath11k_hw_params {
+@@ -206,6 +206,7 @@ struct ath11k_hw_params {
bool fw_wmi_diag_event;
bool current_cc_support;
bool dbr_debug_support;
@@ -472,10 +462,10 @@ index 8a3f24862edc4..4da64301d2974 100644
const struct cfg80211_sar_capa *bios_sar_capa;
bool m3_fw_support;
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
-index 9037919a3ae90..5050c0dfe2508 100644
+index e2dcdb3b78cc..cf292442fab4 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
-@@ -8880,6 +8880,32 @@
+@@ -8880,6 +8880,32 @@ ath11k_wmi_send_unit_test_cmd(struct ath11k *ar,
return ret;
}
@@ -509,11 +499,11 @@ index 9037919a3ae90..5050c0dfe2508 100644
{
struct ath11k_vif *arvif;
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
-index 373d38538db0c..d63073eaaec3d 100644
+index a8354022f575..a18723fe9aae 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
-@@ -6322,6 +6323,82 @@ enum wmi_sta_keepalive_method {
-
+@@ -6332,6 +6332,82 @@ enum wmi_sta_keepalive_method {
+
const void **ath11k_wmi_tlv_parse_alloc(struct ath11k_base *ab, const void *ptr,
size_t len, gfp_t gfp);
+enum wmi_coex_config_type {
@@ -595,7 +585,7 @@ index 373d38538db0c..d63073eaaec3d 100644
int ath11k_wmi_cmd_send(struct ath11k_pdev_wmi *wmi, struct sk_buff *skb,
u32 cmd_id);
struct sk_buff *ath11k_wmi_alloc_skb(struct ath11k_wmi_base *wmi_sc, u32 len);
-@@ -6171,6 +6247,7 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar,
+@@ -6493,6 +6569,7 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar,
const u8 mac_addr[ETH_ALEN]);
int ath11k_wmi_fw_dbglog_cfg(struct ath11k *ar, u32 *module_id_bitmap,
struct ath11k_fw_dbglog *dbglog);
@@ -603,32 +593,28 @@ index 373d38538db0c..d63073eaaec3d 100644
int ath11k_wmi_wow_config_pno(struct ath11k *ar, u32 vdev_id,
struct wmi_pno_scan_req *pno_scan);
int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id);
---
-GitLab
-
-From 0f02da05404b27449b01cc3b3a992dcb6f795287 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "neil.shi" <neil.shi@quectel.com>
Date: Tue, 23 May 2023 17:41:00 +0800
Subject: [PATCH] wifi: ath11k: merge all patches to 6.1.11, fix exceptions and
compilation errors
Signed-off-by: neil.shi <neil.shi@quectel.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- drivers/net/wireless/ath/ath11k/debugfs.c | 1 +
- drivers/net/wireless/ath/ath11k/hw.h | 6 ++++
- drivers/net/wireless/ath/ath11k/pci.c | 35 ++++++++++++++++-------
- drivers/net/wireless/ath/ath11k/pcic.c | 11 +++++++
- 4 files changed, 43 insertions(+), 10 deletions(-)
+ drivers/net/wireless/ath/ath11k/pci.c | 35 ++++++++++++++++++--------
+ drivers/net/wireless/ath/ath11k/pcic.c | 11 ++++++++
+ 2 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
-index 61c8840a0559..798c1010b215 100644
+index 2fa4a99e7b08..88f11f6e47b7 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
-@@ -228,7 +228,12 @@ static u32 ath11k_pci_window_read32(struct ath11k_base *ab, u32 offset)
+@@ -110,7 +110,12 @@ static u32 ath11k_pci_window_read32(struct ath11k_base *ab, u32 offset)
struct ath11k_pci *ab_pci = ath11k_pci_priv(ab);
u32 window_start, val;
-
+
- window_start = ath11k_pci_get_window_start(ab, offset);
+ if (ab->hw_params.static_window_map)
+ window_start = ath11k_pci_get_window_start(ab, offset);
@@ -636,19 +622,19 @@ index 61c8840a0559..798c1010b215 100644
+ window_start = ATH11K_PCI_WINDOW_START;
+
+ //window_start = ath11k_pci_get_window_start(ab, offset);
-
+
if (window_start == ATH11K_PCI_WINDOW_START) {
spin_lock_bh(&ab_pci->window_lock);
-@@ -852,6 +857,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+@@ -735,6 +740,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
u32 soc_hw_version_major, soc_hw_version_minor, addr;
const struct ath11k_pci_ops *pci_ops;
int ret;
+ u32 sub_version;
+ int ops_init = 0;
-
+
ab = ath11k_core_alloc(&pdev->dev, sizeof(*ab_pci), ATH11K_BUS_PCI);
-
-@@ -899,8 +906,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+
+@@ -781,8 +788,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
case QCA6390_DEVICE_ID:
ath11k_pci_read_hw_version(ab, &soc_hw_version_major,
&soc_hw_version_minor);
@@ -659,7 +645,7 @@ index 61c8840a0559..798c1010b215 100644
ab->hw_rev = ATH11K_HW_QCA6390_HW20;
break;
default:
-@@ -920,6 +927,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+@@ -802,6 +809,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
ab->id.bdf_search = ATH11K_BDF_SEARCH_BUS_AND_BOARD;
ath11k_pci_read_hw_version(ab, &soc_hw_version_major,
&soc_hw_version_minor);
@@ -673,7 +659,7 @@ index 61c8840a0559..798c1010b215 100644
switch (soc_hw_version_major) {
case 2:
switch (soc_hw_version_minor) {
-@@ -930,7 +944,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+@@ -812,7 +826,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
case 0x10:
case 0x11:
//ab->hw_rev = ATH11K_HW_WCN6855_HW21;
@@ -682,18 +668,18 @@ index 61c8840a0559..798c1010b215 100644
ath11k_dbg(ab, ATH11K_DBG_PCI, "sub_version 0x%x\n", sub_version);
switch (sub_version) {
case 0x1019A0E1:
-@@ -955,7 +969,6 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+@@ -837,7 +851,6 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
goto err_pci_free_region;
}
-
+
- pci_ops = &ath11k_pci_ops_qca6390;
break;
default:
dev_err(&pdev->dev, "Unknown PCI device found: 0x%x\n",
-@@ -964,11 +977,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+@@ -846,11 +859,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
goto err_pci_free_region;
}
-
+
- ret = ath11k_pcic_register_pci_ops(ab, pci_ops);
- if (ret) {
- ath11k_err(ab, "failed to register PCI ops: %d\n", ret);
@@ -706,11 +692,11 @@ index 61c8840a0559..798c1010b215 100644
+ goto err_pci_free_region;
+ }
+ }
-
+
ret = ath11k_pcic_init_msi_config(ab);
if (ret) {
diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c
-index 063e97815455..82233e30c835 100644
+index 16d1e332193f..406eef086836 100644
--- a/drivers/net/wireless/ath/ath11k/pcic.c
+++ b/drivers/net/wireless/ath/ath11k/pcic.c
@@ -115,6 +115,17 @@ static const struct ath11k_msi_config ath11k_msi_config[] = {
@@ -729,21 +715,21 @@ index 063e97815455..82233e30c835 100644
+ .hw_rev = ATH11K_HW_QCA206X_HW21,
+ },
};
-
+
int ath11k_pcic_init_msi_config(struct ath11k_base *ab)
---
-GitLab
-From d4d6f1583876b3702603939ac41b98498cf6dd10 Mon Sep 17 00:00:00 2001
+
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Crider <gloriouseggroll@gmail.com>
Date: Thu, 7 Dec 2023 17:22:18 -0500
Subject: [PATCH] wifi-fixup
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
drivers/net/wireless/ath/ath11k/core.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
-index 77daa0882..765b91e8d 100644
+index e3c55b1e792e..b286707af6e2 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -420,7 +420,8 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
@@ -774,6 +760,3 @@ index 77daa0882..765b91e8d 100644
.single_pdev_only = true,
.rxdma1_enable = false,
.num_rxmda_per_pdev = 2,
---
-2.43.0
-
diff --git a/SOURCES/t2linux.patch b/SOURCES/t2linux.patch
new file mode 100644
index 0000000..ba58dde
--- /dev/null
+++ b/SOURCES/t2linux.patch
@@ -0,0 +1,12153 @@
+From 80093f92d42d77f27de6b204550baf4622070732 Mon Sep 17 00:00:00 2001
+From: Aditya Garg <gargaditya08@live.com>
+Date: Tue, 12 Sep 2023 12:26:12 +0530
+Subject: [PATCH] Add apple-bce driver
+
+---
+ drivers/staging/apple-bce/Makefile | 28 +
+ drivers/staging/apple-bce/apple_bce.c | 443 ++++++++++
+ drivers/staging/apple-bce/apple_bce.h | 38 +
+ drivers/staging/apple-bce/audio/audio.c | 711 ++++++++++++++++
+ drivers/staging/apple-bce/audio/audio.h | 123 +++
+ drivers/staging/apple-bce/audio/description.h | 42 +
+ drivers/staging/apple-bce/audio/pcm.c | 308 +++++++
+ drivers/staging/apple-bce/audio/pcm.h | 16 +
+ drivers/staging/apple-bce/audio/protocol.c | 347 ++++++++
+ drivers/staging/apple-bce/audio/protocol.h | 147 ++++
+ .../staging/apple-bce/audio/protocol_bce.c | 226 ++++++
+ .../staging/apple-bce/audio/protocol_bce.h | 72 ++
+ drivers/staging/apple-bce/mailbox.c | 151 ++++
+ drivers/staging/apple-bce/mailbox.h | 53 ++
+ drivers/staging/apple-bce/queue.c | 390 +++++++++
+ drivers/staging/apple-bce/queue.h | 177 ++++
+ drivers/staging/apple-bce/queue_dma.c | 220 +++++
+ drivers/staging/apple-bce/queue_dma.h | 50 ++
+ drivers/staging/apple-bce/vhci/command.h | 204 +++++
+ drivers/staging/apple-bce/vhci/queue.c | 268 +++++++
+ drivers/staging/apple-bce/vhci/queue.h | 76 ++
+ drivers/staging/apple-bce/vhci/transfer.c | 661 +++++++++++++++
+ drivers/staging/apple-bce/vhci/transfer.h | 71 ++
+ drivers/staging/apple-bce/vhci/vhci.c | 759 ++++++++++++++++++
+ drivers/staging/apple-bce/vhci/vhci.h | 48 ++
+ 25 files changed, 5629 insertions(+)
+ create mode 100644 drivers/staging/apple-bce/Makefile
+ create mode 100644 drivers/staging/apple-bce/apple_bce.c
+ create mode 100644 drivers/staging/apple-bce/apple_bce.h
+ create mode 100644 drivers/staging/apple-bce/audio/audio.c
+ create mode 100644 drivers/staging/apple-bce/audio/audio.h
+ create mode 100644 drivers/staging/apple-bce/audio/description.h
+ create mode 100644 drivers/staging/apple-bce/audio/pcm.c
+ create mode 100644 drivers/staging/apple-bce/audio/pcm.h
+ create mode 100644 drivers/staging/apple-bce/audio/protocol.c
+ create mode 100644 drivers/staging/apple-bce/audio/protocol.h
+ create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.c
+ create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.h
+ create mode 100644 drivers/staging/apple-bce/mailbox.c
+ create mode 100644 drivers/staging/apple-bce/mailbox.h
+ create mode 100644 drivers/staging/apple-bce/queue.c
+ create mode 100644 drivers/staging/apple-bce/queue.h
+ create mode 100644 drivers/staging/apple-bce/queue_dma.c
+ create mode 100644 drivers/staging/apple-bce/queue_dma.h
+ create mode 100644 drivers/staging/apple-bce/vhci/command.h
+ create mode 100644 drivers/staging/apple-bce/vhci/queue.c
+ create mode 100644 drivers/staging/apple-bce/vhci/queue.h
+ create mode 100644 drivers/staging/apple-bce/vhci/transfer.c
+ create mode 100644 drivers/staging/apple-bce/vhci/transfer.h
+ create mode 100644 drivers/staging/apple-bce/vhci/vhci.c
+ create mode 100644 drivers/staging/apple-bce/vhci/vhci.h
+
+diff --git a/drivers/staging/apple-bce/Makefile b/drivers/staging/apple-bce/Makefile
+new file mode 100644
+index 000000000..a6a656f06
+--- /dev/null
++++ b/drivers/staging/apple-bce/Makefile
+@@ -0,0 +1,28 @@
++# Builds the apple-bce kernel module, either through Kbuild or stand-alone.
++modname := apple-bce
++obj-m += $(modname).o
++
++# Every object listed here is linked into the single apple-bce module.
++apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o
++
++# WITHOUT_NVME_PATCH compiles out the pci_enable_device_mem()/pci_disable_device()
++# calls on PCI function 0 that apple_bce.c guards with #ifndef WITHOUT_NVME_PATCH.
++MY_CFLAGS += -DWITHOUT_NVME_PATCH
++#MY_CFLAGS += -g -DDEBUG
++ccflags-y += ${MY_CFLAGS}
++CC += ${MY_CFLAGS}
++
++# Use KERNELRELEASE when it is defined, otherwise fall back to the running kernel.
++KVERSION := $(KERNELRELEASE)
++ifeq ($(origin KERNELRELEASE), undefined)
++KVERSION := $(shell uname -r)
++endif
++
++# Kernel build tree for the selected version and the directory of this module.
++KDIR := /lib/modules/$(KVERSION)/build
++PWD := $(shell pwd)
++
++.PHONY: all
++
++# The targets below delegate to the kernel build system with M= pointing here.
++all:
++ $(MAKE) -C $(KDIR) M=$(PWD) modules
++
++clean:
++ $(MAKE) -C $(KDIR) M=$(PWD) clean
++
++install:
++ $(MAKE) -C $(KDIR) M=$(PWD) modules_install
+diff --git a/drivers/staging/apple-bce/apple_bce.c b/drivers/staging/apple-bce/apple_bce.c
+new file mode 100644
+index 000000000..ad89632df
+--- /dev/null
++++ b/drivers/staging/apple-bce/apple_bce.c
+@@ -0,0 +1,443 @@
++#include "apple_bce.h"
++#include <linux/module.h>
++#include <linux/crc32.h>
++#include "audio/audio.h"
++#include <linux/version.h>
++
++static dev_t bce_chrdev;
++static struct class *bce_class;
++
++struct apple_bce_device *global_bce;
++
++static int bce_create_command_queues(struct apple_bce_device *bce);
++static void bce_free_command_queues(struct apple_bce_device *bce);
++static irqreturn_t bce_handle_mb_irq(int irq, void *dev);
++static irqreturn_t bce_handle_dma_irq(int irq, void *dev);
++static int bce_fw_version_handshake(struct apple_bce_device *bce);
++static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq);
++
++/*
++ * apple_bce_probe() - PCI probe callback for the BCE function.
++ *
++ * Enables the function, allocates MSI vectors (at least 5 must be granted;
++ * vectors 0 and 4 are requested below), creates the class device, maps
++ * BAR 4 (mailbox) and BAR 2 (DMA), requests the mailbox and DMA interrupts,
++ * makes function 0 a bus master, performs the firmware version handshake,
++ * creates the command queues and finally the VHCI.
++ *
++ * Returns 0 on success or a negative errno. Each failure path releases only
++ * what had been acquired before it.
++ */
++static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id)
++{
++	struct apple_bce_device *bce = NULL;
++	int status = 0;
++	int nvec;
++
++	pr_info("apple-bce: capturing our device\n");
++
++	if (pci_enable_device(dev))
++		return -ENODEV;
++	if (pci_request_regions(dev, "apple-bce")) {
++		/* Nothing but the device enable has to be undone here. */
++		status = -ENODEV;
++		goto fail_disable;
++	}
++	pci_set_master(dev);
++	nvec = pci_alloc_irq_vectors(dev, 1, 8, PCI_IRQ_MSI);
++	if (nvec < 5) {
++		status = -EINVAL;
++		goto fail;
++	}
++
++	bce = kzalloc(sizeof(*bce), GFP_KERNEL);
++	if (!bce) {
++		status = -ENOMEM;
++		goto fail;
++	}
++
++	bce->pci = dev;
++	pci_set_drvdata(dev, bce);
++
++	bce->devt = bce_chrdev;
++	bce->dev = device_create(bce_class, &dev->dev, bce->devt, NULL, "apple-bce");
++	if (IS_ERR_OR_NULL(bce->dev)) {
++		/* The errno is encoded in the failed device pointer, not in the class. */
++		status = bce->dev ? PTR_ERR(bce->dev) : -ENODEV;
++		goto fail;
++	}
++
++	bce->reg_mem_mb = pci_iomap(dev, 4, 0);
++	bce->reg_mem_dma = pci_iomap(dev, 2, 0);
++
++	if (IS_ERR_OR_NULL(bce->reg_mem_mb) || IS_ERR_OR_NULL(bce->reg_mem_dma)) {
++		/* status stays 0 here and is turned into -EINVAL at the end. */
++		dev_warn(&dev->dev, "apple-bce: Failed to pci_iomap required regions\n");
++		goto fail;
++	}
++
++	bce_mailbox_init(&bce->mbox, bce->reg_mem_mb);
++	bce_timestamp_init(&bce->timestamp, bce->reg_mem_mb);
++
++	spin_lock_init(&bce->queues_lock);
++	ida_init(&bce->queue_ida);
++
++	if ((status = pci_request_irq(dev, 0, bce_handle_mb_irq, NULL, dev, "bce_mbox")))
++		goto fail;
++	if ((status = pci_request_irq(dev, 4, NULL, bce_handle_dma_irq, dev, "bce_dma")))
++		goto fail_interrupt_0;
++
++	if ((status = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(37)))) {
++		dev_warn(&dev->dev, "dma: Setting mask failed\n");
++		goto fail_interrupt;
++	}
++
++	/* Gets the function 0's interface. This is needed because Apple only accepts DMA on our function if function 0
++	   is a bus master, so we need to work around this. */
++	bce->pci0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
++#ifndef WITHOUT_NVME_PATCH
++	if ((status = pci_enable_device_mem(bce->pci0))) {
++		dev_warn(&dev->dev, "apple-bce: failed to enable function 0\n");
++		goto fail_dev0;
++	}
++#endif
++	pci_set_master(bce->pci0);
++
++	bce_timestamp_start(&bce->timestamp, true);
++
++	if ((status = bce_fw_version_handshake(bce)))
++		goto fail_ts;
++	pr_info("apple-bce: handshake done\n");
++
++	if ((status = bce_create_command_queues(bce))) {
++		pr_info("apple-bce: Creating command queues failed\n");
++		goto fail_ts;
++	}
++
++	global_bce = bce;
++
++	/* NOTE(review): the result of bce_vhci_create() is not checked (unchanged). */
++	bce_vhci_create(bce, &bce->vhci);
++
++	return 0;
++
++fail_ts:
++	bce_timestamp_stop(&bce->timestamp);
++#ifndef WITHOUT_NVME_PATCH
++	pci_disable_device(bce->pci0);
++fail_dev0:
++#endif
++	pci_dev_put(bce->pci0);
++fail_interrupt:
++	pci_free_irq(dev, 4, dev);
++fail_interrupt_0:
++	pci_free_irq(dev, 0, dev);
++fail:
++	if (bce) {
++		/* Only destroy the class device when device_create() succeeded. */
++		if (!IS_ERR_OR_NULL(bce->dev))
++			device_destroy(bce_class, bce->devt);
++
++		if (!IS_ERR_OR_NULL(bce->reg_mem_mb))
++			pci_iounmap(dev, bce->reg_mem_mb);
++		if (!IS_ERR_OR_NULL(bce->reg_mem_dma))
++			pci_iounmap(dev, bce->reg_mem_dma);
++
++		kfree(bce);
++	}
++
++	pci_free_irq_vectors(dev);
++	pci_release_regions(dev);
++fail_disable:
++	pci_disable_device(dev);
++
++	if (!status)
++		status = -EINVAL;
++	return status;
++}
++
++/*
++ * Allocates the command completion queue (id 0) and the command queue
++ * (id 1), publishes them in bce->queues[] and registers both with the
++ * device through the mailbox.
++ *
++ * Returns 0 on success or a negative errno. On failure the temporary
++ * memcfg buffer and both queues are released and every reference to them
++ * in @bce is cleared.
++ */
++static int bce_create_command_queues(struct apple_bce_device *bce)
++{
++	int status;
++	struct bce_queue_memcfg *cfg = NULL;
++
++	bce->cmd_cq = bce_alloc_cq(bce, 0, 0x20);
++	bce->cmd_cmdq = bce_alloc_cmdq(bce, 1, 0x20);
++	if (bce->cmd_cq == NULL || bce->cmd_cmdq == NULL) {
++		status = -ENOMEM;
++		goto err;
++	}
++	bce->queues[0] = (struct bce_queue *) bce->cmd_cq;
++	bce->queues[1] = (struct bce_queue *) bce->cmd_cmdq->sq;
++
++	/* Scratch descriptor handed to the device once per queue registration. */
++	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
++	if (!cfg) {
++		status = -ENOMEM;
++		goto err;
++	}
++	bce_get_cq_memcfg(bce->cmd_cq, cfg);
++	if ((status = bce_register_command_queue(bce, cfg, false)))
++		goto err;
++	bce_get_sq_memcfg(bce->cmd_cmdq->sq, bce->cmd_cq, cfg);
++	if ((status = bce_register_command_queue(bce, cfg, true)))
++		goto err;
++	kfree(cfg);
++
++	return 0;
++
++err:
++	/* kfree(NULL) is a no-op, so this also covers the early failures. */
++	kfree(cfg);
++	/* Unpublish the queues first so nothing walks bce->queues[] into freed memory. */
++	bce->queues[0] = NULL;
++	bce->queues[1] = NULL;
++	if (bce->cmd_cq)
++		bce_free_cq(bce, bce->cmd_cq);
++	if (bce->cmd_cmdq)
++		bce_free_cmdq(bce, bce->cmd_cmdq);
++	bce->cmd_cq = NULL;
++	bce->cmd_cmdq = NULL;
++	return status;
++}
++
++/*
++ * Releases the command completion queue and the command queue created by
++ * bce_create_command_queues() and clears every pointer to them, including
++ * both bce->queues[] slots, so no stale reference to freed memory remains.
++ */
++static void bce_free_command_queues(struct apple_bce_device *bce)
++{
++	bce_free_cq(bce, bce->cmd_cq);
++	bce_free_cmdq(bce, bce->cmd_cmdq);
++	bce->cmd_cq = NULL;
++	bce->cmd_cmdq = NULL;
++	bce->queues[0] = NULL;
++	/* Slot 1 pointed at the submission queue owned by the freed cmd_cmdq. */
++	bce->queues[1] = NULL;
++}
++
++/*
++ * Mailbox interrupt handler. @dev is the pci_dev cookie passed to
++ * pci_request_irq(); the interrupt is forwarded to the mailbox code.
++ */
++static irqreturn_t bce_handle_mb_irq(int irq, void *dev)
++{
++	struct apple_bce_device *owner = pci_get_drvdata(dev);
++
++	bce_mailbox_handle_interrupt(&owner->mbox);
++
++	return IRQ_HANDLED;
++}
++
++/*
++ * DMA interrupt handler. Walks every queue slot under queues_lock and
++ * processes completions on each registered completion queue.
++ */
++static irqreturn_t bce_handle_dma_irq(int irq, void *dev)
++{
++	struct apple_bce_device *bce = pci_get_drvdata(dev);
++	int slot;
++
++	spin_lock(&bce->queues_lock);
++	for (slot = 0; slot < BCE_MAX_QUEUE_COUNT; slot++) {
++		struct bce_queue *q = bce->queues[slot];
++
++		/* Skip empty slots and anything that is not a completion queue. */
++		if (!q || q->type != BCE_QUEUE_CQ)
++			continue;
++		bce_handle_cq_completions(bce, (struct bce_queue_cq *) q);
++	}
++	spin_unlock(&bce->queues_lock);
++
++	return IRQ_HANDLED;
++}
++
++/*
++ * Sends BC_PROTOCOL_VERSION to the firmware through the mailbox and checks
++ * that the reply echoes the same message type and version.
++ *
++ * Returns 0 on success, the mailbox error if the send failed, or -EINVAL if
++ * the reply does not match.
++ */
++static int bce_fw_version_handshake(struct apple_bce_device *bce)
++{
++	u64 reply;
++	int err;
++
++	err = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SET_FW_PROTOCOL_VERSION, BC_PROTOCOL_VERSION),
++			       &reply);
++	if (err)
++		return err;
++
++	if (BCE_MB_TYPE(reply) == BCE_MB_SET_FW_PROTOCOL_VERSION &&
++	    BCE_MB_VALUE(reply) == BC_PROTOCOL_VERSION)
++		return 0;
++
++	pr_err("apple-bce: FW version handshake failed %x:%llx\n", BCE_MB_TYPE(reply), BCE_MB_VALUE(reply));
++	return -EINVAL;
++}
++
++/*
++ * Registers a command queue with the device: maps @cfg for DMA, sends its
++ * bus address in a REGISTER_COMMAND_SQ (@is_sq != 0) or REGISTER_COMMAND_CQ
++ * mailbox message, and unmaps it again once the mailbox call returns.
++ *
++ * Returns 0 on success, -ENOMEM if the mapping failed, the mailbox error if
++ * the send failed, or -EINVAL on an unexpected reply type.
++ */
++static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq)
++{
++	struct device *parent = &bce->pci->dev;
++	const size_t cfg_len = sizeof(struct bce_queue_memcfg);
++	dma_addr_t handle;
++	int msg_type;
++	int err;
++	u64 reply;
++
++	/* OS X uses a bidirectional mapping, but that's not really needed */
++	handle = dma_map_single(parent, cfg, cfg_len, DMA_TO_DEVICE);
++	if (dma_mapping_error(parent, handle))
++		return -ENOMEM;
++
++	if (is_sq)
++		msg_type = BCE_MB_REGISTER_COMMAND_SQ;
++	else
++		msg_type = BCE_MB_REGISTER_COMMAND_CQ;
++
++	err = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(msg_type, handle), &reply);
++	dma_unmap_single(parent, handle, cfg_len, DMA_TO_DEVICE);
++	if (err)
++		return err;
++
++	return BCE_MB_TYPE(reply) == BCE_MB_REGISTER_COMMAND_QUEUE_REPLY ? 0 : -EINVAL;
++}
++
++/*
++ * PCI remove callback: undoes what apple_bce_probe() set up.
++ * Flags the device as being removed, destroys the VHCI, stops the timestamp,
++ * drops function 0, frees both interrupts and the command queues, unmaps the
++ * two BARs, and finally releases the PCI resources and the device structure.
++ */
++static void apple_bce_remove(struct pci_dev *dev)
++{
++ struct apple_bce_device *bce = pci_get_drvdata(dev);
++ bce->is_being_removed = true;
++
++ bce_vhci_destroy(&bce->vhci);
++
++ bce_timestamp_stop(&bce->timestamp);
++#ifndef WITHOUT_NVME_PATCH
++ pci_disable_device(bce->pci0);
++#endif
++ /* Drop the function 0 reference obtained with pci_get_slot() in probe. */
++ pci_dev_put(bce->pci0);
++ /* Both interrupts are released before the command queues are freed. */
++ pci_free_irq(dev, 0, dev);
++ pci_free_irq(dev, 4, dev);
++ bce_free_command_queues(bce);
++ pci_iounmap(dev, bce->reg_mem_mb);
++ pci_iounmap(dev, bce->reg_mem_dma);
++ device_destroy(bce_class, bce->devt);
++ pci_free_irq_vectors(dev);
++ pci_release_regions(dev);
++ pci_disable_device(dev);
++ /* bce must not be touched after this point. */
++ kfree(bce);
++}
++
++/*
++ * Asks the device to save its state into a DMA buffer and go to sleep.
++ *
++ * Starts with a buffer of max(PAGE_SIZE, 4096) bytes and retries, up to five
++ * attempts in total, with the larger size the device asks for. On success
++ * the buffer is kept in bce->saved_data_dma_* for the resume path. If no
++ * buffer could be allocated, or the attempts are used up, the device is put
++ * to sleep without saved state instead.
++ *
++ * Returns 0 on success or a negative errno.
++ */
++static int bce_save_state_and_sleep(struct apple_bce_device *bce)
++{
++	int attempt, status = 0;
++	u64 resp;
++	dma_addr_t dma_addr;
++	void *dma_ptr = NULL;
++	size_t size = max(PAGE_SIZE, 4096UL);
++
++	for (attempt = 0; attempt < 5; ++attempt) {
++		pr_debug("apple-bce: suspend: attempt %i, buffer size %zu\n", attempt, size);
++		dma_ptr = dma_alloc_coherent(&bce->pci->dev, size, &dma_addr, GFP_KERNEL);
++		if (!dma_ptr) {
++			pr_err("apple-bce: suspend failed (data alloc failed)\n");
++			break;
++		}
++		/* The low 12 bits of the message carry the buffer size in 4 KiB units. */
++		BUG_ON((dma_addr % 4096) != 0);
++		status = bce_mailbox_send(&bce->mbox,
++				BCE_MB_MSG(BCE_MB_SAVE_STATE_AND_SLEEP, (dma_addr & ~(4096LLU - 1)) | (size / 4096)), &resp);
++		if (status) {
++			pr_err("apple-bce: suspend failed (mailbox send)\n");
++			break;
++		}
++		if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_RESTORE_STATE_COMPLETE) {
++			bce->saved_data_dma_addr = dma_addr;
++			bce->saved_data_dma_ptr = dma_ptr;
++			bce->saved_data_dma_size = size;
++			return 0;
++		} else if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE) {
++			dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr);
++			/*
++			 * Forget the freed buffer: if this was the last attempt, the
++			 * cleanup after the loop must not free it a second time (and
++			 * with the already-updated size).
++			 */
++			dma_ptr = NULL;
++			/* The 0x10ff magic value was extracted from Apple's driver */
++			size = (BCE_MB_VALUE(resp) + 0x10ff) & ~(4096LLU - 1);
++			pr_debug("apple-bce: suspend: device requested a larger buffer (%zu)\n", size);
++			continue;
++		} else {
++			pr_err("apple-bce: suspend failed (invalid device response)\n");
++			status = -EINVAL;
++			break;
++		}
++	}
++	/* Reached only when no state was saved; release a buffer that is still held. */
++	if (dma_ptr)
++		dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr);
++	if (!status)
++		return bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SLEEP_NO_STATE, 0), &resp);
++	return status;
++}
++
++/*
++ * Wakes the device on resume. Without a saved-state buffer a
++ * RESTORE_NO_STATE message is sent; otherwise the device is asked to restore
++ * from the saved buffer, which is released afterwards whether or not the
++ * restore succeeded.
++ *
++ * Returns 0 on success or a negative errno.
++ */
++static int bce_restore_state_and_wake(struct apple_bce_device *bce)
++{
++	u64 reply;
++	int err;
++
++	if (bce->saved_data_dma_ptr == NULL) {
++		err = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_NO_STATE, 0), &reply);
++		if (err) {
++			pr_err("apple-bce: resume with no state failed (mailbox send)\n");
++			return err;
++		}
++		if (BCE_MB_TYPE(reply) != BCE_MB_RESTORE_NO_STATE) {
++			pr_err("apple-bce: resume with no state failed (invalid device response)\n");
++			return -EINVAL;
++		}
++		return 0;
++	}
++
++	err = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_STATE_AND_WAKE,
++			(bce->saved_data_dma_addr & ~(4096LLU - 1)) | (bce->saved_data_dma_size / 4096)), &reply);
++	if (err) {
++		pr_err("apple-bce: resume with state failed (mailbox send)\n");
++	} else if (BCE_MB_TYPE(reply) != BCE_MB_SAVE_RESTORE_STATE_COMPLETE) {
++		pr_err("apple-bce: resume with state failed (invalid device response)\n");
++		err = -EINVAL;
++	}
++
++	/* The saved-state buffer is single use: drop it on success and on failure. */
++	dma_free_coherent(&bce->pci->dev, bce->saved_data_dma_size, bce->saved_data_dma_ptr, bce->saved_data_dma_addr);
++	bce->saved_data_dma_ptr = NULL;
++	return err;
++}
++
++/*
++ * PM suspend callback: stops the timestamp, then asks the device to save its
++ * state and sleep. Returns the result of that request.
++ */
++static int apple_bce_suspend(struct device *dev)
++{
++	struct pci_dev *pdev = to_pci_dev(dev);
++	struct apple_bce_device *bce = pci_get_drvdata(pdev);
++
++	bce_timestamp_stop(&bce->timestamp);
++
++	return bce_save_state_and_sleep(bce);
++}
++
++/*
++ * PM resume callback: makes both our function and function 0 bus masters
++ * again, wakes the device, and restarts the timestamp once the wake has
++ * succeeded. Returns 0 on success or the error from the wake request.
++ */
++static int apple_bce_resume(struct device *dev)
++{
++	struct pci_dev *pdev = to_pci_dev(dev);
++	struct apple_bce_device *bce = pci_get_drvdata(pdev);
++	int err;
++
++	pci_set_master(bce->pci);
++	pci_set_master(bce->pci0);
++
++	err = bce_restore_state_and_wake(bce);
++	if (err)
++		return err;
++
++	bce_timestamp_start(&bce->timestamp, false);
++	return 0;
++}
++
++/* PCI IDs this driver binds to: Apple device 0x1801. The zero entry ends the table. */
++static struct pci_device_id apple_bce_ids[ ] = {
++ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1801) },
++ { 0, },
++};
++
++/* Power-management callbacks: only system suspend and resume are provided. */
++struct dev_pm_ops apple_bce_pci_driver_pm = {
++ .suspend = apple_bce_suspend,
++ .resume = apple_bce_resume
++};
++/* PCI driver description registered in apple_bce_module_init(). */
++struct pci_driver apple_bce_pci_driver = {
++ .name = "apple-bce",
++ .id_table = apple_bce_ids,
++ .probe = apple_bce_probe,
++ .remove = apple_bce_remove,
++ .driver = {
++ .pm = &apple_bce_pci_driver_pm
++ }
++};
++
++
++/*
++ * Module entry point. Reserves a char device number, creates the
++ * "apple-bce" class, initialises the VHCI part, registers the PCI driver
++ * and finally initialises the audio part.
++ *
++ * Returns 0 on success or a negative errno. On failure only the steps that
++ * had already succeeded are undone, in reverse order.
++ */
++static int __init apple_bce_module_init(void)
++{
++	int result;
++
++	result = alloc_chrdev_region(&bce_chrdev, 0, 1, "apple-bce");
++	if (result)
++		return result; /* nothing has been acquired yet */
++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
++	bce_class = class_create(THIS_MODULE, "apple-bce");
++#else
++	bce_class = class_create("apple-bce");
++#endif
++	if (IS_ERR(bce_class)) {
++		result = PTR_ERR(bce_class);
++		goto fail_chrdev;
++	}
++	if ((result = bce_vhci_module_init())) {
++		pr_err("apple-bce: bce-vhci init failed");
++		goto fail_class;
++	}
++
++	result = pci_register_driver(&apple_bce_pci_driver);
++	if (result)
++		goto fail_vhci;
++
++	/* NOTE(review): the result of aaudio_module_init() is not checked (unchanged). */
++	aaudio_module_init();
++
++	return 0;
++
++fail_vhci:
++	/* The PCI driver never registered, so only the VHCI init is undone. */
++	bce_vhci_module_exit();
++fail_class:
++	class_destroy(bce_class);
++fail_chrdev:
++	unregister_chrdev_region(bce_chrdev, 1);
++	return result;
++}
++/*
++ * Module exit: unregisters the PCI driver, shuts down the audio and VHCI
++ * parts, then destroys the class and releases the char device number
++ * obtained in apple_bce_module_init().
++ */
++static void __exit apple_bce_module_exit(void)
++{
++ pci_unregister_driver(&apple_bce_pci_driver);
++
++ aaudio_module_exit();
++ bce_vhci_module_exit();
++ class_destroy(bce_class);
++ unregister_chrdev_region(bce_chrdev, 1);
++}
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("MrARM");
++MODULE_DESCRIPTION("Apple BCE Driver");
++MODULE_VERSION("0.01");
++module_init(apple_bce_module_init);
++module_exit(apple_bce_module_exit);
+diff --git a/drivers/staging/apple-bce/apple_bce.h b/drivers/staging/apple-bce/apple_bce.h
+new file mode 100644
+index 000000000..f13ab8d57
+--- /dev/null
++++ b/drivers/staging/apple-bce/apple_bce.h
+@@ -0,0 +1,38 @@
++#pragma once
++
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "mailbox.h"
++#include "queue.h"
++#include "vhci/vhci.h"
++
++#define BC_PROTOCOL_VERSION 0x20001
++#define BCE_MAX_QUEUE_COUNT 0x100
++
++#define BCE_QUEUE_USER_MIN 2
++#define BCE_QUEUE_USER_MAX (BCE_MAX_QUEUE_COUNT - 1)
++
++/* Per-device state of the BCE PCI function; allocated in apple_bce_probe(). */
++struct apple_bce_device {
++ /* pci: the function this driver is bound to; pci0: function 0 of the same slot. */
++ struct pci_dev *pci, *pci0;
++ /* Char device number and the class device created for it in probe. */
++ dev_t devt;
++ struct device *dev;
++ /* Mappings of BAR 4 (mailbox/timestamp registers) and BAR 2 (DMA registers). */
++ void __iomem *reg_mem_mb;
++ void __iomem *reg_mem_dma;
++ struct bce_mailbox mbox;
++ struct bce_timestamp timestamp;
++ /* Queue table indexed by queue id; slots 0 and 1 hold the command CQ and SQ. */
++ struct bce_queue *queues[BCE_MAX_QUEUE_COUNT];
++ /* Held by the DMA interrupt handler while it walks queues[]. */
++ struct spinlock queues_lock;
++ /* NOTE(review): presumably allocates user queue ids; its use is not visible here. */
++ struct ida queue_ida;
++ struct bce_queue_cq *cmd_cq;
++ struct bce_queue_cmdq *cmd_cmdq;
++ /* NOTE(review): use not visible in this part of the driver. */
++ struct bce_queue_sq *int_sq_list[BCE_MAX_QUEUE_COUNT];
++ /* Set at the start of apple_bce_remove(). */
++ bool is_being_removed;
++
++ /* State buffer filled on suspend and released again on resume. */
++ dma_addr_t saved_data_dma_addr;
++ void *saved_data_dma_ptr;
++ size_t saved_data_dma_size;
++
++ struct bce_vhci vhci;
++};
++
++extern struct apple_bce_device *global_bce;
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/audio/audio.c b/drivers/staging/apple-bce/audio/audio.c
+new file mode 100644
+index 000000000..bd16ddd16
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/audio.c
+@@ -0,0 +1,711 @@
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/module.h>
++#include <linux/random.h>
++#include <sound/core.h>
++#include <sound/initval.h>
++#include <sound/pcm.h>
++#include <sound/jack.h>
++#include "audio.h"
++#include "pcm.h"
++#include <linux/version.h>
++
++static int aaudio_alsa_index = SNDRV_DEFAULT_IDX1;
++static char *aaudio_alsa_id = SNDRV_DEFAULT_STR1;
++
++static dev_t aaudio_chrdev;
++static struct class *aaudio_class;
++
++static int aaudio_init_cmd(struct aaudio_device *a);
++static int aaudio_init_bs(struct aaudio_device *a);
++static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id);
++static void aaudio_free_dev(struct aaudio_subdevice *sdev);
++
++/*
++ * aaudio_probe() - PCI probe callback for the T2 audio function.
++ *
++ * Enables the function, creates the class device and links it to the BCE
++ * device, maps BAR 0 (BufferStruct) and BAR 4 (config/GPR registers),
++ * initialises the BCE command transport, creates the ALSA card, discovers
++ * the remote devices and the BufferStruct, switches remote access on and
++ * registers the card.
++ *
++ * Returns 0 on success or a negative errno. Every failure path releases
++ * what had been acquired before it.
++ */
++static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id)
++{
++	struct aaudio_device *aaudio = NULL;
++	struct aaudio_subdevice *sdev = NULL;
++	int status = 0;
++	u32 cfg;
++
++	pr_info("aaudio: capturing our device\n");
++
++	if (pci_enable_device(dev))
++		return -ENODEV;
++	if (pci_request_regions(dev, "aaudio")) {
++		/* Nothing but the device enable has to be undone here. */
++		status = -ENODEV;
++		goto fail_disable;
++	}
++	pci_set_master(dev);
++
++	aaudio = kzalloc(sizeof(*aaudio), GFP_KERNEL);
++	if (!aaudio) {
++		status = -ENOMEM;
++		goto fail;
++	}
++
++	aaudio->bce = global_bce;
++	if (!aaudio->bce) {
++		dev_warn(&dev->dev, "aaudio: No BCE available\n");
++		status = -EINVAL;
++		goto fail;
++	}
++
++	aaudio->pci = dev;
++	pci_set_drvdata(dev, aaudio);
++
++	aaudio->devt = aaudio_chrdev;
++	aaudio->dev = device_create(aaudio_class, &dev->dev, aaudio->devt, NULL, "aaudio");
++	if (IS_ERR_OR_NULL(aaudio->dev)) {
++		/* The errno is encoded in the failed device pointer, not in the class. */
++		status = aaudio->dev ? PTR_ERR(aaudio->dev) : -ENODEV;
++		goto fail;
++	}
++	device_link_add(aaudio->dev, aaudio->bce->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER);
++
++	init_completion(&aaudio->remote_alive);
++	INIT_LIST_HEAD(&aaudio->subdevice_list);
++
++	/* Init: set an unknown flag in the bitset */
++	if (pci_read_config_dword(dev, 4, &cfg))
++		dev_warn(&dev->dev, "aaudio: pci_read_config_dword fail\n");
++	if (pci_write_config_dword(dev, 4, cfg | 6u))
++		dev_warn(&dev->dev, "aaudio: pci_write_config_dword fail\n");
++
++	/* resource_size_t varies in width, so cast it to match %llx. */
++	dev_info(aaudio->dev, "aaudio: bs len = %llx\n", (unsigned long long) pci_resource_len(dev, 0));
++	aaudio->reg_mem_bs_dma = pci_resource_start(dev, 0);
++	aaudio->reg_mem_bs = pci_iomap(dev, 0, 0);
++	aaudio->reg_mem_cfg = pci_iomap(dev, 4, 0);
++
++	if (IS_ERR_OR_NULL(aaudio->reg_mem_bs) || IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) {
++		/* status stays 0 here and is turned into -EINVAL at the end. */
++		dev_warn(&dev->dev, "aaudio: Failed to pci_iomap required regions\n");
++		goto fail;
++	}
++
++	/* The GPR block sits 0xC000 bytes into BAR 4; derive it only from a valid mapping. */
++	aaudio->reg_mem_gpr = (u32 __iomem *) ((u8 __iomem *) aaudio->reg_mem_cfg + 0xC000);
++
++	if (aaudio_bce_init(aaudio)) {
++		dev_warn(&dev->dev, "aaudio: Failed to init BCE command transport\n");
++		goto fail;
++	}
++
++	if (snd_card_new(aaudio->dev, aaudio_alsa_index, aaudio_alsa_id, THIS_MODULE, 0, &aaudio->card)) {
++		dev_err(&dev->dev, "aaudio: Failed to create ALSA card\n");
++		goto fail;
++	}
++
++	strcpy(aaudio->card->shortname, "Apple T2 Audio");
++	strcpy(aaudio->card->longname, "Apple T2 Audio");
++	strcpy(aaudio->card->mixername, "Apple T2 Audio");
++	/* Dynamic alsa ids start at 100 */
++	aaudio->next_alsa_id = 100;
++
++	if (aaudio_init_cmd(aaudio)) {
++		dev_err(&dev->dev, "aaudio: Failed to initialize over BCE\n");
++		goto fail_snd;
++	}
++
++	if (aaudio_init_bs(aaudio)) {
++		dev_err(&dev->dev, "aaudio: Failed to initialize BufferStruct\n");
++		goto fail_snd;
++	}
++
++	if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) {
++		dev_err(&dev->dev, "Failed to set remote access\n");
++		/* Unwind like every other late failure instead of leaking everything. */
++		goto fail_snd;
++	}
++
++	if (snd_card_register(aaudio->card)) {
++		dev_err(&dev->dev, "aaudio: Failed to register ALSA sound device\n");
++		goto fail_snd;
++	}
++
++	/* Name the ALSA driver after the channel count of the single-stream "Speaker" device. */
++	list_for_each_entry(sdev, &aaudio->subdevice_list, list) {
++		struct aaudio_buffer_struct_device *bs_dev = &aaudio->bs->devices[sdev->buf_id];
++
++		if (sdev->out_stream_cnt == 1 && !strcmp(bs_dev->name, "Speaker")) {
++			struct snd_pcm_hardware *hw = sdev->out_streams[0].alsa_hw_desc;
++
++			snprintf(aaudio->card->driver, sizeof(aaudio->card->driver) / sizeof(char), "AppleT2x%d", hw->channels_min);
++		}
++	}
++
++	return 0;
++
++fail_snd:
++	snd_card_free(aaudio->card);
++	/* Release the subdevices created during init, in the same order as aaudio_remove(). */
++	while (!list_empty(&aaudio->subdevice_list)) {
++		sdev = list_first_entry(&aaudio->subdevice_list, struct aaudio_subdevice, list);
++		list_del(&sdev->list);
++		aaudio_free_dev(sdev);
++	}
++fail:
++	if (aaudio) {
++		/* Every field is read before the structure is freed. */
++		if (!IS_ERR_OR_NULL(aaudio->reg_mem_bs))
++			pci_iounmap(dev, aaudio->reg_mem_bs);
++		if (!IS_ERR_OR_NULL(aaudio->reg_mem_cfg))
++			pci_iounmap(dev, aaudio->reg_mem_cfg);
++		if (!IS_ERR_OR_NULL(aaudio->dev))
++			device_destroy(aaudio_class, aaudio->devt);
++		kfree(aaudio);
++	}
++
++	pci_release_regions(dev);
++fail_disable:
++	pci_disable_device(dev);
++
++	if (!status)
++		status = -EINVAL;
++	return status;
++}
++
++
++
++/*
++ * PCI remove callback: frees the ALSA card, releases every subdevice on the
++ * list, unmaps both BARs, destroys the class device and releases the PCI
++ * resources and the device structure.
++ */
++static void aaudio_remove(struct pci_dev *dev)
++{
++ struct aaudio_subdevice *sdev;
++ struct aaudio_device *aaudio = pci_get_drvdata(dev);
++
++ /* The card is freed before the subdevices. */
++ snd_card_free(aaudio->card);
++ /* Pop entries one at a time, since each one is freed as it is removed. */
++ while (!list_empty(&aaudio->subdevice_list)) {
++ sdev = list_first_entry(&aaudio->subdevice_list, struct aaudio_subdevice, list);
++ list_del(&sdev->list);
++ aaudio_free_dev(sdev);
++ }
++ pci_iounmap(dev, aaudio->reg_mem_bs);
++ pci_iounmap(dev, aaudio->reg_mem_cfg);
++ device_destroy(aaudio_class, aaudio->devt);
++ /* NOTE(review): aaudio_probe() allocates no IRQ vectors, so this looks like a no-op. */
++ pci_free_irq_vectors(dev);
++ pci_release_regions(dev);
++ pci_disable_device(dev);
++ kfree(aaudio);
++}
++
++/*
++ * PM suspend callback: switches remote access off (a failure is only
++ * logged) and disables the PCI device. Always returns 0.
++ */
++static int aaudio_suspend(struct device *dev)
++{
++	struct pci_dev *pdev = to_pci_dev(dev);
++	struct aaudio_device *aaudio = pci_get_drvdata(pdev);
++	int err;
++
++	err = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_OFF);
++	if (err)
++		dev_warn(aaudio->dev, "Failed to reset remote access\n");
++
++	pci_disable_device(aaudio->pci);
++
++	return 0;
++}
++
++/*
++ * PM resume callback: re-enables the PCI device as a bus master and
++ * switches remote access back on. Returns 0 on success or the first error.
++ */
++static int aaudio_resume(struct device *dev)
++{
++	struct pci_dev *pdev = to_pci_dev(dev);
++	struct aaudio_device *aaudio = pci_get_drvdata(pdev);
++	int err;
++
++	err = pci_enable_device(aaudio->pci);
++	if (err)
++		return err;
++	pci_set_master(aaudio->pci);
++
++	err = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON);
++	if (err)
++		dev_err(aaudio->dev, "Failed to set remote access\n");
++
++	return err;
++}
++
++/*
++ * Command-channel bring-up: sends the alive notification, waits up to
++ * 500 ms for the remote side to report alive, then fetches the device list
++ * and initialises a subdevice for every id in it.
++ *
++ * Returns 0 on success, -ETIMEDOUT if the remote never answered, or the
++ * error returned by the failing command.
++ */
++static int aaudio_init_cmd(struct aaudio_device *a)
++{
++	struct aaudio_send_ctx sctx;
++	struct aaudio_msg reply;
++	aaudio_device_id_t *ids;
++	u64 count, idx;
++	int err;
++
++	err = aaudio_send(a, &sctx, 500,
++			  aaudio_msg_write_alive_notification, 1, 3);
++	if (err) {
++		dev_err(a->dev, "Sending alive notification failed\n");
++		return err;
++	}
++
++	if (!wait_for_completion_timeout(&a->remote_alive, msecs_to_jiffies(500))) {
++		dev_err(a->dev, "Timed out waiting for remote\n");
++		return -ETIMEDOUT;
++	}
++	dev_info(a->dev, "Continuing init\n");
++
++	reply = aaudio_reply_alloc();
++	err = aaudio_cmd_get_device_list(a, &reply, &ids, &count);
++	if (err) {
++		dev_err(a->dev, "Failed to get device list\n");
++	} else {
++		/* The id array is used before the reply is released below. */
++		for (idx = 0; idx < count; ++idx)
++			aaudio_init_dev(a, ids[idx]);
++	}
++	aaudio_reply_free(&reply);
++
++	return err;
++}
++
++static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm);
++static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev);
++
++/*
++ * Creates the subdevice for remote device @dev_id: queries its UID, its
++ * input and output latencies and its stream lists, initialises every
++ * stream, creates the PCM when at least one stream is LPCM, and sets up
++ * jack detection for the "Codec Output" device. A fully initialised
++ * subdevice is appended to a->subdevice_list.
++ *
++ * Failures are logged and the partly built subdevice is discarded; nothing
++ * is reported to the caller.
++ */
++static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id)
++{
++	struct aaudio_subdevice *sdev;
++	struct aaudio_msg buf = aaudio_reply_alloc();
++	u64 uid_len, stream_cnt, i;
++	aaudio_object_id_t *stream_list;
++	char *uid;
++
++	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
++	if (!sdev) {
++		dev_err(a->dev, "Failed to allocate subdevice for device %llx\n", dev_id);
++		goto fail;
++	}
++
++	if (aaudio_cmd_get_property(a, &buf, dev_id, dev_id, AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_UID, 0),
++			NULL, 0, (void **) &uid, &uid_len) || uid_len > AAUDIO_DEVICE_MAX_UID_LEN) {
++		dev_err(a->dev, "Failed to get device uid for device %llx\n", dev_id);
++		goto fail;
++	}
++	dev_info(a->dev, "Remote device %llx %.*s\n", dev_id, (int) uid_len, uid);
++
++	sdev->a = a;
++	INIT_LIST_HEAD(&sdev->list);
++	sdev->dev_id = dev_id;
++	sdev->buf_id = AAUDIO_BUFFER_ID_NONE;
++	/*
++	 * uid_len was checked against AAUDIO_DEVICE_MAX_UID_LEN above. The
++	 * terminator goes right after the copied bytes (index uid_len), not one
++	 * byte further, which could land past the end of the array.
++	 */
++	strncpy(sdev->uid, uid, uid_len);
++	sdev->uid[uid_len] = '\0';
++
++	if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
++			AAUDIO_PROP(AAUDIO_PROP_SCOPE_INPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->in_latency, sizeof(u32)))
++		dev_warn(a->dev, "Failed to query device input latency\n");
++	if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
++			AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->out_latency, sizeof(u32)))
++		dev_warn(a->dev, "Failed to query device output latency\n");
++
++	if (aaudio_cmd_get_input_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
++		dev_err(a->dev, "Failed to get input stream list for device %llx\n", dev_id);
++		goto fail;
++	}
++	if (stream_cnt > AAUDIO_DEIVCE_MAX_INPUT_STREAMS) {
++		dev_warn(a->dev, "Device %s input stream count %llu is larger than the supported count of %u\n",
++			 sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_INPUT_STREAMS);
++		stream_cnt = AAUDIO_DEIVCE_MAX_INPUT_STREAMS;
++	}
++	sdev->in_stream_cnt = stream_cnt;
++	for (i = 0; i < stream_cnt; i++) {
++		sdev->in_streams[i].id = stream_list[i];
++		sdev->in_streams[i].buffer_cnt = 0;
++		aaudio_init_stream_info(sdev, &sdev->in_streams[i]);
++		/* Total latency is the stream's own latency plus the device input latency. */
++		sdev->in_streams[i].latency += sdev->in_latency;
++	}
++
++	if (aaudio_cmd_get_output_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
++		dev_err(a->dev, "Failed to get output stream list for device %llx\n", dev_id);
++		goto fail;
++	}
++	if (stream_cnt > AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS) {
++		dev_warn(a->dev, "Device %s output stream count %llu is larger than the supported count of %u\n",
++			 sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS);
++		stream_cnt = AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS;
++	}
++	sdev->out_stream_cnt = stream_cnt;
++	for (i = 0; i < stream_cnt; i++) {
++		sdev->out_streams[i].id = stream_list[i];
++		sdev->out_streams[i].buffer_cnt = 0;
++		aaudio_init_stream_info(sdev, &sdev->out_streams[i]);
++		/* Output streams use the device output latency, not the input one. */
++		sdev->out_streams[i].latency += sdev->out_latency;
++	}
++
++	if (sdev->is_pcm)
++		aaudio_create_pcm(sdev);
++	/* Headphone Jack status */
++	if (!strcmp(sdev->uid, "Codec Output")) {
++		if (snd_jack_new(a->card, sdev->uid, SND_JACK_HEADPHONE, &sdev->jack, true, false))
++			dev_warn(a->dev, "Failed to create an attached jack for %s\n", sdev->uid);
++		aaudio_cmd_property_listener(a, sdev->dev_id, sdev->dev_id,
++				AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0));
++		aaudio_handle_jack_connection_change(sdev);
++	}
++
++	aaudio_reply_free(&buf);
++
++	list_add_tail(&sdev->list, &a->subdevice_list);
++	return;
++
++fail:
++	aaudio_reply_free(&buf);
++	/* kfree(NULL) is a no-op, so this also covers the allocation failure. */
++	kfree(sdev);
++}
++
++/*
++ * Queries the physical format descriptor and the latency of @strm from the
++ * remote side. A failed query is only logged. The owning subdevice is
++ * flagged as PCM when the stream's format is LPCM.
++ */
++static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm)
++{
++	struct aaudio_device *a = sdev->a;
++	int err;
++
++	err = aaudio_cmd_get_primitive_property(a, sdev->dev_id, strm->id,
++			AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_PHYS_FORMAT, 0), NULL, 0,
++			&strm->desc, sizeof(strm->desc));
++	if (err)
++		dev_warn(a->dev, "Failed to query stream descriptor\n");
++
++	err = aaudio_cmd_get_primitive_property(a, sdev->dev_id, strm->id,
++			AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_LATENCY, 0), NULL, 0,
++			&strm->latency, sizeof(u32));
++	if (err)
++		dev_warn(a->dev, "Failed to query stream latency\n");
++
++	if (strm->desc.format_id == AAUDIO_FORMAT_LPCM)
++		sdev->is_pcm = true;
++}
++
++/*
++ * Frees a subdevice together with the ALSA hardware descriptor and the
++ * buffer array of each of its input and output streams.
++ */
++static void aaudio_free_dev(struct aaudio_subdevice *sdev)
++{
++	size_t n;
++
++	/* kfree() ignores NULL, so streams that never got these are fine. */
++	for (n = 0; n < sdev->in_stream_cnt; n++) {
++		kfree(sdev->in_streams[n].alsa_hw_desc);
++		kfree(sdev->in_streams[n].buffers);
++	}
++	for (n = 0; n < sdev->out_stream_cnt; n++) {
++		kfree(sdev->out_streams[n].alsa_hw_desc);
++		kfree(sdev->out_streams[n].buffers);
++	}
++	kfree(sdev);
++}
++
++/*
++ * Looks up the subdevice whose remote device id equals @dev_id.
++ * Returns the first match on a->subdevice_list, or NULL if there is none.
++ */
++static struct aaudio_subdevice *aaudio_find_dev_by_dev_id(struct aaudio_device *a, aaudio_device_id_t dev_id)
++{
++	struct aaudio_subdevice *match = NULL;
++	struct aaudio_subdevice *cur;
++
++	list_for_each_entry(cur, &a->subdevice_list, list) {
++		if (cur->dev_id != dev_id)
++			continue;
++		match = cur;
++		break;
++	}
++	return match;
++}
++
++/*
++ * Looks up the subdevice whose UID string equals @uid.
++ * Returns the first match on a->subdevice_list, or NULL if there is none.
++ */
++static struct aaudio_subdevice *aaudio_find_dev_by_uid(struct aaudio_device *a, const char *uid)
++{
++	struct aaudio_subdevice *match = NULL;
++	struct aaudio_subdevice *cur;
++
++	list_for_each_entry(cur, &a->subdevice_list, list) {
++		if (strcmp(uid, cur->uid) != 0)
++			continue;
++		match = cur;
++		break;
++	}
++	return match;
++}
++
++static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
++ struct aaudio_buffer_struct_stream *bs_strm);
++static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
++ struct aaudio_buffer_struct_stream *bs_strm);
++
++/*
++ * Locates and initialises the shared BufferStruct.
++ *
++ * Validates the GPR version and signature, finds the BufferStruct inside
++ * the BAR 0 mapping and validates its signature. Each BufferStruct device
++ * is then matched to a subdevice by name and its output streams are
++ * initialised. Subdevices without a BufferStruct entry get a new entry
++ * appended, and subdevices with exactly one input stream get a host input
++ * stream.
++ *
++ * Returns 0 on success or -EINVAL when a version or signature check fails.
++ */
++static int aaudio_init_bs(struct aaudio_device *a)
++{
++ int i, j;
++ struct aaudio_buffer_struct_device *dev;
++ struct aaudio_subdevice *sdev;
++ u32 ver, sig, bs_base;
++
++ /* GPR word 0 is the version, word 1 the signature, word 2 the BufferStruct offset. */
++ ver = ioread32(&a->reg_mem_gpr[0]);
++ if (ver < 3) {
++ dev_err(a->dev, "aaudio: Bad GPR version (%u)", ver);
++ return -EINVAL;
++ }
++ sig = ioread32(&a->reg_mem_gpr[1]);
++ if (sig != AAUDIO_SIG) {
++ dev_err(a->dev, "aaudio: Bad GPR sig (%x)", sig);
++ return -EINVAL;
++ }
++ bs_base = ioread32(&a->reg_mem_gpr[2]);
++ /* bs_base is a byte offset into the BAR 0 mapping. */
++ a->bs = (struct aaudio_buffer_struct *) ((u8 *) a->reg_mem_bs + bs_base);
++ if (a->bs->signature != AAUDIO_SIG) {
++ dev_err(a->dev, "aaudio: Bad BufferStruct sig (%x)", a->bs->signature);
++ return -EINVAL;
++ }
++ dev_info(a->dev, "aaudio: BufferStruct ver = %i\n", a->bs->version);
++ dev_info(a->dev, "aaudio: Num devices = %i\n", a->bs->num_devices);
++ /* Pass 1: bind each device already in the BufferStruct to its subdevice. */
++ for (i = 0; i < a->bs->num_devices; i++) {
++ dev = &a->bs->devices[i];
++ dev_info(a->dev, "aaudio: Device %i %s\n", i, dev->name);
++
++ sdev = aaudio_find_dev_by_uid(a, dev->name);
++ if (!sdev) {
++ dev_err(a->dev, "aaudio: Subdevice not found for BufferStruct device %s\n", dev->name);
++ continue;
++ }
++ sdev->buf_id = (u8) i;
++ dev->num_input_streams = 0;
++ for (j = 0; j < dev->num_output_streams; j++) {
++ dev_info(a->dev, "aaudio: Device %i Stream %i: Output; Buffer Count = %i\n", i, j,
++ dev->output_streams[j].num_buffers);
++ /* Streams beyond what the subdevice reported are logged but not initialised. */
++ if (j < sdev->out_stream_cnt)
++ aaudio_init_bs_stream(a, &sdev->out_streams[j], &dev->output_streams[j]);
++ }
++ }
++
++ /*
++ * Pass 2: append an entry for every subdevice that is still unbound.
++ * i continues from the device count reached above.
++ * NOTE(review): no check against the capacity of devices[] or the size
++ * of the name field is visible here - confirm against the struct layout.
++ */
++ list_for_each_entry(sdev, &a->subdevice_list, list) {
++ if (sdev->buf_id != AAUDIO_BUFFER_ID_NONE)
++ continue;
++ sdev->buf_id = i;
++ dev_info(a->dev, "aaudio: Created device %i %s\n", i, sdev->uid);
++ strcpy(a->bs->devices[i].name, sdev->uid);
++ a->bs->devices[i].num_input_streams = 0;
++ a->bs->devices[i].num_output_streams = 0;
++ a->bs->num_devices = ++i;
++ }
++ /* Pass 3: set up a host input stream for subdevices with exactly one input stream. */
++ list_for_each_entry(sdev, &a->subdevice_list, list) {
++ if (sdev->in_stream_cnt == 1) {
++ dev_info(a->dev, "aaudio: Device %i Host Stream; Input\n", sdev->buf_id);
++ aaudio_init_bs_stream_host(a, &sdev->in_streams[0], &a->bs->devices[sdev->buf_id].input_streams[0]);
++ a->bs->devices[sdev->buf_id].num_input_streams = 1;
++ /* Presumably orders the BufferStruct writes before the command below - confirm. */
++ wmb();
++
++ if (aaudio_cmd_set_input_stream_address_ranges(a, sdev->dev_id)) {
++ dev_err(a->dev, "aaudio: Failed to set input stream address ranges\n");
++ }
++ }
++ }
++
++ return 0;
++}
++
++/*
++ * Initialise a driver-side stream from a BufferStruct stream that the remote
++ * side already populated.
++ *
++ * The buffer count is taken from @bs_strm and capped at
++ * AAUDIO_DEIVCE_MAX_BUFFER_COUNT.  Each buffer's address is treated as an
++ * offset into the reg_mem_bs region (both for the CPU pointer and the DMA
++ * address).  For single-buffer streams an ALSA hardware description is
++ * created; if that fails strm->alsa_hw_desc is left NULL.
++ *
++ * Errors are only logged; on allocation failure strm->buffers stays NULL.
++ */
++static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
++		struct aaudio_buffer_struct_stream *bs_strm)
++{
++	size_t i;
++
++	strm->buffer_cnt = bs_strm->num_buffers;
++	if (bs_strm->num_buffers > AAUDIO_DEIVCE_MAX_BUFFER_COUNT) {
++		dev_warn(a->dev, "BufferStruct buffer count %u exceeds driver limit of %u\n", bs_strm->num_buffers,
++			 AAUDIO_DEIVCE_MAX_BUFFER_COUNT);
++		strm->buffer_cnt = AAUDIO_DEIVCE_MAX_BUFFER_COUNT;
++	}
++	if (!strm->buffer_cnt)
++		return;
++	strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL);
++	if (!strm->buffers) {
++		dev_err(a->dev, "Buffer list allocation failed\n");
++		return;
++	}
++	for (i = 0; i < strm->buffer_cnt; i++) {
++		strm->buffers[i].dma_addr = a->reg_mem_bs_dma + (dma_addr_t) bs_strm->buffers[i].address;
++		strm->buffers[i].ptr = a->reg_mem_bs + bs_strm->buffers[i].address;
++		strm->buffers[i].size = bs_strm->buffers[i].size;
++	}
++
++	if (strm->buffer_cnt == 1) {
++		strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL);
++		/* aaudio_create_hw_info() writes through the pointer, so it must be valid. */
++		if (!strm->alsa_hw_desc) {
++			dev_err(a->dev, "ALSA hardware description allocation failed\n");
++			return;
++		}
++		if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) {
++			kfree(strm->alsa_hw_desc);
++			strm->alsa_hw_desc = NULL;
++		}
++	}
++}
++
++/*
++ * Set up a stream whose single buffer is allocated by the host.
++ *
++ * A coherent DMA buffer of desc.bytes_per_packet * 16640 bytes is allocated,
++ * recorded in @strm and published to the remote side through @bs_strm
++ * (buffers[0] and num_buffers = 1).  An ALSA hardware description is then
++ * created; if that fails strm->alsa_hw_desc is left NULL.
++ *
++ * The buffer is published to @bs_strm only after every allocation needed to
++ * track it succeeded; if the buffer list cannot be allocated the DMA memory
++ * is released again and @bs_strm is left untouched.  Errors are only logged.
++ */
++static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
++		struct aaudio_buffer_struct_stream *bs_strm)
++{
++	size_t size;
++	dma_addr_t dma_addr;
++	void *dma_ptr;
++	struct aaudio_dma_buf *list;
++
++	size = strm->desc.bytes_per_packet * 16640;
++	dma_ptr = dma_alloc_coherent(&a->pci->dev, size, &dma_addr, GFP_KERNEL);
++	if (!dma_ptr) {
++		dev_err(a->dev, "dma_alloc_coherent failed\n");
++		return;
++	}
++
++	list = kmalloc_array(1, sizeof(struct aaudio_dma_buf), GFP_KERNEL);
++	if (!list) {
++		dev_err(a->dev, "Buffer list allocation failed\n");
++		/* Nothing references the DMA buffer yet, so give it back. */
++		dma_free_coherent(&a->pci->dev, size, dma_ptr, dma_addr);
++		return;
++	}
++
++	memset(dma_ptr, 0, size);
++
++	list[0].dma_addr = dma_addr;
++	list[0].ptr = dma_ptr;
++	list[0].size = size;
++	strm->buffer_cnt = 1;
++	strm->buffers = list;
++
++	bs_strm->buffers[0].address = dma_addr;
++	bs_strm->buffers[0].size = size;
++	bs_strm->num_buffers = 1;
++
++	strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL);
++	/* aaudio_create_hw_info() writes through the pointer, so it must be valid. */
++	if (!strm->alsa_hw_desc) {
++		dev_err(a->dev, "ALSA hardware description allocation failed\n");
++		return;
++	}
++	if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) {
++		kfree(strm->alsa_hw_desc);
++		strm->alsa_hw_desc = NULL;
++	}
++}
++
++static void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg);
++
++/*
++ * Dispatch a notification message received from the remote side.
++ *
++ * BOOT: an alive notification is sent back.  ALIVE: a->remote_alive is
++ * completed.  PROPERTY_CHANGED: handed to aaudio_handle_prop_change().
++ * Messages whose base cannot be read are dropped silently; unknown message
++ * ids are logged.
++ */
++void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++	struct aaudio_send_ctx sctx;
++	struct aaudio_msg_base base;
++
++	if (aaudio_msg_read_base(msg, &base))
++		return;
++	switch (base.msg) {
++	case AAUDIO_MSG_NOTIFICATION_BOOT:
++		dev_info(a->dev, "Received boot notification from remote\n");
++
++		/* Resend the alive notify */
++		if (aaudio_send(a, &sctx, 500,
++				aaudio_msg_write_alive_notification, 1, 3)) {
++			/* dev_err keeps the device prefix like every other message here. */
++			dev_err(a->dev, "Sending alive notification failed\n");
++		}
++		break;
++	case AAUDIO_MSG_NOTIFICATION_ALIVE:
++		dev_info(a->dev, "Received alive notification from remote\n");
++		complete_all(&a->remote_alive);
++		break;
++	case AAUDIO_MSG_PROPERTY_CHANGED:
++		aaudio_handle_prop_change(a, msg);
++		break;
++	default:
++		dev_info(a->dev, "Unhandled notification %i\n", base.msg);
++		break;
++	}
++}
++
++/*
++ * Deferred-work context for one property-changed notification.  It is
++ * allocated and filled by aaudio_handle_prop_change() and consumed and freed
++ * by aaudio_handle_prop_change_work().
++ */
++struct aaudio_prop_change_work_struct {
++ struct work_struct ws; /* embedded work item; container_of() recovers this struct */
++ struct aaudio_device *a; /* device the notification arrived on */
++ aaudio_device_id_t dev; /* device id read from the message header */
++ aaudio_object_id_t obj; /* object id read from the message */
++ struct aaudio_prop_addr prop; /* element/scope/selector of the changed property */
++};
++
++/*
++ * Query the current jack-plugged property of @sdev and report it to ALSA.
++ * Does nothing when the subdevice has no jack; logs and returns when the
++ * property cannot be read.
++ */
++static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev)
++{
++	u32 plugged;
++	int status;
++
++	if (sdev->jack == NULL)
++		return;
++
++	/* NOTE: Apple made the plug status scoped to the input and output streams. This makes no sense for us, so I just
++	 * always pick the OUTPUT status. */
++	if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, sdev->dev_id,
++			AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0), NULL, 0, &plugged, sizeof(plugged))) {
++		dev_err(sdev->a->dev, "Failed to get jack enable status\n");
++		return;
++	}
++
++	if (plugged) {
++		dev_dbg(sdev->a->dev, "Jack is now %s\n", "plugged");
++		status = sdev->jack->type;
++	} else {
++		dev_dbg(sdev->a->dev, "Jack is now %s\n", "unplugged");
++		status = 0;
++	}
++	snd_jack_report(sdev->jack, status);
++}
++
++/*
++ * Work handler for a queued property-changed notification.  Looks up the
++ * subdevice named in the work item, forwards output-scope jack-plugged
++ * changes to aaudio_handle_jack_connection_change(), and always frees the
++ * work item before returning.
++ */
++void aaudio_handle_prop_change_work(struct work_struct *ws)
++{
++	struct aaudio_prop_change_work_struct *item = container_of(ws, struct aaudio_prop_change_work_struct, ws);
++	struct aaudio_subdevice *target = aaudio_find_dev_by_dev_id(item->a, item->dev);
++
++	if (target) {
++		dev_dbg(item->a->dev, "Property changed for device: %s\n", target->uid);
++
++		if (item->prop.scope == AAUDIO_PROP_SCOPE_OUTPUT &&
++		    item->prop.selector == AAUDIO_PROP_JACK_PLUGGED)
++			aaudio_handle_jack_connection_change(target);
++	} else {
++		dev_err(item->a->dev, "Property notification change: device not found\n");
++	}
++
++	kfree(item);
++}
++
++/*
++ * Queue a property-changed notification for deferred handling.
++ *
++ * The message is parsed into a freshly allocated work item which
++ * aaudio_handle_prop_change_work() processes and frees.  The notification is
++ * dropped if the work item cannot be allocated or the message cannot be
++ * parsed.
++ */
++void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++	/* NOTE: This is a scheduled work because this callback will generally need to query device information and this
++	 * is not possible when we are in the reply parsing code's context. */
++	struct aaudio_prop_change_work_struct *work;
++
++	work = kmalloc(sizeof(*work), GFP_KERNEL);
++	if (!work)
++		return;
++	/* Without a successful parse dev/obj/prop would be uninitialised. */
++	if (aaudio_msg_read_property_changed(msg, &work->dev, &work->obj, &work->prop)) {
++		dev_err(a->dev, "aaudio: Malformed property changed notification\n");
++		kfree(work);
++		return;
++	}
++	work->a = a;
++	INIT_WORK(&work->ws, aaudio_handle_prop_change_work);
++	schedule_work(&work->ws);
++}
++
++#define aaudio_send_cmd_response(a, sctx, msg, fn, ...) \
++ if (aaudio_send_with_tag(a, sctx, ((struct aaudio_msg_header *) msg->data)->tag, 500, fn, ##__VA_ARGS__)) \
++ pr_err("aaudio: Failed to reply to a command\n");
++
++/*
++ * Handle an UPDATE_TIMESTAMP command from the remote side.
++ *
++ * The local boot time is sampled first, then the message is parsed and the
++ * timestamp is forwarded to the matching subdevice.  A response is sent for
++ * every parsable command, even when the device id is unknown.  Messages that
++ * cannot be parsed are logged and dropped.
++ */
++void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++	ktime_t time_os = ktime_get_boottime();
++	struct aaudio_send_ctx sctx;
++	struct aaudio_subdevice *sdev;
++	u64 devid, timestamp, update_seed;
++
++	if (aaudio_msg_read_update_timestamp(msg, &devid, &timestamp, &update_seed)) {
++		dev_err(a->dev, "aaudio: Malformed timestamp update\n");
++		return;
++	}
++	dev_dbg(a->dev, "Received timestamp update for dev=%llx ts=%llx seed=%llx\n", devid, timestamp, update_seed);
++
++	/* The lookup returns NULL for unknown ids; the handler dereferences sdev. */
++	sdev = aaudio_find_dev_by_dev_id(a, devid);
++	if (sdev)
++		aaudio_handle_timestamp(sdev, time_os, timestamp);
++	else
++		dev_warn(a->dev, "aaudio: Timestamp update for unknown device %llx\n", devid);
++
++	aaudio_send_cmd_response(a, &sctx, msg,
++			aaudio_msg_write_update_timestamp_response);
++}
++
++/*
++ * Dispatch a command message received from the remote side.  Only
++ * UPDATE_TIMESTAMP is handled; other ids are logged.  Messages whose base
++ * cannot be read are dropped silently.
++ */
++void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++	struct aaudio_msg_base base;
++
++	if (aaudio_msg_read_base(msg, &base))
++		return;
++	switch (base.msg) {
++	case AAUDIO_MSG_UPDATE_TIMESTAMP:
++		aaudio_handle_cmd_timestamp(a, msg);
++		break;
++	default:
++		dev_info(a->dev, "Unhandled device command %i\n", base.msg);
++		break;
++	}
++}
++
++/* PCI ids handled by this driver; the table is read-only, hence const. */
++static const struct pci_device_id aaudio_ids[] = {
++	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1803) },
++	{ 0, },
++};
++
++/* Power-management callbacks wired into the PCI driver below. */
++struct dev_pm_ops aaudio_pci_driver_pm = {
++	.suspend = aaudio_suspend,
++	.resume = aaudio_resume
++};
++/* PCI driver description registered by aaudio_module_init(). */
++struct pci_driver aaudio_pci_driver = {
++	.name = "aaudio",
++	.id_table = aaudio_ids,
++	.probe = aaudio_probe,
++	.remove = aaudio_remove,
++	.driver = {
++		.pm = &aaudio_pci_driver_pm
++	}
++};
++
++
++/*
++ * Allocate the character device region, create the "aaudio" class and
++ * register the PCI driver.
++ *
++ * On failure only the steps that already succeeded are undone, in reverse
++ * order.  Returns 0 on success or the negative error code of the failing
++ * step.
++ */
++int aaudio_module_init(void)
++{
++	int result;
++
++	result = alloc_chrdev_region(&aaudio_chrdev, 0, 1, "aaudio");
++	if (result)
++		return result;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
++	aaudio_class = class_create(THIS_MODULE, "aaudio");
++#else
++	aaudio_class = class_create("aaudio");
++#endif
++	if (IS_ERR(aaudio_class)) {
++		result = PTR_ERR(aaudio_class);
++		goto fail_class;
++	}
++
++	result = pci_register_driver(&aaudio_pci_driver);
++	if (result)
++		goto fail_drv;
++	return 0;
++
++fail_drv:
++	/* Registration failed, so there is no driver to unregister. */
++	class_destroy(aaudio_class);
++fail_class:
++	unregister_chrdev_region(aaudio_chrdev, 1);
++	return result;
++}
++
++/*
++ * Undo aaudio_module_init() in reverse order: unregister the PCI driver,
++ * destroy the class and release the character device region.
++ */
++void aaudio_module_exit(void)
++{
++ pci_unregister_driver(&aaudio_pci_driver);
++ class_destroy(aaudio_class);
++ unregister_chrdev_region(aaudio_chrdev, 1);
++}
++
++/*
++ * Fixed name -> ALSA PCM id assignments.  aaudio_create_pcm() compares a
++ * subdevice uid against each name and falls back to a running counter when
++ * nothing matches.  The empty entry (NULL name) terminates the table.
++ */
++struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[] = {
++ {"Speaker", 0},
++ {"Digital Mic", 1},
++ {"Codec Output", 2},
++ {"Codec Input", 3},
++ {"Bridge Loopback", 4},
++ {}
++};
++
++/* Module parameters "index" and "id"; mode 0444 makes them read-only in sysfs. */
++module_param_named(index, aaudio_alsa_index, int, 0444);
++MODULE_PARM_DESC(index, "Index value for Apple Internal Audio soundcard.");
++module_param_named(id, aaudio_alsa_id, charp, 0444);
++MODULE_PARM_DESC(id, "ID string for Apple Internal Audio soundcard.");
+diff --git a/drivers/staging/apple-bce/audio/audio.h b/drivers/staging/apple-bce/audio/audio.h
+new file mode 100644
+index 000000000..693006e93
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/audio.h
+@@ -0,0 +1,123 @@
++#ifndef AAUDIO_H
++#define AAUDIO_H
++
++#include <linux/types.h>
++#include <sound/pcm.h>
++#include "../apple_bce.h"
++#include "protocol_bce.h"
++#include "description.h"
++
++/* Signature expected both in GPR register 1 and in the BufferStruct header. */
++#define AAUDIO_SIG 0x19870423
++
++/*
++ * Driver-side limits.  NOTE: "DEIVCE" is a misspelling of "DEVICE" in three
++ * of the names; the spelling is part of the identifiers used by other code.
++ */
++#define AAUDIO_DEVICE_MAX_UID_LEN 128
++#define AAUDIO_DEIVCE_MAX_INPUT_STREAMS 1
++#define AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS 1
++#define AAUDIO_DEIVCE_MAX_BUFFER_COUNT 1
++
++/* buf_id value of a subdevice that has no BufferStruct device slot yet. */
++#define AAUDIO_BUFFER_ID_NONE 0xffu
++
++struct snd_card;
++struct snd_pcm;
++struct snd_pcm_hardware;
++struct snd_jack;
++
++/*
++ * One buffer descriptor inside the shared BufferStruct.
++ * NOTE(review): the fields are size_t, so the layout depends on the word
++ * size of the build; presumably this matches the remote side only on 64-bit
++ * kernels - confirm.
++ */
++struct __attribute__((packed)) __attribute__((aligned(4))) aaudio_buffer_struct_buffer {
++ size_t address; /* offset into reg_mem_bs for remote-provided buffers, DMA address for host-allocated ones */
++ size_t size; /* buffer size in bytes */
++ size_t pad[4];
++};
++/* One stream entry of a BufferStruct device: a buffer count followed by a fixed table of descriptors. */
++struct aaudio_buffer_struct_stream {
++ u8 num_buffers; /* number of valid entries in buffers[] */
++ struct aaudio_buffer_struct_buffer buffers[100];
++ char filler[32];
++};
++/* One device entry of the BufferStruct; matched to a subdevice by comparing name with the subdevice uid. */
++struct aaudio_buffer_struct_device {
++ char name[128];
++ u8 num_input_streams; /* number of valid entries in input_streams[] */
++ u8 num_output_streams; /* number of valid entries in output_streams[] */
++ struct aaudio_buffer_struct_stream input_streams[5];
++ struct aaudio_buffer_struct_stream output_streams[5];
++ char filler[128];
++};
++/* Header of the shared BufferStruct, located in reg_mem_bs at the offset read from GPR register 2. */
++struct aaudio_buffer_struct {
++ u32 version;
++ u32 signature; /* must equal AAUDIO_SIG */
++ u32 flags;
++ u8 num_devices; /* number of valid entries in devices[] */
++ struct aaudio_buffer_struct_device devices[20];
++};
++
++struct aaudio_device;
++/* Driver-side record of one audio buffer: device-visible address, CPU pointer and size. */
++struct aaudio_dma_buf {
++ dma_addr_t dma_addr; /* address handed to ALSA as runtime->dma_addr */
++ void *ptr; /* CPU pointer handed to ALSA as runtime->dma_area */
++ size_t size; /* size in bytes */
++};
++/* State of one input or output stream of a subdevice. */
++struct aaudio_stream {
++ aaudio_object_id_t id;
++ size_t buffer_cnt; /* number of entries in buffers */
++ struct aaudio_dma_buf *buffers; /* kmalloc'ed array, NULL until initialised */
++
++ struct aaudio_apple_description desc; /* stream format as described by the remote side */
++ struct snd_pcm_hardware *alsa_hw_desc; /* derived from desc; NULL when it could not be created */
++ u32 latency; /* subtracted (in frames) from the reported pointer */
++
++ bool waiting_for_first_ts; /* set on start, cleared by the first timestamp */
++
++ ktime_t remote_timestamp; /* last timestamp stored by the timestamp handler */
++ snd_pcm_sframes_t frame_min; /* lower bound applied by aaudio_pcm_pointer() */
++ int started; /* 1 between TRIGGER_START and TRIGGER_STOP */
++};
++/* One audio subdevice; linked on aaudio_device.subdevice_list. */
++struct aaudio_subdevice {
++ struct aaudio_device *a; /* owning device */
++ struct list_head list; /* entry in a->subdevice_list */
++ aaudio_device_id_t dev_id; /* id used in messages and lookups */
++ u32 in_latency, out_latency;
++ u8 buf_id; /* index into bs->devices[], or AAUDIO_BUFFER_ID_NONE */
++ int alsa_id; /* PCM device number passed to snd_pcm_new() */
++ char uid[AAUDIO_DEVICE_MAX_UID_LEN + 1]; /* NUL-terminated name */
++ size_t in_stream_cnt; /* valid entries in in_streams[] */
++ struct aaudio_stream in_streams[AAUDIO_DEIVCE_MAX_INPUT_STREAMS];
++ size_t out_stream_cnt; /* valid entries in out_streams[] */
++ struct aaudio_stream out_streams[AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS];
++ bool is_pcm; /* aaudio_create_pcm() refuses subdevices without this flag */
++ struct snd_pcm *pcm; /* set by aaudio_create_pcm() */
++ struct snd_jack *jack; /* may be NULL; jack changes are ignored then */
++};
++/* One row of the fixed name -> ALSA PCM id table (aaudio_alsa_id_mappings). */
++struct aaudio_alsa_pcm_id_mapping {
++ const char *name; /* compared against the subdevice uid; NULL ends the table */
++ int alsa_id;
++};
++
++/* Per-device driver state. */
++struct aaudio_device {
++ struct pci_dev *pci; /* underlying PCI device; used for DMA allocations */
++ dev_t devt;
++ struct device *dev; /* device used for log messages */
++ void __iomem *reg_mem_bs; /* region that contains the BufferStruct and remote buffers */
++ dma_addr_t reg_mem_bs_dma; /* DMA base address matching reg_mem_bs */
++ void __iomem *reg_mem_cfg;
++
++ u32 __iomem *reg_mem_gpr; /* [0] version, [1] signature, [2] BufferStruct offset */
++
++ struct aaudio_buffer_struct *bs; /* set by aaudio_init_bs() */
++
++ struct apple_bce_device *bce;
++ struct aaudio_bce bcem;
++
++ struct snd_card *card; /* card the PCM devices are created on */
++
++ struct list_head subdevice_list; /* list of struct aaudio_subdevice */
++ int next_alsa_id; /* next PCM id for subdevices without a fixed mapping */
++
++ struct completion remote_alive; /* completed when an ALIVE notification arrives */
++};
++
++void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg);
++void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg);
++
++int aaudio_module_init(void);
++void aaudio_module_exit(void);
++
++extern struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[];
++
++#endif //AAUDIO_H
+diff --git a/drivers/staging/apple-bce/audio/description.h b/drivers/staging/apple-bce/audio/description.h
+new file mode 100644
+index 000000000..dfef3ab68
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/description.h
+@@ -0,0 +1,42 @@
++#ifndef AAUDIO_DESCRIPTION_H
++#define AAUDIO_DESCRIPTION_H
++
++#include <linux/types.h>
++
++/* Stream format description as delivered by the remote side. */
++struct aaudio_apple_description {
++ u64 sample_rate_double; /* bit pattern of a double; decode with aaudio_double_to_u64() */
++ u32 format_id;
++ u32 format_flags; /* AAUDIO_FORMAT_FLAG_* bits */
++ u32 bytes_per_packet;
++ u32 frames_per_packet;
++ u32 bytes_per_frame;
++ u32 channels_per_frame;
++ u32 bits_per_channel;
++ u32 reserved;
++};
++
++/* Known format id values; the constant is the four ASCII characters noted next to it. */
++enum {
++ AAUDIO_FORMAT_LPCM = 0x6c70636d // 'lpcm'
++};
++
++/* Bit flags stored in aaudio_apple_description.format_flags. */
++enum {
++ AAUDIO_FORMAT_FLAG_FLOAT = 1,
++ AAUDIO_FORMAT_FLAG_BIG_ENDIAN = 2,
++ AAUDIO_FORMAT_FLAG_SIGNED = 4,
++ AAUDIO_FORMAT_FLAG_PACKED = 8,
++ AAUDIO_FORMAT_FLAG_ALIGNED_HIGH = 16,
++ AAUDIO_FORMAT_FLAG_NON_INTERLEAVED = 32,
++ AAUDIO_FORMAT_FLAG_NON_MIXABLE = 64
++};
++
++/*
++ * Convert the IEEE 754 binary64 bit pattern @d to an unsigned integer,
++ * truncating the fractional part, without using floating point.
++ *
++ * Returns 0 for negative values and for magnitudes below 1.  Values that do
++ * not fit in 64 bits (including infinities and NaNs) saturate to the maximum
++ * u64 value instead of shifting by an out-of-range amount.
++ */
++static inline u64 aaudio_double_to_u64(u64 d)
++{
++	u8 sign = (u8) ((d >> 63) & 1);
++	s32 exp = (s32) ((d >> 52) & 0x7ff) - 1023;
++	u64 fr = d & ((1ULL << 52) - 1);
++
++	if (sign || exp < 0)
++		return 0;
++	if (exp > 63)
++		return ~0ULL;
++	/* Above 2^52 the mantissa has to move left, not right. */
++	if (exp > 52)
++		return (1ULL << exp) + (fr << (exp - 52));
++	return (1ULL << exp) + (fr >> (52 - exp));
++}
++
++#endif //AAUDIO_DESCRIPTION_H
+diff --git a/drivers/staging/apple-bce/audio/pcm.c b/drivers/staging/apple-bce/audio/pcm.c
+new file mode 100644
+index 000000000..1026e10a9
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/pcm.c
+@@ -0,0 +1,308 @@
++#include "pcm.h"
++#include "audio.h"
++
++/*
++ * Translate the format flags and bits_per_channel of @desc into a single
++ * SNDRV_PCM_FMTBIT_* value.  Returns 0 (after logging) when the combination
++ * is not supported.
++ *
++ * Order of evaluation: float formats first, then the PACKED special case,
++ * then the ALIGNED_HIGH special case, then the plain integer formats.
++ */
++static u64 aaudio_get_alsa_fmtbit(struct aaudio_apple_description *desc)
++{
++ /* Float: only 32 and 64 bits per channel are accepted. */
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_FLOAT) {
++ if (desc->bits_per_channel == 32) {
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN)
++ return SNDRV_PCM_FMTBIT_FLOAT_BE;
++ else
++ return SNDRV_PCM_FMTBIT_FLOAT_LE;
++ } else if (desc->bits_per_channel == 64) {
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN)
++ return SNDRV_PCM_FMTBIT_FLOAT64_BE;
++ else
++ return SNDRV_PCM_FMTBIT_FLOAT64_LE;
++ } else {
++ pr_err("aaudio: unsupported bits per channel for float format: %u\n", desc->bits_per_channel);
++ return 0;
++ }
++ }
++ /*
++ * DEFINE_BPC_OPTION(val, b) expands to a "case val:" that returns the
++ * S/U and BE/LE variant of format "b" chosen by the SIGNED and BIG_ENDIAN
++ * flags.  The macro is not #undef'ed and stays defined after this function.
++ */
++#define DEFINE_BPC_OPTION(val, b) \
++ case val: \
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) { \
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \
++ return SNDRV_PCM_FMTBIT_S ## b ## BE; \
++ else \
++ return SNDRV_PCM_FMTBIT_U ## b ## BE; \
++ } else { \
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \
++ return SNDRV_PCM_FMTBIT_S ## b ## LE; \
++ else \
++ return SNDRV_PCM_FMTBIT_U ## b ## LE; \
++ }
++ /* Packed: 24 bit maps to the 3-byte formats; 8/16/32 leave the switch and continue below. */
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_PACKED) {
++ switch (desc->bits_per_channel) {
++ case 8:
++ case 16:
++ case 32:
++ break;
++ DEFINE_BPC_OPTION(24, 24_3)
++ default:
++ pr_err("aaudio: unsupported bits per channel for packed format: %u\n", desc->bits_per_channel);
++ return 0;
++ }
++ }
++ /* High-aligned: only 24 bit is accepted and it maps to the 32-bit formats. */
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_ALIGNED_HIGH) {
++ switch (desc->bits_per_channel) {
++ DEFINE_BPC_OPTION(24, 32_)
++ default:
++ pr_err("aaudio: unsupported bits per channel for high-aligned format: %u\n", desc->bits_per_channel);
++ return 0;
++ }
++ }
++ /* Plain integer formats. */
++ switch (desc->bits_per_channel) {
++ case 8:
++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED)
++ return SNDRV_PCM_FMTBIT_S8;
++ else
++ return SNDRV_PCM_FMTBIT_U8;
++ DEFINE_BPC_OPTION(16, 16_)
++ DEFINE_BPC_OPTION(24, 24_)
++ DEFINE_BPC_OPTION(32, 32_)
++ default:
++ pr_err("aaudio: unsupported bits per channel: %u\n", desc->bits_per_channel);
++ return 0;
++ }
++}
++/*
++ * Fill @alsa_hw from the stream description @desc for a buffer of
++ * @buf_size bytes.
++ *
++ * The whole structure is zeroed first, so fields this function does not set
++ * never carry stale heap contents.  The rate, channel count and period size
++ * are fixed to the values in @desc; the number of periods is the number of
++ * packets that fit into the buffer.
++ *
++ * Returns 0 on success, -EINVAL when the sample format is unsupported or
++ * bytes_per_packet is zero (which would otherwise divide by zero).
++ */
++int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw,
++		size_t buf_size)
++{
++	uint rate;
++
++	if (!desc->bytes_per_packet) {
++		pr_err("aaudio: invalid stream description: bytes_per_packet is zero\n");
++		return -EINVAL;
++	}
++
++	memset(alsa_hw, 0, sizeof(*alsa_hw));
++	alsa_hw->info = (SNDRV_PCM_INFO_MMAP |
++			 SNDRV_PCM_INFO_BLOCK_TRANSFER |
++			 SNDRV_PCM_INFO_MMAP_VALID |
++			 SNDRV_PCM_INFO_DOUBLE);
++	if (desc->format_flags & AAUDIO_FORMAT_FLAG_NON_MIXABLE)
++		pr_warn("aaudio: unsupported hw flag: NON_MIXABLE\n");
++	if (!(desc->format_flags & AAUDIO_FORMAT_FLAG_NON_INTERLEAVED))
++		alsa_hw->info |= SNDRV_PCM_INFO_INTERLEAVED;
++	alsa_hw->formats = aaudio_get_alsa_fmtbit(desc);
++	if (!alsa_hw->formats)
++		return -EINVAL;
++	rate = (uint) aaudio_double_to_u64(desc->sample_rate_double);
++	alsa_hw->rates = snd_pcm_rate_to_rate_bit(rate);
++	alsa_hw->rate_min = rate;
++	alsa_hw->rate_max = rate;
++	alsa_hw->channels_min = desc->channels_per_frame;
++	alsa_hw->channels_max = desc->channels_per_frame;
++	alsa_hw->buffer_bytes_max = buf_size;
++	alsa_hw->period_bytes_min = desc->bytes_per_packet;
++	alsa_hw->period_bytes_max = desc->bytes_per_packet;
++	alsa_hw->periods_min = (uint) (buf_size / desc->bytes_per_packet);
++	alsa_hw->periods_max = (uint) (buf_size / desc->bytes_per_packet);
++	pr_debug("aaudio_create_hw_info: format = %llu, rate = %u/%u. channels = %u, periods = %u, period size = %zu\n",
++		 alsa_hw->formats, alsa_hw->rate_min, alsa_hw->rates, alsa_hw->channels_min, alsa_hw->periods_min,
++		 alsa_hw->period_bytes_min);
++	return 0;
++}
++
++/*
++ * Map an ALSA substream to the driver stream it represents: playback
++ * substreams index out_streams[], all others index in_streams[], both by
++ * substream->number.
++ */
++static struct aaudio_stream *aaudio_pcm_stream(struct snd_pcm_substream *substream)
++{
++	struct aaudio_subdevice *owner = snd_pcm_substream_chip(substream);
++	struct aaudio_stream *table;
++
++	table = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? owner->out_streams : owner->in_streams;
++	return &table[substream->number];
++}
++
++/*
++ * ALSA open callback: copy the stream's prepared hardware description into
++ * the runtime.  Returns -ENODEV when no description exists for the stream
++ * (alsa_hw_desc is NULL when the stream could not be initialised).
++ */
++static int aaudio_pcm_open(struct snd_pcm_substream *substream)
++{
++	struct aaudio_stream *stream = aaudio_pcm_stream(substream);
++
++	pr_debug("aaudio_pcm_open\n");
++	if (!stream->alsa_hw_desc)
++		return -ENODEV;
++	substream->runtime->hw = *stream->alsa_hw_desc;
++
++	return 0;
++}
++
++/* ALSA close callback: nothing to release, only logs. */
++static int aaudio_pcm_close(struct snd_pcm_substream *substream)
++{
++ pr_debug("aaudio_pcm_close\n");
++ return 0;
++}
++
++/* ALSA prepare callback: no work is done here; always succeeds. */
++static int aaudio_pcm_prepare(struct snd_pcm_substream *substream)
++{
++ return 0;
++}
++
++/*
++ * ALSA hw_params callback: point the runtime at the stream's first
++ * pre-allocated buffer.  Returns -EINVAL when the stream has no buffers.
++ */
++static int aaudio_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params)
++{
++	struct aaudio_stream *astream = aaudio_pcm_stream(substream);
++	struct aaudio_dma_buf *first;
++
++	pr_debug("aaudio_pcm_hw_params\n");
++
++	if (astream->buffer_cnt == 0 || astream->buffers == NULL)
++		return -EINVAL;
++
++	first = &astream->buffers[0];
++	substream->runtime->dma_area = first->ptr;
++	substream->runtime->dma_addr = first->dma_addr;
++	substream->runtime->dma_bytes = first->size;
++	return 0;
++}
++
++/* ALSA hw_free callback: the buffers are not owned by the runtime, so this only logs. */
++static int aaudio_pcm_hw_free(struct snd_pcm_substream *substream)
++{
++ pr_debug("aaudio_pcm_hw_free\n");
++ return 0;
++}
++
++/*
++ * Start I/O on the subdevice behind @substream.
++ *
++ * For playback the data already written by the application (up to appl_ptr)
++ * is copied aside before aaudio_cmd_start_io() and written back afterwards.
++ * The backup is best effort: if the temporary buffer cannot be allocated the
++ * device is started without it.  The temporary buffer is freed before
++ * returning.
++ */
++static void aaudio_pcm_start(struct snd_pcm_substream *substream)
++{
++	struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream);
++	struct aaudio_stream *stream = aaudio_pcm_stream(substream);
++	struct snd_pcm_runtime *runtime = substream->runtime;
++	void *buf = NULL;
++	size_t s = 0;
++	ktime_t time_start, time_end;
++	bool back_buffer;
++
++	time_start = ktime_get();
++
++	back_buffer = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
++
++	if (back_buffer) {
++		s = frames_to_bytes(runtime, runtime->control->appl_ptr);
++		/* Never copy more than the buffer that dma_area points to. */
++		if (s > runtime->dma_bytes)
++			s = runtime->dma_bytes;
++		buf = kmalloc(s, GFP_KERNEL);
++		if (!buf) {
++			pr_warn("aaudio: Failed to allocate the buffer backup\n");
++			back_buffer = false;
++		} else {
++			memcpy_fromio(buf, runtime->dma_area, s);
++			time_end = ktime_get();
++			pr_debug("aaudio: Backed up the buffer in %lldns [%lu]\n", ktime_to_ns(time_end - time_start),
++				 runtime->control->appl_ptr);
++		}
++	}
++
++	stream->waiting_for_first_ts = true;
++	stream->frame_min = stream->latency;
++
++	aaudio_cmd_start_io(sdev->a, sdev->dev_id);
++	if (back_buffer)
++		memcpy_toio(runtime->dma_area, buf, s);
++	kfree(buf);
++
++	time_end = ktime_get();
++	pr_debug("aaudio: Started the audio device in %lldns\n", ktime_to_ns(time_end - time_start));
++}
++
++/*
++ * ALSA trigger callback.  START starts device I/O and marks the stream as
++ * started; STOP stops device I/O and clears the mark.  Triggers on any
++ * substream other than number 0 are accepted and ignored; every other
++ * command yields -EINVAL.
++ */
++static int aaudio_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
++{
++	struct aaudio_subdevice *owner = snd_pcm_substream_chip(substream);
++	struct aaudio_stream *strm = aaudio_pcm_stream(substream);
++
++	pr_debug("aaudio_pcm_trigger %x\n", cmd);
++
++	/* We only supports triggers on the #0 buffer */
++	if (substream->number != 0)
++		return 0;
++
++	if (cmd == SNDRV_PCM_TRIGGER_START) {
++		aaudio_pcm_start(substream);
++		strm->started = 1;
++		return 0;
++	}
++	if (cmd == SNDRV_PCM_TRIGGER_STOP) {
++		aaudio_cmd_stop_io(owner->a, owner->dev_id);
++		strm->started = 0;
++		return 0;
++	}
++	return -EINVAL;
++}
++
++/*
++ * ALSA pointer callback.  The position is not read from the hardware; it is
++ * estimated from the time elapsed since the last stored timestamp.
++ * Returns 0 (with a warning) while the stream is not started or is still
++ * waiting for its first timestamp.
++ * NOTE(review): buffer_time_length is used as a divisor and is zero when
++ * runtime->buffer_size is 0 - presumably never the case once started; confirm.
++ */
++static snd_pcm_uframes_t aaudio_pcm_pointer(struct snd_pcm_substream *substream)
++{
++ struct aaudio_stream *stream = aaudio_pcm_stream(substream);
++ ktime_t time_from_start;
++ snd_pcm_sframes_t frames;
++ snd_pcm_sframes_t buffer_time_length;
++
++ if (!stream->started || stream->waiting_for_first_ts) {
++ pr_warn("aaudio_pcm_pointer while not started\n");
++ return 0;
++ }
++
++ /* Approximate the pointer based on the last received timestamp */
++ time_from_start = ktime_get_boottime() - stream->remote_timestamp;
++ /* Duration of one full buffer in nanoseconds at the runtime rate. */
++ buffer_time_length = NSEC_PER_SEC * substream->runtime->buffer_size / substream->runtime->rate;
++ /* Elapsed time modulo one buffer, scaled back to frames. */
++ frames = (ktime_to_ns(time_from_start) % buffer_time_length) * substream->runtime->buffer_size / buffer_time_length;
++ /*
++ * Within the first buffer length the result is held at frame_min until
++ * the estimate passes it; within the second buffer length frame_min
++ * follows the estimate; after that frame_min is reset.
++ */
++ if (ktime_to_ns(time_from_start) < buffer_time_length) {
++ if (frames < stream->frame_min)
++ frames = stream->frame_min;
++ else
++ stream->frame_min = 0;
++ } else {
++ if (ktime_to_ns(time_from_start) < 2 * buffer_time_length)
++ stream->frame_min = frames;
++ else
++ stream->frame_min = 0; /* Heavy desync */
++ }
++ frames -= stream->latency;
++ /* Wrap a negative position back into [0, buffer_size). */
++ if (frames < 0)
++ frames += ((-frames - 1) / substream->runtime->buffer_size + 1) * substream->runtime->buffer_size;
++ return (snd_pcm_uframes_t) frames;
++}
++
++/*
++ * PCM operations shared by playback and capture substreams.  The table is
++ * never modified and snd_pcm_set_ops() takes a const pointer, hence const.
++ */
++static const struct snd_pcm_ops aaudio_pcm_ops = {
++	.open = aaudio_pcm_open,
++	.close = aaudio_pcm_close,
++	.ioctl = snd_pcm_lib_ioctl,
++	.hw_params = aaudio_pcm_hw_params,
++	.hw_free = aaudio_pcm_hw_free,
++	.prepare = aaudio_pcm_prepare,
++	.trigger = aaudio_pcm_trigger,
++	.pointer = aaudio_pcm_pointer,
++	.mmap = snd_pcm_lib_mmap_iomem
++};
++
++/*
++ * Create the ALSA PCM device for @sdev.
++ *
++ * The PCM id comes from aaudio_alsa_id_mappings when the uid is listed
++ * there, otherwise from the device's running next_alsa_id counter.  The uid
++ * can be longer than pcm->name, so it is copied with truncation.
++ *
++ * Returns 0 on success, -EINVAL when the subdevice is not a PCM device or
++ * has no streams, or the error returned by snd_pcm_new().
++ */
++int aaudio_create_pcm(struct aaudio_subdevice *sdev)
++{
++	struct snd_pcm *pcm;
++	struct aaudio_alsa_pcm_id_mapping *id_mapping;
++	int err;
++
++	if (!sdev->is_pcm || (sdev->in_stream_cnt == 0 && sdev->out_stream_cnt == 0)) {
++		return -EINVAL;
++	}
++
++	for (id_mapping = aaudio_alsa_id_mappings; id_mapping->name; id_mapping++) {
++		if (!strcmp(sdev->uid, id_mapping->name)) {
++			sdev->alsa_id = id_mapping->alsa_id;
++			break;
++		}
++	}
++	/* The loop ran off the end of the table: no fixed id for this uid. */
++	if (!id_mapping->name)
++		sdev->alsa_id = sdev->a->next_alsa_id++;
++	err = snd_pcm_new(sdev->a->card, sdev->uid, sdev->alsa_id,
++			  (int) sdev->out_stream_cnt, (int) sdev->in_stream_cnt, &pcm);
++	if (err < 0)
++		return err;
++	pcm->private_data = sdev;
++	pcm->nonatomic = 1;
++	sdev->pcm = pcm;
++	strscpy(pcm->name, sdev->uid, sizeof(pcm->name));
++	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &aaudio_pcm_ops);
++	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &aaudio_pcm_ops);
++	return 0;
++}
++
++/*
++ * Record @timestamp for the stream behind @substream.  The first timestamp
++ * after a start only clears waiting_for_first_ts; every later one also
++ * signals an elapsed period to ALSA (outside the stream lock).
++ */
++static void aaudio_handle_stream_timestamp(struct snd_pcm_substream *substream, ktime_t timestamp)
++{
++	struct aaudio_stream *strm = aaudio_pcm_stream(substream);
++	unsigned long irq_flags;
++	bool was_first;
++
++	snd_pcm_stream_lock_irqsave(substream, irq_flags);
++	strm->remote_timestamp = timestamp;
++	was_first = strm->waiting_for_first_ts;
++	if (was_first)
++		strm->waiting_for_first_ts = false;
++	snd_pcm_stream_unlock_irqrestore(substream, irq_flags);
++
++	if (!was_first)
++		snd_pcm_period_elapsed(substream);
++}
++
++/*
++ * Forward a timestamp update to the first playback and capture substreams
++ * of @sdev.  The playback substream receives @dev_timestamp, the capture
++ * substream @os_timestamp.  Does nothing when @sdev is NULL or has no PCM
++ * device.
++ */
++void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp)
++{
++	struct snd_pcm_substream *substream;
++
++	if (!sdev || !sdev->pcm)
++		return;
++
++	substream = sdev->pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream;
++	if (substream)
++		aaudio_handle_stream_timestamp(substream, dev_timestamp);
++	substream = sdev->pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream;
++	if (substream)
++		aaudio_handle_stream_timestamp(substream, os_timestamp);
++}
+diff --git a/drivers/staging/apple-bce/audio/pcm.h b/drivers/staging/apple-bce/audio/pcm.h
+new file mode 100644
+index 000000000..ea5f35fbe
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/pcm.h
+@@ -0,0 +1,16 @@
++#ifndef AAUDIO_PCM_H
++#define AAUDIO_PCM_H
++
++#include <linux/types.h>
++#include <linux/ktime.h>
++
++struct aaudio_subdevice;
++struct aaudio_apple_description;
++struct snd_pcm_hardware;
++
++/* Fill alsa_hw from desc for a buffer of buf_size bytes; returns 0 or -EINVAL. */
++int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, size_t buf_size);
++/* Create the ALSA PCM device for sdev; returns 0 or a negative error code. */
++int aaudio_create_pcm(struct aaudio_subdevice *sdev);
++
++/* Pass a timestamp update on to the playback and capture substreams of sdev. */
++void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp);
++
++#endif //AAUDIO_PCM_H
+diff --git a/drivers/staging/apple-bce/audio/protocol.c b/drivers/staging/apple-bce/audio/protocol.c
+new file mode 100644
+index 000000000..2314813ae
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol.c
+@@ -0,0 +1,347 @@
++#include "protocol.h"
++#include "protocol_bce.h"
++#include "audio.h"
++
++/*
++ * Copy the base record that follows the message header into @base.
++ * Returns -EINVAL when the message is shorter than the header plus twice
++ * the size of a base record, 0 otherwise.
++ */
++int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base)
++{
++	struct aaudio_msg_header *hdr;
++
++	if (msg->size < sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base) * 2)
++		return -EINVAL;
++
++	hdr = (struct aaudio_msg_header *) msg->data;
++	*base = *((struct aaudio_msg_base *) (hdr + 1));
++	return 0;
++}
++
++/*
++ * Message-parsing helpers.  They expect a variable named "msg" in the
++ * enclosing function.
++ * READ_START declares the local "offset" (first byte after header and base
++ * record) and makes the function return -EINVAL when the message id in the
++ * base record differs from "type".
++ * NOTE(review): none of these macros compare offset against msg->size.
++ */
++#define READ_START(type) \
++ size_t offset = sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base); (void)offset; \
++ if (((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1))->msg != type) \
++ return -EINVAL;
++/* Store the device id from the message header through the pointer "devid". */
++#define READ_DEVID_VAR(devid) *devid = ((struct aaudio_msg_header *) msg->data)->device_id
++/* Yield the value of "type" at the current offset and advance offset past it. */
++#define READ_VAL(type) ({ offset += sizeof(type); *((type *) ((u8 *) msg->data + offset - sizeof(type))); })
++/* READ_VAL stored through the pointer "var". */
++#define READ_VAR(type, var) *var = READ_VAL(type)
++
++/* Check that @msg is a START_IO response; returns 0, or -EINVAL on a different message id. */
++int aaudio_msg_read_start_io_response(struct aaudio_msg *msg)
++{
++ READ_START(AAUDIO_MSG_START_IO_RESPONSE);
++ return 0;
++}
++
++/* Check that @msg is a STOP_IO response; returns 0, or -EINVAL on a different message id. */
++int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg)
++{
++ READ_START(AAUDIO_MSG_STOP_IO_RESPONSE);
++ return 0;
++}
++
++/*
++ * Parse an UPDATE_TIMESTAMP message: the device id comes from the header,
++ * followed in the payload by a u64 timestamp and a u64 update seed.
++ * Returns 0, or -EINVAL on a different message id (outputs untouched then).
++ */
++int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid,
++ u64 *timestamp, u64 *update_seed)
++{
++ READ_START(AAUDIO_MSG_UPDATE_TIMESTAMP);
++ READ_DEVID_VAR(devid);
++ READ_VAR(u64, timestamp);
++ READ_VAR(u64, update_seed);
++ return 0;
++}
++
++/*
++ * Parse a GET_PROPERTY response: object id, property address
++ * (element, scope, selector), payload length and payload.
++ *
++ * *data points into the message buffer and is only valid as long as @msg
++ * is.  Returns 0 on success, -EINVAL on a different message id or when the
++ * fixed fields or the announced payload do not fit into msg->size.
++ */
++int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
++		struct aaudio_prop_addr *prop, void **data, u64 *data_size)
++{
++	READ_START(AAUDIO_MSG_GET_PROPERTY_RESPONSE);
++	/* Fixed part: object id, three u32 address words, u64 payload length. */
++	if (msg->size < offset + sizeof(aaudio_object_id_t) + 3 * sizeof(u32) + sizeof(u64))
++		return -EINVAL;
++	READ_VAR(aaudio_object_id_t, obj);
++	READ_VAR(u32, &prop->element);
++	READ_VAR(u32, &prop->scope);
++	READ_VAR(u32, &prop->selector);
++	READ_VAR(u64, data_size);
++	/* The length is supplied by the sender; never hand out more than was received. */
++	if (*data_size > msg->size - offset)
++		return -EINVAL;
++	*data = ((u8 *) msg->data + offset);
++	/* offset += data_size; */
++	return 0;
++}
++
++/* Parse a SET_PROPERTY response and return its object id in *obj; -EINVAL on a different message id. */
++int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj)
++{
++ READ_START(AAUDIO_MSG_SET_PROPERTY_RESPONSE);
++ READ_VAR(aaudio_object_id_t, obj);
++ return 0;
++}
++
++/*
++ * Parse a PROPERTY_LISTENER response: object id followed by the property
++ * address (element, scope, selector).  Returns 0, or -EINVAL on a different
++ * message id.
++ */
++int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
++ struct aaudio_prop_addr *prop)
++{
++ READ_START(AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE);
++ READ_VAR(aaudio_object_id_t, obj);
++ READ_VAR(u32, &prop->element);
++ READ_VAR(u32, &prop->scope);
++ READ_VAR(u32, &prop->selector);
++ return 0;
++}
++
++/*
++ * Parse a PROPERTY_CHANGED notification: device id from the header, then
++ * object id and property address (element, scope, selector) from the
++ * payload.  Returns 0, or -EINVAL on a different message id (outputs
++ * untouched then).
++ */
++int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj,
++ struct aaudio_prop_addr *prop)
++{
++ READ_START(AAUDIO_MSG_PROPERTY_CHANGED);
++ READ_DEVID_VAR(devid);
++ READ_VAR(aaudio_object_id_t, obj);
++ READ_VAR(u32, &prop->element);
++ READ_VAR(u32, &prop->scope);
++ READ_VAR(u32, &prop->selector);
++ return 0;
++}
++
++/* Check that @msg is a SET_INPUT_STREAM_ADDRESS_RANGES response; returns 0, or -EINVAL otherwise. */
++int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg)
++{
++ READ_START(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE);
++ return 0;
++}
++
++/*
++ * Parse a GET_INPUT_STREAM_LIST response: a u64 count followed by that many
++ * object ids.
++ *
++ * *str_l points into the message buffer and is only valid as long as @msg
++ * is.  Returns 0 on success, -EINVAL on a different message id or when the
++ * count or the announced list does not fit into msg->size.
++ */
++int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt)
++{
++	READ_START(AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE);
++	if (msg->size < offset + sizeof(u64))
++		return -EINVAL;
++	READ_VAR(u64, str_cnt);
++	/* The count is supplied by the sender; never hand out more than was received. */
++	if (*str_cnt > (msg->size - offset) / sizeof(aaudio_object_id_t))
++		return -EINVAL;
++	*str_l = (aaudio_object_id_t *) ((u8 *) msg->data + offset);
++	/* offset += str_cnt * sizeof(aaudio_object_id_t); */
++	return 0;
++}
++
++/*
++ * Parse a GET_OUTPUT_STREAM_LIST response: a u64 count followed by that many
++ * object ids.
++ *
++ * *str_l points into the message buffer and is only valid as long as @msg
++ * is.  Returns 0 on success, -EINVAL on a different message id or when the
++ * count or the announced list does not fit into msg->size.
++ */
++int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt)
++{
++	READ_START(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE);
++	if (msg->size < offset + sizeof(u64))
++		return -EINVAL;
++	READ_VAR(u64, str_cnt);
++	/* The count is supplied by the sender; never hand out more than was received. */
++	if (*str_cnt > (msg->size - offset) / sizeof(aaudio_object_id_t))
++		return -EINVAL;
++	*str_l = (aaudio_object_id_t *) ((u8 *) msg->data + offset);
++	/* offset += str_cnt * sizeof(aaudio_object_id_t); */
++	return 0;
++}
++
++/* Check that @msg is a SET_REMOTE_ACCESS response; returns 0, or -EINVAL on a different message id. */
++int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg)
++{
++ READ_START(AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE);
++ return 0;
++}
++
++/*
++ * Parse a GET_DEVICE_LIST response: a u64 count followed by that many
++ * device ids.
++ *
++ * *dev_l points into the message buffer and is only valid as long as @msg
++ * is.  Returns 0 on success, -EINVAL on a different message id or when the
++ * count or the announced list does not fit into msg->size.
++ */
++int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt)
++{
++	READ_START(AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE);
++	if (msg->size < offset + sizeof(u64))
++		return -EINVAL;
++	READ_VAR(u64, dev_cnt);
++	/* The count is supplied by the sender; never hand out more than was received. */
++	if (*dev_cnt > (msg->size - offset) / sizeof(aaudio_device_id_t))
++		return -EINVAL;
++	*dev_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset);
++	/* offset += dev_cnt * sizeof(aaudio_device_id_t); */
++	return 0;
++}
++
++/*
++ * Message-building helpers.  They expect a variable named "msg" in the
++ * enclosing function.
++ * WRITE_START_* declare the local "offset" (first byte after the header) and
++ * fill in the header's type and device id.  WRITE_VAL stores one value of
++ * "type" and advances offset by sizeof(type), so the cursor always matches
++ * the number of bytes stored even when "value" has a different type.
++ * WRITE_BIN copies "size" raw bytes.  WRITE_BASE writes the message id and a
++ * zero u32.  WRITE_END records the final length in msg->size.
++ * None of the macros check the capacity of the message buffer.
++ */
++#define WRITE_START_OF_TYPE(typev, devid) \
++	size_t offset = sizeof(struct aaudio_msg_header); (void) offset; \
++	((struct aaudio_msg_header *) msg->data)->type = (typev); \
++	((struct aaudio_msg_header *) msg->data)->device_id = (devid);
++#define WRITE_START_COMMAND(devid) WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_COMMAND, devid)
++#define WRITE_START_RESPONSE() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_RESPONSE, 0)
++#define WRITE_START_NOTIFICATION() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_NOTIFICATION, 0)
++#define WRITE_VAL(type, value) { *((type *) ((u8 *) msg->data + offset)) = (value); offset += sizeof(type); }
++#define WRITE_BIN(value, size) { memcpy((u8 *) msg->data + offset, (value), (size)); offset += (size); }
++#define WRITE_BASE(type) WRITE_VAL(u32, type) WRITE_VAL(u32, 0)
++#define WRITE_END() { msg->size = offset; }
++
++/* Build a START_IO command addressed to device @dev; sets msg->size to the written length. */
++void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev)
++{
++ WRITE_START_COMMAND(dev);
++ WRITE_BASE(AAUDIO_MSG_START_IO);
++ WRITE_END();
++}
++
++/* Build a STOP_IO command addressed to device @dev; sets msg->size to the written length. */
++void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev)
++{
++ WRITE_START_COMMAND(dev);
++ WRITE_BASE(AAUDIO_MSG_STOP_IO);
++ WRITE_END();
++}
++
++/*
++ * Build a GET_PROPERTY command: object id, property address
++ * (element, scope, selector), qualifier length and the qualifier bytes.
++ * NOTE(review): qualifier_size bytes are copied without checking the
++ * capacity of msg->data - presumably the caller guarantees it; confirm.
++ */
++void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size)
++{
++ WRITE_START_COMMAND(dev);
++ WRITE_BASE(AAUDIO_MSG_GET_PROPERTY);
++ WRITE_VAL(aaudio_object_id_t, obj);
++ WRITE_VAL(u32, prop.element);
++ WRITE_VAL(u32, prop.scope);
++ WRITE_VAL(u32, prop.selector);
++ WRITE_VAL(u64, qualifier_size);
++ WRITE_BIN(qualifier, qualifier_size);
++ WRITE_END();
++}
++
++/*
++ * Build a SET_PROPERTY command: object id, property address
++ * (element, scope, selector), data length and data bytes, then qualifier
++ * length and qualifier bytes.
++ * NOTE(review): data_size and qualifier_size bytes are copied without
++ * checking the capacity of msg->data - presumably the caller guarantees it;
++ * confirm.
++ */
++void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size)
++{
++ WRITE_START_COMMAND(dev);
++ WRITE_BASE(AAUDIO_MSG_SET_PROPERTY);
++ WRITE_VAL(aaudio_object_id_t, obj);
++ WRITE_VAL(u32, prop.element);
++ WRITE_VAL(u32, prop.scope);
++ WRITE_VAL(u32, prop.selector);
++ WRITE_VAL(u64, data_size);
++ WRITE_BIN(data, data_size);
++ WRITE_VAL(u64, qualifier_size);
++ WRITE_BIN(qualifier, qualifier_size);
++ WRITE_END();
++}
++
++/*
++ * Serialise a PROPERTY_LISTENER command into `msg`: object id followed by
++ * prop.element, prop.scope and prop.selector.
++ */
++void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop)
++{
++ WRITE_START_COMMAND(dev);
++ WRITE_BASE(AAUDIO_MSG_PROPERTY_LISTENER);
++ WRITE_VAL(aaudio_object_id_t, obj);
++ WRITE_VAL(u32, prop.element);
++ WRITE_VAL(u32, prop.scope);
++ WRITE_VAL(u32, prop.selector);
++ WRITE_END();
++}
++
++/* Serialise a SET_INPUT_STREAM_ADDRESS_RANGES command for `devid`; no payload beyond the base is written. */
++void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid)
++{
++ WRITE_START_COMMAND(devid);
++ WRITE_BASE(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES);
++ WRITE_END();
++}
++
++/* Serialise a GET_INPUT_STREAM_LIST command for `devid` (header + base only). */
++void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid)
++{
++ WRITE_START_COMMAND(devid);
++ WRITE_BASE(AAUDIO_MSG_GET_INPUT_STREAM_LIST);
++ WRITE_END();
++}
++
++/* Serialise a GET_OUTPUT_STREAM_LIST command for `devid` (header + base only). */
++void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid)
++{
++ WRITE_START_COMMAND(devid);
++ WRITE_BASE(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST);
++ WRITE_END();
++}
++
++/* Serialise a SET_REMOTE_ACCESS command (device id 0) carrying a u64 `mode`. */
++void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode)
++{
++ WRITE_START_COMMAND(0);
++ WRITE_BASE(AAUDIO_MSG_SET_REMOTE_ACCESS);
++ WRITE_VAL(u64, mode);
++ WRITE_END();
++}
++
++/* Serialise an ALIVE notification carrying two u32 values: `proto_ver` then `msg_ver`. */
++void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver)
++{
++ WRITE_START_NOTIFICATION();
++ WRITE_BASE(AAUDIO_MSG_NOTIFICATION_ALIVE);
++ WRITE_VAL(u32, proto_ver);
++ WRITE_VAL(u32, msg_ver);
++ WRITE_END();
++}
++
++/* Serialise an UPDATE_TIMESTAMP_RESPONSE message of type RESPONSE (header + base only). */
++void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg)
++{
++ WRITE_START_RESPONSE();
++ WRITE_BASE(AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE);
++ WRITE_END();
++}
++
++/* Serialise a GET_DEVICE_LIST command (device id 0, header + base only). */
++void aaudio_msg_write_get_device_list(struct aaudio_msg *msg)
++{
++ WRITE_START_COMMAND(0);
++ WRITE_BASE(AAUDIO_MSG_GET_DEVICE_LIST);
++ WRITE_END();
++}
++
++/*
++ * Building blocks for the aaudio_cmd_* wrappers. They rely on the enclosing
++ * function having a `struct aaudio_device *a` parameter and, for the
++ * *_NO_REPLY variants, a caller-provided `struct aaudio_msg *buf`.
++ */
++/* Locals needed by every command: result code and send context. */
++#define CMD_SHARED_VARS_NO_REPLY \
++ int status = 0; \
++ struct aaudio_send_ctx sctx;
++/* As above, plus a locally allocated reply buffer exposed as `buf`. */
++#define CMD_SHARED_VARS \
++ CMD_SHARED_VARS_NO_REPLY \
++ struct aaudio_msg reply = aaudio_reply_alloc(); \
++ struct aaudio_msg *buf = &reply;
++/* Send synchronously with a 500 ms timeout; return from the caller on failure. */
++#define CMD_SEND_REQUEST(fn, ...) \
++ if ((status = aaudio_send_cmd_sync(a, &sctx, buf, 500, fn, ##__VA_ARGS__))) \
++ return status;
++/*
++ * Allocate the reply and send. Because this macro owns `reply`, it bails out
++ * with -ENOMEM if the allocation failed and frees the buffer before returning
++ * a send error, so the early-return paths do not leak it.
++ */
++#define CMD_DEF_SHARED_AND_SEND(fn, ...) \
++ CMD_SHARED_VARS \
++ if (!reply.data) \
++ return -ENOMEM; \
++ if ((status = aaudio_send_cmd_sync(a, &sctx, buf, 500, fn, ##__VA_ARGS__))) { \
++ aaudio_reply_free(&reply); \
++ return status; \
++ }
++/* Send using the caller's buffer; the caller keeps ownership of `buf`. */
++#define CMD_DEF_SHARED_NO_REPLY_AND_SEND(fn, ...) \
++ CMD_SHARED_VARS_NO_REPLY \
++ CMD_SEND_REQUEST(fn, ##__VA_ARGS__);
++/* Parse the reply with `fn` and return its status; `buf` is left to the caller. */
++#define CMD_HNDL_REPLY_NO_FREE(fn, ...) \
++ status = fn(buf, ##__VA_ARGS__); \
++ return status;
++/* Parse the reply with `fn`, release the local reply buffer, return the status. */
++#define CMD_HNDL_REPLY_AND_FREE(fn, ...) \
++ status = fn(buf, ##__VA_ARGS__); \
++ aaudio_reply_free(&reply); \
++ return status;
++
++/*
++ * Send START_IO for `devid` and wait for the response.
++ * Returns the send error if sending fails, otherwise the result of
++ * aaudio_msg_read_start_io_response().
++ */
++int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid)
++{
++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_start_io, devid);
++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_start_io_response);
++}
++/*
++ * Send STOP_IO for `devid` and wait for the response.
++ * Returns the send error if sending fails, otherwise the result of
++ * aaudio_msg_read_stop_io_response().
++ */
++int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid)
++{
++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_stop_io, devid);
++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_stop_io_response);
++}
++/*
++ * Send GET_PROPERTY and parse the response into the caller-supplied `buf`.
++ * `buf` is neither allocated nor freed here. `data`/`data_size` are filled in
++ * by aaudio_msg_read_get_property_response(); presumably *data points into
++ * `buf`, so it must be consumed before `buf` is freed - confirm against the
++ * reader. Note that `obj` and `prop` are overwritten locally by the parser.
++ */
++int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf,
++ aaudio_device_id_t devid, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size)
++{
++ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_property, devid, obj, prop, qualifier, qualifier_size);
++ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_property_response, &obj, &prop, data, data_size);
++}
++/*
++ * Fetch a fixed-size property value into `data`.
++ * Allocates a temporary reply buffer, issues GET_PROPERTY and copies the
++ * returned bytes into `data` only when the reported size equals `data_size`.
++ * Returns 0 on success, -ENOMEM if the reply buffer cannot be allocated,
++ * -EINVAL on a size mismatch, or the error from aaudio_cmd_get_property().
++ */
++int aaudio_cmd_get_primitive_property(struct aaudio_device *a,
++ aaudio_device_id_t devid, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size)
++{
++ int status;
++ struct aaudio_msg reply = aaudio_reply_alloc();
++ void *r_data;
++ u64 r_data_size;
++ /* The reply path copies into reply.data, so it must not be NULL. */
++ if (!reply.data)
++ return -ENOMEM;
++ if ((status = aaudio_cmd_get_property(a, &reply, devid, obj, prop, qualifier, qualifier_size,
++ &r_data, &r_data_size)))
++ goto finish;
++ if (r_data_size != data_size) {
++ status = -EINVAL;
++ goto finish;
++ }
++ memcpy(data, r_data, data_size);
++finish:
++ aaudio_reply_free(&reply);
++ return status;
++}
++/*
++ * Send SET_PROPERTY and wait for the response.
++ * Note the argument order differs from the writer: this function takes
++ * (qualifier, data) while aaudio_msg_write_set_property takes (data, qualifier);
++ * the call below reorders them accordingly.
++ */
++int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size)
++{
++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_property, devid, obj, prop, data, data_size,
++ qualifier, qualifier_size);
++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_property_response, &obj);
++}
++/*
++ * Send PROPERTY_LISTENER for (`obj`, `prop`) on `devid` and wait for the
++ * response; returns the send error or the response parser's result.
++ */
++int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop)
++{
++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_property_listener, devid, obj, prop);
++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_property_listener_response, &obj, &prop);
++}
++/*
++ * Send SET_INPUT_STREAM_ADDRESS_RANGES for `devid` and wait for the response;
++ * returns the send error or the response parser's result.
++ */
++int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid)
++{
++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_input_stream_address_ranges, devid);
++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_input_stream_address_ranges_response);
++}
++/*
++ * Send GET_INPUT_STREAM_LIST for `devid`, receiving into caller-owned `buf`.
++ * `str_l`/`str_cnt` are filled by the response parser; `buf` is not freed
++ * here. Presumably *str_l points into `buf` - confirm against the reader.
++ */
++int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
++ aaudio_object_id_t **str_l, u64 *str_cnt)
++{
++ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_input_stream_list, devid);
++ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_input_stream_list_response, str_l, str_cnt);
++}
++/*
++ * Send GET_OUTPUT_STREAM_LIST for `devid`, receiving into caller-owned `buf`.
++ * `str_l`/`str_cnt` are filled by the response parser; `buf` is not freed
++ * here. Presumably *str_l points into `buf` - confirm against the reader.
++ */
++int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
++ aaudio_object_id_t **str_l, u64 *str_cnt)
++{
++ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_output_stream_list, devid);
++ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_output_stream_list_response, str_l, str_cnt);
++}
++/*
++ * Send SET_REMOTE_ACCESS with the given `mode` and wait for the response;
++ * returns the send error or the response parser's result.
++ */
++int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode)
++{
++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_remote_access, mode);
++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_remote_access_response);
++}
++/*
++ * Send GET_DEVICE_LIST, receiving into caller-owned `buf`.
++ * `dev_l`/`dev_cnt` are filled by the response parser; `buf` is not freed
++ * here. Presumably *dev_l points into `buf` - confirm against the reader.
++ */
++int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf,
++ aaudio_device_id_t **dev_l, u64 *dev_cnt)
++{
++ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_device_list);
++ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_device_list_response, dev_l, dev_cnt);
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/audio/protocol.h b/drivers/staging/apple-bce/audio/protocol.h
+new file mode 100644
+index 000000000..3427486f3
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol.h
+@@ -0,0 +1,147 @@
++#ifndef AAUDIO_PROTOCOL_H
++#define AAUDIO_PROTOCOL_H
++
++#include <linux/types.h>
++
++struct aaudio_device;
++
++typedef u64 aaudio_device_id_t;
++typedef u64 aaudio_object_id_t;
++
++/* A raw protocol message: pointer to its bytes plus a byte count. */
++struct aaudio_msg {
++ void *data; /* start of the message, beginning with struct aaudio_msg_header */
++ size_t size; /* message length in bytes (buffer capacity for reply buffers before receipt) */
++};
++
++/* Packed header found at the start of every message. */
++struct __attribute__((packed)) aaudio_msg_header {
++ char tag[4]; /* 4-byte tag, not NUL-terminated; used to match replies to requests */
++ u8 type; /* one of AAUDIO_MSG_TYPE_* */
++ aaudio_device_id_t device_id; // Target device; the writers pass 0 for responses, notifications and device-less commands
++};
++/* Packed body prefix following the header: message id and status. */
++struct __attribute__((packed)) aaudio_msg_base {
++ u32 msg; /* one of the AAUDIO_MSG_* ids */
++ u32 status; /* written as 0 by the message writers */
++};
++
++/*
++ * Address of a property on an object. Note the in-memory field order
++ * (scope, selector, element) differs from the order the message writers
++ * emit (element, scope, selector).
++ */
++struct aaudio_prop_addr {
++ u32 scope; /* e.g. AAUDIO_PROP_SCOPE_* */
++ u32 selector; /* e.g. AAUDIO_PROP_* */
++ u32 element;
++};
++/* Build a struct aaudio_prop_addr compound literal from (scope, selector, element). */
++#define AAUDIO_PROP(scope, sel, el) (struct aaudio_prop_addr) { scope, sel, el }
++
++/* Values for aaudio_msg_header.type. */
++enum {
++ AAUDIO_MSG_TYPE_COMMAND = 1,
++ AAUDIO_MSG_TYPE_RESPONSE = 2,
++ AAUDIO_MSG_TYPE_NOTIFICATION = 3
++};
++
++/*
++ * Message ids carried in aaudio_msg_base.msg. Requests are paired with a
++ * *_RESPONSE id; the numbering is sparse.
++ */
++enum {
++ AAUDIO_MSG_START_IO = 0,
++ AAUDIO_MSG_START_IO_RESPONSE = 1,
++ AAUDIO_MSG_STOP_IO = 2,
++ AAUDIO_MSG_STOP_IO_RESPONSE = 3,
++ AAUDIO_MSG_UPDATE_TIMESTAMP = 4,
++ AAUDIO_MSG_GET_PROPERTY = 7,
++ AAUDIO_MSG_GET_PROPERTY_RESPONSE = 8,
++ AAUDIO_MSG_SET_PROPERTY = 9,
++ AAUDIO_MSG_SET_PROPERTY_RESPONSE = 10,
++ AAUDIO_MSG_PROPERTY_LISTENER = 11,
++ AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE = 12,
++ AAUDIO_MSG_PROPERTY_CHANGED = 13,
++ AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES = 18,
++ AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE = 19,
++ AAUDIO_MSG_GET_INPUT_STREAM_LIST = 24,
++ AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE = 25,
++ AAUDIO_MSG_GET_OUTPUT_STREAM_LIST = 26,
++ AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE = 27,
++ AAUDIO_MSG_SET_REMOTE_ACCESS = 32,
++ AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE = 33,
++ AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE = 34,
++
++ AAUDIO_MSG_NOTIFICATION_ALIVE = 100,
++ AAUDIO_MSG_GET_DEVICE_LIST = 101,
++ AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE = 102,
++ AAUDIO_MSG_NOTIFICATION_BOOT = 104
++};
++
++/* Mode values for the SET_REMOTE_ACCESS command. */
++enum {
++ AAUDIO_REMOTE_ACCESS_OFF = 0,
++ AAUDIO_REMOTE_ACCESS_ON = 2
++};
++
++/* Property scopes; each value is a four-character ASCII code (shown at right). */
++enum {
++ AAUDIO_PROP_SCOPE_GLOBAL = 0x676c6f62, // 'glob'
++ AAUDIO_PROP_SCOPE_INPUT = 0x696e7074, // 'inpt'
++ AAUDIO_PROP_SCOPE_OUTPUT = 0x6f757470 // 'outp'
++};
++
++/* Property selectors; each value is a four-character ASCII code (shown at right). */
++enum {
++ AAUDIO_PROP_UID = 0x75696420, // 'uid '
++ AAUDIO_PROP_BOOL_VALUE = 0x6263766c, // 'bcvl'
++ AAUDIO_PROP_JACK_PLUGGED = 0x6a61636b, // 'jack'
++ AAUDIO_PROP_SEL_VOLUME = 0x64656176, // 'deav'
++ AAUDIO_PROP_LATENCY = 0x6c746e63, // 'ltnc'
++ AAUDIO_PROP_PHYS_FORMAT = 0x70667420 // 'pft '
++};
++
++int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base);
++
++int aaudio_msg_read_start_io_response(struct aaudio_msg *msg);
++int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg);
++int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid,
++ u64 *timestamp, u64 *update_seed);
++int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj,
++ struct aaudio_prop_addr *prop, void **data, u64 *data_size);
++int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj);
++int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg,aaudio_object_id_t *obj,
++ struct aaudio_prop_addr *prop);
++int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj,
++ struct aaudio_prop_addr *prop);
++int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg);
++int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt);
++int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt);
++int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg);
++int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt);
++
++void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev);
++void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev);
++void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size);
++void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size);
++void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop);
++void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid);
++void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid);
++void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid);
++void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode);
++void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver);
++void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg);
++void aaudio_msg_write_get_device_list(struct aaudio_msg *msg);
++
++
++int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid);
++int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid);
++int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf,
++ aaudio_device_id_t devid, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size);
++int aaudio_cmd_get_primitive_property(struct aaudio_device *a,
++ aaudio_device_id_t devid, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size);
++int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size);
++int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj,
++ struct aaudio_prop_addr prop);
++int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid);
++int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
++ aaudio_object_id_t **str_l, u64 *str_cnt);
++int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid,
++ aaudio_object_id_t **str_l, u64 *str_cnt);
++int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode);
++int aaudio_cmd_get_device_list(struct aaudio_device *a, struct aaudio_msg *buf,
++ aaudio_device_id_t **dev_l, u64 *dev_cnt);
++
++
++
++#endif //AAUDIO_PROTOCOL_H
+diff --git a/drivers/staging/apple-bce/audio/protocol_bce.c b/drivers/staging/apple-bce/audio/protocol_bce.c
+new file mode 100644
+index 000000000..28f2dfd44
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol_bce.c
+@@ -0,0 +1,226 @@
++#include "protocol_bce.h"
++
++#include "audio.h"
++
++static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq);
++static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq);
++static int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction,
++ bce_sq_completion cfn);
++void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count);
++
++/*
++ * Set up the BCE transport for the audio device: one completion queue, the
++ * host-to-device ("IntelToARM") and device-to-host ("ARMToIntel") submission
++ * queues, then pre-post every receive buffer of the inbound queue.
++ * Returns 0 on success, -EINVAL if the completion queue cannot be created,
++ * or the error from aaudio_bce_queue_init().
++ * NOTE(review): the error returns below do not release the completion queue
++ * or an already initialised outbound queue; confirm whether the caller
++ * cleans these up.
++ */
++int aaudio_bce_init(struct aaudio_device *dev)
++{
++ int status;
++ struct aaudio_bce *bce = &dev->bcem;
++ bce->cq = bce_create_cq(dev->bce, 0x80);
++ spin_lock_init(&bce->spinlock);
++ if (!bce->cq)
++ return -EINVAL;
++ if ((status = aaudio_bce_queue_init(dev, &bce->qout, "com.apple.BridgeAudio.IntelToARM", DMA_TO_DEVICE,
++ aaudio_bce_out_queue_completion))) {
++ return status;
++ }
++ if ((status = aaudio_bce_queue_init(dev, &bce->qin, "com.apple.BridgeAudio.ARMToIntel", DMA_FROM_DEVICE,
++ aaudio_bce_in_queue_completion))) {
++ return status;
++ }
++ /* Hand all inbound elements to the device so it can start sending. */
++ aaudio_bce_in_queue_submit_pending(&bce->qin, bce->qin.el_count);
++ return 0;
++}
++
++/*
++ * Initialise one transport queue: create its submission queue on the shared
++ * completion queue and allocate a coherent DMA area holding el_count
++ * elements of el_size bytes. Returns 0 on success or -EINVAL if either step
++ * fails (the submission queue is destroyed again if the DMA allocation fails).
++ */
++int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction,
++ bce_sq_completion cfn)
++{
++ q->cq = dev->bcem.cq;
++ q->el_size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE;
++ q->el_count = AAUDIO_BCE_QUEUE_ELEMENT_COUNT;
++
++ /* NOTE: The Apple impl uses 0x80 as the queue size, however we use 21 (in fact 20) to simplify the impl */
++ q->sq = bce_create_sq(dev->bce, q->cq, name, (u32) (q->el_count + 1), direction, cfn, dev);
++ if (q->sq == NULL)
++ return -EINVAL;
++
++ q->data = dma_alloc_coherent(&dev->bce->pci->dev, q->el_size * q->el_count, &q->dma_addr, GFP_KERNEL);
++ if (q->data != NULL)
++ return 0;
++
++ /* Allocation failed: undo the submission queue before reporting the error. */
++ bce_destroy_sq(dev->bce, q->sq);
++ return -EINVAL;
++}
++
++/*
++ * Advance the rolling tag counter (wrapping at AAUDIO_BCE_QUEUE_TAG_COUNT),
++ * report the new number through `tagn` and write its 4-byte text form
++ * "Snnn" (no terminator) into `tag`.
++ */
++static void aaudio_send_create_tag(struct aaudio_bce *b, int *tagn, char tag[4])
++{
++ char text[5];
++
++ b->tag_num = (b->tag_num + 1) % AAUDIO_BCE_QUEUE_TAG_COUNT;
++ *tagn = b->tag_num;
++ snprintf(text, sizeof(text), "S%03d", b->tag_num);
++ /* Copy only the four tag characters, dropping the NUL. */
++ memcpy(tag, text, 4);
++}
++
++/*
++ * Reserve an outbound submission and point ctx->msg.data at the next free
++ * element of the outbound buffer, with the header tag already filled in.
++ * If `tag` is NULL a fresh tag is generated and its number stored in
++ * ctx->tag_n; otherwise the given 4 bytes are copied and ctx->tag_n is left
++ * untouched.
++ * On success (return 0) b->spinlock is HELD with the IRQ state saved in
++ * ctx->irq_flags; __aaudio_send() releases it. On failure the lock is not
++ * taken and the reservation error is returned.
++ */
++int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag)
++{
++ int status;
++ size_t index;
++ void *dptr;
++ struct aaudio_msg_header *header;
++ if ((status = bce_reserve_submission(b->qout.sq, &ctx->timeout)))
++ return status;
++ spin_lock_irqsave(&b->spinlock, ctx->irq_flags);
++ index = b->qout.data_tail;
++ dptr = (u8 *) b->qout.data + index * b->qout.el_size;
++ ctx->msg.data = dptr;
++ header = dptr;
++ if (tag)
++ *((u32 *) header->tag) = *((u32 *) tag);
++ else
++ aaudio_send_create_tag(b, &ctx->tag_n, header->tag);
++ return 0;
++}
++
++/*
++ * Submit the message prepared in `ctx` to the device and advance the
++ * outbound ring tail. Must be called after a successful
++ * __aaudio_send_prepare(): it releases b->spinlock using ctx->irq_flags.
++ */
++void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx)
++{
++ struct bce_qe_submission *s = bce_next_submission(b->qout.sq);
++#ifdef DEBUG
++ pr_debug("aaudio: Sending command data\n");
++ print_hex_dump(KERN_DEBUG, "aaudio:OUT ", DUMP_PREFIX_NONE, 32, 1, ctx->msg.data, ctx->msg.size, true);
++#endif
++ /* DMA address = base of the coherent area + byte offset of this element. */
++ bce_set_submission_single(s, b->qout.dma_addr + (dma_addr_t) (ctx->msg.data - b->qout.data), ctx->msg.size);
++ bce_submit_to_device(b->qout.sq);
++ b->qout.data_tail = (b->qout.data_tail + 1) % b->qout.el_count;
++ spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags);
++}
++
++/*
++ * Send the prepared command and block until its reply arrives or the time
++ * left in ctx->timeout runs out.
++ * A stack-allocated pending entry is registered under ctx->tag_n while the
++ * spinlock taken by __aaudio_send_prepare() is still held, so the reply
++ * handler cannot miss it. ctx->timeout is overwritten with the result of
++ * wait_for_completion_timeout(); a zero result is treated as a timeout.
++ * Returns 0 once the reply has been copied into `reply`, or -ETIMEDOUT.
++ */
++int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply)
++{
++ struct aaudio_bce_queue_entry ent;
++ DECLARE_COMPLETION_ONSTACK(cmpl);
++ ent.msg = reply;
++ ent.cmpl = &cmpl;
++ b->pending_entries[ctx->tag_n] = &ent;
++ __aaudio_send(b, ctx); /* unlocks the spinlock */
++ ctx->timeout = wait_for_completion_timeout(&cmpl, ctx->timeout);
++ if (ctx->timeout == 0) {
++ /* Remove the pending queue entry; this will be normally handled by the completion route but
++ * during a timeout it won't */
++ spin_lock_irqsave(&b->spinlock, ctx->irq_flags);
++ /* Only clear the slot if it still refers to our stack entry. */
++ if (b->pending_entries[ctx->tag_n] == &ent)
++ b->pending_entries[ctx->tag_n] = NULL;
++ spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags);
++ return -ETIMEDOUT;
++ }
++ return 0;
++}
++
++/*
++ * Deliver a response message to the synchronous sender waiting on its tag.
++ * The tag must have the form "Snnn"; nnn selects the slot in
++ * b->pending_entries. The reply is copied into the waiter's buffer
++ * (truncated to that buffer's size) and the waiter is completed. Malformed,
++ * out-of-range or unmatched tags are logged and the message is dropped.
++ */
++static void aaudio_handle_reply(struct aaudio_bce *b, struct aaudio_msg *reply)
++{
++ const char *tag;
++ int tagn;
++ unsigned long irq_flags;
++ char tag_zero[5];
++ struct aaudio_bce_queue_entry *entry;
++
++ tag = ((struct aaudio_msg_header *) reply->data)->tag;
++ if (tag[0] != 'S') {
++ pr_err("aaudio_handle_reply: Unexpected tag: %.4s\n", tag);
++ return;
++ }
++ /* The wire tag is not NUL-terminated; make a terminated copy for parsing. */
++ memcpy(tag_zero, tag, 4);
++ tag_zero[4] = 0;
++ if (kstrtoint(&tag_zero[1], 10, &tagn)) {
++ pr_err("aaudio_handle_reply: Tag parse failed: %.4s\n", tag);
++ return;
++ }
++ /* kstrtoint accepts a leading sign, so a device-supplied tag such as
++ * "S-12" would otherwise index before the start of pending_entries. */
++ if (tagn < 0 || tagn >= AAUDIO_BCE_QUEUE_TAG_COUNT) {
++ pr_err("aaudio_handle_reply: Tag out of range: %.4s\n", tag);
++ return;
++ }
++
++ spin_lock_irqsave(&b->spinlock, irq_flags);
++ entry = b->pending_entries[tagn];
++ if (entry) {
++ /* Never copy more than the waiter's buffer can hold. */
++ if (reply->size < entry->msg->size)
++ entry->msg->size = reply->size;
++ memcpy(entry->msg->data, reply->data, entry->msg->size);
++ complete(entry->cmpl);
++
++ b->pending_entries[tagn] = NULL;
++ } else {
++ pr_err("aaudio_handle_reply: No queued item found for tag: %.4s\n", tag);
++ }
++ spin_unlock_irqrestore(&b->spinlock, irq_flags);
++}
++
++/*
++ * Completion callback for the outbound queue: acknowledge every finished
++ * submission so its slot can be reused. The completion data itself is unused.
++ */
++static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq)
++{
++ for (;;) {
++ if (!bce_next_completion(sq))
++ break;
++ bce_notify_submission_complete(sq);
++ }
++}
++
++static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg);
++
++/*
++ * Completion callback for the inbound queue. For each completed receive it
++ * wraps the element at data_head (with the size reported by the device) in
++ * an aaudio_msg, dispatches it, advances data_head and acknowledges the
++ * submission. Afterwards the same number of receive buffers is re-posted.
++ */
++static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq)
++{
++ struct aaudio_msg msg;
++ struct aaudio_device *dev = sq->userdata;
++ struct aaudio_bce_queue *q = &dev->bcem.qin;
++ struct bce_sq_completion_data *c;
++ size_t cnt = 0;
++
++ mb();
++ while ((c = bce_next_completion(sq))) {
++ msg.data = (u8 *) q->data + q->data_head * q->el_size;
++ msg.size = c->data_size;
++#ifdef DEBUG
++ pr_debug("aaudio: Received command data %llx\n", c->data_size);
++ print_hex_dump(KERN_DEBUG, "aaudio:IN ", DUMP_PREFIX_NONE, 32, 1, msg.data, min(msg.size, 128UL), true);
++#endif
++ aaudio_bce_in_queue_handle_msg(dev, &msg);
++
++ q->data_head = (q->data_head + 1) % q->el_count;
++
++ bce_notify_submission_complete(sq);
++ ++cnt;
++ }
++ /* Give the consumed buffers back to the device. */
++ aaudio_bce_in_queue_submit_pending(q, cnt);
++}
++
++/*
++ * Dispatch one inbound message by header type: responses go to the pending
++ * sender, commands and notifications to their handlers. Messages shorter
++ * than the header are logged and dropped; unknown types are ignored.
++ */
++static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++ struct aaudio_msg_header *header = (struct aaudio_msg_header *) msg->data;
++ if (msg->size < sizeof(struct aaudio_msg_header)) {
++ /* %zx is the specifier for size_t; terminate the line so it is not merged with later output. */
++ pr_err("aaudio: Msg size smaller than header (%zx)\n", msg->size);
++ return;
++ }
++ if (header->type == AAUDIO_MSG_TYPE_RESPONSE) {
++ aaudio_handle_reply(&a->bcem, msg);
++ } else if (header->type == AAUDIO_MSG_TYPE_COMMAND) {
++ aaudio_handle_command(a, msg);
++ } else if (header->type == AAUDIO_MSG_TYPE_NOTIFICATION) {
++ aaudio_handle_notification(a, msg);
++ }
++}
++
++/*
++ * Post up to `count` receive buffers on inbound queue `q`, each covering one
++ * el_size element starting at data_tail, then ring the device once.
++ * Stops early (with a log message) if a submission cannot be reserved; the
++ * submissions queued so far are still handed to the device.
++ */
++void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count)
++{
++ struct bce_qe_submission *s;
++ while (count--) {
++ if (bce_reserve_submission(q->sq, NULL)) {
++ pr_err("aaudio: Failed to reserve an event queue submission\n");
++ break;
++ }
++ s = bce_next_submission(q->sq);
++ bce_set_submission_single(s, q->dma_addr + (dma_addr_t) (q->data_tail * q->el_size), q->el_size);
++ q->data_tail = (q->data_tail + 1) % q->el_count;
++ }
++ bce_submit_to_device(q->sq);
++}
++
++/*
++ * Allocate a reply buffer of one queue element (AAUDIO_BCE_QUEUE_ELEMENT_SIZE
++ * bytes). The returned message's `data` is NULL if the allocation failed;
++ * release it with aaudio_reply_free().
++ */
++struct aaudio_msg aaudio_reply_alloc(void)
++{
++ struct aaudio_msg ret = {
++ .data = kmalloc(AAUDIO_BCE_QUEUE_ELEMENT_SIZE, GFP_KERNEL),
++ .size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE,
++ };
++
++ return ret;
++}
++
++/* Release the data buffer of a reply obtained from aaudio_reply_alloc(). */
++void aaudio_reply_free(struct aaudio_msg *reply)
++{
++ kfree(reply->data);
++}
+diff --git a/drivers/staging/apple-bce/audio/protocol_bce.h b/drivers/staging/apple-bce/audio/protocol_bce.h
+new file mode 100644
+index 000000000..14d26c05d
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol_bce.h
+@@ -0,0 +1,72 @@
++#ifndef AAUDIO_PROTOCOL_BCE_H
++#define AAUDIO_PROTOCOL_BCE_H
++
++#include "protocol.h"
++#include "../queue.h"
++
++/* Size in bytes of one message slot in a queue's DMA buffer (also the reply buffer size). */
++#define AAUDIO_BCE_QUEUE_ELEMENT_SIZE 0x1000
++/* Number of message slots per queue. */
++#define AAUDIO_BCE_QUEUE_ELEMENT_COUNT 20
++
++/* Number of distinct request tags ("S000".."S999") and size of the pending table. */
++#define AAUDIO_BCE_QUEUE_TAG_COUNT 1000
++
++struct aaudio_device;
++
++/* A synchronous request awaiting its reply. */
++struct aaudio_bce_queue_entry {
++ struct aaudio_msg *msg; /* buffer the reply is copied into */
++ struct completion *cmpl; /* completed once the reply has been copied */
++};
++/* One direction of the audio transport: a submission queue plus its DMA message ring. */
++struct aaudio_bce_queue {
++ struct bce_queue_cq *cq; /* shared completion queue */
++ struct bce_queue_sq *sq; /* submission queue for this direction */
++ void *data; /* CPU address of the coherent DMA buffer */
++ dma_addr_t dma_addr; /* device address of the same buffer */
++ size_t data_head, data_tail; /* ring indices: next element to consume / next to submit */
++ size_t el_size, el_count; /* element size in bytes and number of elements */
++};
++/* Transport state of the audio device. */
++struct aaudio_bce {
++ struct bce_queue_cq *cq; /* completion queue shared by both directions */
++ struct aaudio_bce_queue qin; /* device-to-host messages */
++ struct aaudio_bce_queue qout; /* host-to-device messages */
++ int tag_num; /* last tag number handed out */
++ struct aaudio_bce_queue_entry *pending_entries[AAUDIO_BCE_QUEUE_TAG_COUNT]; /* waiters indexed by tag number */
++ struct spinlock spinlock; /* taken around outbound sends and pending_entries updates */
++};
++
++/* Per-send scratch state shared between prepare, write and send steps. */
++struct aaudio_send_ctx {
++ int status; /* result of the prepare/send steps */
++ int tag_n; /* tag number of this request (set when a tag is generated) */
++ unsigned long irq_flags; /* IRQ state saved while the transport spinlock is held */
++ struct aaudio_msg msg; /* message being built inside the outbound buffer */
++ unsigned long timeout; /* remaining timeout in jiffies */
++};
++
++int aaudio_bce_init(struct aaudio_device *dev);
++int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag);
++void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx);
++int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply);
++
++/*
++ * Fire-and-forget send: prepare a slot (using `tag`, or a generated one when
++ * NULL), let `fn(&ctx->msg, ...)` serialise the message, then submit it.
++ * `tout` is in milliseconds. Evaluates to the prepare status (0 on success).
++ * Note: `fn` runs between prepare and send, i.e. while the transport
++ * spinlock taken by __aaudio_send_prepare() is held.
++ */
++#define aaudio_send_with_tag(a, ctx, tag, tout, fn, ...) ({ \
++ (ctx)->timeout = msecs_to_jiffies(tout); \
++ (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), (tag)); \
++ if (!(ctx)->status) { \
++ fn(&(ctx)->msg, ##__VA_ARGS__); \
++ __aaudio_send(&(a)->bcem, (ctx)); \
++ } \
++ (ctx)->status; \
++})
++/* Same as aaudio_send_with_tag() with an automatically generated tag. */
++#define aaudio_send(a, ctx, tout, fn, ...) aaudio_send_with_tag(a, ctx, NULL, tout, fn, ##__VA_ARGS__)
++
++/*
++ * Synchronous command: prepare a slot with a generated tag, let
++ * `fn(&ctx->msg, ...)` serialise the request, send it and wait for the
++ * reply to be copied into `reply`. `tout` is in milliseconds and covers the
++ * slot reservation plus the wait. Evaluates to 0 on success or the
++ * prepare/wait error.
++ */
++#define aaudio_send_cmd_sync(a, ctx, reply, tout, fn, ...) ({ \
++ (ctx)->timeout = msecs_to_jiffies(tout); \
++ (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), NULL); \
++ if (!(ctx)->status) { \
++ fn(&(ctx)->msg, ##__VA_ARGS__); \
++ (ctx)->status = __aaudio_send_cmd_sync(&(a)->bcem, (ctx), (reply)); \
++ } \
++ (ctx)->status; \
++})
++
++struct aaudio_msg aaudio_reply_alloc(void);
++void aaudio_reply_free(struct aaudio_msg *reply);
++
++#endif //AAUDIO_PROTOCOL_BCE_H
+diff --git a/drivers/staging/apple-bce/mailbox.c b/drivers/staging/apple-bce/mailbox.c
+new file mode 100644
+index 000000000..e24bd3521
+--- /dev/null
++++ b/drivers/staging/apple-bce/mailbox.c
+@@ -0,0 +1,151 @@
++#include "mailbox.h"
++#include <linux/atomic.h>
++#include "apple_bce.h"
++
++/* Byte offsets added to the mailbox / timestamp register base pointers below. */
++#define REG_MBOX_OUT_BASE 0x820
++#define REG_MBOX_REPLY_COUNTER 0x108
++#define REG_MBOX_REPLY_BASE 0x810
++#define REG_TIMESTAMP_BASE 0xC000
++
++/* How long bce_mailbox_send() waits for a reply, in milliseconds. */
++#define BCE_MBOX_TIMEOUT_MS 200
++
++/*
++ * Prepare a mailbox for use: set up its completion and remember the register
++ * base. mb_status is not touched here.
++ */
++void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb)
++{
++ init_completion(&mb->mb_completion);
++ mb->reg_mb = reg_mb;
++}
++
++/*
++ * Send a 64-bit mailbox message and wait up to BCE_MBOX_TIMEOUT_MS for the
++ * reply, which is stored in *recv.
++ * Only one message may be in flight: mb_status moves 0 -> 1 here and is set
++ * to 2 by the interrupt path when a reply has been read.
++ * Returns 0 on success, -EEXIST if another message is active, -ETIMEDOUT if
++ * no reply arrived.
++ */
++int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv)
++{
++ u32 __iomem *regb;
++
++ if (atomic_cmpxchg(&mb->mb_status, 0, 1) != 0) {
++ return -EEXIST; // We don't support two messages at once
++ }
++ reinit_completion(&mb->mb_completion);
++
++ pr_debug("bce_mailbox_send: %llx\n", msg);
++ regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_OUT_BASE);
++ /* The message occupies four 32-bit registers: low word, high word, two zero words. */
++ iowrite32((u32) msg, regb);
++ iowrite32((u32) (msg >> 32), regb + 1);
++ iowrite32(0, regb + 2);
++ iowrite32(0, regb + 3);
++
++ /* The wait result is ignored; success is judged by mb_status below. */
++ wait_for_completion_timeout(&mb->mb_completion, msecs_to_jiffies(BCE_MBOX_TIMEOUT_MS));
++ if (atomic_read(&mb->mb_status) != 2) { // Didn't get the reply
++ atomic_set(&mb->mb_status, 0);
++ return -ETIMEDOUT;
++ }
++
++ *recv = mb->mb_result;
++ pr_debug("bce_mailbox_send: reply %llx\n", *recv);
++
++ atomic_set(&mb->mb_status, 0);
++ return 0;
++}
++
++/*
++ * Drain the mailbox reply registers. The number of pending replies is taken
++ * from bits 20..23 of the reply counter register; each reply is read as four
++ * 32-bit words of which the first two form the 64-bit value. The last value
++ * read ends up in mb->mb_result.
++ * Returns 0 if at least one reply was read, -ENODATA otherwise.
++ */
++static int bce_mailbox_retrive_response(struct bce_mailbox *mb)
++{
++ u32 __iomem *regb;
++ u32 res = ioread32((u8*) mb->reg_mb + REG_MBOX_REPLY_COUNTER);
++ int count = (res >> 20) & 0xf;
++ int i;
++
++ pr_debug("bce_mailbox_retrive_response count=%i\n", count);
++ for (i = 0; i < count; i++) {
++ u32 lo, hi;
++
++ regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_REPLY_BASE);
++ lo = ioread32(regb);
++ hi = ioread32(regb + 1);
++ /* The upper two words are read and discarded. */
++ ioread32(regb + 2);
++ ioread32(regb + 3);
++ pr_debug("bce_mailbox_retrive_response %llx\n", ((u64) hi << 32) | lo);
++ mb->mb_result = ((u64) hi << 32) | lo;
++ }
++ if (count == 0)
++ return -ENODATA;
++ return 0;
++}
++
++/*
++ * Interrupt-side handler: read any pending reply and, if one was found, mark
++ * the mailbox as "got reply" (status 2) and wake the sender.
++ * Returns 0 if a reply was delivered, otherwise the retrieval error.
++ */
++int bce_mailbox_handle_interrupt(struct bce_mailbox *mb)
++{
++ int status = bce_mailbox_retrive_response(mb);
++
++ if (status)
++ return status;
++
++ atomic_set(&mb->mb_status, 2);
++ complete(&mb->mb_completion);
++ return status;
++}
++
++static void bc_send_timestamp(struct timer_list *tl);
++
++/*
++ * Initialise the timestamp helper: set up the stop lock and flag, remember
++ * the register base, perform one read of the timestamp register block and
++ * register bc_send_timestamp() as the timer callback. The timer is not
++ * armed here.
++ */
++void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg)
++{
++ u32 __iomem *regb;
++
++ spin_lock_init(&ts->stop_sl);
++ ts->stopped = false;
++
++ ts->reg = reg;
++
++ regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++
++ /* Result unused; NOTE(review): presumably a required dummy read - confirm. */
++ ioread32(regb);
++ mb();
++
++ timer_setup(&ts->timer, bc_send_timestamp, 0);
++}
++
++/*
++ * Start periodic timestamp updates. Writes a start marker to the timestamp
++ * registers ((u32)-4 when `is_initial`, (u32)-3 otherwise, followed by
++ * (u32)-1), clears the stopped flag and arms the timer for 150 ms from now.
++ */
++void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial)
++{
++ u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++ u32 marker = is_initial ? (u32) -4 : (u32) -3;
++ unsigned long flags;
++
++ iowrite32(marker, regb + 2);
++ iowrite32((u32) -1, regb);
++
++ spin_lock_irqsave(&ts->stop_sl, flags);
++ ts->stopped = false;
++ spin_unlock_irqrestore(&ts->stop_sl, flags);
++
++ mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
++}
++
++/*
++ * Stop periodic timestamp updates: set the stopped flag under the lock so
++ * the callback will not re-arm, wait for the timer to finish, then write
++ * the stop marker ((u32)-2 followed by (u32)-1) to the timestamp registers.
++ */
++void bce_timestamp_stop(struct bce_timestamp *ts)
++{
++ unsigned long flags;
++ u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++
++ spin_lock_irqsave(&ts->stop_sl, flags);
++ ts->stopped = true;
++ spin_unlock_irqrestore(&ts->stop_sl, flags);
++ del_timer_sync(&ts->timer);
++
++ iowrite32((u32) -2, regb + 2);
++ iowrite32((u32) -1, regb);
++}
++
++/*
++ * Timer callback: write the current boot time to the timestamp registers
++ * (low 32 bits at regb + 2, high 32 bits at regb) with local interrupts
++ * disabled, then re-arm the timer for another 150 ms unless the helper has
++ * been stopped.
++ */
++static void bc_send_timestamp(struct timer_list *tl)
++{
++ struct bce_timestamp *ts;
++ unsigned long flags;
++ u32 __iomem *regb;
++ ktime_t bt;
++
++ ts = container_of(tl, struct bce_timestamp, timer);
++ regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++ local_irq_save(flags);
++ ioread32(regb + 2);
++ mb();
++ bt = ktime_get_boottime();
++ iowrite32((u32) bt, regb + 2);
++ iowrite32((u32) (bt >> 32), regb);
++
++ /* Check the flag under the lock so a concurrent stop cannot be overtaken by a re-arm. */
++ spin_lock(&ts->stop_sl);
++ if (!ts->stopped)
++ mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
++ spin_unlock(&ts->stop_sl);
++ local_irq_restore(flags);
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/mailbox.h b/drivers/staging/apple-bce/mailbox.h
+new file mode 100644
+index 000000000..f3323f95b
+--- /dev/null
++++ b/drivers/staging/apple-bce/mailbox.h
+@@ -0,0 +1,53 @@
++#ifndef BCE_MAILBOX_H
++#define BCE_MAILBOX_H
++
++#include <linux/completion.h>
++#include <linux/pci.h>
++#include <linux/timer.h>
++
++/* State of the single-message mailbox used to talk to the device. */
++struct bce_mailbox {
++ void __iomem *reg_mb; /* base of the mailbox register block */
++
++ atomic_t mb_status; // possible statuses: 0 (no msg), 1 (has active msg), 2 (got reply)
++ struct completion mb_completion; /* completed by the interrupt path when a reply arrives */
++ uint64_t mb_result; /* last reply value read from the device */
++};
++
++/* Mailbox message types (the `type` field of BCE_MB_MSG); direction noted per entry. */
++enum bce_message_type {
++ BCE_MB_REGISTER_COMMAND_SQ = 0x7, // to-device
++ BCE_MB_REGISTER_COMMAND_CQ = 0x8, // to-device
++ BCE_MB_REGISTER_COMMAND_QUEUE_REPLY = 0xB, // to-host
++ BCE_MB_SET_FW_PROTOCOL_VERSION = 0xC, // both
++ BCE_MB_SLEEP_NO_STATE = 0x14, // to-device
++ BCE_MB_RESTORE_NO_STATE = 0x15, // to-device
++ BCE_MB_SAVE_STATE_AND_SLEEP = 0x17, // to-device
++ BCE_MB_RESTORE_STATE_AND_WAKE = 0x18, // to-device
++ BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE = 0x19, // from-device
++ BCE_MB_SAVE_RESTORE_STATE_COMPLETE = 0x1A, // from-device
++};
++
++/*
++ * Mailbox message encoding: the type lives in the top 6 bits, the value in
++ * the low 58 bits. Arguments are parenthesised so that expressions such as
++ * BCE_MB_TYPE(a | b) expand correctly.
++ */
++#define BCE_MB_MSG(type, value) (((u64) (type) << 58) | ((value) & 0x3FFFFFFFFFFFFFFLL))
++#define BCE_MB_TYPE(v) ((u32) ((v) >> 58))
++#define BCE_MB_VALUE(v) ((v) & 0x3FFFFFFFFFFFFFFLL)
++
++void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb);
++
++int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv);
++
++int bce_mailbox_handle_interrupt(struct bce_mailbox *mb);
++
++
++/* State for the periodic boot-time timestamp pushed to the device. */
++struct bce_timestamp {
++ void __iomem *reg; /* register base; REG_TIMESTAMP_BASE is added on each access */
++ struct timer_list timer; /* fires bc_send_timestamp() */
++ struct spinlock stop_sl; /* protects `stopped` */
++ bool stopped; /* when true the timer callback does not re-arm itself */
++};
++
++void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg);
++
++void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial);
++
++void bce_timestamp_stop(struct bce_timestamp *ts);
++
++#endif //BCE_MAILBOX_H
+diff --git a/drivers/staging/apple-bce/queue.c b/drivers/staging/apple-bce/queue.c
+new file mode 100644
+index 000000000..bc9cd3bc6
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue.c
+@@ -0,0 +1,390 @@
++#include "queue.h"
++#include "apple_bce.h"
++
++/* Byte offset of the doorbell registers from reg_mem_dma; indexed per queue id as an array of u32. */
++#define REG_DOORBELL_BASE 0x44000
++
++/*
++ * Allocate a completion queue descriptor with id `qid` and a coherent DMA
++ * ring of `el_count` completion entries.
++ * Returns the new queue, or NULL if either allocation fails (nothing is
++ * leaked in that case).
++ */
++struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count)
++{
++ struct bce_queue_cq *q;
++ q = kzalloc(sizeof(struct bce_queue_cq), GFP_KERNEL);
++ /* kzalloc can fail; do not dereference a NULL descriptor. */
++ if (!q)
++ return NULL;
++ q->qid = qid;
++ q->type = BCE_QUEUE_CQ;
++ q->el_count = el_count;
++ q->data = dma_alloc_coherent(&dev->pci->dev, el_count * sizeof(struct bce_qe_completion),
++ &q->dma_handle, GFP_KERNEL);
++ if (!q->data) {
++ pr_err("DMA queue memory alloc failed\n");
++ kfree(q);
++ return NULL;
++ }
++ return q;
++}
++
++/*
++ * Fill `cfg` with the memory description of completion queue `cq`: queue id,
++ * element count, DMA address and total ring length in bytes. The
++ * vector_or_cq and padding fields are zeroed.
++ */
++void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
++{
++ /* Identification */
++ cfg->qid = (u16) cq->qid;
++ cfg->el_count = (u16) cq->el_count;
++
++ /* Ring location and size */
++ cfg->addr = cq->dma_handle;
++ cfg->length = cq->el_count * sizeof(struct bce_qe_completion);
++
++ /* Unused for completion queues */
++ cfg->vector_or_cq = 0;
++ cfg->_pad = 0;
++}
++
++/* Release a completion queue created by bce_alloc_cq(): its DMA ring first, then the descriptor. */
++void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++ size_t ring_bytes = cq->el_count * sizeof(struct bce_qe_completion);
++
++ dma_free_coherent(&dev->pci->dev, ring_bytes, cq->data, cq->dma_handle);
++ kfree(cq);
++}
++
++/*
++ * Process one completion entry `e`: validate its queue id and completion
++ * index, copy status/size/result into the target submission queue's
++ * completion_data slot and advance that queue's completion_tail.
++ * The first time a queue receives a completion in this pass it is appended
++ * to dev->int_sq_list and *ce is incremented, so the caller can invoke its
++ * callback once afterwards. Invalid entries are logged and skipped.
++ */
++static void bce_handle_cq_completion(struct apple_bce_device *dev, struct bce_qe_completion *e, size_t *ce)
++{
++ struct bce_queue *target;
++ struct bce_queue_sq *target_sq;
++ struct bce_sq_completion_data *cmpl;
++ if (e->qid >= BCE_MAX_QUEUE_COUNT) {
++ pr_err("Device sent a response for qid (%u) >= BCE_MAX_QUEUE_COUNT\n", e->qid);
++ return;
++ }
++ target = dev->queues[e->qid];
++ if (!target || target->type != BCE_QUEUE_SQ) {
++ pr_err("Device sent a response for qid (%u), which does not exist\n", e->qid);
++ return;
++ }
++ target_sq = (struct bce_queue_sq *) target;
++ /* Completions must arrive in order; anything else is dropped. */
++ if (target_sq->completion_tail != e->completion_index) {
++ pr_err("Completion index mismatch; this is likely going to make this driver unusable\n");
++ return;
++ }
++ if (!target_sq->has_pending_completions) {
++ target_sq->has_pending_completions = true;
++ dev->int_sq_list[(*ce)++] = target_sq;
++ }
++ cmpl = &target_sq->completion_data[e->completion_index];
++ cmpl->status = e->status;
++ cmpl->data_size = e->data_size;
++ cmpl->result = e->result;
++ /* Publish the completion data before moving the tail. */
++ wmb();
++ target_sq->completion_tail = (target_sq->completion_tail + 1) % target_sq->el_count;
++}
++
++/*
++ * Drain all pending entries from completion queue @cq.
++ *
++ * Each entry with BCE_COMPLETION_FLAG_PENDING set is handed to
++ * bce_handle_cq_completion(), its flags are cleared and cq->index advances
++ * (wrapping at el_count). Afterwards the new index is written to this
++ * queue's doorbell register and the completion callback of every submission
++ * queue that received entries is invoked.
++ */
++void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++ size_t ce = 0;
++ struct bce_qe_completion *e;
++ struct bce_queue_sq *sq;
++ /* cheap early-out when nothing is pending */
++ e = bce_cq_element(cq, cq->index);
++ if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
++ return;
++ mb();
++ while (true) {
++ e = bce_cq_element(cq, cq->index);
++ if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
++ break;
++ // pr_info("apple-bce: compl: %i: %i %llx %llx", e->qid, e->status, e->data_size, e->result);
++ bce_handle_cq_completion(dev, e, &ce);
++ /* mark the slot as consumed */
++ e->flags = 0;
++ cq->index = (cq->index + 1) % cq->el_count;
++ }
++ mb();
++ /* doorbells are an array of u32 registers at REG_DOORBELL_BASE, indexed by qid */
++ iowrite32(cq->index, (u32 *) ((u8 *) dev->reg_mem_dma + REG_DOORBELL_BASE) + cq->qid);
++ /* run the callbacks, last-added queue first; the flag is cleared only after the callback returns */
++ while (ce) {
++ --ce;
++ sq = dev->int_sq_list[ce];
++ sq->completion(sq);
++ sq->has_pending_completions = false;
++ }
++}
++
++
++/*
++ * Allocate a submission queue descriptor, its DMA-coherent element ring and
++ * the host-side completion_data array.
++ *
++ * @dev:      device whose PCI function backs the coherent allocation
++ * @qid:      queue id recorded in the descriptor
++ * @el_size:  size in bytes of one ring element
++ * @el_count: number of ring elements
++ * @compl:    callback stored in q->completion
++ * @userdata: opaque pointer stored in q->userdata
++ *
++ * Returns the new queue, or NULL if any allocation fails; on failure
++ * everything allocated so far is released again.
++ */
++struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
++        bce_sq_completion compl, void *userdata)
++{
++    struct bce_queue_sq *q;
++
++    q = kzalloc(sizeof(*q), GFP_KERNEL);
++    if (!q)
++        return NULL;
++    q->qid = qid;
++    q->type = BCE_QUEUE_SQ;
++    q->el_size = el_size;
++    q->el_count = el_count;
++    q->completion = compl;
++    q->userdata = userdata;
++    q->reg_mem_dma = dev->reg_mem_dma;
++    /* the reservation counter starts at el_count - 1, so one slot always stays unused */
++    atomic_set(&q->available_commands, el_count - 1);
++    init_completion(&q->available_command_completion);
++    atomic_set(&q->available_command_completion_waiting_count, 0);
++
++    /* kcalloc() zeroes the array and checks the size multiplication for overflow */
++    q->completion_data = kcalloc(el_count, sizeof(*q->completion_data), GFP_KERNEL);
++    if (!q->completion_data)
++        goto fail_free_q;
++
++    q->data = dma_alloc_coherent(&dev->pci->dev, el_count * el_size,
++                                 &q->dma_handle, GFP_KERNEL);
++    if (!q->data) {
++        pr_err("DMA queue memory alloc failed\n");
++        goto fail_free_completion_data;
++    }
++    return q;
++
++fail_free_completion_data:
++    kfree(q->completion_data);
++fail_free_q:
++    kfree(q);
++    return NULL;
++}
++
++/*
++ * Fill @cfg with the memory/registration parameters describing submission
++ * queue @sq, naming @cq as the completion queue it is attached to.
++ */
++void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
++{
++    const u32 slots = sq->el_count;
++
++    /* location and size of the ring */
++    cfg->addr = sq->dma_handle;
++    cfg->length = slots * sq->el_size;
++
++    /* identification; vector_or_cq carries the id of the paired completion queue */
++    cfg->qid = (u16) sq->qid;
++    cfg->el_count = (u16) slots;
++    cfg->vector_or_cq = (u16) cq->qid;
++    cfg->_pad = 0;
++}
++
++/*
++ * Release everything bce_alloc_sq() allocated for @sq: the DMA ring, the
++ * completion_data array and the descriptor itself.
++ */
++void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
++{
++    dma_free_coherent(&dev->pci->dev, sq->el_count * sq->el_size, sq->data, sq->dma_handle);
++    /* this array was previously leaked; kfree(NULL) is a no-op, so no guard is needed */
++    kfree(sq->completion_data);
++    kfree(sq);
++}
++
++/*
++ * Reserve one submission slot on @sq, optionally waiting for one to free up.
++ *
++ * @timeout: NULL (or pointing at 0) for a non-blocking attempt; otherwise the
++ *           number of jiffies to wait, updated in place with the time left.
++ *
++ * Returns 0 once a slot has been reserved, or -EAGAIN if none became
++ * available. May sleep when a non-zero timeout is given.
++ */
++int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout)
++{
++ /* a negative result means the counter was already 0, i.e. no free slot */
++ while (atomic_dec_if_positive(&sq->available_commands) < 0) {
++ if (!timeout || !*timeout)
++ return -EAGAIN;
++ /* announce ourselves so bce_notify_submission_complete() posts a completion */
++ atomic_inc(&sq->available_command_completion_waiting_count);
++ *timeout = wait_for_completion_timeout(&sq->available_command_completion, *timeout);
++ if (!*timeout) {
++ /*
++  * Timed out: withdraw our waiter registration. If the count was already
++  * taken by the notifier, a complete() was issued on our behalf, so
++  * swallow it to keep the completion and the counter in step.
++  */
++ if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) < 0)
++ try_wait_for_completion(&sq->available_command_completion); /* consume the pending completion */
++ }
++ }
++ return 0;
++}
++
++/*
++ * Give back a slot obtained with bce_reserve_submission() that will not be
++ * used. Only the counter is restored; no waiter is woken here.
++ */
++void bce_cancel_submission_reservation(struct bce_queue_sq *sq)
++{
++ atomic_inc(&sq->available_commands);
++}
++
++/*
++ * Return a pointer to the ring element at the current tail of @sq and
++ * advance the tail by one, wrapping at el_count.
++ */
++void *bce_next_submission(struct bce_queue_sq *sq)
++{
++    u32 slot = sq->tail;
++
++    sq->tail = (slot + 1) % sq->el_count;
++    return bce_sq_element(sq, slot);
++}
++
++/*
++ * Write the current tail index of @sq to its doorbell register
++ * (the u32 at REG_DOORBELL_BASE indexed by qid).
++ */
++void bce_submit_to_device(struct bce_queue_sq *sq)
++{
++ /* make sure the element contents are written before the doorbell */
++ mb();
++ iowrite32(sq->tail, (u32 *) ((u8 *) sq->reg_mem_dma + REG_DOORBELL_BASE) + sq->qid);
++}
++
++/*
++ * Retire the oldest outstanding submission on @sq: advance head, return the
++ * slot to available_commands and, if a thread is registered as waiting in
++ * bce_reserve_submission(), wake exactly one of them.
++ */
++void bce_notify_submission_complete(struct bce_queue_sq *sq)
++{
++ sq->head = (sq->head + 1) % sq->el_count;
++ atomic_inc(&sq->available_commands);
++ /* >= 0 means we took one waiter off the count and owe it a completion */
++ if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) >= 0) {
++ complete(&sq->available_command_completion);
++ }
++}
++
++/*
++ * Describe a single contiguous DMA buffer in @element: address and length
++ * are stored directly and both segment list fields are zeroed.
++ */
++void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size)
++{
++    element->segl_length = 0;
++    element->segl_addr = 0;
++    element->length = size;
++    element->addr = addr;
++}
++
++static void bce_cmdq_completion(struct bce_queue_sq *q);
++
++/*
++ * Allocate a command queue: a submission queue of BCE_CMD_SIZE-byte elements
++ * plus a per-slot table (tres) of pointers to the result element of each
++ * in-flight command.
++ *
++ * @dev:      device the underlying submission queue belongs to
++ * @qid:      queue id for the underlying submission queue
++ * @el_count: number of command slots
++ *
++ * Returns the new command queue, or NULL if any allocation fails; on failure
++ * everything allocated so far is released again.
++ */
++struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count)
++{
++    struct bce_queue_cmdq *q;
++
++    q = kzalloc(sizeof(*q), GFP_KERNEL);
++    if (!q)
++        return NULL;
++    q->sq = bce_alloc_sq(dev, qid, BCE_CMD_SIZE, el_count, bce_cmdq_completion, q);
++    if (!q->sq)
++        goto fail_free_q;
++    spin_lock_init(&q->lck);
++    q->tres = kcalloc(el_count, sizeof(*q->tres), GFP_KERNEL);
++    if (!q->tres)
++        goto fail_free_sq; /* the submission queue used to be leaked here */
++    return q;
++
++fail_free_sq:
++    bce_free_sq(dev, q->sq);
++fail_free_q:
++    kfree(q);
++    return NULL;
++}
++
++/*
++ * Free a command queue created by bce_alloc_cmdq(): the underlying
++ * submission queue, the tres table and the descriptor.
++ */
++void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq)
++{
++ bce_free_sq(dev, cmdq->sq);
++ kfree(cmdq->tres);
++ kfree(cmdq);
++}
++
++/*
++ * Completion callback installed on the command queue's submission queue.
++ *
++ * For every completion available on @q, copies result and status into the
++ * result element registered for the slot at sq->head, completes it so the
++ * issuer blocked in bce_cmd_finish() wakes up, clears the slot and retires
++ * the submission. Runs with cmdq->lck held.
++ */
++void bce_cmdq_completion(struct bce_queue_sq *q)
++{
++ struct bce_queue_cmdq_result_el *el;
++ struct bce_queue_cmdq *cmdq = q->userdata;
++ struct bce_sq_completion_data *result;
++
++ spin_lock(&cmdq->lck);
++ while ((result = bce_next_completion(q))) {
++ /* tres is indexed by submission slot; head is the oldest outstanding one */
++ el = cmdq->tres[cmdq->sq->head];
++ if (el) {
++ el->result = result->result;
++ el->status = result->status;
++ /* results must be visible before the waiter is released */
++ mb();
++ complete(&el->cmpl);
++ } else {
++ pr_err("apple-bce: Unexpected command queue completion\n");
++ }
++ cmdq->tres[cmdq->sq->head] = NULL;
++ /* advances sq->head and frees the slot */
++ bce_notify_submission_complete(q);
++ }
++ spin_unlock(&cmdq->lck);
++}
++
++/*
++ * Begin issuing a command: initialise @res, reserve a slot on the command
++ * queue (waiting up to about five minutes) and return a pointer to the ring
++ * element the caller should fill in.
++ *
++ * Returns NULL if no slot could be reserved. On success this returns with
++ * cmdq->lck HELD; the caller must call bce_cmd_finish(), which releases it.
++ *
++ * NOTE(review): the lock is taken with plain spin_lock() here and in
++ * bce_cmdq_completion(); whether the latter can run in interrupt context is
++ * not visible from this file - confirm.
++ */
++static __always_inline void *bce_cmd_start(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
++{
++ void *ret;
++ unsigned long timeout;
++ init_completion(&res->cmpl);
++ mb();
++
++ timeout = msecs_to_jiffies(1000L * 60 * 5); /* wait for up to ~5 minutes */
++ if (bce_reserve_submission(cmdq->sq, &timeout))
++ return NULL;
++
++ spin_lock(&cmdq->lck);
++ /* remember where the completion handler should deliver the result for this slot */
++ cmdq->tres[cmdq->sq->tail] = res;
++ ret = bce_next_submission(cmdq->sq);
++ return ret;
++}
++
++/*
++ * Finish issuing a command started with bce_cmd_start(): ring the doorbell,
++ * drop cmdq->lck and block until the completion handler has filled in @res.
++ * The wait has no timeout.
++ */
++static __always_inline void bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
++{
++ bce_submit_to_device(cmdq->sq);
++ spin_unlock(&cmdq->lck);
++
++ wait_for_completion(&res->cmpl);
++ /* pairs with the mb() before complete() in bce_cmdq_completion() */
++ mb();
++}
++
++/*
++ * Send BCE_CMD_REGISTER_MEMORY_QUEUE for the queue described by @cfg and wait
++ * for the reply.
++ *
++ * @name:     optional queue name; at most sizeof(cmd->name) bytes are copied
++ *            and no terminating NUL is added when the name fills the field
++ * @isdirout: sets bit 0 of the command flags
++ *
++ * Returns the status reported in the completion, or (u32) -1 if no command
++ * slot could be obtained.
++ */
++u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout)
++{
++ struct bce_queue_cmdq_result_el res;
++ struct bce_cmdq_register_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++ if (!cmd)
++ return (u32) -1;
++ cmd->cmd = BCE_CMD_REGISTER_MEMORY_QUEUE;
++ /* bit 1: a name is supplied, bit 0: isdirout */
++ cmd->flags = (u16) ((name ? 2 : 0) | (isdirout ? 1 : 0));
++ cmd->qid = cfg->qid;
++ cmd->el_count = cfg->el_count;
++ cmd->vector_or_cq = cfg->vector_or_cq;
++ memset(cmd->name, 0, sizeof(cmd->name));
++ if (name) {
++ /* clamp to the fixed-size name field */
++ cmd->name_len = (u16) min(strlen(name), (size_t) sizeof(cmd->name));
++ memcpy(cmd->name, name, cmd->name_len);
++ } else {
++ cmd->name_len = 0;
++ }
++ cmd->addr = cfg->addr;
++ cmd->length = cfg->length;
++
++ bce_cmd_finish(cmdq, &res);
++ return res.status;
++}
++
++/*
++ * Send BCE_CMD_UNREGISTER_MEMORY_QUEUE for queue @qid and wait for the reply.
++ * Returns the status reported in the completion, or (u32) -1 if no command
++ * slot could be obtained.
++ */
++u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
++{
++ struct bce_queue_cmdq_result_el res;
++ struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++ if (!cmd)
++ return (u32) -1;
++ cmd->cmd = BCE_CMD_UNREGISTER_MEMORY_QUEUE;
++ cmd->flags = 0;
++ cmd->qid = qid;
++ bce_cmd_finish(cmdq, &res);
++ return res.status;
++}
++
++/*
++ * Send BCE_CMD_FLUSH_MEMORY_QUEUE for queue @qid and wait for the reply.
++ * Returns the status reported in the completion, or (u32) -1 if no command
++ * slot could be obtained.
++ */
++u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
++{
++ struct bce_queue_cmdq_result_el res;
++ struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++ if (!cmd)
++ return (u32) -1;
++ cmd->cmd = BCE_CMD_FLUSH_MEMORY_QUEUE;
++ cmd->flags = 0;
++ cmd->qid = qid;
++ bce_cmd_finish(cmdq, &res);
++ return res.status;
++}
++
++
++/*
++ * Create a completion queue with @el_count slots: pick a free user queue id,
++ * allocate the queue, register it with the device through the command queue
++ * and publish it in dev->queues.
++ *
++ * Returns the queue, or NULL on failure. On every failure path the queue id
++ * is handed back to the IDA and any allocated memory is freed.
++ * May sleep (registration waits for the device's reply).
++ */
++struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count)
++{
++    struct bce_queue_cq *cq;
++    struct bce_queue_memcfg cfg;
++    int qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
++
++    if (qid < 0)
++        return NULL;
++    cq = bce_alloc_cq(dev, qid, el_count);
++    if (!cq)
++        goto fail_release_qid; /* the id used to be leaked on this path */
++    bce_get_cq_memcfg(cq, &cfg);
++    if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, NULL, false) != 0) {
++        pr_err("apple-bce: CQ registration failed (%i)", qid);
++        bce_free_cq(dev, cq);
++        goto fail_release_qid;
++    }
++    /* take queues_lock like bce_create_sq() and the destroy functions do */
++    spin_lock(&dev->queues_lock);
++    dev->queues[qid] = (struct bce_queue *) cq;
++    spin_unlock(&dev->queues_lock);
++    return cq;
++
++fail_release_qid:
++    ida_simple_remove(&dev->queue_ida, (uint) qid);
++    return NULL;
++}
++
++/*
++ * Create a submission queue attached to completion queue @cq: pick a free
++ * user queue id, allocate the queue, register it with the device under
++ * @name and publish it in dev->queues.
++ *
++ * @cq:        completion queue that will receive this queue's completions
++ * @name:      queue name passed to the device; must not be NULL
++ * @el_count:  number of struct bce_qe_submission slots
++ * @direction: DMA_TO_DEVICE or DMA_FROM_DEVICE; anything else is rejected
++ * @compl:     completion callback stored in the queue
++ * @userdata:  opaque pointer stored in the queue
++ *
++ * Returns the queue, or NULL on invalid arguments or failure. On every
++ * failure path the queue id is handed back to the IDA and any allocated
++ * memory is freed. May sleep (registration waits for the device's reply).
++ */
++struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
++        int direction, bce_sq_completion compl, void *userdata)
++{
++    struct bce_queue_sq *sq;
++    struct bce_queue_memcfg cfg;
++    int qid;
++
++    if (cq == NULL)
++        return NULL; /* cq can not be null */
++    if (name == NULL)
++        return NULL; /* name can not be null */
++    if (direction != DMA_TO_DEVICE && direction != DMA_FROM_DEVICE)
++        return NULL; /* unsupported direction */
++    qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
++    if (qid < 0)
++        return NULL;
++    sq = bce_alloc_sq(dev, qid, sizeof(struct bce_qe_submission), el_count, compl, userdata);
++    if (!sq)
++        goto fail_release_qid; /* the id used to be leaked on this path */
++    bce_get_sq_memcfg(sq, cq, &cfg);
++    /* the isdirout flag is set for every direction other than DMA_FROM_DEVICE */
++    if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, direction != DMA_FROM_DEVICE) != 0) {
++        pr_err("apple-bce: SQ registration failed (%i)", qid);
++        bce_free_sq(dev, sq);
++        goto fail_release_qid;
++    }
++    spin_lock(&dev->queues_lock);
++    dev->queues[qid] = (struct bce_queue *) sq;
++    spin_unlock(&dev->queues_lock);
++    return sq;
++
++fail_release_qid:
++    ida_simple_remove(&dev->queue_ida, (uint) qid);
++    return NULL;
++}
++
++/*
++ * Tear down a completion queue created by bce_create_cq(): unregister it from
++ * the device (skipped while the device is being removed), drop it from
++ * dev->queues, release its queue id and free it. An unregister failure is
++ * logged but teardown continues.
++ */
++void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++ if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) cq->qid))
++ pr_err("apple-bce: CQ unregister failed");
++ spin_lock(&dev->queues_lock);
++ dev->queues[cq->qid] = NULL;
++ spin_unlock(&dev->queues_lock);
++ ida_simple_remove(&dev->queue_ida, (uint) cq->qid);
++ bce_free_cq(dev, cq);
++}
++
++/*
++ * Tear down a submission queue created by bce_create_sq(): unregister it from
++ * the device (skipped while the device is being removed), drop it from
++ * dev->queues, release its queue id and free it. An unregister failure is
++ * logged but teardown continues.
++ */
++void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
++{
++    /* the message used to say "CQ", which made it indistinguishable from bce_destroy_cq() */
++    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) sq->qid))
++        pr_err("apple-bce: SQ unregister failed");
++    spin_lock(&dev->queues_lock);
++    dev->queues[sq->qid] = NULL;
++    spin_unlock(&dev->queues_lock);
++    ida_simple_remove(&dev->queue_ida, (uint) sq->qid);
++    bce_free_sq(dev, sq);
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/queue.h b/drivers/staging/apple-bce/queue.h
+new file mode 100644
+index 000000000..8368ac5df
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue.h
+@@ -0,0 +1,177 @@
++#ifndef BCE_QUEUE_H
++#define BCE_QUEUE_H
++
++#include <linux/completion.h>
++#include <linux/pci.h>
++
++#define BCE_CMD_SIZE 0x40
++
++struct apple_bce_device;
++
++enum bce_queue_type {
++ BCE_QUEUE_CQ, BCE_QUEUE_SQ
++};
++/*
++ * Common prefix of every queue object. struct bce_queue_cq and
++ * struct bce_queue_sq begin with the same two fields, so a pointer to either
++ * can be stored as a struct bce_queue pointer and cast back after checking type.
++ */
++struct bce_queue {
++ int qid;
++ int type; /* enum bce_queue_type */
++};
++/* Completion queue: a DMA-coherent ring of struct bce_qe_completion entries. */
++struct bce_queue_cq {
++ int qid;
++ int type; /* enum bce_queue_type */
++ u32 el_count; /* number of ring entries */
++ dma_addr_t dma_handle; /* DMA address of the ring */
++ void *data; /* CPU address of the ring */
++
++ u32 index; /* next entry to examine */
++};
++struct bce_queue_sq;
++typedef void (*bce_sq_completion)(struct bce_queue_sq *q);
++struct bce_sq_completion_data {
++ u32 status;
++ u64 data_size;
++ u64 result;
++};
++/*
++ * Submission queue: a DMA-coherent ring of el_count elements of el_size bytes
++ * plus host-side bookkeeping for slot reservation and completion delivery.
++ */
++struct bce_queue_sq {
++ int qid;
++ int type; /* enum bce_queue_type */
++ u32 el_size;
++ u32 el_count;
++ dma_addr_t dma_handle; /* DMA address of the ring */
++ void *data; /* CPU address of the ring */
++ void *userdata; /* opaque pointer for the completion callback's owner */
++ void __iomem *reg_mem_dma; /* register window containing the doorbells */
++
++ /* free slots; decremented by bce_reserve_submission() */
++ atomic_t available_commands;
++ /* signalled by bce_notify_submission_complete() when a waiter is registered */
++ struct completion available_command_completion;
++ atomic_t available_command_completion_waiting_count;
++ /* head: advanced by bce_notify_submission_complete(); tail: advanced by bce_next_submission() */
++ u32 head, tail;
++
++ /* completion_cidx: next slot bce_next_completion() hands out; completion_tail: next slot to be filled */
++ u32 completion_cidx, completion_tail;
++ struct bce_sq_completion_data *completion_data; /* el_count entries */
++ bool has_pending_completions; /* already queued in dev->int_sq_list for this pass */
++ bce_sq_completion completion; /* callback run after completions were recorded */
++};
++
++struct bce_queue_cmdq_result_el {
++ struct completion cmpl;
++ u32 status;
++ u64 result;
++};
++struct bce_queue_cmdq {
++ struct bce_queue_sq *sq;
++ struct spinlock lck;
++ struct bce_queue_cmdq_result_el **tres;
++};
++
++struct bce_queue_memcfg {
++ u16 qid;
++ u16 el_count;
++ u16 vector_or_cq;
++ u16 _pad;
++ u64 addr;
++ u64 length;
++};
++
++enum bce_qe_completion_status {
++ BCE_COMPLETION_SUCCESS = 0,
++ BCE_COMPLETION_ERROR = 1,
++ BCE_COMPLETION_ABORTED = 2,
++ BCE_COMPLETION_NO_SPACE = 3,
++ BCE_COMPLETION_OVERRUN = 4
++};
++enum bce_qe_completion_flags {
++ BCE_COMPLETION_FLAG_PENDING = 0x8000
++};
++struct bce_qe_completion {
++ u64 result;
++ u64 data_size;
++ u16 qid;
++ u16 completion_index;
++ u16 status; // bce_qe_completion_status
++ u16 flags; // bce_qe_completion_flags
++};
++
++struct bce_qe_submission {
++ u64 length;
++ u64 addr;
++
++ u64 segl_addr;
++ u64 segl_length;
++};
++
++enum bce_cmdq_command {
++ BCE_CMD_REGISTER_MEMORY_QUEUE = 0x20,
++ BCE_CMD_UNREGISTER_MEMORY_QUEUE = 0x30,
++ BCE_CMD_FLUSH_MEMORY_QUEUE = 0x40,
++ BCE_CMD_SET_MEMORY_QUEUE_PROPERTY = 0x50
++};
++struct bce_cmdq_simple_memory_queue_cmd {
++ u16 cmd; // bce_cmdq_command
++ u16 flags;
++ u16 qid;
++};
++struct bce_cmdq_register_memory_queue_cmd {
++ u16 cmd; // bce_cmdq_command
++ u16 flags;
++ u16 qid;
++ u16 _pad;
++ u16 el_count;
++ u16 vector_or_cq;
++ u16 _pad2;
++ u16 name_len;
++ char name[0x20];
++ u64 addr;
++ u64 length;
++};
++
++/* Return the address of element @i in the ring of @q (no bounds check). */
++static __always_inline void *bce_sq_element(struct bce_queue_sq *q, int i) {
++ return (void *) ((u8 *) q->data + q->el_size * i);
++}
++/* Return the address of completion entry @i in the ring of @q (no bounds check). */
++static __always_inline void *bce_cq_element(struct bce_queue_cq *q, int i) {
++ return (void *) ((struct bce_qe_completion *) q->data + i);
++}
++
++/*
++ * Pop the next recorded completion of @sq.
++ *
++ * Returns a pointer into sq->completion_data and advances completion_cidx
++ * (wrapping at el_count), or NULL when completion_cidx has caught up with
++ * completion_tail, i.e. nothing is left to consume.
++ */
++static __always_inline struct bce_sq_completion_data *bce_next_completion(struct bce_queue_sq *sq) {
++ struct bce_sq_completion_data *res;
++ /* pairs with the wmb() in bce_handle_cq_completion() before completion_tail is advanced */
++ rmb();
++ if (sq->completion_cidx == sq->completion_tail)
++ return NULL;
++ res = &sq->completion_data[sq->completion_cidx];
++ sq->completion_cidx = (sq->completion_cidx + 1) % sq->el_count;
++ return res;
++}
++
++struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count);
++void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg);
++void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++
++struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
++ bce_sq_completion compl, void *userdata);
++void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg);
++void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
++int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout);
++void bce_cancel_submission_reservation(struct bce_queue_sq *sq);
++void *bce_next_submission(struct bce_queue_sq *sq);
++void bce_submit_to_device(struct bce_queue_sq *sq);
++void bce_notify_submission_complete(struct bce_queue_sq *sq);
++
++void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size);
++
++struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count);
++void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq);
++
++u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout);
++u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
++u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
++
++
++/* User API - Creates and registers the queue */
++
++struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count);
++struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
++ int direction, bce_sq_completion compl, void *userdata);
++void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
++
++#endif //BCE_QUEUE_H
+diff --git a/drivers/staging/apple-bce/queue_dma.c b/drivers/staging/apple-bce/queue_dma.c
+new file mode 100644
+index 000000000..b23661328
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue_dma.c
+@@ -0,0 +1,220 @@
++#include "queue_dma.h"
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++#include "queue.h"
++
++static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len);
++static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
++ struct device *dev, struct scatterlist *pages, int pagen);
++static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_element_hostinfo *list);
++
++/*
++ * DMA-map @scatterlist for @dev and record it in @buf.
++ *
++ * The driver requires every scatterlist entry to be mapped individually, so
++ * a mapped count different from nents is treated as an error. When more than
++ * one entry is mapped, a device-readable segment list describing the entries
++ * is built as well.
++ *
++ * Returns 0 on success or -EIO on failure; on failure nothing stays mapped.
++ */
++int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
++        enum dma_data_direction dir)
++{
++    int cnt;
++
++    buf->direction = dir;
++    buf->scatterlist = scatterlist;
++    buf->seglist_hostinfo = NULL;
++
++    cnt = dma_map_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++    if (cnt != buf->scatterlist.nents) {
++        pr_err("apple-bce: DMA scatter list mapping returned an unexpected count: %i\n", cnt);
++        /* dma_map_sg() returns 0 on failure; in that case nothing was mapped and nothing may be unmapped */
++        if (cnt > 0)
++            dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++        return -EIO;
++    }
++    /* a single entry needs no segment list */
++    if (cnt == 1)
++        return 0;
++
++    buf->seglist_hostinfo = bce_map_segment_list(dev, buf->scatterlist.sgl, buf->scatterlist.nents);
++    if (!buf->seglist_hostinfo) {
++        pr_err("apple-bce: Creating segment list failed\n");
++        dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++        return -EIO;
++    }
++    return 0;
++}
++
++/*
++ * Build a scatterlist for the @len bytes at @data and DMA-map it into @buf.
++ * The page lookup uses vmalloc_to_page(), so @data is expected to be vmalloc memory.
++ * Returns 0 on success or the error from the failing step; the temporary
++ * table is freed again when mapping fails.
++ */
++int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++        enum dma_data_direction dir)
++{
++    struct sg_table table;
++    int err;
++
++    err = bce_alloc_scatterlist_from_vm(&table, data, len);
++    if (err)
++        return err;
++
++    err = bce_map_dma_buffer(dev, buf, table, dir);
++    if (err)
++        sg_free_table(&table);
++    return err;
++}
++
++/*
++ * Wrap the @len bytes at @data in a one-entry scatterlist and DMA-map it
++ * into @buf. A single entry is enough because kernel (kmalloc) memory is
++ * contiguous. Returns 0 on success or the error from the failing step.
++ */
++int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++        enum dma_data_direction dir)
++{
++    struct sg_table table;
++    int err;
++
++    err = sg_alloc_table(&table, 1, GFP_KERNEL);
++    if (err) {
++        sg_free_table(&table);
++        return err;
++    }
++
++    sg_set_buf(table.sgl, data, (uint) len);
++
++    err = bce_map_dma_buffer(dev, buf, table, dir);
++    if (err)
++        sg_free_table(&table);
++    return err;
++}
++
++/*
++ * Undo bce_map_dma_buffer(): unmap the scatterlist and release the segment
++ * list, if one was built (a NULL seglist_hostinfo is handled by the callee).
++ *
++ * NOTE(review): buf->scatterlist itself is not released with sg_free_table()
++ * here - confirm that the callers free it.
++ */
++void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf)
++{
++ dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, buf->direction);
++ bce_unmap_segement_list(dev, buf->seglist_hostinfo);
++}
++
++
++/*
++ * Build a scatterlist in @tbl covering the @len bytes at @data.
++ *
++ * Every page touched by the range is looked up with vmalloc_to_page() and the
++ * table is then created from the resulting page array. The temporary page
++ * array is allocated with vmalloc() when it would exceed one page, otherwise
++ * with kmalloc().
++ *
++ * Returns 0 on success, -ENOMEM if the temporary page array cannot be
++ * allocated, or the error from sg_alloc_table_from_pages().
++ */
++static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len)
++{
++    int status;
++    size_t i;
++    struct page **pages;
++    size_t off, start_page, end_page, page_count;
++    bool use_vmalloc;
++
++    off = (size_t) data % PAGE_SIZE;
++    start_page = (size_t) data / PAGE_SIZE;
++    end_page = ((size_t) data + len - 1) / PAGE_SIZE;
++    page_count = end_page - start_page + 1;
++
++    use_vmalloc = page_count > PAGE_SIZE / sizeof(struct page *);
++    if (use_vmalloc)
++        pages = vmalloc(page_count * sizeof(struct page *));
++    else
++        pages = kmalloc(page_count * sizeof(struct page *), GFP_KERNEL);
++    if (!pages)
++        return -ENOMEM; /* the array used to be written to without this check */
++
++    for (i = 0; i < page_count; i++)
++        pages[i] = vmalloc_to_page((void *) ((start_page + i) * PAGE_SIZE));
++
++    if ((status = sg_alloc_table_from_pages(tbl, pages, page_count, (unsigned int) off, len, GFP_KERNEL))) {
++        sg_free_table(tbl);
++    }
++
++    /* the page array is only needed while the table is being built */
++    if (use_vmalloc)
++        vfree(pages);
++    else
++        kfree(pages);
++    return status;
++}
++
++#define BCE_ELEMENTS_PER_PAGE ((PAGE_SIZE - sizeof(struct bce_segment_list_header)) \
++ / sizeof(struct bce_segment_list_element))
++#define BCE_ELEMENTS_PER_ADDITIONAL_PAGE (PAGE_SIZE / sizeof(struct bce_segment_list_element))
++
++/*
++ * Build and DMA-map a chain of segment lists describing the @pagen
++ * scatterlist entries in @pages.
++ *
++ * Each segment list is one or more physically consecutive pages starting with
++ * a struct bce_segment_list_header followed by struct bce_segment_list_element
++ * entries. For every list a host-side bce_segment_list_element_hostinfo node
++ * is kept; after mapping, each header's next_segl_addr/next_segl_length is
++ * pointed at the following list.
++ *
++ * Returns the first hostinfo node, or NULL if a DMA mapping fails (everything
++ * built so far is then passed to bce_unmap_segement_list()).
++ *
++ * NOTE(review): the results of __get_free_page() and kmalloc() are used
++ * without a NULL check.
++ * NOTE(review): `el` is never advanced inside the loop as written, so every
++ * scatterlist entry is stored into the same element slot - confirm whether an
++ * increment is missing.
++ * NOTE(review): element_count is set to the capacity of the page(s), not to
++ * the number of entries actually filled in - confirm this is what the device
++ * expects.
++ */
++static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
++ struct device *dev, struct scatterlist *pages, int pagen)
++{
++ size_t ptr, pptr = 0;
++ struct bce_segment_list_header theader; /* a temp header, to store the initial seg */
++ struct bce_segment_list_header *header;
++ struct bce_segment_list_element *el, *el_end;
++ struct bce_segment_list_element_hostinfo *out, *pout, *out_root;
++ struct scatterlist *sg;
++ int i;
++ header = &theader;
++ out = out_root = NULL;
++ el = el_end = NULL;
++ for_each_sg(pages, sg, pagen, i) {
++ if (el >= el_end) {
++ /* allocate a new page, this will be also done for the first element */
++ ptr = __get_free_page(GFP_KERNEL);
++ if (pptr && ptr == pptr + PAGE_SIZE) {
++ /* the new page directly follows the previous one: grow the current list in place */
++ out->page_count++;
++ header->element_count += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
++ el_end += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
++ } else {
++ /* start a new list: header at the top of the page, elements right behind it */
++ header = (void *) ptr;
++ header->element_count = BCE_ELEMENTS_PER_PAGE;
++ header->data_size = 0;
++ header->next_segl_addr = 0;
++ header->next_segl_length = 0;
++ el = (void *) (header + 1);
++ el_end = el + BCE_ELEMENTS_PER_PAGE;
++
++ if (out) {
++ out->next = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL);
++ out = out->next;
++ } else {
++ out_root = out = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL);
++ }
++ out->page_start = (void *) ptr;
++ out->page_count = 1;
++ /* marks the node as not yet mapped, see bce_unmap_segement_list() */
++ out->dma_start = DMA_MAPPING_ERROR;
++ out->next = NULL;
++ }
++ pptr = ptr;
++ }
++ el->addr = sg->dma_address;
++ el->length = sg->length;
++ header->data_size += el->length;
++ }
++
++ /* DMA map */
++ out = out_root;
++ pout = NULL;
++ while (out) {
++ out->dma_start = dma_map_single(dev, out->page_start, out->page_count * PAGE_SIZE, DMA_TO_DEVICE);
++ if (dma_mapping_error(dev, out->dma_start))
++ goto error;
++ /* link the previous list to this one now that its DMA address is known */
++ if (pout) {
++ header = pout->page_start;
++ header->next_segl_addr = out->dma_start;
++ header->next_segl_length = out->page_count * PAGE_SIZE;
++ }
++ pout = out;
++ out = out->next;
++ }
++ return out_root;
++
++ error:
++ bce_unmap_segement_list(dev, out_root);
++ return NULL;
++}
++
++/*
++ * Tear down a segment list chain built by bce_map_segment_list(): unmap each
++ * list that was DMA-mapped, free its backing pages and free the host-side
++ * node. A NULL @list is a no-op.
++ */
++static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_element_hostinfo *list)
++{
++    struct bce_segment_list_element_hostinfo *next;
++    size_t i;
++
++    while (list) {
++        /* nodes that never got mapped still carry DMA_MAPPING_ERROR */
++        if (list->dma_start != DMA_MAPPING_ERROR)
++            dma_unmap_single(dev, list->dma_start, list->page_count * PAGE_SIZE, DMA_TO_DEVICE);
++        /*
++         * The backing pages were previously leaked. Each one came from its own
++         * __get_free_page() call (consecutive pages were merely merged into one
++         * list), so they are released one page at a time.
++         */
++        for (i = 0; i < list->page_count; i++)
++            free_page((unsigned long) list->page_start + i * PAGE_SIZE);
++        next = list->next;
++        kfree(list);
++        list = next;
++    }
++}
++
++/*
++ * Fill submission @element so that it refers to @buf starting at @offset.
++ *
++ * Without a segment list the element points directly at the first (only)
++ * scatterlist entry plus @offset, with the given @length. With a segment
++ * list, the list containing @offset is located by walking the chain and
++ * subtracting each list's data_size; the element then references that list.
++ *
++ * Returns 0 on success or -EINVAL if @offset lies beyond the last list.
++ *
++ * NOTE(review): in the segment list path @length is not used; element->length
++ * is taken from the first scatterlist entry's dma_length and element->addr
++ * holds the remaining offset rather than a DMA address - presumably this is
++ * the device's convention, confirm.
++ */
++int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length)
++{
++ struct bce_segment_list_element_hostinfo *seg;
++ struct bce_segment_list_header *seg_header;
++
++ seg = buf->seglist_hostinfo;
++ if (!seg) {
++ element->addr = buf->scatterlist.sgl->dma_address + offset;
++ element->length = length;
++ element->segl_addr = 0;
++ element->segl_length = 0;
++ return 0;
++ }
++
++ /* skip whole lists that end before the requested offset */
++ while (seg) {
++ seg_header = seg->page_start;
++ if (offset <= seg_header->data_size)
++ break;
++ offset -= seg_header->data_size;
++ seg = seg->next;
++ }
++ if (!seg)
++ return -EINVAL;
++ element->addr = offset;
++ element->length = buf->scatterlist.sgl->dma_length;
++ element->segl_addr = seg->dma_start;
++ element->segl_length = seg->page_count * PAGE_SIZE;
++ return 0;
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/queue_dma.h b/drivers/staging/apple-bce/queue_dma.h
+new file mode 100644
+index 000000000..f8a57e50e
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue_dma.h
+@@ -0,0 +1,50 @@
++#ifndef BCE_QUEUE_DMA_H
++#define BCE_QUEUE_DMA_H
++
++#include <linux/pci.h>
++
++struct bce_qe_submission;
++
++struct bce_segment_list_header {
++ u64 element_count;
++ u64 data_size;
++
++ u64 next_segl_addr;
++ u64 next_segl_length;
++};
++struct bce_segment_list_element {
++ u64 addr;
++ u64 length;
++};
++
++/*
++ * Host-side bookkeeping for one device-visible segment list. The nodes form
++ * a singly linked chain in the same order as the device-side lists.
++ */
++struct bce_segment_list_element_hostinfo {
++ struct bce_segment_list_element_hostinfo *next;
++ void *page_start; /* CPU address of the first page; starts with the list header */
++ size_t page_count; /* number of consecutive pages backing this list */
++ dma_addr_t dma_start; /* DMA address of page_start, or DMA_MAPPING_ERROR while unmapped */
++};
++
++
++struct bce_dma_buffer {
++ enum dma_data_direction direction;
++ struct sg_table scatterlist;
++ struct bce_segment_list_element_hostinfo *seglist_hostinfo;
++};
++
++/* NOTE: Takes ownership of the sg_table if it succeeds. Ownership is not transferred on failure. */
++int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
++ enum dma_data_direction dir);
++
++/* Creates a buffer from virtual memory (vmalloc) */
++int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++ enum dma_data_direction dir);
++
++/* Creates a buffer from kernel memory (kmalloc) */
++int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++ enum dma_data_direction dir);
++
++void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf);
++
++int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length);
++
++#endif //BCE_QUEUE_DMA_H
+diff --git a/drivers/staging/apple-bce/vhci/command.h b/drivers/staging/apple-bce/vhci/command.h
+new file mode 100644
+index 000000000..26619e0bc
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/command.h
+@@ -0,0 +1,204 @@
++#ifndef BCE_VHCI_COMMAND_H
++#define BCE_VHCI_COMMAND_H
++
++#include "queue.h"
++#include <linux/jiffies.h>
++#include <linux/usb.h>
++
++#define BCE_VHCI_CMD_TIMEOUT_SHORT msecs_to_jiffies(2000)
++#define BCE_VHCI_CMD_TIMEOUT_LONG msecs_to_jiffies(30000)
++
++#define BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2 2
++#define BCE_VHCI_BULK_MAX_ACTIVE_URBS (1 << BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2)
++
++typedef u8 bce_vhci_port_t;
++typedef u8 bce_vhci_device_t;
++
++enum bce_vhci_command {
++ BCE_VHCI_CMD_CONTROLLER_ENABLE = 1,
++ BCE_VHCI_CMD_CONTROLLER_DISABLE = 2,
++ BCE_VHCI_CMD_CONTROLLER_START = 3,
++ BCE_VHCI_CMD_CONTROLLER_PAUSE = 4,
++
++ BCE_VHCI_CMD_PORT_POWER_ON = 0x10,
++ BCE_VHCI_CMD_PORT_POWER_OFF = 0x11,
++ BCE_VHCI_CMD_PORT_RESUME = 0x12,
++ BCE_VHCI_CMD_PORT_SUSPEND = 0x13,
++ BCE_VHCI_CMD_PORT_RESET = 0x14,
++ BCE_VHCI_CMD_PORT_DISABLE = 0x15,
++ BCE_VHCI_CMD_PORT_STATUS = 0x16,
++
++ BCE_VHCI_CMD_DEVICE_CREATE = 0x30,
++ BCE_VHCI_CMD_DEVICE_DESTROY = 0x31,
++
++ BCE_VHCI_CMD_ENDPOINT_CREATE = 0x40,
++ BCE_VHCI_CMD_ENDPOINT_DESTROY = 0x41,
++ BCE_VHCI_CMD_ENDPOINT_SET_STATE = 0x42,
++ BCE_VHCI_CMD_ENDPOINT_RESET = 0x44,
++
++ /* Device to host only */
++ BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE = 0x43,
++ BCE_VHCI_CMD_TRANSFER_REQUEST = 0x1000,
++ BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS = 0x1005
++};
++
++enum bce_vhci_endpoint_state {
++ BCE_VHCI_ENDPOINT_ACTIVE = 0,
++ BCE_VHCI_ENDPOINT_PAUSED = 1,
++ BCE_VHCI_ENDPOINT_STALLED = 2
++};
++
++/*
++ * Issue BCE_VHCI_CMD_CONTROLLER_ENABLE with param1 = 0x7100 | @busNum.
++ * On a zero status, *portMask receives the low 16 bits of the reply's param2.
++ * Returns the status from bce_vhci_command_queue_execute().
++ * NOTE(review): cmd fields not assigned here are left uninitialized - confirm
++ * the execute helper does not transmit them.
++ */
++static inline int bce_vhci_cmd_controller_enable(struct bce_vhci_command_queue *q, u8 busNum, u16 *portMask)
++{
++ int status;
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_CONTROLLER_ENABLE;
++ cmd.param1 = 0x7100u | busNum;
++ status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++ if (!status)
++ *portMask = (u16) res.param2;
++ return status;
++}
++/*
++ * Issue BCE_VHCI_CMD_CONTROLLER_DISABLE (long timeout) and return the status
++ * from bce_vhci_command_queue_execute().
++ * NOTE(review): only cmd.cmd is assigned; the other cmd fields are uninitialized.
++ */
++static inline int bce_vhci_cmd_controller_disable(struct bce_vhci_command_queue *q)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_CONTROLLER_DISABLE;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++/*
++ * Issue BCE_VHCI_CMD_CONTROLLER_START (long timeout) and return the status
++ * from bce_vhci_command_queue_execute().
++ * NOTE(review): only cmd.cmd is assigned; the other cmd fields are uninitialized.
++ */
++static inline int bce_vhci_cmd_controller_start(struct bce_vhci_command_queue *q)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_CONTROLLER_START;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++/*
++ * Issue BCE_VHCI_CMD_CONTROLLER_PAUSE (long timeout) and return the status
++ * from bce_vhci_command_queue_execute().
++ * NOTE(review): only cmd.cmd is assigned; the other cmd fields are uninitialized.
++ */
++static inline int bce_vhci_cmd_controller_pause(struct bce_vhci_command_queue *q)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_CONTROLLER_PAUSE;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++
++/*
++ * Issue BCE_VHCI_CMD_PORT_POWER_ON for @port (short timeout) and return the
++ * status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_port_power_on(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_PORT_POWER_ON;
++ cmd.param1 = port;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++/*
++ * Issue BCE_VHCI_CMD_PORT_POWER_OFF for @port (short timeout) and return the
++ * status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_port_power_off(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_PORT_POWER_OFF;
++ cmd.param1 = port;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++/*
++ * Issue BCE_VHCI_CMD_PORT_RESUME for @port (long timeout) and return the
++ * status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_port_resume(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_PORT_RESUME;
++ cmd.param1 = port;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++/*
++ * Issue BCE_VHCI_CMD_PORT_SUSPEND for @port (long timeout) and return the
++ * status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_port_suspend(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_PORT_SUSPEND;
++ cmd.param1 = port;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++/*
++ * Issue BCE_VHCI_CMD_PORT_RESET for @port, passing @timeout to the device in
++ * param2 (its unit is not visible here). The command itself uses the short
++ * host-side timeout. Returns the status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_port_reset(struct bce_vhci_command_queue *q, bce_vhci_port_t port, u32 timeout)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_PORT_RESET;
++ cmd.param1 = port;
++ cmd.param2 = timeout;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++/*
++ * Issue BCE_VHCI_CMD_PORT_DISABLE for @port (short timeout) and return the
++ * status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_port_disable(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_PORT_DISABLE;
++ cmd.param1 = port;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++/*
++ * Issue BCE_VHCI_CMD_PORT_STATUS for @port. @clearFlags is masked with
++ * 0x560000 before being sent in param2. For any non-negative status,
++ * *resStatus receives the low 32 bits of the reply's param2.
++ * Returns the status from bce_vhci_command_queue_execute().
++ * NOTE(review): this helper accepts status >= 0 while the sibling helpers
++ * test for status == 0 - confirm the difference is intended.
++ */
++static inline int bce_vhci_cmd_port_status(struct bce_vhci_command_queue *q, bce_vhci_port_t port,
++ u32 clearFlags, u32 *resStatus)
++{
++ int status;
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_PORT_STATUS;
++ cmd.param1 = port;
++ cmd.param2 = clearFlags & 0x560000;
++ status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++ if (status >= 0)
++ *resStatus = (u32) res.param2;
++ return status;
++}
++
++/*
++ * Issue BCE_VHCI_CMD_DEVICE_CREATE for @port. On a zero status, *dev receives
++ * the device id taken from the reply's param2 (truncated to bce_vhci_device_t).
++ * Returns the status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_device_create(struct bce_vhci_command_queue *q, bce_vhci_port_t port,
++ bce_vhci_device_t *dev)
++{
++ int status;
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_DEVICE_CREATE;
++ cmd.param1 = port;
++ status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++ if (!status)
++ *dev = (bce_vhci_device_t) res.param2;
++ return status;
++}
++/*
++ * Issue BCE_VHCI_CMD_DEVICE_DESTROY for device @dev (long timeout) and return
++ * the status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_device_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_DEVICE_DESTROY;
++ cmd.param1 = dev;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++
++/*
++ * Issue BCE_VHCI_CMD_ENDPOINT_CREATE for the endpoint described by @desc on
++ * device @dev.
++ *
++ * param1: device id in bits 0-7, endpoint address (masked with 0x8F) from bit 8.
++ * param2: endpoint type in the low bits, log2 of the maximum number of active
++ *         requests in bits 4-7 (non-zero for bulk endpoints only),
++ *         bInterval - 1 from bit 8 (interrupt endpoints only), max packet size
++ *         from bit 16 and max packet size times the packet multiplier from bit 32.
++ *
++ * Returns the status from bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_endpoint_create(struct bce_vhci_command_queue *q, bce_vhci_device_t dev,
++ struct usb_endpoint_descriptor *desc)
++{
++ struct bce_vhci_message cmd, res;
++ int endpoint_type = usb_endpoint_type(desc);
++ int maxp = usb_endpoint_maxp(desc);
++ int maxp_burst = usb_endpoint_maxp_mult(desc) * maxp;
++ u8 max_active_requests_pow2 = 0;
++ cmd.cmd = BCE_VHCI_CMD_ENDPOINT_CREATE;
++ cmd.param1 = dev | ((desc->bEndpointAddress & 0x8Fu) << 8);
++ if (endpoint_type == USB_ENDPOINT_XFER_BULK)
++ max_active_requests_pow2 = BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2;
++ cmd.param2 = endpoint_type | ((max_active_requests_pow2 & 0xf) << 4) | (maxp << 16) | ((u64) maxp_burst << 32);
++ if (endpoint_type == USB_ENDPOINT_XFER_INT)
++ cmd.param2 |= (desc->bInterval - 1) << 8;
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++/*
++ * Issue BCE_VHCI_CMD_ENDPOINT_DESTROY; param1 carries the device id in bits
++ * 0-7 and @endpoint from bit 8. Returns the status from
++ * bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_endpoint_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint)
++{
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_ENDPOINT_DESTROY;
++ cmd.param1 = dev | (endpoint << 8);
++ return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++/*
++ * Issue BCE_VHCI_CMD_ENDPOINT_SET_STATE asking the device to put @endpoint of
++ * device @dev into @newState. Unless the status is BCE_VHCI_INTERNAL_ERROR or
++ * BCE_VHCI_NO_POWER, *retState receives the state reported in the reply's
++ * param2. Returns the status from bce_vhci_command_queue_execute().
++ * NOTE(review): *retState is also written for other non-zero statuses (for
++ * example a timeout, if one is reported that way) - confirm res is valid then.
++ */
++static inline int bce_vhci_cmd_endpoint_set_state(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint,
++ enum bce_vhci_endpoint_state newState, enum bce_vhci_endpoint_state *retState)
++{
++ int status;
++ struct bce_vhci_message cmd, res;
++ cmd.cmd = BCE_VHCI_CMD_ENDPOINT_SET_STATE;
++ cmd.param1 = dev | (endpoint << 8);
++ cmd.param2 = (u64) newState;
++ status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++ if (status != BCE_VHCI_INTERNAL_ERROR && status != BCE_VHCI_NO_POWER)
++ *retState = (enum bce_vhci_endpoint_state) res.param2;
++ return status;
++}
++/*
++ * Send BCE_VHCI_CMD_ENDPOINT_RESET for @endpoint on device @dev.
++ * Returns the result of bce_vhci_command_queue_execute().
++ */
++static inline int bce_vhci_cmd_endpoint_reset(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint)
++{
++    /* Zero the unused fields (status, param2): the full struct goes to the device. */
++    struct bce_vhci_message cmd = {
++        .cmd = BCE_VHCI_CMD_ENDPOINT_RESET,
++        .param1 = dev | (endpoint << 8),
++    };
++    struct bce_vhci_message res;
++
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++
++
++#endif //BCE_VHCI_COMMAND_H
+diff --git a/drivers/staging/apple-bce/vhci/queue.c b/drivers/staging/apple-bce/vhci/queue.c
+new file mode 100644
+index 000000000..7b0b50271
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/queue.c
+@@ -0,0 +1,268 @@
++#include "queue.h"
++#include "vhci.h"
++#include "../apple_bce.h"
++
++
++static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq);
++
++/*
++ * Set up a host-to-device message queue: a completion queue, a submission
++ * queue named @name bound to it, and a coherent DMA ring with
++ * VHCI_EVENT_QUEUE_EL_COUNT message slots.
++ *
++ * Returns 0 on success or -EINVAL if any of the three resources could not
++ * be created; on failure everything already created is torn down and the
++ * cq/sq pointers in @ret are reset to NULL.
++ */
++int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name)
++{
++    ret->cq = bce_create_cq(vhci->dev, VHCI_EVENT_QUEUE_EL_COUNT);
++    if (!ret->cq)
++        return -EINVAL;
++
++    ret->sq = bce_create_sq(vhci->dev, ret->cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_TO_DEVICE,
++            bce_vhci_message_queue_completion, ret);
++    if (!ret->sq)
++        goto err_destroy_cq;
++
++    ret->data = dma_alloc_coherent(&vhci->dev->pci->dev,
++            sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++            &ret->dma_addr, GFP_KERNEL);
++    if (ret->data)
++        return 0;
++
++    /* Ring allocation failed: unwind the submission queue, then the completion queue. */
++    bce_destroy_sq(vhci->dev, ret->sq);
++    ret->sq = NULL;
++err_destroy_cq:
++    bce_destroy_cq(vhci->dev, ret->cq);
++    ret->cq = NULL;
++    return -EINVAL;
++}
++
++/*
++ * Free the DMA ring and destroy the submission and completion queues of @q.
++ * Does nothing when q->cq is NULL (queue never created or creation failed).
++ */
++void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q)
++{
++    if (q->cq) {
++        dma_free_coherent(&vhci->dev->pci->dev,
++                sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++                q->data, q->dma_addr);
++        bce_destroy_sq(vhci->dev, q->sq);
++        bce_destroy_cq(vhci->dev, q->cq);
++    }
++}
++
++/*
++ * Copy @req into the ring slot at the submission queue's current tail,
++ * point the next submission at that slot and hand it to the device.
++ *
++ * No reservation is made here; the callers visible in this file call
++ * bce_reserve_submission() on q->sq before writing.
++ */
++void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req)
++{
++    /* The tail index must be sampled before bce_next_submission() is called. */
++    int slot = q->sq->tail;
++    struct bce_qe_submission *sub = bce_next_submission(q->sq);
++    struct bce_vhci_message *dst = &q->data[slot];
++
++    pr_debug("bce-vhci: Send message: %x s=%x p1=%x p2=%llx\n", req->cmd, req->status, req->param1, req->param2);
++    *dst = *req;
++    bce_set_submission_single(sub, q->dma_addr + sizeof(struct bce_vhci_message) * slot,
++            sizeof(struct bce_vhci_message));
++    bce_submit_to_device(q->sq);
++}
++
++/*
++ * Completion callback of the message submission queue: acknowledge every
++ * pending completion without inspecting it.
++ */
++static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq)
++{
++    for (;;) {
++        if (!bce_next_completion(sq))
++            break;
++        bce_notify_submission_complete(sq);
++    }
++}
++
++
++
++static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq);
++
++/*
++ * Create a device-to-host event queue with a caller-supplied completion
++ * handler @compl. The submission queue is attached to vhci->ev_cq, a
++ * coherent DMA ring of VHCI_EVENT_QUEUE_EL_COUNT messages is allocated and
++ * VHCI_EVENT_PENDING_COUNT receive buffers are posted immediately.
++ *
++ * Returns 0 on success, -EINVAL if the submission queue or the ring could
++ * not be created (ret->sq is NULL afterwards in both cases).
++ */
++int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_sq_completion compl)
++{
++    ret->vhci = vhci;
++
++    ret->sq = bce_create_sq(vhci->dev, vhci->ev_cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_FROM_DEVICE, compl, ret);
++    if (!ret->sq)
++        return -EINVAL;
++
++    ret->data = dma_alloc_coherent(&vhci->dev->pci->dev,
++            sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++            &ret->dma_addr, GFP_KERNEL);
++    if (ret->data) {
++        init_completion(&ret->queue_empty_completion);
++        bce_vhci_event_queue_submit_pending(ret, VHCI_EVENT_PENDING_COUNT);
++        return 0;
++    }
++
++    /* No ring memory: undo the submission queue. */
++    bce_destroy_sq(vhci->dev, ret->sq);
++    ret->sq = NULL;
++    return -EINVAL;
++}
++
++/*
++ * Create an event queue whose messages are dispatched to @cb by the default
++ * completion handler, bce_vhci_event_queue_completion().
++ * Returns the result of __bce_vhci_event_queue_create().
++ */
++int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_vhci_event_queue_callback cb)
++{
++    int rc;
++
++    /* The callback must be in place before buffers are posted by the create call. */
++    ret->cb = cb;
++    rc = __bce_vhci_event_queue_create(vhci, ret, name, bce_vhci_event_queue_completion);
++    return rc;
++}
++
++/*
++ * Free the DMA ring and destroy the submission queue of event queue @q.
++ * Does nothing when q->sq is NULL.
++ */
++void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q)
++{
++    if (q->sq) {
++        dma_free_coherent(&vhci->dev->pci->dev,
++                sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++                q->data, q->dma_addr);
++        bce_destroy_sq(vhci->dev, q->sq);
++    }
++}
++
++/*
++ * Default completion handler for event queues.
++ *
++ * Each non-aborted completion corresponds to the message stored at the
++ * submission queue's head slot; it is passed to the queue's callback. After
++ * the batch, as many receive buffers as messages were delivered are posted
++ * again. If the queue ends up with el_count - 1 available commands, the
++ * queue_empty_completion is signalled (waited on by
++ * bce_vhci_event_queue_pause()).
++ */
++static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq)
++{
++    struct bce_vhci_event_queue *evq = sq->userdata;
++    struct bce_sq_completion_data *done;
++    size_t delivered = 0;
++
++    while ((done = bce_next_completion(sq)) != NULL) {
++        /* Aborted entries come from a queue flush and carry no message. */
++        if (done->status != BCE_COMPLETION_ABORTED) {
++            struct bce_vhci_message *m = &evq->data[sq->head];
++
++            pr_debug("bce-vhci: Got event: %x s=%x p1=%x p2=%llx\n", m->cmd, m->status, m->param1, m->param2);
++            evq->cb(evq, m);
++            delivered++;
++        }
++        bce_notify_submission_complete(sq);
++    }
++
++    bce_vhci_event_queue_submit_pending(evq, delivered);
++    if (atomic_read(&sq->available_commands) == sq->el_count - 1)
++        complete(&evq->queue_empty_completion);
++}
++
++/*
++ * Post up to @count receive buffers on event queue @q. Each buffer is the
++ * ring slot matching the submission queue's tail index. Stops early (with
++ * an error log) if a submission cannot be reserved; whatever was queued is
++ * still submitted to the device.
++ */
++void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count)
++{
++    size_t i;
++
++    for (i = 0; i < count; i++) {
++        struct bce_qe_submission *sub;
++        int slot;
++
++        if (bce_reserve_submission(q->sq, NULL) != 0) {
++            pr_err("bce-vhci: Failed to reserve an event queue submission\n");
++            break;
++        }
++        /* Sample the tail before bce_next_submission() is called. */
++        slot = q->sq->tail;
++        sub = bce_next_submission(q->sq);
++        bce_set_submission_single(sub,
++                q->dma_addr + slot * sizeof(struct bce_vhci_message), sizeof(struct bce_vhci_message));
++    }
++    bce_submit_to_device(q->sq);
++}
++
++/*
++ * Ask the device to flush event queue @q and wait (up to 5 seconds in
++ * total) until no submissions are pending, i.e. until available_commands
++ * equals el_count - 1. A failed flush request is only logged; the wait
++ * still takes place. A timeout is logged and the function returns anyway.
++ */
++void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q)
++{
++ unsigned long timeout;
++ /* Re-arm the completion before requesting the flush. */
++ reinit_completion(&q->queue_empty_completion);
++ if (bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, q->sq->qid))
++ pr_warn("bce-vhci: failed to flush event queue\n");
++ timeout = msecs_to_jiffies(5000);
++ while (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) {
++ /* wait_for_completion_timeout() returns the remaining jiffies, so the 5 s budget is shared across iterations. */
++ timeout = wait_for_completion_timeout(&q->queue_empty_completion, timeout);
++ if (timeout == 0) {
++ pr_err("bce-vhci: waiting for queue to be flushed timed out\n");
++ break;
++ }
++ }
++}
++
++/*
++ * Re-post VHCI_EVENT_PENDING_COUNT receive buffers on a previously paused
++ * event queue. Refuses (with an error log) if the queue still has pending
++ * submissions, i.e. available_commands differs from el_count - 1.
++ */
++void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q)
++{
++    if (atomic_read(&q->sq->available_commands) == q->sq->el_count - 1) {
++        bce_vhci_event_queue_submit_pending(q, VHCI_EVENT_PENDING_COUNT);
++        return;
++    }
++    pr_err("bce-vhci: resume of a queue with pending submissions\n");
++}
++
++/*
++ * Initialise command queue @ret on top of message queue @mq: no command in
++ * flight (result pointer NULL), fresh completion, spinlock and mutex.
++ */
++void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq)
++{
++    mutex_init(&ret->mutex);
++    spin_lock_init(&ret->completion_lock);
++    init_completion(&ret->completion.completion);
++    ret->completion.result = NULL;
++    ret->mq = mq;
++}
++
++/*
++ * Tear down command queue @cq. If a command is still waiting for its reply,
++ * the reply buffer is zeroed, marked BCE_VHCI_ABORT and the waiter is woken.
++ */
++void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq)
++{
++ spin_lock(&cq->completion_lock);
++ if (cq->completion.result) {
++ /* Fabricate an aborted reply for the in-flight command. */
++ memset(cq->completion.result, 0, sizeof(struct bce_vhci_message));
++ cq->completion.result->status = BCE_VHCI_ABORT;
++ complete(&cq->completion.completion);
++ cq->completion.result = NULL;
++ }
++ spin_unlock(&cq->completion_lock);
++ /*
++  * bce_vhci_command_queue_execute() holds cq->mutex for the whole command;
++  * taking and releasing it here presumably waits for that caller to leave
++  * before the mutex is destroyed.
++  */
++ mutex_lock(&cq->mutex);
++ mutex_unlock(&cq->mutex);
++ mutex_destroy(&cq->mutex);
++}
++
++/*
++ * Hand reply @msg to the command currently waiting on @cq, if any: copy it
++ * into the waiter's result buffer, wake the waiter and clear the result
++ * pointer so that a later reply is not delivered to the same buffer.
++ * A reply arriving while nobody waits is silently dropped.
++ */
++void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg)
++{
++    struct bce_vhci_message *dst;
++
++    spin_lock(&cq->completion_lock);
++    dst = cq->completion.result;
++    if (dst != NULL) {
++        *dst = *msg;
++        complete(&cq->completion.completion);
++        cq->completion.result = NULL;
++    }
++    spin_unlock(&cq->completion_lock);
++}
++
++/*
++ * Send @req on the command queue and wait up to @timeout for the reply,
++ * which is stored in @res. The caller must hold cq->mutex (see
++ * bce_vhci_command_queue_execute()).
++ *
++ * If no reply arrives in time, a cancellation (same message with bit 0x4000
++ * set in cmd) is sent and waited for. Replies are matched by comparing
++ * res->cmd with bit 0x8000 cleared against the request's cmd.
++ *
++ * Returns:
++ *   0           reply status was BCE_VHCI_SUCCESS
++ *   > 0         any other reply status (enum bce_vhci_message_status)
++ *   -ETIMEDOUT  the command was cancelled, or the cancel itself timed out
++ *   -EIO        the reply does not match the request
++ *   other       error from bce_reserve_submission()
++ *
++ * On every path that returns without a reply having been delivered, the
++ * pending result pointer is cleared, so a late reply can never be written
++ * into @res after this function has returned.
++ */
++static int __bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++        struct bce_vhci_message *res, unsigned long timeout)
++{
++    int status;
++    struct bce_vhci_command_queue_completion *c;
++    struct bce_vhci_message creq;
++    c = &cq->completion;
++
++    if ((status = bce_reserve_submission(cq->mq->sq, &timeout)))
++        return status;
++
++    /* Publish the reply buffer before the request can possibly be answered. */
++    spin_lock(&cq->completion_lock);
++    c->result = res;
++    reinit_completion(&c->completion);
++    spin_unlock(&cq->completion_lock);
++
++    bce_vhci_message_queue_write(cq->mq, req);
++
++    if (!wait_for_completion_timeout(&c->completion, timeout)) {
++        /* we ran out of time, send cancellation */
++        pr_debug("bce-vhci: command timed out req=%x\n", req->cmd);
++        if ((status = bce_reserve_submission(cq->mq->sq, &timeout))) {
++            /*
++             * The cancel cannot be sent. Withdraw the reply buffer before
++             * returning: @res usually lives on the caller's stack and a
++             * late reply must not be copied into it.
++             */
++            spin_lock(&cq->completion_lock);
++            c->result = NULL;
++            spin_unlock(&cq->completion_lock);
++            return status;
++        }
++
++        creq = *req;
++        creq.cmd |= 0x4000;
++        bce_vhci_message_queue_write(cq->mq, &creq);
++
++        /* NOTE(review): 1000 is passed as raw jiffies here, not milliseconds - confirm intent. */
++        if (!wait_for_completion_timeout(&c->completion, 1000)) {
++            pr_err("bce-vhci: Possible desync, cmd cancel timed out\n");
++
++            spin_lock(&cq->completion_lock);
++            c->result = NULL;
++            spin_unlock(&cq->completion_lock);
++            return -ETIMEDOUT;
++        }
++        if ((res->cmd & ~0x8000) == creq.cmd)
++            return -ETIMEDOUT;
++        /* reply for the previous command most likely arrived */
++    }
++
++    if ((res->cmd & ~0x8000) != req->cmd) {
++        pr_err("bce-vhci: Possible desync, cmd reply mismatch req=%x, res=%x\n", req->cmd, res->cmd);
++        return -EIO;
++    }
++    if (res->status == BCE_VHCI_SUCCESS)
++        return 0;
++    return res->status;
++}
++
++/*
++ * Execute one command on @cq, serialised against other callers by
++ * cq->mutex. See __bce_vhci_command_queue_execute() for the return values.
++ */
++int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++        struct bce_vhci_message *res, unsigned long timeout)
++{
++    int rc;
++
++    mutex_lock(&cq->mutex);
++    rc = __bce_vhci_command_queue_execute(cq, req, res, timeout);
++    mutex_unlock(&cq->mutex);
++
++    return rc;
++}
+diff --git a/drivers/staging/apple-bce/vhci/queue.h b/drivers/staging/apple-bce/vhci/queue.h
+new file mode 100644
+index 000000000..adb705b6b
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/queue.h
+@@ -0,0 +1,76 @@
++#ifndef BCE_VHCI_QUEUE_H
++#define BCE_VHCI_QUEUE_H
++
++#include <linux/completion.h>
++#include "../queue.h"
++
++#define VHCI_EVENT_QUEUE_EL_COUNT 256
++#define VHCI_EVENT_PENDING_COUNT 32
++
++struct bce_vhci;
++struct bce_vhci_event_queue;
++
++/*
++ * Values carried in bce_vhci_message.status. The command queue maps
++ * BCE_VHCI_SUCCESS to a return value of 0 and hands every other value back
++ * to the caller unchanged.
++ */
++enum bce_vhci_message_status {
++ BCE_VHCI_SUCCESS = 1,
++ BCE_VHCI_ERROR = 2,
++ BCE_VHCI_USB_PIPE_STALL = 3,
++ BCE_VHCI_ABORT = 4,
++ BCE_VHCI_BAD_ARGUMENT = 5,
++ BCE_VHCI_OVERRUN = 6,
++ BCE_VHCI_INTERNAL_ERROR = 7,
++ BCE_VHCI_NO_POWER = 8,
++ BCE_VHCI_UNSUPPORTED = 9
++};
++/*
++ * One message as exchanged with the device through the DMA rings.
++ * The meaning of param1/param2 depends on the command.
++ */
++struct bce_vhci_message {
++ /* Command code; replies are matched with bit 0x8000 masked off, cancellations set bit 0x4000. */
++ u16 cmd;
++ u16 status; // bce_vhci_message_status
++ u32 param1;
++ u64 param2;
++};
++
++/* Host-to-device message queue: its own completion queue, a submission queue and the message ring. */
++struct bce_vhci_message_queue {
++ struct bce_queue_cq *cq;
++ struct bce_queue_sq *sq;
++ /* Coherent DMA ring of VHCI_EVENT_QUEUE_EL_COUNT messages, indexed by the sq tail. */
++ struct bce_vhci_message *data;
++ /* Bus address of data[0]. */
++ dma_addr_t dma_addr;
++};
++/* Invoked by the default event-queue completion handler for every received message. */
++typedef void (*bce_vhci_event_queue_callback)(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
++/* Device-to-host event queue; its submission queue is attached to the shared vhci->ev_cq. */
++struct bce_vhci_event_queue {
++ struct bce_vhci *vhci;
++ struct bce_queue_sq *sq;
++ /* Coherent DMA ring of VHCI_EVENT_QUEUE_EL_COUNT receive slots. */
++ struct bce_vhci_message *data;
++ /* Bus address of data[0]. */
++ dma_addr_t dma_addr;
++ bce_vhci_event_queue_callback cb;
++ /* Signalled when no submissions are pending; waited on by bce_vhci_event_queue_pause(). */
++ struct completion queue_empty_completion;
++};
++/* Rendezvous between the command sender and the reply delivery path. */
++struct bce_vhci_command_queue_completion {
++ /* Where the next reply is copied to; NULL when no command is waiting. */
++ struct bce_vhci_message *result;
++ struct completion completion;
++};
++/* Synchronous request/reply layer on top of a message queue; one command in flight at a time. */
++struct bce_vhci_command_queue {
++ struct bce_vhci_message_queue *mq;
++ struct bce_vhci_command_queue_completion completion;
++ /* Protects completion.result. */
++ struct spinlock completion_lock;
++ /* Held for the duration of bce_vhci_command_queue_execute(). */
++ struct mutex mutex;
++};
++
++/* Message queue (host to device): create/destroy return resources as a unit; write() needs a reserved submission. */
++int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name);
++void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q);
++void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req);
++
++/*
++ * Event queue (device to host). The __ variant takes a raw submission-queue
++ * completion handler; the plain variant dispatches each message to cb.
++ * pause() flushes the queue and waits for it to drain, resume() re-posts
++ * receive buffers.
++ */
++int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++ bce_sq_completion compl);
++int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++ bce_vhci_event_queue_callback cb);
++void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q);
++void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count);
++void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q);
++void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q);
++
++/*
++ * Command queue: execute() sends req and blocks for the reply in res;
++ * deliver_completion() is the entry point for incoming replies.
++ */
++void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq);
++void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq);
++int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++ struct bce_vhci_message *res, unsigned long timeout);
++void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg);
++
++#endif //BCE_VHCI_QUEUE_H
+diff --git a/drivers/staging/apple-bce/vhci/transfer.c b/drivers/staging/apple-bce/vhci/transfer.c
+new file mode 100644
+index 000000000..8226363d6
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/transfer.c
+@@ -0,0 +1,661 @@
++#include "transfer.h"
++#include "../queue.h"
++#include "vhci.h"
++#include "../apple_bce.h"
++#include <linux/usb/hcd.h>
++
++static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq);
++static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q);
++static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q);
++
++static int bce_vhci_urb_init(struct bce_vhci_urb *vurb);
++static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg);
++static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c);
++
++static void bce_vhci_transfer_queue_reset_w(struct work_struct *work);
++
++/*
++ * Initialise transfer queue @q for endpoint @endp of device @dev_addr.
++ *
++ * The queue starts active and not stalled. At most one URB may be active at
++ * a time, except on bulk endpoints where BCE_VHCI_BULK_MAX_ACTIVE_URBS are
++ * allowed. One completion queue is created, plus an IN and/or OUT
++ * submission queue depending on @dir (DMA_BIDIRECTIONAL creates both).
++ *
++ * NOTE(review): the results of bce_create_cq()/bce_create_sq() are not
++ * checked and this function cannot report failure - confirm callers cope.
++ * NOTE(review): q->paused_by is not initialised here; presumably the caller
++ * zeroes the structure - verify.
++ */
++void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q,
++ struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir)
++{
++ char name[0x21];
++ INIT_LIST_HEAD(&q->evq);
++ INIT_LIST_HEAD(&q->giveback_urb_list);
++ spin_lock_init(&q->urb_lock);
++ mutex_init(&q->pause_lock);
++ q->vhci = vhci;
++ q->endp = endp;
++ q->dev_addr = dev_addr;
++ /* Keep only the direction bit and the endpoint number. */
++ q->endp_addr = (u8) (endp->desc.bEndpointAddress & 0x8F);
++ q->state = BCE_VHCI_ENDPOINT_ACTIVE;
++ q->active = true;
++ q->stalled = false;
++ q->max_active_requests = 1;
++ if (usb_endpoint_type(&endp->desc) == USB_ENDPOINT_XFER_BULK)
++ q->max_active_requests = BCE_VHCI_BULK_MAX_ACTIVE_URBS;
++ q->remaining_active_requests = q->max_active_requests;
++ q->cq = bce_create_cq(vhci->dev, 0x100);
++ INIT_WORK(&q->w_reset, bce_vhci_transfer_queue_reset_w);
++ q->sq_in = NULL;
++ if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
++ /* The IN queue name carries the endpoint number with the direction bit (0x80) set. */
++ snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, 0x80 | usb_endpoint_num(&endp->desc));
++ q->sq_in = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_FROM_DEVICE,
++ bce_vhci_transfer_queue_completion, q);
++ }
++ q->sq_out = NULL;
++ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) {
++ snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, usb_endpoint_num(&endp->desc));
++ q->sq_out = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_TO_DEVICE,
++ bce_vhci_transfer_queue_completion, q);
++ }
++}
++
++/*
++ * Tear down transfer queue @q: give back URBs already queued for giveback,
++ * drop deferred events, then destroy whichever submission queues exist and
++ * finally the completion queue.
++ */
++void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q)
++{
++    bce_vhci_transfer_queue_giveback(q);
++    bce_vhci_transfer_queue_remove_pending(q);
++
++    if (q->sq_in != NULL) {
++        bce_destroy_sq(vhci->dev, q->sq_in);
++    }
++    if (q->sq_out != NULL) {
++        bce_destroy_sq(vhci->dev, q->sq_out);
++    }
++    bce_destroy_cq(vhci->dev, q->cq);
++}
++
++/* True while the queue still has budget for another active (initialised) URB. */
++static inline bool bce_vhci_transfer_queue_can_init_urb(struct bce_vhci_transfer_queue *q)
++{
++    if (q->remaining_active_requests > 0)
++        return true;
++    return false;
++}
++
++/*
++ * Queue a copy of @msg at the tail of q->evq so it can be replayed later by
++ * bce_vhci_transfer_queue_deliver_pending().
++ *
++ * Called with q->urb_lock held and interrupts disabled (see
++ * bce_vhci_transfer_queue_event()), so the allocation must not sleep. If no
++ * memory is available the event is dropped with an error log instead of
++ * dereferencing a NULL pointer.
++ */
++static void bce_vhci_transfer_queue_defer_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
++{
++    struct bce_vhci_list_message *lm;
++
++    lm = kmalloc(sizeof(*lm), GFP_ATOMIC);
++    if (!lm) {
++        pr_err("bce-vhci: [%02x] Failed to allocate a deferred event, dropping it\n", q->endp_addr);
++        return;
++    }
++    INIT_LIST_HEAD(&lm->list);
++    lm->msg = *msg;
++    list_add_tail(&lm->list, &q->evq);
++}
++
++/*
++ * Hand every URB on q->giveback_urb_list back to the USB core, using the
++ * status already stored in urb->status by bce_vhci_urb_complete().
++ * Must be called without q->urb_lock held.
++ */
++static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q)
++{
++ unsigned long flags;
++ struct urb *urb;
++ spin_lock_irqsave(&q->urb_lock, flags);
++ while (!list_empty(&q->giveback_urb_list)) {
++ urb = list_first_entry(&q->giveback_urb_list, struct urb, urb_list);
++ list_del(&urb->urb_list);
++
++ /* The lock is dropped around the giveback - presumably because the URB's completion handler may re-enter this driver. */
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ usb_hcd_giveback_urb(q->vhci->hcd, urb, urb->status);
++ spin_lock_irqsave(&q->urb_lock, flags);
++ }
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++}
++
++static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q);
++
++/*
++ * Replay deferred events (q->evq) against the URB at the head of the
++ * endpoint's list, oldest event first. Replay stops as soon as either list
++ * is empty or the head URB answers -EAGAIN, in which case the event stays
++ * queued. Afterwards further pending URBs are initialised if the budget
++ * allows.
++ *
++ * The callers visible in this file hold q->urb_lock.
++ */
++static void bce_vhci_transfer_queue_deliver_pending(struct bce_vhci_transfer_queue *q)
++{
++    for (;;) {
++        struct bce_vhci_list_message *ev;
++        struct urb *head_urb;
++
++        if (list_empty(&q->endp->urb_list) || list_empty(&q->evq))
++            break;
++
++        head_urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++        ev = list_first_entry(&q->evq, struct bce_vhci_list_message, list);
++
++        if (bce_vhci_urb_update(head_urb->hcpriv, &ev->msg) == -EAGAIN)
++            break;
++
++        /* The event was consumed (or rejected for good): drop it. */
++        list_del(&ev->list);
++        kfree(ev);
++    }
++
++    /* some of the URBs could have been completed, so initialize more URBs if possible */
++    bce_vhci_transfer_queue_init_pending_urbs(q);
++}
++
++/* Discard every deferred event queued on q->evq, under q->urb_lock. */
++static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q)
++{
++    struct bce_vhci_list_message *ev, *next;
++    unsigned long irq_flags;
++
++    spin_lock_irqsave(&q->urb_lock, irq_flags);
++    list_for_each_entry_safe(ev, next, &q->evq, list) {
++        list_del(&ev->list);
++        kfree(ev);
++    }
++    spin_unlock_irqrestore(&q->urb_lock, irq_flags);
++}
++
++/*
++ * Process event @msg addressed to transfer queue @q.
++ *
++ * Older deferred events are replayed first. The new event is then either
++ * applied to the URB at the head of the endpoint's list or deferred. Any
++ * URBs completed along the way are given back after the lock is released.
++ */
++void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
++{
++ unsigned long flags;
++ struct bce_vhci_urb *turb;
++ struct urb *urb;
++ spin_lock_irqsave(&q->urb_lock, flags);
++ bce_vhci_transfer_queue_deliver_pending(q);
++
++ /* A transfer request is deferred if older events are still queued (to keep ordering) or if there is no URB to apply it to. */
++ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST &&
++ (!list_empty(&q->evq) || list_empty(&q->endp->urb_list))) {
++ bce_vhci_transfer_queue_defer_event(q, msg);
++ goto complete;
++ }
++ if (list_empty(&q->endp->urb_list)) {
++ pr_err("bce-vhci: [%02x] Unexpected transfer queue event\n", q->endp_addr);
++ goto complete;
++ }
++ urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++ turb = urb->hcpriv;
++ /* -EAGAIN means the URB cannot take this message now; keep it for a later replay. */
++ if (bce_vhci_urb_update(turb, msg) == -EAGAIN) {
++ bce_vhci_transfer_queue_defer_event(q, msg);
++ } else {
++ bce_vhci_transfer_queue_init_pending_urbs(q);
++ }
++
++complete:
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ bce_vhci_transfer_queue_giveback(q);
++}
++
++/*
++ * Completion handler shared by the IN and OUT submission queues of a
++ * transfer queue.
++ *
++ * Every completion taken from @sq is acknowledged with
++ * bce_notify_submission_complete(), including those that arrive while no
++ * URB is pending; skipping the acknowledgement on that path would leave the
++ * submission slot unreleased. Non-aborted completions are applied to the
++ * URB at the head of the endpoint's list. Deferred events are replayed
++ * afterwards and completed URBs are given back once the lock is released.
++ */
++static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq)
++{
++    unsigned long flags;
++    struct bce_sq_completion_data *c;
++    struct urb *urb;
++    struct bce_vhci_transfer_queue *q = sq->userdata;
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    while ((c = bce_next_completion(sq))) {
++        if (c->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */
++            pr_debug("bce-vhci: [%02x] Got an abort completion\n", q->endp_addr);
++            bce_notify_submission_complete(sq);
++            continue;
++        }
++        if (list_empty(&q->endp->urb_list)) {
++            pr_err("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr);
++            /* Still release the slot, exactly like the other branches do. */
++            bce_notify_submission_complete(sq);
++            continue;
++        }
++        pr_debug("bce-vhci: [%02x] Got a transfer queue completion\n", q->endp_addr);
++        urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++        bce_vhci_urb_transfer_completion(urb->hcpriv, c);
++        bce_notify_submission_complete(sq);
++    }
++    bce_vhci_transfer_queue_deliver_pending(q);
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    bce_vhci_transfer_queue_giveback(q);
++}
++
++/*
++ * Actually pause the endpoint: mark the queue inactive, drop deferred
++ * events, ask the device to enter BCE_VHCI_ENDPOINT_PAUSED and flush the
++ * submission queues.
++ *
++ * Returns 0 on success, the command's status if the state change failed,
++ * or -EINVAL if the device reported a state other than PAUSED. Note that
++ * q->active stays false on the failure paths.
++ */
++int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q)
++{
++ unsigned long flags;
++ int status;
++ u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
++ spin_lock_irqsave(&q->urb_lock, flags);
++ q->active = false;
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ /* Known gap, logged on every pause of a queue that has an OUT side. */
++ if (q->sq_out) {
++ pr_err("bce-vhci: Not implemented: wait for pending output requests\n");
++ }
++ bce_vhci_transfer_queue_remove_pending(q);
++ if ((status = bce_vhci_cmd_endpoint_set_state(
++ &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state)))
++ return status;
++ if (q->state != BCE_VHCI_ENDPOINT_PAUSED)
++ return -EINVAL;
++ /* Flush results are not checked. */
++ if (q->sq_in)
++ bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
++ if (q->sq_out)
++ bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
++ return 0;
++}
++
++static void bce_vhci_urb_resume(struct bce_vhci_urb *urb);
++
++/*
++ * Actually resume the endpoint: ask the device to enter
++ * BCE_VHCI_ENDPOINT_ACTIVE, mark the queue active, then walk the
++ * endpoint's URBs - initialising those still pending (while the budget
++ * lasts) and resuming the others - and finally replay deferred events.
++ *
++ * Returns 0 on success, the command's status if the state change failed,
++ * or -EINVAL if the device reported a state other than ACTIVE.
++ */
++int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q)
++{
++ unsigned long flags;
++ int status;
++ struct urb *urb, *urbt;
++ struct bce_vhci_urb *vurb;
++ u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
++ if ((status = bce_vhci_cmd_endpoint_set_state(
++ &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state)))
++ return status;
++ if (q->state != BCE_VHCI_ENDPOINT_ACTIVE)
++ return -EINVAL;
++ spin_lock_irqsave(&q->urb_lock, flags);
++ q->active = true;
++ list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) {
++ vurb = urb->hcpriv;
++ if (vurb->state == BCE_VHCI_URB_INIT_PENDING) {
++ /* Out of budget: the walk stops here, later URBs are not visited. */
++ if (!bce_vhci_transfer_queue_can_init_urb(q))
++ break;
++ bce_vhci_urb_init(vurb);
++ } else {
++ bce_vhci_urb_resume(vurb);
++ }
++ }
++ bce_vhci_transfer_queue_deliver_pending(q);
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ return 0;
++}
++
++/*
++ * Record that @src wants the queue paused. The endpoint is physically
++ * paused only when no other source holds it paused already; the source bit
++ * is recorded only if that pause (when needed) succeeded.
++ *
++ * Returns 0, or the error from bce_vhci_transfer_queue_do_pause().
++ */
++int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src)
++{
++    int rc = 0;
++
++    mutex_lock(&q->pause_lock);
++    /* Already paused by every bit in src: nothing to do. */
++    if ((q->paused_by & src) == src)
++        goto out_unlock;
++
++    if (!q->paused_by)
++        rc = bce_vhci_transfer_queue_do_pause(q);
++    if (rc == 0)
++        q->paused_by |= src;
++
++out_unlock:
++    mutex_unlock(&q->pause_lock);
++    return rc;
++}
++
++/*
++ * Withdraw the pause request of @src. The endpoint is physically resumed
++ * only when no other source keeps it paused; the source bit is cleared
++ * only if that resume (when needed) succeeded.
++ *
++ * Returns 0, or the error from bce_vhci_transfer_queue_do_resume().
++ */
++int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src)
++{
++    int rc = 0;
++
++    mutex_lock(&q->pause_lock);
++    /* Not paused by src at all: nothing to do. */
++    if (!(q->paused_by & src))
++        goto out_unlock;
++
++    if ((q->paused_by & ~src) == 0)
++        rc = bce_vhci_transfer_queue_do_resume(q);
++    if (rc == 0)
++        q->paused_by &= ~src;
++
++out_unlock:
++    mutex_unlock(&q->pause_lock);
++    return rc;
++}
++
++/*
++ * Work item recovering a stalled endpoint: flush its queues, send an
++ * endpoint reset command, clear the stalled flag and resume the queue.
++ * Does nothing if the queue is no longer stalled when the work runs.
++ */
++static void bce_vhci_transfer_queue_reset_w(struct work_struct *work)
++{
++ unsigned long flags;
++ struct bce_vhci_transfer_queue *q = container_of(work, struct bce_vhci_transfer_queue, w_reset);
++
++ mutex_lock(&q->pause_lock);
++ spin_lock_irqsave(&q->urb_lock, flags);
++ if (!q->stalled) {
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ mutex_unlock(&q->pause_lock);
++ return;
++ }
++ q->active = false;
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ /* Mark the queue paused directly, without going through do_pause(): no SET_STATE command is sent here. */
++ q->paused_by |= BCE_VHCI_PAUSE_INTERNAL_WQ;
++ bce_vhci_transfer_queue_remove_pending(q);
++ if (q->sq_in)
++ bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
++ if (q->sq_out)
++ bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
++ /* The result of the reset command is not checked. */
++ bce_vhci_cmd_endpoint_reset(&q->vhci->cq, q->dev_addr, (u8) (q->endp->desc.bEndpointAddress & 0x8F));
++ spin_lock_irqsave(&q->urb_lock, flags);
++ q->stalled = false;
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ /* resume() takes pause_lock itself, so it has to be released first. */
++ mutex_unlock(&q->pause_lock);
++ bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
++}
++
++/* Schedule the endpoint reset work (bce_vhci_transfer_queue_reset_w) on the vhci state workqueue. */
++void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q)
++{
++    struct bce_vhci *vhci = q->vhci;
++
++    queue_work(vhci->tq_state_wq, &q->w_reset);
++}
++
++/*
++ * Walk the endpoint's URB list in order and initialise URBs that are still
++ * in BCE_VHCI_URB_INIT_PENDING, stopping as soon as the queue has no budget
++ * left for another active URB.
++ */
++static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q)
++{
++    struct urb *cur, *next;
++
++    list_for_each_entry_safe(cur, next, &q->endp->urb_list, urb_list) {
++        struct bce_vhci_urb *priv;
++
++        if (!bce_vhci_transfer_queue_can_init_urb(q))
++            break;
++
++        priv = cur->hcpriv;
++        if (priv->state != BCE_VHCI_URB_INIT_PENDING)
++            continue;
++        bce_vhci_urb_init(priv);
++    }
++}
++
++
++
++static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout);
++
++/*
++ * Enqueue @urb on transfer queue @q.
++ *
++ * Allocates the driver-private bce_vhci_urb, links the URB to its endpoint
++ * and either starts it right away (queue active and budget available) or
++ * leaves it in BCE_VHCI_URB_INIT_PENDING. If the queue is inactive because
++ * it is stalled, an endpoint reset is requested.
++ *
++ * Returns 0 on success, -ENOMEM if the private data cannot be allocated,
++ * or the error from usb_hcd_link_urb_to_ep() / bce_vhci_urb_init(). On
++ * failure the URB is left unlinked with hcpriv == NULL.
++ */
++int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb)
++{
++    unsigned long flags;
++    int status = 0;
++    struct bce_vhci_urb *vurb;
++
++    vurb = kzalloc(sizeof(struct bce_vhci_urb), GFP_KERNEL);
++    if (!vurb)
++        return -ENOMEM;
++    urb->hcpriv = vurb;
++
++    vurb->q = q;
++    vurb->urb = urb;
++    vurb->dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
++    /* Endpoint number 0 is handled as a control transfer. */
++    vurb->is_control = (usb_endpoint_num(&urb->ep->desc) == 0);
++
++    spin_lock_irqsave(&q->urb_lock, flags);
++    status = usb_hcd_link_urb_to_ep(q->vhci->hcd, urb);
++    if (status) {
++        spin_unlock_irqrestore(&q->urb_lock, flags);
++        urb->hcpriv = NULL;
++        kfree(vurb);
++        return status;
++    }
++
++    if (q->active) {
++        if (bce_vhci_transfer_queue_can_init_urb(vurb->q))
++            status = bce_vhci_urb_init(vurb);
++        else
++            vurb->state = BCE_VHCI_URB_INIT_PENDING;
++    } else {
++        if (q->stalled)
++            bce_vhci_transfer_queue_request_reset(q);
++        vurb->state = BCE_VHCI_URB_INIT_PENDING;
++    }
++    if (status) {
++        usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb);
++        urb->hcpriv = NULL;
++        kfree(vurb);
++    } else {
++        /* Events may have been deferred while no URB was queued; replay them now. */
++        bce_vhci_transfer_queue_deliver_pending(q);
++    }
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    pr_debug("bce-vhci: [%02x] URB enqueued (dir = %s, size = %i)\n", q->endp_addr,
++            usb_urb_dir_in(urb) ? "IN" : "OUT", urb->transfer_buffer_length);
++    return status;
++}
++
++/*
++ * Start processing @vurb and charge it against the queue's active-request
++ * budget. Control URBs just enter the "waiting for setup request" state;
++ * other URBs start their data phase immediately.
++ *
++ * Returns 0 on success, -EINVAL when the budget is exhausted, or the error
++ * from bce_vhci_urb_data_start() (the budget is not charged then).
++ */
++static int bce_vhci_urb_init(struct bce_vhci_urb *vurb)
++{
++    int rc;
++
++    if (vurb->q->remaining_active_requests == 0) {
++        pr_err("bce-vhci: cannot init request (remaining_active_requests = 0)\n");
++        return -EINVAL;
++    }
++
++    if (!vurb->is_control) {
++        rc = bce_vhci_urb_data_start(vurb, NULL);
++    } else {
++        vurb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST;
++        rc = 0;
++    }
++
++    if (rc == 0)
++        vurb->q->remaining_active_requests--;
++    return rc;
++}
++
++/*
++ * Finish @urb with @status: unlink the real URB from its endpoint, release
++ * the active-request budget it held (if it had been initialised), free the
++ * driver-private data and queue the real URB on the giveback list.
++ *
++ * @urb is freed here and must not be touched by the caller afterwards. The
++ * actual usb_hcd_giveback_urb() call happens later in
++ * bce_vhci_transfer_queue_giveback().
++ */
++static void bce_vhci_urb_complete(struct bce_vhci_urb *urb, int status)
++{
++ struct bce_vhci_transfer_queue *q = urb->q;
++ struct bce_vhci *vhci = q->vhci;
++ struct urb *real_urb = urb->urb;
++ pr_debug("bce-vhci: [%02x] URB complete %i\n", q->endp_addr, status);
++ usb_hcd_unlink_urb_from_ep(vhci->hcd, real_urb);
++ real_urb->hcpriv = NULL;
++ real_urb->status = status;
++ /* URBs that never left INIT_PENDING were never charged against the budget. */
++ if (urb->state != BCE_VHCI_URB_INIT_PENDING)
++ ++urb->q->remaining_active_requests;
++ kfree(urb);
++ list_add_tail(&real_urb->urb_list, &q->giveback_urb_list);
++}
++
++/*
++ * Cancel (unlink) @urb with @status.
++ *
++ * A URB that has not been posted to the device is simply removed. One that
++ * has is cancelled by pausing the queue, removing the URB and resuming the
++ * queue once the URB has been given back.
++ *
++ * Returns 0, or the error from usb_hcd_check_unlink_urb().
++ *
++ * NOTE(review): urb_lock is dropped around the pause call while vurb is
++ * still referenced; if the URB can complete in that window,
++ * bce_vhci_urb_complete() would free vurb and the later vurb->state read
++ * and kfree(vurb) would act on freed memory - confirm this cannot happen.
++ * NOTE(review): the results of the pause/resume calls are ignored.
++ */
++int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status)
++{
++ struct bce_vhci_urb *vurb;
++ unsigned long flags;
++ int ret;
++
++ spin_lock_irqsave(&q->urb_lock, flags);
++ if ((ret = usb_hcd_check_unlink_urb(q->vhci->hcd, urb, status))) {
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ return ret;
++ }
++
++ vurb = urb->hcpriv;
++ /* If the URB wasn't posted to the device yet, we can still remove it on the host without pausing the queue. */
++ if (vurb->state != BCE_VHCI_URB_INIT_PENDING) {
++ pr_debug("bce-vhci: [%02x] Cancelling URB\n", q->endp_addr);
++
++ /* The pause path takes a mutex and sends commands, so the spinlock is released around it. */
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++ bce_vhci_transfer_queue_pause(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
++ spin_lock_irqsave(&q->urb_lock, flags);
++
++ /* Return the budget slot the URB occupied. */
++ ++q->remaining_active_requests;
++ }
++
++ usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb);
++
++ spin_unlock_irqrestore(&q->urb_lock, flags);
++
++ usb_hcd_giveback_urb(q->vhci->hcd, urb, status);
++
++ if (vurb->state != BCE_VHCI_URB_INIT_PENDING)
++ bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
++
++ kfree(vurb);
++
++ return 0;
++}
++
++/*
++ * Start (or continue) an IN data transfer for @urb: send a
++ * BCE_VHCI_CMD_TRANSFER_REQUEST for the remaining length on the
++ * asynchronous message queue and post the matching receive buffer on the
++ * endpoint's IN submission queue.
++ *
++ * @timeout is passed through to bce_reserve_submission().
++ * Returns 0 on success or -ENOMEM if either reservation failed.
++ */
++static int bce_vhci_urb_data_transfer_in(struct bce_vhci_urb *urb, unsigned long *timeout)
++{
++ struct bce_vhci_message msg;
++ struct bce_qe_submission *s;
++ u32 tr_len;
++ int reservation1, reservation2 = -EFAULT;
++
++ pr_debug("bce-vhci: [%02x] DMA from device %llx %x\n", urb->q->endp_addr,
++ (u64) urb->urb->transfer_dma, urb->urb->transfer_buffer_length);
++
++ /* Reserve both a message and a submission, so we don't run into issues later. */
++ reservation1 = bce_reserve_submission(urb->q->vhci->msg_asynchronous.sq, timeout);
++ if (!reservation1)
++ reservation2 = bce_reserve_submission(urb->q->sq_in, timeout);
++ if (reservation1 || reservation2) {
++ pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n");
++ /* Only the first reservation succeeded: give it back. */
++ if (!reservation1)
++ bce_cancel_submission_reservation(urb->q->vhci->msg_asynchronous.sq);
++ return -ENOMEM;
++ }
++
++ /* Continue from whatever has been received so far. */
++ urb->send_offset = urb->receive_offset;
++
++ tr_len = urb->urb->transfer_buffer_length - urb->send_offset;
++
++ spin_lock(&urb->q->vhci->msg_asynchronous_lock);
++ msg.cmd = BCE_VHCI_CMD_TRANSFER_REQUEST;
++ msg.status = 0;
++ msg.param1 = ((urb->urb->ep->desc.bEndpointAddress & 0x8Fu) << 8) | urb->q->dev_addr;
++ msg.param2 = tr_len;
++ bce_vhci_message_queue_write(&urb->q->vhci->msg_asynchronous, &msg);
++ spin_unlock(&urb->q->vhci->msg_asynchronous_lock);
++
++ s = bce_next_submission(urb->q->sq_in);
++ bce_set_submission_single(s, urb->urb->transfer_dma + urb->send_offset, tr_len);
++ bce_submit_to_device(urb->q->sq_in);
++
++ urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION;
++ return 0;
++}
++
++/*
++ * Begin the data phase of @urb. IN URBs issue their transfer request right
++ * away. OUT URBs only change state: they wait for the device's transfer
++ * request, or are considered complete at once when there is no data.
++ *
++ * Returns 0 for OUT URBs, otherwise the result of
++ * bce_vhci_urb_data_transfer_in().
++ */
++static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout)
++{
++    if (urb->dir != DMA_TO_DEVICE)
++        return bce_vhci_urb_data_transfer_in(urb, timeout);
++
++    urb->state = (urb->urb->transfer_buffer_length > 0)
++            ? BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST
++            : BCE_VHCI_URB_DATA_TRANSFER_COMPLETE;
++    return 0;
++}
++
++/*
++ * Post @size bytes at DMA address @addr on the endpoint's OUT submission
++ * queue. The reservation is attempted with a zero timeout.
++ *
++ * Returns 0 on success or -EPIPE if no submission could be reserved.
++ */
++static int bce_vhci_urb_send_out_data(struct bce_vhci_urb *urb, dma_addr_t addr, size_t size)
++{
++    struct bce_qe_submission *s;
++    unsigned long timeout = 0;
++
++    if (bce_reserve_submission(urb->q->sq_out, &timeout)) {
++        pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n");
++        return -EPIPE;
++    }
++
++    /* size is a size_t, so it takes the 'z' length modifier rather than 'l'. */
++    pr_debug("bce-vhci: [%02x] DMA to device %llx %zx\n", urb->q->endp_addr, (u64) addr, size);
++
++    s = bce_next_submission(urb->q->sq_out);
++    bce_set_submission_single(s, addr, size);
++    bce_submit_to_device(urb->q->sq_out);
++    return 0;
++}
++
++/*
++ * Apply event @msg to the data phase of @urb.
++ *
++ * The only message consumed here is a transfer request while the URB waits
++ * for one: the next chunk of OUT data (limited to the length in param2) is
++ * posted and the URB moves to WAITING_FOR_COMPLETION.
++ *
++ * Returns 0 when the message was consumed, the error from
++ * bce_vhci_urb_send_out_data(), or -EAGAIN for anything else.
++ */
++static int bce_vhci_urb_data_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
++{
++ u32 tr_len;
++ int status;
++ if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST) {
++ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) {
++ /* Never send more than the device asked for, nor more than is left. */
++ tr_len = min(urb->urb->transfer_buffer_length - urb->send_offset, (u32) msg->param2);
++ if ((status = bce_vhci_urb_send_out_data(urb, urb->urb->transfer_dma + urb->send_offset, tr_len)))
++ return status;
++ urb->send_offset += tr_len;
++ urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION;
++ return 0;
++ }
++ }
++
++ /* 0x1000 in out queues aren't really unexpected */
++ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL)
++ return -EAGAIN;
++ /* Both exits return -EAGAIN; this one additionally logs the unexpected message. */
++ pr_err("bce-vhci: [%02x] %s URB unexpected message (state = %x, msg: %x %x %x %llx)\n",
++ urb->q->endp_addr, (urb->is_control ? "Control (data update)" : "Data"), urb->state,
++ msg->cmd, msg->status, msg->param1, msg->param2);
++ return -EAGAIN;
++}
++
++/*
++ * Account for a finished DMA transfer of c->data_size bytes on @urb.
++ *
++ * An IN transfer is considered finished after a single completion; an OUT
++ * transfer once the whole buffer has been accounted for. Non-control URBs
++ * are completed (and freed) right here.
++ *
++ * Returns -ENOENT when @urb was completed and must no longer be used,
++ * 0 otherwise (including the unexpected-completion case, which is logged).
++ */
++static int bce_vhci_urb_data_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
++{
++ if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
++ urb->receive_offset += c->data_size;
++ if (urb->dir == DMA_FROM_DEVICE || urb->receive_offset >= urb->urb->transfer_buffer_length) {
++ urb->urb->actual_length = (u32) urb->receive_offset;
++ urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE;
++ /* Control URBs are not completed here; bce_vhci_urb_control_check_status() completes them once a status has been received. */
++ if (!urb->is_control) {
++ bce_vhci_urb_complete(urb, 0);
++ return -ENOENT;
++ }
++ }
++ } else {
++ pr_err("bce-vhci: [%02x] Data URB unexpected completion\n", urb->q->endp_addr);
++ }
++ return 0;
++}
++
++
++/*
++ * Complete control URB @urb if a transfer status has been received and the
++ * URB is in a state where it can finish: either its data phase is done, or
++ * the status is a failure and the setup stage is already behind it.
++ *
++ * A failure status completes the URB with -EPIPE, marks the queue inactive
++ * and stalled, and requests an endpoint reset if more URBs are queued.
++ *
++ * Returns -ENOENT when @urb was completed (and freed), 0 otherwise.
++ */
++static int bce_vhci_urb_control_check_status(struct bce_vhci_urb *urb)
++{
++ struct bce_vhci_transfer_queue *q = urb->q;
++ /* 0 means no status message has been recorded yet. */
++ if (urb->received_status == 0)
++ return 0;
++ if (urb->state == BCE_VHCI_URB_DATA_TRANSFER_COMPLETE ||
++ (urb->received_status != BCE_VHCI_SUCCESS && urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST &&
++ urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION)) {
++ urb->state = BCE_VHCI_URB_CONTROL_COMPLETE;
++ if (urb->received_status != BCE_VHCI_SUCCESS) {
++ pr_err("bce-vhci: [%02x] URB failed: %x\n", urb->q->endp_addr, urb->received_status);
++ urb->q->active = false;
++ urb->q->stalled = true;
++ bce_vhci_urb_complete(urb, -EPIPE);
++ /* urb is freed at this point; only the cached q may be used. */
++ if (!list_empty(&q->endp->urb_list))
++ bce_vhci_transfer_queue_request_reset(q);
++ return -ENOENT;
++ }
++ bce_vhci_urb_complete(urb, 0);
++ return -ENOENT;
++ }
++ return 0;
++}
++
++/*
++ * Feed a firmware message to a control URB.
++ *
++ * A CONTROL_TRANSFER_STATUS message records the status and may complete the
++ * URB. A TRANSFER_REQUEST while waiting for the setup request sends the
++ * setup packet; messages during the data stage are forwarded to the data
++ * handler followed by a status check.
++ *
++ * Returns 0 when the message was consumed, -EAGAIN when it did not apply to
++ * the current state, or whatever the data/status helpers return.
++ */
++static int bce_vhci_urb_control_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
++{
++    int rc;
++
++    if (msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) {
++        urb->received_status = msg->status;
++        return bce_vhci_urb_control_check_status(urb);
++    }
++
++    switch (urb->state) {
++    case BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST:
++        if (msg->cmd != BCE_VHCI_CMD_TRANSFER_REQUEST)
++            break;
++        if (bce_vhci_urb_send_out_data(urb, urb->urb->setup_dma, sizeof(struct usb_ctrlrequest))) {
++            pr_err("bce-vhci: [%02x] Failed to start URB setup transfer\n", urb->q->endp_addr);
++            return 0; /* TODO: fail the URB? */
++        }
++        urb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION;
++        pr_debug("bce-vhci: [%02x] Sent setup %llx\n", urb->q->endp_addr, urb->urb->setup_dma);
++        return 0;
++    case BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST:
++    case BCE_VHCI_URB_WAITING_FOR_COMPLETION:
++        rc = bce_vhci_urb_data_update(urb, msg);
++        if (rc)
++            return rc;
++        return bce_vhci_urb_control_check_status(urb);
++    default:
++        break;
++    }
++
++    /* 0x1000 in out queues aren't really unexpected, so stay quiet for those */
++    if (msg->cmd != BCE_VHCI_CMD_TRANSFER_REQUEST || urb->q->sq_out == NULL)
++        pr_err("bce-vhci: [%02x] Control URB unexpected message (state = %x, msg: %x %x %x %llx)\n", urb->q->endp_addr,
++               urb->state, msg->cmd, msg->status, msg->param1, msg->param2);
++    return -EAGAIN;
++}
++
++/*
++ * Handle a submission-queue completion for a control URB.
++ *
++ * When the setup packet has completed, the data stage is started (the URB
++ * is completed with the error if that fails). Completions during the data
++ * stage go through the data handler and then the status check.
++ *
++ * Returns -ENOENT after the URB has been completed, otherwise 0 or the
++ * helper's return value.
++ */
++static int bce_vhci_urb_control_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
++{
++    int rc;
++    unsigned long timeout;
++
++    switch (urb->state) {
++    case BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION:
++        if (c->data_size != sizeof(struct usb_ctrlrequest))
++            pr_err("bce-vhci: [%02x] transfer complete data size mistmatch for usb_ctrlrequest (%llx instead of %lx)\n",
++                   urb->q->endp_addr, c->data_size, sizeof(struct usb_ctrlrequest));
++
++        timeout = 1000;
++        rc = bce_vhci_urb_data_start(urb, &timeout);
++        if (!rc)
++            return 0;
++        bce_vhci_urb_complete(urb, rc);
++        return -ENOENT;
++    case BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST:
++    case BCE_VHCI_URB_WAITING_FOR_COMPLETION:
++        rc = bce_vhci_urb_data_transfer_completion(urb, c);
++        if (rc)
++            return rc;
++        return bce_vhci_urb_control_check_status(urb);
++    default:
++        pr_err("bce-vhci: [%02x] Control URB unexpected completion (state = %x)\n", urb->q->endp_addr, urb->state);
++        break;
++    }
++    return 0;
++}
++
++/*
++ * Dispatch a firmware message to the control or data handler of @urb.
++ * A URB that is still in INIT_PENDING cannot take messages: -EAGAIN.
++ */
++static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
++{
++    if (urb->state == BCE_VHCI_URB_INIT_PENDING)
++        return -EAGAIN;
++
++    return urb->is_control ? bce_vhci_urb_control_update(urb, msg)
++                           : bce_vhci_urb_data_update(urb, msg);
++}
++
++/* Dispatch a submission-queue completion to the control or data handler of @urb. */
++static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
++{
++    return urb->is_control ? bce_vhci_urb_control_transfer_completion(urb, c)
++                           : bce_vhci_urb_data_transfer_completion(urb, c);
++}
++
++/*
++ * Resume a URB that was waiting for a completion by calling
++ * bce_vhci_urb_data_transfer_in() again; URBs in any other state are left
++ * alone. If that call fails, the URB is completed with the error.
++ */
++static void bce_vhci_urb_resume(struct bce_vhci_urb *urb)
++{
++    int rc;
++
++    if (urb->state != BCE_VHCI_URB_WAITING_FOR_COMPLETION)
++        return;
++
++    rc = bce_vhci_urb_data_transfer_in(urb, NULL);
++    if (rc)
++        bce_vhci_urb_complete(urb, rc);
++}
+diff --git a/drivers/staging/apple-bce/vhci/transfer.h b/drivers/staging/apple-bce/vhci/transfer.h
+new file mode 100644
+index 000000000..6a62a00b2
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/transfer.h
+@@ -0,0 +1,71 @@
++#ifndef BCEDRIVER_TRANSFER_H
++#define BCEDRIVER_TRANSFER_H
++
++#include <linux/usb.h>
++#include "queue.h"
++#include "command.h"
++#include "../queue.h"
++
++struct bce_vhci_list_message { /* a bce_vhci_message bundled with the list_head needed to chain it on a list */
++ struct list_head list;
++ struct bce_vhci_message msg;
++};
++/*
++ * Who asked for a transfer queue to be paused. Every value is a distinct
++ * single bit (presumably so that several sources can be tracked at once in
++ * bce_vhci_transfer_queue::paused_by -- confirm in transfer.c).
++ */
++enum bce_vhci_pause_source {
++    BCE_VHCI_PAUSE_INTERNAL_WQ = 1 << 0,
++    BCE_VHCI_PAUSE_FIRMWARE    = 1 << 1,
++    BCE_VHCI_PAUSE_SUSPEND     = 1 << 2,
++    BCE_VHCI_PAUSE_SHUTDOWN    = 1 << 3
++};
++/*
++ * State of one endpoint's transfer queue.
++ * NOTE(review): the per-field remarks are derived from the uses visible in
++ * vhci.c and the URB handlers; confirm the rest against transfer.c.
++ */
++struct bce_vhci_transfer_queue {
++    struct bce_vhci *vhci;
++    struct usb_host_endpoint *endp;
++    enum bce_vhci_endpoint_state state;
++    u32 max_active_requests;
++    u32 remaining_active_requests;
++    bool active;
++    bool stalled;              /* set on URB failure and on firmware ENDPOINT_STALLED */
++    u32 paused_by;
++    bce_vhci_device_t dev_addr;
++    u8 endp_addr;              /* printed as the [%02x] tag in log messages */
++    struct bce_queue_cq *cq;
++    struct bce_queue_sq *sq_in;
++    struct bce_queue_sq *sq_out; /* NULL is checked for by the URB update handlers */
++    struct list_head evq;
++    struct spinlock urb_lock;
++    struct mutex pause_lock;
++    struct list_head giveback_urb_list;
++
++    struct work_struct w_reset;
++};
++enum bce_vhci_urb_state { /* per-URB state machine shared by data and control transfers */
++ BCE_VHCI_URB_INIT_PENDING, /* bce_vhci_urb_update() answers -EAGAIN while in this state */
++
++ BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST,
++ BCE_VHCI_URB_WAITING_FOR_COMPLETION, /* the only state in which a data completion is expected */
++ BCE_VHCI_URB_DATA_TRANSFER_COMPLETE, /* data stage done; control URBs still go through the status check */
++
++ BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST,
++ BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION,
++ BCE_VHCI_URB_CONTROL_COMPLETE
++};
++struct bce_vhci_urb { /* driver-side bookkeeping attached to one struct urb */
++ struct urb *urb;
++ struct bce_vhci_transfer_queue *q;
++ enum dma_data_direction dir;
++ bool is_control; /* selects the control vs. data handlers in bce_vhci_urb_update() */
++ enum bce_vhci_urb_state state;
++ int received_status; /* 0 until a CONTROL_TRANSFER_STATUS message has been recorded */
++ u32 send_offset; /* advanced by each chunk sent out */
++ u32 receive_offset; /* advanced by each completion's data_size; becomes urb->actual_length */
++};
++
++void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q,
++ struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir);
++void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q);
++void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg);
++int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src);
++int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src);
++void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q);
++
++int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb);
++int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status);
++
++#endif //BCEDRIVER_TRANSFER_H
+diff --git a/drivers/staging/apple-bce/vhci/vhci.c b/drivers/staging/apple-bce/vhci/vhci.c
+new file mode 100644
+index 000000000..053a9f39e
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/vhci.c
+@@ -0,0 +1,759 @@
++#include "vhci.h"
++#include "../apple_bce.h"
++#include "command.h"
++#include <linux/usb.h>
++#include <linux/usb/hcd.h>
++#include <linux/module.h>
++#include <linux/version.h>
++
++static dev_t bce_vhci_chrdev;
++static struct class *bce_vhci_class;
++static const struct hc_driver bce_vhci_driver;
++static u16 bce_vhci_port_mask = U16_MAX;
++
++static int bce_vhci_create_event_queues(struct bce_vhci *vhci);
++static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci);
++static int bce_vhci_create_message_queues(struct bce_vhci *vhci);
++static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci);
++static void bce_vhci_handle_firmware_events_w(struct work_struct *ws);
++static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq);
++
++/*
++ * Bring up the virtual host controller: create the "bce-vhci" device node,
++ * the message queues, the ordered state workqueue, the event queues and
++ * finally the USB HCD.
++ *
++ * The workqueue and w_fw_events are set up before the event queues because
++ * bce_vhci_firmware_event_completion() queues w_fw_events on tq_state_wq as
++ * soon as a firmware event completes.
++ *
++ * Returns 0 on success or a negative errno; on failure everything that was
++ * created here is torn down again (including the HCD reference and the
++ * workqueue).
++ */
++int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci)
++{
++    int status;
++
++    spin_lock_init(&vhci->hcd_spinlock);
++
++    vhci->dev = dev;
++
++    vhci->vdevt = bce_vhci_chrdev;
++    vhci->vdev = device_create(bce_vhci_class, dev->dev, vhci->vdevt, NULL, "bce-vhci");
++    if (IS_ERR_OR_NULL(vhci->vdev)) {
++        /* PTR_ERR(NULL) is 0; the fail_dev fallback turns that into -EINVAL. */
++        status = PTR_ERR(vhci->vdev);
++        goto fail_dev;
++    }
++
++    if ((status = bce_vhci_create_message_queues(vhci)))
++        goto fail_mq;
++
++    vhci->tq_state_wq = alloc_ordered_workqueue("bce-vhci-tq-state", 0);
++    if (!vhci->tq_state_wq) {
++        status = -ENOMEM;
++        goto fail_wq;
++    }
++    INIT_WORK(&vhci->w_fw_events, bce_vhci_handle_firmware_events_w);
++
++    if ((status = bce_vhci_create_event_queues(vhci)))
++        goto fail_eq;
++
++    vhci->hcd = usb_create_hcd(&bce_vhci_driver, vhci->vdev, "bce-vhci");
++    if (!vhci->hcd) {
++        status = -ENOMEM;
++        goto fail_hcd;
++    }
++    vhci->hcd->self.sysdev = &dev->pci->dev;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0)
++    vhci->hcd->self.uses_dma = 1;
++#endif
++    *((struct bce_vhci **) vhci->hcd->hcd_priv) = vhci;
++    vhci->hcd->speed = HCD_USB2;
++
++    if ((status = usb_add_hcd(vhci->hcd, 0, 0)))
++        goto fail_add_hcd;
++
++    return 0;
++
++fail_add_hcd:
++    /* usb_create_hcd() took a reference that usb_add_hcd() did not consume. */
++    usb_put_hcd(vhci->hcd);
++fail_hcd:
++    bce_vhci_destroy_event_queues(vhci);
++fail_eq:
++    cancel_work_sync(&vhci->w_fw_events);
++    destroy_workqueue(vhci->tq_state_wq);
++fail_wq:
++    bce_vhci_destroy_message_queues(vhci);
++fail_mq:
++    device_destroy(bce_vhci_class, vhci->vdevt);
++fail_dev:
++    if (!status)
++        status = -EINVAL;
++    return status;
++}
++
++/*
++ * Tear down everything bce_vhci_create() set up: the HCD, the event queues,
++ * the firmware-event work and its workqueue, the message queues, the HCD
++ * reference and the device node.
++ */
++void bce_vhci_destroy(struct bce_vhci *vhci)
++{
++    usb_remove_hcd(vhci->hcd);
++    bce_vhci_destroy_event_queues(vhci);
++    /* No new firmware events can be queued now; drop pending work and the wq. */
++    cancel_work_sync(&vhci->w_fw_events);
++    if (vhci->tq_state_wq)
++        destroy_workqueue(vhci->tq_state_wq);
++    bce_vhci_destroy_message_queues(vhci);
++    /* Release the reference taken by usb_create_hcd(). */
++    usb_put_hcd(vhci->hcd);
++    device_destroy(bce_vhci_class, vhci->vdevt);
++}
++
++/* Fetch the bce_vhci pointer that bce_vhci_create() stored in the HCD private area. */
++struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd)
++{
++    struct bce_vhci **slot = (struct bce_vhci **) hcd->hcd_priv;
++
++    return *slot;
++}
++
++/*
++ * HCD .start callback: enable and start the firmware controller and derive
++ * the port count from the port mask it reports.
++ *
++ * Returns 0 on success or the failing command's status.
++ */
++int bce_vhci_start(struct usb_hcd *hcd)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    bce_vhci_port_t count = 0;
++    u16 mask = 0;
++    int rc;
++
++    rc = bce_vhci_cmd_controller_enable(&vhci->cq, 1, &mask);
++    if (rc)
++        return rc;
++    vhci->port_mask = mask;
++    vhci->port_power_mask = 0;
++
++    rc = bce_vhci_cmd_controller_start(&vhci->cq);
++    if (rc)
++        return rc;
++
++    /* port_count = position of the highest set bit in port_mask, plus one */
++    for (mask = vhci->port_mask; mask; mask >>= 1)
++        count += 1;
++    vhci->port_count = count;
++    return 0;
++}
++
++/* HCD .stop callback: send the controller-disable command to the firmware. */
++void bce_vhci_stop(struct usb_hcd *hcd)
++{
++    bce_vhci_cmd_controller_disable(&bce_vhci_from_hcd(hcd)->cq);
++}
++
++static int bce_vhci_hub_status_data(struct usb_hcd *hcd, char *buf)
++{
++ return 0; /* stub: always returns 0 and never writes to buf */
++}
++
++static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout);
++
++static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength)
++{ /* HCD .hub_control callback: answers root-hub requests locally or via firmware port commands; -EIO if unhandled */
++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++ int status;
++ struct usb_hub_descriptor *hd;
++ struct usb_hub_status *hs;
++ struct usb_port_status *ps;
++ u32 port_status;
++ // pr_info("bce-vhci: bce_vhci_hub_control %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength);
++ if (typeReq == GetHubDescriptor && wLength >= sizeof(struct usb_hub_descriptor)) { /* a too-small buffer falls through to the unhandled path */
++ hd = (struct usb_hub_descriptor *) buf;
++ memset(hd, 0, sizeof(*hd));
++ hd->bDescLength = sizeof(struct usb_hub_descriptor);
++ hd->bDescriptorType = USB_DT_HUB;
++ hd->bNbrPorts = (u8) vhci->port_count;
++ hd->wHubCharacteristics = HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_INDV_PORT_OCPM;
++ hd->bPwrOn2PwrGood = 0;
++ hd->bHubContrCurrent = 0;
++ return 0;
++ } else if (typeReq == GetHubStatus && wLength >= sizeof(struct usb_hub_status)) {
++ hs = (struct usb_hub_status *) buf;
++ memset(hs, 0, sizeof(*hs));
++ hs->wHubStatus = 0;
++ hs->wHubChange = 0;
++ return 0;
++ } else if (typeReq == GetPortStatus && wLength >= 4 /* usb 2.0 */) {
++ ps = (struct usb_port_status *) buf;
++ ps->wPortStatus = 0;
++ ps->wPortChange = 0;
++
++ if (vhci->port_power_mask & BIT(wIndex)) /* NOTE(review): wIndex is not range-checked before BIT() here or below */
++ ps->wPortStatus |= USB_PORT_STAT_POWER;
++
++ if (!(bce_vhci_port_mask & BIT(wIndex))) /* ports disabled by the vhci_port_mask module parameter report only the power bit */
++ return 0;
++
++ if ((status = bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0, &port_status)))
++ return status;
++
++ if (port_status & 16) /* firmware status bits are translated to USB_PORT_STAT_* from here on */
++ ps->wPortStatus |= USB_PORT_STAT_ENABLE | USB_PORT_STAT_HIGH_SPEED;
++ if (port_status & 4)
++ ps->wPortStatus |= USB_PORT_STAT_CONNECTION;
++ if (port_status & 2)
++ ps->wPortStatus |= USB_PORT_STAT_OVERCURRENT;
++ if (port_status & 8)
++ ps->wPortStatus |= USB_PORT_STAT_RESET;
++ if (port_status & 0x60)
++ ps->wPortStatus |= USB_PORT_STAT_SUSPEND;
++
++ if (port_status & 0x40000)
++ ps->wPortChange |= USB_PORT_STAT_C_CONNECTION;
++
++ pr_debug("bce-vhci: Translated status %x to %x:%x\n", port_status, ps->wPortStatus, ps->wPortChange);
++ return 0;
++ } else if (typeReq == SetPortFeature) {
++ if (wValue == USB_PORT_FEAT_POWER) {
++ status = bce_vhci_cmd_port_power_on(&vhci->cq, (u8) wIndex);
++ /* As far as I am aware, power status is not part of the port status so store it separately */
++ if (!status)
++ vhci->port_power_mask |= BIT(wIndex);
++ return status;
++ }
++ if (wValue == USB_PORT_FEAT_RESET) {
++ return bce_vhci_reset_device(vhci, wIndex, wValue); /* NOTE(review): the feature selector wValue is passed as the timeout argument -- confirm intended */
++ }
++ if (wValue == USB_PORT_FEAT_SUSPEND) {
++ /* TODO: Am I supposed to also suspend the endpoints? */
++ pr_debug("bce-vhci: Suspending port %i\n", wIndex);
++ return bce_vhci_cmd_port_suspend(&vhci->cq, (u8) wIndex);
++ }
++ } else if (typeReq == ClearPortFeature) {
++ if (wValue == USB_PORT_FEAT_ENABLE)
++ return bce_vhci_cmd_port_disable(&vhci->cq, (u8) wIndex);
++ if (wValue == USB_PORT_FEAT_POWER) {
++ status = bce_vhci_cmd_port_power_off(&vhci->cq, (u8) wIndex);
++ if (!status)
++ vhci->port_power_mask &= ~BIT(wIndex);
++ return status;
++ }
++ if (wValue == USB_PORT_FEAT_C_CONNECTION)
++ return bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0x40000, &port_status); /* 0x40000 is the bit reported as C_CONNECTION above; presumably this acknowledges it -- confirm */
++ if (wValue == USB_PORT_FEAT_C_RESET) { /* I don't think I can transfer it in any way */
++ return 0;
++ }
++ if (wValue == USB_PORT_FEAT_SUSPEND) {
++ pr_debug("bce-vhci: Resuming port %i\n", wIndex);
++ return bce_vhci_cmd_port_resume(&vhci->cq, (u8) wIndex);
++ }
++ }
++ pr_err("bce-vhci: bce_vhci_hub_control unhandled request: %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength);
++ dump_stack();
++ return -EIO;
++}
++
++/*
++ * HCD .enable_device callback: create the firmware-side device for the port
++ * @udev sits on, allocate the driver's per-device state and set up the
++ * endpoint-0 transfer queue. A port that already has a device is left
++ * untouched.
++ *
++ * Returns 0 on success, -EINVAL for a port number outside port_to_device[],
++ * -EIO when the firmware command fails or hands back a device id outside
++ * devices[], and -ENOMEM when the per-device state cannot be allocated.
++ */
++static int bce_vhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    struct bce_vhci_device *vdev;
++    bce_vhci_device_t devid;
++    pr_info("bce_vhci_enable_device\n");
++
++    if (udev->portnum >= ARRAY_SIZE(vhci->port_to_device))
++        return -EINVAL;
++    if (vhci->port_to_device[udev->portnum])
++        return 0;
++
++    /* We need to early address the device */
++    if (bce_vhci_cmd_device_create(&vhci->cq, udev->portnum, &devid))
++        return -EIO;
++
++    pr_info("bce_vhci_cmd_device_create %i -> %i\n", udev->portnum, devid);
++
++    /* The id comes from the firmware; never index devices[] with it unchecked. */
++    if (devid >= ARRAY_SIZE(vhci->devices)) {
++        bce_vhci_cmd_device_destroy(&vhci->cq, devid);
++        return -EIO;
++    }
++
++    vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
++    if (!vdev) {
++        /* Undo the firmware-side creation so the id is not left dangling. */
++        bce_vhci_cmd_device_destroy(&vhci->cq, devid);
++        return -ENOMEM;
++    }
++    vhci->port_to_device[udev->portnum] = devid;
++    vhci->devices[devid] = vdev;
++
++    bce_vhci_create_transfer_queue(vhci, &vdev->tq[0], &udev->ep0, devid, DMA_BIDIRECTIONAL);
++    udev->ep0.hcpriv = &vdev->tq[0];
++    vdev->tq_mask |= BIT(0);
++
++    bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &udev->ep0.desc);
++    return 0;
++}
++
++static int bce_vhci_address_device(struct usb_hcd *hcd, struct usb_device *udev)
++{
++ /* Entry point for the old enumeration scheme; it does exactly what enable_device does. */
++ return bce_vhci_enable_device(hcd, udev);
++}
++
++/*
++ * HCD .free_dev callback: pause and destroy every transfer queue of the
++ * device on @udev's port, destroy the firmware-side endpoints and device,
++ * clear both lookup tables and free the per-device state. Does nothing if
++ * the port has no device.
++ */
++static void bce_vhci_free_device(struct usb_hcd *hcd, struct usb_device *udev)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    struct bce_vhci_device *vdev;
++    bce_vhci_device_t devid;
++    int ep;
++
++    pr_info("bce_vhci_free_device %i\n", udev->portnum);
++    devid = vhci->port_to_device[udev->portnum];
++    if (!devid)
++        return;
++
++    vdev = vhci->devices[devid];
++    for (ep = 0; ep < 32; ep++) {
++        if (!(vdev->tq_mask & BIT(ep)))
++            continue;
++        bce_vhci_transfer_queue_pause(&vdev->tq[ep], BCE_VHCI_PAUSE_SHUTDOWN);
++        bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) ep);
++        bce_vhci_destroy_transfer_queue(vhci, &vdev->tq[ep]);
++    }
++
++    vhci->devices[devid] = NULL;
++    vhci->port_to_device[udev->portnum] = 0;
++    bce_vhci_cmd_device_destroy(&vhci->cq, devid);
++    kfree(vdev);
++}
++
++/*
++ * Reset port @index. If a device is bound to the port, its transfer queues,
++ * firmware endpoints and firmware device are destroyed first and re-created
++ * after the reset, re-using the same bce_vhci_device.
++ *
++ * Returns -EINVAL for a port outside port_to_device[]; otherwise the status
++ * of the firmware device re-creation when a device was bound, or the status
++ * of the port reset command when not.
++ * NOTE(review): with a bound device the port reset status is overwritten by
++ * the device_create status, and the device state stays unmapped if
++ * re-creation fails -- confirm this is intended.
++ */
++static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout)
++{
++    struct bce_vhci_device *dev = NULL;
++    bce_vhci_device_t devid;
++    int i;
++    int status;
++    enum dma_data_direction dir;
++    pr_info("bce_vhci_reset_device %i\n", index);
++
++    /* index comes straight from wIndex; keep it inside the lookup table. */
++    if (index < 0 || index >= (int) ARRAY_SIZE(vhci->port_to_device))
++        return -EINVAL;
++
++    devid = vhci->port_to_device[index];
++    if (devid)
++        dev = vhci->devices[devid];
++    if (dev) {
++        for (i = 0; i < 32; i++) {
++            if (dev->tq_mask & BIT(i)) {
++                bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN);
++                bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i);
++                bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]);
++            }
++        }
++        vhci->devices[devid] = NULL;
++        vhci->port_to_device[index] = 0;
++        bce_vhci_cmd_device_destroy(&vhci->cq, devid);
++    }
++    status = bce_vhci_cmd_port_reset(&vhci->cq, (u8) index, timeout);
++
++    if (dev) {
++        if ((status = bce_vhci_cmd_device_create(&vhci->cq, index, &devid)))
++            return status;
++        vhci->devices[devid] = dev;
++        vhci->port_to_device[index] = devid;
++
++        for (i = 0; i < 32; i++) {
++            if (dev->tq_mask & BIT(i)) {
++                dir = usb_endpoint_dir_in(&dev->tq[i].endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
++                if (i == 0)
++                    dir = DMA_BIDIRECTIONAL;
++                bce_vhci_create_transfer_queue(vhci, &dev->tq[i], dev->tq[i].endp, devid, dir);
++                bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &dev->tq[i].endp->desc);
++            }
++        }
++    }
++
++    return status;
++}
++
++static int bce_vhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
++{
++ return 0; /* stub: no bandwidth accounting is done, every request is accepted */
++}
++
++static int bce_vhci_get_frame_number(struct usb_hcd *hcd)
++{
++ return 0; /* stub: the frame number is always reported as 0 */
++}
++
++/*
++ * HCD .bus_suspend callback. Order: pause every transfer queue of every
++ * bound device, suspend the bound ports, pause the firmware controller,
++ * then pause all five event queues.
++ *
++ * Returns 0 on success or the status of the controller-pause command (in
++ * which case the event queues are left running).
++ */
++static int bce_vhci_bus_suspend(struct usb_hcd *hcd)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    struct bce_vhci_device *dev;
++    int port, ep;
++    int rc;
++
++    pr_info("bce_vhci: suspend started\n");
++
++    pr_info("bce_vhci: suspend endpoints\n");
++    for (port = 0; port < 16; port++) {
++        if (!vhci->port_to_device[port])
++            continue;
++        dev = vhci->devices[vhci->port_to_device[port]];
++        for (ep = 0; ep < 32; ep++) {
++            if (dev->tq_mask & BIT(ep))
++                bce_vhci_transfer_queue_pause(&dev->tq[ep], BCE_VHCI_PAUSE_SUSPEND);
++        }
++    }
++
++    pr_info("bce_vhci: suspend ports\n");
++    for (port = 0; port < 16; port++) {
++        if (vhci->port_to_device[port])
++            bce_vhci_cmd_port_suspend(&vhci->cq, port);
++    }
++
++    pr_info("bce_vhci: suspend controller\n");
++    rc = bce_vhci_cmd_controller_pause(&vhci->cq);
++    if (rc)
++        return rc;
++
++    bce_vhci_event_queue_pause(&vhci->ev_commands);
++    bce_vhci_event_queue_pause(&vhci->ev_system);
++    bce_vhci_event_queue_pause(&vhci->ev_isochronous);
++    bce_vhci_event_queue_pause(&vhci->ev_interrupt);
++    bce_vhci_event_queue_pause(&vhci->ev_asynchronous);
++    pr_info("bce_vhci: suspend done\n");
++    return 0;
++}
++
++/*
++ * HCD .bus_resume callback. Order: resume the event queues, start the
++ * firmware controller, resume the bound ports, then resume every transfer
++ * queue that was paused for suspend.
++ *
++ * Returns 0 on success or the status of the controller-start command.
++ */
++static int bce_vhci_bus_resume(struct usb_hcd *hcd)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    struct bce_vhci_device *dev;
++    int port, ep;
++    int rc;
++
++    pr_info("bce_vhci: resume started\n");
++
++    bce_vhci_event_queue_resume(&vhci->ev_system);
++    bce_vhci_event_queue_resume(&vhci->ev_isochronous);
++    bce_vhci_event_queue_resume(&vhci->ev_interrupt);
++    bce_vhci_event_queue_resume(&vhci->ev_asynchronous);
++    bce_vhci_event_queue_resume(&vhci->ev_commands);
++
++    pr_info("bce_vhci: resume controller\n");
++    rc = bce_vhci_cmd_controller_start(&vhci->cq);
++    if (rc)
++        return rc;
++
++    pr_info("bce_vhci: resume ports\n");
++    for (port = 0; port < 16; port++) {
++        if (vhci->port_to_device[port])
++            bce_vhci_cmd_port_resume(&vhci->cq, port);
++    }
++
++    pr_info("bce_vhci: resume endpoints\n");
++    for (port = 0; port < 16; port++) {
++        if (!vhci->port_to_device[port])
++            continue;
++        dev = vhci->devices[vhci->port_to_device[port]];
++        for (ep = 0; ep < 32; ep++) {
++            if (dev->tq_mask & BIT(ep))
++                bce_vhci_transfer_queue_resume(&dev->tq[ep], BCE_VHCI_PAUSE_SUSPEND);
++        }
++    }
++
++    pr_info("bce_vhci: resume done\n");
++    return 0;
++}
++
++/*
++ * HCD .urb_enqueue callback: hand @urb to the transfer queue stored in the
++ * endpoint's hcpriv.
++ *
++ * Returns -ENOENT when the endpoint has no transfer queue, otherwise the
++ * result of bce_vhci_urb_create().
++ */
++static int bce_vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
++{
++    struct bce_vhci_transfer_queue *q = urb->ep->hcpriv;
++
++    /* Check before the debug print, which dereferences q. */
++    if (!q)
++        return -ENOENT;
++    pr_debug("bce_vhci_urb_enqueue %i:%x\n", q->dev_addr, urb->ep->desc.bEndpointAddress);
++    return bce_vhci_urb_create(q, urb);
++}
++
++/*
++ * HCD .urb_dequeue callback: ask the endpoint's transfer queue to cancel
++ * @urb with @status.
++ *
++ * Returns -ENOENT when the endpoint has no transfer queue (mirroring
++ * bce_vhci_urb_enqueue), otherwise the result of
++ * bce_vhci_urb_request_cancel().
++ */
++static int bce_vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
++{
++    struct bce_vhci_transfer_queue *q = urb->ep->hcpriv;
++
++    pr_debug("bce_vhci_urb_dequeue %x\n", urb->ep->desc.bEndpointAddress);
++    if (!q)
++        return -ENOENT;
++    return bce_vhci_urb_request_cancel(q, urb, status);
++}
++
++/* HCD .endpoint_reset callback: request a reset of the endpoint's transfer queue, if it has one. */
++static void bce_vhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep)
++{
++    struct bce_vhci_transfer_queue *tq = ep->hcpriv;
++
++    pr_debug("bce_vhci_endpoint_reset\n");
++    if (!tq)
++        return;
++    bce_vhci_transfer_queue_request_reset(tq);
++}
++
++/*
++ * Map a USB endpoint address to an index into bce_vhci_device::tq[]:
++ * the low four bits give the endpoint number, and addresses with the
++ * 0x80 (IN) bit set are placed in the upper half (0x10..0x1f).
++ */
++static u8 bce_vhci_endpoint_index(u8 addr)
++{
++    u8 number = (u8) (addr & 0xf);
++
++    return (addr & 0x80) ? (u8) (0x10 + number) : number;
++}
++
++/*
++ * HCD .add_endpoint callback: make sure @endp has a transfer queue and a
++ * firmware-side endpoint. An endpoint whose queue already exists only gets
++ * its hcpriv pointer refreshed.
++ *
++ * Returns 0 on success (and for the root hub), -ENODEV when the port has
++ * no device state.
++ */
++static int bce_vhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp)
++{
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    u8 idx = bce_vhci_endpoint_index(endp->desc.bEndpointAddress);
++    bce_vhci_device_t devid = vhci->port_to_device[udev->portnum];
++    struct bce_vhci_device *vdev = vhci->devices[devid];
++    enum dma_data_direction dir;
++
++    pr_debug("bce_vhci_add_endpoint %x/%x:%x\n", udev->portnum, devid, idx);
++
++    if (udev->bus->root_hub == udev) /* The USB hub */
++        return 0;
++    if (!vdev)
++        return -ENODEV;
++
++    if (!(vdev->tq_mask & BIT(idx))) {
++        dir = usb_endpoint_dir_in(&endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
++        bce_vhci_create_transfer_queue(vhci, &vdev->tq[idx], endp, devid, dir);
++        endp->hcpriv = &vdev->tq[idx];
++        vdev->tq_mask |= BIT(idx);
++
++        bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &endp->desc);
++        return 0;
++    }
++
++    endp->hcpriv = &vdev->tq[idx];
++    return 0;
++}
++
++/*
++ * HCD .drop_endpoint callback: destroy the firmware-side endpoint and the
++ * transfer queue belonging to @endp. Always returns 0.
++ *
++ * If the port no longer has device state there is nothing left to drop,
++ * and any hcpriv pointer still set on the endpoint is stale, so the call
++ * returns without touching it.
++ * NOTE(review): this passes the endpoint address (masked with 0x8F) to
++ * bce_vhci_cmd_endpoint_destroy(), while bce_vhci_free_device() and
++ * bce_vhci_reset_device() pass the tq[] index -- confirm which one the
++ * command expects.
++ */
++static int bce_vhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp)
++{
++    u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress);
++    struct bce_vhci *vhci = bce_vhci_from_hcd(hcd);
++    bce_vhci_device_t devid = vhci->port_to_device[udev->portnum];
++    struct bce_vhci_transfer_queue *q = endp->hcpriv;
++    struct bce_vhci_device *vdev = vhci->devices[devid];
++    pr_info("bce_vhci_drop_endpoint %x:%x\n", udev->portnum, endp_index);
++
++    /* Without device state the tq_mask update below would dereference NULL. */
++    if (!vdev)
++        return 0;
++    if (!q) {
++        if (!(vdev->tq_mask & BIT(endp_index)))
++            return 0;
++        pr_err("something deleted the hcpriv?\n");
++        q = &vdev->tq[endp_index];
++    }
++
++    bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) (endp->desc.bEndpointAddress & 0x8Fu));
++    vdev->tq_mask &= ~BIT(endp_index);
++    bce_vhci_destroy_transfer_queue(vhci, q);
++    return 0;
++}
++
++/*
++ * Create the five host-to-firmware message queues in order, then the
++ * command queue on top of msg_commands. The first failure stops the
++ * sequence, destroys the message queues again and yields -EINVAL.
++ */
++static int bce_vhci_create_message_queues(struct bce_vhci *vhci)
++{
++    if (bce_vhci_message_queue_create(vhci, &vhci->msg_commands, "VHC1HostCommands"))
++        goto fail;
++    if (bce_vhci_message_queue_create(vhci, &vhci->msg_system, "VHC1HostSystemEvents"))
++        goto fail;
++    if (bce_vhci_message_queue_create(vhci, &vhci->msg_isochronous, "VHC1HostIsochronousEvents"))
++        goto fail;
++    if (bce_vhci_message_queue_create(vhci, &vhci->msg_interrupt, "VHC1HostInterruptEvents"))
++        goto fail;
++    if (bce_vhci_message_queue_create(vhci, &vhci->msg_asynchronous, "VHC1HostAsynchronousEvents"))
++        goto fail;
++
++    spin_lock_init(&vhci->msg_asynchronous_lock);
++    bce_vhci_command_queue_create(&vhci->cq, &vhci->msg_commands);
++    return 0;
++
++fail:
++    bce_vhci_destroy_message_queues(vhci);
++    return -EINVAL;
++}
++
++/* Destroy the command queue first, then the five message queues in creation order. */
++static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci)
++{
++    struct bce_vhci_message_queue *queues[] = {
++        &vhci->msg_commands,
++        &vhci->msg_system,
++        &vhci->msg_isochronous,
++        &vhci->msg_interrupt,
++        &vhci->msg_asynchronous,
++    };
++    size_t i;
++
++    bce_vhci_command_queue_destroy(&vhci->cq);
++    for (i = 0; i < ARRAY_SIZE(queues); i++)
++        bce_vhci_message_queue_destroy(vhci, queues[i]);
++}
++
++static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
++static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
++
++/*
++ * Create the shared completion queue and the five firmware-to-host event
++ * queues. The firmware command queue gets a raw completion callback
++ * (bce_vhci_firmware_event_completion); the others get per-message
++ * handlers. Any failure destroys what was created and yields -EINVAL.
++ */
++static int bce_vhci_create_event_queues(struct bce_vhci *vhci)
++{
++    vhci->ev_cq = bce_create_cq(vhci->dev, 0x100);
++    if (!vhci->ev_cq)
++        return -EINVAL;
++
++    if (__bce_vhci_event_queue_create(vhci, &vhci->ev_commands, "VHC1FirmwareCommands",
++                                      bce_vhci_firmware_event_completion))
++        goto fail;
++    if (bce_vhci_event_queue_create(vhci, &vhci->ev_system, "VHC1FirmwareSystemEvents",
++                                    bce_vhci_handle_system_event))
++        goto fail;
++    if (bce_vhci_event_queue_create(vhci, &vhci->ev_isochronous, "VHC1FirmwareIsochronousEvents",
++                                    bce_vhci_handle_usb_event))
++        goto fail;
++    if (bce_vhci_event_queue_create(vhci, &vhci->ev_interrupt, "VHC1FirmwareInterruptEvents",
++                                    bce_vhci_handle_usb_event))
++        goto fail;
++    if (bce_vhci_event_queue_create(vhci, &vhci->ev_asynchronous, "VHC1FirmwareAsynchronousEvents",
++                                    bce_vhci_handle_usb_event))
++        goto fail;
++    return 0;
++
++fail:
++    bce_vhci_destroy_event_queues(vhci);
++    return -EINVAL;
++}
++
++/* Destroy the five event queues, then the shared completion queue if it exists. */
++static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci)
++{
++    struct bce_vhci_event_queue *queues[] = {
++        &vhci->ev_commands,
++        &vhci->ev_system,
++        &vhci->ev_isochronous,
++        &vhci->ev_interrupt,
++        &vhci->ev_asynchronous,
++    };
++    size_t i;
++
++    for (i = 0; i < ARRAY_SIZE(queues); i++)
++        bce_vhci_event_queue_destroy(vhci, queues[i]);
++    if (vhci->ev_cq)
++        bce_destroy_cq(vhci->dev, vhci->ev_cq);
++}
++
++/*
++ * Reply to a firmware event on the system message queue. The reply is a
++ * copy of @req with bit 0x8000 set in cmd, @status as status, the same
++ * param1 and param2 cleared. If no submission slot can be reserved the
++ * reply is dropped with an error message.
++ */
++static void bce_vhci_send_fw_event_response(struct bce_vhci *vhci, struct bce_vhci_message *req, u16 status)
++{
++    struct bce_vhci_message reply = *req;
++    unsigned long timeout = 1000;
++
++    reply.cmd = (u16) (req->cmd | 0x8000u);
++    reply.status = status;
++    reply.param1 = req->param1;
++    reply.param2 = 0;
++
++    if (!bce_reserve_submission(vhci->msg_system.sq, &timeout)) {
++        bce_vhci_message_queue_write(&vhci->msg_system, &reply);
++        return;
++    }
++    pr_err("bce-vhci: Cannot reserve submision for FW event reply\n");
++}
++
++/*
++ * Act on one firmware event and return the BCE_VHCI_* status to send back.
++ *
++ * ENDPOINT_REQUEST_STATE resumes or pauses the addressed transfer queue;
++ * ENDPOINT_SET_STATE with ENDPOINT_STALLED marks it stalled. param1 carries
++ * the device id in bits 0-7 and the endpoint address in bits 8-15.
++ * Anything else, including a device id outside devices[] or an endpoint
++ * without a transfer queue, yields BCE_VHCI_BAD_ARGUMENT.
++ */
++static int bce_vhci_handle_firmware_event(struct bce_vhci *vhci, struct bce_vhci_message *msg)
++{
++    unsigned long flags;
++    bce_vhci_device_t devid;
++    u8 endp;
++    struct bce_vhci_device *dev;
++    struct bce_vhci_transfer_queue *tq = NULL;
++    if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE || msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) {
++        devid = (bce_vhci_device_t) (msg->param1 & 0xff);
++        endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff));
++        /* The id is firmware-supplied and can be up to 0xff; devices[] is smaller. */
++        if (devid >= ARRAY_SIZE(vhci->devices))
++            return BCE_VHCI_BAD_ARGUMENT;
++        dev = vhci->devices[devid];
++        if (!dev || !(dev->tq_mask & BIT(endp)))
++            return BCE_VHCI_BAD_ARGUMENT;
++        tq = &dev->tq[endp];
++    }
++
++    if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE) {
++        if (msg->param2 == BCE_VHCI_ENDPOINT_ACTIVE) {
++            bce_vhci_transfer_queue_resume(tq, BCE_VHCI_PAUSE_FIRMWARE);
++            return BCE_VHCI_SUCCESS;
++        } else if (msg->param2 == BCE_VHCI_ENDPOINT_PAUSED) {
++            bce_vhci_transfer_queue_pause(tq, BCE_VHCI_PAUSE_FIRMWARE);
++            return BCE_VHCI_SUCCESS;
++        }
++        return BCE_VHCI_BAD_ARGUMENT;
++    } else if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) {
++        if (msg->param2 == BCE_VHCI_ENDPOINT_STALLED) {
++            tq->state = msg->param2;
++            spin_lock_irqsave(&tq->urb_lock, flags);
++            tq->stalled = true;
++            spin_unlock_irqrestore(&tq->urb_lock, flags);
++            return BCE_VHCI_SUCCESS;
++        }
++        return BCE_VHCI_BAD_ARGUMENT;
++    }
++    pr_warn("bce-vhci: Unhandled firmware event: %x s=%x p1=%x p2=%llx\n",
++            msg->cmd, msg->status, msg->param1, msg->param2);
++    return BCE_VHCI_BAD_ARGUMENT;
++}
++
++/*
++ * Work handler that drains the firmware command event queue.
++ *
++ * For every completed entry it peeks at the following completion: if that
++ * one is the matching cancellation (same param1, cmd with bit 0x4000 set),
++ * both entries are consumed and BCE_VHCI_ABORT is sent back. Otherwise the
++ * event is handled, its result is sent back, and the peeked message (msg2)
++ * is carried into the next loop iteration. Finally the consumed entries
++ * are resubmitted and queue_empty_completion is signalled once all but one
++ * of the submission slots are available again.
++ */
++static void bce_vhci_handle_firmware_events_w(struct work_struct *ws)
++{
++    size_t cnt = 0;
++    int result;
++    struct bce_vhci *vhci = container_of(ws, struct bce_vhci, w_fw_events);
++    struct bce_queue_sq *sq = vhci->ev_commands.sq;
++    struct bce_sq_completion_data *cq;
++    struct bce_vhci_message *msg, *msg2 = NULL;
++
++    while (true) {
++        if (msg2) {
++            /* Continue with the message peeked at during the previous iteration. */
++            msg = msg2;
++            msg2 = NULL;
++        } else if ((cq = bce_next_completion(sq))) {
++            if (cq->status == BCE_COMPLETION_ABORTED) {
++                bce_notify_submission_complete(sq);
++                continue;
++            }
++            msg = &vhci->ev_commands.data[sq->head];
++        } else {
++            break;
++        }
++
++        pr_debug("bce-vhci: Got fw event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2);
++        if ((cq = bce_next_completion(sq))) {
++            msg2 = &vhci->ev_commands.data[(sq->head + 1) % sq->el_count];
++            /* Log the second message itself, not the first one again. */
++            pr_debug("bce-vhci: Got second fw event: %x s=%x p1=%x p2=%llx\n",
++                     msg2->cmd, msg2->status, msg2->param1, msg2->param2);
++            if (cq->status != BCE_COMPLETION_ABORTED &&
++                msg2->cmd == (msg->cmd | 0x4000) && msg2->param1 == msg->param1) {
++                /* Take two elements */
++                pr_debug("bce-vhci: Cancelled\n");
++                bce_vhci_send_fw_event_response(vhci, msg, BCE_VHCI_ABORT);
++
++                bce_notify_submission_complete(sq);
++                bce_notify_submission_complete(sq);
++                msg2 = NULL;
++                cnt += 2;
++                continue;
++            }
++
++            pr_warn("bce-vhci: Handle fw event - unexpected cancellation\n");
++        }
++
++        result = bce_vhci_handle_firmware_event(vhci, msg);
++        bce_vhci_send_fw_event_response(vhci, msg, (u16) result);
++
++
++        bce_notify_submission_complete(sq);
++        ++cnt;
++    }
++    bce_vhci_event_queue_submit_pending(&vhci->ev_commands, cnt);
++    if (atomic_read(&sq->available_commands) == sq->el_count - 1) {
++        pr_debug("bce-vhci: complete\n");
++        complete(&vhci->ev_commands.queue_empty_completion);
++    }
++}
++
++/*
++ * Completion callback of the firmware command event queue: defer the
++ * actual handling to w_fw_events on the ordered state workqueue.
++ */
++static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq)
++{
++    struct bce_vhci_event_queue *evq = sq->userdata;
++    struct bce_vhci *vhci = evq->vhci;
++
++    queue_work(vhci->tq_state_wq, &vhci->w_fw_events);
++}
++
++/*
++ * System event handler: messages with bit 0x8000 set in cmd are delivered
++ * to the command queue as completions; everything else is only logged.
++ */
++static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg)
++{
++    if (!(msg->cmd & 0x8000)) {
++        pr_warn("bce-vhci: Unhandled system event: %x s=%x p1=%x p2=%llx\n",
++                msg->cmd, msg->status, msg->param1, msg->param2);
++        return;
++    }
++    bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg);
++}
++
++/*
++ * Handler shared by the isochronous, interrupt and asynchronous event
++ * queues. Messages with bit 0x8000 set in cmd are command completions;
++ * TRANSFER_REQUEST and CONTROL_TRANSFER_STATUS are routed to the transfer
++ * queue addressed by param1 (device id in bits 0-7, endpoint address in
++ * bits 8-15). Events for an unknown device or endpoint are logged and
++ * dropped.
++ */
++static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg)
++{
++    bce_vhci_device_t devid;
++    u8 endp;
++    struct bce_vhci_device *dev;
++    if (msg->cmd & 0x8000) {
++        bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg);
++    } else if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST || msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) {
++        devid = (bce_vhci_device_t) (msg->param1 & 0xff);
++        endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff));
++        /* The id is firmware-supplied and can be up to 0xff; devices[] is smaller. */
++        dev = devid < ARRAY_SIZE(q->vhci->devices) ? q->vhci->devices[devid] : NULL;
++        if (!dev || (dev->tq_mask & BIT(endp)) == 0) {
++            pr_err("bce-vhci: Didn't find destination for transfer queue event\n");
++            return;
++        }
++        bce_vhci_transfer_queue_event(&dev->tq[endp], msg);
++    } else {
++        pr_warn("bce-vhci: Unhandled USB event: %x s=%x p1=%x p2=%llx\n",
++                msg->cmd, msg->status, msg->param1, msg->param2);
++    }
++}
++
++
++
++static const struct hc_driver bce_vhci_driver = { /* callback table handed to usb_create_hcd() in bce_vhci_create() */
++ .description = "bce-vhci",
++ .product_desc = "BCE VHCI Host Controller",
++ .hcd_priv_size = sizeof(struct bce_vhci *), /* hcd_priv holds only a pointer back to the bce_vhci, see bce_vhci_from_hcd() */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0)
++ .flags = HCD_USB2,
++#else
++ .flags = HCD_USB2 | HCD_DMA,
++#endif
++
++ .start = bce_vhci_start,
++ .stop = bce_vhci_stop,
++ .hub_status_data = bce_vhci_hub_status_data,
++ .hub_control = bce_vhci_hub_control,
++ .urb_enqueue = bce_vhci_urb_enqueue,
++ .urb_dequeue = bce_vhci_urb_dequeue,
++ .enable_device = bce_vhci_enable_device,
++ .free_dev = bce_vhci_free_device,
++ .address_device = bce_vhci_address_device, /* same implementation as enable_device */
++ .add_endpoint = bce_vhci_add_endpoint,
++ .drop_endpoint = bce_vhci_drop_endpoint,
++ .endpoint_reset = bce_vhci_endpoint_reset,
++ .check_bandwidth = bce_vhci_check_bandwidth,
++ .get_frame_number = bce_vhci_get_frame_number,
++ .bus_suspend = bce_vhci_bus_suspend,
++ .bus_resume = bce_vhci_bus_resume
++};
++
++
++/*
++ * Module-wide setup: allocate one char device number and create the
++ * "bce-vhci" device class.
++ *
++ * Returns 0 on success or a negative errno. Only resources that were
++ * actually acquired are released on failure: nothing when the chrdev
++ * region could not be allocated, and just the region when class creation
++ * failed.
++ */
++int __init bce_vhci_module_init(void)
++{
++    int result;
++
++    result = alloc_chrdev_region(&bce_vhci_chrdev, 0, 1, "bce-vhci");
++    if (result)
++        return result;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0)
++    bce_vhci_class = class_create(THIS_MODULE, "bce-vhci");
++#else
++    bce_vhci_class = class_create("bce-vhci");
++#endif
++    if (IS_ERR(bce_vhci_class)) {
++        result = PTR_ERR(bce_vhci_class);
++        unregister_chrdev_region(bce_vhci_chrdev, 1);
++        return result;
++    }
++    return 0;
++}
++void __exit bce_vhci_module_exit(void)
++{ /* undoes bce_vhci_module_init(): the class first, then the chrdev region */
++ class_destroy(bce_vhci_class);
++ unregister_chrdev_region(bce_vhci_chrdev, 1);
++}
++
++module_param_named(vhci_port_mask, bce_vhci_port_mask, ushort, 0444);
++MODULE_PARM_DESC(vhci_port_mask, "Specifies which VHCI ports are enabled");
+diff --git a/drivers/staging/apple-bce/vhci/vhci.h b/drivers/staging/apple-bce/vhci/vhci.h
+new file mode 100644
+index 000000000..90641d1ba
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/vhci.h
+@@ -0,0 +1,48 @@
++#ifndef BCE_VHCI_H
++#define BCE_VHCI_H
++
++#include "queue.h"
++#include "transfer.h"
++
++struct usb_hcd;
++struct bce_queue_cq;
++
++struct bce_vhci_device {
++ struct bce_vhci_transfer_queue tq[32];
++ u32 tq_mask;
++};
++struct bce_vhci {
++ struct apple_bce_device *dev;
++ dev_t vdevt;
++ struct device *vdev;
++ struct usb_hcd *hcd;
++ struct spinlock hcd_spinlock;
++ struct bce_vhci_message_queue msg_commands;
++ struct bce_vhci_message_queue msg_system;
++ struct bce_vhci_message_queue msg_isochronous;
++ struct bce_vhci_message_queue msg_interrupt;
++ struct bce_vhci_message_queue msg_asynchronous;
++ struct spinlock msg_asynchronous_lock;
++ struct bce_vhci_command_queue cq;
++ struct bce_queue_cq *ev_cq;
++ struct bce_vhci_event_queue ev_commands;
++ struct bce_vhci_event_queue ev_system;
++ struct bce_vhci_event_queue ev_isochronous;
++ struct bce_vhci_event_queue ev_interrupt;
++ struct bce_vhci_event_queue ev_asynchronous;
++ u16 port_mask;
++ u8 port_count;
++ u16 port_power_mask;
++ bce_vhci_device_t port_to_device[16];
++ struct bce_vhci_device *devices[16];
++ struct workqueue_struct *tq_state_wq;
++ struct work_struct w_fw_events;
++};
++
++int __init bce_vhci_module_init(void);
++void __exit bce_vhci_module_exit(void);
++
++int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci);
++void bce_vhci_destroy(struct bce_vhci *vhci);
++
++#endif //BCE_VHCI_H
+--
+2.39.2
+
+From dc27d4db5787546ae5eacf3483f3b87f2d4fb1c1 Mon Sep 17 00:00:00 2001
+From: Redecorating <69827514+Redecorating@users.noreply.github.com>
+Date: Mon, 7 Nov 2022 14:56:34 +0530
+Subject: [PATCH] Put apple-bce in drivers/staging
+
+---
+ drivers/staging/Kconfig | 2 ++
+ drivers/staging/Makefile | 1 +
+ drivers/staging/apple-bce/Kconfig | 18 ++++++++++++++++++
+ drivers/staging/apple-bce/Makefile | 2 +-
+ 4 files changed, 22 insertions(+), 1 deletion(-)
+ create mode 100644 drivers/staging/apple-bce/Kconfig
+
+diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
+index 5cfabd537..3b8e61d26 100644
+--- a/drivers/staging/Kconfig
++++ b/drivers/staging/Kconfig
+@@ -80,4 +80,6 @@ source "drivers/staging/qlge/Kconfig"
+
+ source "drivers/staging/vme_user/Kconfig"
+
++source "drivers/staging/apple-bce/Kconfig"
++
+ endif # STAGING
+diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
+index f8c3aa9c2..1e148d6c3 100644
+--- a/drivers/staging/Makefile
++++ b/drivers/staging/Makefile
+@@ -29,3 +29,4 @@ obj-$(CONFIG_PI433) += pi433/
+ obj-$(CONFIG_PI433) += pi433/
+ obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/
+ obj-$(CONFIG_FIELDBUS_DEV) += fieldbus/
++obj-$(CONFIG_APPLE_BCE) += apple-bce/
+diff --git a/drivers/staging/apple-bce/Kconfig b/drivers/staging/apple-bce/Kconfig
+new file mode 100644
+index 000000000..fe92bc441
+--- /dev/null
++++ b/drivers/staging/apple-bce/Kconfig
+@@ -0,0 +1,18 @@
++config APPLE_BCE
++ tristate "Apple BCE driver (VHCI and Audio support)"
++ default m
++ depends on X86
++ select SOUND
++ select SND
++ select SND_PCM
++ select SND_JACK
++ help
++ VHCI and audio support on Apple MacBooks with the T2 Chip.
++ This driver is divided into three components:
++ - BCE (Buffer Copy Engine): which establishes a basic communication
++ channel with the T2 chip. This component is required by the other two:
++ - VHCI (Virtual Host Controller Interface): Access to keyboard, mouse
++ and other system devices depend on this virtual USB host controller
++ - Audio: a driver for the T2 audio interface.
++
++ If "M" is selected, the module will be called apple-bce.
+diff --git a/drivers/staging/apple-bce/Makefile b/drivers/staging/apple-bce/Makefile
+index a6a656f06..8cfbd3f64 100644
+--- a/drivers/staging/apple-bce/Makefile
++++ b/drivers/staging/apple-bce/Makefile
+@@ -1,5 +1,5 @@
+ modname := apple-bce
+-obj-m += $(modname).o
++obj-$(CONFIG_APPLE_BCE) += $(modname).o
+
+ apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o
+
+--
+2.34.1
+From 153b587ed53135eaf244144f6f8bdd5a0fe6b69e Mon Sep 17 00:00:00 2001
+From: Redecorating <69827514+Redecorating@users.noreply.github.com>
+Date: Fri, 24 Dec 2021 18:12:25 +1100
+Subject: [PATCH 1/1] add modalias to apple-bce
+
+---
+ drivers/staging/apple-bce/apple_bce.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/staging/apple-bce/apple_bce.c b/drivers/staging/apple-bce/apple_bce.c
+index ad89632df..5e2f2f3b9 100644
+--- a/drivers/staging/apple-bce/apple_bce.c
++++ b/drivers/staging/apple-bce/apple_bce.c
+@@ -439,5 +439,6 @@ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("MrARM");
+ MODULE_DESCRIPTION("Apple BCE Driver");
+ MODULE_VERSION("0.01");
++MODULE_ALIAS("pci:v0000106Bd00001801sv*sd*bc*sc*i*");
+ module_init(apple_bce_module_init);
+ module_exit(apple_bce_module_exit);
+--
+2.43.0
+
+From 75ca57b64ce6846622d8aefac5a76fc638a2123d Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Sun, 5 Mar 2023 19:12:53 +0300
+Subject: [PATCH 01/12] HID: core: add helper for finding a field with a
+ certain usage
+
+This helper will allow HID drivers to easily determine if they should
+bind to a hid_device by checking for the presence of a certain field
+when its ID is not enough, which can be the case on USB devices with
+multiple interfaces and/or configurations.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/hid/hid-core.c | 25 +++++++++++++++++++++++++
+ drivers/hid/hid-google-hammer.c | 27 ++-------------------------
+ include/linux/hid.h | 2 ++
+ 3 files changed, 29 insertions(+), 25 deletions(-)
+
+diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
+index 8992e3c1e..6395bdc2e 100644
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -1906,6 +1906,31 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value)
+ }
+ EXPORT_SYMBOL_GPL(hid_set_field);
+
++struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type,
++ unsigned int application, unsigned int usage)
++{
++ struct list_head *report_list = &hdev->report_enum[report_type].report_list;
++ struct hid_report *report;
++ int i, j;
++
++ list_for_each_entry(report, report_list, list) {
++ if (report->application != application)
++ continue;
++
++ for (i = 0; i < report->maxfield; i++) {
++ struct hid_field *field = report->field[i];
++
++ for (j = 0; j < field->maxusage; j++) {
++ if (field->usage[j].hid == usage)
++ return field;
++ }
++ }
++ }
++
++ return NULL;
++}
++EXPORT_SYMBOL_GPL(hid_find_field);
++
+ static struct hid_report *hid_get_report(struct hid_report_enum *report_enum,
+ const u8 *data)
+ {
+diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
+index c6bdb9c4e..fba3652aa 100644
+--- a/drivers/hid/hid-google-hammer.c
++++ b/drivers/hid/hid-google-hammer.c
+@@ -419,38 +419,15 @@ static int hammer_event(struct hid_device *hid, struct hid_field *field,
+ return 0;
+ }
+
+-static bool hammer_has_usage(struct hid_device *hdev, unsigned int report_type,
+- unsigned application, unsigned usage)
+-{
+- struct hid_report_enum *re = &hdev->report_enum[report_type];
+- struct hid_report *report;
+- int i, j;
+-
+- list_for_each_entry(report, &re->report_list, list) {
+- if (report->application != application)
+- continue;
+-
+- for (i = 0; i < report->maxfield; i++) {
+- struct hid_field *field = report->field[i];
+-
+- for (j = 0; j < field->maxusage; j++)
+- if (field->usage[j].hid == usage)
+- return true;
+- }
+- }
+-
+- return false;
+-}
+-
+ static bool hammer_has_folded_event(struct hid_device *hdev)
+ {
+- return hammer_has_usage(hdev, HID_INPUT_REPORT,
++ return !!hid_find_field(hdev, HID_INPUT_REPORT,
+ HID_GD_KEYBOARD, HID_USAGE_KBD_FOLDED);
+ }
+
+ static bool hammer_has_backlight_control(struct hid_device *hdev)
+ {
+- return hammer_has_usage(hdev, HID_OUTPUT_REPORT,
++ return !!hid_find_field(hdev, HID_OUTPUT_REPORT,
+ HID_GD_KEYBOARD, HID_AD_BRIGHTNESS);
+ }
+
+diff --git a/include/linux/hid.h b/include/linux/hid.h
+index 39e21e381..9520fdfdd 100644
+--- a/include/linux/hid.h
++++ b/include/linux/hid.h
+@@ -913,6 +913,8 @@ extern void hidinput_report_event(struct hid_device *hid, struct hid_report *rep
+ extern int hidinput_connect(struct hid_device *hid, unsigned int force);
+ extern void hidinput_disconnect(struct hid_device *);
+
++struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type,
++ unsigned int application, unsigned int usage);
+ int hid_set_field(struct hid_field *, unsigned, __s32);
+ int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
+ int interrupt);
+--
+2.42.0
+
+From 05cd738ce1c0e1a930a1dab02528fd9f1c702c38 Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Sun, 5 Mar 2023 18:52:43 +0300
+Subject: [PATCH 02/12] HID: hid-appletb-bl: add driver for the backlight of
+ Apple Touch Bars
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This commit adds a driver for the backlight of Apple Touch Bars on x86
+Macs. Note that currently only T2 Macs are supported.
+
+This driver is based on previous work done by Ronald Tschalär
+<ronald@innovation.ch>.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ MAINTAINERS | 6 ++
+ drivers/hid/Kconfig | 10 ++
+ drivers/hid/Makefile | 1 +
+ drivers/hid/hid-appletb-bl.c | 193 +++++++++++++++++++++++++++++++++++
+ drivers/hid/hid-quirks.c | 4 +-
+ 5 files changed, 213 insertions(+), 1 deletion(-)
+ create mode 100644 drivers/hid/hid-appletb-bl.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 4cc6bf79f..519b3b736 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9157,6 +9157,12 @@ F: include/linux/pm.h
+ F: include/linux/suspend.h
+ F: kernel/power/
+
++HID APPLE TOUCH BAR DRIVERS
++M: Kerem Karabay <kekrby@gmail.com>
++L: linux-input@vger.kernel.org
++S: Maintained
++F: drivers/hid/hid-appletb-*
++
+ HID CORE LAYER
+ M: Jiri Kosina <jikos@kernel.org>
+ M: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
+index e11c1c803..cf19a3b33 100644
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -148,6 +148,16 @@ config HID_APPLEIR
+
+ Say Y here if you want support for Apple infrared remote control.
+
++config HID_APPLETB_BL
++ tristate "Apple Touch Bar Backlight"
++ depends on BACKLIGHT_CLASS_DEVICE
++ help
++ Say Y here if you want support for the backlight of Touch Bars on x86
++ MacBook Pros.
++
++ To compile this driver as a module, choose M here: the
++ module will be called hid-appletb-bl.
++
+ config HID_ASUS
+ tristate "Asus"
+ depends on USB_HID
+diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
+index 7a9e16015..bc86e38b2 100644
+--- a/drivers/hid/Makefile
++++ b/drivers/hid/Makefile
+@@ -29,6 +29,7 @@ obj-$(CONFIG_HID_ALPS) += hid-alps.o
+ obj-$(CONFIG_HID_ACRUX) += hid-axff.o
+ obj-$(CONFIG_HID_APPLE) += hid-apple.o
+ obj-$(CONFIG_HID_APPLEIR) += hid-appleir.o
++obj-$(CONFIG_HID_APPLETB_BL) += hid-appletb-bl.o
+ obj-$(CONFIG_HID_CREATIVE_SB0540) += hid-creative-sb0540.o
+ obj-$(CONFIG_HID_ASUS) += hid-asus.o
+ obj-$(CONFIG_HID_AUREAL) += hid-aureal.o
+diff --git a/drivers/hid/hid-appletb-bl.c b/drivers/hid/hid-appletb-bl.c
+new file mode 100644
+index 000000000..0c5e4b776
+--- /dev/null
++++ b/drivers/hid/hid-appletb-bl.c
+@@ -0,0 +1,193 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Apple Touch Bar Backlight Driver
++ *
++ * Copyright (c) 2017-2018 Ronald Tschalär
++ * Copyright (c) 2022-2023 Kerem Karabay <kekrby@gmail.com>
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/hid.h>
++#include <linux/backlight.h>
++
++#include "hid-ids.h"
++
++#define APPLETB_BL_ON 1
++#define APPLETB_BL_DIM 3
++#define APPLETB_BL_OFF 4
++
++#define HID_UP_APPLEVENDOR_TB_BL 0xff120000
++
++#define HID_VD_APPLE_TB_BRIGHTNESS 0xff120001
++#define HID_USAGE_AUX1 0xff120020
++#define HID_USAGE_BRIGHTNESS 0xff120021
++
++struct appletb_bl {
++ struct hid_field *aux1_field, *brightness_field;
++ struct backlight_device *bdev;
++
++ bool full_on;
++};
++
++static const u8 appletb_bl_brightness_map[] = { /* indexed by backlight brightness level */
++ APPLETB_BL_OFF,
++ APPLETB_BL_DIM,
++ APPLETB_BL_ON
++};
++
++static int appletb_bl_set_brightness(struct appletb_bl *bl, u8 brightness)
++{
++ struct hid_report *report = bl->brightness_field->report;
++ struct hid_device *hdev = report->device;
++ int ret;
++
++ ret = hid_set_field(bl->aux1_field, 0, 1);
++ if (ret) {
++ hid_err(hdev, "Failed to set auxiliary field (%pe)\n", ERR_PTR(ret));
++ return ret;
++ }
++
++ ret = hid_set_field(bl->brightness_field, 0, brightness);
++ if (ret) {
++ hid_err(hdev, "Failed to set brightness field (%pe)\n", ERR_PTR(ret));
++ return ret;
++ }
++
++ if (!bl->full_on) {
++ ret = hid_hw_power(hdev, PM_HINT_FULLON);
++ if (ret < 0) {
++ hid_err(hdev, "Device didn't power on (%pe)\n", ERR_PTR(ret));
++ return ret;
++ }
++
++ bl->full_on = true;
++ }
++
++ hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
++
++ if (brightness == APPLETB_BL_OFF) {
++ hid_hw_power(hdev, PM_HINT_NORMAL);
++ bl->full_on = false;
++ }
++
++ return 0;
++}
++
++static int appletb_bl_update_status(struct backlight_device *bdev)
++{
++ struct appletb_bl *bl = bl_get_data(bdev);
++ u16 brightness;
++
++ if (bdev->props.state & BL_CORE_SUSPENDED)
++ brightness = 0;
++ else
++ brightness = backlight_get_brightness(bdev);
++
++ return appletb_bl_set_brightness(bl, appletb_bl_brightness_map[brightness]);
++}
++
++static const struct backlight_ops appletb_bl_backlight_ops = {
++ .options = BL_CORE_SUSPENDRESUME,
++ .update_status = appletb_bl_update_status,
++};
++
++static int appletb_bl_probe(struct hid_device *hdev, const struct hid_device_id *id)
++{
++ struct hid_field *aux1_field, *brightness_field;
++ struct backlight_properties bl_props = { 0 };
++ struct device *dev = &hdev->dev;
++ struct appletb_bl *bl;
++ int ret;
++
++ ret = hid_parse(hdev);
++ if (ret)
++ return dev_err_probe(dev, ret, "HID parse failed\n");
++
++ aux1_field = hid_find_field(hdev, HID_FEATURE_REPORT,
++ HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_AUX1);
++
++ brightness_field = hid_find_field(hdev, HID_FEATURE_REPORT,
++ HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_BRIGHTNESS);
++
++ if (!aux1_field || !brightness_field)
++ return -ENODEV;
++
++ if (aux1_field->report != brightness_field->report)
++ return dev_err_probe(dev, -ENODEV, "Encountered unexpected report structure\n");
++
++ bl = devm_kzalloc(dev, sizeof(*bl), GFP_KERNEL);
++ if (!bl)
++ return -ENOMEM;
++
++ ret = hid_hw_start(hdev, HID_CONNECT_DRIVER);
++ if (ret)
++ return dev_err_probe(dev, ret, "HID hardware start failed\n");
++
++ ret = hid_hw_open(hdev);
++ if (ret) {
++ dev_err_probe(dev, ret, "HID hardware open failed\n");
++ goto stop_hw;
++ }
++
++ bl->aux1_field = aux1_field;
++ bl->brightness_field = brightness_field;
++
++ ret = appletb_bl_set_brightness(bl, APPLETB_BL_OFF);
++ if (ret) {
++ dev_err_probe(dev, ret, "Failed to set touch bar brightness to off\n");
++ goto close_hw;
++ }
++
++ bl_props.type = BACKLIGHT_RAW;
++ bl_props.max_brightness = ARRAY_SIZE(appletb_bl_brightness_map) - 1;
++
++ bl->bdev = devm_backlight_device_register(dev, "appletb_backlight", dev, bl,
++ &appletb_bl_backlight_ops, &bl_props);
++ if (IS_ERR(bl->bdev)) {
++ ret = PTR_ERR(bl->bdev);
++ dev_err_probe(dev, ret, "Failed to register backlight device\n");
++ goto close_hw;
++ }
++
++ hid_set_drvdata(hdev, bl);
++
++ return 0;
++
++close_hw:
++ hid_hw_close(hdev);
++stop_hw:
++ hid_hw_stop(hdev);
++
++ return ret;
++}
++
++static void appletb_bl_remove(struct hid_device *hdev)
++{
++ struct appletb_bl *bl = hid_get_drvdata(hdev);
++
++ appletb_bl_set_brightness(bl, APPLETB_BL_OFF);
++
++ hid_hw_close(hdev);
++ hid_hw_stop(hdev);
++}
++
++static const struct hid_device_id appletb_bl_hid_ids[] = {
++ /* MacBook Pro's 2018, 2019, with T2 chip: iBridge DFR Brightness */
++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
++ { }
++};
++MODULE_DEVICE_TABLE(hid, appletb_bl_hid_ids);
++
++static struct hid_driver appletb_bl_hid_driver = {
++ .name = "hid-appletb-bl",
++ .id_table = appletb_bl_hid_ids,
++ .probe = appletb_bl_probe,
++ .remove = appletb_bl_remove,
++};
++module_hid_driver(appletb_bl_hid_driver);
++
++MODULE_AUTHOR("Ronald Tschalär");
++MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
++MODULE_DESCRIPTION("MacBookPro Touch Bar Backlight Driver");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
+index 3983b4f28..82e7a80c9 100644
+--- a/drivers/hid/hid-quirks.c
++++ b/drivers/hid/hid-quirks.c
+@@ -325,7 +325,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) },
+- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
+ #endif
+ #if IS_ENABLED(CONFIG_HID_APPLEIR)
+@@ -335,6 +334,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
+ #endif
++#if IS_ENABLED(CONFIG_HID_APPLETB_BL)
++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
++#endif
+ #if IS_ENABLED(CONFIG_HID_ASUS)
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
+--
+2.42.0
+
+From 514b4f088b7ed916c634ca6f61de72c5f86268dd Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Sun, 5 Mar 2023 18:17:23 +0300
+Subject: [PATCH 03/12] HID: hid-appletb-kbd: add driver for the keyboard mode
+ of Apple Touch Bars
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The Touch Bars found on x86 Macs support two USB configurations: one
+where the device presents itself as a HID keyboard and can display
+predefined sets of keys, and one where the operating system has full
+control over what is displayed. This commit adds a driver for the
+display functionality of the first configuration.
+
+Note that currently only T2 Macs are supported.
+
+This driver is based on previous work done by Ronald Tschalär
+<ronald@innovation.ch>.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ .../ABI/testing/sysfs-driver-hid-appletb-kbd | 13 +
+ drivers/hid/Kconfig | 11 +
+ drivers/hid/Makefile | 1 +
+ drivers/hid/hid-appletb-kbd.c | 289 ++++++++++++++++++
+ drivers/hid/hid-quirks.c | 4 +-
+ 5 files changed, 317 insertions(+), 1 deletion(-)
+ create mode 100644 Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
+ create mode 100644 drivers/hid/hid-appletb-kbd.c
+
+diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
+new file mode 100644
+index 000000000..2a19584d0
+--- /dev/null
++++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
+@@ -0,0 +1,13 @@
++What: /sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
++Date: September, 2023
++KernelVersion: 6.5
++Contact: linux-input@vger.kernel.org
++Description:
++ The set of keys displayed on the Touch Bar.
++ Valid values are:
++ == =================
++ 0 Escape key only
++ 1 Function keys
++ 2 Media/brightness keys
++ 3 None
++ == =================
+diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
+index cf19a3b33..852de13aa 100644
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -158,6 +158,17 @@ config HID_APPLETB_BL
+ To compile this driver as a module, choose M here: the
+ module will be called hid-appletb-bl.
+
++config HID_APPLETB_KBD
++ tristate "Apple Touch Bar Keyboard Mode"
++ depends on USB_HID
++ help
++ Say Y here if you want support for the keyboard mode (escape,
++ function, media and brightness keys) of Touch Bars on x86 MacBook
++ Pros.
++
++ To compile this driver as a module, choose M here: the
++ module will be called hid-appletb-kbd.
++
+ config HID_ASUS
+ tristate "Asus"
+ depends on USB_HID
+diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
+index bc86e38b2..5b60015fd 100644
+--- a/drivers/hid/Makefile
++++ b/drivers/hid/Makefile
+@@ -30,6 +30,7 @@ obj-$(CONFIG_HID_ACRUX) += hid-axff.o
+ obj-$(CONFIG_HID_APPLE) += hid-apple.o
+ obj-$(CONFIG_HID_APPLEIR) += hid-appleir.o
+ obj-$(CONFIG_HID_APPLETB_BL) += hid-appletb-bl.o
++obj-$(CONFIG_HID_APPLETB_KBD) += hid-appletb-kbd.o
+ obj-$(CONFIG_HID_CREATIVE_SB0540) += hid-creative-sb0540.o
+ obj-$(CONFIG_HID_ASUS) += hid-asus.o
+ obj-$(CONFIG_HID_AUREAL) += hid-aureal.o
+diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c
+new file mode 100644
+index 000000000..bc004c408
+--- /dev/null
++++ b/drivers/hid/hid-appletb-kbd.c
+@@ -0,0 +1,289 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Apple Touch Bar Keyboard Mode Driver
++ *
++ * Copyright (c) 2017-2018 Ronald Tschalär
++ * Copyright (c) 2022-2023 Kerem Karabay <kekrby@gmail.com>
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/hid.h>
++#include <linux/usb.h>
++#include <linux/input.h>
++#include <linux/sysfs.h>
++#include <linux/bitops.h>
++#include <linux/module.h>
++#include <linux/string.h>
++#include <linux/input/sparse-keymap.h>
++
++#include "hid-ids.h"
++
++#define APPLETB_KBD_MODE_ESC 0
++#define APPLETB_KBD_MODE_FN 1
++#define APPLETB_KBD_MODE_SPCL 2
++#define APPLETB_KBD_MODE_OFF 3
++#define APPLETB_KBD_MODE_MAX APPLETB_KBD_MODE_OFF
++
++#define HID_USAGE_MODE 0x00ff0004
++
++struct appletb_kbd {
++ struct hid_field *mode_field;
++
++ u8 saved_mode;
++ u8 current_mode;
++};
++
++static const struct key_entry appletb_kbd_keymap[] = {
++ { KE_KEY, KEY_ESC, { KEY_ESC } },
++ { KE_KEY, KEY_F1, { KEY_BRIGHTNESSDOWN } },
++ { KE_KEY, KEY_F2, { KEY_BRIGHTNESSUP } },
++ { KE_KEY, KEY_F3, { KEY_RESERVED } },
++ { KE_KEY, KEY_F4, { KEY_RESERVED } },
++ { KE_KEY, KEY_F5, { KEY_KBDILLUMDOWN } },
++ { KE_KEY, KEY_F6, { KEY_KBDILLUMUP } },
++ { KE_KEY, KEY_F7, { KEY_PREVIOUSSONG } },
++ { KE_KEY, KEY_F8, { KEY_PLAYPAUSE } },
++ { KE_KEY, KEY_F9, { KEY_NEXTSONG } },
++ { KE_KEY, KEY_F10, { KEY_MUTE } },
++ { KE_KEY, KEY_F11, { KEY_VOLUMEDOWN } },
++ { KE_KEY, KEY_F12, { KEY_VOLUMEUP } },
++ { KE_END, 0 }
++};
++
++static int appletb_kbd_set_mode(struct appletb_kbd *kbd, u8 mode)
++{
++ struct hid_report *report = kbd->mode_field->report;
++ struct hid_device *hdev = report->device;
++ int ret;
++
++ ret = hid_hw_power(hdev, PM_HINT_FULLON);
++ if (ret) {
++ hid_err(hdev, "Device didn't resume (%pe)\n", ERR_PTR(ret));
++ return ret;
++ }
++
++ ret = hid_set_field(kbd->mode_field, 0, mode);
++ if (ret) {
++ hid_err(hdev, "Failed to set mode field to %u (%pe)\n", mode, ERR_PTR(ret));
++ goto power_normal;
++ }
++
++ hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
++
++ kbd->current_mode = mode;
++
++power_normal:
++ hid_hw_power(hdev, PM_HINT_NORMAL);
++
++ return ret;
++}
++
++static ssize_t mode_show(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct appletb_kbd *kbd = dev_get_drvdata(dev);
++
++ return sysfs_emit(buf, "%d\n", kbd->current_mode);
++}
++
++static ssize_t mode_store(struct device *dev,
++ struct device_attribute *attr,
++ const char *buf, size_t size)
++{
++ struct appletb_kbd *kbd = dev_get_drvdata(dev);
++ u8 mode;
++ int ret;
++
++ ret = kstrtou8(buf, 0, &mode);
++ if (ret)
++ return ret;
++
++ if (mode > APPLETB_KBD_MODE_MAX)
++ return -EINVAL;
++
++ ret = appletb_kbd_set_mode(kbd, mode);
++
++ return ret < 0 ? ret : size;
++}
++static DEVICE_ATTR_RW(mode);
++
++static struct attribute *appletb_kbd_attrs[] = { /* wrapped into appletb_kbd_groups below */
++ &dev_attr_mode.attr,
++ NULL
++};
++ATTRIBUTE_GROUPS(appletb_kbd);
++
++static int appletb_tb_key_to_slot(unsigned int code)
++{
++ switch (code) {
++ case KEY_ESC:
++ return 0;
++ case KEY_F1 ... KEY_F10:
++ return code - KEY_F1 + 1;
++ case KEY_F11 ... KEY_F12:
++ return code - KEY_F11 + 11;
++
++ default:
++ return -EINVAL;
++ }
++}
++
++static int appletb_kbd_hid_event(struct hid_device *hdev, struct hid_field *field,
++ struct hid_usage *usage, __s32 value)
++{
++ struct appletb_kbd *kbd = hid_get_drvdata(hdev);
++ struct key_entry *translation;
++ struct input_dev *input;
++ int slot;
++
++ if ((usage->hid & HID_USAGE_PAGE) != HID_UP_KEYBOARD || usage->type != EV_KEY)
++ return 0;
++
++ input = field->hidinput->input;
++
++ /*
++ * Skip non-touch-bar keys.
++ *
++ * Either the touch bar itself or usbhid generate a slew of key-down
++ * events for all the meta keys. None of which we're at all interested
++ * in.
++ */
++ slot = appletb_tb_key_to_slot(usage->code);
++ if (slot < 0)
++ return 0;
++
++ translation = sparse_keymap_entry_from_scancode(input, usage->code);
++
++ if (translation && kbd->current_mode == APPLETB_KBD_MODE_SPCL) {
++ input_event(input, usage->type, translation->keycode, value);
++
++ return 1;
++ }
++
++ return kbd->current_mode == APPLETB_KBD_MODE_OFF;
++}
++
++static int appletb_kbd_input_configured(struct hid_device *hdev, struct hid_input *hidinput)
++{
++ struct input_dev *input = hidinput->input;
++
++ /*
++ * Clear various input capabilities that are blindly set by the hid
++ * driver (usbkbd.c)
++ */
++ memset(input->evbit, 0, sizeof(input->evbit));
++ memset(input->keybit, 0, sizeof(input->keybit));
++ memset(input->ledbit, 0, sizeof(input->ledbit));
++
++ __set_bit(EV_REP, input->evbit);
++
++ return sparse_keymap_setup(input, appletb_kbd_keymap, NULL);
++}
++
++static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id)
++{
++ struct appletb_kbd *kbd;
++ struct device *dev = &hdev->dev;
++ struct hid_field *mode_field;
++ int ret;
++
++ ret = hid_parse(hdev);
++ if (ret)
++ return dev_err_probe(dev, ret, "HID parse failed\n");
++
++ mode_field = hid_find_field(hdev, HID_OUTPUT_REPORT,
++ HID_GD_KEYBOARD, HID_USAGE_MODE);
++ if (!mode_field)
++ return -ENODEV;
++
++ kbd = devm_kzalloc(dev, sizeof(*kbd), GFP_KERNEL);
++ if (!kbd)
++ return -ENOMEM;
++
++ kbd->mode_field = mode_field;
++ /* The .event callback reads drvdata, so publish it before the hw is started. */
++ hid_set_drvdata(hdev, kbd);
++
++ ret = hid_hw_start(hdev, HID_CONNECT_HIDINPUT);
++ if (ret)
++ return dev_err_probe(dev, ret, "HID hw start failed\n");
++
++ ret = hid_hw_open(hdev);
++ if (ret) {
++ dev_err_probe(dev, ret, "HID hw open failed\n");
++ goto stop_hw;
++ }
++
++ ret = appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
++ if (ret) {
++ dev_err_probe(dev, ret, "Failed to set touchbar mode\n");
++ goto close_hw;
++ }
++
++ return 0;
++
++close_hw:
++ hid_hw_close(hdev);
++stop_hw:
++ hid_hw_stop(hdev);
++ return ret;
++}
++
++static void appletb_kbd_remove(struct hid_device *hdev)
++{
++ struct appletb_kbd *kbd = hid_get_drvdata(hdev);
++
++ appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
++
++ hid_hw_close(hdev);
++ hid_hw_stop(hdev);
++}
++
++#ifdef CONFIG_PM
++static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg)
++{
++ struct appletb_kbd *kbd = hid_get_drvdata(hdev);
++
++ kbd->saved_mode = kbd->current_mode;
++ appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
++
++ return 0;
++}
++
++static int appletb_kbd_reset_resume(struct hid_device *hdev)
++{
++ struct appletb_kbd *kbd = hid_get_drvdata(hdev);
++
++ appletb_kbd_set_mode(kbd, kbd->saved_mode);
++
++ return 0;
++}
++#endif
++
++static const struct hid_device_id appletb_kbd_hid_ids[] = {
++ /* MacBook Pro's 2018, 2019, with T2 chip: iBridge Display */
++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
++ { }
++};
++MODULE_DEVICE_TABLE(hid, appletb_kbd_hid_ids);
++
++static struct hid_driver appletb_kbd_hid_driver = {
++ .name = "hid-appletb-kbd",
++ .id_table = appletb_kbd_hid_ids,
++ .probe = appletb_kbd_probe,
++ .remove = appletb_kbd_remove,
++ .event = appletb_kbd_hid_event,
++ .input_configured = appletb_kbd_input_configured,
++#ifdef CONFIG_PM
++ .suspend = appletb_kbd_suspend,
++ .reset_resume = appletb_kbd_reset_resume,
++#endif
++ .driver.dev_groups = appletb_kbd_groups,
++};
++module_hid_driver(appletb_kbd_hid_driver);
++
++MODULE_AUTHOR("Ronald Tschalär");
++MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
++MODULE_DESCRIPTION("MacBookPro Touch Bar Keyboard Mode Driver");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
+index 82e7a80c9..82be9dfaf 100644
+--- a/drivers/hid/hid-quirks.c
++++ b/drivers/hid/hid-quirks.c
+@@ -325,7 +325,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) },
+- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
+ #endif
+ #if IS_ENABLED(CONFIG_HID_APPLEIR)
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
+@@ -337,6 +336,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
+ #if IS_ENABLED(CONFIG_HID_APPLETB_BL)
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
+ #endif
++#if IS_ENABLED(CONFIG_HID_APPLETB_KBD)
++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
++#endif
+ #if IS_ENABLED(CONFIG_HID_ASUS)
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
+--
+2.42.0
+
+From 2f9be28549307b4ac51e8d66bf3b8d5e0621466d Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Wed, 19 Jul 2023 19:37:14 +0300
+Subject: [PATCH 04/12] HID: multitouch: support getting the contact ID from
+ HID_DG_TRANSDUCER_INDEX fields
+
+This is needed to support Apple Touch Bars, where the contact ID is
+contained in fields with the HID_DG_TRANSDUCER_INDEX usage.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/hid/hid-multitouch.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index e31be0cb8..902a59928 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -636,7 +636,9 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td,
+
+ if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) {
+ for (n = 0; n < field->report_count; n++) {
+- if (field->usage[n].hid == HID_DG_CONTACTID) {
++ unsigned int hid = field->usage[n].hid;
++
++ if (hid == HID_DG_CONTACTID || hid == HID_DG_TRANSDUCER_INDEX) {
+ rdata->is_mt_collection = true;
+ break;
+ }
+@@ -815,6 +817,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+ MT_STORE_FIELD(tip_state);
+ return 1;
+ case HID_DG_CONTACTID:
++ case HID_DG_TRANSDUCER_INDEX:
+ MT_STORE_FIELD(contactid);
+ app->touches_by_report++;
+ return 1;
+--
+2.42.0
+
+From 6162d328fe7b2cf5a3ee8c29bdb229e9528c7a6c Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Wed, 19 Jul 2023 19:44:10 +0300
+Subject: [PATCH 05/12] HID: multitouch: support getting the tip state from
+ HID_DG_TOUCH fields
+
+This is necessary on Apple Touch Bars, where the tip state is contained
+in fields with the HID_DG_TOUCH usage. This feature is gated by a quirk
+in order to prevent breaking other devices, see commit c2ef8f21ea8f
+("HID: multitouch: add support for trackpads").
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/hid/hid-multitouch.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index 902a59928..dd5509eeb 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -78,6 +78,8 @@ MODULE_LICENSE("GPL");
+ #define MT_QUIRK_ORIENTATION_INVERT BIT(22)
+ #define MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT BIT(23)
+ #define MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH BIT(24)
++#define MT_QUIRK_TOUCH_IS_TIPSTATE BIT(25)
++
+
+ #define MT_INPUTMODE_TOUCHSCREEN 0x02
+ #define MT_INPUTMODE_TOUCHPAD 0x03
+@@ -810,6 +811,15 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+
+ MT_STORE_FIELD(confidence_state);
+ return 1;
++ case HID_DG_TOUCH:
++ /*
++ * Legacy devices use TIPSWITCH and not TOUCH.
++ * Let's just ignore this field unless the quirk is set.
++ */
++ if (!(cls->quirks & MT_QUIRK_TOUCH_IS_TIPSTATE))
++ return -1;
++
++ fallthrough;
+ case HID_DG_TIPSWITCH:
+ if (field->application != HID_GD_SYSTEM_MULTIAXIS)
+ input_set_capability(hi->input,
+@@ -873,10 +883,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+ case HID_DG_CONTACTMAX:
+ /* contact max are global to the report */
+ return -1;
+- case HID_DG_TOUCH:
+- /* Legacy devices use TIPSWITCH and not TOUCH.
+- * Let's just ignore this field. */
+- return -1;
+ }
+ /* let hid-input decide for the others */
+ return 0;
+--
+2.42.0
+
+From e923c6e1a5a508e341851ae020cdb3e7333ccd18 Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Wed, 19 Jul 2023 19:26:57 +0300
+Subject: [PATCH 06/12] HID: multitouch: take cls->maxcontacts into account for
+ devices without a HID_DG_CONTACTMAX field too
+
+This is needed for Apple Touch Bars, where no HID_DG_CONTACTMAX field is
+present and the maximum contact count is greater than the default.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/hid/hid-multitouch.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index dd5509eeb..624c1d3cc 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -491,9 +491,6 @@ static void mt_feature_mapping(struct hid_device *hdev,
+ if (!td->maxcontacts &&
+ field->logical_maximum <= MT_MAX_MAXCONTACT)
+ td->maxcontacts = field->logical_maximum;
+- if (td->mtclass.maxcontacts)
+- /* check if the maxcontacts is given by the class */
+- td->maxcontacts = td->mtclass.maxcontacts;
+
+ break;
+ case HID_DG_BUTTONTYPE:
+@@ -1310,6 +1307,10 @@ static int mt_touch_input_configured(struct hid_device *hdev,
+ struct input_dev *input = hi->input;
+ int ret;
+
++ /* check if the maxcontacts is given by the class */
++ if (cls->maxcontacts)
++ td->maxcontacts = cls->maxcontacts;
++
+ if (!td->maxcontacts)
+ td->maxcontacts = MT_DEFAULT_MAXCONTACT;
+
+--
+2.42.0
+
+From b9f7232d2696b91ae98fadd7b14c531aa8edceb5 Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Wed, 19 Jul 2023 19:39:53 +0300
+Subject: [PATCH 07/12] HID: multitouch: allow specifying if a device is direct
+ in a class
+
+Currently the driver determines the device type based on the
+application, but this value is not reliable on Apple Touch Bars, where
+the application is HID_DG_TOUCHPAD even though the devices are direct,
+so allow setting it in classes.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/hid/hid-multitouch.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index 624c1d3cc..f98fb36ff 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -147,6 +147,7 @@ struct mt_class {
+ __s32 sn_height; /* Signal/noise ratio for height events */
+ __s32 sn_pressure; /* Signal/noise ratio for pressure events */
+ __u8 maxcontacts;
++ bool is_direct; /* true for touchscreens */
+ bool is_indirect; /* true for touchpads */
+ bool export_all_inputs; /* do not ignore mouse, keyboards, etc... */
+ };
+@@ -564,13 +565,13 @@ static struct mt_application *mt_allocate_application(struct mt_device *td,
+ mt_application->application = application;
+ INIT_LIST_HEAD(&mt_application->mt_usages);
+
+- if (application == HID_DG_TOUCHSCREEN)
++ if (application == HID_DG_TOUCHSCREEN && !td->mtclass.is_indirect)
+ mt_application->mt_flags |= INPUT_MT_DIRECT;
+
+ /*
+ * Model touchscreens providing buttons as touchpads.
+ */
+- if (application == HID_DG_TOUCHPAD) {
++ if (application == HID_DG_TOUCHPAD && !td->mtclass.is_direct) {
+ mt_application->mt_flags |= INPUT_MT_POINTER;
+ td->inputmode_value = MT_INPUTMODE_TOUCHPAD;
+ }
+@@ -1318,6 +1319,9 @@ static int mt_touch_input_configured(struct hid_device *hdev,
+ if (td->serial_maybe)
+ mt_post_parse_default_settings(td, app);
+
++ if (cls->is_direct)
++ app->mt_flags |= INPUT_MT_DIRECT;
++
+ if (cls->is_indirect)
+ app->mt_flags |= INPUT_MT_POINTER;
+
+--
+2.42.0
+
+From a74de0b6f2e1b79d54e84dbeab1b310232275d6c Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Wed, 19 Jul 2023 19:46:02 +0300
+Subject: [PATCH 08/12] HID: multitouch: add device ID for Apple Touch Bars
+
+Note that this device ID is for T2 Macs. Testing on T1 Macs would be
+appreciated.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/hid/Kconfig | 1 +
+ drivers/hid/hid-multitouch.c | 26 ++++++++++++++++++++++----
+ 2 files changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
+index 852de13aa..4e238df87 100644
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -737,6 +737,7 @@ config HID_MULTITOUCH
+ Say Y here if you have one of the following devices:
+ - 3M PCT touch screens
+ - ActionStar dual touch panels
++ - Touch Bars on x86 MacBook Pros
+ - Atmel panels
+ - Cando dual touch panels
+ - Chunghwa panels
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index f98fb36ff..f881b19db 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -226,6 +229,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
+ #define MT_CLS_RAZER_BLADE_STEALTH 0x0112
+ #define MT_CLS_SMART_TECH 0x0113
+ #define MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER 0x0114
++#define MT_CLS_APPLE_TOUCHBAR 0x0115
+
+ #define MT_DEFAULT_MAXCONTACT 10
+ #define MT_MAX_MAXCONTACT 250
+@@ -421,6 +421,13 @@
+ MT_QUIRK_WIN8_PTP_BUTTONS,
+ .export_all_inputs = true
+ },
++ { .name = MT_CLS_APPLE_TOUCHBAR,
++ .quirks = MT_QUIRK_HOVERING |
++ MT_QUIRK_TOUCH_IS_TIPSTATE |
++ MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE,
++ .is_direct = true,
++ .maxcontacts = 11,
++ },
+ { }
+ };
+
+@@ -1883,6 +1906,17 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ }
+ }
+
++	ret = hid_parse(hdev);
++	if (ret != 0) {
++		/* td is not allocated yet, so no PM notifier exists to unregister */
++		return ret;
++	}
++
++ if (mtclass->name == MT_CLS_APPLE_TOUCHBAR &&
++ !hid_find_field(hdev, HID_INPUT_REPORT,
++ HID_DG_TOUCHPAD, HID_DG_TRANSDUCER_INDEX))
++ return -ENODEV;
++
+ td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL);
+ if (!td) {
+ dev_err(&hdev->dev, "cannot allocate multitouch data\n");
+@@ -1933,12 +1967,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
+
+ timer_setup(&td->release_timer, mt_expired_timeout, 0);
+
+- ret = hid_parse(hdev);
+- if (ret != 0) {
+- unregister_pm_notifier(&td->pm_notifier);
+- return ret;
+- }
+-
+ if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID)
+ mt_fix_const_fields(hdev, HID_DG_CONTACTID);
+
+@@ -2418,6 +2418,11 @@
+ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY,
+ USB_VENDOR_ID_MICROSOFT, 0x09c0) },
+
++ /* Apple Touch Bars */
++ { .driver_data = MT_CLS_APPLE_TOUCHBAR,
++ HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
++ USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) },
++
+ /* Google MT devices */
+ { .driver_data = MT_CLS_GOOGLE,
+ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
+--
+2.42.0
+
+From f6ab7e4580962c9d82e7dc40dd074d47b2bce034 Mon Sep 17 00:00:00 2001
+From: Hector Martin <marcan@marcan.st>
+Date: Tue, 1 Feb 2022 00:40:51 +0900
+Subject: [PATCH 09/12] lib/vsprintf: Add support for generic FOURCCs by
+ extending %p4cc
+
+%p4cc is designed for DRM/V4L2 FOURCCs with their specific quirks, but
+it's useful to be able to print generic 4-character codes formatted as
+an integer. Extend it to add format specifiers for printing generic
+32-bit FOURCCs with various endian semantics:
+
+%p4ch Host-endian
+%p4cl Little-endian
+%p4cb Big-endian
+%p4cr Reverse-endian
+
+The endianness determines how bytes are interpreted as a u32, and the
+FOURCC is then always printed MSByte-first (this is the opposite of
+V4L/DRM FOURCCs). This covers most practical cases, e.g. %p4cr would
+allow printing LSByte-first FOURCCs stored in host endian order
+(other than the hex form being in character order, not the integer
+value).
+
+Signed-off-by: Hector Martin <marcan@marcan.st>
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ Documentation/core-api/printk-formats.rst | 32 ++++++++++++++++++++
+ lib/test_printf.c | 20 +++++++++----
+ lib/vsprintf.c | 36 +++++++++++++++++++----
+ scripts/checkpatch.pl | 2 +-
+ 4 files changed, 77 insertions(+), 13 deletions(-)
+
+diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
+index dfe7e75a7..0ccef63e6 100644
+--- a/Documentation/core-api/printk-formats.rst
++++ b/Documentation/core-api/printk-formats.rst
+@@ -631,6 +631,38 @@ Examples::
+ %p4cc Y10 little-endian (0x20303159)
+ %p4cc NV12 big-endian (0xb231564e)
+
++Generic FourCC code
++-------------------
++
++::
++	%p4c[hrbl]	gP00 (0x67503030)
++
++Print a generic FourCC code, as both ASCII characters and its numerical
++value as hexadecimal.
++
++The additional ``h``, ``r``, ``b``, and ``l`` specifiers are used to specify
++host, reversed, big or little endian order data respectively. Host endian
++order means the data is interpreted as a 32-bit integer and the most
++significant byte is printed first; that is, the character code as printed
++matches the byte order stored in memory on big-endian systems, and is reversed
++on little-endian systems.
++
++Passed by reference.
++
++Examples for a little-endian machine, given &(u32)0x67503030::
++
++ %p4ch gP00 (0x67503030)
++ %p4cl gP00 (0x67503030)
++ %p4cb 00Pg (0x30305067)
++ %p4cr 00Pg (0x30305067)
++
++Examples for a big-endian machine, given &(u32)0x67503030::
++
++ %p4ch gP00 (0x67503030)
++ %p4cl 00Pg (0x30305067)
++ %p4cb gP00 (0x67503030)
++ %p4cr 00Pg (0x30305067)
++
+ Rust
+ ----
+
+diff --git a/lib/test_printf.c b/lib/test_printf.c
+index 7677ebccf..2355be36f 100644
+--- a/lib/test_printf.c
++++ b/lib/test_printf.c
+@@ -746,18 +746,26 @@ static void __init fwnode_pointer(void)
+ static void __init fourcc_pointer(void)
+ {
+ struct {
++ char type;
+ u32 code;
+ char *str;
+ } const try[] = {
+- { 0x3231564e, "NV12 little-endian (0x3231564e)", },
+- { 0xb231564e, "NV12 big-endian (0xb231564e)", },
+- { 0x10111213, ".... little-endian (0x10111213)", },
+- { 0x20303159, "Y10 little-endian (0x20303159)", },
++ { 'c', 0x3231564e, "NV12 little-endian (0x3231564e)", },
++ { 'c', 0xb231564e, "NV12 big-endian (0xb231564e)", },
++ { 'c', 0x10111213, ".... little-endian (0x10111213)", },
++ { 'c', 0x20303159, "Y10 little-endian (0x20303159)", },
++ { 'h', 0x67503030, "gP00 (0x67503030)", },
++ { 'r', 0x30305067, "gP00 (0x67503030)", },
++ { 'l', cpu_to_le32(0x67503030), "gP00 (0x67503030)", },
++ { 'b', cpu_to_be32(0x67503030), "gP00 (0x67503030)", },
+ };
+ unsigned int i;
+
+- for (i = 0; i < ARRAY_SIZE(try); i++)
+- test(try[i].str, "%p4cc", &try[i].code);
++ for (i = 0; i < ARRAY_SIZE(try); i++) {
++ char fmt[] = { '%', 'p', '4', 'c', try[i].type, '\0' };
++
++ test(try[i].str, fmt, &try[i].code);
++ }
+ }
+
+ static void __init
+diff --git a/lib/vsprintf.c b/lib/vsprintf.c
+index 40f560959..bd9af783c 100644
+--- a/lib/vsprintf.c
++++ b/lib/vsprintf.c
+@@ -1758,27 +1758,50 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc,
+ char output[sizeof("0123 little-endian (0x01234567)")];
+ char *p = output;
+ unsigned int i;
++ bool pix_fmt = false;
+ u32 orig, val;
+
+- if (fmt[1] != 'c' || fmt[2] != 'c')
++ if (fmt[1] != 'c')
+ return error_string(buf, end, "(%p4?)", spec);
+
+ if (check_pointer(&buf, end, fourcc, spec))
+ return buf;
+
+ orig = get_unaligned(fourcc);
+- val = orig & ~BIT(31);
++ switch (fmt[2]) {
++ case 'h':
++ val = orig;
++ break;
++ case 'r':
++ val = orig = swab32(orig);
++ break;
++ case 'l':
++ val = orig = le32_to_cpu(orig);
++ break;
++ case 'b':
++ val = orig = be32_to_cpu(orig);
++ break;
++ case 'c':
++ /* Pixel formats are printed LSB-first */
++ val = swab32(orig & ~BIT(31));
++ pix_fmt = true;
++ break;
++ default:
++ return error_string(buf, end, "(%p4?)", spec);
++ }
+
+ for (i = 0; i < sizeof(u32); i++) {
+- unsigned char c = val >> (i * 8);
++ unsigned char c = val >> ((3 - i) * 8);
+
+ /* Print non-control ASCII characters as-is, dot otherwise */
+ *p++ = isascii(c) && isprint(c) ? c : '.';
+ }
+
+- *p++ = ' ';
+- strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian");
+- p += strlen(p);
++ if (pix_fmt) {
++ *p++ = ' ';
++ strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian");
++ p += strlen(p);
++ }
+
+ *p++ = ' ';
+ *p++ = '(';
+@@ -2348,6 +2371,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr);
+ * read the documentation (path below) first.
+ * - 'NF' For a netdev_features_t
+ * - '4cc' V4L2 or DRM FourCC code, with endianness and raw numerical value.
++ * - '4c[hlbr]' Generic FourCC code.
+ * - 'h[CDN]' For a variable-length buffer, it prints it as a hex string with
+ * a certain separator (' ' by default):
+ * C colon
+diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
+index 880fde13d..f080e33a4 100755
+--- a/scripts/checkpatch.pl
++++ b/scripts/checkpatch.pl
+@@ -6906,7 +6906,7 @@ sub process {
+ ($extension eq "f" &&
+ defined $qualifier && $qualifier !~ /^w/) ||
+ ($extension eq "4" &&
+- defined $qualifier && $qualifier !~ /^cc/)) {
++ defined $qualifier && $qualifier !~ /^c[chlbr]/)) {
+ $bad_specifier = $specifier;
+ last;
+ }
+--
+2.42.0
+
+From f893444f7c842f97f3707897ba29f2c8dd77c8df Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Mon, 7 Aug 2023 20:29:27 +0300
+Subject: [PATCH 10/12] USB: core: add 'shutdown' callback to usb_driver
+
+This simplifies running code on shutdown for USB drivers.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/usb/core/driver.c | 14 ++++++++++++++
+ drivers/usb/storage/uas.c | 5 ++---
+ include/linux/usb.h | 3 +++
+ 3 files changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
+index f58a0299f..dc0f86376 100644
+--- a/drivers/usb/core/driver.c
++++ b/drivers/usb/core/driver.c
+@@ -514,6 +514,19 @@ static int usb_unbind_interface(struct device *dev)
+ return 0;
+ }
+
++static void usb_shutdown_interface(struct device *dev)
++{
++ struct usb_interface *intf = to_usb_interface(dev);
++ struct usb_driver *driver;
++
++ if (!dev->driver)
++ return;
++
++ driver = to_usb_driver(dev->driver);
++ if (driver->shutdown)
++ driver->shutdown(intf);
++}
++
+ /**
+ * usb_driver_claim_interface - bind a driver to an interface
+ * @driver: the driver to be bound
+@@ -1053,6 +1066,7 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner,
+ new_driver->drvwrap.driver.bus = &usb_bus_type;
+ new_driver->drvwrap.driver.probe = usb_probe_interface;
+ new_driver->drvwrap.driver.remove = usb_unbind_interface;
++ new_driver->drvwrap.driver.shutdown = usb_shutdown_interface;
+ new_driver->drvwrap.driver.owner = owner;
+ new_driver->drvwrap.driver.mod_name = mod_name;
+ new_driver->drvwrap.driver.dev_groups = new_driver->dev_groups;
+diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
+index 2583ee981..591fa0379 100644
+--- a/drivers/usb/storage/uas.c
++++ b/drivers/usb/storage/uas.c
+@@ -1221,9 +1221,8 @@ static void uas_disconnect(struct usb_interface *intf)
+ * hang on reboot when the device is still in uas mode. Note the reset is
+ * necessary as some devices won't revert to usb-storage mode without it.
+ */
+-static void uas_shutdown(struct device *dev)
++static void uas_shutdown(struct usb_interface *intf)
+ {
+- struct usb_interface *intf = to_usb_interface(dev);
+ struct usb_device *udev = interface_to_usbdev(intf);
+ struct Scsi_Host *shost = usb_get_intfdata(intf);
+ struct uas_dev_info *devinfo = (struct uas_dev_info *)shost->hostdata;
+@@ -1246,7 +1245,7 @@ static struct usb_driver uas_driver = {
+ .suspend = uas_suspend,
+ .resume = uas_resume,
+ .reset_resume = uas_reset_resume,
+- .drvwrap.driver.shutdown = uas_shutdown,
++ .shutdown = uas_shutdown,
+ .id_table = uas_usb_ids,
+ };
+
+diff --git a/include/linux/usb.h b/include/linux/usb.h
+index 25f8e62a3..5f3ae2186 100644
+--- a/include/linux/usb.h
++++ b/include/linux/usb.h
+@@ -1194,6 +1194,7 @@ struct usbdrv_wrap {
+ * post_reset method is called.
+ * @post_reset: Called by usb_reset_device() after the device
+ * has been reset
++ * @shutdown: Called at shut-down time to quiesce the device.
+ * @id_table: USB drivers use ID table to support hotplugging.
+ * Export this with MODULE_DEVICE_TABLE(usb,...). This must be set
+ * or your driver's probe function will never get called.
+@@ -1245,6 +1246,8 @@ struct usb_driver {
+ int (*pre_reset)(struct usb_interface *intf);
+ int (*post_reset)(struct usb_interface *intf);
+
++ void (*shutdown)(struct usb_interface *intf);
++
+ const struct usb_device_id *id_table;
+ const struct attribute_group **dev_groups;
+
+--
+2.42.0
+
+From 337d6f6e34daaa786a0fb70d0dbd553288cd5ecd Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Fri, 4 Aug 2023 17:49:25 +0300
+Subject: [PATCH 11/12] drm/format-helper: add helper for BGR888 to XRGB8888
+ conversion
+
+Add XRGB8888 emulation helper for devices that only support BGR888.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/gpu/drm/drm_format_helper.c | 53 ++++++++++++++
+ .../gpu/drm/tests/drm_format_helper_test.c | 69 +++++++++++++++++++
+ include/drm/drm_format_helper.h | 3 +
+ 3 files changed, 125 insertions(+)
+
+diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
+index f93a4efce..5508fbde1 100644
+--- a/drivers/gpu/drm/drm_format_helper.c
++++ b/drivers/gpu/drm/drm_format_helper.c
+@@ -601,6 +601,56 @@ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pi
+ }
+ EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888);
+
++static void drm_fb_xrgb8888_to_bgr888_line(void *dbuf, const void *sbuf, unsigned int pixels)
++{
++ u8 *dbuf8 = dbuf;
++ const __le32 *sbuf32 = sbuf;
++ unsigned int x;
++ u32 pix;
++
++ for (x = 0; x < pixels; x++) {
++ pix = le32_to_cpu(sbuf32[x]);
++ /* write red-green-blue to output in little endianness */
++ *dbuf8++ = (pix & 0x00FF0000) >> 16;
++ *dbuf8++ = (pix & 0x0000FF00) >> 8;
++ *dbuf8++ = (pix & 0x000000FF) >> 0;
++ }
++}
++
++/**
++ * drm_fb_xrgb8888_to_bgr888 - Convert XRGB8888 to BGR888 clip buffer
++ * @dst: Array of BGR888 destination buffers
++ * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines
++ * within @dst; can be NULL if scanlines are stored next to each other.
++ * @src: Array of XRGB8888 source buffers
++ * @fb: DRM framebuffer
++ * @clip: Clip rectangle area to copy
++ *
++ * This function copies parts of a framebuffer to display memory and converts the
++ * color format during the process. Destination and framebuffer formats must match. The
++ * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at
++ * least as many entries as there are planes in @fb's format. Each entry stores the
++ * value for the format's respective color plane at the same index.
++ *
++ * This function does not apply clipping on @dst (i.e. the destination is at the
++ * top-left corner).
++ *
++ * Drivers can use this function for BGR888 devices that don't natively
++ * support XRGB8888.
++ */
++void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch,
++ const struct iosys_map *src, const struct drm_framebuffer *fb,
++ const struct drm_rect *clip)
++{
++ static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = {
++ 3,
++ };
++
++ drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false,
++ drm_fb_xrgb8888_to_bgr888_line);
++}
++EXPORT_SYMBOL(drm_fb_xrgb8888_to_bgr888);
++
+ static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsigned int pixels)
+ {
+ __le32 *dbuf32 = dbuf;
+@@ -925,6 +975,9 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
+ } else if (dst_format == DRM_FORMAT_RGB888) {
+ drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip);
+ return 0;
++ } else if (dst_format == DRM_FORMAT_BGR888) {
++ drm_fb_xrgb8888_to_bgr888(dst, dst_pitch, src, fb, clip);
++ return 0;
+ } else if (dst_format == DRM_FORMAT_ARGB8888) {
+ drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip);
+ return 0;
+diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c
+index 474bb7a1c..dff7fabd9 100644
+--- a/drivers/gpu/drm/tests/drm_format_helper_test.c
++++ b/drivers/gpu/drm/tests/drm_format_helper_test.c
+@@ -52,6 +52,11 @@ struct convert_to_rgb888_result {
+ const u8 expected[TEST_BUF_SIZE];
+ };
+
++struct convert_to_bgr888_result {
++ unsigned int dst_pitch;
++ const u8 expected[TEST_BUF_SIZE];
++};
++
+ struct convert_to_argb8888_result {
+ unsigned int dst_pitch;
+ const u32 expected[TEST_BUF_SIZE];
+@@ -84,6 +89,7 @@ struct convert_xrgb8888_case {
+ struct convert_to_argb1555_result argb1555_result;
+ struct convert_to_rgba5551_result rgba5551_result;
+ struct convert_to_rgb888_result rgb888_result;
++ struct convert_to_bgr888_result bgr888_result;
+ struct convert_to_argb8888_result argb8888_result;
+ struct convert_to_xrgb2101010_result xrgb2101010_result;
+ struct convert_to_argb2101010_result argb2101010_result;
+@@ -125,6 +131,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
+ .dst_pitch = TEST_USE_DEFAULT_PITCH,
+ .expected = { 0x00, 0x00, 0xFF },
+ },
++ .bgr888_result = {
++ .dst_pitch = TEST_USE_DEFAULT_PITCH,
++ .expected = { 0xFF, 0x00, 0x00 },
++ },
+ .argb8888_result = {
+ .dst_pitch = TEST_USE_DEFAULT_PITCH,
+ .expected = { 0xFFFF0000 },
+@@ -179,6 +189,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
+ .dst_pitch = TEST_USE_DEFAULT_PITCH,
+ .expected = { 0x00, 0x00, 0xFF },
+ },
++ .bgr888_result = {
++ .dst_pitch = TEST_USE_DEFAULT_PITCH,
++ .expected = { 0xFF, 0x00, 0x00 },
++ },
+ .argb8888_result = {
+ .dst_pitch = TEST_USE_DEFAULT_PITCH,
+ .expected = { 0xFFFF0000 },
+@@ -280,6 +294,15 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
+ 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,
+ },
+ },
++ .bgr888_result = {
++ .dst_pitch = TEST_USE_DEFAULT_PITCH,
++ .expected = {
++ 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,
++ 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00,
++ 0x00, 0x00, 0xFF, 0xFF, 0x00, 0xFF,
++ 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF,
++ },
++ },
+ .argb8888_result = {
+ .dst_pitch = TEST_USE_DEFAULT_PITCH,
+ .expected = {
+@@ -391,6 +414,17 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ },
+ },
++ .bgr888_result = {
++ .dst_pitch = 15,
++ .expected = {
++ 0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05, 0xA8, 0xF3, 0x03,
++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++ 0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05,
++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++ 0xA8, 0x03, 0x03, 0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C,
++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
++ },
++ },
+ .argb8888_result = {
+ .dst_pitch = 20,
+ .expected = {
+@@ -727,6 +761,40 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test)
+ KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
+ }
+
++static void drm_test_fb_xrgb8888_to_bgr888(struct kunit *test)
++{
++ const struct convert_xrgb8888_case *params = test->param_value;
++ const struct convert_to_bgr888_result *result = &params->bgr888_result;
++ size_t dst_size;
++ u8 *buf = NULL;
++ __le32 *xrgb8888 = NULL;
++ struct iosys_map dst, src;
++
++ struct drm_framebuffer fb = {
++ .format = drm_format_info(DRM_FORMAT_XRGB8888),
++ .pitches = { params->pitch, 0, 0 },
++ };
++
++ dst_size = conversion_buf_size(DRM_FORMAT_BGR888, result->dst_pitch,
++ &params->clip, 0);
++ KUNIT_ASSERT_GT(test, dst_size, 0);
++
++ buf = kunit_kzalloc(test, dst_size, GFP_KERNEL);
++ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
++ iosys_map_set_vaddr(&dst, buf);
++
++ xrgb8888 = cpubuf_to_le32(test, params->xrgb8888, TEST_BUF_SIZE);
++ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xrgb8888);
++ iosys_map_set_vaddr(&src, xrgb8888);
++
++ /*
++ * BGR888 expected results are already in little-endian
++ * order, so there's no need to convert the test output.
++ */
++ drm_fb_xrgb8888_to_bgr888(&dst, &result->dst_pitch, &src, &fb, &params->clip);
++ KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
++}
++
+ static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test)
+ {
+ const struct convert_xrgb8888_case *params = test->param_value;
+@@ -858,6 +926,7 @@ static struct kunit_case drm_format_helper_test_cases[] = {
+ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb1555, convert_xrgb8888_gen_params),
+ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgba5551, convert_xrgb8888_gen_params),
+ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgb888, convert_xrgb8888_gen_params),
++ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_bgr888, convert_xrgb8888_gen_params),
+ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb8888, convert_xrgb8888_gen_params),
+ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_xrgb2101010, convert_xrgb8888_gen_params),
+ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb2101010, convert_xrgb8888_gen_params),
+diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
+index 291deb094..7fc553318 100644
+--- a/include/drm/drm_format_helper.h
++++ b/include/drm/drm_format_helper.h
+@@ -42,6 +42,9 @@ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_
+ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch,
+ const struct iosys_map *src, const struct drm_framebuffer *fb,
+ const struct drm_rect *clip);
++void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch,
++ const struct iosys_map *src, const struct drm_framebuffer *fb,
++ const struct drm_rect *clip);
+ void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch,
+ const struct iosys_map *src, const struct drm_framebuffer *fb,
+ const struct drm_rect *clip);
+--
+2.42.0
+
+From 1f0b6c21c4d56f5be74c4d7d0665525862e307c3 Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Sat, 6 May 2023 17:30:09 +0300
+Subject: [PATCH 12/12] drm/tiny: add driver for Apple Touch Bars in x86 Macs
+
+The Touch Bars found on x86 Macs support two USB configurations: one
+where the device presents itself as a HID keyboard and can display
+predefined sets of keys, and one where the operating system has full
+control over what is displayed. This commit adds support for the display
+functionality of the second configuration.
+
+Note that this driver has only been tested on T2 Macs, and only includes
+the USB device ID for these devices. Testing on T1 Macs would be
+appreciated.
+
+Credit goes to @imbushuo on GitHub for reverse engineering most of the
+protocol.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ MAINTAINERS | 6 +
+ drivers/gpu/drm/tiny/Kconfig | 12 +
+ drivers/gpu/drm/tiny/Makefile | 1 +
+ drivers/gpu/drm/tiny/appletbdrm.c | 624 ++++++++++++++++++++++++++++++
+ 4 files changed, 643 insertions(+)
+ create mode 100644 drivers/gpu/drm/tiny/appletbdrm.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 519b3b736..dfc63d257 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -6372,6 +6372,12 @@ S: Supported
+ T: git git://anongit.freedesktop.org/drm/drm-misc
+ F: drivers/gpu/drm/sun4i/sun8i*
+
++DRM DRIVER FOR APPLE TOUCH BARS
++M: Kerem Karabay <kekrby@gmail.com>
++L: dri-devel@lists.freedesktop.org
++S: Maintained
++F: drivers/gpu/drm/tiny/appletbdrm.c
++
+ DRM DRIVER FOR ARM PL111 CLCD
+ M: Emma Anholt <emma@anholt.net>
+ S: Supported
+diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
+index f6889f649..559a97bce 100644
+--- a/drivers/gpu/drm/tiny/Kconfig
++++ b/drivers/gpu/drm/tiny/Kconfig
+@@ -1,5 +1,17 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+
++config DRM_APPLETBDRM
++ tristate "DRM support for Apple Touch Bars"
++ depends on DRM && USB && MMU
++ select DRM_KMS_HELPER
++ select DRM_GEM_SHMEM_HELPER
++ help
++ Say Y here if you want support for the display of Touch Bars on x86
++ MacBook Pros.
++
++ To compile this driver as a module, choose M here: the
++ module will be called appletbdrm.
++
+ config DRM_ARCPGU
+ tristate "ARC PGU"
+ depends on DRM && OF
+diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile
+index 76dde89a0..9a1b412e7 100644
+--- a/drivers/gpu/drm/tiny/Makefile
++++ b/drivers/gpu/drm/tiny/Makefile
+@@ -1,5 +1,6 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+
++obj-$(CONFIG_DRM_APPLETBDRM) += appletbdrm.o
+ obj-$(CONFIG_DRM_ARCPGU) += arcpgu.o
+ obj-$(CONFIG_DRM_BOCHS) += bochs.o
+ obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus.o
+diff --git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c
+new file mode 100644
+index 000000000..33a99436b
+--- /dev/null
++++ b/drivers/gpu/drm/tiny/appletbdrm.c
+@@ -0,0 +1,624 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Apple Touch Bar DRM Driver
++ *
++ * Copyright (c) 2023 Kerem Karabay <kekrby@gmail.com>
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <asm/unaligned.h>
++
++#include <linux/usb.h>
++#include <linux/module.h>
++
++#include <drm/drm_drv.h>
++#include <drm/drm_fourcc.h>
++#include <drm/drm_probe_helper.h>
++#include <drm/drm_atomic_helper.h>
++#include <drm/drm_damage_helper.h>
++#include <drm/drm_format_helper.h>
++#include <drm/drm_gem_shmem_helper.h>
++#include <drm/drm_gem_atomic_helper.h>
++#include <drm/drm_simple_kms_helper.h>
++#include <drm/drm_gem_framebuffer_helper.h>
++
++#define _APPLETBDRM_FOURCC(s) (((s)[0] << 24) | ((s)[1] << 16) | ((s)[2] << 8) | (s)[3])
++#define APPLETBDRM_FOURCC(s) _APPLETBDRM_FOURCC(#s)
++
++#define APPLETBDRM_PIXEL_FORMAT APPLETBDRM_FOURCC(RGBA) /* The actual format is BGR888 */
++#define APPLETBDRM_BITS_PER_PIXEL 24
++
++#define APPLETBDRM_MSG_CLEAR_DISPLAY APPLETBDRM_FOURCC(CLRD)
++#define APPLETBDRM_MSG_GET_INFORMATION APPLETBDRM_FOURCC(GINF)
++#define APPLETBDRM_MSG_UPDATE_COMPLETE APPLETBDRM_FOURCC(UDCL)
++#define APPLETBDRM_MSG_SIGNAL_READINESS APPLETBDRM_FOURCC(REDY)
++
++#define APPLETBDRM_BULK_MSG_TIMEOUT 1000
++
++#define drm_to_adev(_drm) container_of(_drm, struct appletbdrm_device, drm)
++#define adev_to_udev(adev) interface_to_usbdev(to_usb_interface(adev->dev))
++
++struct appletbdrm_device {
++ struct device *dev;
++
++ u8 in_ep;
++ u8 out_ep;
++
++ u32 width;
++ u32 height;
++
++ struct drm_device drm;
++ struct drm_display_mode mode;
++ struct drm_connector connector;
++ struct drm_simple_display_pipe pipe;
++
++ bool readiness_signal_received;
++};
++
++struct appletbdrm_request_header {
++ __le16 unk_00;
++ __le16 unk_02;
++ __le32 unk_04;
++ __le32 unk_08;
++ __le32 size;
++} __packed;
++
++struct appletbdrm_response_header {
++ u8 unk_00[16];
++ u32 msg;
++} __packed;
++
++struct appletbdrm_simple_request {
++ struct appletbdrm_request_header header;
++ u32 msg;
++ u8 unk_14[8];
++ __le32 size;
++} __packed;
++
++struct appletbdrm_information {
++ struct appletbdrm_response_header header;
++ u8 unk_14[12];
++ __le32 width;
++ __le32 height;
++ u8 bits_per_pixel;
++ __le32 bytes_per_row;
++ __le32 orientation;
++ __le32 bitmap_info;
++ u32 pixel_format;
++ __le32 width_inches; /* floating point */
++ __le32 height_inches; /* floating point */
++} __packed;
++
++struct appletbdrm_frame {
++ __le16 begin_x;
++ __le16 begin_y;
++ __le16 width;
++ __le16 height;
++ __le32 buf_size;
++ u8 buf[];
++} __packed;
++
++struct appletbdrm_fb_request_footer {
++ u8 unk_00[12];
++ __le32 unk_0c;
++ u8 unk_10[12];
++ __le32 unk_1c;
++ __le64 timestamp;
++ u8 unk_28[12];
++ __le32 unk_34;
++ u8 unk_38[20];
++ __le32 unk_4c;
++} __packed;
++
++struct appletbdrm_fb_request {
++ struct appletbdrm_request_header header;
++ __le16 unk_10;
++ u8 msg_id;
++ u8 unk_13[29];
++ /*
++ * Contents of `data`:
++ * - struct appletbdrm_frame frames[];
++ * - struct appletbdrm_fb_request_footer footer;
++ * - padding to make the total size a multiple of 16
++ */
++ u8 data[];
++} __packed;
++
++struct appletbdrm_fb_request_response {
++ struct appletbdrm_response_header header;
++ u8 unk_14[12];
++ __le64 timestamp;
++} __packed;
++
++/*
++ * Send @size bytes starting at @request to the device over the bulk OUT
++ * endpoint.
++ *
++ * Returns 0 on success, the usb_bulk_msg() error code if the transfer failed,
++ * or -EIO if the number of bytes transferred differs from @size.
++ */
++static int appletbdrm_send_request(struct appletbdrm_device *adev,
++ struct appletbdrm_request_header *request, size_t size)
++{
++ struct usb_device *udev = adev_to_udev(adev);
++ struct drm_device *drm = &adev->drm;
++ int ret, actual_size;
++
++ ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, adev->out_ep),
++ request, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT);
++ if (ret) {
++ drm_err(drm, "Failed to send message (%pe)\n", ERR_PTR(ret));
++ return ret;
++ }
++
++ if (actual_size != size) {
++ /* size is a size_t, which has to be printed with %zu */
++ drm_err(drm, "Actual size (%d) doesn't match expected size (%zu)\n",
++ actual_size, size);
++ return -EIO;
++ }
++
++ return ret;
++}
++
++/*
++ * Read a response of exactly @size bytes from the bulk IN endpoint into
++ * @response and check that its message code equals @expected_response.
++ *
++ * The first readiness signal seen on this device is skipped and the read is
++ * retried; any further readiness signal is treated as an error.
++ *
++ * Returns 0 on success, the usb_bulk_msg() error code if the transfer failed,
++ * or -EIO on an unexpected readiness signal, size mismatch or message code
++ * mismatch.
++ */
++static int appletbdrm_read_response(struct appletbdrm_device *adev,
++ struct appletbdrm_response_header *response,
++ size_t size, u32 expected_response)
++{
++ struct usb_device *udev = adev_to_udev(adev);
++ struct drm_device *drm = &adev->drm;
++ int ret, actual_size;
++
++retry:
++ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, adev->in_ep),
++ response, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT);
++ if (ret) {
++ drm_err(drm, "Failed to read response (%pe)\n", ERR_PTR(ret));
++ return ret;
++ }
++
++ /*
++ * The device responds to the first request sent in a particular
++ * timeframe after the USB device configuration is set with a readiness
++ * signal, in which case the response should be read again
++ */
++ if (response->msg == APPLETBDRM_MSG_SIGNAL_READINESS) {
++ if (!adev->readiness_signal_received) {
++ adev->readiness_signal_received = true;
++ goto retry;
++ }
++
++ drm_err(drm, "Encountered unexpected readiness signal\n");
++ return -EIO;
++ }
++
++ if (actual_size != size) {
++ /* size is a size_t, which has to be printed with %zu */
++ drm_err(drm, "Actual size (%d) doesn't match expected size (%zu)\n",
++ actual_size, size);
++ return -EIO;
++ }
++
++ if (response->msg != expected_response) {
++ drm_err(drm, "Unexpected response from device (expected %p4ch found %p4ch)\n",
++ &expected_response, &response->msg);
++ return -EIO;
++ }
++
++ return 0;
++}
++
++static int appletbdrm_send_msg(struct appletbdrm_device *adev, u32 msg)
++{
++ struct appletbdrm_simple_request *request;
++ int ret;
++
++ request = kzalloc(sizeof(*request), GFP_KERNEL);
++ if (!request)
++ return -ENOMEM;
++
++ request->header.unk_00 = cpu_to_le16(2);
++ request->header.unk_02 = cpu_to_le16(0x1512);
++ request->header.size = cpu_to_le32(sizeof(*request) - sizeof(request->header));
++ request->msg = msg;
++ request->size = request->header.size;
++
++ ret = appletbdrm_send_request(adev, &request->header, sizeof(*request));
++
++ kfree(request);
++
++ return ret;
++}
++
++static int appletbdrm_clear_display(struct appletbdrm_device *adev)
++{
++ return appletbdrm_send_msg(adev, APPLETBDRM_MSG_CLEAR_DISPLAY);
++}
++
++static int appletbdrm_signal_readiness(struct appletbdrm_device *adev)
++{
++ return appletbdrm_send_msg(adev, APPLETBDRM_MSG_SIGNAL_READINESS);
++}
++
++/*
++ * Request the display information from the device and store the reported
++ * width and height in @adev.
++ *
++ * Returns 0 on success, -ENOMEM if the response buffer cannot be allocated,
++ * the error from sending the request or reading the response, or -EINVAL if
++ * the device reports a bits-per-pixel value or pixel format other than the
++ * ones this driver expects.
++ */
++static int appletbdrm_get_information(struct appletbdrm_device *adev)
++{
++ struct appletbdrm_information *info;
++ struct drm_device *drm = &adev->drm;
++ u8 bits_per_pixel;
++ u32 pixel_format;
++ int ret;
++
++ info = kzalloc(sizeof(*info), GFP_KERNEL);
++ if (!info)
++ return -ENOMEM;
++
++ ret = appletbdrm_send_msg(adev, APPLETBDRM_MSG_GET_INFORMATION);
++ if (ret)
++ goto free_info; /* info is already allocated, don't leak it */
++
++ ret = appletbdrm_read_response(adev, &info->header, sizeof(*info),
++ APPLETBDRM_MSG_GET_INFORMATION);
++ if (ret)
++ goto free_info;
++
++ /* The response struct is packed, so use unaligned accessors */
++ bits_per_pixel = info->bits_per_pixel;
++ pixel_format = get_unaligned(&info->pixel_format);
++
++ adev->width = get_unaligned_le32(&info->width);
++ adev->height = get_unaligned_le32(&info->height);
++
++ if (bits_per_pixel != APPLETBDRM_BITS_PER_PIXEL) {
++ drm_err(drm, "Encountered unexpected bits per pixel value (%d)\n", bits_per_pixel);
++ ret = -EINVAL;
++ goto free_info;
++ }
++
++ if (pixel_format != APPLETBDRM_PIXEL_FORMAT) {
++ drm_err(drm, "Encountered unknown pixel format (%p4ch)\n", &pixel_format);
++ ret = -EINVAL;
++ goto free_info;
++ }
++
++free_info:
++ kfree(info);
++
++ return ret;
++}
++
++static u32 rect_size(struct drm_rect *rect)
++{
++ return drm_rect_width(rect) * drm_rect_height(rect) * (APPLETBDRM_BITS_PER_PIXEL / 8);
++}
++
++/*
++ * Send the damaged regions between @old_state and @state to the device.
++ *
++ * A single request is built that holds one frame per damage clip (converted to
++ * BGR888), followed by a footer carrying a timestamp, padded to a multiple of
++ * 16 bytes. The device's response has to echo that timestamp.
++ *
++ * Returns 0 on success or when there is no damage, -ENOMEM on allocation
++ * failure, -EIO if the response timestamp does not match the request, or the
++ * error returned by the framebuffer access, blit, send or read step that
++ * failed.
++ */
++static int appletbdrm_flush_damage(struct appletbdrm_device *adev,
++ struct drm_plane_state *old_state,
++ struct drm_plane_state *state)
++{
++ struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state);
++ struct appletbdrm_fb_request_response *response;
++ struct appletbdrm_fb_request_footer *footer;
++ struct drm_atomic_helper_damage_iter iter;
++ struct drm_framebuffer *fb = state->fb;
++ struct appletbdrm_fb_request *request;
++ struct drm_device *drm = &adev->drm;
++ struct appletbdrm_frame *frame;
++ u64 timestamp = ktime_get_ns();
++ struct drm_rect damage;
++ size_t frames_size = 0;
++ size_t request_size;
++ int ret;
++
++ /* First pass: compute the total size of all frames */
++ drm_atomic_helper_damage_iter_init(&iter, old_state, state);
++ drm_atomic_for_each_plane_damage(&iter, &damage) {
++ frames_size += struct_size(frame, buf, rect_size(&damage));
++ }
++
++ if (!frames_size)
++ return 0;
++
++ request_size = ALIGN(sizeof(*request) + frames_size + sizeof(*footer), 16);
++
++ request = kzalloc(request_size, GFP_KERNEL);
++ if (!request)
++ return -ENOMEM;
++
++ response = kzalloc(sizeof(*response), GFP_KERNEL);
++ if (!response) {
++ ret = -ENOMEM;
++ goto free_request;
++ }
++
++ ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
++ if (ret) {
++ drm_err(drm, "Failed to start CPU framebuffer access (%pe)\n", ERR_PTR(ret));
++ goto free_response;
++ }
++
++ request->header.unk_00 = cpu_to_le16(2);
++ request->header.unk_02 = cpu_to_le16(0x12);
++ request->header.unk_04 = cpu_to_le32(9);
++ request->header.size = cpu_to_le32(request_size - sizeof(request->header));
++ request->unk_10 = cpu_to_le16(1);
++ request->msg_id = timestamp & 0xff;
++
++ frame = (struct appletbdrm_frame *)request->data;
++
++ /* Second pass: fill in one frame per damage clip */
++ drm_atomic_helper_damage_iter_init(&iter, old_state, state);
++ drm_atomic_for_each_plane_damage(&iter, &damage) {
++ struct iosys_map dst = IOSYS_MAP_INIT_VADDR(frame->buf);
++ u32 buf_size = rect_size(&damage);
++
++ /*
++ * The coordinates need to be translated to the coordinate
++ * system the device expects, see the comment in
++ * appletbdrm_setup_mode_config
++ */
++ frame->begin_x = cpu_to_le16(damage.y1);
++ frame->begin_y = cpu_to_le16(adev->height - damage.x2);
++ frame->width = cpu_to_le16(drm_rect_height(&damage));
++ frame->height = cpu_to_le16(drm_rect_width(&damage));
++ frame->buf_size = cpu_to_le32(buf_size);
++
++ ret = drm_fb_blit(&dst, NULL, DRM_FORMAT_BGR888,
++ &shadow_plane_state->data[0], fb, &damage);
++ if (ret) {
++ drm_err(drm, "Failed to copy damage clip (%pe)\n", ERR_PTR(ret));
++ goto end_fb_cpu_access;
++ }
++
++ frame = (void *)frame + struct_size(frame, buf, buf_size);
++ }
++
++ footer = (struct appletbdrm_fb_request_footer *)&request->data[frames_size];
++
++ footer->unk_0c = cpu_to_le32(0xfffe);
++ footer->unk_1c = cpu_to_le32(0x80001);
++ footer->unk_34 = cpu_to_le32(0x80002);
++ footer->unk_4c = cpu_to_le32(0xffff);
++ footer->timestamp = cpu_to_le64(timestamp);
++
++ ret = appletbdrm_send_request(adev, &request->header, request_size);
++ if (ret)
++ goto end_fb_cpu_access;
++
++ ret = appletbdrm_read_response(adev, &response->header, sizeof(*response),
++ APPLETBDRM_MSG_UPDATE_COMPLETE);
++ if (ret)
++ goto end_fb_cpu_access;
++
++ if (response->timestamp != footer->timestamp) {
++ drm_err(drm, "Response timestamp (%llu) doesn't match request timestamp (%llu)\n",
++ le64_to_cpu(response->timestamp), timestamp);
++ /* ret is still 0 from the successful read, so report the mismatch */
++ ret = -EIO;
++ goto end_fb_cpu_access;
++ }
++
++end_fb_cpu_access:
++ drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
++free_response:
++ kfree(response);
++free_request:
++ kfree(request);
++
++ return ret;
++}
++
++static int appletbdrm_connector_helper_get_modes(struct drm_connector *connector)
++{
++ struct appletbdrm_device *adev = drm_to_adev(connector->dev);
++
++ return drm_connector_helper_get_modes_fixed(connector, &adev->mode);
++}
++
++static enum drm_mode_status appletbdrm_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
++ const struct drm_display_mode *mode)
++{
++ struct drm_crtc *crtc = &pipe->crtc;
++ struct appletbdrm_device *adev = drm_to_adev(crtc->dev);
++
++ return drm_crtc_helper_mode_valid_fixed(crtc, mode, &adev->mode);
++}
++
++static void appletbdrm_pipe_disable(struct drm_simple_display_pipe *pipe)
++{
++ struct appletbdrm_device *adev = drm_to_adev(pipe->crtc.dev);
++ int idx;
++
++ if (!drm_dev_enter(&adev->drm, &idx))
++ return;
++
++ appletbdrm_clear_display(adev);
++
++ drm_dev_exit(idx);
++}
++
++static void appletbdrm_pipe_update(struct drm_simple_display_pipe *pipe,
++ struct drm_plane_state *old_state)
++{
++ struct drm_crtc *crtc = &pipe->crtc;
++ struct appletbdrm_device *adev = drm_to_adev(crtc->dev);
++ int idx;
++
++ if (!crtc->state->active || !drm_dev_enter(&adev->drm, &idx))
++ return;
++
++ appletbdrm_flush_damage(adev, old_state, pipe->plane.state);
++
++ drm_dev_exit(idx);
++}
++
++static const u32 appletbdrm_formats[] = {
++ DRM_FORMAT_BGR888,
++ DRM_FORMAT_XRGB8888, /* emulated */
++};
++
++static const struct drm_mode_config_funcs appletbdrm_mode_config_funcs = {
++ .fb_create = drm_gem_fb_create_with_dirty,
++ .atomic_check = drm_atomic_helper_check,
++ .atomic_commit = drm_atomic_helper_commit,
++};
++
++static const struct drm_connector_funcs appletbdrm_connector_funcs = {
++ .reset = drm_atomic_helper_connector_reset,
++ .destroy = drm_connector_cleanup,
++ .fill_modes = drm_helper_probe_single_connector_modes,
++ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
++ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
++};
++
++static const struct drm_connector_helper_funcs appletbdrm_connector_helper_funcs = {
++ .get_modes = appletbdrm_connector_helper_get_modes,
++};
++
++static const struct drm_simple_display_pipe_funcs appletbdrm_pipe_funcs = {
++ DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS,
++ .update = appletbdrm_pipe_update,
++ .disable = appletbdrm_pipe_disable,
++ .mode_valid = appletbdrm_pipe_mode_valid,
++};
++
++DEFINE_DRM_GEM_FOPS(appletbdrm_drm_fops);
++
++static const struct drm_driver appletbdrm_drm_driver = {
++ DRM_GEM_SHMEM_DRIVER_OPS,
++ .name = "appletbdrm",
++ .desc = "Apple Touch Bar DRM Driver",
++ .date = "20230910",
++ .major = 1,
++ .minor = 0,
++ .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
++ .fops = &appletbdrm_drm_fops,
++};
++
++static int appletbdrm_setup_mode_config(struct appletbdrm_device *adev)
++{
++ struct drm_connector *connector = &adev->connector;
++ struct drm_device *drm = &adev->drm;
++ struct device *dev = adev->dev;
++ int ret;
++
++ ret = drmm_mode_config_init(drm);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to initialize mode configuration\n");
++
++ /*
++ * The coordinate system used by the device is different from the
++ * coordinate system of the framebuffer in that the x and y axes are
++ * swapped, and that the y axis is inverted; so what the device reports
++ * as the height is actually the width of the framebuffer and vice
++ * versa
++ */
++ drm->mode_config.min_width = 0;
++ drm->mode_config.min_height = 0;
++ drm->mode_config.max_width = max(adev->height, DRM_SHADOW_PLANE_MAX_WIDTH);
++ drm->mode_config.max_height = max(adev->width, DRM_SHADOW_PLANE_MAX_HEIGHT);
++ drm->mode_config.preferred_depth = APPLETBDRM_BITS_PER_PIXEL;
++ drm->mode_config.funcs = &appletbdrm_mode_config_funcs;
++
++ adev->mode = (struct drm_display_mode) {
++ DRM_MODE_INIT(60, adev->height, adev->width,
++ DRM_MODE_RES_MM(adev->height, 218),
++ DRM_MODE_RES_MM(adev->width, 218))
++ };
++
++ ret = drm_connector_init(drm, connector,
++ &appletbdrm_connector_funcs, DRM_MODE_CONNECTOR_USB);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to initialize connector\n");
++
++ drm_connector_helper_add(connector, &appletbdrm_connector_helper_funcs);
++
++ ret = drm_connector_set_panel_orientation(connector,
++ DRM_MODE_PANEL_ORIENTATION_RIGHT_UP);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to set panel orientation\n");
++
++ connector->display_info.non_desktop = true;
++ ret = drm_object_property_set_value(&connector->base,
++ drm->mode_config.non_desktop_property, true);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to set non-desktop property\n");
++
++ ret = drm_simple_display_pipe_init(drm, &adev->pipe, &appletbdrm_pipe_funcs,
++ appletbdrm_formats, ARRAY_SIZE(appletbdrm_formats),
++ NULL, &adev->connector);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to initialize simple display pipe\n");
++
++ drm_plane_enable_fb_damage_clips(&adev->pipe.plane);
++
++ drm_mode_config_reset(drm);
++
++ ret = drm_dev_register(drm, 0);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to register DRM device\n");
++
++ return 0;
++}
++
++static int appletbdrm_probe(struct usb_interface *intf,
++ const struct usb_device_id *id)
++{
++ struct usb_endpoint_descriptor *bulk_in, *bulk_out;
++ struct device *dev = &intf->dev;
++ struct appletbdrm_device *adev;
++ int ret;
++
++ ret = usb_find_common_endpoints(intf->cur_altsetting, &bulk_in, &bulk_out, NULL, NULL);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to find bulk endpoints\n");
++
++ adev = devm_drm_dev_alloc(dev, &appletbdrm_drm_driver, struct appletbdrm_device, drm);
++ if (IS_ERR(adev))
++ return PTR_ERR(adev);
++
++ adev->dev = dev;
++ adev->in_ep = bulk_in->bEndpointAddress;
++ adev->out_ep = bulk_out->bEndpointAddress;
++
++ usb_set_intfdata(intf, adev);
++
++ ret = appletbdrm_get_information(adev);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to get display information\n");
++
++ ret = appletbdrm_signal_readiness(adev);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to signal readiness\n");
++
++ ret = appletbdrm_clear_display(adev);
++ if (ret)
++ return dev_err_probe(dev, ret, "Failed to clear display\n");
++
++ return appletbdrm_setup_mode_config(adev);
++}
++
++static void appletbdrm_disconnect(struct usb_interface *intf)
++{
++ struct appletbdrm_device *adev = usb_get_intfdata(intf);
++ struct drm_device *drm = &adev->drm;
++
++ drm_dev_unplug(drm);
++ drm_atomic_helper_shutdown(drm);
++}
++
++static void appletbdrm_shutdown(struct usb_interface *intf)
++{
++ struct appletbdrm_device *adev = usb_get_intfdata(intf);
++
++ /*
++ * The framebuffer needs to be cleared on shutdown since its content
++ * persists across boots
++ */
++ drm_atomic_helper_shutdown(&adev->drm);
++}
++
++static const struct usb_device_id appletbdrm_usb_id_table[] = {
++ { USB_DEVICE_INTERFACE_CLASS(0x05ac, 0x8302, USB_CLASS_AUDIO_VIDEO) },
++ {}
++};
++MODULE_DEVICE_TABLE(usb, appletbdrm_usb_id_table);
++
++static struct usb_driver appletbdrm_usb_driver = {
++ .name = "appletbdrm",
++ .probe = appletbdrm_probe,
++ .disconnect = appletbdrm_disconnect,
++ .shutdown = appletbdrm_shutdown,
++ .id_table = appletbdrm_usb_id_table,
++};
++module_usb_driver(appletbdrm_usb_driver);
++
++MODULE_AUTHOR("Kerem Karabay <kekrby@gmail.com>");
++MODULE_DESCRIPTION("Apple Touch Bar DRM Driver");
++MODULE_LICENSE("GPL");
+--
+2.42.0
+
+From e34c6d09241ba826a6e9b2b0e50e306b273b7bda Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain <orlandoch.dev@gmail.com>
+Date: Thu, 16 Feb 2023 12:32:34 +1100
+Subject: [PATCH 5/8] Documentation: leds: standardise keyboard backlight led
+ names
+
+Advise use of either "input*:*:kbd_backlight" or ":*:kbd_backlight". We
+don't want people using vendor or product name (e.g. "smc", "apple",
+"asus") as this information is available from sysfs anyway, and it made the
+folder names inconsistent.
+
+Signed-off-by: Orlando Chamberlain <orlandoch.dev@gmail.com>
+---
+ Documentation/leds/well-known-leds.txt | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt
+index 2160382c86be..4e5429fce4d8 100644
+--- a/Documentation/leds/well-known-leds.txt
++++ b/Documentation/leds/well-known-leds.txt
+@@ -44,6 +44,14 @@ Legacy: "lp5523:kb{1,2,3,4,5,6}" (Nokia N900)
+
+ Frontlight/backlight of main keyboard.
+
++Good: ":*:kbd_backlight"
++Good: "input*:*:kbd_backlight"
++Legacy: "*:*:kbd_backlight"
++
++Many drivers have the vendor or product name as the first field of the led name,
++this makes names inconsistent and is redundant as that information is already in
++sysfs.
++
+ Legacy: "button-backlight" (Motorola Droid 4)
+
+ Some phones have touch buttons below screen; it is different from main
+--
+2.39.1
+
+From c124f5401040d02abd6d349979be29acd1e88545 Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain <orlandoch.dev@gmail.com>
+Date: Fri, 10 Feb 2023 23:14:31 +1100
+Subject: [PATCH 6/8] HID: hid-apple-magic-backlight: Add driver for keyboard
+ backlight on internal Magic Keyboards
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This driver adds support for the keyboard backlight on Intel T2 Macs
+with internal Magic Keyboards (MacBookPro16,x and MacBookAir9,1)
+
+Co-developed-by: Kerem Karabay <kekrby@gmail.com>
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+Signed-off-by: Orlando Chamberlain <orlandoch.dev@gmail.com>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
+---
+ MAINTAINERS | 6 ++
+ drivers/hid/Kconfig | 13 +++
+ drivers/hid/Makefile | 1 +
+ drivers/hid/hid-apple-magic-backlight.c | 120 ++++++++++++++++++++++++
+ 4 files changed, 140 insertions(+)
+ create mode 100644 drivers/hid/hid-apple-magic-backlight.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index dfc63d257..9148bda0a 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9169,6 +9169,12 @@ L: linux-input@vger.kernel.org
+ S: Maintained
+ F: drivers/hid/hid-appletb-*
+
++HID APPLE MAGIC BACKLIGHT DRIVER
++M: Orlando Chamberlain <orlandoch.dev@gmail.com>
++L: linux-input@vger.kernel.org
++S: Maintained
++F: drivers/hid/hid-apple-magic-backlight.c
++
+ HID CORE LAYER
+ M: Jiri Kosina <jikos@kernel.org>
+ M: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
+index 4e238df87..83fbab6d4 100644
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -169,6 +169,19 @@ config HID_APPLETB_KBD
+ To compile this driver as a module, choose M here: the
+ module will be called hid-appletb-kbd.
+
++config HID_APPLE_MAGIC_BACKLIGHT
++ tristate "Apple Magic Keyboard Backlight"
++ depends on USB_HID
++ depends on LEDS_CLASS
++ depends on NEW_LEDS
++ help
++ Say Y here if you want support for the keyboard backlight on Macs with
++ the magic keyboard (MacBookPro16,x and MacBookAir9,1). Note that this
++ driver is not for external magic keyboards.
++
++ To compile this driver as a module, choose M here: the
++ module will be called hid-apple-magic-backlight.
++
+ config HID_ASUS
+ tristate "Asus"
+ depends on USB_HID
+diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
+index 5b60015fd..581f5e720 100644
+--- a/drivers/hid/Makefile
++++ b/drivers/hid/Makefile
+@@ -31,6 +31,7 @@ obj-$(CONFIG_HID_APPLE) += hid-apple.o
+ obj-$(CONFIG_HID_APPLEIR) += hid-appleir.o
+ obj-$(CONFIG_HID_APPLETB_BL) += hid-appletb-bl.o
+ obj-$(CONFIG_HID_APPLETB_KBD) += hid-appletb-kbd.o
++obj-$(CONFIG_HID_APPLE_MAGIC_BACKLIGHT) += hid-apple-magic-backlight.o
+ obj-$(CONFIG_HID_CREATIVE_SB0540) += hid-creative-sb0540.o
+ obj-$(CONFIG_HID_ASUS) += hid-asus.o
+ obj-$(CONFIG_HID_AUREAL) += hid-aureal.o
+diff --git a/drivers/hid/hid-apple-magic-backlight.c b/drivers/hid/hid-apple-magic-backlight.c
+new file mode 100644
+index 000000000..f0fc02ff3
+--- /dev/null
++++ b/drivers/hid/hid-apple-magic-backlight.c
+@@ -0,0 +1,120 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Apple Magic Keyboard Backlight Driver
++ *
++ * For Intel Macs with internal Magic Keyboard (MacBookPro16,1-4 and MacBookAir9,1)
++ *
++ * Copyright (c) 2022 Kerem Karabay <kekrby@gmail.com>
++ * Copyright (c) 2023 Orlando Chamberlain <orlandoch.dev@gmail.com>
++ */
++
++#include <linux/hid.h>
++#include <linux/leds.h>
++#include <linux/device.h>
++#include <linux/errno.h>
++#include <dt-bindings/leds/common.h>
++
++#include "hid-ids.h"
++
++#define HID_USAGE_MAGIC_BL 0xff00000f
++
++#define APPLE_MAGIC_REPORT_ID_POWER 3
++#define APPLE_MAGIC_REPORT_ID_BRIGHTNESS 1
++
++struct apple_magic_backlight {
++ struct led_classdev cdev;
++ struct hid_report *brightness;
++ struct hid_report *power;
++};
++
++static void apple_magic_backlight_report_set(struct hid_report *rep, s32 value, u8 rate)
++{
++ rep->field[0]->value[0] = value;
++ rep->field[1]->value[0] = 0x5e; /* Mimic Windows */
++ rep->field[1]->value[0] |= rate << 8;
++
++ hid_hw_request(rep->device, rep, HID_REQ_SET_REPORT);
++}
++
++static void apple_magic_backlight_set(struct apple_magic_backlight *backlight,
++ int brightness, char rate)
++{
++ apple_magic_backlight_report_set(backlight->power, brightness ? 1 : 0, rate);
++ if (brightness)
++ apple_magic_backlight_report_set(backlight->brightness, brightness, rate);
++}
++
++static int apple_magic_backlight_led_set(struct led_classdev *led_cdev,
++ enum led_brightness brightness)
++{
++ struct apple_magic_backlight *backlight = container_of(led_cdev,
++ struct apple_magic_backlight, cdev);
++
++ apple_magic_backlight_set(backlight, brightness, 1);
++ return 0;
++}
++
++/*
++ * Bind to the keyboard backlight HID interface: parse the report descriptor,
++ * start the hardware, look up the brightness and power feature reports, turn
++ * the backlight off and register an LED class device for it.
++ *
++ * Returns 0 on success, -ENODEV if the interface is not the keyboard backlight
++ * or its reports are unusable, -ENOMEM on allocation failure, or the error
++ * returned by hid_parse(), hid_hw_start() or devm_led_classdev_register().
++ */
++static int apple_magic_backlight_probe(struct hid_device *hdev,
++ const struct hid_device_id *id)
++{
++ struct apple_magic_backlight *backlight;
++ int rc;
++
++ rc = hid_parse(hdev);
++ if (rc)
++ return rc;
++
++ /*
++ * Ensure this usb endpoint is for the keyboard backlight, not touchbar
++ * backlight.
++ */
++ if (hdev->collection[0].usage != HID_USAGE_MAGIC_BL)
++ return -ENODEV;
++
++ backlight = devm_kzalloc(&hdev->dev, sizeof(*backlight), GFP_KERNEL);
++ if (!backlight)
++ return -ENOMEM;
++
++ rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
++ if (rc)
++ return rc;
++
++ backlight->brightness = hid_register_report(hdev, HID_FEATURE_REPORT,
++ APPLE_MAGIC_REPORT_ID_BRIGHTNESS, 0);
++ backlight->power = hid_register_report(hdev, HID_FEATURE_REPORT,
++ APPLE_MAGIC_REPORT_ID_POWER, 0);
++
++ /*
++ * apple_magic_backlight_report_set() writes to field[0] and field[1]
++ * of both reports, so reject reports with fewer than two fields.
++ */
++ if (!backlight->brightness || backlight->brightness->maxfield < 2 ||
++ !backlight->power || backlight->power->maxfield < 2) {
++ rc = -ENODEV;
++ goto hw_stop;
++ }
++
++ backlight->cdev.name = ":white:" LED_FUNCTION_KBD_BACKLIGHT;
++ backlight->cdev.max_brightness = backlight->brightness->field[0]->logical_maximum;
++ backlight->cdev.brightness_set_blocking = apple_magic_backlight_led_set;
++
++ apple_magic_backlight_set(backlight, 0, 0);
++
++ /* The hardware was started above, so stop it if registration fails */
++ rc = devm_led_classdev_register(&hdev->dev, &backlight->cdev);
++ if (rc)
++ goto hw_stop;
++
++ return 0;
++
++hw_stop:
++ hid_hw_stop(hdev);
++ return rc;
++}
++
++static const struct hid_device_id apple_magic_backlight_hid_ids[] = {
++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) },
++ { }
++};
++MODULE_DEVICE_TABLE(hid, apple_magic_backlight_hid_ids);
++
++static struct hid_driver apple_magic_backlight_hid_driver = {
++ .name = "hid-apple-magic-backlight",
++ .id_table = apple_magic_backlight_hid_ids,
++ .probe = apple_magic_backlight_probe,
++};
++module_hid_driver(apple_magic_backlight_hid_driver);
++
++MODULE_DESCRIPTION("MacBook Magic Keyboard Backlight");
++MODULE_AUTHOR("Orlando Chamberlain <orlandoch.dev@gmail.com>");
++MODULE_LICENSE("GPL");
+--
+2.39.2
+
+From 12c7a3306a631a651464ef56318a218dc4cdb157 Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain <orlandoch.dev@gmail.com>
+Date: Sat, 18 Feb 2023 23:05:05 +1100
+Subject: [PATCH 8/9] i915: 4 lane quirk for mbp15,1
+
+Needed to use iGPU when dGPU was boot GPU
+
+Patch written by Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/gpu/drm/i915/display/intel_ddi.c | 3 +++
+ drivers/gpu/drm/i915/display/intel_quirks.c | 15 +++++++++++++++
+ drivers/gpu/drm/i915/display/intel_quirks.h | 1 +
+ 3 files changed, 19 insertions(+)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
+index 0f1ec2a98cc8..1ec67390f623 100644
+--- a/drivers/gpu/drm/i915/display/intel_ddi.c
++++ b/drivers/gpu/drm/i915/display/intel_ddi.c
+@@ -4097,6 +4097,9 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port)
+ if (dig_port->saved_port_bits & DDI_A_4_LANES)
+ return false;
+
++ if (intel_has_quirk(dev_priv, QUIRK_DDI_A_FORCE_4_LANES))
++ return true;
++
+ /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only
+ * supported configuration
+ */
+diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c
+index 6e48d3bcdfec..a8c55e165b46 100644
+--- a/drivers/gpu/drm/i915/display/intel_quirks.c
++++ b/drivers/gpu/drm/i915/display/intel_quirks.c
+@@ -59,6 +59,18 @@ static void quirk_increase_ddi_disabled_time(struct drm_i915_private *i915)
+ drm_info(&i915->drm, "Applying Increase DDI Disabled quirk\n");
+ }
+
++/*
++ * In some cases, the firmware might not set the lane count to 4 (for example,
++ * when booting in some dual GPU Macs with the dGPU as the default GPU), this
++ * quirk is used to force it as otherwise it might not be possible to compute a
++ * valid link configuration.
++ */
++static void quirk_ddi_a_force_4_lanes(struct drm_i915_private *i915)
++{
++ intel_set_quirk(i915, QUIRK_DDI_A_FORCE_4_LANES);
++ drm_info(&i915->drm, "Applying DDI A Forced 4 Lanes quirk\n");
++}
++
+ static void quirk_no_pps_backlight_power_hook(struct drm_i915_private *i915)
+ {
+ intel_set_quirk(i915, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK);
+@@ -199,6 +211,9 @@ static struct intel_quirk intel_quirks[] = {
+ { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
+ /* HP Notebook - 14-r206nv */
+ { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness },
++
++ /* Apple MacBookPro15,1 */
++ { 0x3e9b, 0x106b, 0x0176, quirk_ddi_a_force_4_lanes },
+ };
+
+ void intel_init_quirks(struct drm_i915_private *i915)
+diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h
+index 10a4d163149f..78aacf1f6f5c 100644
+--- a/drivers/gpu/drm/i915/display/intel_quirks.h
++++ b/drivers/gpu/drm/i915/display/intel_quirks.h
+@@ -17,6 +17,7 @@ enum intel_quirk_id {
+ QUIRK_INVERT_BRIGHTNESS,
+ QUIRK_LVDS_SSC_DISABLE,
+ QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK,
++ QUIRK_DDI_A_FORCE_4_LANES,
+ };
+
+ void intel_init_quirks(struct drm_i915_private *i915);
+--
+2.39.1
+
+From bd8e785c74e22978648ced004552eb9c137f1eb6 Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain <orlandoch.dev@gmail.com>
+Date: Fri, 10 Feb 2023 22:45:00 +1100
+Subject: [PATCH 9/9] apple-gmux: allow switching to igpu at probe
+
+This means users don't need to set the gpu-power-prefs efivar to use the
+igpu while runtime switching isn't working, so macOS will be unaffected.
+
+This isn't really upstreamable, what we want upstream is the ability to
+switch at runtime (so both gpus need to be able to probe the eDP panel).
+
+Based off of work by Kerem Karabay <kekrby@gmail.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +++
+ drivers/gpu/vga/vga_switcheroo.c | 7 +------
+ drivers/pci/vgaarb.c | 1 +
+ drivers/platform/x86/apple-gmux.c | 18 ++++++++++++++++++
+ 4 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index 81edf66dbea8..8f3daf28665b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -2051,6 +2051,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
+ int ret, retry = 0, i;
+ bool supports_atomic = false;
+
++ if (vga_switcheroo_client_probe_defer(pdev))
++ return -EPROBE_DEFER;
++
+ /* skip devices which are owned by radeon */
+ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
+ if (amdgpu_unsupported_pciidlist[i] == pdev->device)
+diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
+index 365e6ddbe90f..cf357cd3389d 100644
+--- a/drivers/gpu/vga/vga_switcheroo.c
++++ b/drivers/gpu/vga/vga_switcheroo.c
+@@ -438,12 +438,7 @@ find_active_client(struct list_head *head)
+ bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev)
+ {
+ if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+- /*
+- * apple-gmux is needed on pre-retina MacBook Pro
+- * to probe the panel if pdev is the inactive GPU.
+- */
+- if (apple_gmux_present() && pdev != vga_default_device() &&
+- !vgasr_priv.handler_flags)
++ if (apple_gmux_present() && !vgasr_priv.handler_flags)
+ return true;
+ }
+
+diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
+index 5e6b1eb54c64..1f11701d37d1 100644
+--- a/drivers/pci/vgaarb.c
++++ b/drivers/pci/vgaarb.c
+@@ -143,6 +143,7 @@ void vga_set_default_device(struct pci_dev *pdev)
+ pci_dev_put(vga_default);
+ vga_default = pci_dev_get(pdev);
+ }
++EXPORT_SYMBOL_GPL(vga_set_default_device);
+
+ /**
+ * vga_remove_vgacon - deactivate VGA console
+diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
+index 1417e230edbd..e69785af8e1d 100644
+--- a/drivers/platform/x86/apple-gmux.c
++++ b/drivers/platform/x86/apple-gmux.c
+@@ -21,6 +21,7 @@
+ #include <linux/delay.h>
+ #include <linux/pci.h>
+ #include <linux/vga_switcheroo.h>
++#include <linux/vgaarb.h>
+ #include <linux/debugfs.h>
+ #include <acpi/video.h>
+ #include <asm/io.h>
+@@ -107,6 +108,10 @@ struct apple_gmux_config {
+
+ # define MMIO_GMUX_MAX_BRIGHTNESS 0xffff
+
++/* Load-time only (permissions 0): switch to the iGPU during probe when set */
++static bool force_igd;
++module_param(force_igd, bool, 0);
++MODULE_PARM_DESC(force_igd, "Switch gpu to igd on module load. Make sure that you have apple-set-os set up and the iGPU is in `lspci -s 00:02.0`. (default: false) (bool)");
++
+ static u8 gmux_pio_read8(struct apple_gmux_data *gmux_data, int port)
+ {
+ return inb(gmux_data->iostart + port);
+@@ -945,6 +950,19 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
+ gmux_enable_interrupts(gmux_data);
+ gmux_read_switch_state(gmux_data);
+
++ if (force_igd) {
++ struct pci_dev *pdev;
++
++ /* Look the iGPU up at its expected address, 0000:00:02.0 */
++ pdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(2, 0));
++ if (pdev) {
++ pr_info("Switching to IGD\n");
++ gmux_switchto(VGA_SWITCHEROO_IGD);
++ vga_set_default_device(pdev);
++ /*
++ * vga_set_default_device() takes its own reference, so
++ * drop the one obtained from the lookup above
++ */
++ pci_dev_put(pdev);
++ } else {
++ pr_err("force_igd is true, but couldn't find iGPU at 00:02.0! Is apple-set-os working?\n");
++ }
++ }
++
+ /*
+ * Retina MacBook Pros cannot switch the panel's AUX separately
+ * and need eDP pre-calibration. They are distinguishable from
+--
+2.43.0
+
+From 6adb501c697cd0e3246e75237ee8e43eb5a92cc3 Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Thu, 23 Nov 2023 18:58:51 +0530
+Subject: [PATCH] efi: libstub: add support for the apple_set_os protocol
+
+On dual GPU EFI Macs, the EFI stub needs to report that it is booting
+macOS in order to prevent the firmware from disabling the iGPU.
+
+See also this patch for GRUB by Andreas Heider <andreas@heider.io>:
+https://lists.gnu.org/archive/html/grub-devel/2013-12/msg00442.html
+---
+ .../admin-guide/kernel-parameters.txt | 2 ++
+ .../firmware/efi/libstub/efi-stub-helper.c | 3 +++
+ drivers/firmware/efi/libstub/efistub.h | 14 ++++++++++
+ drivers/firmware/efi/libstub/x86-stub.c | 27 +++++++++++++++++++
+ include/linux/efi.h | 1 +
+ 5 files changed, 47 insertions(+)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 41644336e..cbd4697a5 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -399,6 +399,8 @@
+ useful so that a dump capture kernel won't be
+ shot down by NMI
+
++ apple_set_os [KNL] Report to the firmware that macOS is being booted
++
+ autoconf= [IPV6]
+ See Documentation/networking/ipv6.rst.
+
+diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
+index bfa30625f..3d99acc1a 100644
+--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
+@@ -19,6 +19,7 @@
+ bool efi_nochunk;
+ bool efi_nokaslr = !IS_ENABLED(CONFIG_RANDOMIZE_BASE);
+ bool efi_novamap;
++bool efi_apple_set_os;
+
+ static bool efi_noinitrd;
+ static bool efi_nosoftreserve;
+@@ -73,6 +74,8 @@ efi_status_t efi_parse_options(char const *cmdline)
+ efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
+ } else if (!strcmp(param, "noinitrd")) {
+ efi_noinitrd = true;
++ } else if (!strcmp(param, "apple_set_os")) {
++ efi_apple_set_os = true;
+ } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+ efi_no5lvl = true;
+ } else if (!strcmp(param, "efi") && val) {
+diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
+index 212687c30..21b414d09 100644
+--- a/drivers/firmware/efi/libstub/efistub.h
++++ b/drivers/firmware/efi/libstub/efistub.h
+@@ -38,6 +38,7 @@ extern bool efi_nochunk;
+ extern bool efi_nokaslr;
+ extern int efi_loglevel;
+ extern bool efi_novamap;
++extern bool efi_apple_set_os;
+
+ extern const efi_system_table_t *efi_system_table;
+
+@@ -825,6 +826,19 @@ union apple_properties_protocol {
+ } mixed_mode;
+ };
+
++typedef struct apple_set_os_protocol apple_set_os_protocol_t;
++
++struct apple_set_os_protocol {
++ u64 version;
++ efi_status_t (__efiapi *set_os_version) (const char *);
++ efi_status_t (__efiapi *set_os_vendor) (const char *);
++ struct {
++ u32 version;
++ u32 set_os_version;
++ u32 set_os_vendor;
++ } mixed_mode;
++};
++
+ typedef u32 efi_tcg2_event_log_format;
+
+ #define INITRD_EVENT_TAG_ID 0x8F3B22ECU
+diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
+index 70b325a2f..2131f8543 100644
+--- a/drivers/firmware/efi/libstub/x86-stub.c
++++ b/drivers/firmware/efi/libstub/x86-stub.c
+@@ -223,6 +223,30 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
+ }
+ }
+
++static void apple_set_os(void)
++{
++ efi_guid_t guid = APPLE_SET_OS_PROTOCOL_GUID;
++ apple_set_os_protocol_t *set_os;
++ efi_status_t status;
++
++ status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&set_os);
++ if (status != EFI_SUCCESS)
++ return;
++
++ if (efi_table_attr(set_os, version) >= 2) {
++ status = efi_fn_call(set_os, set_os_vendor, "Apple Inc.");
++ if (status != EFI_SUCCESS)
++ efi_err("Failed to set OS vendor via apple_set_os\n");
++ }
++
++ /* The version being set doesn't seem to matter */
++ if (efi_table_attr(set_os, version) > 0) {
++ status = efi_fn_call(set_os, set_os_version, "Mac OS X 10.9");
++ if (status != EFI_SUCCESS)
++ efi_err("Failed to set OS version via apple_set_os\n");
++ }
++}
++
+ void efi_adjust_memory_range_protection(unsigned long start,
+ unsigned long size)
+ {
+@@ -321,6 +345,9 @@ static void setup_quirks(struct boot_params *boot_params)
+ if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) &&
+ !memcmp(efistub_fw_vendor(), apple, sizeof(apple)))
+ retrieve_apple_device_properties(boot_params);
++
++ if (efi_apple_set_os)
++ apple_set_os();
+ }
+
+ /*
+diff --git a/include/linux/efi.h b/include/linux/efi.h
+index 80b21d1c6..f1e58e027 100644
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -387,6 +387,7 @@ void efi_native_runtime_setup(void);
+ #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
+ #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+ #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0)
++#define APPLE_SET_OS_PROTOCOL_GUID EFI_GUID(0xc5c5da95, 0x7d5c, 0x45e6, 0xb2, 0xf1, 0x3f, 0xd5, 0x2b, 0xb1, 0x00, 0x77)
+ #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f)
+ #define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
+ #define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d)
+--
+2.34.1
+
+From 09dd6c563cd73d72e917de07e8d59358c41e051d Mon Sep 17 00:00:00 2001
+From: Paul Pawlowski <paul@mrarm.io>
+Date: Sun, 17 Nov 2019 23:12:55 +0100
+Subject: [PATCH 1/6] applesmc: convert static structures to drvdata
+
+All static data structures have been moved to an applesmc_device struct,
+which is then associated with the platform device.
+This change is intended to ease the migration to an acpi_device, where
+static data would preferably be avoided.
+
+Signed-off-by: Aun-Ali Zaidi <admin@kodeit.net>
+---
+ drivers/hwmon/applesmc.c | 540 +++++++++++++++++++++++----------------
+ 1 file changed, 319 insertions(+), 221 deletions(-)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index 79b498f816fe..62211b590a61 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -6,6 +6,7 @@
+ *
+ * Copyright (C) 2007 Nicolas Boichat <nicolas@boichat.ch>
+ * Copyright (C) 2010 Henrik Rydberg <rydberg@euromail.se>
++ * Copyright (C) 2019 Paul Pawlowski <paul@mrarm.io>
+ *
+ * Based on hdaps.c driver:
+ * Copyright (C) 2005 Robert Love <rml@novell.com>
+@@ -119,7 +120,7 @@ struct applesmc_entry {
+ };
+
+ /* Register lookup and registers common to all SMCs */
+-static struct applesmc_registers {
++struct applesmc_registers {
+ struct mutex mutex; /* register read/write mutex */
+ unsigned int key_count; /* number of SMC registers */
+ unsigned int fan_count; /* number of fans */
+@@ -133,26 +134,32 @@ static struct applesmc_registers {
+ bool init_complete; /* true when fully initialized */
+ struct applesmc_entry *cache; /* cached key entries */
+ const char **index; /* temperature key index */
+-} smcreg = {
+- .mutex = __MUTEX_INITIALIZER(smcreg.mutex),
+ };
+
+-static const int debug;
+-static struct platform_device *pdev;
+-static s16 rest_x;
+-static s16 rest_y;
+-static u8 backlight_state[2];
++struct applesmc_device {
++ struct platform_device *dev;
++ struct applesmc_registers reg;
+
+-static struct device *hwmon_dev;
+-static struct input_dev *applesmc_idev;
++ s16 rest_x;
++ s16 rest_y;
+
+-/*
+- * Last index written to key_at_index sysfs file, and value to use for all other
+- * key_at_index_* sysfs files.
+- */
+-static unsigned int key_at_index;
++ u8 backlight_state[2];
++
++ struct device *hwmon_dev;
++ struct input_dev *idev;
++
++ /*
++ * Last index written to key_at_index sysfs file, and value to use for all other
++ * key_at_index_* sysfs files.
++ */
++ unsigned int key_at_index;
++
++ struct workqueue_struct *backlight_wq;
++ struct work_struct backlight_work;
++ struct led_classdev backlight_dev;
++};
+
+-static struct workqueue_struct *applesmc_led_wq;
++static const int debug;
+
+ /*
+ * Wait for specific status bits with a mask on the SMC.
+@@ -338,36 +345,37 @@ static int read_register_count(unsigned int *count)
+ * All functions below are concurrency safe - callers should NOT hold lock.
+ */
+
+-static int applesmc_read_entry(const struct applesmc_entry *entry,
+- u8 *buf, u8 len)
++static int applesmc_read_entry(struct applesmc_device *smc,
++ const struct applesmc_entry *entry, u8 *buf, u8 len)
+ {
+ int ret;
+
+ if (entry->len != len)
+ return -EINVAL;
+- mutex_lock(&smcreg.mutex);
++ mutex_lock(&smc->reg.mutex);
+ ret = read_smc(APPLESMC_READ_CMD, entry->key, buf, len);
+- mutex_unlock(&smcreg.mutex);
++ mutex_unlock(&smc->reg.mutex);
+
+ return ret;
+ }
+
+-static int applesmc_write_entry(const struct applesmc_entry *entry,
+- const u8 *buf, u8 len)
++static int applesmc_write_entry(struct applesmc_device *smc,
++ const struct applesmc_entry *entry, const u8 *buf, u8 len)
+ {
+ int ret;
+
+ if (entry->len != len)
+ return -EINVAL;
+- mutex_lock(&smcreg.mutex);
++ mutex_lock(&smc->reg.mutex);
+ ret = write_smc(APPLESMC_WRITE_CMD, entry->key, buf, len);
+- mutex_unlock(&smcreg.mutex);
++ mutex_unlock(&smc->reg.mutex);
+ return ret;
+ }
+
+-static const struct applesmc_entry *applesmc_get_entry_by_index(int index)
++static const struct applesmc_entry *applesmc_get_entry_by_index(
++ struct applesmc_device *smc, int index)
+ {
+- struct applesmc_entry *cache = &smcreg.cache[index];
++ struct applesmc_entry *cache = &smc->reg.cache[index];
+ u8 key[4], info[6];
+ __be32 be;
+ int ret = 0;
+@@ -375,7 +383,7 @@ static const struct applesmc_entry *applesmc_get_entry_by_index(int index)
+ if (cache->valid)
+ return cache;
+
+- mutex_lock(&smcreg.mutex);
++ mutex_lock(&smc->reg.mutex);
+
+ if (cache->valid)
+ goto out;
+@@ -394,20 +402,21 @@ static const struct applesmc_entry *applesmc_get_entry_by_index(int index)
+ cache->valid = true;
+
+ out:
+- mutex_unlock(&smcreg.mutex);
++ mutex_unlock(&smc->reg.mutex);
+ if (ret)
+ return ERR_PTR(ret);
+ return cache;
+ }
+
+-static int applesmc_get_lower_bound(unsigned int *lo, const char *key)
++static int applesmc_get_lower_bound(struct applesmc_device *smc,
++ unsigned int *lo, const char *key)
+ {
+- int begin = 0, end = smcreg.key_count;
++ int begin = 0, end = smc->reg.key_count;
+ const struct applesmc_entry *entry;
+
+ while (begin != end) {
+ int middle = begin + (end - begin) / 2;
+- entry = applesmc_get_entry_by_index(middle);
++ entry = applesmc_get_entry_by_index(smc, middle);
+ if (IS_ERR(entry)) {
+ *lo = 0;
+ return PTR_ERR(entry);
+@@ -422,16 +431,17 @@ static int applesmc_get_lower_bound(unsigned int *lo, const char *key)
+ return 0;
+ }
+
+-static int applesmc_get_upper_bound(unsigned int *hi, const char *key)
++static int applesmc_get_upper_bound(struct applesmc_device *smc,
++ unsigned int *hi, const char *key)
+ {
+- int begin = 0, end = smcreg.key_count;
++ int begin = 0, end = smc->reg.key_count;
+ const struct applesmc_entry *entry;
+
+ while (begin != end) {
+ int middle = begin + (end - begin) / 2;
+- entry = applesmc_get_entry_by_index(middle);
++ entry = applesmc_get_entry_by_index(smc, middle);
+ if (IS_ERR(entry)) {
+- *hi = smcreg.key_count;
++ *hi = smc->reg.key_count;
+ return PTR_ERR(entry);
+ }
+ if (strcmp(key, entry->key) < 0)
+@@ -444,50 +454,54 @@ static int applesmc_get_upper_bound(unsigned int *hi, const char *key)
+ return 0;
+ }
+
+-static const struct applesmc_entry *applesmc_get_entry_by_key(const char *key)
++static const struct applesmc_entry *applesmc_get_entry_by_key(
++ struct applesmc_device *smc, const char *key)
+ {
+ int begin, end;
+ int ret;
+
+- ret = applesmc_get_lower_bound(&begin, key);
++ ret = applesmc_get_lower_bound(smc, &begin, key);
+ if (ret)
+ return ERR_PTR(ret);
+- ret = applesmc_get_upper_bound(&end, key);
++ ret = applesmc_get_upper_bound(smc, &end, key);
+ if (ret)
+ return ERR_PTR(ret);
+ if (end - begin != 1)
+ return ERR_PTR(-EINVAL);
+
+- return applesmc_get_entry_by_index(begin);
++ return applesmc_get_entry_by_index(smc, begin);
+ }
+
+-static int applesmc_read_key(const char *key, u8 *buffer, u8 len)
++static int applesmc_read_key(struct applesmc_device *smc,
++ const char *key, u8 *buffer, u8 len)
+ {
+ const struct applesmc_entry *entry;
+
+- entry = applesmc_get_entry_by_key(key);
++ entry = applesmc_get_entry_by_key(smc, key);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+- return applesmc_read_entry(entry, buffer, len);
++ return applesmc_read_entry(smc, entry, buffer, len);
+ }
+
+-static int applesmc_write_key(const char *key, const u8 *buffer, u8 len)
++static int applesmc_write_key(struct applesmc_device *smc,
++ const char *key, const u8 *buffer, u8 len)
+ {
+ const struct applesmc_entry *entry;
+
+- entry = applesmc_get_entry_by_key(key);
++ entry = applesmc_get_entry_by_key(smc, key);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+- return applesmc_write_entry(entry, buffer, len);
++ return applesmc_write_entry(smc, entry, buffer, len);
+ }
+
+-static int applesmc_has_key(const char *key, bool *value)
++static int applesmc_has_key(struct applesmc_device *smc,
++ const char *key, bool *value)
+ {
+ const struct applesmc_entry *entry;
+
+- entry = applesmc_get_entry_by_key(key);
++ entry = applesmc_get_entry_by_key(smc, key);
+ if (IS_ERR(entry) && PTR_ERR(entry) != -EINVAL)
+ return PTR_ERR(entry);
+
+@@ -498,12 +512,13 @@ static int applesmc_has_key(const char *key, bool *value)
+ /*
+ * applesmc_read_s16 - Read 16-bit signed big endian register
+ */
+-static int applesmc_read_s16(const char *key, s16 *value)
++static int applesmc_read_s16(struct applesmc_device *smc,
++ const char *key, s16 *value)
+ {
+ u8 buffer[2];
+ int ret;
+
+- ret = applesmc_read_key(key, buffer, 2);
++ ret = applesmc_read_key(smc, key, buffer, 2);
+ if (ret)
+ return ret;
+
+@@ -514,28 +529,29 @@ static int applesmc_read_s16(const char *key, s16 *value)
+ /*
+ * applesmc_device_init - initialize the accelerometer. Can sleep.
+ */
+-static void applesmc_device_init(void)
++static void applesmc_device_init(struct applesmc_device *smc)
+ {
+ int total;
+ u8 buffer[2];
+
+- if (!smcreg.has_accelerometer)
++ if (!smc->reg.has_accelerometer)
+ return;
+
+ for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) {
+- if (!applesmc_read_key(MOTION_SENSOR_KEY, buffer, 2) &&
++ if (!applesmc_read_key(smc, MOTION_SENSOR_KEY, buffer, 2) &&
+ (buffer[0] != 0x00 || buffer[1] != 0x00))
+ return;
+ buffer[0] = 0xe0;
+ buffer[1] = 0x00;
+- applesmc_write_key(MOTION_SENSOR_KEY, buffer, 2);
++ applesmc_write_key(smc, MOTION_SENSOR_KEY, buffer, 2);
+ msleep(INIT_WAIT_MSECS);
+ }
+
+ pr_warn("failed to init the device\n");
+ }
+
+-static int applesmc_init_index(struct applesmc_registers *s)
++static int applesmc_init_index(struct applesmc_device *smc,
++ struct applesmc_registers *s)
+ {
+ const struct applesmc_entry *entry;
+ unsigned int i;
+@@ -548,7 +564,7 @@ static int applesmc_init_index(struct applesmc_registers *s)
+ return -ENOMEM;
+
+ for (i = s->temp_begin; i < s->temp_end; i++) {
+- entry = applesmc_get_entry_by_index(i);
++ entry = applesmc_get_entry_by_index(smc, i);
+ if (IS_ERR(entry))
+ continue;
+ if (strcmp(entry->type, TEMP_SENSOR_TYPE))
+@@ -562,9 +578,9 @@ static int applesmc_init_index(struct applesmc_registers *s)
+ /*
+ * applesmc_init_smcreg_try - Try to initialize register cache. Idempotent.
+ */
+-static int applesmc_init_smcreg_try(void)
++static int applesmc_init_smcreg_try(struct applesmc_device *smc)
+ {
+- struct applesmc_registers *s = &smcreg;
++ struct applesmc_registers *s = &smc->reg;
+ bool left_light_sensor = false, right_light_sensor = false;
+ unsigned int count;
+ u8 tmp[1];
+@@ -590,35 +606,35 @@ static int applesmc_init_smcreg_try(void)
+ if (!s->cache)
+ return -ENOMEM;
+
+- ret = applesmc_read_key(FANS_COUNT, tmp, 1);
++ ret = applesmc_read_key(smc, FANS_COUNT, tmp, 1);
+ if (ret)
+ return ret;
+ s->fan_count = tmp[0];
+ if (s->fan_count > 10)
+ s->fan_count = 10;
+
+- ret = applesmc_get_lower_bound(&s->temp_begin, "T");
++ ret = applesmc_get_lower_bound(smc, &s->temp_begin, "T");
+ if (ret)
+ return ret;
+- ret = applesmc_get_lower_bound(&s->temp_end, "U");
++ ret = applesmc_get_lower_bound(smc, &s->temp_end, "U");
+ if (ret)
+ return ret;
+ s->temp_count = s->temp_end - s->temp_begin;
+
+- ret = applesmc_init_index(s);
++ ret = applesmc_init_index(smc, s);
+ if (ret)
+ return ret;
+
+- ret = applesmc_has_key(LIGHT_SENSOR_LEFT_KEY, &left_light_sensor);
++ ret = applesmc_has_key(smc, LIGHT_SENSOR_LEFT_KEY, &left_light_sensor);
+ if (ret)
+ return ret;
+- ret = applesmc_has_key(LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor);
++ ret = applesmc_has_key(smc, LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor);
+ if (ret)
+ return ret;
+- ret = applesmc_has_key(MOTION_SENSOR_KEY, &s->has_accelerometer);
++ ret = applesmc_has_key(smc, MOTION_SENSOR_KEY, &s->has_accelerometer);
+ if (ret)
+ return ret;
+- ret = applesmc_has_key(BACKLIGHT_KEY, &s->has_key_backlight);
++ ret = applesmc_has_key(smc, BACKLIGHT_KEY, &s->has_key_backlight);
+ if (ret)
+ return ret;
+
+@@ -634,13 +650,13 @@ static int applesmc_init_smcreg_try(void)
+ return 0;
+ }
+
+-static void applesmc_destroy_smcreg(void)
++static void applesmc_destroy_smcreg(struct applesmc_device *smc)
+ {
+- kfree(smcreg.index);
+- smcreg.index = NULL;
+- kfree(smcreg.cache);
+- smcreg.cache = NULL;
+- smcreg.init_complete = false;
++ kfree(smc->reg.index);
++ smc->reg.index = NULL;
++ kfree(smc->reg.cache);
++ smc->reg.cache = NULL;
++ smc->reg.init_complete = false;
+ }
+
+ /*
+@@ -649,12 +665,12 @@ static void applesmc_destroy_smcreg(void)
+ * Retries until initialization is successful, or the operation times out.
+ *
+ */
+-static int applesmc_init_smcreg(void)
++static int applesmc_init_smcreg(struct applesmc_device *smc)
+ {
+ int ms, ret;
+
+ for (ms = 0; ms < INIT_TIMEOUT_MSECS; ms += INIT_WAIT_MSECS) {
+- ret = applesmc_init_smcreg_try();
++ ret = applesmc_init_smcreg_try(smc);
+ if (!ret) {
+ if (ms)
+ pr_info("init_smcreg() took %d ms\n", ms);
+@@ -663,21 +679,58 @@ static int applesmc_init_smcreg(void)
+ msleep(INIT_WAIT_MSECS);
+ }
+
+- applesmc_destroy_smcreg();
++ applesmc_destroy_smcreg(smc);
+
+ return ret;
+ }
+
+ /* Device model stuff */
++static int applesmc_create_modules(struct applesmc_device *smc);
++static void applesmc_destroy_modules(struct applesmc_device *smc);
+ static int applesmc_probe(struct platform_device *dev)
+ {
++ struct applesmc_device *smc;
+ int ret;
+
+- ret = applesmc_init_smcreg();
++ smc = kzalloc(sizeof(struct applesmc_device), GFP_KERNEL);
++ if (!smc)
++ return -ENOMEM;
++ smc->dev = dev;
++ mutex_init(&smc->reg.mutex);
++
++ platform_set_drvdata(dev, smc);
++
++ ret = applesmc_init_smcreg(smc);
+ if (ret)
+- return ret;
++ goto out_mem;
++
++ applesmc_device_init(smc);
++
++ ret = applesmc_create_modules(smc);
++ if (ret)
++ goto out_reg;
++
++ return 0;
++
++out_reg:
++ applesmc_destroy_smcreg(smc);
++out_mem:
++ platform_set_drvdata(dev, NULL);
++ mutex_destroy(&smc->reg.mutex);
++ kfree(smc);
+
+- applesmc_device_init();
++ return ret;
++}
++
++static int applesmc_remove(struct platform_device *dev)
++{
++ struct applesmc_device *smc = platform_get_drvdata(dev);
++
++ applesmc_destroy_modules(smc);
++ applesmc_destroy_smcreg(smc);
++
++ mutex_destroy(&smc->reg.mutex);
++ kfree(smc);
+
+ return 0;
+ }
+@@ -685,15 +738,21 @@ static int applesmc_probe(struct platform_device *dev)
+ /* Synchronize device with memorized backlight state */
+ static int applesmc_pm_resume(struct device *dev)
+ {
+- if (smcreg.has_key_backlight)
+- applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++
++ if (smc->reg.has_key_backlight)
++ applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2);
++
+ return 0;
+ }
+
+ /* Reinitialize device on resume from hibernation */
+ static int applesmc_pm_restore(struct device *dev)
+ {
+- applesmc_device_init();
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++
++ applesmc_device_init(smc);
++
+ return applesmc_pm_resume(dev);
+ }
+
+@@ -704,6 +763,7 @@ static const struct dev_pm_ops applesmc_pm_ops = {
+
+ static struct platform_driver applesmc_driver = {
+ .probe = applesmc_probe,
++ .remove = applesmc_remove,
+ .driver = {
+ .name = "applesmc",
+ .pm = &applesmc_pm_ops,
+@@ -714,25 +774,26 @@ static struct platform_driver applesmc_driver = {
+ * applesmc_calibrate - Set our "resting" values. Callers must
+ * hold applesmc_lock.
+ */
+-static void applesmc_calibrate(void)
++static void applesmc_calibrate(struct applesmc_device *smc)
+ {
+- applesmc_read_s16(MOTION_SENSOR_X_KEY, &rest_x);
+- applesmc_read_s16(MOTION_SENSOR_Y_KEY, &rest_y);
+- rest_x = -rest_x;
++ applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &smc->rest_x);
++ applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &smc->rest_y);
++ smc->rest_x = -smc->rest_x;
+ }
+
+ static void applesmc_idev_poll(struct input_dev *idev)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(&idev->dev);
+ s16 x, y;
+
+- if (applesmc_read_s16(MOTION_SENSOR_X_KEY, &x))
++ if (applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x))
+ return;
+- if (applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y))
++ if (applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y))
+ return;
+
+ x = -x;
+- input_report_abs(idev, ABS_X, x - rest_x);
+- input_report_abs(idev, ABS_Y, y - rest_y);
++ input_report_abs(idev, ABS_X, x - smc->rest_x);
++ input_report_abs(idev, ABS_Y, y - smc->rest_y);
+ input_sync(idev);
+ }
+
+@@ -747,16 +808,17 @@ static ssize_t applesmc_name_show(struct device *dev,
+ static ssize_t applesmc_position_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ int ret;
+ s16 x, y, z;
+
+- ret = applesmc_read_s16(MOTION_SENSOR_X_KEY, &x);
++ ret = applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x);
+ if (ret)
+ goto out;
+- ret = applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y);
++ ret = applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y);
+ if (ret)
+ goto out;
+- ret = applesmc_read_s16(MOTION_SENSOR_Z_KEY, &z);
++ ret = applesmc_read_s16(smc, MOTION_SENSOR_Z_KEY, &z);
+ if (ret)
+ goto out;
+
+@@ -770,6 +832,7 @@ static ssize_t applesmc_position_show(struct device *dev,
+ static ssize_t applesmc_light_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ const struct applesmc_entry *entry;
+ static int data_length;
+ int ret;
+@@ -777,7 +840,7 @@ static ssize_t applesmc_light_show(struct device *dev,
+ u8 buffer[10];
+
+ if (!data_length) {
+- entry = applesmc_get_entry_by_key(LIGHT_SENSOR_LEFT_KEY);
++ entry = applesmc_get_entry_by_key(smc, LIGHT_SENSOR_LEFT_KEY);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+ if (entry->len > 10)
+@@ -786,7 +849,7 @@ static ssize_t applesmc_light_show(struct device *dev,
+ pr_info("light sensor data length set to %d\n", data_length);
+ }
+
+- ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
++ ret = applesmc_read_key(smc, LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
+ if (ret)
+ goto out;
+ /* newer macbooks report a single 10-bit bigendian value */
+@@ -796,7 +859,7 @@ static ssize_t applesmc_light_show(struct device *dev,
+ }
+ left = buffer[2];
+
+- ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
++ ret = applesmc_read_key(smc, LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
+ if (ret)
+ goto out;
+ right = buffer[2];
+@@ -812,7 +875,8 @@ static ssize_t applesmc_light_show(struct device *dev,
+ static ssize_t applesmc_show_sensor_label(struct device *dev,
+ struct device_attribute *devattr, char *sysfsbuf)
+ {
+- const char *key = smcreg.index[to_index(devattr)];
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++ const char *key = smc->reg.index[to_index(devattr)];
+
+ return sysfs_emit(sysfsbuf, "%s\n", key);
+ }
+@@ -821,12 +885,13 @@ static ssize_t applesmc_show_sensor_label(struct device *dev,
+ static ssize_t applesmc_show_temperature(struct device *dev,
+ struct device_attribute *devattr, char *sysfsbuf)
+ {
+- const char *key = smcreg.index[to_index(devattr)];
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++ const char *key = smc->reg.index[to_index(devattr)];
+ int ret;
+ s16 value;
+ int temp;
+
+- ret = applesmc_read_s16(key, &value);
++ ret = applesmc_read_s16(smc, key, &value);
+ if (ret)
+ return ret;
+
+@@ -838,6 +903,7 @@ static ssize_t applesmc_show_temperature(struct device *dev,
+ static ssize_t applesmc_show_fan_speed(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ int ret;
+ unsigned int speed = 0;
+ char newkey[5];
+@@ -846,7 +912,7 @@ static ssize_t applesmc_show_fan_speed(struct device *dev,
+ scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
+ to_index(attr));
+
+- ret = applesmc_read_key(newkey, buffer, 2);
++ ret = applesmc_read_key(smc, newkey, buffer, 2);
+ if (ret)
+ return ret;
+
+@@ -858,6 +924,7 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
+ struct device_attribute *attr,
+ const char *sysfsbuf, size_t count)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ int ret;
+ unsigned long speed;
+ char newkey[5];
+@@ -871,7 +938,7 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
+
+ buffer[0] = (speed >> 6) & 0xff;
+ buffer[1] = (speed << 2) & 0xff;
+- ret = applesmc_write_key(newkey, buffer, 2);
++ ret = applesmc_write_key(smc, newkey, buffer, 2);
+
+ if (ret)
+ return ret;
+@@ -882,11 +949,12 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
+ static ssize_t applesmc_show_fan_manual(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ int ret;
+ u16 manual = 0;
+ u8 buffer[2];
+
+- ret = applesmc_read_key(FANS_MANUAL, buffer, 2);
++ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
+ if (ret)
+ return ret;
+
+@@ -898,6 +966,7 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
+ struct device_attribute *attr,
+ const char *sysfsbuf, size_t count)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ int ret;
+ u8 buffer[2];
+ unsigned long input;
+@@ -906,7 +975,7 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
+ if (kstrtoul(sysfsbuf, 10, &input) < 0)
+ return -EINVAL;
+
+- ret = applesmc_read_key(FANS_MANUAL, buffer, 2);
++ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
+ if (ret)
+ goto out;
+
+@@ -920,7 +989,7 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
+ buffer[0] = (val >> 8) & 0xFF;
+ buffer[1] = val & 0xFF;
+
+- ret = applesmc_write_key(FANS_MANUAL, buffer, 2);
++ ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2);
+
+ out:
+ if (ret)
+@@ -932,13 +1001,14 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
+ static ssize_t applesmc_show_fan_position(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ int ret;
+ char newkey[5];
+ u8 buffer[17];
+
+ scnprintf(newkey, sizeof(newkey), FAN_ID_FMT, to_index(attr));
+
+- ret = applesmc_read_key(newkey, buffer, 16);
++ ret = applesmc_read_key(smc, newkey, buffer, 16);
+ buffer[16] = 0;
+
+ if (ret)
+@@ -950,30 +1020,36 @@ static ssize_t applesmc_show_fan_position(struct device *dev,
+ static ssize_t applesmc_calibrate_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
+- return sysfs_emit(sysfsbuf, "(%d,%d)\n", rest_x, rest_y);
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++
++ return sysfs_emit(sysfsbuf, "(%d,%d)\n", smc->rest_x, smc->rest_y);
+ }
+
+ static ssize_t applesmc_calibrate_store(struct device *dev,
+ struct device_attribute *attr, const char *sysfsbuf, size_t count)
+ {
+- applesmc_calibrate();
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++
++ applesmc_calibrate(smc);
+
+ return count;
+ }
+
+ static void applesmc_backlight_set(struct work_struct *work)
+ {
+- applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
++ struct applesmc_device *smc = container_of(work, struct applesmc_device, backlight_work);
++
++ applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2);
+ }
+-static DECLARE_WORK(backlight_work, &applesmc_backlight_set);
+
+ static void applesmc_brightness_set(struct led_classdev *led_cdev,
+ enum led_brightness value)
+ {
++ struct applesmc_device *smc = container_of(led_cdev, struct applesmc_device, backlight_dev);
+ int ret;
+
+- backlight_state[0] = value;
+- ret = queue_work(applesmc_led_wq, &backlight_work);
++ smc->backlight_state[0] = value;
++ ret = queue_work(smc->backlight_wq, &smc->backlight_work);
+
+ if (debug && (!ret))
+ dev_dbg(led_cdev->dev, "work was already on the queue.\n");
+@@ -982,11 +1058,12 @@ static void applesmc_brightness_set(struct led_classdev *led_cdev,
+ static ssize_t applesmc_key_count_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ int ret;
+ u8 buffer[4];
+ u32 count;
+
+- ret = applesmc_read_key(KEY_COUNT_KEY, buffer, 4);
++ ret = applesmc_read_key(smc, KEY_COUNT_KEY, buffer, 4);
+ if (ret)
+ return ret;
+
+@@ -998,13 +1075,14 @@ static ssize_t applesmc_key_count_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_read_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ const struct applesmc_entry *entry;
+ int ret;
+
+- entry = applesmc_get_entry_by_index(key_at_index);
++ entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+- ret = applesmc_read_entry(entry, sysfsbuf, entry->len);
++ ret = applesmc_read_entry(smc, entry, sysfsbuf, entry->len);
+ if (ret)
+ return ret;
+
+@@ -1014,9 +1092,10 @@ static ssize_t applesmc_key_at_index_read_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_data_length_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ const struct applesmc_entry *entry;
+
+- entry = applesmc_get_entry_by_index(key_at_index);
++ entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+@@ -1026,9 +1105,10 @@ static ssize_t applesmc_key_at_index_data_length_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_type_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ const struct applesmc_entry *entry;
+
+- entry = applesmc_get_entry_by_index(key_at_index);
++ entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+@@ -1038,9 +1118,10 @@ static ssize_t applesmc_key_at_index_type_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_name_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ const struct applesmc_entry *entry;
+
+- entry = applesmc_get_entry_by_index(key_at_index);
++ entry = applesmc_get_entry_by_index(smc, smc->key_at_index);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+@@ -1050,28 +1131,25 @@ static ssize_t applesmc_key_at_index_name_show(struct device *dev,
+ static ssize_t applesmc_key_at_index_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
+- return sysfs_emit(sysfsbuf, "%d\n", key_at_index);
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++
++ return sysfs_emit(sysfsbuf, "%d\n", smc->key_at_index);
+ }
+
+ static ssize_t applesmc_key_at_index_store(struct device *dev,
+ struct device_attribute *attr, const char *sysfsbuf, size_t count)
+ {
++ struct applesmc_device *smc = dev_get_drvdata(dev);
+ unsigned long newkey;
+
+ if (kstrtoul(sysfsbuf, 10, &newkey) < 0
+- || newkey >= smcreg.key_count)
++ || newkey >= smc->reg.key_count)
+ return -EINVAL;
+
+- key_at_index = newkey;
++ smc->key_at_index = newkey;
+ return count;
+ }
+
+-static struct led_classdev applesmc_backlight = {
+- .name = "smc::kbd_backlight",
+- .default_trigger = "nand-disk",
+- .brightness_set = applesmc_brightness_set,
+-};
+-
+ static struct applesmc_node_group info_group[] = {
+ { "name", applesmc_name_show },
+ { "key_count", applesmc_key_count_show },
+@@ -1116,14 +1194,15 @@ static struct applesmc_node_group temp_group[] = {
+ /*
+ * applesmc_destroy_nodes - remove files and free associated memory
+ */
+-static void applesmc_destroy_nodes(struct applesmc_node_group *groups)
++static void applesmc_destroy_nodes(struct applesmc_device *smc,
++ struct applesmc_node_group *groups)
+ {
+ struct applesmc_node_group *grp;
+ struct applesmc_dev_attr *node;
+
+ for (grp = groups; grp->nodes; grp++) {
+ for (node = grp->nodes; node->sda.dev_attr.attr.name; node++)
+- sysfs_remove_file(&pdev->dev.kobj,
++ sysfs_remove_file(&smc->dev->dev.kobj,
+ &node->sda.dev_attr.attr);
+ kfree(grp->nodes);
+ grp->nodes = NULL;
+@@ -1133,7 +1212,8 @@ static void applesmc_destroy_nodes(struct applesmc_node_group *groups)
+ /*
+ * applesmc_create_nodes - create a two-dimensional group of sysfs files
+ */
+-static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
++static int applesmc_create_nodes(struct applesmc_device *smc,
++ struct applesmc_node_group *groups, int num)
+ {
+ struct applesmc_node_group *grp;
+ struct applesmc_dev_attr *node;
+@@ -1157,7 +1237,7 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
+ sysfs_attr_init(attr);
+ attr->name = node->name;
+ attr->mode = 0444 | (grp->store ? 0200 : 0);
+- ret = sysfs_create_file(&pdev->dev.kobj, attr);
++ ret = sysfs_create_file(&smc->dev->dev.kobj, attr);
+ if (ret) {
+ attr->name = NULL;
+ goto out;
+@@ -1167,57 +1247,57 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
+
+ return 0;
+ out:
+- applesmc_destroy_nodes(groups);
++ applesmc_destroy_nodes(smc, groups);
+ return ret;
+ }
+
+ /* Create accelerometer resources */
+-static int applesmc_create_accelerometer(void)
++static int applesmc_create_accelerometer(struct applesmc_device *smc)
+ {
+ int ret;
+
+- if (!smcreg.has_accelerometer)
++ if (!smc->reg.has_accelerometer)
+ return 0;
+
+- ret = applesmc_create_nodes(accelerometer_group, 1);
++ ret = applesmc_create_nodes(smc, accelerometer_group, 1);
+ if (ret)
+ goto out;
+
+- applesmc_idev = input_allocate_device();
+- if (!applesmc_idev) {
++ smc->idev = input_allocate_device();
++ if (!smc->idev) {
+ ret = -ENOMEM;
+ goto out_sysfs;
+ }
+
+ /* initial calibrate for the input device */
+- applesmc_calibrate();
++ applesmc_calibrate(smc);
+
+ /* initialize the input device */
+- applesmc_idev->name = "applesmc";
+- applesmc_idev->id.bustype = BUS_HOST;
+- applesmc_idev->dev.parent = &pdev->dev;
+- input_set_abs_params(applesmc_idev, ABS_X,
++ smc->idev->name = "applesmc";
++ smc->idev->id.bustype = BUS_HOST;
++ smc->idev->dev.parent = &smc->dev->dev;
++ input_set_abs_params(smc->idev, ABS_X,
+ -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
+- input_set_abs_params(applesmc_idev, ABS_Y,
++ input_set_abs_params(smc->idev, ABS_Y,
+ -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
+
+- ret = input_setup_polling(applesmc_idev, applesmc_idev_poll);
++ ret = input_setup_polling(smc->idev, applesmc_idev_poll);
+ if (ret)
+ goto out_idev;
+
+- input_set_poll_interval(applesmc_idev, APPLESMC_POLL_INTERVAL);
++ input_set_poll_interval(smc->idev, APPLESMC_POLL_INTERVAL);
+
+- ret = input_register_device(applesmc_idev);
++ ret = input_register_device(smc->idev);
+ if (ret)
+ goto out_idev;
+
+ return 0;
+
+ out_idev:
+- input_free_device(applesmc_idev);
++ input_free_device(smc->idev);
+
+ out_sysfs:
+- applesmc_destroy_nodes(accelerometer_group);
++ applesmc_destroy_nodes(smc, accelerometer_group);
+
+ out:
+ pr_warn("driver init failed (ret=%d)!\n", ret);
+@@ -1225,44 +1305,55 @@ static int applesmc_create_accelerometer(void)
+ }
+
+ /* Release all resources used by the accelerometer */
+-static void applesmc_release_accelerometer(void)
++static void applesmc_release_accelerometer(struct applesmc_device *smc)
+ {
+- if (!smcreg.has_accelerometer)
++ if (!smc->reg.has_accelerometer)
+ return;
+- input_unregister_device(applesmc_idev);
+- applesmc_destroy_nodes(accelerometer_group);
++ input_unregister_device(smc->idev);
++ applesmc_destroy_nodes(smc, accelerometer_group);
+ }
+
+-static int applesmc_create_light_sensor(void)
++static int applesmc_create_light_sensor(struct applesmc_device *smc)
+ {
+- if (!smcreg.num_light_sensors)
++ if (!smc->reg.num_light_sensors)
+ return 0;
+- return applesmc_create_nodes(light_sensor_group, 1);
++ return applesmc_create_nodes(smc, light_sensor_group, 1);
+ }
+
+-static void applesmc_release_light_sensor(void)
++static void applesmc_release_light_sensor(struct applesmc_device *smc)
+ {
+- if (!smcreg.num_light_sensors)
++ if (!smc->reg.num_light_sensors)
+ return;
+- applesmc_destroy_nodes(light_sensor_group);
++ applesmc_destroy_nodes(smc, light_sensor_group);
+ }
+
+-static int applesmc_create_key_backlight(void)
++static int applesmc_create_key_backlight(struct applesmc_device *smc)
+ {
+- if (!smcreg.has_key_backlight)
++ int ret;
++
++ if (!smc->reg.has_key_backlight)
+ return 0;
+- applesmc_led_wq = create_singlethread_workqueue("applesmc-led");
+- if (!applesmc_led_wq)
++ smc->backlight_wq = create_singlethread_workqueue("applesmc-led");
++ if (!smc->backlight_wq)
+ return -ENOMEM;
+- return led_classdev_register(&pdev->dev, &applesmc_backlight);
++
++ INIT_WORK(&smc->backlight_work, applesmc_backlight_set);
++ smc->backlight_dev.name = "smc::kbd_backlight";
++ smc->backlight_dev.default_trigger = "nand-disk";
++ smc->backlight_dev.brightness_set = applesmc_brightness_set;
++ ret = led_classdev_register(&smc->dev->dev, &smc->backlight_dev);
++ if (ret)
++ destroy_workqueue(smc->backlight_wq);
++
++ return ret;
+ }
+
+-static void applesmc_release_key_backlight(void)
++static void applesmc_release_key_backlight(struct applesmc_device *smc)
+ {
+- if (!smcreg.has_key_backlight)
++ if (!smc->reg.has_key_backlight)
+ return;
+- led_classdev_unregister(&applesmc_backlight);
+- destroy_workqueue(applesmc_led_wq);
++ led_classdev_unregister(&smc->backlight_dev);
++ destroy_workqueue(smc->backlight_wq);
+ }
+
+ static int applesmc_dmi_match(const struct dmi_system_id *id)
+@@ -1302,86 +1393,100 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = {
+ { .ident = NULL }
+ };
+
+-static int __init applesmc_init(void)
++static int applesmc_create_modules(struct applesmc_device *smc)
+ {
+ int ret;
+
+- if (!dmi_check_system(applesmc_whitelist)) {
+- pr_warn("supported laptop not found!\n");
+- ret = -ENODEV;
+- goto out;
+- }
+-
+- if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS,
+- "applesmc")) {
+- ret = -ENXIO;
+- goto out;
+- }
+-
+- ret = platform_driver_register(&applesmc_driver);
+- if (ret)
+- goto out_region;
+-
+- pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT,
+- NULL, 0);
+- if (IS_ERR(pdev)) {
+- ret = PTR_ERR(pdev);
+- goto out_driver;
+- }
+-
+- /* create register cache */
+- ret = applesmc_init_smcreg();
+- if (ret)
+- goto out_device;
+-
+- ret = applesmc_create_nodes(info_group, 1);
++ ret = applesmc_create_nodes(smc, info_group, 1);
+ if (ret)
+- goto out_smcreg;
++ goto out;
+
+- ret = applesmc_create_nodes(fan_group, smcreg.fan_count);
++ ret = applesmc_create_nodes(smc, fan_group, smc->reg.fan_count);
+ if (ret)
+ goto out_info;
+
+- ret = applesmc_create_nodes(temp_group, smcreg.index_count);
++ ret = applesmc_create_nodes(smc, temp_group, smc->reg.index_count);
+ if (ret)
+ goto out_fans;
+
+- ret = applesmc_create_accelerometer();
++ ret = applesmc_create_accelerometer(smc);
+ if (ret)
+ goto out_temperature;
+
+- ret = applesmc_create_light_sensor();
++ ret = applesmc_create_light_sensor(smc);
+ if (ret)
+ goto out_accelerometer;
+
+- ret = applesmc_create_key_backlight();
++ ret = applesmc_create_key_backlight(smc);
+ if (ret)
+ goto out_light_sysfs;
+
+- hwmon_dev = hwmon_device_register(&pdev->dev);
+- if (IS_ERR(hwmon_dev)) {
+- ret = PTR_ERR(hwmon_dev);
++ smc->hwmon_dev = hwmon_device_register(&smc->dev->dev);
++ if (IS_ERR(smc->hwmon_dev)) {
++ ret = PTR_ERR(smc->hwmon_dev);
+ goto out_light_ledclass;
+ }
+
+ return 0;
+
+ out_light_ledclass:
+- applesmc_release_key_backlight();
++ applesmc_release_key_backlight(smc);
+ out_light_sysfs:
+- applesmc_release_light_sensor();
++ applesmc_release_light_sensor(smc);
+ out_accelerometer:
+- applesmc_release_accelerometer();
++ applesmc_release_accelerometer(smc);
+ out_temperature:
+- applesmc_destroy_nodes(temp_group);
++ applesmc_destroy_nodes(smc, temp_group);
+ out_fans:
+- applesmc_destroy_nodes(fan_group);
++ applesmc_destroy_nodes(smc, fan_group);
+ out_info:
+- applesmc_destroy_nodes(info_group);
+-out_smcreg:
+- applesmc_destroy_smcreg();
+-out_device:
+- platform_device_unregister(pdev);
++ applesmc_destroy_nodes(smc, info_group);
++out:
++ return ret;
++}
++
++static void applesmc_destroy_modules(struct applesmc_device *smc)
++{
++ hwmon_device_unregister(smc->hwmon_dev);
++ applesmc_release_key_backlight(smc);
++ applesmc_release_light_sensor(smc);
++ applesmc_release_accelerometer(smc);
++ applesmc_destroy_nodes(smc, temp_group);
++ applesmc_destroy_nodes(smc, fan_group);
++ applesmc_destroy_nodes(smc, info_group);
++}
++
++static struct platform_device *pdev;
++
++static int __init applesmc_init(void)
++{
++ int ret;
++
++ if (!dmi_check_system(applesmc_whitelist)) {
++ pr_warn("supported laptop not found!\n");
++ ret = -ENODEV;
++ goto out;
++ }
++
++ if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS,
++ "applesmc")) {
++ ret = -ENXIO;
++ goto out;
++ }
++
++ ret = platform_driver_register(&applesmc_driver);
++ if (ret)
++ goto out_region;
++
++ pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT,
++ NULL, 0);
++ if (IS_ERR(pdev)) {
++ ret = PTR_ERR(pdev);
++ goto out_driver;
++ }
++
++ return 0;
++
+ out_driver:
+ platform_driver_unregister(&applesmc_driver);
+ out_region:
+@@ -1393,14 +1498,6 @@ static int __init applesmc_init(void)
+
+ static void __exit applesmc_exit(void)
+ {
+- hwmon_device_unregister(hwmon_dev);
+- applesmc_release_key_backlight();
+- applesmc_release_light_sensor();
+- applesmc_release_accelerometer();
+- applesmc_destroy_nodes(temp_group);
+- applesmc_destroy_nodes(fan_group);
+- applesmc_destroy_nodes(info_group);
+- applesmc_destroy_smcreg();
+ platform_device_unregister(pdev);
+ platform_driver_unregister(&applesmc_driver);
+ release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS);
+@@ -1410,6 +1507,7 @@ module_init(applesmc_init);
+ module_exit(applesmc_exit);
+
+ MODULE_AUTHOR("Nicolas Boichat");
++MODULE_AUTHOR("Paul Pawlowski");
+ MODULE_DESCRIPTION("Apple SMC");
+ MODULE_LICENSE("GPL v2");
+ MODULE_DEVICE_TABLE(dmi, applesmc_whitelist);
+--
+2.30.0
+
+From 713e78b8dbb8adb92d4ee09ea11e726b05577689 Mon Sep 17 00:00:00 2001
+From: Paul Pawlowski <paul@mrarm.io>
+Date: Sun, 17 Nov 2019 23:11:56 +0100
+Subject: [PATCH 2/6] applesmc: make io port base addr dynamic
+
+This change makes the I/O port base configurable at runtime.
+It is made so that, once the driver switches to an acpi_device,
+the port base address can be resolved from ACPI.
+
+This change is not strictly required for T2 support - the base
+address is still 0x300 on T2 Macs.
+
+Signed-off-by: Aun-Ali Zaidi <admin@kodeit.net>
+---
+ drivers/hwmon/applesmc.c | 91 +++++++++++++++++++++-------------------
+ 1 file changed, 49 insertions(+), 42 deletions(-)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index 62211b590a61..39ed0bb21365 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -35,10 +35,11 @@
+ #include <linux/err.h>
+ #include <linux/bits.h>
+
++#define APPLESMC_PORT_BASE 0x300
+ /* data port used by Apple SMC */
+-#define APPLESMC_DATA_PORT 0x300
++#define APPLESMC_DATA_PORT 0
+ /* command/status port used by Apple SMC */
+-#define APPLESMC_CMD_PORT 0x304
++#define APPLESMC_CMD_PORT 4
+
+ #define APPLESMC_NR_PORTS 32 /* 0x300-0x31f */
+
+@@ -140,6 +141,8 @@ struct applesmc_device {
+ struct platform_device *dev;
+ struct applesmc_registers reg;
+
++ u16 port_base;
++
+ s16 rest_x;
+ s16 rest_y;
+
+@@ -169,7 +172,7 @@ static const int debug;
+ * run out past 500ms.
+ */
+
+-static int wait_status(u8 val, u8 mask)
++static int wait_status(struct applesmc_device *smc, u8 val, u8 mask)
+ {
+ u8 status;
+ int us;
+@@ -177,7 +180,7 @@ static int wait_status(u8 val, u8 mask)
+
+ us = APPLESMC_MIN_WAIT;
+ for (i = 0; i < 24 ; i++) {
+- status = inb(APPLESMC_CMD_PORT);
++ status = inb(smc->port_base + APPLESMC_CMD_PORT);
+ if ((status & mask) == val)
+ return 0;
+ usleep_range(us, us * 2);
+@@ -189,11 +192,11 @@ static int wait_status(u8 val, u8 mask)
+
+ /* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */
+
+-static int send_byte(u8 cmd, u16 port)
++static int send_byte(struct applesmc_device *smc, u8 cmd, u16 port)
+ {
+ int status;
+
+- status = wait_status(0, SMC_STATUS_IB_CLOSED);
++ status = wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
+ if (status)
+ return status;
+ /*
+@@ -202,24 +205,24 @@ static int send_byte(u8 cmd, u16 port)
+ * this extra read may not happen if status returns both
+ * simultaneously and this would appear to be required.
+ */
+- status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY);
++ status = wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY);
+ if (status)
+ return status;
+
+- outb(cmd, port);
++ outb(cmd, smc->port_base + port);
+ return 0;
+ }
+
+ /* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */
+
+-static int send_command(u8 cmd)
++static int send_command(struct applesmc_device *smc, u8 cmd)
+ {
+ int ret;
+
+- ret = wait_status(0, SMC_STATUS_IB_CLOSED);
++ ret = wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
+ if (ret)
+ return ret;
+- outb(cmd, APPLESMC_CMD_PORT);
++ outb(cmd, smc->port_base + APPLESMC_CMD_PORT);
+ return 0;
+ }
+
+@@ -229,108 +232,112 @@ static int send_command(u8 cmd)
+ * If busy is stuck high after the command then the SMC is jammed.
+ */
+
+-static int smc_sane(void)
++static int smc_sane(struct applesmc_device *smc)
+ {
+ int ret;
+
+- ret = wait_status(0, SMC_STATUS_BUSY);
++ ret = wait_status(smc, 0, SMC_STATUS_BUSY);
+ if (!ret)
+ return ret;
+- ret = send_command(APPLESMC_READ_CMD);
++ ret = send_command(smc, APPLESMC_READ_CMD);
+ if (ret)
+ return ret;
+- return wait_status(0, SMC_STATUS_BUSY);
++ return wait_status(smc, 0, SMC_STATUS_BUSY);
+ }
+
+-static int send_argument(const char *key)
++static int send_argument(struct applesmc_device *smc, const char *key)
+ {
+ int i;
+
+ for (i = 0; i < 4; i++)
+- if (send_byte(key[i], APPLESMC_DATA_PORT))
++ if (send_byte(smc, key[i], APPLESMC_DATA_PORT))
+ return -EIO;
+ return 0;
+ }
+
+-static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
++static int read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++ u8 *buffer, u8 len)
+ {
+ u8 status, data = 0;
+ int i;
+ int ret;
+
+- ret = smc_sane();
++ ret = smc_sane(smc);
+ if (ret)
+ return ret;
+
+- if (send_command(cmd) || send_argument(key)) {
++ if (send_command(smc, cmd) || send_argument(smc, key)) {
+ pr_warn("%.4s: read arg fail\n", key);
+ return -EIO;
+ }
+
+ /* This has no effect on newer (2012) SMCs */
+- if (send_byte(len, APPLESMC_DATA_PORT)) {
++ if (send_byte(smc, len, APPLESMC_DATA_PORT)) {
+ pr_warn("%.4s: read len fail\n", key);
+ return -EIO;
+ }
+
+ for (i = 0; i < len; i++) {
+- if (wait_status(SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY,
++ if (wait_status(smc,
++ SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY,
+ SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) {
+ pr_warn("%.4s: read data[%d] fail\n", key, i);
+ return -EIO;
+ }
+- buffer[i] = inb(APPLESMC_DATA_PORT);
++ buffer[i] = inb(smc->port_base + APPLESMC_DATA_PORT);
+ }
+
+ /* Read the data port until bit0 is cleared */
+ for (i = 0; i < 16; i++) {
+ udelay(APPLESMC_MIN_WAIT);
+- status = inb(APPLESMC_CMD_PORT);
++ status = inb(smc->port_base + APPLESMC_CMD_PORT);
+ if (!(status & SMC_STATUS_AWAITING_DATA))
+ break;
+- data = inb(APPLESMC_DATA_PORT);
++ data = inb(smc->port_base + APPLESMC_DATA_PORT);
+ }
+ if (i)
+ pr_warn("flushed %d bytes, last value is: %d\n", i, data);
+
+- return wait_status(0, SMC_STATUS_BUSY);
++ return wait_status(smc, 0, SMC_STATUS_BUSY);
+ }
+
+-static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len)
++static int write_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++ const u8 *buffer, u8 len)
+ {
+ int i;
+ int ret;
+
+- ret = smc_sane();
++ ret = smc_sane(smc);
+ if (ret)
+ return ret;
+
+- if (send_command(cmd) || send_argument(key)) {
++ if (send_command(smc, cmd) || send_argument(smc, key)) {
+ pr_warn("%s: write arg fail\n", key);
+ return -EIO;
+ }
+
+- if (send_byte(len, APPLESMC_DATA_PORT)) {
++ if (send_byte(smc, len, APPLESMC_DATA_PORT)) {
+ pr_warn("%.4s: write len fail\n", key);
+ return -EIO;
+ }
+
+ for (i = 0; i < len; i++) {
+- if (send_byte(buffer[i], APPLESMC_DATA_PORT)) {
++ if (send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) {
+ pr_warn("%s: write data fail\n", key);
+ return -EIO;
+ }
+ }
+
+- return wait_status(0, SMC_STATUS_BUSY);
++ return wait_status(smc, 0, SMC_STATUS_BUSY);
+ }
+
+-static int read_register_count(unsigned int *count)
++static int read_register_count(struct applesmc_device *smc,
++ unsigned int *count)
+ {
+ __be32 be;
+ int ret;
+
+- ret = read_smc(APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4);
++ ret = read_smc(smc, APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4);
+ if (ret)
+ return ret;
+
+@@ -353,7 +360,7 @@ static int applesmc_read_entry(struct applesmc_device *smc,
+ if (entry->len != len)
+ return -EINVAL;
+ mutex_lock(&smc->reg.mutex);
+- ret = read_smc(APPLESMC_READ_CMD, entry->key, buf, len);
++ ret = read_smc(smc, APPLESMC_READ_CMD, entry->key, buf, len);
+ mutex_unlock(&smc->reg.mutex);
+
+ return ret;
+@@ -367,7 +374,7 @@ static int applesmc_write_entry(struct applesmc_device *smc,
+ if (entry->len != len)
+ return -EINVAL;
+ mutex_lock(&smc->reg.mutex);
+- ret = write_smc(APPLESMC_WRITE_CMD, entry->key, buf, len);
++ ret = write_smc(smc, APPLESMC_WRITE_CMD, entry->key, buf, len);
+ mutex_unlock(&smc->reg.mutex);
+ return ret;
+ }
+@@ -388,10 +395,10 @@ static const struct applesmc_entry *applesmc_get_entry_by_index(
+ if (cache->valid)
+ goto out;
+ be = cpu_to_be32(index);
+- ret = read_smc(APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4);
++ ret = read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4);
+ if (ret)
+ goto out;
+- ret = read_smc(APPLESMC_GET_KEY_TYPE_CMD, key, info, 6);
++ ret = read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, info, 6);
+ if (ret)
+ goto out;
+
+@@ -589,7 +596,7 @@ static int applesmc_init_smcreg_try(struct applesmc_device *smc)
+ if (s->init_complete)
+ return 0;
+
+- ret = read_register_count(&count);
++ ret = read_register_count(smc, &count);
+ if (ret)
+ return ret;
+
+@@ -1468,7 +1475,7 @@ static int __init applesmc_init(void)
+ goto out;
+ }
+
+- if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS,
++ if (!request_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS,
+ "applesmc")) {
+ ret = -ENXIO;
+ goto out;
+@@ -1490,7 +1497,7 @@ static int __init applesmc_init(void)
+ out_driver:
+ platform_driver_unregister(&applesmc_driver);
+ out_region:
+- release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS);
++ release_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS);
+ out:
+ pr_warn("driver init failed (ret=%d)!\n", ret);
+ return ret;
+@@ -1500,7 +1507,7 @@ static void __exit applesmc_exit(void)
+ {
+ platform_device_unregister(pdev);
+ platform_driver_unregister(&applesmc_driver);
+- release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS);
++ release_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS);
+ }
+
+ module_init(applesmc_init);
+--
+2.30.0
+
+From ee3d4bf4a01bc94553bde2ae3e806a63a13faa12 Mon Sep 17 00:00:00 2001
+From: Paul Pawlowski <paul@mrarm.io>
+Date: Sun, 17 Nov 2019 23:12:08 +0100
+Subject: [PATCH 3/6] applesmc: switch to acpi_device (from platform)
+
+This change converts the driver from a platform_device
+to an acpi_device. The rationale for this change is
+that on T2 Macs, an additional FixedMemory32
+region is needed for device operation, and it can
+be easily resolved via ACPI tables (this will be
+done in another commit).
+
+Additionally, on older Macs, the OS X driver also
+looks for the specified ACPI device to resolve
+its memory regions, and therefore this change
+should not result in any incompatibilities.
+
+Signed-off-by: Aun-Ali Zaidi <admin@kodeit.net>
+---
+ drivers/hwmon/applesmc.c | 125 ++++++++++++++++++++++++++-------------
+ 1 file changed, 85 insertions(+), 40 deletions(-)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index 39ed0bb21365..bdaaf696f7b6 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -19,7 +19,7 @@
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+ #include <linux/delay.h>
+-#include <linux/platform_device.h>
++#include <linux/acpi.h>
+ #include <linux/input.h>
+ #include <linux/kernel.h>
+ #include <linux/slab.h>
+@@ -35,7 +35,6 @@
+ #include <linux/err.h>
+ #include <linux/bits.h>
+
+-#define APPLESMC_PORT_BASE 0x300
+ /* data port used by Apple SMC */
+ #define APPLESMC_DATA_PORT 0
+ /* command/status port used by Apple SMC */
+@@ -138,9 +137,10 @@ struct applesmc_registers {
+ };
+
+ struct applesmc_device {
+- struct platform_device *dev;
++ struct acpi_device *dev;
+ struct applesmc_registers reg;
+
++ bool port_base_set;
+ u16 port_base;
+
+ s16 rest_x;
+@@ -692,9 +692,13 @@ static int applesmc_init_smcreg(struct applesmc_device *smc)
+ }
+
+ /* Device model stuff */
++
++static int applesmc_init_resources(struct applesmc_device *smc);
++static void applesmc_free_resources(struct applesmc_device *smc);
+ static int applesmc_create_modules(struct applesmc_device *smc);
+ static void applesmc_destroy_modules(struct applesmc_device *smc);
+-static int applesmc_probe(struct platform_device *dev)
++
++static int applesmc_add(struct acpi_device *dev)
+ {
+ struct applesmc_device *smc;
+ int ret;
+@@ -705,12 +709,16 @@ static int applesmc_probe(struct platform_device *dev)
+ smc->dev = dev;
+ mutex_init(&smc->reg.mutex);
+
+- platform_set_drvdata(dev, smc);
++ dev_set_drvdata(&dev->dev, smc);
+
+- ret = applesmc_init_smcreg(smc);
++ ret = applesmc_init_resources(smc);
+ if (ret)
+ goto out_mem;
+
++ ret = applesmc_init_smcreg(smc);
++ if (ret)
++ goto out_res;
++
+ applesmc_device_init(smc);
+
+ ret = applesmc_create_modules(smc);
+@@ -721,20 +729,23 @@ static int applesmc_probe(struct platform_device *dev)
+
+ out_reg:
+ applesmc_destroy_smcreg(smc);
++out_res:
++ applesmc_free_resources(smc);
+ out_mem:
+- platform_set_drvdata(dev, NULL);
++ dev_set_drvdata(&dev->dev, NULL);
+ mutex_destroy(&smc->reg.mutex);
+ kfree(smc);
+
+ return ret;
+ }
+
+-static int applesmc_remove(struct platform_device *dev)
++static int applesmc_remove(struct acpi_device *dev)
+ {
+- struct applesmc_device *smc = platform_get_drvdata(dev);
++ struct applesmc_device *smc = dev_get_drvdata(&dev->dev);
+
+ applesmc_destroy_modules(smc);
+ applesmc_destroy_smcreg(smc);
++ applesmc_free_resources(smc);
+
+ mutex_destroy(&smc->reg.mutex);
+ kfree(smc);
+@@ -742,6 +753,52 @@ static int applesmc_remove(struct platform_device *dev)
+ return 0;
+ }
+
++static acpi_status applesmc_walk_resources(struct acpi_resource *res,
++ void *data)
++{
++ struct applesmc_device *smc = data;
++
++ switch (res->type) {
++ case ACPI_RESOURCE_TYPE_IO:
++ if (!smc->port_base_set) {
++ if (res->data.io.address_length < APPLESMC_NR_PORTS)
++ return AE_ERROR;
++ smc->port_base = res->data.io.minimum;
++ smc->port_base_set = true;
++ }
++ return AE_OK;
++
++ case ACPI_RESOURCE_TYPE_END_TAG:
++ if (smc->port_base_set)
++ return AE_OK;
++ else
++ return AE_NOT_FOUND;
++
++ default:
++ return AE_OK;
++ }
++}
++
++static int applesmc_init_resources(struct applesmc_device *smc)
++{
++ int ret;
++
++ ret = acpi_walk_resources(smc->dev->handle, METHOD_NAME__CRS,
++ applesmc_walk_resources, smc);
++ if (ACPI_FAILURE(ret))
++ return -ENXIO;
++
++ if (!request_region(smc->port_base, APPLESMC_NR_PORTS, "applesmc"))
++ return -ENXIO;
++
++ return 0;
++}
++
++static void applesmc_free_resources(struct applesmc_device *smc)
++{
++ release_region(smc->port_base, APPLESMC_NR_PORTS);
++}
++
+ /* Synchronize device with memorized backlight state */
+ static int applesmc_pm_resume(struct device *dev)
+ {
+@@ -763,18 +820,28 @@ static int applesmc_pm_restore(struct device *dev)
+ return applesmc_pm_resume(dev);
+ }
+
++static const struct acpi_device_id applesmc_ids[] = {
++ {"APP0001", 0},
++ {"", 0},
++};
++
+ static const struct dev_pm_ops applesmc_pm_ops = {
+ .resume = applesmc_pm_resume,
+ .restore = applesmc_pm_restore,
+ };
+
+-static struct platform_driver applesmc_driver = {
+- .probe = applesmc_probe,
+- .remove = applesmc_remove,
+- .driver = {
+- .name = "applesmc",
+- .pm = &applesmc_pm_ops,
++static struct acpi_driver applesmc_driver = {
++ .name = "applesmc",
++ .class = "applesmc",
++ .ids = applesmc_ids,
++ .ops = {
++ .add = applesmc_add,
++ .remove = applesmc_remove
+ },
++ .drv = {
++ .pm = &applesmc_pm_ops
++ },
++ .owner = THIS_MODULE
+ };
+
+ /*
+@@ -1262,7 +1329,6 @@ static int applesmc_create_nodes(struct applesmc_device *smc,
+ static int applesmc_create_accelerometer(struct applesmc_device *smc)
+ {
+ int ret;
+-
+ if (!smc->reg.has_accelerometer)
+ return 0;
+
+@@ -1463,8 +1529,6 @@ static void applesmc_destroy_modules(struct applesmc_device *smc)
+ applesmc_destroy_nodes(smc, info_group);
+ }
+
+-static struct platform_device *pdev;
+-
+ static int __init applesmc_init(void)
+ {
+ int ret;
+@@ -1475,29 +1539,12 @@ static int __init applesmc_init(void)
+ goto out;
+ }
+
+- if (!request_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS,
+- "applesmc")) {
+- ret = -ENXIO;
+- goto out;
+- }
+-
+- ret = platform_driver_register(&applesmc_driver);
++ ret = acpi_bus_register_driver(&applesmc_driver);
+ if (ret)
+- goto out_region;
+-
+- pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT,
+- NULL, 0);
+- if (IS_ERR(pdev)) {
+- ret = PTR_ERR(pdev);
+- goto out_driver;
+- }
++ goto out;
+
+ return 0;
+
+-out_driver:
+- platform_driver_unregister(&applesmc_driver);
+-out_region:
+- release_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS);
+ out:
+ pr_warn("driver init failed (ret=%d)!\n", ret);
+ return ret;
+@@ -1505,9 +1552,7 @@ static int __init applesmc_init(void)
+
+ static void __exit applesmc_exit(void)
+ {
+- platform_device_unregister(pdev);
+- platform_driver_unregister(&applesmc_driver);
+- release_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS);
++ acpi_bus_unregister_driver(&applesmc_driver);
+ }
+
+ module_init(applesmc_init);
+--
+2.30.0
+
+From 43df89a1377782788760808d8ea4bcf0730effbb Mon Sep 17 00:00:00 2001
+From: Paul Pawlowski <paul@mrarm.io>
+Date: Sun, 17 Nov 2019 23:12:14 +0100
+Subject: [PATCH 4/6] applesmc: key interface wrappers
+
+This change replaces the read_smc and write_smc
+methods with wrappers, additionally removing the
+command id parameter from them (and introducing
+get_smc_key_by_index and get_smc_key_info).
+
+This is done so that the implementation can be
+replaced easily on T2 Macs. The newly introduced
+methods mentioned in the previous paragraph need
+special handling on T2 and as such had to be
+separated.
+
+Signed-off-by: Aun-Ali Zaidi <admin@kodeit.net>
+---
+ drivers/hwmon/applesmc.c | 119 ++++++++++++++++++++++++++-------------
+ 1 file changed, 79 insertions(+), 40 deletions(-)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index bdaaf696f7b6..3017d8ca2c79 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -172,7 +172,7 @@ static const int debug;
+ * run out past 500ms.
+ */
+
+-static int wait_status(struct applesmc_device *smc, u8 val, u8 mask)
++static int port_wait_status(struct applesmc_device *smc, u8 val, u8 mask)
+ {
+ u8 status;
+ int us;
+@@ -190,13 +190,13 @@ static int wait_status(struct applesmc_device *smc, u8 val, u8 mask)
+ return -EIO;
+ }
+
+-/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */
++/* port_send_byte - Write to SMC data port. Callers must hold applesmc_lock. */
+
+-static int send_byte(struct applesmc_device *smc, u8 cmd, u16 port)
++static int port_send_byte(struct applesmc_device *smc, u8 cmd, u16 port)
+ {
+ int status;
+
+- status = wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
++ status = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
+ if (status)
+ return status;
+ /*
+@@ -205,7 +205,7 @@ static int send_byte(struct applesmc_device *smc, u8 cmd, u16 port)
+ * this extra read may not happen if status returns both
+ * simultaneously and this would appear to be required.
+ */
+- status = wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY);
++ status = port_wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY);
+ if (status)
+ return status;
+
+@@ -213,15 +213,16 @@ static int send_byte(struct applesmc_device *smc, u8 cmd, u16 port)
+ return 0;
+ }
+
+-/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */
++/* port_send_command - Write a command to the SMC. Callers must hold applesmc_lock. */
+
+-static int send_command(struct applesmc_device *smc, u8 cmd)
++static int port_send_command(struct applesmc_device *smc, u8 cmd)
+ {
+ int ret;
+
+- ret = wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
++ ret = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED);
+ if (ret)
+ return ret;
++
+ outb(cmd, smc->port_base + APPLESMC_CMD_PORT);
+ return 0;
+ }
+@@ -232,53 +233,53 @@ static int send_command(struct applesmc_device *smc, u8 cmd)
+ * If busy is stuck high after the command then the SMC is jammed.
+ */
+
+-static int smc_sane(struct applesmc_device *smc)
++static int port_smc_sane(struct applesmc_device *smc)
+ {
+ int ret;
+
+- ret = wait_status(smc, 0, SMC_STATUS_BUSY);
++ ret = port_wait_status(smc, 0, SMC_STATUS_BUSY);
+ if (!ret)
+ return ret;
+- ret = send_command(smc, APPLESMC_READ_CMD);
++ ret = port_send_command(smc, APPLESMC_READ_CMD);
+ if (ret)
+ return ret;
+- return wait_status(smc, 0, SMC_STATUS_BUSY);
++ return port_wait_status(smc, 0, SMC_STATUS_BUSY);
+ }
+
+-static int send_argument(struct applesmc_device *smc, const char *key)
++static int port_send_argument(struct applesmc_device *smc, const char *key)
+ {
+ int i;
+
+ for (i = 0; i < 4; i++)
+- if (send_byte(smc, key[i], APPLESMC_DATA_PORT))
++ if (port_send_byte(smc, key[i], APPLESMC_DATA_PORT))
+ return -EIO;
+ return 0;
+ }
+
+-static int read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++static int port_read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
+ u8 *buffer, u8 len)
+ {
+ u8 status, data = 0;
+ int i;
+ int ret;
+
+- ret = smc_sane(smc);
++ ret = port_smc_sane(smc);
+ if (ret)
+ return ret;
+
+- if (send_command(smc, cmd) || send_argument(smc, key)) {
++ if (port_send_command(smc, cmd) || port_send_argument(smc, key)) {
+ pr_warn("%.4s: read arg fail\n", key);
+ return -EIO;
+ }
+
+ /* This has no effect on newer (2012) SMCs */
+- if (send_byte(smc, len, APPLESMC_DATA_PORT)) {
++ if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) {
+ pr_warn("%.4s: read len fail\n", key);
+ return -EIO;
+ }
+
+ for (i = 0; i < len; i++) {
+- if (wait_status(smc,
++ if (port_wait_status(smc,
+ SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY,
+ SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) {
+ pr_warn("%.4s: read data[%d] fail\n", key, i);
+@@ -298,37 +299,80 @@ static int read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
+ if (i)
+ pr_warn("flushed %d bytes, last value is: %d\n", i, data);
+
+- return wait_status(smc, 0, SMC_STATUS_BUSY);
++ return port_wait_status(smc, 0, SMC_STATUS_BUSY);
+ }
+
+-static int write_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++static int port_write_smc(struct applesmc_device *smc, u8 cmd, const char *key,
+ const u8 *buffer, u8 len)
+ {
+ int i;
+ int ret;
+
+- ret = smc_sane(smc);
++ ret = port_smc_sane(smc);
+ if (ret)
+ return ret;
+
+- if (send_command(smc, cmd) || send_argument(smc, key)) {
++ if (port_send_command(smc, cmd) || port_send_argument(smc, key)) {
+ pr_warn("%s: write arg fail\n", key);
+ return -EIO;
+ }
+
+- if (send_byte(smc, len, APPLESMC_DATA_PORT)) {
++ if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) {
+ pr_warn("%.4s: write len fail\n", key);
+ return -EIO;
+ }
+
+ for (i = 0; i < len; i++) {
+- if (send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) {
++ if (port_send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) {
+ pr_warn("%s: write data fail\n", key);
+ return -EIO;
+ }
+ }
+
+- return wait_status(smc, 0, SMC_STATUS_BUSY);
++ return port_wait_status(smc, 0, SMC_STATUS_BUSY);
++}
++
++static int port_get_smc_key_info(struct applesmc_device *smc,
++ const char *key, struct applesmc_entry *info)
++{
++ int ret;
++ u8 raw[6];
++
++ ret = port_read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, raw, 6);
++ if (ret)
++ return ret;
++ info->len = raw[0];
++ memcpy(info->type, &raw[1], 4);
++ info->flags = raw[5];
++ return 0;
++}
++
++static int read_smc(struct applesmc_device *smc, const char *key,
++ u8 *buffer, u8 len)
++{
++ return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
++}
++
++static int write_smc(struct applesmc_device *smc, const char *key,
++ const u8 *buffer, u8 len)
++{
++ return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len);
++}
++
++static int get_smc_key_by_index(struct applesmc_device *smc,
++ unsigned int index, char *key)
++{
++ __be32 be;
++
++ be = cpu_to_be32(index);
++ return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD,
++ (const char *) &be, (u8 *) key, 4);
++}
++
++static int get_smc_key_info(struct applesmc_device *smc, const char *key,
++ struct applesmc_entry *info)
++{
++ return port_get_smc_key_info(smc, key, info);
+ }
+
+ static int read_register_count(struct applesmc_device *smc,
+@@ -337,8 +381,8 @@ static int read_register_count(struct applesmc_device *smc,
+ __be32 be;
+ int ret;
+
+- ret = read_smc(smc, APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4);
+- if (ret)
++ ret = read_smc(smc, KEY_COUNT_KEY, (u8 *)&be, 4);
++ if (ret < 0)
+ return ret;
+
+ *count = be32_to_cpu(be);
+@@ -360,7 +404,7 @@ static int applesmc_read_entry(struct applesmc_device *smc,
+ if (entry->len != len)
+ return -EINVAL;
+ mutex_lock(&smc->reg.mutex);
+- ret = read_smc(smc, APPLESMC_READ_CMD, entry->key, buf, len);
++ ret = read_smc(smc, entry->key, buf, len);
+ mutex_unlock(&smc->reg.mutex);
+
+ return ret;
+@@ -374,7 +418,7 @@ static int applesmc_write_entry(struct applesmc_device *smc,
+ if (entry->len != len)
+ return -EINVAL;
+ mutex_lock(&smc->reg.mutex);
+- ret = write_smc(smc, APPLESMC_WRITE_CMD, entry->key, buf, len);
++ ret = write_smc(smc, entry->key, buf, len);
+ mutex_unlock(&smc->reg.mutex);
+ return ret;
+ }
+@@ -383,8 +427,7 @@ static const struct applesmc_entry *applesmc_get_entry_by_index(
+ struct applesmc_device *smc, int index)
+ {
+ struct applesmc_entry *cache = &smc->reg.cache[index];
+- u8 key[4], info[6];
+- __be32 be;
++ char key[4];
+ int ret = 0;
+
+ if (cache->valid)
+@@ -394,18 +437,14 @@ static const struct applesmc_entry *applesmc_get_entry_by_index(
+
+ if (cache->valid)
+ goto out;
+- be = cpu_to_be32(index);
+- ret = read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4);
++ ret = get_smc_key_by_index(smc, index, key);
+ if (ret)
+ goto out;
+- ret = read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, info, 6);
++ memcpy(cache->key, key, 4);
++
++ ret = get_smc_key_info(smc, key, cache);
+ if (ret)
+ goto out;
+-
+- memcpy(cache->key, key, 4);
+- cache->len = info[0];
+- memcpy(cache->type, &info[1], 4);
+- cache->flags = info[5];
+ cache->valid = true;
+
+ out:
+--
+2.30.0
+
+From 799e7a54c62a36007f7874c58d7dac87c9651759 Mon Sep 17 00:00:00 2001
+From: Aun-Ali Zaidi <admin@kodeit.net>
+Date: Sun, 17 Nov 2019 23:12:16 +0100
+Subject: [PATCH 5/6] applesmc: basic mmio interface implementation
+
+This change introduces a basic MMIO-based
+interface implementation required to communicate
+with the SMC on T2 Macs. The MMIO interface is
+enabled only when it's supported on the running
+system.
+
+The MMIO interface replaces legacy port-based SMC
+key reads, writes and metadata requests (getting
+key by index and getting key info).
+
+(Based on patch by @mcmrarm)
+
+Signed-off-by: Aun-Ali Zaidi <admin@kodeit.net>
+---
+ drivers/hwmon/applesmc.c | 237 ++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 231 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index 3017d8ca2c79..2d23bb9ad9dd 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -42,6 +42,18 @@
+
+ #define APPLESMC_NR_PORTS 32 /* 0x300-0x31f */
+
++#define APPLESMC_IOMEM_KEY_DATA 0
++#define APPLESMC_IOMEM_KEY_STATUS 0x4005
++#define APPLESMC_IOMEM_KEY_NAME 0x78
++#define APPLESMC_IOMEM_KEY_DATA_LEN 0x7D
++#define APPLESMC_IOMEM_KEY_SMC_ID 0x7E
++#define APPLESMC_IOMEM_KEY_CMD 0x7F
++#define APPLESMC_IOMEM_MIN_SIZE 0x4006
++
++#define APPLESMC_IOMEM_KEY_TYPE_CODE 0
++#define APPLESMC_IOMEM_KEY_TYPE_DATA_LEN 5
++#define APPLESMC_IOMEM_KEY_TYPE_FLAGS 6
++
+ #define APPLESMC_MAX_DATA_LENGTH 32
+
+ /* Apple SMC status bits */
+@@ -138,10 +150,13 @@ struct applesmc_registers {
+
+ struct applesmc_device {
+ struct acpi_device *dev;
++ struct device *ldev;
+ struct applesmc_registers reg;
+
+- bool port_base_set;
++ bool port_base_set, iomem_base_set;
+ u16 port_base;
++ u8 *__iomem iomem_base;
++ u32 iomem_base_addr, iomem_base_size;
+
+ s16 rest_x;
+ s16 rest_y;
+@@ -347,16 +362,156 @@ static int port_get_smc_key_info(struct applesmc_device *smc,
+ return 0;
+ }
+
++
++/*
++ * MMIO based communication.
++ * TODO: Use updated mechanism for cmd timeout/retry
++ */
++
++static void iomem_clear_status(struct applesmc_device *smc)
++{
++ if (ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS))
++ iowrite8(0, smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
++}
++
++static int iomem_wait_read(struct applesmc_device *smc)
++{
++ u8 status;
++ int us;
++ int i;
++
++ us = APPLESMC_MIN_WAIT;
++ for (i = 0; i < 24 ; i++) {
++ status = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
++ if (status & 0x20)
++ return 0;
++ usleep_range(us, us * 2);
++ if (i > 9)
++ us <<= 1;
++ }
++
++ dev_warn(smc->ldev, "%s... timeout\n", __func__);
++ return -EIO;
++}
++
++static int iomem_read_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++ u8 *buffer, u8 len)
++{
++ u8 err, remote_len;
++ u32 key_int = *((u32 *) key);
++
++ iomem_clear_status(smc);
++ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
++ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
++ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++
++ if (iomem_wait_read(smc))
++ return -EIO;
++
++ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++ if (err != 0) {
++ dev_warn(smc->ldev, "read_smc_mmio(%x %8x/%.4s) failed: %u\n",
++ cmd, key_int, key, err);
++ return -EIO;
++ }
++
++ if (cmd == APPLESMC_READ_CMD) {
++ remote_len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN);
++ if (remote_len != len) {
++ dev_warn(smc->ldev,
++ "read_smc_mmio(%x %8x/%.4s) failed: buffer length mismatch (remote = %u, requested = %u)\n",
++ cmd, key_int, key, remote_len, len);
++ return -EINVAL;
++ }
++ } else {
++ remote_len = len;
++ }
++
++ memcpy_fromio(buffer, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA,
++ remote_len);
++
++ dev_dbg(smc->ldev, "read_smc_mmio(%x %8x/%.4s): buflen=%u reslen=%u\n",
++ cmd, key_int, key, len, remote_len);
++ print_hex_dump_bytes("read_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, remote_len);
++ return 0;
++}
++
++static int iomem_get_smc_key_type(struct applesmc_device *smc, const char *key,
++ struct applesmc_entry *e)
++{
++ u8 err;
++ u8 cmd = APPLESMC_GET_KEY_TYPE_CMD;
++ u32 key_int = *((u32 *) key);
++
++ iomem_clear_status(smc);
++ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
++ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
++ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++
++ if (iomem_wait_read(smc))
++ return -EIO;
++
++ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++ if (err != 0) {
++ dev_warn(smc->ldev, "get_smc_key_type_mmio(%.4s) failed: %u\n", key, err);
++ return -EIO;
++ }
++
++ e->len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_DATA_LEN);
++ *((uint32_t *) e->type) = ioread32(
++ smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_CODE);
++ e->flags = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_FLAGS);
++
++ dev_dbg(smc->ldev, "get_smc_key_type_mmio(%.4s): len=%u type=%.4s flags=%x\n",
++ key, e->len, e->type, e->flags);
++ return 0;
++}
++
++static int iomem_write_smc(struct applesmc_device *smc, u8 cmd, const char *key,
++ const u8 *buffer, u8 len)
++{
++ u8 err;
++ u32 key_int = *((u32 *) key);
++
++ iomem_clear_status(smc);
++ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
++ memcpy_toio(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA, buffer, len);
++ iowrite32(len, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN);
++ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
++ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++
++ if (iomem_wait_read(smc))
++ return -EIO;
++
++ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
++ if (err != 0) {
++ dev_warn(smc->ldev, "write_smc_mmio(%x %.4s) failed: %u\n", cmd, key, err);
++ print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len);
++ return -EIO;
++ }
++
++ dev_dbg(smc->ldev, "write_smc_mmio(%x %.4s): buflen=%u\n", cmd, key, len);
++ print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len);
++ return 0;
++}
++
++
+ static int read_smc(struct applesmc_device *smc, const char *key,
+ u8 *buffer, u8 len)
+ {
+- return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
++ if (smc->iomem_base_set)
++ return iomem_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
++ else
++ return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
+ }
+
+ static int write_smc(struct applesmc_device *smc, const char *key,
+ const u8 *buffer, u8 len)
+ {
+- return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len);
++ if (smc->iomem_base_set)
++ return iomem_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len);
++ else
++ return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len);
+ }
+
+ static int get_smc_key_by_index(struct applesmc_device *smc,
+@@ -365,14 +520,21 @@ static int get_smc_key_by_index(struct applesmc_device *smc,
+ __be32 be;
+
+ be = cpu_to_be32(index);
+- return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD,
+- (const char *) &be, (u8 *) key, 4);
++ if (smc->iomem_base_set)
++ return iomem_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD,
++ (const char *) &be, (u8 *) key, 4);
++ else
++ return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD,
++ (const char *) &be, (u8 *) key, 4);
+ }
+
+ static int get_smc_key_info(struct applesmc_device *smc, const char *key,
+ struct applesmc_entry *info)
+ {
+- return port_get_smc_key_info(smc, key, info);
++ if (smc->iomem_base_set)
++ return iomem_get_smc_key_type(smc, key, info);
++ else
++ return port_get_smc_key_info(smc, key, info);
+ }
+
+ static int read_register_count(struct applesmc_device *smc,
+@@ -746,6 +908,7 @@ static int applesmc_add(struct acpi_device *dev)
+ if (!smc)
+ return -ENOMEM;
+ smc->dev = dev;
++ smc->ldev = &dev->dev;
+ mutex_init(&smc->reg.mutex);
+
+ dev_set_drvdata(&dev->dev, smc);
+@@ -807,6 +970,20 @@ static acpi_status applesmc_walk_resources(struct acpi_resource *res,
+ }
+ return AE_OK;
+
++ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
++ if (!smc->iomem_base_set) {
++ if (res->data.fixed_memory32.address_length <
++ APPLESMC_IOMEM_MIN_SIZE) {
++ dev_warn(smc->ldev, "found iomem but it's too small: %u\n",
++ res->data.fixed_memory32.address_length);
++ return AE_OK;
++ }
++ smc->iomem_base_addr = res->data.fixed_memory32.address;
++ smc->iomem_base_size = res->data.fixed_memory32.address_length;
++ smc->iomem_base_set = true;
++ }
++ return AE_OK;
++
+ case ACPI_RESOURCE_TYPE_END_TAG:
+ if (smc->port_base_set)
+ return AE_OK;
+@@ -818,6 +995,8 @@ static acpi_status applesmc_walk_resources(struct acpi_resource *res,
+ }
+ }
+
++static int applesmc_try_enable_iomem(struct applesmc_device *smc);
++
+ static int applesmc_init_resources(struct applesmc_device *smc)
+ {
+ int ret;
+@@ -830,11 +1009,57 @@ static int applesmc_init_resources(struct applesmc_device *smc)
+ if (!request_region(smc->port_base, APPLESMC_NR_PORTS, "applesmc"))
+ return -ENXIO;
+
++ if (smc->iomem_base_set) {
++ if (applesmc_try_enable_iomem(smc))
++ smc->iomem_base_set = false;
++ }
++
+ return 0;
+ }
+
++static int applesmc_try_enable_iomem(struct applesmc_device *smc)
++{
++ u8 test_val, ldkn_version;
++
++ dev_dbg(smc->ldev, "Trying to enable iomem based communication\n");
++ smc->iomem_base = ioremap(smc->iomem_base_addr, smc->iomem_base_size);
++ if (!smc->iomem_base)
++ goto out;
++
++ /* Apple's driver does this check for some reason */
++ test_val = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS);
++ if (test_val == 0xff) {
++ dev_warn(smc->ldev,
++ "iomem enable failed: initial status is 0xff (is %x)\n",
++ test_val);
++ goto out_iomem;
++ }
++
++ if (read_smc(smc, "LDKN", &ldkn_version, 1)) {
++ dev_warn(smc->ldev, "iomem enable failed: ldkn read failed\n");
++ goto out_iomem;
++ }
++
++ if (ldkn_version < 2) {
++ dev_warn(smc->ldev,
++ "iomem enable failed: ldkn version %u is less than minimum (2)\n",
++ ldkn_version);
++ goto out_iomem;
++ }
++
++ return 0;
++
++out_iomem:
++ iounmap(smc->iomem_base);
++
++out:
++ return -ENXIO;
++}
++
+ static void applesmc_free_resources(struct applesmc_device *smc)
+ {
++ if (smc->iomem_base_set)
++ iounmap(smc->iomem_base);
+ release_region(smc->port_base, APPLESMC_NR_PORTS);
+ }
+
+--
+2.30.0
+
+From 4e63e9b77422aae8e7411ddc7a8458c2585c86df Mon Sep 17 00:00:00 2001
+From: Paul Pawlowski <paul@mrarm.io>
+Date: Sun, 17 Nov 2019 23:12:18 +0100
+Subject: [PATCH 6/6] applesmc: fan support on T2 Macs
+
+T2 Macs changed the fan values from shorts to
+floats, and changed the fan manual override
+setting from a bitmask to a per-fan boolean
+named F0Md (thanks to @kleuter for mentioning
+it).
+
+A minimal soft-float implementation has been
+written to convert floats to integers (and vice
+versa).
+
+Signed-off-by: Aun-Ali Zaidi <admin@kodeit.net>
+---
+ drivers/hwmon/applesmc.c | 119 +++++++++++++++++++++++++++++++++------
+ 1 file changed, 102 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index 2d23bb9ad9dd..0938227be612 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -87,6 +87,7 @@
+ #define FAN_ID_FMT "F%dID" /* r-o char[16] */
+
+ #define TEMP_SENSOR_TYPE "sp78"
++#define FLOAT_TYPE "flt "
+
+ /* List of keys used to read/write fan speeds */
+ static const char *const fan_speed_fmt[] = {
+@@ -96,6 +97,7 @@ static const char *const fan_speed_fmt[] = {
+ "F%dSf", /* safe speed - not all models */
+ "F%dTg", /* target speed (manual: rw) */
+ };
++#define FAN_MANUAL_FMT "F%dMd"
+
+ #define INIT_TIMEOUT_MSECS 5000 /* wait up to 5s for device init ... */
+ #define INIT_WAIT_MSECS 50 /* ... in 50ms increments */
+@@ -734,6 +736,42 @@ static int applesmc_read_s16(struct applesmc_device *smc,
+ return 0;
+ }
+
++/**
++ * applesmc_float_to_u32 - Retrieve the integral part of a float.
++ * This is needed because Apple made fans use float values in the T2.
++ * The fractional part is not significantly useful though, and the integral
++ * part can be easily extracted.
++ */
++static inline u32 applesmc_float_to_u32(u32 d)
++{
++ u8 sign = (u8) ((d >> 31) & 1);
++ s32 exp = (s32) ((d >> 23) & 0xff) - 0x7f;
++ u32 fr = d & ((1u << 23) - 1);
++
++ if (sign || exp < 0)
++ return 0;
++
++ return (u32) ((1u << exp) + (fr >> (23 - exp)));
++}
++
++/**
++ * applesmc_u32_to_float - Convert a u32 into a float.
++ * See applesmc_float_to_u32 for a rationale.
++ */
++static inline u32 applesmc_u32_to_float(u32 d)
++{
++ u32 dc = d, bc = 0, exp;
++
++ if (!d)
++ return 0;
++
++ while (dc >>= 1)
++ ++bc;
++ exp = 0x7f + bc;
++
++ return (u32) ((exp << 23) |
++ ((d << (23 - (exp - 0x7f))) & ((1u << 23) - 1)));
++}
+ /*
+ * applesmc_device_init - initialize the accelerometer. Can sleep.
+ */
+@@ -1242,6 +1280,7 @@ static ssize_t applesmc_show_fan_speed(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
+ struct applesmc_device *smc = dev_get_drvdata(dev);
++ const struct applesmc_entry *entry;
+ int ret;
+ unsigned int speed = 0;
+ char newkey[5];
+@@ -1250,11 +1289,21 @@ static ssize_t applesmc_show_fan_speed(struct device *dev,
+ scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
+ to_index(attr));
+
+- ret = applesmc_read_key(smc, newkey, buffer, 2);
++ entry = applesmc_get_entry_by_key(smc, newkey);
++ if (IS_ERR(entry))
++ return PTR_ERR(entry);
++
++ if (!strcmp(entry->type, FLOAT_TYPE)) {
++ ret = applesmc_read_entry(smc, entry, (u8 *) &speed, 4);
++ speed = applesmc_float_to_u32(speed);
++ } else {
++ ret = applesmc_read_entry(smc, entry, buffer, 2);
++ speed = ((buffer[0] << 8 | buffer[1]) >> 2);
++ }
++
+ if (ret)
+ return ret;
+
+- speed = ((buffer[0] << 8 | buffer[1]) >> 2);
+ return sysfs_emit(sysfsbuf, "%u\n", speed);
+ }
+
+@@ -1263,6 +1312,7 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
+ const char *sysfsbuf, size_t count)
+ {
+ struct applesmc_device *smc = dev_get_drvdata(dev);
++ const struct applesmc_entry *entry;
+ int ret;
+ unsigned long speed;
+ char newkey[5];
+@@ -1274,9 +1324,18 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
+ scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
+ to_index(attr));
+
+- buffer[0] = (speed >> 6) & 0xff;
+- buffer[1] = (speed << 2) & 0xff;
+- ret = applesmc_write_key(smc, newkey, buffer, 2);
++ entry = applesmc_get_entry_by_key(smc, newkey);
++ if (IS_ERR(entry))
++ return PTR_ERR(entry);
++
++ if (!strcmp(entry->type, FLOAT_TYPE)) {
++ speed = applesmc_u32_to_float(speed);
++ ret = applesmc_write_entry(smc, entry, (u8 *) &speed, 4);
++ } else {
++ buffer[0] = (speed >> 6) & 0xff;
++ buffer[1] = (speed << 2) & 0xff;
++ ret = applesmc_write_key(smc, newkey, buffer, 2);
++ }
+
+ if (ret)
+ return ret;
+@@ -1291,12 +1350,26 @@ static ssize_t applesmc_show_fan_manual(struct device *dev,
+ int ret;
+ u16 manual = 0;
+ u8 buffer[2];
++ char newkey[5];
++ bool has_newkey = false;
++
++ scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr));
++
++ ret = applesmc_has_key(smc, newkey, &has_newkey);
++ if (ret)
++ return ret;
++
++ if (has_newkey) {
++ ret = applesmc_read_key(smc, newkey, buffer, 1);
++ manual = buffer[0];
++ } else {
++ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
++ manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01;
++ }
+
+- ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
+ if (ret)
+ return ret;
+
+- manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01;
+ return sysfs_emit(sysfsbuf, "%d\n", manual);
+ }
+
+@@ -1307,27 +1380,39 @@ static ssize_t applesmc_store_fan_manual(struct device *dev,
+ struct applesmc_device *smc = dev_get_drvdata(dev);
+ int ret;
+ u8 buffer[2];
++ char newkey[5];
++ bool has_newkey = false;
+ unsigned long input;
+ u16 val;
+
+ if (kstrtoul(sysfsbuf, 10, &input) < 0)
+ return -EINVAL;
+
+- ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
++ scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr));
++
++ ret = applesmc_has_key(smc, newkey, &has_newkey);
+ if (ret)
+- goto out;
++ return ret;
+
+- val = (buffer[0] << 8 | buffer[1]);
++ if (has_newkey) {
++ buffer[0] = input & 1;
++ ret = applesmc_write_key(smc, newkey, buffer, 1);
++ } else {
++ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2);
++ val = (buffer[0] << 8 | buffer[1]);
++ if (ret)
++ goto out;
+
+- if (input)
+- val = val | (0x01 << to_index(attr));
+- else
+- val = val & ~(0x01 << to_index(attr));
++ if (input)
++ val = val | (0x01 << to_index(attr));
++ else
++ val = val & ~(0x01 << to_index(attr));
+
+- buffer[0] = (val >> 8) & 0xFF;
+- buffer[1] = val & 0xFF;
++ buffer[0] = (val >> 8) & 0xFF;
++ buffer[1] = val & 0xFF;
+
+- ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2);
++ ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2);
++ }
+
+ out:
+ if (ret)
+--
+2.30.0
+
+From 58868e6f356229eab48cfdee1603011653a19c79 Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain <redecorating@protonmail.com>
+Date: Sun, 9 Oct 2022 15:59:01 +0530
+Subject: [PATCH] applesmc: Add iMacPro to applesmc_whitelist
+
+The iMacPro1,1 is the only iMacPro released before the line was
+discontinued. Add it to the applesmc_whitelist.
+
+Signed-off-by: Orlando Chamberlain <redecorating@protonmail.com>
+---
+ drivers/hwmon/applesmc.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index 8b3f73fcb..493f95bb0 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -1804,6 +1804,10 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Macmini") },
+ },
++ { applesmc_dmi_match, "Apple iMacPro", {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "iMacPro") },
++ },
+ { applesmc_dmi_match, "Apple MacPro", {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacPro") },
+--
+2.34.1
+
+From e52b0fad357b6203691942831715fce4f26d66e2 Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain <orlandoch.dev@gmail.com>
+Date: Tue, 24 Jan 2023 15:46:48 +1100
+Subject: [PATCH 1/1] applesmc: make applesmc_remove void
+
+Make applesmc_remove() return void for Linux 6.2 compatibility.
+---
+ drivers/hwmon/applesmc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index d071130ff68d..12be9269a314 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -979,7 +979,7 @@ static int applesmc_add(struct acpi_device *dev)
+ return ret;
+ }
+
+-static int applesmc_remove(struct acpi_device *dev)
++static void applesmc_remove(struct acpi_device *dev)
+ {
+ struct applesmc_device *smc = dev_get_drvdata(&dev->dev);
+
+@@ -990,7 +990,7 @@ static int applesmc_remove(struct acpi_device *dev)
+ mutex_destroy(&smc->reg.mutex);
+ kfree(smc);
+
+- return 0;
++ return;
+ }
+
+ static acpi_status applesmc_walk_resources(struct acpi_resource *res,
+--
+2.39.1
+
+From 38786c7979c8ece013b5b7d5cb07dc2aa40198be Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain <orlandoch.dev@gmail.com>
+Date: Mon, 30 Jan 2023 18:42:21 +1100
+Subject: [PATCH 1/1] applesmc: battery charge limiter
+
+---
+ drivers/hwmon/applesmc.c | 42 +++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 41 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
+index 12be9269a314..bc1eec74cfef 100644
+--- a/drivers/hwmon/applesmc.c
++++ b/drivers/hwmon/applesmc.c
+@@ -1478,6 +1478,35 @@ static void applesmc_brightness_set(struct led_classdev *led_cdev,
+ dev_dbg(led_cdev->dev, "work was already on the queue.\n");
+ }
+
++static ssize_t applesmc_BCLM_store(struct device *dev,
++ struct device_attribute *attr, char *sysfsbuf, size_t count)
++{
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++ u8 val;
++
++ if (kstrtou8(sysfsbuf, 10, &val) < 0)
++ return -EINVAL;
++
++ if (val < 0 || val > 100)
++ return -EINVAL;
++
++ if (applesmc_write_key(smc, "BCLM", &val, 1))
++ return -ENODEV;
++ return count;
++}
++
++static ssize_t applesmc_BCLM_show(struct device *dev,
++ struct device_attribute *attr, char *sysfsbuf)
++{
++ struct applesmc_device *smc = dev_get_drvdata(dev);
++ u8 val;
++
++ if (applesmc_read_key(smc, "BCLM", &val, 1))
++ return -ENODEV;
++
++ return sysfs_emit(sysfsbuf, "%d\n", val);
++}
++
+ static ssize_t applesmc_key_count_show(struct device *dev,
+ struct device_attribute *attr, char *sysfsbuf)
+ {
+@@ -1612,6 +1641,11 @@ static struct applesmc_node_group temp_group[] = {
+ { }
+ };
+
++static struct applesmc_node_group BCLM_group[] = {
++ { "battery_charge_limit", applesmc_BCLM_show, applesmc_BCLM_store },
++ { }
++};
++
+ /* Module stuff */
+
+ /*
+@@ -1830,10 +1864,13 @@ static int applesmc_create_modules(struct applesmc_device *smc)
+ ret = applesmc_create_nodes(smc, info_group, 1);
+ if (ret)
+ goto out;
++ ret = applesmc_create_nodes(smc, BCLM_group, 1);
++ if (ret)
++ goto out_info;
+
+ ret = applesmc_create_nodes(smc, fan_group, smc->reg.fan_count);
+ if (ret)
+- goto out_info;
++ goto out_bclm;
+
+ ret = applesmc_create_nodes(smc, temp_group, smc->reg.index_count);
+ if (ret)
+@@ -1869,6 +1906,8 @@ static int applesmc_create_modules(struct applesmc_device *smc)
+ applesmc_destroy_nodes(smc, temp_group);
+ out_fans:
+ applesmc_destroy_nodes(smc, fan_group);
++out_bclm:
++ applesmc_destroy_nodes(smc, BCLM_group);
+ out_info:
+ applesmc_destroy_nodes(smc, info_group);
+ out:
+@@ -1883,6 +1922,7 @@ static void applesmc_destroy_modules(struct applesmc_device *smc)
+ applesmc_release_accelerometer(smc);
+ applesmc_destroy_nodes(smc, temp_group);
+ applesmc_destroy_nodes(smc, fan_group);
++ applesmc_destroy_nodes(smc, BCLM_group);
+ applesmc_destroy_nodes(smc, info_group);
+ }
+
+--
+2.39.1
+
+From 327e6e1d0f6e8db68c124dff4d6a326b381ccedb Mon Sep 17 00:00:00 2001
+From: Aun-Ali Zaidi <admin@kodeit.net>
+Date: Wed, 23 Mar 2022 17:12:21 +0530
+Subject: [PATCH] Input: bcm5974 - Add support for the T2 Macs
+
+---
+ drivers/input/mouse/bcm5974.c | 138 ++++++++++++++++++++++++++++++++++
+ 1 file changed, 138 insertions(+)
+
+diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
+index 59a14505b..88f17f21a 100644
+--- a/drivers/input/mouse/bcm5974.c
++++ b/drivers/input/mouse/bcm5974.c
+@@ -83,6 +83,24 @@
+ #define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO 0x0273
+ #define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS 0x0274
+
++/* T2-Attached Devices */
++/* MacbookAir8,1 (2018) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K 0x027a
++/* MacbookPro15,2 (2018) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132 0x027b
++/* MacbookPro15,1 (2018) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680 0x027c
++/* MacbookPro15,4 (2019) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213 0x027d
++/* MacbookPro16,2 (2020) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K 0x027e
++/* MacbookPro16,3 (2020) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223 0x027f
++/* MacbookAir9,1 (2020) */
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K 0x0280
++/* MacbookPro16,1 (2019)*/
++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F 0x0340
++
+ #define BCM5974_DEVICE(prod) { \
+ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \
+ USB_DEVICE_ID_MATCH_INT_CLASS | \
+@@ -147,6 +165,22 @@ static const struct usb_device_id bcm5974_table[] = {
+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI),
+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ISO),
+ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_JIS),
++ /* MacbookAir8,1 */
++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K),
++ /* MacbookPro15,2 */
++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132),
++ /* MacbookPro15,1 */
++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680),
++ /* MacbookPro15,4 */
++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213),
++ /* MacbookPro16,2 */
++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K),
++ /* MacbookPro16,3 */
++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223),
++ /* MacbookAir9,1 */
++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K),
++ /* MacbookPro16,1 */
++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F),
+ /* Terminating entry */
+ {}
+ };
+@@ -483,6 +517,110 @@ static const struct bcm5974_config bcm5974_config_table[] = {
+ { SN_COORD, -203, 6803 },
+ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
+ },
++ {
++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K,
++ 0,
++ 0,
++ HAS_INTEGRATED_BUTTON,
++ 0, sizeof(struct bt_data),
++ 0x83, DATAFORMAT(TYPE4),
++ { SN_PRESSURE, 0, 300 },
++ { SN_WIDTH, 0, 2048 },
++ { SN_COORD, -6243, 6749 },
++ { SN_COORD, -170, 7685 },
++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++ },
++ {
++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132,
++ 0,
++ 0,
++ HAS_INTEGRATED_BUTTON,
++ 0, sizeof(struct bt_data),
++ 0x83, DATAFORMAT(TYPE4),
++ { SN_PRESSURE, 0, 300 },
++ { SN_WIDTH, 0, 2048 },
++ { SN_COORD, -6243, 6749 },
++ { SN_COORD, -170, 7685 },
++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++ },
++ {
++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680,
++ 0,
++ 0,
++ HAS_INTEGRATED_BUTTON,
++ 0, sizeof(struct bt_data),
++ 0x83, DATAFORMAT(TYPE4),
++ { SN_PRESSURE, 0, 300 },
++ { SN_WIDTH, 0, 2048 },
++ { SN_COORD, -7456, 7976 },
++ { SN_COORD, -1768, 7685 },
++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++ },
++ {
++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213,
++ 0,
++ 0,
++ HAS_INTEGRATED_BUTTON,
++ 0, sizeof(struct bt_data),
++ 0x83, DATAFORMAT(TYPE4),
++ { SN_PRESSURE, 0, 300 },
++ { SN_WIDTH, 0, 2048 },
++ { SN_COORD, -6243, 6749 },
++ { SN_COORD, -170, 7685 },
++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++ },
++ {
++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K,
++ 0,
++ 0,
++ HAS_INTEGRATED_BUTTON,
++ 0, sizeof(struct bt_data),
++ 0x83, DATAFORMAT(TYPE4),
++ { SN_PRESSURE, 0, 300 },
++ { SN_WIDTH, 0, 2048 },
++ { SN_COORD, -7823, 8329 },
++ { SN_COORD, -370, 7925 },
++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++ },
++ {
++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223,
++ 0,
++ 0,
++ HAS_INTEGRATED_BUTTON,
++ 0, sizeof(struct bt_data),
++ 0x83, DATAFORMAT(TYPE4),
++ { SN_PRESSURE, 0, 300 },
++ { SN_WIDTH, 0, 2048 },
++ { SN_COORD, -6243, 6749 },
++ { SN_COORD, -170, 7685 },
++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++ },
++ {
++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K,
++ 0,
++ 0,
++ HAS_INTEGRATED_BUTTON,
++ 0, sizeof(struct bt_data),
++ 0x83, DATAFORMAT(TYPE4),
++ { SN_PRESSURE, 0, 300 },
++ { SN_WIDTH, 0, 2048 },
++ { SN_COORD, -6243, 6749 },
++ { SN_COORD, -170, 7685 },
++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++ },
++ {
++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F,
++ 0,
++ 0,
++ HAS_INTEGRATED_BUTTON,
++ 0, sizeof(struct bt_data),
++ 0x83, DATAFORMAT(TYPE4),
++ { SN_PRESSURE, 0, 300 },
++ { SN_WIDTH, 0, 2048 },
++ { SN_COORD, -8916, 9918 },
++ { SN_COORD, -1934, 9835 },
++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
++ },
+ {}
+ };
+
+--
+2.25.1
+
+From: Ashish Arora <ashisharora.linux@outlook.com>
+Subject: Re: [PATCH] drm/i915: Discard large BIOS framebuffers causing display corruption.
+Date: Sat, 08 Jan 2022 21:43:18 +1100
+
+On certain 4k panels, the BIOS framebuffer is larger than what the panel
+requires, causing display corruption. Release it unless the sizes match.
+
+Signed-off-by: Ashish Arora <ashisharora.linux@outlook.com>
+---
+ drivers/gpu/drm/i915/display/intel_fbdev.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
+index 112aa04..8fb8bcc 100644
+--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
++++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
+@@ -217,10 +217,10 @@ static int intelfb_create(struct drm_fb_helper *helper,
+ return ret;
+
+ if (intel_fb &&
+- (sizes->fb_width > intel_fb->base.width ||
+- sizes->fb_height > intel_fb->base.height)) {
++ (sizes->fb_width != intel_fb->base.width ||
++ sizes->fb_height != intel_fb->base.height)) {
+ drm_dbg_kms(&dev_priv->drm,
+- "BIOS fb too small (%dx%d), we require (%dx%d),"
++ "BIOS fb not valid (%dx%d), we require (%dx%d),"
+ " releasing it\n",
+ intel_fb->base.width, intel_fb->base.height,
+ sizes->fb_width, sizes->fb_height);
+--
+1.8.3.1
+
+From 3d4a4a3d62815f90fc65a827a3e2de96c4571350 Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain <orlandoch.dev@gmail.com>
+Date: Mon, 20 Nov 2023 10:32:23 +1100
+Subject: [PATCH 1/1] acpi video: force native for some T2 macbooks
+
+The intel backlight is needed for these.
+
+MacBookPro15,2/4, MacBookPro16,3 or MacBookAir8,1/2 might also need
+this, so I'm not going to be submitting this upstream yet.
+
+mbp16,3 was reported not to have this issue for 6.5.8 at least.
+---
+ drivers/acpi/video_detect.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
+index 442396f6ed1f..baf7264d7b94 100644
+--- a/drivers/acpi/video_detect.c
++++ b/drivers/acpi/video_detect.c
+@@ -513,6 +513,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,2"),
+ },
+ },
++ {
++ .callback = video_detect_force_native,
++ /* Apple MacBook Air 9,1 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir9,1"),
++ },
++ },
+ {
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */
+ .callback = video_detect_force_native,
+@@ -522,6 +530,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro12,1"),
+ },
+ },
++ {
++ .callback = video_detect_force_native,
++ /* Apple MacBook Pro 16,2 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro16,2"),
++ },
++ },
+ {
+ .callback = video_detect_force_native,
+ /* Dell Inspiron N4010 */
+--
+2.42.1
+
+From 923cfe9b86c71761b164f995631817e9af169f29 Mon Sep 17 00:00:00 2001
+From: Hector Martin <marcan@marcan.st>
+Date: Tue, 14 Feb 2023 18:33:19 +0900
+Subject: [PATCH] brcmfmac: cfg80211: Use WSEC to set SAE password
+
+Using the WSEC command instead of sae_password seems to be the supported
+mechanism on newer firmware, and also how the brcmdhd driver does it.
+
+Signed-off-by: Hector Martin <marcan@marcan.st>
+---
+ .../broadcom/brcm80211/brcmfmac/cfg80211.c | 46 ++++++++-----------
+ .../broadcom/brcm80211/brcmfmac/fwil_types.h | 2 +-
+ 2 files changed, 20 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+index 87f4d53fb..7ccdbafca 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+@@ -1682,52 +1682,44 @@ static u16 brcmf_map_fw_linkdown_reason(const struct brcmf_event_msg *e)
+ return reason;
+ }
+
+-static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len)
++static int brcmf_set_wsec(struct brcmf_if *ifp, const u8 *key, u16 key_len, u16 flags)
+ {
+ struct brcmf_pub *drvr = ifp->drvr;
+ struct brcmf_wsec_pmk_le pmk;
+ int err;
+
++ if (key_len > sizeof(pmk.key)) {
++ bphy_err(drvr, "key must be less than %zu bytes\n",
++ sizeof(pmk.key));
++ return -EINVAL;
++ }
++
+ memset(&pmk, 0, sizeof(pmk));
+
+- /* pass pmk directly */
+- pmk.key_len = cpu_to_le16(pmk_len);
+- pmk.flags = cpu_to_le16(0);
+- memcpy(pmk.key, pmk_data, pmk_len);
++ /* pass key material directly */
++ pmk.key_len = cpu_to_le16(key_len);
++ pmk.flags = cpu_to_le16(flags);
++ memcpy(pmk.key, key, key_len);
+
+- /* store psk in firmware */
++ /* store key material in firmware */
+ err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SET_WSEC_PMK,
+ &pmk, sizeof(pmk));
+ if (err < 0)
+ bphy_err(drvr, "failed to change PSK in firmware (len=%u)\n",
+- pmk_len);
++ key_len);
+
+ return err;
+ }
+
++static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len)
++{
++ return brcmf_set_wsec(ifp, pmk_data, pmk_len, 0);
++}
++
+ static int brcmf_set_sae_password(struct brcmf_if *ifp, const u8 *pwd_data,
+ u16 pwd_len)
+ {
+- struct brcmf_pub *drvr = ifp->drvr;
+- struct brcmf_wsec_sae_pwd_le sae_pwd;
+- int err;
+-
+- if (pwd_len > BRCMF_WSEC_MAX_SAE_PASSWORD_LEN) {
+- bphy_err(drvr, "sae_password must be less than %d\n",
+- BRCMF_WSEC_MAX_SAE_PASSWORD_LEN);
+- return -EINVAL;
+- }
+-
+- sae_pwd.key_len = cpu_to_le16(pwd_len);
+- memcpy(sae_pwd.key, pwd_data, pwd_len);
+-
+- err = brcmf_fil_iovar_data_set(ifp, "sae_password", &sae_pwd,
+- sizeof(sae_pwd));
+- if (err < 0)
+- bphy_err(drvr, "failed to set SAE password in firmware (len=%u)\n",
+- pwd_len);
+-
+- return err;
++ return brcmf_set_wsec(ifp, pwd_data, pwd_len, BRCMF_WSEC_PASSPHRASE);
+ }
+
+ static void brcmf_link_down(struct brcmf_cfg80211_vif *vif, u16 reason,
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
+index 792adaf88..3ba90878c 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
+@@ -574,7 +574,7 @@ struct brcmf_wsec_key_le {
+ struct brcmf_wsec_pmk_le {
+ __le16 key_len;
+ __le16 flags;
+- u8 key[2 * BRCMF_WSEC_MAX_PSK_LEN + 1];
++ u8 key[BRCMF_WSEC_MAX_SAE_PASSWORD_LEN];
+ };
+
+ /**
+--
+2.37.2
+
+From patchwork Wed Dec 27 10:10:03 2023
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Patchwork-Submitter: Johan Hovold <johan+linaro@kernel.org>
+X-Patchwork-Id: 13505281
+Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org
+ [10.30.226.201])
+ (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
+ (No client certificate requested)
+ by smtp.subspace.kernel.org (Postfix) with ESMTPS id 731F42D602;
+ Wed, 27 Dec 2023 10:10:50 +0000 (UTC)
+Authentication-Results: smtp.subspace.kernel.org;
+ dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org
+ header.b="OE5gY3Rg"
+Received: by smtp.kernel.org (Postfix) with ESMTPSA id E1F71C433C8;
+ Wed, 27 Dec 2023 10:10:49 +0000 (UTC)
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org;
+ s=k20201202; t=1703671849;
+ bh=HNX2qe5wIUjgAOE0bih0cYXbYbw77i5qRYAGTFhWf8Q=;
+ h=From:To:Cc:Subject:Date:From;
+ b=OE5gY3RgSNMMNpr/DemitpLvv8B5KUxkea+huKa97KhEilNAbl/OG/gZPSswoI3kl
+ ifwN2LiGgFt8jyQh8hVsCoIrrOOGgiqeJ9ivyZI86fxAmaICglCBVc65vzpPozQdYn
+ YsryqO/D6A6i3egHRr7G52DifE/DihYN9uZqhAIHTY+ESsr/mJvwodvV8HNt60TaF9
+ dFeWSj4rAgt/QaclFNs1wznkamzzJ3UloOq2NJbzC3F6ILEsWfuPRm8iKBlgwdNTZ+
+ bn4JmN3Zh0Mr2uaTVg902uWeLcZ93sY9BmqH1AOBDEXTlUvPd7n6xVrSnOLfdlTR5k
+ O5JKLTyyjvyTQ==
+Received: from johan by xi.lan with local (Exim 4.96.2)
+ (envelope-from <johan+linaro@kernel.org>)
+ id 1rIQs7-0002kc-0u;
+ Wed, 27 Dec 2023 11:10:44 +0100
+From: Johan Hovold <johan+linaro@kernel.org>
+To: Luiz Augusto von Dentz <luiz.dentz@gmail.com>,
+ Marcel Holtmann <marcel@holtmann.org>,
+ Johan Hedberg <johan.hedberg@gmail.com>
+Cc: Hector Martin <marcan@marcan.st>,
+ Sven Peter <sven@svenpeter.dev>,
+ Alyssa Rosenzweig <alyssa@rosenzweig.io>,
+ asahi@lists.linux.dev,
+ linux-arm-kernel@lists.infradead.org,
+ linux-bluetooth@vger.kernel.org,
+ linux-kernel@vger.kernel.org,
+ Johan Hovold <johan+linaro@kernel.org>,
+ stable@vger.kernel.org,
+ Felix Zhang <mrman@mrman314.tech>
+Subject: [PATCH] Bluetooth: hci_bcm4377: do not mark valid bd_addr as invalid
+Date: Wed, 27 Dec 2023 11:10:03 +0100
+Message-ID: <20231227101003.10534-1-johan+linaro@kernel.org>
+X-Mailer: git-send-email 2.41.0
+Precedence: bulk
+X-Mailing-List: linux-bluetooth@vger.kernel.org
+List-Id: <linux-bluetooth.vger.kernel.org>
+List-Subscribe: <mailto:linux-bluetooth+subscribe@vger.kernel.org>
+List-Unsubscribe: <mailto:linux-bluetooth+unsubscribe@vger.kernel.org>
+MIME-Version: 1.0
+
+A recent commit restored the original (and still documented) semantics
+for the HCI_QUIRK_USE_BDADDR_PROPERTY quirk so that the device address
+is considered invalid unless an address is provided by firmware.
+
+This specifically means that this flag must only be set for devices with
+invalid addresses, but the Broadcom BCM4377 driver has so far been
+setting this flag unconditionally.
+
+Fortunately the driver already checks for invalid addresses during setup
+and sets the HCI_QUIRK_INVALID_BDADDR flag, which can simply be replaced
+with HCI_QUIRK_USE_BDADDR_PROPERTY to indicate that the default address
+is invalid but can be overridden by firmware (long term, this should
+probably just always be allowed).
+
+Fixes: 6945795bc81a ("Bluetooth: fix use-bdaddr-property quirk")
+Cc: stable@vger.kernel.org # 6.5
+Reported-by: Felix Zhang <mrman@mrman314.tech>
+Link: https://lore.kernel.org/r/77419ffacc5b4875e920e038332575a2a5bff29f.camel@mrman314.tech/
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Reported-by: Felix Zhang <mrman@mrman314.tech>
+---
+ drivers/bluetooth/hci_bcm4377.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c
+index a61757835695..9a7243d5db71 100644
+--- a/drivers/bluetooth/hci_bcm4377.c
++++ b/drivers/bluetooth/hci_bcm4377.c
+@@ -1417,7 +1417,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377)
+
+ bda = (struct hci_rp_read_bd_addr *)skb->data;
+ if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr))
+- set_bit(HCI_QUIRK_INVALID_BDADDR, &bcm4377->hdev->quirks);
++ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks);
+
+ kfree_skb(skb);
+ return 0;
+@@ -2368,7 +2368,6 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ hdev->set_bdaddr = bcm4377_hci_set_bdaddr;
+ hdev->setup = bcm4377_hci_setup;
+
+- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ if (bcm4377->hw->broken_mws_transport_config)
+ set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks);
+ if (bcm4377->hw->broken_ext_scan)
diff --git a/SOURCES/tkg-BBRv2.patch b/SOURCES/tkg-BBRv2.patch
deleted file mode 100644
index 35640e3..0000000
--- a/SOURCES/tkg-BBRv2.patch
+++ /dev/null
@@ -1,3311 +0,0 @@
-From eff7e1edf2fec63bac1a81f8c86295dd3f48422a Mon Sep 17 00:00:00 2001
-From: Oleksandr Natalenko <oleksandr@natalenko.name>
-Date: Mon, 4 Apr 2022 08:23:19 +0200
-Subject: [PATCH] tcp_bbr2: introduce BBRv2
-
-Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
----
- include/linux/tcp.h | 3 +-
- include/net/inet_connection_sock.h | 3 +-
- include/net/tcp.h | 41 +-
- include/uapi/linux/inet_diag.h | 33 +
- net/ipv4/Kconfig | 22 +
- net/ipv4/Makefile | 1 +
- net/ipv4/tcp.c | 1 +
- net/ipv4/tcp_bbr.c | 38 +-
- net/ipv4/tcp_bbr2.c | 2674 ++++++++++++++++++++++++++++
- net/ipv4/tcp_cong.c | 1 +
- net/ipv4/tcp_input.c | 27 +-
- net/ipv4/tcp_output.c | 26 +-
- net/ipv4/tcp_rate.c | 30 +-
- net/ipv4/tcp_timer.c | 1 +
- 14 files changed, 2867 insertions(+), 34 deletions(-)
- create mode 100644 net/ipv4/tcp_bbr2.c
-
-diff --git a/include/linux/tcp.h b/include/linux/tcp.h
-index 41b1da621a45..d8f94ef1a297 100644
---- a/include/linux/tcp.h
-+++ b/include/linux/tcp.h
-@@ -255,7 +255,8 @@ struct tcp_sock {
- u8 compressed_ack;
- u8 dup_ack_counter:2,
- tlp_retrans:1, /* TLP is a retransmission */
-- unused:5;
-+ fast_ack_mode:2, /* which fast ack mode ? */
-+ unused:3;
- u32 chrono_start; /* Start time in jiffies of a TCP chrono */
- u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
- u8 chrono_type:2, /* current chronograph type */
-diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
-index c2b15f7e5516..d85858efa571 100644
---- a/include/net/inet_connection_sock.h
-+++ b/include/net/inet_connection_sock.h
-@@ -135,7 +135,8 @@ struct inet_connection_sock {
- u32 icsk_probes_tstamp;
- u32 icsk_user_timeout;
-
-- u64 icsk_ca_priv[104 / sizeof(u64)];
-+/* XXX inflated by temporary internal debugging info */
-+ u64 icsk_ca_priv[216 / sizeof(u64)];
- #define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv)
- };
-
-diff --git a/include/net/tcp.h b/include/net/tcp.h
-index 14d45661a84d..7261fae79403 100644
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
- #define TCP_ECN_QUEUE_CWR 2
- #define TCP_ECN_DEMAND_CWR 4
- #define TCP_ECN_SEEN 8
-+#define TCP_ECN_ECT_PERMANENT 16
-
- enum tcp_tw_status {
- TCP_TW_SUCCESS = 0,
-@@ -823,6 +824,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
- return max_t(s64, t1 - t0, 0);
- }
-
-+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
-+{
-+ return max_t(s32, t1 - t0, 0);
-+}
-+
- static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
- {
- return tcp_ns_to_ts(skb->skb_mstamp_ns);
-@@ -898,9 +904,14 @@ struct tcp_skb_cb {
- /* pkts S/ACKed so far upon tx of skb, incl retrans: */
- __u32 delivered;
- /* start of send pipeline phase */
-- u64 first_tx_mstamp;
-+ u32 first_tx_mstamp;
- /* when we reached the "delivered" count */
-- u64 delivered_mstamp;
-+ u32 delivered_mstamp;
-+#define TCPCB_IN_FLIGHT_BITS 20
-+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
-+ u32 in_flight:20, /* packets in flight at transmit */
-+ unused2:12;
-+ u32 lost; /* packets lost so far upon tx of skb */
- } tx; /* only used for outgoing skbs */
- union {
- struct inet_skb_parm h4;
-@@ -1026,7 +1037,11 @@ enum tcp_ca_ack_event_flags {
- #define TCP_CONG_NON_RESTRICTED 0x1
- /* Requires ECN/ECT set on all packets */
- #define TCP_CONG_NEEDS_ECN 0x2
--#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
-+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
-+#define TCP_CONG_WANTS_CE_EVENTS 0x4
-+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \
-+ TCP_CONG_NEEDS_ECN | \
-+ TCP_CONG_WANTS_CE_EVENTS)
-
- union tcp_cc_info;
-
-@@ -1046,8 +1061,11 @@ struct ack_sample {
- */
- struct rate_sample {
- u64 prior_mstamp; /* starting timestamp for interval */
-+ u32 prior_lost; /* tp->lost at "prior_mstamp" */
- u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
- u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
-+ u32 tx_in_flight; /* packets in flight at starting timestamp */
-+ s32 lost; /* number of packets lost over interval */
- s32 delivered; /* number of packets delivered over interval */
- s32 delivered_ce; /* number of packets delivered w/ CE marks*/
- long interval_us; /* time for tp->delivered to incr "delivered" */
-@@ -1061,6 +1079,7 @@ struct rate_sample {
- bool is_app_limited; /* is sample from packet with bubble in pipe? */
- bool is_retrans; /* is sample from retransmission? */
- bool is_ack_delayed; /* is this (likely) a delayed ACK? */
-+ bool is_ece; /* did this ACK have ECN marked? */
- };
-
- struct tcp_congestion_ops {
-@@ -1084,8 +1103,11 @@ struct tcp_congestion_ops {
- /* hook for packet ack accounting (optional) */
- void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
-
-- /* override sysctl_tcp_min_tso_segs */
-- u32 (*min_tso_segs)(struct sock *sk);
-+ /* pick target number of segments per TSO/GSO skb (optional): */
-+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
-+
-+ /* react to a specific lost skb (optional) */
-+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
-
- /* call when packets are delivered to update cwnd and pacing rate,
- * after all the ca_state processing. (optional)
-@@ -1148,6 +1170,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
- }
- #endif
-
-+static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
-+{
-+ const struct inet_connection_sock *icsk = inet_csk(sk);
-+
-+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
-+ TCP_CONG_WANTS_CE_EVENTS);
-+}
-+
- static inline bool tcp_ca_needs_ecn(const struct sock *sk)
- {
- const struct inet_connection_sock *icsk = inet_csk(sk);
-@@ -1167,6 +1197,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
- void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
-
- /* From tcp_rate.c */
-+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
- void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
- void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
- struct rate_sample *rs);
-diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
-index 50655de04c9b..0e24f11627d5 100644
---- a/include/uapi/linux/inet_diag.h
-+++ b/include/uapi/linux/inet_diag.h
-@@ -231,9 +231,42 @@ struct tcp_bbr_info {
- __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */
- };
-
-+/* Phase as reported in netlink/ss stats. */
-+enum tcp_bbr2_phase {
-+ BBR2_PHASE_INVALID = 0,
-+ BBR2_PHASE_STARTUP = 1,
-+ BBR2_PHASE_DRAIN = 2,
-+ BBR2_PHASE_PROBE_RTT = 3,
-+ BBR2_PHASE_PROBE_BW_UP = 4,
-+ BBR2_PHASE_PROBE_BW_DOWN = 5,
-+ BBR2_PHASE_PROBE_BW_CRUISE = 6,
-+ BBR2_PHASE_PROBE_BW_REFILL = 7
-+};
-+
-+struct tcp_bbr2_info {
-+ /* u64 bw: bandwidth (app throughput) estimate in Byte per sec: */
-+ __u32 bbr_bw_lsb; /* lower 32 bits of bw */
-+ __u32 bbr_bw_msb; /* upper 32 bits of bw */
-+ __u32 bbr_min_rtt; /* min-filtered RTT in uSec */
-+ __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */
-+ __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */
-+ __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */
-+ __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */
-+ __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */
-+ __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */
-+ __u8 bbr_mode; /* current bbr_mode in state machine */
-+ __u8 bbr_phase; /* current state machine phase */
-+ __u8 unused1; /* alignment padding; not used yet */
-+ __u8 bbr_version; /* MUST be at this offset in struct */
-+ __u32 bbr_inflight_lo; /* lower/short-term data volume bound */
-+ __u32 bbr_inflight_hi; /* higher/long-term data volume bound */
-+ __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */
-+};
-+
- union tcp_cc_info {
- struct tcpvegas_info vegas;
- struct tcp_dctcp_info dctcp;
- struct tcp_bbr_info bbr;
-+ struct tcp_bbr2_info bbr2;
- };
- #endif /* _UAPI_INET_DIAG_H_ */
-diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
-index 2dfb12230f08..b6bec331a82e 100644
---- a/net/ipv4/Kconfig
-+++ b/net/ipv4/Kconfig
-@@ -678,6 +678,24 @@ config TCP_CONG_BBR
- AQM schemes that do not provide a delay signal. It requires the fq
- ("Fair Queue") pacing packet scheduler.
-
-+config TCP_CONG_BBR2
-+ tristate "BBR2 TCP"
-+ default n
-+ help
-+
-+ BBR2 TCP congestion control is a model-based congestion control
-+ algorithm that aims to maximize network utilization, keep queues and
-+ retransmit rates low, and to be able to coexist with Reno/CUBIC in
-+ common scenarios. It builds an explicit model of the network path. It
-+ tolerates a targeted degree of random packet loss and delay that are
-+ unrelated to congestion. It can operate over LAN, WAN, cellular, wifi,
-+ or cable modem links, and can use DCTCP-L4S-style ECN signals. It can
-+ coexist with flows that use loss-based congestion control, and can
-+ operate with shallow buffers, deep buffers, bufferbloat, policers, or
-+ AQM schemes that do not provide a delay signal. It requires pacing,
-+ using either TCP internal pacing or the fq ("Fair Queue") pacing packet
-+ scheduler.
-+
- choice
- prompt "Default TCP congestion control"
- default DEFAULT_CUBIC
-@@ -715,6 +733,9 @@ choice
- config DEFAULT_BBR
- bool "BBR" if TCP_CONG_BBR=y
-
-+ config DEFAULT_BBR2
-+ bool "BBR2" if TCP_CONG_BBR2=y
-+
- config DEFAULT_RENO
- bool "Reno"
- endchoice
-@@ -739,6 +760,7 @@ config DEFAULT_TCP_CONG
- default "dctcp" if DEFAULT_DCTCP
- default "cdg" if DEFAULT_CDG
- default "bbr" if DEFAULT_BBR
-+ default "bbr2" if DEFAULT_BBR2
- default "cubic"
-
- config TCP_MD5SIG
-diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
-index bbdd9c44f14e..8dee1547d820 100644
---- a/net/ipv4/Makefile
-+++ b/net/ipv4/Makefile
-@@ -46,6 +46,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
- obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
- obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
- obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
-+obj-$(CONFIG_TCP_CONG_BBR2) += tcp_bbr2.o
- obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
- obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
- obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
-diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
-index 4f2205756cfe..c139747666dd 100644
---- a/net/ipv4/tcp.c
-+++ b/net/ipv4/tcp.c
-@@ -3188,6 +3188,7 @@ int tcp_disconnect(struct sock *sk, int flags)
- tp->rx_opt.dsack = 0;
- tp->rx_opt.num_sacks = 0;
- tp->rcv_ooopack = 0;
-+ tp->fast_ack_mode = 0;
-
-
- /* Clean up fastopen related fields */
-diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
-index 54eec33c6e1c..bfbf158c71f4 100644
---- a/net/ipv4/tcp_bbr.c
-+++ b/net/ipv4/tcp_bbr.c
-@@ -294,26 +294,40 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
- sk->sk_pacing_rate = rate;
- }
-
--/* override sysctl_tcp_min_tso_segs */
- __bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk)
- {
- return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- }
-
-+/* Return the number of segments BBR would like in a TSO/GSO skb, given
-+ * a particular max gso size as a constraint.
-+ */
-+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
-+ u32 gso_max_size)
-+{
-+ u32 segs;
-+ u64 bytes;
-+
-+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
-+ bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift;
-+
-+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
-+ segs = max_t(u32, div_u64(bytes, mss_now), bbr_min_tso_segs(sk));
-+ return segs;
-+}
-+
-+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
-+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
-+{
-+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
-+}
-+
-+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
- static u32 bbr_tso_segs_goal(struct sock *sk)
- {
- struct tcp_sock *tp = tcp_sk(sk);
-- u32 segs, bytes;
--
-- /* Sort of tcp_tso_autosize() but ignoring
-- * driver provided sk_gso_max_size.
-- */
-- bytes = min_t(unsigned long,
-- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
-- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
-- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
-
-- return min(segs, 0x7FU);
-+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE);
- }
-
- /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
-@@ -1149,7 +1163,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
- .undo_cwnd = bbr_undo_cwnd,
- .cwnd_event = bbr_cwnd_event,
- .ssthresh = bbr_ssthresh,
-- .min_tso_segs = bbr_min_tso_segs,
-+ .tso_segs = bbr_tso_segs,
- .get_info = bbr_get_info,
- .set_state = bbr_set_state,
- };
-diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c
-new file mode 100644
-index 000000000000..488429f0f3d0
---- /dev/null
-+++ b/net/ipv4/tcp_bbr2.c
-@@ -0,0 +1,2674 @@
-+/* BBR (Bottleneck Bandwidth and RTT) congestion control, v2
-+ *
-+ * BBRv2 is a model-based congestion control algorithm that aims for low
-+ * queues, low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model
-+ * of the network path, it uses measurements of bandwidth and RTT, as well as
-+ * (if they occur) packet loss and/or DCTCP/L4S-style ECN signals. Note that
-+ * although it can use ECN or loss signals explicitly, it does not require
-+ * either; it can bound its in-flight data based on its estimate of the BDP.
-+ *
-+ * The model has both higher and lower bounds for the operating range:
-+ * lo: bw_lo, inflight_lo: conservative short-term lower bound
-+ * hi: bw_hi, inflight_hi: robust long-term upper bound
-+ * The bandwidth-probing time scale is (a) extended dynamically based on
-+ * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by
-+ * an interactive wall-clock time-scale to be more scalable and responsive
-+ * than Reno and CUBIC.
-+ *
-+ * Here is a state transition diagram for BBR:
-+ *
-+ * |
-+ * V
-+ * +---> STARTUP ----+
-+ * | | |
-+ * | V |
-+ * | DRAIN ----+
-+ * | | |
-+ * | V |
-+ * +---> PROBE_BW ----+
-+ * | ^ | |
-+ * | | | |
-+ * | +----+ |
-+ * | |
-+ * +---- PROBE_RTT <--+
-+ *
-+ * A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
-+ * When it estimates the pipe is full, it enters DRAIN to drain the queue.
-+ * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
-+ * A long-lived BBR flow spends the vast majority of its time remaining
-+ * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
-+ * in a fair manner, with a small, bounded queue. *If* a flow has been
-+ * continuously sending for the entire min_rtt window, and hasn't seen an RTT
-+ * sample that matches or decreases its min_rtt estimate for 10 seconds, then
-+ * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
-+ * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
-+ * we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
-+ * otherwise we enter STARTUP to try to fill the pipe.
-+ *
-+ * BBR is described in detail in:
-+ * "BBR: Congestion-Based Congestion Control",
-+ * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
-+ * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
-+ *
-+ * There is a public e-mail list for discussing BBR development and testing:
-+ * https://groups.google.com/forum/#!forum/bbr-dev
-+ *
-+ * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled,
-+ * otherwise TCP stack falls back to an internal pacing using one high
-+ * resolution timer per TCP socket and may use more resources.
-+ */
-+#include <linux/module.h>
-+#include <net/tcp.h>
-+#include <linux/inet_diag.h>
-+#include <linux/inet.h>
-+#include <linux/random.h>
-+
-+#include "tcp_dctcp.h"
-+
-+/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
-+ * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
-+ * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
-+ * Since the minimum window is >=4 packets, the lower bound isn't
-+ * an issue. The upper bound isn't an issue with existing technologies.
-+ */
-+#define BW_SCALE 24
-+#define BW_UNIT (1 << BW_SCALE)
-+
-+#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */
-+#define BBR_UNIT (1 << BBR_SCALE)
-+
-+#define FLAG_DEBUG_VERBOSE 0x1 /* Verbose debugging messages */
-+#define FLAG_DEBUG_LOOPBACK 0x2 /* Do NOT skip loopback addr */
-+
-+#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */
-+
-+/* BBR has the following modes for deciding how fast to send: */
-+enum bbr_mode {
-+ BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */
-+ BBR_DRAIN, /* drain any queue created during startup */
-+ BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */
-+ BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */
-+};
-+
-+/* How does the incoming ACK stream relate to our bandwidth probing? */
-+enum bbr_ack_phase {
-+ BBR_ACKS_INIT, /* not probing; not getting probe feedback */
-+ BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */
-+ BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */
-+ BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */
-+ BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */
-+};
-+
-+/* BBR congestion control block */
-+struct bbr {
-+ u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */
-+ u32 min_rtt_stamp; /* timestamp of min_rtt_us */
-+ u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */
-+ u32 probe_rtt_min_us; /* min RTT in bbr_probe_rtt_win_ms window */
-+ u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/
-+ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */
-+ u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */
-+ u64 cycle_mstamp; /* time of this cycle phase start */
-+ u32 mode:3, /* current bbr_mode in state machine */
-+ prev_ca_state:3, /* CA state on previous ACK */
-+ packet_conservation:1, /* use packet conservation? */
-+ round_start:1, /* start of packet-timed tx->ack round? */
-+ ce_state:1, /* If most recent data has CE bit set */
-+ bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */
-+ try_fast_path:1, /* can we take fast path? */
-+ unused2:11,
-+ idle_restart:1, /* restarting after idle? */
-+ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
-+ cycle_idx:3, /* current index in pacing_gain cycle array */
-+ has_seen_rtt:1; /* have we seen an RTT sample yet? */
-+ u32 pacing_gain:11, /* current gain for setting pacing rate */
-+ cwnd_gain:11, /* current gain for setting cwnd */
-+ full_bw_reached:1, /* reached full bw in Startup? */
-+ full_bw_cnt:2, /* number of rounds without large bw gains */
-+ init_cwnd:7; /* initial cwnd */
-+ u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
-+ u32 full_bw; /* recent bw, to estimate if pipe is full */
-+
-+ /* For tracking ACK aggregation: */
-+ u64 ack_epoch_mstamp; /* start of ACK sampling epoch */
-+ u16 extra_acked[2]; /* max excess data ACKed in epoch */
-+ u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */
-+ extra_acked_win_rtts:5, /* age of extra_acked, in round trips */
-+ extra_acked_win_idx:1, /* current index in extra_acked array */
-+ /* BBR v2 state: */
-+ unused1:2,
-+ startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */
-+ loss_in_cycle:1, /* packet loss in this cycle? */
-+ ecn_in_cycle:1; /* ECN in this cycle? */
-+ u32 loss_round_delivered; /* scb->tx.delivered ending loss round */
-+ u32 undo_bw_lo; /* bw_lo before latest losses */
-+ u32 undo_inflight_lo; /* inflight_lo before latest losses */
-+ u32 undo_inflight_hi; /* inflight_hi before latest losses */
-+ u32 bw_latest; /* max delivered bw in last round trip */
-+ u32 bw_lo; /* lower bound on sending bandwidth */
-+ u32 bw_hi[2]; /* upper bound of sending bandwidth range*/
-+ u32 inflight_latest; /* max delivered data in last round trip */
-+ u32 inflight_lo; /* lower bound of inflight data range */
-+ u32 inflight_hi; /* upper bound of inflight data range */
-+ u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */
-+ u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */
-+ u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */
-+ u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */
-+ ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */
-+ bw_probe_samples:1, /* rate samples reflect bw probing? */
-+ prev_probe_too_high:1, /* did last PROBE_UP go too high? */
-+ stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */
-+ rounds_since_probe:8, /* packet-timed rounds since probed bw */
-+ loss_round_start:1, /* loss_round_delivered round trip? */
-+ loss_in_round:1, /* loss marked in this round trip? */
-+ ecn_in_round:1, /* ECN marked in this round trip? */
-+ ack_phase:3, /* bbr_ack_phase: meaning of ACKs */
-+ loss_events_in_round:4,/* losses in STARTUP round */
-+ initialized:1; /* has bbr_init() been called? */
-+ u32 alpha_last_delivered; /* tp->delivered at alpha update */
-+ u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */
-+
-+ /* Params configurable using setsockopt. Refer to correspoding
-+ * module param for detailed description of params.
-+ */
-+ struct bbr_params {
-+ u32 high_gain:11, /* max allowed value: 2047 */
-+ drain_gain:10, /* max allowed value: 1023 */
-+ cwnd_gain:11; /* max allowed value: 2047 */
-+ u32 cwnd_min_target:4, /* max allowed value: 15 */
-+ min_rtt_win_sec:5, /* max allowed value: 31 */
-+ probe_rtt_mode_ms:9, /* max allowed value: 511 */
-+ full_bw_cnt:3, /* max allowed value: 7 */
-+ cwnd_tso_budget:1, /* allowed values: {0, 1} */
-+ unused3:6,
-+ drain_to_target:1, /* boolean */
-+ precise_ece_ack:1, /* boolean */
-+ extra_acked_in_startup:1, /* allowed values: {0, 1} */
-+ fast_path:1; /* boolean */
-+ u32 full_bw_thresh:10, /* max allowed value: 1023 */
-+ startup_cwnd_gain:11, /* max allowed value: 2047 */
-+ bw_probe_pif_gain:9, /* max allowed value: 511 */
-+ usage_based_cwnd:1, /* boolean */
-+ unused2:1;
-+ u16 probe_rtt_win_ms:14, /* max allowed value: 16383 */
-+ refill_add_inc:2; /* max allowed value: 3 */
-+ u16 extra_acked_gain:11, /* max allowed value: 2047 */
-+ extra_acked_win_rtts:5; /* max allowed value: 31*/
-+ u16 pacing_gain[CYCLE_LEN]; /* max allowed value: 1023 */
-+ /* Mostly BBR v2 parameters below here: */
-+ u32 ecn_alpha_gain:8, /* max allowed value: 255 */
-+ ecn_factor:8, /* max allowed value: 255 */
-+ ecn_thresh:8, /* max allowed value: 255 */
-+ beta:8; /* max allowed value: 255 */
-+ u32 ecn_max_rtt_us:19, /* max allowed value: 524287 */
-+ bw_probe_reno_gain:9, /* max allowed value: 511 */
-+ full_loss_cnt:4; /* max allowed value: 15 */
-+ u32 probe_rtt_cwnd_gain:8, /* max allowed value: 255 */
-+ inflight_headroom:8, /* max allowed value: 255 */
-+ loss_thresh:8, /* max allowed value: 255 */
-+ bw_probe_max_rounds:8; /* max allowed value: 255 */
-+ u32 bw_probe_rand_rounds:4, /* max allowed value: 15 */
-+ bw_probe_base_us:26, /* usecs: 0..2^26-1 (67 secs) */
-+ full_ecn_cnt:2; /* max allowed value: 3 */
-+ u32 bw_probe_rand_us:26, /* usecs: 0..2^26-1 (67 secs) */
-+ undo:1, /* boolean */
-+ tso_rtt_shift:4, /* max allowed value: 15 */
-+ unused5:1;
-+ u32 ecn_reprobe_gain:9, /* max allowed value: 511 */
-+ unused1:14,
-+ ecn_alpha_init:9; /* max allowed value: 256 */
-+ } params;
-+
-+ struct {
-+ u32 snd_isn; /* Initial sequence number */
-+ u32 rs_bw; /* last valid rate sample bw */
-+ u32 target_cwnd; /* target cwnd, based on BDP */
-+ u8 undo:1, /* Undo even happened but not yet logged */
-+ unused:7;
-+ char event; /* single-letter event debug codes */
-+ u16 unused2;
-+ } debug;
-+};
-+
-+struct bbr_context { /* scratch values handed between the rate-sample helpers below */
-+ u32 sample_bw; /* bandwidth sample written by bbr_calculate_bw_sample() */
-+ u32 target_cwnd; /* target cwnd written by bbr_set_cwnd(); 0 when nothing was ACKed */
-+ u32 log:1; /* set by bbr_set_cwnd() when snd_cwnd changed; read by bbr_debug() */
-+};
-+
-+/* Window length of min_rtt filter (in sec). Max allowed value is 31 (0x1F).
-+ * Exposed as module parameter "min_rtt_win_sec".
-+ */
-+static u32 bbr_min_rtt_win_sec = 10;
-+/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode.
-+ * Max allowed value is 511 (0x1FF).
-+ * Exposed as module parameter "probe_rtt_mode_ms".
-+ */
-+static u32 bbr_probe_rtt_mode_ms = 200;
-+/* Window length of probe_rtt_min_us filter (in ms), and consequently the
-+ * typical interval between PROBE_RTT mode entries.
-+ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC
-+ * The matching field in struct bbr_params is 14 bits wide, so the max
-+ * allowed value there is 16383.
-+ */
-+static u32 bbr_probe_rtt_win_ms = 5000;
-+/* Skip TSO below the following bandwidth (bits/sec).
-+ * bbr_min_tso_segs() compares sk_pacing_rate against this value >> 3 and
-+ * returns 1 segment below it, 2 otherwise.
-+ */
-+static int bbr_min_tso_rate = 1200000;
-+
-+/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting
-+ * in bigger TSO bursts. By default we cut the RTT-based allowance in half
-+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance
-+ * is below 1500 bytes after 6 * ~500 usec = 3ms.
-+ * The matching field in struct bbr_params is 4 bits wide (max 15), and
-+ * bbr_tso_segs_generic() skips the RTT-based allowance when that field is 0.
-+ */
-+static u32 bbr_tso_rtt_shift = 9; /* halve allowance per 2^9 usecs, 512us */
-+
-+/* Select cwnd TSO budget approach:
-+ * 0: padding
-+ * 1: flooring
-+ * In bbr_quantization_budget(), a per-socket value of 1 raises cwnd to at
-+ * least 3 * tso_segs_goal and cwnd_min_target; any other value adds
-+ * 3 * tso_segs_goal to cwnd and rounds the result up to an even number.
-+ * NOTE(review): the module parameter for this variable is registered under
-+ * the misspelled name "cwnd_tso_bduget".
-+ */
-+static uint bbr_cwnd_tso_budget = 1;
-+
-+/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck.
-+ * In order to help drive the network toward lower queues and low latency while
-+ * maintaining high utilization, the average pacing rate aims to be slightly
-+ * lower than the estimated bandwidth. This is an important aspect of the
-+ * design.
-+ * bbr_bw_to_pacing_rate() passes this value as the "margin" argument of
-+ * bbr_rate_bytes_per_sec().
-+ */
-+static const int bbr_pacing_margin_percent = 1;
-+
-+/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
-+ * that will allow a smoothly increasing pacing rate that will double each RTT
-+ * and send the same number of packets per RTT that an un-paced, slow-starting
-+ * Reno or CUBIC flow would. Max allowed value is 2047 (0x7FF).
-+ * 2/ln(2) is approximately 2.885, hence the 2885 / 1000 factor; the "+ 1"
-+ * rounds the integer result up.
-+ */
-+static int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1;
-+/* The gain for deriving startup cwnd. Max allowed value is 2047 (0x7FF). */
-+static int bbr_startup_cwnd_gain = BBR_UNIT * 2885 / 1000 + 1;
-+/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
-+ * the queue created in BBR_STARTUP in a single round. Max allowed value
-+ * is 1023 (0x3FF).
-+ */
-+static int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
-+/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs.
-+ * Max allowed value is 2047 (0x7FF).
-+ */
-+static int bbr_cwnd_gain = BBR_UNIT * 2;
-+/* Phases of the PROBE_BW gain cycle, used to discover/share bw. The
-+ * pacing_gain values for the cycle live in bbr_pacing_gain[] below; the
-+ * max allowed value for each element of that array is 1023 (0x3FF).
-+ * NOTE(review): the first two phase values line up with the first two
-+ * bbr_pacing_gain[] entries (probe, drain); confirm how the remaining
-+ * entries are indexed against the code that walks the cycle.
-+ */
-+enum bbr_pacing_gain_phase {
-+ BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */
-+ BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */
-+ BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */
-+ BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */
-+};
-+static int bbr_pacing_gain[] = {
-+ BBR_UNIT * 5 / 4, /* probe for more available bw */
-+ BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */
-+ BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */
-+ BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */
-+};
-+
-+/* Try to keep at least this many packets in flight, if things go smoothly. For
-+ * smooth functioning, a sliding window protocol ACKing every other packet
-+ * needs at least 4 packets in flight. Max allowed value is 15 (0xF).
-+ */
-+static u32 bbr_cwnd_min_target = 4;
-+
-+/* Cwnd to BDP proportion in PROBE_RTT mode scaled by BBR_UNIT. Default: 50%.
-+ * Use 0 to disable. Max allowed value is 255.
-+ * With a per-socket gain of 0, bbr_probe_rtt_cwnd() returns cwnd_min_target.
-+ */
-+static u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2;
-+
-+/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
-+/* If bw has increased significantly (1.25x), there may be more bw available.
-+ * Max allowed value is 1023 (0x3FF).
-+ */
-+static u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
-+/* But after 3 rounds w/o significant bw growth, estimate pipe is full.
-+ * Max allowed value is 7 (0x7).
-+ */
-+static u32 bbr_full_bw_cnt = 3;
-+
-+static u32 bbr_flags; /* Debug flag bits; bbr_debug() tests FLAG_DEBUG_VERBOSE and FLAG_DEBUG_LOOPBACK */
-+
-+/* Whether to debug using printk.
-+ * When set, bbr_debug() emits its message with printk(KERN_DEBUG ...).
-+ */
-+static bool bbr_debug_with_printk;
-+
-+/* Whether to debug using ftrace event tcp:tcp_bbr_event.
-+ * Ignored when bbr_debug_with_printk is set.
-+ * NOTE(review): bbr_debug() as it appears in this file formats the message
-+ * when this flag is set but contains no trace call.
-+ */
-+static bool bbr_debug_ftrace;
-+
-+/* Experiment: each cycle, try to hold sub-unity gain until inflight <= BDP. */
-+static bool bbr_drain_to_target = true; /* default: enabled */
-+
-+/* Experiment: Flags to control BBR with ECN behavior.
-+ * bbr_cwnd_event() only calls dctcp_ece_ack_update() when the per-socket
-+ * precise_ece_ack value and bbr_ecn_enable are both set.
-+ */
-+static bool bbr_precise_ece_ack = true; /* default: enabled */
-+
-+/* The max rwin scaling shift factor is 14 (RFC 1323), so the max sane rwin is
-+ * (2^(16+14) B)/(1024 B/packet) = 1M packets.
-+ * bbr_debug() raises a one-time warning when snd_cwnd is 0 or exceeds this.
-+ */
-+static u32 bbr_cwnd_warn_val = 1U << 20;
-+
-+static u16 bbr_debug_port_mask; /* bbr_debug() matches a socket when this is non-zero and (dport & mask) == 0 */
-+
-+/* BBR module parameters. These are module parameters only in Google prod.
-+ * Upstream these are intentionally not module parameters.
-+ */
-+static int bbr_pacing_gain_size = CYCLE_LEN;
-+
-+/* Gain factor for adding extra_acked to target cwnd: */
-+static int bbr_extra_acked_gain = 256;
-+
-+/* Window length of extra_acked window. Max allowed val is 31. */
-+static u32 bbr_extra_acked_win_rtts = 5;
-+
-+/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */
-+static u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20;
-+
-+/* Time period (in usec; 100 ms by default) for clamping cwnd increment due
-+ * to ack aggregation
-+ */
-+static u32 bbr_extra_acked_max_us = 100 * 1000;
-+
-+/* Use extra acked in startup ?
-+ * 0: disabled
-+ * 1: use latest extra_acked value from 1-2 rtt in startup
-+ */
-+static int bbr_extra_acked_in_startup = 1; /* default: enabled */
-+
-+/* Experiment: don't grow cwnd beyond twice of what we just probed. */
-+static bool bbr_usage_based_cwnd; /* default: disabled */
-+
-+/* For lab testing, researchers can enable BBRv2 ECN support with this flag,
-+ * when they know that any ECN marks that the connections experience will be
-+ * DCTCP/L4S-style ECN marks, rather than RFC3168 ECN marks.
-+ * TODO(ncardwell): Production use of the BBRv2 ECN functionality depends on
-+ * negotiation or configuration that is outside the scope of the BBRv2
-+ * alpha release.
-+ */
-+static bool bbr_ecn_enable = false;
-+
-+/* Register the tunables above as module parameters. The "type" argument of
-+ * each registration selects the parser for values written to the parameter,
-+ * so it has to agree with the C type of the backing variable.
-+ */
-+module_param_named(min_tso_rate, bbr_min_tso_rate, int, 0644);
-+/* bbr_tso_rtt_shift is a u32: parse it as "uint" so that negative input is
-+ * rejected instead of being stored as a huge unsigned shift count.
-+ */
-+module_param_named(tso_rtt_shift, bbr_tso_rtt_shift, uint, 0644);
-+module_param_named(high_gain, bbr_high_gain, int, 0644);
-+module_param_named(drain_gain, bbr_drain_gain, int, 0644);
-+module_param_named(startup_cwnd_gain, bbr_startup_cwnd_gain, int, 0644);
-+module_param_named(cwnd_gain, bbr_cwnd_gain, int, 0644);
-+module_param_array_named(pacing_gain, bbr_pacing_gain, int,
-+			 &bbr_pacing_gain_size, 0644);
-+module_param_named(cwnd_min_target, bbr_cwnd_min_target, uint, 0644);
-+module_param_named(probe_rtt_cwnd_gain,
-+		   bbr_probe_rtt_cwnd_gain, uint, 0664);
-+module_param_named(cwnd_warn_val, bbr_cwnd_warn_val, uint, 0664);
-+module_param_named(debug_port_mask, bbr_debug_port_mask, ushort, 0644);
-+module_param_named(flags, bbr_flags, uint, 0644);
-+module_param_named(debug_ftrace, bbr_debug_ftrace, bool, 0644);
-+module_param_named(debug_with_printk, bbr_debug_with_printk, bool, 0644);
-+module_param_named(min_rtt_win_sec, bbr_min_rtt_win_sec, uint, 0644);
-+module_param_named(probe_rtt_mode_ms, bbr_probe_rtt_mode_ms, uint, 0644);
-+module_param_named(probe_rtt_win_ms, bbr_probe_rtt_win_ms, uint, 0644);
-+module_param_named(full_bw_thresh, bbr_full_bw_thresh, uint, 0644);
-+module_param_named(full_bw_cnt, bbr_full_bw_cnt, uint, 0644);
-+/* NOTE: "cwnd_tso_bduget" misspells "budget". The name is the externally
-+ * visible parameter name, so it is deliberately left unchanged here.
-+ */
-+module_param_named(cwnd_tso_bduget, bbr_cwnd_tso_budget, uint, 0664);
-+module_param_named(extra_acked_gain, bbr_extra_acked_gain, int, 0664);
-+module_param_named(extra_acked_win_rtts,
-+		   bbr_extra_acked_win_rtts, uint, 0664);
-+module_param_named(extra_acked_max_us,
-+		   bbr_extra_acked_max_us, uint, 0664);
-+module_param_named(ack_epoch_acked_reset_thresh,
-+		   bbr_ack_epoch_acked_reset_thresh, uint, 0664);
-+module_param_named(drain_to_target, bbr_drain_to_target, bool, 0664);
-+module_param_named(precise_ece_ack, bbr_precise_ece_ack, bool, 0664);
-+module_param_named(extra_acked_in_startup,
-+		   bbr_extra_acked_in_startup, int, 0664);
-+module_param_named(usage_based_cwnd, bbr_usage_based_cwnd, bool, 0664);
-+module_param_named(ecn_enable, bbr_ecn_enable, bool, 0664);
-+
-+static void bbr2_exit_probe_rtt(struct sock *sk); /* forward declaration */
-+static void bbr2_reset_congestion_signals(struct sock *sk); /* forward declaration; called from bbr_check_drain() */
-+
-+static void bbr_check_probe_rtt_done(struct sock *sk); /* forward declaration; called from bbr_cwnd_event() */
-+
-+/* Report whether STARTUP is estimated to have filled the pipe, as recorded
-+ * in the full_bw_reached flag of this socket's BBR state.
-+ */
-+static bool bbr_full_bw_reached(const struct sock *sk)
-+{
-+	const struct bbr *ca = inet_csk_ca(sk);
-+	bool pipe_filled = ca->full_bw_reached;
-+
-+	return pipe_filled;
-+}
-+
-+/* Windowed max of recent bandwidth samples, in pkts/uS << BW_SCALE:
-+ * the larger of the two bw_hi[] slots.
-+ */
-+static u32 bbr_max_bw(const struct sock *sk)
-+{
-+	struct bbr *ca = inet_csk_ca(sk);
-+
-+	return max(ca->bw_hi[0], ca->bw_hi[1]);
-+}
-+
-+/* Estimated path bandwidth, in pkts/uS << BW_SCALE: the windowed max from
-+ * bbr_max_bw(), capped by the bw_lo bound.
-+ */
-+static u32 bbr_bw(const struct sock *sk)
-+{
-+	struct bbr *ca = inet_csk_ca(sk);
-+
-+	return min(bbr_max_bw(sk), ca->bw_lo);
-+}
-+
-+/* Maximum extra acked over the past k-2k round trips, where
-+ * k = bbr_extra_acked_win_rtts: the larger of the two extra_acked[] slots.
-+ */
-+static u16 bbr_extra_acked(const struct sock *sk)
-+{
-+	struct bbr *ca = inet_csk_ca(sk);
-+
-+	return max(ca->extra_acked[0], ca->extra_acked[1]);
-+}
-+
-+/* Convert a scaled rate to bytes per second, applying a gain (in BBR_UNIT
-+ * fixed point) and shaving off "margin" percent.
-+ * The multiply/shift steps are kept in this exact order to avoid overflowing
-+ * u64; this should work for input rates of up to 2.9Tbit/sec and a gain of
-+ * 2.89x. The result is never less than 1.
-+ */
-+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain,
-+				  int margin)
-+{
-+	const unsigned int mss = tcp_sk(sk)->mss_cache;
-+	u64 bytes_per_sec;
-+
-+	/* packets -> bytes, then apply the gain and drop its scaling */
-+	bytes_per_sec = (rate * mss * gain) >> BBR_SCALE;
-+	/* per-usec -> per-sec, minus the margin, then drop the bw scaling */
-+	bytes_per_sec *= USEC_PER_SEC / 100 * (100 - margin);
-+	bytes_per_sec >>= BW_SCALE;
-+
-+	return max(bytes_per_sec, 1ULL);
-+}
-+
-+/* Convert a scaled bw to bytes per second with unit gain and no margin. */
-+static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate)
-+{
-+	const int unit_gain = BBR_UNIT;
-+	const int no_margin = 0;
-+
-+	return bbr_rate_bytes_per_sec(sk, rate, unit_gain, no_margin);
-+}
-+
-+/* Convert a scaled bw to kilobits per second (bytes/sec * 8 / 1000). */
-+static u64 bbr_rate_kbps(struct sock *sk, u64 rate)
-+{
-+	u64 kbits = bbr_bw_bytes_per_sec(sk, rate);
-+
-+	kbits *= 8;
-+	do_div(kbits, 1000);	/* do_div() divides its first argument in place */
-+	return kbits;
-+}
-+
-+static u32 bbr_tso_segs_goal(struct sock *sk); /* forward declaration; used by the cwnd alert in bbr_debug() */
-+static void bbr_debug(struct sock *sk, u32 acked,
-+ const struct rate_sample *rs, struct bbr_context *ctx)
-+{ /* Debug helper: sanity-check snd_cwnd and optionally log one line of state.
-+ * Steps, in order:
-+ * - return at once for sockets still in TCP_SYN_SENT;
-+ * - warn once if snd_cwnd is 0 or above bbr_cwnd_warn_val;
-+ * - return unless bbr_debug_with_printk or bbr_debug_ftrace is set;
-+ * - return unless the socket has SOCK_DBG or matches bbr_debug_port_mask;
-+ * - return unless ctx->log, tp->app_limited or FLAG_DEBUG_VERBOSE is set;
-+ * - return for loopback destinations unless FLAG_DEBUG_LOOPBACK is set;
-+ * - format the message and printk it when bbr_debug_with_printk is set;
-+ * - clear the pending debug.undo marker.
-+ */
-+ static const char ca_states[] = {
-+ [TCP_CA_Open] = 'O',
-+ [TCP_CA_Disorder] = 'D',
-+ [TCP_CA_CWR] = 'C',
-+ [TCP_CA_Recovery] = 'R',
-+ [TCP_CA_Loss] = 'L',
-+ };
-+ static const char mode[] = {
-+ 'G', /* Growing - BBR_STARTUP */
-+ 'D', /* Drain - BBR_DRAIN */
-+ 'W', /* Window - BBR_PROBE_BW */
-+ 'M', /* Min RTT - BBR_PROBE_RTT */
-+ };
-+ static const char ack_phase[] = { /* bbr_ack_phase strings */
-+ 'I', /* BBR_ACKS_INIT - 'Init' */
-+ 'R', /* BBR_ACKS_REFILLING - 'Refilling' */
-+ 'B', /* BBR_ACKS_PROBE_STARTING - 'Before' */
-+ 'F', /* BBR_ACKS_PROBE_FEEDBACK - 'Feedback' */
-+ 'A', /* BBR_ACKS_PROBE_STOPPING - 'After' */
-+ };
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ const u32 una = tp->snd_una - bbr->debug.snd_isn; /* snd_una relative to the recorded ISN */
-+ const u32 fack = tcp_highest_sack_seq(tp);
-+ const u16 dport = ntohs(inet_sk(sk)->inet_dport);
-+ bool is_port_match = (bbr_debug_port_mask &&
-+ ((dport & bbr_debug_port_mask) == 0)); /* match: mask set and no masked bit set in dport */
-+ char debugmsg[320];
-+
-+ if (sk->sk_state == TCP_SYN_SENT)
-+ return; /* no bbr_init() yet if SYN retransmit -> CA_Loss */
-+
-+ if (!tp->snd_cwnd || tp->snd_cwnd > bbr_cwnd_warn_val) {
-+ char addr[INET6_ADDRSTRLEN + 10] = { 0 };
-+
-+ if (sk->sk_family == AF_INET)
-+ snprintf(addr, sizeof(addr), "%pI4:%u",
-+ &inet_sk(sk)->inet_daddr, dport);
-+ else if (sk->sk_family == AF_INET6)
-+ snprintf(addr, sizeof(addr), "%pI6:%u",
-+ &sk->sk_v6_daddr, dport);
-+
-+ WARN_ONCE(1,
-+ "BBR %s cwnd alert: %u "
-+ "snd_una: %u ca: %d pacing_gain: %u cwnd_gain: %u "
-+ "bw: %u rtt: %u min_rtt: %u "
-+ "acked: %u tso_segs: %u "
-+ "bw: %d %ld %d pif: %u\n",
-+ addr, tp->snd_cwnd,
-+ una, inet_csk(sk)->icsk_ca_state,
-+ bbr->pacing_gain, bbr->cwnd_gain,
-+ bbr_max_bw(sk), (tp->srtt_us >> 3), bbr->min_rtt_us,
-+ acked, bbr_tso_segs_goal(sk),
-+ rs->delivered, rs->interval_us, rs->is_retrans,
-+ tcp_packets_in_flight(tp));
-+ }
-+
-+ if (likely(!bbr_debug_with_printk && !bbr_debug_ftrace))
-+ return;
-+
-+ if (!sock_flag(sk, SOCK_DBG) && !is_port_match)
-+ return;
-+
-+ if (!ctx->log && !tp->app_limited && !(bbr_flags & FLAG_DEBUG_VERBOSE))
-+ return;
-+
-+ if (ipv4_is_loopback(inet_sk(sk)->inet_daddr) &&
-+ !(bbr_flags & FLAG_DEBUG_LOOPBACK))
-+ return;
-+
-+ snprintf(debugmsg, sizeof(debugmsg) - 1,
-+ "BBR %pI4:%-5u %5u,%03u:%-7u %c "
-+ "%c %2u br %2u cr %2d rtt %5ld d %2d i %5ld mrtt %d %cbw %llu "
-+ "bw %llu lb %llu ib %llu qb %llu "
-+ "a %u if %2u %c %c dl %u l %u al %u # %u t %u %c %c "
-+ "lr %d er %d ea %d bwl %lld il %d ih %d c %d "
-+ "v %d %c %u %c %s\n",
-+ &inet_sk(sk)->inet_daddr, dport,
-+ una / 1000, una % 1000, fack - tp->snd_una,
-+ ca_states[inet_csk(sk)->icsk_ca_state],
-+ bbr->debug.undo ? '@' : mode[bbr->mode],
-+ tp->snd_cwnd,
-+ bbr_extra_acked(sk), /* br (legacy): extra_acked */
-+ rs->tx_in_flight, /* cr (legacy): tx_inflight */
-+ rs->rtt_us,
-+ rs->delivered,
-+ rs->interval_us,
-+ bbr->min_rtt_us,
-+ rs->is_app_limited ? '_' : 'l',
-+ bbr_rate_kbps(sk, ctx->sample_bw), /* lbw: latest sample bw */
-+ bbr_rate_kbps(sk, bbr_max_bw(sk)), /* bw: max bw */
-+ 0ULL, /* lb: [obsolete] */
-+ 0ULL, /* ib: [obsolete] */
-+ div_u64((u64)sk->sk_pacing_rate * 8, 1000),
-+ acked,
-+ tcp_packets_in_flight(tp),
-+ rs->is_ack_delayed ? 'd' : '.',
-+ bbr->round_start ? '*' : '.',
-+ tp->delivered, tp->lost,
-+ tp->app_limited,
-+ 0, /* #: [obsolete] */
-+ ctx->target_cwnd,
-+ tp->reord_seen ? 'r' : '.', /* r: reordering seen? */
-+ ca_states[bbr->prev_ca_state],
-+ (rs->lost + rs->delivered) > 0 ?
-+ (1000 * rs->lost /
-+ (rs->lost + rs->delivered)) : 0, /* lr: loss rate x1000 */
-+ (rs->delivered) > 0 ?
-+ (1000 * rs->delivered_ce /
-+ (rs->delivered)) : 0, /* er: ECN rate x1000 */
-+ 1000 * bbr->ecn_alpha >> BBR_SCALE, /* ea: ECN alpha x1000 */
-+ bbr->bw_lo == ~0U ?
-+ -1 : (s64)bbr_rate_kbps(sk, bbr->bw_lo), /* bwl */
-+ bbr->inflight_lo, /* il */
-+ bbr->inflight_hi, /* ih */
-+ bbr->bw_probe_up_cnt, /* c */
-+ 2, /* v: version */
-+ bbr->debug.event,
-+ bbr->cycle_idx,
-+ ack_phase[bbr->ack_phase],
-+ bbr->bw_probe_samples ? "Y" : "N");
-+ debugmsg[sizeof(debugmsg) - 1] = 0; /* force NUL termination of the buffer */
-+
-+ /* printk takes a higher precedence. */
-+ if (bbr_debug_with_printk)
-+ printk(KERN_DEBUG "%s", debugmsg);
-+
-+ if (unlikely(bbr->debug.undo))
-+ bbr->debug.undo = 0; /* the undo marker has now been reported as '@' */
-+}
-+
-+/* Turn a BBR bw plus gain factor into a pacing rate in bytes per second,
-+ * applying bbr_pacing_margin_percent and capping at sk_max_pacing_rate.
-+ */
-+static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
-+{
-+	u64 bytes_per_sec;
-+
-+	bytes_per_sec = bbr_rate_bytes_per_sec(sk, bw, gain,
-+					       bbr_pacing_margin_percent);
-+	return min_t(u64, bytes_per_sec, sk->sk_max_pacing_rate);
-+}
-+
-+/* Seed the pacing rate with high_gain * snd_cwnd / RTT. When no RTT sample
-+ * exists yet, a nominal RTT of USEC_PER_MSEC is used; otherwise the smoothed
-+ * RTT is used and has_seen_rtt is latched.
-+ */
-+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 rtt_us = USEC_PER_MSEC;	/* nominal default until an RTT is seen */
-+	u64 bw;
-+
-+	if (tp->srtt_us) {
-+		/* srtt_us is stored << 3; never let the divisor reach 0 */
-+		rtt_us = max(tp->srtt_us >> 3, 1U);
-+		bbr->has_seen_rtt = 1;
-+	}
-+
-+	bw = (u64)tp->snd_cwnd * BW_UNIT;
-+	do_div(bw, rtt_us);
-+	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr->params.high_gain);
-+}
-+
-+/* Update the pacing rate from the current bw estimate and a gain factor.
-+ * Until the pipe is estimated to be full the rate is only ever raised.
-+ */
-+static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	unsigned long new_rate = bbr_bw_to_pacing_rate(sk, bw, gain);
-+
-+	/* First RTT sample just arrived: re-seed the rate from it. */
-+	if (unlikely(tp->srtt_us && !bbr->has_seen_rtt))
-+		bbr_init_pacing_rate_from_rtt(sk);
-+
-+	if (!bbr_full_bw_reached(sk) && new_rate <= sk->sk_pacing_rate)
-+		return;
-+
-+	sk->sk_pacing_rate = new_rate;
-+}
-+
-+/* Minimum TSO segment count: 1 while the pacing rate is below
-+ * bbr_min_tso_rate >> 3, otherwise 2.
-+ */
-+static u32 bbr_min_tso_segs(struct sock *sk)
-+{
-+	if (sk->sk_pacing_rate < (bbr_min_tso_rate >> 3))
-+		return 1;
-+
-+	return 2;
-+}
-+
-+/* Return the number of segments BBR would like in a TSO/GSO skb, given
-+ * a particular max gso size as a constraint.
-+ *
-+ * @sk:           socket whose pacing rate and BBR state are consulted
-+ * @mss_now:      segment size used to turn the byte budget into segments
-+ * @gso_max_size: upper bound on the skb size
-+ *
-+ * The byte budget is the sum of a pacing-rate based allowance and, when
-+ * params.tso_rtt_shift is non-zero, a min_rtt based allowance. The result
-+ * is never below bbr_min_tso_segs().
-+ */
-+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
-+				u32 gso_max_size)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 segs, r;
-+	u64 bytes;
-+
-+	/* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
-+	bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift;
-+
-+	/* Budget a TSO/GSO burst size allowance based on min_rtt. For every
-+	 * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst.
-+	 * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K)
-+	 */
-+	if (bbr->params.tso_rtt_shift) {
-+		r = bbr->min_rtt_us >> bbr->params.tso_rtt_shift;
-+		if (r < BITS_PER_TYPE(u32))   /* prevent undefined behavior */
-+			bytes += GSO_MAX_SIZE >> r;
-+	}
-+
-+	/* Clamp in 64 bits: comparing as u32 would first truncate "bytes",
-+	 * so a budget of 2^32 or more would wrap to a small value instead of
-+	 * being capped at the gso limit.
-+	 */
-+	bytes = min_t(u64, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
-+	segs = max_t(u32, div_u64(bytes, mss_now), bbr_min_tso_segs(sk));
-+	return segs;
-+}
-+
-+/* BBR's replacement for tcp_tso_autosize(): caps skb size at transmit time
-+ * using the socket's own sk_gso_max_size.
-+ */
-+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
-+{
-+	const u32 gso_limit = sk->sk_gso_max_size;
-+
-+	return bbr_tso_segs_generic(sk, mss_now, gso_limit);
-+}
-+
-+/* Same computation as bbr_tso_segs(), but with mss_cache as the segment
-+ * size and GSO_MAX_SIZE in place of the driver's sk_gso_max_size.
-+ */
-+static u32 bbr_tso_segs_goal(struct sock *sk)
-+{
-+	return bbr_tso_segs_generic(sk, tcp_sk(sk)->mss_cache, GSO_MAX_SIZE);
-+}
-+
-+/* Remember the "last known good" cwnd in prior_cwnd so that it can be
-+ * restored after losses or PROBE_RTT.
-+ */
-+static void bbr_save_cwnd(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	bool cwnd_was_cut = bbr->prev_ca_state >= TCP_CA_Recovery ||
-+			    bbr->mode == BBR_PROBE_RTT;
-+
-+	if (cwnd_was_cut) {
-+		/* loss recovery or PROBE_RTT temporarily cut cwnd: keep max */
-+		bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
-+	} else {
-+		/* the current cwnd is good enough */
-+		bbr->prior_cwnd = tp->snd_cwnd;
-+	}
-+}
-+
-+/* React to congestion-control events:
-+ * - CA_EVENT_TX_START while app-limited: flag the idle restart, restart the
-+ *   ack-aggregation epoch and either re-pace at the estimated bw (PROBE_BW)
-+ *   or check whether PROBE_RTT is done;
-+ * - CA_EVENT_ECN_IS_CE / CA_EVENT_ECN_NO_CE: when ECN is enabled and
-+ *   precise_ece_ack is set, feed the event to dctcp_ece_ack_update().
-+ */
-+static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 ce_state;
-+
-+	if (event == CA_EVENT_TX_START && tp->app_limited) {
-+		bbr->idle_restart = 1;
-+		bbr->ack_epoch_mstamp = tp->tcp_mstamp;
-+		bbr->ack_epoch_acked = 0;
-+		/* Restarting from idle and app-limited: no need for extra
-+		 * speed, so pace at the estimated bw to avoid pointless
-+		 * buffer overflows.
-+		 */
-+		if (bbr->mode == BBR_PROBE_BW)
-+			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
-+		else if (bbr->mode == BBR_PROBE_RTT)
-+			bbr_check_probe_rtt_done(sk);
-+		return;
-+	}
-+
-+	if (event != CA_EVENT_ECN_IS_CE && event != CA_EVENT_ECN_NO_CE)
-+		return;
-+	if (!bbr_ecn_enable || !bbr->params.precise_ece_ack)
-+		return;
-+
-+	/* ce_state goes through a local because the helper takes a u32 *. */
-+	ce_state = bbr->ce_state;
-+	dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &ce_state);
-+	bbr->ce_state = ce_state;
-+	if (tp->fast_ack_mode == 2 && event == CA_EVENT_ECN_IS_CE)
-+		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
-+}
-+
-+/* Compute the bandwidth-delay product from min RTT and the given bw:
-+ *
-+ *	bdp = ceil(bw * min_rtt * gain)
-+ *
-+ * The gain controls how much queue is built. A small gain keeps the queue
-+ * small but is more exposed to noise in RTT measurements (e.g. delayed ACKs
-+ * or other ACK compression effects), which may lead BBR to under-estimate
-+ * the rate.
-+ */
-+static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u64 bw_rtt, scaled;
-+
-+	/* Without any valid RTT sample, cap cwnd at the initial default.
-+	 * This should only happen when the connection is not using TCP
-+	 * timestamps and has retransmitted all of the SYN/SYNACK/data
-+	 * packets ACKed so far. An RTO can then cut cwnd to 1, and we need
-+	 * to slow-start up toward something safe: the initial cwnd.
-+	 */
-+	if (unlikely(bbr->min_rtt_us == ~0U))
-+		return bbr->init_cwnd;
-+
-+	bw_rtt = (u64)bw * bbr->min_rtt_us;
-+
-+	/* Apply the gain and drop its scaling ... */
-+	scaled = (bw_rtt * gain) >> BBR_SCALE;
-+	/* ... then remove the BW_SCALE shift, rounding up to avoid a
-+	 * negative feedback loop.
-+	 */
-+	return (u32)((scaled + BW_UNIT - 1) / BW_UNIT);
-+}
-+
-+/* Adjust a cwnd so that full-sized skbs can be in flight on both end hosts,
-+ * which high-speed paths need in order to be fully utilized:
-+ *   - one skb in the sending host Qdisc,
-+ *   - one skb in the sending host TSO/GSO engine,
-+ *   - one skb being received by the receiver's LRO/GRO/delayed-ACK engine.
-+ * At low rates (bbr_min_tso_rate) this does not bloat cwnd because
-+ * tso_segs_goal is then 1. The minimum cwnd is 4 packets, which allows 2
-+ * outstanding 2-packet sequences and so tries to keep the pipe full even
-+ * with ACK-every-other-packet delayed ACKs.
-+ */
-+static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	const u32 skb_budget = 3 * bbr_tso_segs_goal(sk);
-+
-+	/* Allow enough full-sized skbs in flight to utilize end systems. */
-+	if (bbr->params.cwnd_tso_budget != 1) {
-+		/* padding: add the budget, then round up to an even count */
-+		cwnd += skb_budget;
-+		cwnd = (cwnd + 1) & ~1U;
-+	} else {
-+		/* flooring: only raise cwnd up to the budget and the minimum */
-+		cwnd = max_t(u32, cwnd, skb_budget);
-+		cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target);
-+	}
-+
-+	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
-+	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
-+		cwnd += 2;
-+
-+	return cwnd;
-+}
-+
-+/* Target inflight for the given bw and gain: the BDP from bbr_bdp() passed
-+ * through bbr_quantization_budget().
-+ */
-+static u32 bbr_inflight(struct sock *sk, u32 bw, int gain)
-+{
-+	return bbr_quantization_budget(sk, bbr_bdp(sk, bw, gain));
-+}
-+
-+/* Estimate how many of our packets will be in the network at the earliest
-+ * departure time (EDT) of the next scheduled skb.
-+ *
-+ * With pacing at lower layers (TSQ and departure-time pacing, e.g. fq) there
-+ * is often less data "in the network" than "in flight": several skbs may sit
-+ * in the pacing layer with a pre-scheduled EDT. BBR adapts its pacing rate
-+ * to the inflight level already "baked in" by earlier departure-time
-+ * decisions, using the rough estimate:
-+ *	in_network_at_edt = inflight_at_edt - (EDT - now) * bw
-+ * When inflight is being increased we want to know whether sending the EDT
-+ * skb pushes inflight above the target, so inflight_at_edt then includes
-+ * bbr_tso_segs_goal() for that skb. When decreasing, we estimate whether
-+ * inflight sinks too low just before the EDT transmit.
-+ */
-+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u64 now_ns = tp->tcp_clock_cache;
-+	u64 edt_ns = max(tp->tcp_wstamp_ns, now_ns);
-+	u64 wait_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
-+	u32 drained = (u64)bbr_bw(sk) * wait_us >> BW_SCALE;
-+	u32 at_edt = inflight_now;
-+
-+	if (bbr->pacing_gain > BBR_UNIT)	/* increasing inflight */
-+		at_edt += bbr_tso_segs_goal(sk);	/* include EDT skb */
-+
-+	return drained >= at_edt ? 0 : at_edt - drained;
-+}
-+
-+/* Cwnd increment derived from the ack-aggregation estimate. Returns 0 when
-+ * extra_acked_gain is 0, or when the pipe is not yet full and
-+ * extra_acked_in_startup is off. Otherwise the gained extra_acked value is
-+ * capped at bw * bbr_extra_acked_max_us.
-+ */
-+static u32 bbr_ack_aggregation_cwnd(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 cap, extra;
-+
-+	if (!bbr->params.extra_acked_gain)
-+		return 0;
-+	if (!bbr_full_bw_reached(sk) && !bbr->params.extra_acked_in_startup)
-+		return 0;
-+
-+	cap = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us)
-+		/ BW_UNIT;
-+	extra = (bbr->params.extra_acked_gain * bbr_extra_acked(sk))
-+		>> BBR_SCALE;
-+
-+	return min(extra, cap);
-+}
-+
-+/* Cwnd to use in PROBE_RTT mode: cwnd_min_target when probe_rtt_cwnd_gain
-+ * is 0, otherwise the larger of cwnd_min_target and the BDP scaled by
-+ * probe_rtt_cwnd_gain.
-+ */
-+static u32 bbr_probe_rtt_cwnd(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 min_cwnd = bbr->params.cwnd_min_target;
-+
-+	if (!bbr->params.probe_rtt_cwnd_gain)
-+		return min_cwnd;
-+
-+	return max_t(u32, min_cwnd,
-+		     bbr_bdp(sk, bbr_bw(sk), bbr->params.probe_rtt_cwnd_gain));
-+}
-+
-+/* Move snd_cwnd toward the target: slow-start upward while the bw estimate
-+ * is growing or losses pulled cwnd below target, and snap down to the target
-+ * when above it. When nothing was fully ACKed only the caps are applied.
-+ * The target and a "cwnd changed" flag are reported through ctx.
-+ */
-+static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
-+			 u32 acked, u32 bw, int gain, u32 cwnd,
-+			 struct bbr_context *ctx)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	const u32 cwnd_before = tp->snd_cwnd;
-+	u32 goal = 0;
-+
-+	if (acked) {
-+		goal = bbr_bdp(sk, bw, gain);
-+
-+		/* Account for excess ACKed data that seems due to aggregation
-+		 * (of data and/or ACKs) visible in the ACK stream.
-+		 */
-+		goal += bbr_ack_aggregation_cwnd(sk);
-+		goal = bbr_quantization_budget(sk, goal);
-+
-+		bbr->debug.target_cwnd = goal;
-+
-+		/* Update cwnd; enable the fast path once cwnd reaches goal. */
-+		bbr->try_fast_path = 0;
-+		if (!bbr_full_bw_reached(sk)) {
-+			/* pipe not full yet: cwnd may grow but is never cut */
-+			if (cwnd < goal || cwnd < 2 * bbr->init_cwnd)
-+				cwnd += acked;
-+			else
-+				bbr->try_fast_path = 1;
-+		} else {
-+			/* only cut cwnd if we filled the pipe */
-+			cwnd += acked;
-+			if (cwnd >= goal) {
-+				cwnd = goal;
-+				bbr->try_fast_path = 1;
-+			}
-+		}
-+
-+		/* When growing, don't exceed twice what we just probed. */
-+		if (bbr->params.usage_based_cwnd) {
-+			u32 probe_cap = max(2 * tp->max_packets_out,
-+					    tp->snd_cwnd);
-+
-+			cwnd = min(cwnd, probe_cap);
-+		}
-+
-+		cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target);
-+	}
-+
-+	/* Caps that apply even when no packet was fully ACKed. */
-+	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);	/* global cap */
-+	if (bbr->mode == BBR_PROBE_RTT)	/* drain queue, refresh min_rtt */
-+		tp->snd_cwnd = min_t(u32, tp->snd_cwnd, bbr_probe_rtt_cwnd(sk));
-+
-+	ctx->target_cwnd = goal;
-+	ctx->log = (tp->snd_cwnd != cwnd_before);
-+}
-+
-+/* Detect the start of a new round trip. round_start is cleared first and
-+ * set again only when the sample has a positive interval and its
-+ * prior_delivered is not before next_rtt_delivered.
-+ */
-+static void bbr_update_round_start(struct sock *sk,
-+		const struct rate_sample *rs, struct bbr_context *ctx)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr->round_start = 0;
-+
-+	if (rs->interval_us <= 0)
-+		return;
-+	if (before(rs->prior_delivered, bbr->next_rtt_delivered))
-+		return;
-+
-+	/* We've reached the next RTT. */
-+	bbr->next_rtt_delivered = tp->delivered;
-+	bbr->round_start = 1;
-+}
-+
-+/* Derive a bandwidth sample from how fast packets were delivered and store
-+ * it in ctx->sample_bw and bbr->debug.rs_bw. With no positive interval the
-+ * stored sample is 0. A negative delivered count triggers a one-time warning
-+ * and leaves both outputs untouched.
-+ */
-+static void bbr_calculate_bw_sample(struct sock *sk,
-+			const struct rate_sample *rs, struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u64 sample = 0;
-+
-+	if (rs->interval_us > 0) {
-+		if (WARN_ONCE(rs->delivered < 0,
-+			      "negative delivered: %d interval_us: %ld\n",
-+			      rs->delivered, rs->interval_us))
-+			return;
-+
-+		/* delivered / interval is a (lower bound) bottleneck bw
-+		 * sample. delivered is in packets and interval_us in uS, so
-+		 * the ratio is <<1 for most connections: scale delivered
-+		 * first, and round up so low rates can still grow despite
-+		 * integer division.
-+		 */
-+		sample = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT,
-+					  rs->interval_us);
-+	}
-+
-+	ctx->sample_bw = sample;
-+	bbr->debug.rs_bw = sample;
-+}
-+
-+/* Track the windowed max degree of ack aggregation, which is used to
-+ * provision extra in-flight data so sending can continue during inter-ACK
-+ * silences.
-+ *
-+ * The degree of aggregation is the data ACKed beyond what was expected:
-+ *
-+ *	max_extra_acked = "maximum recent excess data ACKed beyond
-+ *			   max_bw * interval"
-+ *	cwnd += max_extra_acked
-+ *
-+ * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us
-+ * (100 ms). The max filter approximates a sliding window of 5-10
-+ * (packet-timed) round trips outside startup and 1-2 round trips in startup.
-+ */
-+static void bbr_update_ack_aggregation(struct sock *sk,
-+				       const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	u32 epoch_us, expected_acked, extra_acked;
-+	bool restart_epoch;
-+
-+	if (!bbr->params.extra_acked_gain || rs->acked_sacked <= 0 ||
-+	    rs->delivered < 0 || rs->interval_us <= 0)
-+		return;
-+
-+	if (bbr->round_start) {
-+		u32 win_rtts_thresh = bbr->params.extra_acked_win_rtts;
-+
-+		bbr->extra_acked_win_rtts = min(0x1F,
-+						bbr->extra_acked_win_rtts + 1);
-+		/* Use a one-round window while still in startup. */
-+		if (bbr->params.extra_acked_in_startup &&
-+		    !bbr_full_bw_reached(sk))
-+			win_rtts_thresh = 1;
-+		/* Window elapsed: switch to the other slot and clear it. */
-+		if (bbr->extra_acked_win_rtts >= win_rtts_thresh) {
-+			bbr->extra_acked_win_rtts = 0;
-+			bbr->extra_acked_win_idx = !bbr->extra_acked_win_idx;
-+			bbr->extra_acked[bbr->extra_acked_win_idx] = 0;
-+		}
-+	}
-+
-+	/* Compute how many packets we expected to be delivered over epoch. */
-+	epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp,
-+				      bbr->ack_epoch_mstamp);
-+	expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT;
-+
-+	/* Restart the aggregation epoch when the ACK rate is below the
-+	 * expected rate, or when a very large number of ACKs has been
-+	 * received since the epoch began (potentially quite old epoch).
-+	 */
-+	restart_epoch = bbr->ack_epoch_acked <= expected_acked ||
-+			(bbr->ack_epoch_acked + rs->acked_sacked >=
-+			 bbr_ack_epoch_acked_reset_thresh);
-+	if (restart_epoch) {
-+		bbr->ack_epoch_acked = 0;
-+		bbr->ack_epoch_mstamp = tp->delivered_mstamp;
-+		expected_acked = 0;
-+	}
-+
-+	/* Compute excess data delivered, beyond what was expected. */
-+	bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
-+				     bbr->ack_epoch_acked + rs->acked_sacked);
-+	extra_acked = bbr->ack_epoch_acked - expected_acked;
-+	extra_acked = min(extra_acked, tp->snd_cwnd);
-+	if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
-+		bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
-+}
-+
-+/* Estimate when the pipe is full, using the change in delivery rate: BBR
-+ * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
-+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
-+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
-+ * higher rwin, 3: we get higher delivery rate samples. Or transient
-+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
-+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
-+ */
-+static void bbr_check_full_bw_reached(struct sock *sk,
-+				      const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 growth_target;
-+
-+	/* Evaluate once per round, on non-app-limited samples, until decided. */
-+	if (bbr_full_bw_reached(sk))
-+		return;
-+	if (!bbr->round_start || rs->is_app_limited)
-+		return;
-+
-+	growth_target = (u64)bbr->full_bw * bbr->params.full_bw_thresh >> BBR_SCALE;
-+	if (bbr_max_bw(sk) < growth_target) {
-+		/* One more round without enough growth; decide at the limit. */
-+		++bbr->full_bw_cnt;
-+		bbr->full_bw_reached =
-+			bbr->full_bw_cnt >= bbr->params.full_bw_cnt;
-+	} else {
-+		/* Bandwidth still growing: new baseline, restart the count. */
-+		bbr->full_bw = bbr_max_bw(sk);
-+		bbr->full_bw_cnt = 0;
-+	}
-+}
-+
-+/* Leave STARTUP for DRAIN once full bandwidth has been reached, and report
-+ * whether DRAIN has already pulled inflight down far enough to be exited.
-+ */
-+static bool bbr_check_drain(struct sock *sk, const struct rate_sample *rs,
-+			    struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+
-+	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
-+		bbr->mode = BBR_DRAIN;	/* drain the queue we created */
-+		tp->snd_ssthresh = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
-+		bbr2_reset_congestion_signals(sk);
-+	}
-+
-+	/* Only DRAIN (possibly entered just above) can be exited here. */
-+	if (bbr->mode != BBR_DRAIN)
-+		return false;
-+
-+	/* Exit DRAIN once inflight is no larger than bbr_inflight() computed
-+	 * from the max bw with gain BBR_UNIT.
-+	 */
-+	return bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tp)) <=
-+	       bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
-+}
-+
-+/* If the PROBE_RTT completion deadline has been set and has passed, restore
-+ * the cwnd and leave PROBE_RTT.
-+ */
-+static void bbr_check_probe_rtt_done(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+
-+	if (!bbr->probe_rtt_done_stamp)
-+		return;		/* deadline not set */
-+	if (!after(tcp_jiffies32, bbr->probe_rtt_done_stamp))
-+		return;		/* deadline not passed yet */
-+
-+	/* Restart the clock that schedules the next PROBE_RTT. */
-+	bbr->probe_rtt_min_stamp = tcp_jiffies32;
-+	/* Do not resume with a cwnd below bbr->prior_cwnd. */
-+	tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
-+	bbr2_exit_probe_rtt(sk);
-+}
-+
-+/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
-+ * periodically drain the bottleneck queue, to converge to measure the true
-+ * min_rtt (unloaded propagation delay). This allows the flows to keep queues
-+ * small (reducing queuing delay and packet loss) and achieve fairness among
-+ * BBR flows.
-+ *
-+ * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
-+ * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
-+ * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed
-+ * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and
-+ * re-enter the previous mode. BBR uses 200ms to approximately bound the
-+ * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s).
-+ *
-+ * Note that flows need only pay 2% if they are busy sending over the last 10
-+ * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
-+ * natural silences or low-rate periods within 10 seconds where the rate is low
-+ * enough for long enough to drain its queue in the bottleneck. We pick up
-+ * these min RTT measurements opportunistically with our min_rtt filter. :-)
-+ */
-+static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	u32 probe_rtt_deadline, min_rtt_deadline, delivered_mark;
-+	bool probe_rtt_expired, min_rtt_expired;
-+
-+	/* Short filter (probe_rtt_win_ms): its expiry times the next PROBE_RTT. */
-+	probe_rtt_deadline = bbr->probe_rtt_min_stamp +
-+			     msecs_to_jiffies(bbr->params.probe_rtt_win_ms);
-+	probe_rtt_expired = after(tcp_jiffies32, probe_rtt_deadline);
-+	if (rs->rtt_us >= 0 &&
-+	    (rs->rtt_us <= bbr->probe_rtt_min_us ||
-+	     (probe_rtt_expired && !rs->is_ack_delayed))) {
-+		bbr->probe_rtt_min_stamp = tcp_jiffies32;
-+		bbr->probe_rtt_min_us = rs->rtt_us;
-+	}
-+
-+	/* Long filter (min_rtt_win_sec): adopt the short filter's value when it
-+	 * is at least as low, or when the long window has expired.
-+	 */
-+	min_rtt_deadline = bbr->min_rtt_stamp +
-+			   bbr->params.min_rtt_win_sec * HZ;
-+	min_rtt_expired = after(tcp_jiffies32, min_rtt_deadline);
-+	if (min_rtt_expired || bbr->probe_rtt_min_us <= bbr->min_rtt_us) {
-+		bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp;
-+		bbr->min_rtt_us = bbr->probe_rtt_min_us;
-+	}
-+
-+	/* Enter PROBE_RTT when the short window has expired, unless the mode
-+	 * is disabled (probe_rtt_mode_ms == 0), we are restarting from idle,
-+	 * or we are already in PROBE_RTT.
-+	 */
-+	if (bbr->params.probe_rtt_mode_ms > 0 && probe_rtt_expired &&
-+	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
-+		bbr->mode = BBR_PROBE_RTT;	/* dip, drain queue */
-+		bbr_save_cwnd(sk);	/* note cwnd so it can be restored */
-+		bbr->probe_rtt_done_stamp = 0;
-+		bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
-+		bbr->next_rtt_delivered = tp->delivered;
-+	}
-+
-+	if (bbr->mode == BBR_PROBE_RTT) {
-+		/* Flag the flow app-limited (marker forced non-zero) so that
-+		 * the low rate samples taken in this mode are ignored.
-+		 */
-+		delivered_mark = tp->delivered + tcp_packets_in_flight(tp);
-+		tp->app_limited = delivered_mark ? delivered_mark : 1;
-+
-+		/* Maintain min packets in flight for max(200 ms, 1 round). */
-+		if (bbr->probe_rtt_done_stamp) {
-+			/* Deadline set: wait for a full round, then check it. */
-+			if (bbr->round_start)
-+				bbr->probe_rtt_round_done = 1;
-+			if (bbr->probe_rtt_round_done)
-+				bbr_check_probe_rtt_done(sk);
-+		} else if (tcp_packets_in_flight(tp) <=
-+			   bbr_probe_rtt_cwnd(sk)) {
-+			/* Inflight is low enough: set the deadline now. */
-+			bbr->probe_rtt_done_stamp = tcp_jiffies32 +
-+				msecs_to_jiffies(bbr->params.probe_rtt_mode_ms);
-+			bbr->probe_rtt_round_done = 0;
-+			bbr->next_rtt_delivered = tp->delivered;
-+		}
-+	}
-+
-+	/* Restart after idle ends only once we process a new S/ACK for data. */
-+	if (rs->delivered > 0)
-+		bbr->idle_restart = 0;
-+}
-+
-+/* Select the pacing and cwnd gains that go with the current mode. */
-+static void bbr_update_gains(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 pacing, cwnd;
-+
-+	switch (bbr->mode) {
-+	case BBR_STARTUP:
-+		pacing = bbr->params.high_gain;
-+		cwnd = bbr->params.startup_cwnd_gain;
-+		break;
-+	case BBR_DRAIN:
-+		pacing = bbr->params.drain_gain;	/* slow, to drain */
-+		cwnd = bbr->params.startup_cwnd_gain;	/* keep cwnd */
-+		break;
-+	case BBR_PROBE_BW:
-+		pacing = bbr->params.pacing_gain[bbr->cycle_idx];
-+		cwnd = bbr->params.cwnd_gain;
-+		break;
-+	case BBR_PROBE_RTT:
-+		pacing = BBR_UNIT;
-+		cwnd = BBR_UNIT;
-+		break;
-+	default:
-+		/* Unknown mode: warn once and leave both gains untouched. */
-+		WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
-+		return;
-+	}
-+
-+	bbr->pacing_gain = pacing;
-+	bbr->cwnd_gain = cwnd;
-+}
-+
-+/* Initialize the per-connection BBR state: snapshot the tunables into
-+ * bbr->params (each clamped to a maximum), seed both RTT filters from
-+ * tcp_min_rtt(), and start in BBR_STARTUP.
-+ */
-+static void bbr_init(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	int phase;
-+
-+	WARN_ON_ONCE(tp->snd_cwnd >= bbr_cwnd_warn_val);
-+
-+	bbr->initialized = 1;
-+
-+	/* Tunables: numeric ones are limited to the constant passed to min(),
-+	 * flags are normalized to 0 or 1.
-+	 */
-+	bbr->params.high_gain = min(0x7FF, bbr_high_gain);
-+	bbr->params.drain_gain = min(0x3FF, bbr_drain_gain);
-+	bbr->params.startup_cwnd_gain = min(0x7FF, bbr_startup_cwnd_gain);
-+	bbr->params.cwnd_gain = min(0x7FF, bbr_cwnd_gain);
-+	bbr->params.cwnd_tso_budget = min(0x1U, bbr_cwnd_tso_budget);
-+	bbr->params.cwnd_min_target = min(0xFU, bbr_cwnd_min_target);
-+	bbr->params.min_rtt_win_sec = min(0x1FU, bbr_min_rtt_win_sec);
-+	bbr->params.probe_rtt_mode_ms = min(0x1FFU, bbr_probe_rtt_mode_ms);
-+	bbr->params.full_bw_cnt = min(0x7U, bbr_full_bw_cnt);
-+	bbr->params.full_bw_thresh = min(0x3FFU, bbr_full_bw_thresh);
-+	bbr->params.extra_acked_gain = min(0x7FF, bbr_extra_acked_gain);
-+	bbr->params.extra_acked_win_rtts = min(0x1FU, bbr_extra_acked_win_rtts);
-+	bbr->params.drain_to_target = !!bbr_drain_to_target;
-+	bbr->params.precise_ece_ack = !!bbr_precise_ece_ack;
-+	bbr->params.extra_acked_in_startup = !!bbr_extra_acked_in_startup;
-+	bbr->params.probe_rtt_cwnd_gain = min(0xFFU, bbr_probe_rtt_cwnd_gain);
-+	/* The PROBE_RTT window is also capped by the min_rtt window set above. */
-+	bbr->params.probe_rtt_win_ms =
-+		min(0x3FFFU,
-+		    min_t(u32, bbr_probe_rtt_win_ms,
-+			  bbr->params.min_rtt_win_sec * MSEC_PER_SEC));
-+	for (phase = 0; phase < CYCLE_LEN; phase++)
-+		bbr->params.pacing_gain[phase] =
-+			min(0x3FF, bbr_pacing_gain[phase]);
-+	bbr->params.usage_based_cwnd = !!bbr_usage_based_cwnd;
-+	bbr->params.tso_rtt_shift = min(0xFU, bbr_tso_rtt_shift);
-+
-+	/* Debug bookkeeping. */
-+	bbr->debug.snd_isn = tp->snd_una;
-+	bbr->debug.target_cwnd = 0;
-+	bbr->debug.undo = 0;
-+
-+	/* cwnd and round-trip tracking. */
-+	bbr->init_cwnd = min(0x7FU, tp->snd_cwnd);
-+	bbr->prior_cwnd = tp->prior_cwnd;
-+	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-+	bbr->next_rtt_delivered = 0;
-+	bbr->prev_ca_state = TCP_CA_Open;
-+	bbr->packet_conservation = 0;
-+
-+	/* Both RTT filters start from the stack's current min RTT. */
-+	bbr->probe_rtt_done_stamp = 0;
-+	bbr->probe_rtt_round_done = 0;
-+	bbr->probe_rtt_min_us = tcp_min_rtt(tp);
-+	bbr->probe_rtt_min_stamp = tcp_jiffies32;
-+	bbr->min_rtt_us = tcp_min_rtt(tp);
-+	bbr->min_rtt_stamp = tcp_jiffies32;
-+
-+	bbr->has_seen_rtt = 0;
-+	bbr_init_pacing_rate_from_rtt(sk);
-+
-+	/* Bandwidth model and state machine start in STARTUP. */
-+	bbr->round_start = 0;
-+	bbr->idle_restart = 0;
-+	bbr->full_bw_reached = 0;
-+	bbr->full_bw = 0;
-+	bbr->full_bw_cnt = 0;
-+	bbr->cycle_mstamp = 0;
-+	bbr->cycle_idx = 0;
-+	bbr->mode = BBR_STARTUP;
-+	bbr->debug.rs_bw = 0;
-+
-+	/* ACK aggregation estimator. */
-+	bbr->ack_epoch_mstamp = tp->tcp_mstamp;
-+	bbr->ack_epoch_acked = 0;
-+	bbr->extra_acked_win_rtts = 0;
-+	bbr->extra_acked_win_idx = 0;
-+	bbr->extra_acked[0] = 0;
-+	bbr->extra_acked[1] = 0;
-+
-+	bbr->ce_state = 0;
-+	bbr->prior_rcv_nxt = tp->rcv_nxt;
-+	bbr->try_fast_path = 0;
-+
-+	/* Request pacing unless a pacing status has already been set. */
-+	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
-+}
-+
-+/* Send-buffer expansion factor: a constant 3, i.e. provision 3 * cwnd, since
-+ * BBR may slow-start even during recovery.
-+ */
-+static u32 bbr_sndbuf_expand(struct sock *sk)
-+{
-+	const u32 cwnd_multiple = 3;
-+
-+	return cwnd_multiple;
-+}
-+
-+/* __________________________________________________________________________
-+ *
-+ * Functions new to BBR v2 ("bbr") congestion control are below here.
-+ * __________________________________________________________________________
-+ */
-+
-+/* Fold a new bw sample into the current window (slot 1) of the max filter. */
-+static void bbr2_take_bw_hi_sample(struct sock *sk, u32 bw)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (bw > bbr->bw_hi[1])
-+		bbr->bw_hi[1] = bw;
-+}
-+
-+/* Flip the bw max filter window (done each PROBE_BW cycle, so the filter
-+ * keeps the max of the last 1-2 cycles): the current slot becomes the old
-+ * one and a fresh, empty current slot is started.
-+ */
-+static void bbr2_advance_bw_hi_filter(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* With no sample in the current window, keep remembering the old one. */
-+	if (bbr->bw_hi[1]) {
-+		bbr->bw_hi[0] = bbr->bw_hi[1];
-+		bbr->bw_hi[1] = 0;
-+	}
-+}
-+
-+/* Desired inflight: the BDP from bbr_inflight(), but never above the cwnd
-+ * (which congestion may have cut).
-+ */
-+static u32 bbr2_target_inflight(struct sock *sk)
-+{
-+	u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT);
-+	u32 cwnd = tcp_sk(sk)->snd_cwnd;
-+
-+	return bdp < cwnd ? bdp : cwnd;
-+}
-+
-+/* True in STARTUP, and in the REFILL and UP phases of PROBE_BW. */
-+static bool bbr2_is_probing_bandwidth(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (bbr->mode == BBR_STARTUP)
-+		return true;
-+	if (bbr->mode != BBR_PROBE_BW)
-+		return false;
-+
-+	return bbr->cycle_idx == BBR_BW_PROBE_REFILL ||
-+	       bbr->cycle_idx == BBR_BW_PROBE_UP;
-+}
-+
-+/* True once tp->tcp_mstamp is later than the phase start (cycle_mstamp) plus
-+ * interval_us.
-+ */
-+static bool bbr2_has_elapsed_in_phase(const struct sock *sk, u32 interval_us)
-+{
-+	const struct bbr *bbr = inet_csk_ca(sk);
-+	const struct tcp_sock *tp = tcp_sk(sk);
-+	u64 deadline = bbr->cycle_mstamp + interval_us;
-+
-+	return tcp_stamp_us_delta(tp->tcp_mstamp, deadline) > 0;
-+}
-+
-+/* Mark full bandwidth as reached and set inflight_hi from bbr_inflight() at
-+ * the max bandwidth with gain BBR_UNIT.
-+ */
-+static void bbr2_handle_queue_too_high_in_startup(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 max_bw_inflight;
-+
-+	bbr->full_bw_reached = 1;
-+	max_bw_inflight = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
-+	bbr->inflight_hi = max_bw_inflight;
-+}
-+
-+/* Count consecutive rounds whose CE mark ratio is at or above ecn_thresh, and
-+ * exit STARTUP once full_ecn_cnt such rounds have been seen in a row.
-+ */
-+static void bbr2_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* Skip once full bw is reached, or when the ECN logic is not in use. */
-+	if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible)
-+		return;
-+	if (!bbr->params.full_ecn_cnt || !bbr->params.ecn_thresh)
-+		return;
-+
-+	if (ce_ratio < bbr->params.ecn_thresh)
-+		bbr->startup_ecn_rounds = 0;
-+	else
-+		bbr->startup_ecn_rounds++;
-+
-+	if (bbr->startup_ecn_rounds < bbr->params.full_ecn_cnt)
-+		return;
-+
-+	bbr->debug.event = 'E';	/* ECN caused STARTUP exit */
-+	bbr2_handle_queue_too_high_in_startup(sk);
-+}
-+
-+/* Update the ECN alpha moving average from the packets delivered, and
-+ * delivered with CE marks, since the previous update; then run the STARTUP
-+ * ECN exit check with the new CE ratio.
-+ */
-+static void bbr2_update_ecn_alpha(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	s32 delivered, delivered_ce;
-+	u64 alpha, ce_ratio;
-+	u32 gain;
-+
-+	if (!bbr->params.ecn_factor)
-+		return;
-+
-+	delivered = tp->delivered - bbr->alpha_last_delivered;
-+	delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce;
-+
-+	if (delivered == 0)	/* avoid divide by zero */
-+		return;
-+	if (WARN_ON_ONCE(delivered < 0 || delivered_ce < 0))	/* backwards? */
-+		return;
-+
-+	/* Opt this connection into the ECN sender logic when ECN is enabled
-+	 * and min_rtt is within ecn_max_rtt_us (a zero limit admits any RTT).
-+	 */
-+	if (!bbr->ecn_eligible && bbr_ecn_enable &&
-+	    (!bbr->params.ecn_max_rtt_us ||
-+	     bbr->min_rtt_us <= bbr->params.ecn_max_rtt_us))
-+		bbr->ecn_eligible = 1;
-+
-+	/* CE-marked share of the new deliveries, in BBR_SCALE fixed point. */
-+	ce_ratio = (u64)delivered_ce << BBR_SCALE;
-+	do_div(ce_ratio, delivered);
-+
-+	/* alpha = (1 - gain) * alpha + gain * ce_ratio, capped at BBR_UNIT. */
-+	gain = bbr->params.ecn_alpha_gain;
-+	alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE;
-+	alpha += (gain * ce_ratio) >> BBR_SCALE;
-+	bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT);
-+
-+	/* Remember the counters this update has consumed. */
-+	bbr->alpha_last_delivered_ce = tp->delivered_ce;
-+	bbr->alpha_last_delivered = tp->delivered;
-+
-+	bbr2_check_ecn_too_high_in_startup(sk, ce_ratio);
-+}
-+
-+/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data.
-+ *
-+ * Recomputes bw_probe_up_cnt, the number of packets that must be (S)ACKed
-+ * per one-packet increase of inflight_hi, as snd_cwnd / 2^bw_probe_up_rounds
-+ * (at least 1), then advances bw_probe_up_rounds, saturating at 30.
-+ */
-+static void bbr2_raise_inflight_hi_slope(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 growth_this_round, cnt;
-+
-+	/* Calculate "slope": packets S/Acked per inflight_hi increment.
-+	 * Shift an unsigned 1: bw_probe_up_rounds is also seeded from outside
-+	 * this function (bbr2_start_bw_probe_refill()), so the clamp below
-+	 * does not bound it, and a signed 1 << 31 would overflow int.
-+	 */
-+	growth_this_round = 1U << bbr->bw_probe_up_rounds;
-+	bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30);
-+	cnt = tp->snd_cwnd / growth_this_round;
-+	cnt = max(cnt, 1U);
-+	bbr->bw_probe_up_cnt = cnt;
-+	bbr->debug.event = 'G';	/* Grow inflight_hi slope */
-+}
-+
-+/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue: turn newly (S)ACKed
-+ * packets into inflight_hi growth at the current slope.
-+ */
-+static void bbr2_probe_inflight_hi_upward(struct sock *sk,
-+					  const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	u32 steps;
-+
-+	/* Grow inflight_hi only while it is being fully used; otherwise drop
-+	 * the ACK credit so that unused credits do not accumulate.
-+	 */
-+	if (tp->snd_cwnd < bbr->inflight_hi || !tp->is_cwnd_limited) {
-+		bbr->bw_probe_up_acks = 0;
-+		return;
-+	}
-+
-+	/* One inflight_hi increment per bw_probe_up_cnt packets ACKed; the
-+	 * remainder stays in bw_probe_up_acks for later calls.
-+	 */
-+	bbr->bw_probe_up_acks += rs->acked_sacked;
-+	if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) {
-+		steps = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt;
-+		bbr->inflight_hi += steps;
-+		bbr->bw_probe_up_acks -= steps * bbr->bw_probe_up_cnt;
-+		bbr->debug.event = 'I';	/* Increment inflight_hi */
-+	}
-+
-+	if (bbr->round_start)
-+		bbr2_raise_inflight_hi_slope(sk);
-+}
-+
-+/* Does loss/ECN rate for this sample say inflight is "too high"?
-+ * This is used by both the bbr_check_loss_too_high_in_startup() function,
-+ * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which
-+ * uses it to notice when loss/ECN rates suggest inflight is too high.
-+ */
-+static bool bbr2_is_inflight_too_high(const struct sock *sk,
-+				      const struct rate_sample *rs)
-+{
-+	const struct bbr *bbr = inet_csk_ca(sk);
-+	u32 max_lost, min_ce;
-+
-+	/* Loss test: more packets lost than loss_thresh (BBR_SCALE fixed
-+	 * point) times the inflight recorded in tx_in_flight.
-+	 */
-+	if (rs->lost > 0 && rs->tx_in_flight) {
-+		max_lost = (u64)rs->tx_in_flight * bbr->params.loss_thresh >>
-+			   BBR_SCALE;
-+		if (rs->lost > max_lost)
-+			return true;
-+	}
-+
-+	/* ECN test: only for ECN-eligible flows with a non-zero ecn_thresh,
-+	 * and only when this sample delivered CE-marked packets.
-+	 */
-+	if (rs->delivered_ce <= 0 || rs->delivered <= 0)
-+		return false;
-+	if (!bbr->ecn_eligible || !bbr->params.ecn_thresh)
-+		return false;
-+
-+	min_ce = (u64)rs->delivered * bbr->params.ecn_thresh >> BBR_SCALE;
-+	return rs->delivered_ce >= min_ce;
-+}
-+
-+/* Calculate the tx_in_flight level that corresponded to excessive loss.
-+ * We find "lost_prefix" segs of the skb where loss rate went too high,
-+ * by solving for "lost_prefix" in the following equation:
-+ * lost / inflight >= loss_thresh
-+ * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh
-+ * Then we take that equation, convert it to fixed point, and
-+ * round up to the nearest packet.
-+ *
-+ * Returns inflight_prev + lost_prefix, or ~0U when one of the sanity checks
-+ * in the body trips (negative inflight_prev or lost_prev, or a zero divisor).
-+ */
-+static u32 bbr2_inflight_hi_from_lost_skb(const struct sock *sk,
-+ const struct rate_sample *rs,
-+ const struct sk_buff *skb)
-+{
-+ const struct bbr *bbr = inet_csk_ca(sk);
-+ u32 loss_thresh = bbr->params.loss_thresh;
-+ u32 pcount, divisor, inflight_hi;
-+ s32 inflight_prev, lost_prev;
-+ u64 loss_budget, lost_prefix;
-+
-+ pcount = tcp_skb_pcount(skb);
-+
-+ /* How much data was in flight before this skb? A negative value is
-+ * treated as inconsistent input: warn once and return ~0U.
-+ */
-+ inflight_prev = rs->tx_in_flight - pcount;
-+ if (WARN_ONCE(inflight_prev < 0,
-+ "tx_in_flight: %u pcount: %u reneg: %u",
-+ rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg))
-+ return ~0U;
-+
-+ /* How much inflight data was marked lost before this skb? */
-+ lost_prev = rs->lost - pcount;
-+ if (WARN_ON_ONCE(lost_prev < 0))
-+ return ~0U;
-+
-+ /* At what prefix of this lost skb did loss rate exceed loss_thresh?
-+ * loss_budget is inflight_prev * loss_thresh in BBR_SCALE fixed point,
-+ * rounded up to a whole packet.
-+ */
-+ loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1;
-+ loss_budget >>= BBR_SCALE;
-+ if (lost_prev >= loss_budget) {
-+ lost_prefix = 0; /* previous losses crossed loss_thresh */
-+ } else {
-+ lost_prefix = loss_budget - lost_prev;
-+ lost_prefix <<= BBR_SCALE;
-+ divisor = BBR_UNIT - loss_thresh;
-+ if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */
-+ return ~0U;
-+ do_div(lost_prefix, divisor);
-+ }
-+
-+ inflight_hi = inflight_prev + lost_prefix;
-+ return inflight_hi;
-+}
-+
-+/* If loss/ECN rates during probing indicated we may have overfilled a
-+ * buffer, return an operating point that tries to leave unutilized headroom in
-+ * the path for other flows, for fairness convergence and lower RTTs and loss.
-+ */
-+static u32 bbr2_inflight_with_headroom(const struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	s32 usable, min_target;
-+	u32 reserve;
-+
-+	/* No upper bound has been set yet, so there is nothing to back off. */
-+	if (bbr->inflight_hi == ~0U)
-+		return ~0U;
-+
-+	/* Reserve the configured fraction of inflight_hi, at least 1 packet. */
-+	reserve = ((u64)bbr->inflight_hi * bbr->params.inflight_headroom) >>
-+		  BBR_SCALE;
-+	if (reserve < 1U)
-+		reserve = 1U;
-+
-+	/* Compare as signed, so that a reserve larger than inflight_hi yields
-+	 * the minimum target instead of a wrapped value.
-+	 */
-+	usable = bbr->inflight_hi - reserve;
-+	min_target = bbr->params.cwnd_min_target;
-+	return usable > min_target ? usable : min_target;
-+}
-+
-+/* Bound cwnd to a sensible level, based on our current probing state
-+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
-+ */
-+static void bbr2_bound_cwnd_for_inflight_model(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	u32 limit;
-+
-+	/* tcp_rcv_synsent_state_process() currently calls tcp_ack() and thus
-+	 * cong_control() without first initializing us(!).
-+	 */
-+	if (!bbr->initialized)
-+		return;
-+
-+	if (bbr->mode == BBR_PROBE_BW) {
-+		/* Cruising leaves headroom; the other phases may use all of
-+		 * inflight_hi to see if more packets fit in the path.
-+		 */
-+		if (bbr->cycle_idx == BBR_BW_PROBE_CRUISE)
-+			limit = bbr2_inflight_with_headroom(sk);
-+		else
-+			limit = bbr->inflight_hi;
-+	} else if (bbr->mode == BBR_PROBE_RTT) {
-+		limit = bbr2_inflight_with_headroom(sk);
-+	} else {
-+		limit = ~0U;	/* other modes: no cap from the upper bound */
-+	}
-+
-+	/* Adapt to any loss/ECN since our last bw probe. */
-+	if (bbr->inflight_lo < limit)
-+		limit = bbr->inflight_lo;
-+	/* Never cap below the minimum cwnd target. */
-+	limit = max_t(u32, limit, bbr->params.cwnd_min_target);
-+
-+	if (limit < tp->snd_cwnd)
-+		tp->snd_cwnd = limit;
-+}
-+
-+/* Estimate a short-term lower bound on the capacity available now, based
-+ * on measurements of the current delivery process and recent history. When we
-+ * are seeing loss/ECN at times when we are not probing bw, then conservatively
-+ * move toward flow balance by multiplicatively cutting our short-term
-+ * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a
-+ * multiplicative decrease in order to converge to a lower capacity in time
-+ * logarithmic in the magnitude of the decrease.
-+ *
-+ * However, we do not cut our short-term estimates lower than the current rate
-+ * and volume of delivered data from this round trip, since from the current
-+ * delivery process we can estimate the measured capacity available now.
-+ *
-+ * Anything faster than that approach would knowingly risk high loss, which can
-+ * cause low bw for Reno/CUBIC and high loss recovery latency for
-+ * request/response flows using any congestion control.
-+ */
-+static void bbr2_adapt_lower_bounds(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	u32 ecn_inflight_lo = ~0U;	/* stays "no bound" without ECN cut */
-+	u32 ecn_cut, beta, keep;
-+
-+	/* Lower-bound estimates are only used when not probing bw; when
-+	 * probing, inflight has to be pushed higher.
-+	 */
-+	if (bbr2_is_probing_bandwidth(sk))
-+		return;
-+
-+	/* ECN response: reduce inflight to (1 - alpha*ecn_factor). */
-+	if (bbr->ecn_in_round && bbr->ecn_eligible && bbr->params.ecn_factor) {
-+		ecn_cut = (BBR_UNIT -
-+			   ((bbr->ecn_alpha * bbr->params.ecn_factor) >>
-+			    BBR_SCALE));
-+		if (bbr->inflight_lo == ~0U)
-+			bbr->inflight_lo = tp->snd_cwnd;
-+		ecn_inflight_lo = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE;
-+	}
-+
-+	/* Loss response: reduce bw and inflight to (1 - beta), but not below
-+	 * the rate and volume delivered in the latest round trip.
-+	 */
-+	if (bbr->loss_in_round) {
-+		if (bbr->bw_lo == ~0U)
-+			bbr->bw_lo = bbr_max_bw(sk);
-+		if (bbr->inflight_lo == ~0U)
-+			bbr->inflight_lo = tp->snd_cwnd;
-+		beta = bbr->params.beta;
-+		keep = BBR_UNIT - beta;
-+		bbr->bw_lo = max_t(u32, bbr->bw_latest,
-+				   (u64)bbr->bw_lo * keep >> BBR_SCALE);
-+		bbr->inflight_lo = max_t(u32, bbr->inflight_latest,
-+					 (u64)bbr->inflight_lo * keep >>
-+					 BBR_SCALE);
-+	}
-+
-+	/* Adjust to the lower of the levels implied by loss or ECN. */
-+	if (ecn_inflight_lo < bbr->inflight_lo)
-+		bbr->inflight_lo = ecn_inflight_lo;
-+}
-+
-+/* Drop the short-term lower bounds adapted to congestion (both are set back
-+ * to ~0U), so that inflight can be pushed up again.
-+ */
-+static void bbr2_reset_lower_bounds(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	bbr->inflight_lo = ~0U;
-+	bbr->bw_lo = ~0U;
-+}
-+
-+/* After bw probing (STARTUP/PROBE_UP), clear the loss/ECN flags and the
-+ * "latest" delivery measurements before entering a state machine phase that
-+ * adapts the lower bound based on congestion signals.
-+ */
-+static void bbr2_reset_congestion_signals(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* Loss/ECN seen in the current round and in the current cycle. */
-+	bbr->loss_in_round = 0;
-+	bbr->loss_in_cycle = 0;
-+	bbr->ecn_in_round = 0;
-+	bbr->ecn_in_cycle = 0;
-+
-+	/* Rate and volume delivered in the latest round trip. */
-+	bbr->bw_latest = 0;
-+	bbr->inflight_latest = 0;
-+}
-+
-+/* Update (most of) our congestion signals: track the recent rate and volume of
-+ * delivered data, presence of loss, and EWMA degree of ECN marking.
-+ */
-+static void bbr2_update_congestion_signals(
-+	struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	u64 sample_bw;
-+
-+	bbr->loss_round_start = 0;
-+
-+	/* Without elapsed time or (S)ACKed data this is not a valid sample. */
-+	if (rs->interval_us <= 0 || !rs->acked_sacked)
-+		return;
-+
-+	sample_bw = ctx->sample_bw;
-+	/* App-limited samples count only if they reach the current max bw. */
-+	if (!rs->is_app_limited || sample_bw >= bbr_max_bw(sk))
-+		bbr2_take_bw_hi_sample(sk, sample_bw);
-+
-+	bbr->loss_in_round |= (rs->losses > 0);
-+
-+	/* Running maxima of rate and volume delivered within this round. */
-+	bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw);
-+	bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered);
-+
-+	/* The rest runs once per round trip: skip it while this sample still
-+	 * belongs to the round that was marked earlier.
-+	 */
-+	if (before(rs->prior_delivered, bbr->loss_round_delivered))
-+		return;
-+
-+	bbr->loss_round_delivered = tp->delivered;	/* mark round trip */
-+	bbr->loss_round_start = 1;
-+	bbr2_adapt_lower_bounds(sk);
-+
-+	/* Restart the single-round-trip filters from this sample. */
-+	bbr->loss_in_round = 0;
-+	bbr->ecn_in_round = 0;
-+	bbr->bw_latest = ctx->sample_bw;
-+	bbr->inflight_latest = rs->delivered;
-+}
-+
-+/* Bandwidth probing can cause loss. To help coexistence with loss-based
-+ * congestion control we spread out our probing in a Reno-conscious way. Due to
-+ * the shape of the Reno sawtooth, the time required between loss epochs for an
-+ * idealized Reno flow is a number of round trips that is the BDP of that
-+ * flow. We count packet-timed round trips directly, since measured RTT can
-+ * vary widely, and Reno is driven by packet-timed round trips.
-+ */
-+static bool bbr2_is_reno_coexistence_probe_time(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 wait_rounds, reno_gain, reno_rounds;
-+
-+	/* Upper limit on the wait: random loss can shave some small
-+	 * percentage off of our inflight in each round, so flows need robust
-+	 * periodic probes.
-+	 */
-+	wait_rounds = bbr->params.bw_probe_max_rounds;
-+
-+	reno_gain = bbr->params.bw_probe_reno_gain;
-+	if (reno_gain) {
-+		/* Reno-style wait: the target inflight scaled by reno_gain. */
-+		reno_rounds = ((u64)bbr2_target_inflight(sk) * reno_gain) >>
-+			      BBR_SCALE;
-+		if (reno_rounds < wait_rounds)
-+			wait_rounds = reno_rounds;
-+	}
-+
-+	return bbr->rounds_since_probe >= wait_rounds;
-+}
-+
-+/* How long do we want to wait before probing for bandwidth (and risking
-+ * loss)? We randomize the wait, for better mixing and fairness convergence.
-+ *
-+ * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips.
-+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow,
-+ * (eg 4K video to a broadband user):
-+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
-+ *
-+ * We bound the BBR-native inter-bw-probe wall clock time to be:
-+ * (a) higher than 2 sec: to try to avoid causing loss for a long enough time
-+ * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must
-+ * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs
-+ * (b) lower than 3 sec: to ensure flows can start probing in a reasonable
-+ * amount of time to discover unutilized bw on human-scale interactive
-+ * time-scales (e.g. perhaps traffic from a web page download that we
-+ * were competing with is now complete).
-+ */
-+static void bbr2_pick_probe_wait(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	u32 rand_rounds, rand_us;
-+
-+	/* Round-trip bound: start the round counter at a random value below
-+	 * bw_probe_rand_rounds.
-+	 */
-+	rand_rounds = get_random_u32_below(bbr->params.bw_probe_rand_rounds);
-+	bbr->rounds_since_probe = rand_rounds;
-+
-+	/* Wall clock bound: the base wait plus a random extra below
-+	 * bw_probe_rand_us.
-+	 */
-+	rand_us = get_random_u32_below(bbr->params.bw_probe_rand_us);
-+	bbr->probe_wait_us = bbr->params.bw_probe_base_us + rand_us;
-+}
-+
-+/* Record the given PROBE_BW phase index as the current one. */
-+static void bbr2_set_cycle_idx(struct sock *sk, int cycle_idx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* A new phase needs cwnd and pacing rate updated, so the fast path
-+	 * flag is cleared.
-+	 */
-+	bbr->try_fast_path = 0;
-+	bbr->cycle_idx = cycle_idx;
-+}
-+
-+/* Send at estimated bw to fill the pipe, but not queue. We need this phase
-+ * before PROBE_UP, because as soon as we send faster than the available bw
-+ * we will start building a queue, and if the buffer is shallow we can cause
-+ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and
-+ * inflight_hi estimates will underestimate.
-+ */
-+static void bbr2_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+
-+	bbr2_reset_lower_bounds(sk);
-+
-+	/* An inflight_hi that has been set is raised by refill_add_inc. */
-+	if (bbr->inflight_hi != ~0U)
-+		bbr->inflight_hi += bbr->params.refill_add_inc;
-+
-+	/* Fresh probing bookkeeping; the caller supplies the starting
-+	 * bw_probe_up_rounds.
-+	 */
-+	bbr->bw_probe_up_acks = 0;
-+	bbr->bw_probe_up_rounds = bw_probe_up_rounds;
-+	bbr->stopped_risky_probe = 0;
-+
-+	bbr->next_rtt_delivered = tp->delivered;
-+	bbr->ack_phase = BBR_ACKS_REFILLING;
-+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_REFILL);
-+}
-+
-+/* Enter the UP phase: probe the max deliverable data rate and volume. */
-+static void bbr2_start_bw_probe_up(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+
-+	bbr->cycle_mstamp = tp->tcp_mstamp;
-+	bbr->next_rtt_delivered = tp->delivered;
-+	bbr->ack_phase = BBR_ACKS_PROBE_STARTING;
-+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_UP);
-+	bbr2_raise_inflight_hi_slope(sk);
-+}
-+
-+/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall
-+ * clock time at which to probe beyond an inflight that we think to be
-+ * safe. This will knowingly risk packet loss, so we want to do this rarely, to
-+ * keep packet loss rates low. Also start a round-trip counter, to probe faster
-+ * if we estimate a Reno flow at our BDP would probe faster.
-+ */
-+static void bbr2_start_bw_probe_down(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+
-+	bbr2_reset_congestion_signals(sk);
-+	/* Not growing inflight_hi any more. */
-+	bbr->bw_probe_up_cnt = ~0U;
-+	bbr2_pick_probe_wait(sk);
-+
-+	bbr->cycle_mstamp = tp->tcp_mstamp;	/* start wall clock */
-+	bbr->next_rtt_delivered = tp->delivered;
-+	bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
-+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_DOWN);
-+}
-+
-+/* Cruise: maintain what we estimate to be a neutral, conservative
-+ * operating point, without attempting to probe up for bandwidth or down for
-+ * RTT, and only reducing inflight in response to loss/ECN signals.
-+ */
-+static void bbr2_start_bw_probe_cruise(struct sock *sk)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* A lower bound that has been set must not exceed inflight_hi. */
-+	if (bbr->inflight_lo != ~0U && bbr->inflight_lo > bbr->inflight_hi)
-+		bbr->inflight_lo = bbr->inflight_hi;
-+
-+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE);
-+}
-+
-+/* Loss and/or ECN rate is too high while probing.
-+ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle.
-+ */
-+static void bbr2_handle_inflight_too_high(struct sock *sk,
-+					  const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+	const u32 beta = bbr->params.beta;
-+	u32 cut_target;
-+
-+	bbr->bw_probe_samples = 0;	/* only react once per probe */
-+	bbr->prev_probe_too_high = 1;
-+	bbr->debug.event = 'L';	/* Loss/ECN too high */
-+
-+	/* An app-limited flow is not robustly probing the max volume of
-+	 * inflight data that might be safe (just as app-limited bw samples
-+	 * are not known to robustly probe bw), so inflight_hi is only cut
-+	 * for samples that are not app-limited. The cut goes to the larger of
-+	 * tx_in_flight and (1 - beta) times the target inflight.
-+	 */
-+	if (!rs->is_app_limited) {
-+		cut_target = (u64)bbr2_target_inflight(sk) *
-+			     (BBR_UNIT - beta) >> BBR_SCALE;
-+		bbr->inflight_hi = max_t(u32, rs->tx_in_flight, cut_target);
-+	}
-+
-+	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
-+		bbr2_start_bw_probe_down(sk);
-+}
-+
-+/* If we're seeing bw and loss samples reflecting our bw probing, adapt
-+ * using the signals we see. If loss or ECN mark rate gets too high, then adapt
-+ * inflight_hi downward. If we're able to push inflight higher without such
-+ * signals, push higher: adapt inflight_hi upward.
-+ */
-+static bool bbr2_adapt_upper_bounds(struct sock *sk,
-+				    const struct rate_sample *rs)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	/* Track when we'll see bw/loss samples resulting from our bw probes. */
-+	if (bbr->round_start && bbr->ack_phase == BBR_ACKS_PROBE_STARTING) {
-+		bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK;
-+	} else if (bbr->round_start &&
-+		   bbr->ack_phase == BBR_ACKS_PROBE_STOPPING) {
-+		/* End of samples from bw probing phase. */
-+		bbr->ack_phase = BBR_ACKS_INIT;
-+		bbr->bw_probe_samples = 0;
-+
-+		if (bbr->mode == BBR_PROBE_BW) {
-+			/* The current bw sample is our best recent chance at
-+			 * finding the highest available bw, so now is the
-+			 * time to forget the previous cycle's samples by
-+			 * advancing the filter window.
-+			 */
-+			if (!rs->is_app_limited)
-+				bbr2_advance_bw_hi_filter(sk);
-+
-+			/* If the last probe was stopped at inflight_hi and
-+			 * none of the resulting ACKs showed high loss/ECN,
-+			 * probe up again, this time letting inflight persist
-+			 * at inflight_hi for a round trip before going beyond.
-+			 */
-+			if (bbr->stopped_risky_probe &&
-+			    !bbr->prev_probe_too_high) {
-+				bbr->debug.event = 'R';	/* reprobe */
-+				bbr2_start_bw_probe_refill(sk, 0);
-+				return true;	/* decided state transition */
-+			}
-+		}
-+	}
-+
-+	if (bbr2_is_inflight_too_high(sk, rs)) {
-+		/* React only if the sample is from bw probing. */
-+		if (bbr->bw_probe_samples)
-+			bbr2_handle_inflight_too_high(sk, rs);
-+		return false;
-+	}
-+
-+	/* Loss/ECN rate is declared safe. Adjust upper bound upward, unless
-+	 * no excess queue signal has set an upper bound yet.
-+	 */
-+	if (bbr->inflight_hi == ~0U)
-+		return false;
-+
-+	/* To be resilient to random loss, raise inflight_hi if we observe in
-+	 * any phase that a higher level is safe.
-+	 */
-+	if (rs->tx_in_flight > bbr->inflight_hi) {
-+		bbr->inflight_hi = rs->tx_in_flight;
-+		bbr->debug.event = 'U';	/* raise up inflight_hi */
-+	}
-+
-+	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
-+		bbr2_probe_inflight_hi_upward(sk, rs);
-+
-+	return false;
-+}
-+
-+/* Check if it's time to probe for bandwidth now, and if so, kick it off.
-+ *
-+ * Returns true after calling bbr2_start_bw_probe_refill() (a state
-+ * transition was decided), or false if the current phase should continue.
-+ */
-+static bool bbr2_check_time_to_probe_bw(struct sock *sk)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ u64 volume;
-+ u32 n;
-+
-+ /* If we seem to be at an operating point where we are not seeing loss
-+ * but we are seeing ECN marks, then when the ECN marks cease we reprobe
-+ * quickly (in case a burst of cross-traffic has ceased and freed up bw,
-+ * or in case we are sharing with multiplicatively probing traffic).
-+ */
-+ if (bbr->params.ecn_reprobe_gain && bbr->ecn_eligible &&
-+ bbr->ecn_in_cycle && !bbr->loss_in_cycle &&
-+ inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
-+ bbr->debug.event = 'A'; /* *A*ll clear to probe *A*gain */
-+ /* Calculate n so that when bbr2_raise_inflight_hi_slope()
-+ * computes growth_this_round as 2^n it will be roughly the
-+ * desired volume of data (inflight_hi*ecn_reprobe_gain).
-+ */
-+ volume = ((u64)bbr->inflight_hi *
-+ bbr->params.ecn_reprobe_gain) >> BBR_SCALE;
-+ /* ilog2() is only meaningful for non-zero input; when the
-+ * scaled volume rounds down to 0, fall back to n = 0 (2^0).
-+ */
-+ n = volume ? ilog2(volume) : 0;
-+ bbr2_start_bw_probe_refill(sk, n);
-+ return true;
-+ }
-+
-+ if (bbr2_has_elapsed_in_phase(sk, bbr->probe_wait_us) ||
-+ bbr2_is_reno_coexistence_probe_time(sk)) {
-+ bbr2_start_bw_probe_refill(sk, 0);
-+ return true;
-+ }
-+ return false;
-+}
-+
-+/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE?
-+ *
-+ * @inflight: packets-in-flight estimate supplied by the caller.
-+ * @bw: bandwidth estimate used for the BDP comparison point.
-+ *
-+ * Returns false while inflight is still above bbr2_inflight_with_headroom().
-+ * Otherwise returns true once inflight is at or below
-+ * bbr_inflight(sk, bw, BBR_UNIT); unless params.drain_to_target is set, it
-+ * also returns true once min_rtt_us has elapsed in the current phase.
-+ */
-+static bool bbr2_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ bool is_under_bdp, is_long_enough;
-+
-+ /* Always need to pull inflight down to leave headroom in queue. */
-+ if (inflight > bbr2_inflight_with_headroom(sk))
-+ return false;
-+
-+ is_under_bdp = inflight <= bbr_inflight(sk, bw, BBR_UNIT);
-+ if (bbr->params.drain_to_target)
-+ return is_under_bdp;
-+
-+ is_long_enough = bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us);
-+ return is_under_bdp || is_long_enough;
-+}
-+
-+/* PROBE_BW state machine: cruise, refill, probe for bw, or drain?
-+ *
-+ * Does nothing until bbr_full_bw_reached(). Upper bounds are adapted first
-+ * via bbr2_adapt_upper_bounds(), which may itself decide a state transition;
-+ * the per-phase checks below then run only while in BBR_PROBE_BW mode.
-+ */
-+static void bbr2_update_cycle_phase(struct sock *sk,
-+ const struct rate_sample *rs)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ bool is_risky = false, is_queuing = false;
-+ u32 inflight, bw;
-+
-+ if (!bbr_full_bw_reached(sk))
-+ return;
-+
-+ /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */
-+ if (bbr2_adapt_upper_bounds(sk, rs))
-+ return; /* already decided state transition */
-+
-+ if (bbr->mode != BBR_PROBE_BW)
-+ return;
-+
-+ inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
-+ bw = bbr_max_bw(sk);
-+
-+ switch (bbr->cycle_idx) {
-+ /* First we spend most of our time cruising with a pacing_gain of 1.0,
-+ * which paces at the estimated bw, to try to fully use the pipe
-+ * without building queue. If we encounter loss/ECN marks, we adapt
-+ * by slowing down.
-+ */
-+ case BBR_BW_PROBE_CRUISE:
-+ if (bbr2_check_time_to_probe_bw(sk))
-+ return; /* already decided state transition */
-+ break;
-+
-+ /* After cruising, when it's time to probe, we first "refill": we send
-+ * at the estimated bw to fill the pipe, before probing higher and
-+ * knowingly risking overflowing the bottleneck buffer (causing loss).
-+ */
-+ case BBR_BW_PROBE_REFILL:
-+ if (bbr->round_start) {
-+ /* After one full round trip of sending in REFILL, we
-+ * start to see bw samples reflecting our REFILL, which
-+ * may be putting too much data in flight.
-+ */
-+ bbr->bw_probe_samples = 1;
-+ bbr2_start_bw_probe_up(sk);
-+ }
-+ break;
-+
-+ /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to
-+ * probe for bw. If we have not seen loss/ECN, we try to raise inflight
-+ * to at least pacing_gain*BDP; note that this may take more than
-+ * min_rtt if min_rtt is small (e.g. on a LAN).
-+ *
-+ * We terminate PROBE_UP bandwidth probing upon any of the following:
-+ *
-+ * (1) We've pushed inflight up to hit the inflight_hi target set in the
-+ * most recent previous bw probe phase. Thus we want to start
-+ * draining the queue immediately because it's very likely the most
-+ * recently sent packets will fill the queue and cause drops.
-+ * (checked here)
-+ * (2) We have probed for at least 1*min_rtt_us, and the
-+ * estimated queue is high enough (inflight > 1.25 * estimated_bdp).
-+ * (checked here)
-+ * (3) Loss filter says loss rate is "too high".
-+ * (checked in bbr2_is_inflight_too_high())
-+ * (4) ECN filter says ECN mark rate is "too high".
-+ * (checked in bbr2_is_inflight_too_high())
-+ */
-+ case BBR_BW_PROBE_UP:
-+ if (bbr->prev_probe_too_high &&
-+ inflight >= bbr->inflight_hi) {
-+ bbr->stopped_risky_probe = 1;
-+ is_risky = true;
-+ bbr->debug.event = 'D'; /* D for danger */
-+ } else if (bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us) &&
-+ inflight >=
-+ bbr_inflight(sk, bw,
-+ bbr->params.bw_probe_pif_gain)) {
-+ is_queuing = true;
-+ bbr->debug.event = 'Q'; /* building Queue */
-+ }
-+ if (is_risky || is_queuing) {
-+ bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */
-+ bbr2_start_bw_probe_down(sk); /* restart w/ down */
-+ }
-+ break;
-+
-+ /* After probing in PROBE_UP, we have usually accumulated some data in
-+ * the bottleneck buffer (if bw probing didn't find more bw). We next
-+ * enter PROBE_DOWN to try to drain any excess data from the queue. To
-+ * do this, we use a pacing_gain < 1.0. We hold this pacing gain until
-+ * our inflight is less than that target cruising point, which is the
-+ * minimum of (a) the amount needed to leave headroom, and (b) the
-+ * estimated BDP. Once inflight falls to match the target, we estimate
-+ * the queue is drained; persisting would underutilize the pipe.
-+ */
-+ case BBR_BW_PROBE_DOWN:
-+ if (bbr2_check_time_to_probe_bw(sk))
-+ return; /* already decided state transition */
-+ if (bbr2_check_time_to_cruise(sk, inflight, bw))
-+ bbr2_start_bw_probe_cruise(sk);
-+ break;
-+
-+ default:
-+ WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx);
-+ }
-+}
-+
-+/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW.
-+ * Lower bounds are reset first; which mode is chosen depends on
-+ * bbr_full_bw_reached().
-+ */
-+static void bbr2_exit_probe_rtt(struct sock *sk)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr2_reset_lower_bounds(sk);
-+ if (bbr_full_bw_reached(sk)) {
-+ bbr->mode = BBR_PROBE_BW;
-+ /* Raising inflight after PROBE_RTT may cause loss, so reset
-+ * the PROBE_BW clock and schedule the next bandwidth probe for
-+ * a friendly and randomized future point in time.
-+ */
-+ bbr2_start_bw_probe_down(sk);
-+ /* Since we are exiting PROBE_RTT, we know inflight is
-+ * below our estimated BDP, so it is reasonable to cruise.
-+ */
-+ bbr2_start_bw_probe_cruise(sk);
-+ } else {
-+ bbr->mode = BBR_STARTUP;
-+ }
-+}
-+
-+/* Exit STARTUP when the loss rate is too high (as judged by
-+ * bbr2_is_inflight_too_high()) and the number of loss events in the round is
-+ * >= params.full_loss_cnt. Wait until
-+ * the end of the round in recovery to get a good estimate of how many packets
-+ * have been lost, and how many we need to drain with a low pacing rate.
-+ * Does nothing once bbr_full_bw_reached(); disabled if full_loss_cnt is 0.
-+ */
-+static void bbr2_check_loss_too_high_in_startup(struct sock *sk,
-+ const struct rate_sample *rs)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ if (bbr_full_bw_reached(sk))
-+ return;
-+
-+ /* For STARTUP exit, check the loss rate at the end of each round trip
-+ * of Recovery episodes in STARTUP. We check the loss rate at the end
-+ * of the round trip to filter out noisy/low loss and have a better
-+ * sense of inflight (extent of loss), so we can drain more accurately.
-+ */
-+ if (rs->losses && bbr->loss_events_in_round < 0xf)
-+ bbr->loss_events_in_round++; /* update saturating counter */
-+ if (bbr->params.full_loss_cnt && bbr->loss_round_start &&
-+ inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery &&
-+ bbr->loss_events_in_round >= bbr->params.full_loss_cnt &&
-+ bbr2_is_inflight_too_high(sk, rs)) {
-+ bbr->debug.event = 'P'; /* Packet loss caused STARTUP exit */
-+ bbr2_handle_queue_too_high_in_startup(sk);
-+ return;
-+ }
-+ if (bbr->loss_round_start)
-+ bbr->loss_events_in_round = 0;
-+}
-+
-+/* If we are done draining, advance into steady state operation in PROBE_BW.
-+ * The "done draining" decision is delegated to bbr_check_drain(); when it
-+ * returns true, PROBE_BW is entered via bbr2_start_bw_probe_down().
-+ */
-+static void bbr2_check_drain(struct sock *sk, const struct rate_sample *rs,
-+ struct bbr_context *ctx)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ if (bbr_check_drain(sk, rs, ctx)) {
-+ bbr->mode = BBR_PROBE_BW;
-+ bbr2_start_bw_probe_down(sk);
-+ }
-+}
-+
-+/* Full model update for one rate sample: runs each update step below, in
-+ * the order listed, on the same sk/rs (and ctx where the step takes one).
-+ */
-+static void bbr2_update_model(struct sock *sk, const struct rate_sample *rs,
-+ struct bbr_context *ctx)
-+{
-+ bbr2_update_congestion_signals(sk, rs, ctx);
-+ bbr_update_ack_aggregation(sk, rs);
-+ bbr2_check_loss_too_high_in_startup(sk, rs);
-+ bbr_check_full_bw_reached(sk, rs);
-+ bbr2_check_drain(sk, rs, ctx);
-+ bbr2_update_cycle_phase(sk, rs);
-+ bbr_update_min_rtt(sk, rs);
-+}
-+
-+/* Fast path for app-limited case.
-+ *
-+ * On each ack, we execute bbr state machine, which primarily consists of:
-+ * 1) update model based on new rate sample, and
-+ * 2) update control based on updated model or state change.
-+ *
-+ * There are certain workload/scenarios, e.g. app-limited case, where
-+ * either we can skip updating model or we can skip update of both model
-+ * as well as control. This provides significant softirq cpu savings for
-+ * processing incoming acks.
-+ *
-+ * In case of app-limited, if there is no congestion (loss/ecn) and
-+ * if observed bw sample is less than current estimated bw, then we can
-+ * skip some of the computation in bbr state processing:
-+ *
-+ * - if there is no rtt/mode/phase change: In this case, since all the
-+ * parameters of the network model are constant, we can skip model
-+ * as well as control update.
-+ *
-+ * - else we can skip rest of the model update. But we still need to
-+ * update the control to account for the new rtt/mode/phase.
-+ *
-+ * Returns whether we can take fast path or not. When the fast path is not
-+ * taken but the rest of the model update can be skipped, *update_model is
-+ * set to false; otherwise *update_model is left untouched.
-+ */
-+static bool bbr2_fast_path(struct sock *sk, bool *update_model,
-+ const struct rate_sample *rs, struct bbr_context *ctx)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ u32 prev_min_rtt_us, prev_mode;
-+
-+ if (bbr->params.fast_path && bbr->try_fast_path &&
-+ rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) &&
-+ !bbr->loss_in_round && !bbr->ecn_in_round) {
-+ prev_mode = bbr->mode;
-+ prev_min_rtt_us = bbr->min_rtt_us;
-+ bbr2_check_drain(sk, rs, ctx);
-+ bbr2_update_cycle_phase(sk, rs);
-+ bbr_update_min_rtt(sk, rs);
-+
-+ if (bbr->mode == prev_mode &&
-+ bbr->min_rtt_us == prev_min_rtt_us &&
-+ bbr->try_fast_path)
-+ return true;
-+
-+ /* Skip model update, but control still needs to be updated */
-+ *update_model = false;
-+ }
-+ return false;
-+}
-+
-+/* Main entry point (the .cong_control hook of tcp_bbr2_cong_ops): update
-+ * round/ECN state and the bw sample, then either take the app-limited fast
-+ * path or update the model (unless the fast path ruled that out) followed by
-+ * the gains, pacing rate and cwnd. prev_ca_state and the loss/ECN-in-cycle
-+ * flags are recorded on every path, including the fast path.
-+ */
-+static void bbr2_main(struct sock *sk, const struct rate_sample *rs)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ struct bbr_context ctx = { 0 };
-+ bool update_model = true;
-+ u32 bw;
-+
-+ bbr->debug.event = '.'; /* init to default NOP (no event yet) */
-+
-+ bbr_update_round_start(sk, rs, &ctx);
-+ if (bbr->round_start) {
-+ bbr->rounds_since_probe =
-+ min_t(s32, bbr->rounds_since_probe + 1, 0xFF);
-+ bbr2_update_ecn_alpha(sk);
-+ }
-+
-+ bbr->ecn_in_round |= rs->is_ece;
-+ bbr_calculate_bw_sample(sk, rs, &ctx);
-+
-+ if (bbr2_fast_path(sk, &update_model, rs, &ctx))
-+ goto out;
-+
-+ if (update_model)
-+ bbr2_update_model(sk, rs, &ctx);
-+
-+ bbr_update_gains(sk);
-+ bw = bbr_bw(sk);
-+ bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
-+ bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain,
-+ tp->snd_cwnd, &ctx);
-+ bbr2_bound_cwnd_for_inflight_model(sk);
-+
-+out:
-+ bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state;
-+ bbr->loss_in_cycle |= rs->lost > 0;
-+ bbr->ecn_in_cycle |= rs->delivered_ce > 0;
-+
-+ bbr_debug(sk, rs->acked_sacked, rs, &ctx);
-+}
-+
-+/* Module parameters that are settable by TCP_CONGESTION_PARAMS are declared
-+ * down here, so that the algorithm functions that use the parameters must use
-+ * the per-socket parameters; if they accidentally use the global version
-+ * then there will be a compile error.
-+ * TODO(ncardwell): move all per-socket parameters down to this section.
-+ */
-+
-+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE.
-+ * No loss response when 0. Max allowed value is 255.
-+ */
-+static u32 bbr_beta = BBR_UNIT * 30 / 100;
-+
-+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE.
-+ * Max allowed value is 255.
-+ */
-+static u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; /* 1/16 = 6.25% */
-+
-+/* The initial value for the ecn_alpha state variable. Default and max
-+ * BBR_UNIT (256), representing 1.0. This allows a flow to respond quickly
-+ * to congestion if the bottleneck is congested when the flow starts up.
-+ */
-+static u32 bbr_ecn_alpha_init = BBR_UNIT; /* 1.0, to respond quickly */
-+
-+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE.
-+ * No ECN based bounding when 0. Max allowed value is 255.
-+ */
-+static u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */
-+
-+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold.
-+ * Scaled by BBR_SCALE. Disabled when 0. Max allowed is 255.
-+ */
-+static u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */
-+
-+/* Max RTT (in usec) at which to use sender-side ECN logic.
-+ * Disabled when 0 (ECN allowed at any RTT).
-+ * Max allowed for the parameter is 524287 (0x7ffff) us, ~524 ms.
-+ */
-+static u32 bbr_ecn_max_rtt_us = 5000;
-+
-+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN
-+ * clears then use a multiplicative increase to quickly reprobe bw by
-+ * starting inflight probing at the given multiple of inflight_hi.
-+ * Default for this experimental knob is 0 (disabled).
-+ * Planned value for experiments: BBR_UNIT * 1 / 2 = 128, representing 0.5.
-+ */
-+static u32 bbr_ecn_reprobe_gain;
-+
-+/* Estimate bw probing has gone too far if loss rate exceeds this level.
-+ * Expressed as a fraction of BBR_UNIT; bbr2_init() caps it at 255.
-+ */
-+static u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */
-+
-+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N,
-+ * and loss rate is higher than bbr_loss_thresh.
-+ * Disabled if 0. Max allowed value is 15 (0xF).
-+ */
-+static u32 bbr_full_loss_cnt = 8;
-+
-+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh
-+ * meets this count. Max allowed value is 3.
-+ */
-+static u32 bbr_full_ecn_cnt = 2;
-+
-+/* Fraction of unutilized headroom to try to leave in path upon high loss.
-+ * Expressed as a fraction of BBR_UNIT; bbr2_init() caps it at 255.
-+ */
-+static u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100;
-+
-+/* Multiplier to get target inflight (as multiple of BDP) for PROBE_UP phase.
-+ * Default is 1.25x, as in BBR v1. Max allowed is 511.
-+ */
-+static u32 bbr_bw_probe_pif_gain = BBR_UNIT * 5 / 4;
-+
-+/* Multiplier to get Reno-style probe epoch duration as: k * BDP round trips.
-+ * If zero, disables this BBR v2 Reno-style BDP-scaled coexistence mechanism.
-+ * Max allowed is 511.
-+ */
-+static u32 bbr_bw_probe_reno_gain = BBR_UNIT;
-+
-+/* Max number of packet-timed rounds to wait before probing for bandwidth. If
-+ * we want to tolerate 1% random loss per round, and not have this cut our
-+ * inflight too much, we must probe for bw periodically on roughly this scale.
-+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance.
-+ * We aim to be fair with Reno/CUBIC up to a BDP of at least:
-+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
-+ */
-+static u32 bbr_bw_probe_max_rounds = 63;
-+
-+/* Max amount of randomness to inject in round counting for Reno-coexistence.
-+ * Max value is 15.
-+ */
-+static u32 bbr_bw_probe_rand_rounds = 2;
-+
-+/* Use BBR-native probe time scale starting at this many usec.
-+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least:
-+ * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs
-+ */
-+static u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */
-+
-+/* Use BBR-native probes spread over this many usec: */
-+static u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 secs */
-+
-+/* Undo the model changes made in loss recovery if recovery was spurious? */
-+static bool bbr_undo = true;
-+
-+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */
-+static bool bbr_fast_path = true; /* default: enabled */
-+
-+/* Use fast ack mode ?
-+ * Declared u32 so the storage type matches the "uint" module_param type
-+ * used below; bbr2_init() caps the value at 2 before storing it in
-+ * tp->fast_ack_mode.
-+ */
-+static u32 bbr_fast_ack_mode = 1; /* default: rwnd check off */
-+
-+/* How much to additively increase inflight_hi when entering REFILL? */
-+static u32 bbr_refill_add_inc; /* default: disabled */
-+
-+/* Expose the defaults above as module parameters. bbr2_init() reads them
-+ * (clamping the numeric ones) each time a socket is initialized.
-+ */
-+module_param_named(beta, bbr_beta, uint, 0644);
-+module_param_named(ecn_alpha_gain, bbr_ecn_alpha_gain, uint, 0644);
-+module_param_named(ecn_alpha_init, bbr_ecn_alpha_init, uint, 0644);
-+module_param_named(ecn_factor, bbr_ecn_factor, uint, 0644);
-+module_param_named(ecn_thresh, bbr_ecn_thresh, uint, 0644);
-+module_param_named(ecn_max_rtt_us, bbr_ecn_max_rtt_us, uint, 0644);
-+module_param_named(ecn_reprobe_gain, bbr_ecn_reprobe_gain, uint, 0644);
-+module_param_named(loss_thresh, bbr_loss_thresh, uint, 0664);
-+module_param_named(full_loss_cnt, bbr_full_loss_cnt, uint, 0664);
-+module_param_named(full_ecn_cnt, bbr_full_ecn_cnt, uint, 0664);
-+module_param_named(inflight_headroom, bbr_inflight_headroom, uint, 0664);
-+module_param_named(bw_probe_pif_gain, bbr_bw_probe_pif_gain, uint, 0664);
-+module_param_named(bw_probe_reno_gain, bbr_bw_probe_reno_gain, uint, 0664);
-+module_param_named(bw_probe_max_rounds, bbr_bw_probe_max_rounds, uint, 0664);
-+module_param_named(bw_probe_rand_rounds, bbr_bw_probe_rand_rounds, uint, 0664);
-+module_param_named(bw_probe_base_us, bbr_bw_probe_base_us, uint, 0664);
-+module_param_named(bw_probe_rand_us, bbr_bw_probe_rand_us, uint, 0664);
-+module_param_named(undo, bbr_undo, bool, 0664);
-+module_param_named(fast_path, bbr_fast_path, bool, 0664);
-+module_param_named(fast_ack_mode, bbr_fast_ack_mode, uint, 0664);
-+module_param_named(refill_add_inc, bbr_refill_add_inc, uint, 0664);
-+
-+/* Initialize BBR v2 per-socket state (the .init hook of tcp_bbr2_cong_ops):
-+ * run the shared bbr_init(), copy the module parameters into bbr->params
-+ * (numeric ones clamped with min_t() to a maximum), reset the v2 model
-+ * state, and set tp->fast_ack_mode and, when applicable, the
-+ * TCP_ECN_ECT_PERMANENT flag.
-+ */
-+static void bbr2_init(struct sock *sk)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr_init(sk); /* run shared init code for v1 and v2 */
-+
-+ /* BBR v2 parameters: */
-+ bbr->params.beta = min_t(u32, 0xFFU, bbr_beta);
-+ bbr->params.ecn_alpha_gain = min_t(u32, 0xFFU, bbr_ecn_alpha_gain);
-+ bbr->params.ecn_alpha_init = min_t(u32, BBR_UNIT, bbr_ecn_alpha_init);
-+ bbr->params.ecn_factor = min_t(u32, 0xFFU, bbr_ecn_factor);
-+ bbr->params.ecn_thresh = min_t(u32, 0xFFU, bbr_ecn_thresh);
-+ bbr->params.ecn_max_rtt_us = min_t(u32, 0x7ffffU, bbr_ecn_max_rtt_us);
-+ bbr->params.ecn_reprobe_gain = min_t(u32, 0x1FF, bbr_ecn_reprobe_gain);
-+ bbr->params.loss_thresh = min_t(u32, 0xFFU, bbr_loss_thresh);
-+ bbr->params.full_loss_cnt = min_t(u32, 0xFU, bbr_full_loss_cnt);
-+ bbr->params.full_ecn_cnt = min_t(u32, 0x3U, bbr_full_ecn_cnt);
-+ bbr->params.inflight_headroom =
-+ min_t(u32, 0xFFU, bbr_inflight_headroom);
-+ bbr->params.bw_probe_pif_gain =
-+ min_t(u32, 0x1FFU, bbr_bw_probe_pif_gain);
-+ bbr->params.bw_probe_reno_gain =
-+ min_t(u32, 0x1FFU, bbr_bw_probe_reno_gain);
-+ bbr->params.bw_probe_max_rounds =
-+ min_t(u32, 0xFFU, bbr_bw_probe_max_rounds);
-+ bbr->params.bw_probe_rand_rounds =
-+ min_t(u32, 0xFU, bbr_bw_probe_rand_rounds);
-+ bbr->params.bw_probe_base_us =
-+ min_t(u32, (1 << 26) - 1, bbr_bw_probe_base_us);
-+ bbr->params.bw_probe_rand_us =
-+ min_t(u32, (1 << 26) - 1, bbr_bw_probe_rand_us);
-+ bbr->params.undo = bbr_undo;
-+ bbr->params.fast_path = bbr_fast_path ? 1 : 0;
-+ bbr->params.refill_add_inc = min_t(u32, 0x3U, bbr_refill_add_inc);
-+
-+ /* BBR v2 state: */
-+ bbr->initialized = 1;
-+ /* Start sampling ECN mark rate after first full flight is ACKed: */
-+ bbr->loss_round_delivered = tp->delivered + 1;
-+ bbr->loss_round_start = 0;
-+ bbr->undo_bw_lo = 0;
-+ bbr->undo_inflight_lo = 0;
-+ bbr->undo_inflight_hi = 0;
-+ bbr->loss_events_in_round = 0;
-+ bbr->startup_ecn_rounds = 0;
-+ bbr2_reset_congestion_signals(sk);
-+ bbr->bw_lo = ~0U;
-+ bbr->bw_hi[0] = 0;
-+ bbr->bw_hi[1] = 0;
-+ bbr->inflight_lo = ~0U;
-+ bbr->inflight_hi = ~0U;
-+ bbr->bw_probe_up_cnt = ~0U;
-+ bbr->bw_probe_up_acks = 0;
-+ bbr->bw_probe_up_rounds = 0;
-+ bbr->probe_wait_us = 0;
-+ bbr->stopped_risky_probe = 0;
-+ bbr->ack_phase = BBR_ACKS_INIT;
-+ bbr->rounds_since_probe = 0;
-+ bbr->bw_probe_samples = 0;
-+ bbr->prev_probe_too_high = 0;
-+ bbr->ecn_eligible = 0;
-+ bbr->ecn_alpha = bbr->params.ecn_alpha_init;
-+ bbr->alpha_last_delivered = 0;
-+ bbr->alpha_last_delivered_ce = 0;
-+
-+ tp->fast_ack_mode = min_t(u32, 0x2U, bbr_fast_ack_mode);
-+
-+ if ((tp->ecn_flags & TCP_ECN_OK) && bbr_ecn_enable)
-+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
-+}
-+
-+/* Core TCP stack informs us that the given skb was just marked lost.
-+ *
-+ * Always records the loss for the current round and cycle. If bw probe
-+ * samples are active, also builds a rate sample from the skb's tx state
-+ * and, if bbr2_is_inflight_too_high() says so, calls
-+ * bbr2_handle_inflight_too_high() with tx_in_flight replaced by the value
-+ * from bbr2_inflight_hi_from_lost_skb().
-+ */
-+static void bbr2_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
-+ struct rate_sample rs;
-+
-+ /* Capture "current" data over the full round trip of loss,
-+ * to have a better chance to see the full capacity of the path.
-+ */
-+ if (!bbr->loss_in_round) /* first loss in this round trip? */
-+ bbr->loss_round_delivered = tp->delivered; /* set round trip */
-+ bbr->loss_in_round = 1;
-+ bbr->loss_in_cycle = 1;
-+
-+ if (!bbr->bw_probe_samples)
-+ return; /* not an skb sent while probing for bandwidth */
-+ if (unlikely(!scb->tx.delivered_mstamp))
-+ return; /* skb was SACKed, reneged, marked lost; ignore it */
-+ /* We are probing for bandwidth. Construct a rate sample that
-+ * estimates what happened in the flight leading up to this lost skb,
-+ * then see if the loss rate went too high, and if so at which packet.
-+ */
-+ memset(&rs, 0, sizeof(rs));
-+ rs.tx_in_flight = scb->tx.in_flight;
-+ rs.lost = tp->lost - scb->tx.lost;
-+ rs.is_app_limited = scb->tx.is_app_limited;
-+ if (bbr2_is_inflight_too_high(sk, &rs)) {
-+ rs.tx_in_flight = bbr2_inflight_hi_from_lost_skb(sk, &rs, skb);
-+ bbr2_handle_inflight_too_high(sk, &rs);
-+ }
-+}
-+
-+/* Revert short-term model if current loss recovery event was spurious.
-+ *
-+ * Full-pipe detection state and loss_in_round are reset unconditionally.
-+ * Returns the current snd_cwnd if params.undo is off; otherwise raises
-+ * bw_lo, inflight_lo and inflight_hi back to at least the saved undo_*
-+ * values and returns prior_cwnd.
-+ */
-+static u32 bbr2_undo_cwnd(struct sock *sk)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr->debug.undo = 1;
-+ bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
-+ bbr->full_bw_cnt = 0;
-+ bbr->loss_in_round = 0;
-+
-+ if (!bbr->params.undo)
-+ return tp->snd_cwnd;
-+
-+ /* Revert to cwnd and other state saved before loss episode. */
-+ bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo);
-+ bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo);
-+ bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi);
-+ return bbr->prior_cwnd;
-+}
-+
-+/* Entering loss recovery, so save state for when we undo recovery.
-+ * Saves cwnd via bbr_save_cwnd() plus bw_lo, inflight_lo and inflight_hi
-+ * into the undo_* fields, then returns the socket's current snd_ssthresh.
-+ */
-+static u32 bbr2_ssthresh(struct sock *sk)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr_save_cwnd(sk);
-+ /* For undo, save state that adapts based on loss signal. */
-+ bbr->undo_bw_lo = bbr->bw_lo;
-+ bbr->undo_inflight_lo = bbr->inflight_lo;
-+ bbr->undo_inflight_hi = bbr->inflight_hi;
-+ return tcp_sk(sk)->snd_ssthresh;
-+}
-+
-+/* Map bbr->mode (and, in BBR_PROBE_BW, bbr->cycle_idx) to the
-+ * enum tcp_bbr2_phase value reported by bbr2_get_info(). Unrecognized
-+ * mode or cycle_idx values map to BBR2_PHASE_INVALID.
-+ */
-+static enum tcp_bbr2_phase bbr2_get_phase(struct bbr *bbr)
-+{
-+ switch (bbr->mode) {
-+ case BBR_STARTUP:
-+ return BBR2_PHASE_STARTUP;
-+ case BBR_DRAIN:
-+ return BBR2_PHASE_DRAIN;
-+ case BBR_PROBE_BW:
-+ break;
-+ case BBR_PROBE_RTT:
-+ return BBR2_PHASE_PROBE_RTT;
-+ default:
-+ return BBR2_PHASE_INVALID;
-+ }
-+ switch (bbr->cycle_idx) {
-+ case BBR_BW_PROBE_UP:
-+ return BBR2_PHASE_PROBE_BW_UP;
-+ case BBR_BW_PROBE_DOWN:
-+ return BBR2_PHASE_PROBE_BW_DOWN;
-+ case BBR_BW_PROBE_CRUISE:
-+ return BBR2_PHASE_PROBE_BW_CRUISE;
-+ case BBR_BW_PROBE_REFILL:
-+ return BBR2_PHASE_PROBE_BW_REFILL;
-+ default:
-+ return BBR2_PHASE_INVALID;
-+ }
-+}
-+
-+/* Fill info->bbr2 when @ext has the INET_DIAG_BBRINFO or INET_DIAG_VEGASINFO
-+ * bit set. The u64 bandwidths are split into lsb/msb u32 halves; an unset
-+ * bw_lo (~0U) is reported as ~0ULL. Sets *attr to INET_DIAG_BBRINFO and
-+ * returns sizeof(info->bbr2), or returns 0 if neither bit is set.
-+ */
-+static size_t bbr2_get_info(struct sock *sk, u32 ext, int *attr,
-+ union tcp_cc_info *info)
-+{
-+ if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
-+ ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk));
-+ u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk));
-+ u64 bw_lo = bbr->bw_lo == ~0U ?
-+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo);
-+
-+ memset(&info->bbr2, 0, sizeof(info->bbr2));
-+ info->bbr2.bbr_bw_lsb = (u32)bw;
-+ info->bbr2.bbr_bw_msb = (u32)(bw >> 32);
-+ info->bbr2.bbr_min_rtt = bbr->min_rtt_us;
-+ info->bbr2.bbr_pacing_gain = bbr->pacing_gain;
-+ info->bbr2.bbr_cwnd_gain = bbr->cwnd_gain;
-+ info->bbr2.bbr_bw_hi_lsb = (u32)bw_hi;
-+ info->bbr2.bbr_bw_hi_msb = (u32)(bw_hi >> 32);
-+ info->bbr2.bbr_bw_lo_lsb = (u32)bw_lo;
-+ info->bbr2.bbr_bw_lo_msb = (u32)(bw_lo >> 32);
-+ info->bbr2.bbr_mode = bbr->mode;
-+ info->bbr2.bbr_phase = (__u8)bbr2_get_phase(bbr);
-+ info->bbr2.bbr_version = (__u8)2;
-+ info->bbr2.bbr_inflight_lo = bbr->inflight_lo;
-+ info->bbr2.bbr_inflight_hi = bbr->inflight_hi;
-+ info->bbr2.bbr_extra_acked = bbr_extra_acked(sk);
-+ *attr = INET_DIAG_BBRINFO;
-+ return sizeof(info->bbr2);
-+ }
-+ return 0;
-+}
-+
-+/* The .set_state hook of tcp_bbr2_cong_ops. On entering TCP_CA_Loss: record
-+ * the state, reset full_bw, and, if not probing for bandwidth and inflight_lo
-+ * is still unset (~0U), seed inflight_lo from the larger of snd_cwnd and
-+ * prior_cwnd. On leaving TCP_CA_Loss: raise snd_cwnd to at least prior_cwnd
-+ * and clear try_fast_path.
-+ */
-+static void bbr2_set_state(struct sock *sk, u8 new_state)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ if (new_state == TCP_CA_Loss) {
-+ struct rate_sample rs = { .losses = 1 };
-+ struct bbr_context ctx = { 0 };
-+
-+ bbr->prev_ca_state = TCP_CA_Loss;
-+ bbr->full_bw = 0;
-+ if (!bbr2_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) {
-+ /* bbr_adapt_lower_bounds() needs cwnd before
-+ * we suffered an RTO, to update inflight_lo:
-+ */
-+ bbr->inflight_lo =
-+ max(tp->snd_cwnd, bbr->prior_cwnd);
-+ }
-+ bbr_debug(sk, 0, &rs, &ctx);
-+ } else if (bbr->prev_ca_state == TCP_CA_Loss &&
-+ new_state != TCP_CA_Loss) {
-+ tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
-+ bbr->try_fast_path = 0; /* bound cwnd using latest model */
-+ }
-+}
-+
-+/* Congestion control ops table registered under the name "bbr2"; it wires
-+ * the bbr2_* handlers (plus several shared bbr_* helpers) into the hooks.
-+ */
-+static struct tcp_congestion_ops tcp_bbr2_cong_ops __read_mostly = {
-+ .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS,
-+ .name = "bbr2",
-+ .owner = THIS_MODULE,
-+ .init = bbr2_init,
-+ .cong_control = bbr2_main,
-+ .sndbuf_expand = bbr_sndbuf_expand,
-+ .skb_marked_lost = bbr2_skb_marked_lost,
-+ .undo_cwnd = bbr2_undo_cwnd,
-+ .cwnd_event = bbr_cwnd_event,
-+ .ssthresh = bbr2_ssthresh,
-+ .tso_segs = bbr_tso_segs,
-+ .get_info = bbr2_get_info,
-+ .set_state = bbr2_set_state,
-+};
-+
-+/* Module init: register tcp_bbr2_cong_ops. The BUILD_BUG_ON() checks at
-+ * build time that struct bbr is no larger than ICSK_CA_PRIV_SIZE.
-+ * Returns the result of tcp_register_congestion_control().
-+ */
-+static int __init bbr_register(void)
-+{
-+ BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
-+ return tcp_register_congestion_control(&tcp_bbr2_cong_ops);
-+}
-+
-+/* Module exit: unregister tcp_bbr2_cong_ops. */
-+static void __exit bbr_unregister(void)
-+{
-+ tcp_unregister_congestion_control(&tcp_bbr2_cong_ops);
-+}
-+
-+module_init(bbr_register);
-+module_exit(bbr_unregister);
-+
-+MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
-+MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
-+MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
-+MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
-+MODULE_AUTHOR("Priyaranjan Jha <priyarjha@google.com>");
-+MODULE_AUTHOR("Yousuk Seung <ysseung@google.com>");
-+MODULE_AUTHOR("Kevin Yang <yyd@google.com>");
-+MODULE_AUTHOR("Arjun Roy <arjunroy@google.com>");
-+
-+MODULE_LICENSE("Dual BSD/GPL");
-+MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
-diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
-index d3cae40749e8..0f268f2ff2e9 100644
---- a/net/ipv4/tcp_cong.c
-+++ b/net/ipv4/tcp_cong.c
-@@ -189,6 +189,7 @@ void tcp_init_congestion_control(struct sock *sk)
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- tcp_sk(sk)->prior_ssthresh = 0;
-+ tcp_sk(sk)->fast_ack_mode = 0;
- if (icsk->icsk_ca_ops->init)
- icsk->icsk_ca_ops->init(sk);
- if (tcp_ca_needs_ecn(sk))
-diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
-index 0640453fce54..8a455eb0c552 100644
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
- tcp_enter_quickack_mode(sk, 2);
- break;
- case INET_ECN_CE:
-- if (tcp_ca_needs_ecn(sk))
-+ if (tcp_ca_wants_ce_events(sk))
- tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
-
- if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
-@@ -360,7 +360,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
- tp->ecn_flags |= TCP_ECN_SEEN;
- break;
- default:
-- if (tcp_ca_needs_ecn(sk))
-+ if (tcp_ca_wants_ce_events(sk))
- tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
- tp->ecn_flags |= TCP_ECN_SEEN;
- break;
-@@ -1079,7 +1079,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
- */
- static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
- {
-+ struct sock *sk = (struct sock *)tp;
-+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-+
- tp->lost += tcp_skb_pcount(skb);
-+ if (ca_ops->skb_marked_lost)
-+ ca_ops->skb_marked_lost(sk, skb);
- }
-
- void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
-@@ -1460,6 +1465,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
- WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
- tcp_skb_pcount_add(skb, -pcount);
-
-+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. */
-+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
-+ "prev in_flight: %u skb in_flight: %u pcount: %u",
-+ TCP_SKB_CB(prev)->tx.in_flight,
-+ TCP_SKB_CB(skb)->tx.in_flight,
-+ pcount))
-+ TCP_SKB_CB(skb)->tx.in_flight = 0;
-+ else
-+ TCP_SKB_CB(skb)->tx.in_flight -= pcount;
-+ TCP_SKB_CB(prev)->tx.in_flight += pcount;
-+
- /* When we're adding to gso_segs == 1, gso_size will be zero,
- * in theory this shouldn't be necessary but as long as DSACK
- * code can come after this skb later on it's better to keep
-@@ -3812,6 +3828,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
-
- prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
- rs.prior_in_flight = tcp_packets_in_flight(tp);
-+ tcp_rate_check_app_limited(sk);
-
- /* ts_recent update must be made after we are sure that the packet
- * is in window.
-@@ -3910,6 +3927,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
- delivered = tcp_newly_delivered(sk, delivered, flag);
- lost = tp->lost - lost; /* freshly marked lost */
- rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
-+ rs.is_ece = !!(flag & FLAG_ECE);
- tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
- tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
- tcp_xmit_recovery(sk, rexmit);
-@@ -5509,13 +5527,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
-
- /* More than one full frame received... */
- if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
-+ (tp->fast_ack_mode == 1 ||
- /* ... and right edge of window advances far enough.
- * (tcp_recvmsg() will send ACK otherwise).
- * If application uses SO_RCVLOWAT, we want send ack now if
- * we have not received enough bytes to satisfy the condition.
- */
-- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
-- __tcp_select_window(sk) >= tp->rcv_wnd)) ||
-+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
-+ __tcp_select_window(sk) >= tp->rcv_wnd))) ||
- /* We ACK each frame or... */
- tcp_in_quickack_mode(sk) ||
- /* Protocol state mandates a one-time immediate ACK */
-diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
-index c69f4d966024..a9ceec2702b2 100644
---- a/net/ipv4/tcp_output.c
-+++ b/net/ipv4/tcp_output.c
-@@ -375,7 +375,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
- th->cwr = 1;
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
- }
-- } else if (!tcp_ca_needs_ecn(sk)) {
-+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
-+ !tcp_ca_needs_ecn(sk)) {
- /* ACK or retransmitted segment: clear ECT|CE */
- INET_ECN_dontxmit(sk);
- }
-@@ -1533,7 +1533,7 @@
- {
- struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *buff;
-- int old_factor;
-+ int old_factor, inflight_prev;
- long limit;
- int nlen;
- u8 flags;
-@@ -1610,6 +1611,15 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
-
- if (diff)
- tcp_adjust_pcount(sk, skb, diff);
-+
-+ /* Set buff tx.in_flight as if buff were sent by itself. */
-+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
-+ if (WARN_ONCE(inflight_prev < 0,
-+ "inconsistent: tx.in_flight: %u old_factor: %d",
-+ TCP_SKB_CB(skb)->tx.in_flight, old_factor))
-+ inflight_prev = 0;
-+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
-+ tcp_skb_pcount(buff);
- }
-
- /* Link BUFF into the send queue. */
-@@ -1993,13 +2003,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
- {
- const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-- u32 min_tso, tso_segs;
--
-- min_tso = ca_ops->min_tso_segs ?
-- ca_ops->min_tso_segs(sk) :
-- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
-+ u32 tso_segs;
-
-- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
-+ tso_segs = ca_ops->tso_segs ?
-+ ca_ops->tso_segs(sk, mss_now) :
-+ tcp_tso_autosize(sk, mss_now,
-+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs));
- return min_t(u32, tso_segs, sk->sk_gso_max_segs);
- }
-
-@@ -2635,6 +2644,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
- skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
- list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
- tcp_init_tso_segs(skb, mss_now);
-+ tcp_set_tx_in_flight(sk, skb);
- goto repair; /* Skip network transmission */
- }
-
-diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
-index a8f6d9d06f2e..a8b4c9504570 100644
---- a/net/ipv4/tcp_rate.c
-+++ b/net/ipv4/tcp_rate.c
-@@ -34,6 +34,24 @@
- * ready to send in the write queue.
- */
-
-+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ u32 in_flight;
-+
-+ /* Check, sanitize, and record packets in flight after skb was sent. */
-+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
-+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
-+ "insane in_flight %u cc %s mss %u "
-+ "cwnd %u pif %u %u %u %u\n",
-+ in_flight, inet_csk(sk)->icsk_ca_ops->name,
-+ tp->mss_cache, tp->snd_cwnd,
-+ tp->packets_out, tp->retrans_out,
-+ tp->sacked_out, tp->lost_out))
-+ in_flight = TCPCB_IN_FLIGHT_MAX;
-+ TCP_SKB_CB(skb)->tx.in_flight = in_flight;
-+}
-+
- /* Snapshot the current delivery information in the skb, to generate
- * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
- */
-@@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
- TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
- TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
- TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
-+ TCP_SKB_CB(skb)->tx.lost = tp->lost;
- TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
-+ tcp_set_tx_in_flight(sk, skb);
- }
-
- /* When an skb is sacked or acked, we fill in the rate sample with the (prior)
-@@ -91,18 +111,21 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
- if (!rs->prior_delivered ||
- tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
- scb->end_seq, rs->last_end_seq)) {
-+ rs->prior_lost = scb->tx.lost;
- rs->prior_delivered_ce = scb->tx.delivered_ce;
- rs->prior_delivered = scb->tx.delivered;
- rs->prior_mstamp = scb->tx.delivered_mstamp;
- rs->is_app_limited = scb->tx.is_app_limited;
- rs->is_retrans = scb->sacked & TCPCB_RETRANS;
- rs->last_end_seq = scb->end_seq;
-+ rs->tx_in_flight = scb->tx.in_flight;
-
- /* Record send time of most recently ACKed packet: */
- tp->first_tx_mstamp = tx_tstamp;
- /* Find the duration of the "send phase" of this window: */
-- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
-- scb->tx.first_tx_mstamp);
-+ rs->interval_us = tcp_stamp32_us_delta(
-+ tp->first_tx_mstamp,
-+ scb->tx.first_tx_mstamp);
-
- }
- /* Mark off the skb delivered once it's sacked to avoid being
-@@ -144,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- return;
- }
- rs->delivered = tp->delivered - rs->prior_delivered;
-+ rs->lost = tp->lost - rs->prior_lost;
-
- rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
- /* delivered_ce occupies less than 32 bits in the skb control block */
-@@ -155,7 +179,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- * longer phase.
- */
- snd_us = rs->interval_us; /* send phase */
-- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
-+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
- rs->prior_mstamp); /* ack phase */
- rs->interval_us = max(snd_us, ack_us);
-
-diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
-index cb79127f45c3..70e4de876a7f 100644
---- a/net/ipv4/tcp_timer.c
-+++ b/net/ipv4/tcp_timer.c
-@@ -605,6 +605,7 @@ void tcp_write_timer_handler(struct sock *sk)
- return;
- }
-
-+ tcp_rate_check_app_limited(sk);
- tcp_mstamp_refresh(tcp_sk(sk));
- event = icsk->icsk_pending;
-
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -287,7 +287,7 @@
- icsk->icsk_ack.quick = quickacks;
- }
-
-+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
--static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
- {
- struct inet_connection_sock *icsk = inet_csk(sk);
-
-@@ -295,6 +299,7 @@
- inet_csk_exit_pingpong_mode(sk);
- icsk->icsk_ack.ato = TCP_ATO_MIN;
- }
-+EXPORT_SYMBOL(tcp_enter_quickack_mode);
-
- /* Send ACKs quickly, if "quick" count is not exhausted
- * and the session is not interactive.
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -350,6 +350,7 @@
- struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
- bool force_schedule);
-
-+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
- static inline void tcp_dec_quickack_mode(struct sock *sk)
- {
- struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/SOURCES/tkg-bcachefs.patch b/SOURCES/tkg-bcachefs.patch
deleted file mode 100644
index aca2622..0000000
--- a/SOURCES/tkg-bcachefs.patch
+++ /dev/null
@@ -1,98955 +0,0 @@
-From 0a195912e89bd49e868e7e4879d137091f0762c8 Mon Sep 17 00:00:00 2001
-From: Piotr Gorski <lucjan.lucjanov@gmail.com>
-Date: Wed, 8 Nov 2023 08:07:19 +0100
-Subject: [PATCH] bcachefs
-
-Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
----
- MAINTAINERS | 23 +
- drivers/md/bcache/Kconfig | 10 +-
- drivers/md/bcache/Makefile | 4 +-
- drivers/md/bcache/bcache.h | 2 +-
- drivers/md/bcache/super.c | 1 -
- drivers/md/bcache/util.h | 3 +-
- fs/Kconfig | 1 +
- fs/Makefile | 1 +
- fs/bcachefs/Kconfig | 83 +
- fs/bcachefs/Makefile | 89 +
- fs/bcachefs/acl.c | 463 +++
- fs/bcachefs/acl.h | 60 +
- fs/bcachefs/alloc_background.c | 2159 +++++++++++
- fs/bcachefs/alloc_background.h | 259 ++
- fs/bcachefs/alloc_foreground.c | 1600 ++++++++
- fs/bcachefs/alloc_foreground.h | 224 ++
- fs/bcachefs/alloc_types.h | 126 +
- fs/bcachefs/backpointers.c | 860 +++++
- fs/bcachefs/backpointers.h | 140 +
- fs/bcachefs/bbpos.h | 37 +
- fs/bcachefs/bbpos_types.h | 18 +
- fs/bcachefs/bcachefs.h | 1161 ++++++
- fs/bcachefs/bcachefs_format.h | 2425 ++++++++++++
- fs/bcachefs/bcachefs_ioctl.h | 368 ++
- fs/bcachefs/bkey.c | 1120 ++++++
- fs/bcachefs/bkey.h | 778 ++++
- fs/bcachefs/bkey_buf.h | 61 +
- fs/bcachefs/bkey_cmp.h | 129 +
- fs/bcachefs/bkey_methods.c | 459 +++
- fs/bcachefs/bkey_methods.h | 179 +
- fs/bcachefs/bkey_sort.c | 201 +
- fs/bcachefs/bkey_sort.h | 54 +
- fs/bcachefs/bset.c | 1592 ++++++++
- fs/bcachefs/bset.h | 541 +++
- fs/bcachefs/btree_cache.c | 1215 ++++++
- fs/bcachefs/btree_cache.h | 131 +
- fs/bcachefs/btree_gc.c | 2145 +++++++++++
- fs/bcachefs/btree_gc.h | 114 +
- fs/bcachefs/btree_io.c | 2298 ++++++++++++
- fs/bcachefs/btree_io.h | 228 ++
- fs/bcachefs/btree_iter.c | 3242 +++++++++++++++++
- fs/bcachefs/btree_iter.h | 943 +++++
- fs/bcachefs/btree_journal_iter.c | 531 +++
- fs/bcachefs/btree_journal_iter.h | 57 +
- fs/bcachefs/btree_key_cache.c | 1072 ++++++
- fs/bcachefs/btree_key_cache.h | 48 +
- fs/bcachefs/btree_locking.c | 817 +++++
- fs/bcachefs/btree_locking.h | 433 +++
- fs/bcachefs/btree_trans_commit.c | 1145 ++++++
- fs/bcachefs/btree_types.h | 756 ++++
- fs/bcachefs/btree_update.c | 933 +++++
- fs/bcachefs/btree_update.h | 340 ++
- fs/bcachefs/btree_update_interior.c | 2474 +++++++++++++
- fs/bcachefs/btree_update_interior.h | 337 ++
- fs/bcachefs/btree_write_buffer.c | 375 ++
- fs/bcachefs/btree_write_buffer.h | 14 +
- fs/bcachefs/btree_write_buffer_types.h | 44 +
- fs/bcachefs/buckets.c | 2168 +++++++++++
- fs/bcachefs/buckets.h | 458 +++
- fs/bcachefs/buckets_types.h | 92 +
- fs/bcachefs/buckets_waiting_for_journal.c | 166 +
- fs/bcachefs/buckets_waiting_for_journal.h | 15 +
- .../buckets_waiting_for_journal_types.h | 23 +
- fs/bcachefs/chardev.c | 784 ++++
- fs/bcachefs/chardev.h | 31 +
- fs/bcachefs/checksum.c | 804 ++++
- fs/bcachefs/checksum.h | 213 ++
- fs/bcachefs/clock.c | 193 +
- fs/bcachefs/clock.h | 38 +
- fs/bcachefs/clock_types.h | 37 +
- fs/bcachefs/compress.c | 728 ++++
- fs/bcachefs/compress.h | 73 +
- fs/bcachefs/counters.c | 107 +
- fs/bcachefs/counters.h | 17 +
- fs/bcachefs/darray.h | 93 +
- fs/bcachefs/data_update.c | 551 +++
- fs/bcachefs/data_update.h | 44 +
- fs/bcachefs/debug.c | 954 +++++
- fs/bcachefs/debug.h | 32 +
- fs/bcachefs/dirent.c | 577 +++
- fs/bcachefs/dirent.h | 70 +
- fs/bcachefs/disk_groups.c | 620 ++++
- fs/bcachefs/disk_groups.h | 111 +
- fs/bcachefs/disk_groups_types.h | 18 +
- fs/bcachefs/ec.c | 1969 ++++++++++
- fs/bcachefs/ec.h | 260 ++
- fs/bcachefs/ec_types.h | 41 +
- fs/bcachefs/errcode.c | 68 +
- fs/bcachefs/errcode.h | 269 ++
- fs/bcachefs/error.c | 299 ++
- fs/bcachefs/error.h | 242 ++
- fs/bcachefs/extent_update.c | 173 +
- fs/bcachefs/extent_update.h | 12 +
- fs/bcachefs/extents.c | 1516 ++++++++
- fs/bcachefs/extents.h | 765 ++++
- fs/bcachefs/extents_types.h | 40 +
- fs/bcachefs/eytzinger.h | 281 ++
- fs/bcachefs/fifo.h | 127 +
- fs/bcachefs/fs-common.c | 501 +++
- fs/bcachefs/fs-common.h | 43 +
- fs/bcachefs/fs-io-buffered.c | 1106 ++++++
- fs/bcachefs/fs-io-buffered.h | 27 +
- fs/bcachefs/fs-io-direct.c | 680 ++++
- fs/bcachefs/fs-io-direct.h | 16 +
- fs/bcachefs/fs-io-pagecache.c | 791 ++++
- fs/bcachefs/fs-io-pagecache.h | 176 +
- fs/bcachefs/fs-io.c | 1072 ++++++
- fs/bcachefs/fs-io.h | 184 +
- fs/bcachefs/fs-ioctl.c | 572 +++
- fs/bcachefs/fs-ioctl.h | 81 +
- fs/bcachefs/fs.c | 1977 ++++++++++
- fs/bcachefs/fs.h | 209 ++
- fs/bcachefs/fsck.c | 2490 +++++++++++++
- fs/bcachefs/fsck.h | 15 +
- fs/bcachefs/inode.c | 1198 ++++++
- fs/bcachefs/inode.h | 217 ++
- fs/bcachefs/io_misc.c | 524 +++
- fs/bcachefs/io_misc.h | 34 +
- fs/bcachefs/io_read.c | 1210 ++++++
- fs/bcachefs/io_read.h | 158 +
- fs/bcachefs/io_write.c | 1675 +++++++++
- fs/bcachefs/io_write.h | 110 +
- fs/bcachefs/io_write_types.h | 96 +
- fs/bcachefs/journal.c | 1468 ++++++++
- fs/bcachefs/journal.h | 549 +++
- fs/bcachefs/journal_io.c | 1947 ++++++++++
- fs/bcachefs/journal_io.h | 65 +
- fs/bcachefs/journal_reclaim.c | 876 +++++
- fs/bcachefs/journal_reclaim.h | 87 +
- fs/bcachefs/journal_sb.c | 219 ++
- fs/bcachefs/journal_sb.h | 24 +
- fs/bcachefs/journal_seq_blacklist.c | 320 ++
- fs/bcachefs/journal_seq_blacklist.h | 22 +
- fs/bcachefs/journal_types.h | 345 ++
- fs/bcachefs/keylist.c | 52 +
- fs/bcachefs/keylist.h | 74 +
- fs/bcachefs/keylist_types.h | 16 +
- fs/bcachefs/logged_ops.c | 112 +
- fs/bcachefs/logged_ops.h | 20 +
- fs/bcachefs/lru.c | 164 +
- fs/bcachefs/lru.h | 69 +
- fs/bcachefs/mean_and_variance.c | 159 +
- fs/bcachefs/mean_and_variance.h | 198 +
- fs/bcachefs/mean_and_variance_test.c | 240 ++
- fs/bcachefs/migrate.c | 179 +
- fs/bcachefs/migrate.h | 7 +
- fs/bcachefs/move.c | 1198 ++++++
- fs/bcachefs/move.h | 139 +
- fs/bcachefs/move_types.h | 36 +
- fs/bcachefs/movinggc.c | 431 +++
- fs/bcachefs/movinggc.h | 12 +
- fs/bcachefs/nocow_locking.c | 144 +
- fs/bcachefs/nocow_locking.h | 50 +
- fs/bcachefs/nocow_locking_types.h | 20 +
- fs/bcachefs/opts.c | 602 +++
- fs/bcachefs/opts.h | 564 +++
- fs/bcachefs/printbuf.c | 425 +++
- fs/bcachefs/printbuf.h | 284 ++
- fs/bcachefs/quota.c | 979 +++++
- fs/bcachefs/quota.h | 74 +
- fs/bcachefs/quota_types.h | 43 +
- fs/bcachefs/rebalance.c | 464 +++
- fs/bcachefs/rebalance.h | 27 +
- fs/bcachefs/rebalance_types.h | 37 +
- fs/bcachefs/recovery.c | 1057 ++++++
- fs/bcachefs/recovery.h | 33 +
- fs/bcachefs/recovery_types.h | 53 +
- fs/bcachefs/reflink.c | 406 +++
- fs/bcachefs/reflink.h | 81 +
- fs/bcachefs/replicas.c | 1050 ++++++
- fs/bcachefs/replicas.h | 91 +
- fs/bcachefs/replicas_types.h | 27 +
- fs/bcachefs/sb-clean.c | 398 ++
- fs/bcachefs/sb-clean.h | 16 +
- fs/bcachefs/sb-errors.c | 172 +
- fs/bcachefs/sb-errors.h | 270 ++
- fs/bcachefs/sb-errors_types.h | 16 +
- fs/bcachefs/sb-members.c | 420 +++
- fs/bcachefs/sb-members.h | 227 ++
- fs/bcachefs/seqmutex.h | 48 +
- fs/bcachefs/siphash.c | 173 +
- fs/bcachefs/siphash.h | 87 +
- fs/bcachefs/six.c | 917 +++++
- fs/bcachefs/six.h | 393 ++
- fs/bcachefs/snapshot.c | 1713 +++++++++
- fs/bcachefs/snapshot.h | 268 ++
- fs/bcachefs/str_hash.h | 370 ++
- fs/bcachefs/subvolume.c | 437 +++
- fs/bcachefs/subvolume.h | 35 +
- fs/bcachefs/subvolume_types.h | 31 +
- fs/bcachefs/super-io.c | 1266 +++++++
- fs/bcachefs/super-io.h | 94 +
- fs/bcachefs/super.c | 2017 ++++++++++
- fs/bcachefs/super.h | 52 +
- fs/bcachefs/super_types.h | 40 +
- fs/bcachefs/sysfs.c | 1034 ++++++
- fs/bcachefs/sysfs.h | 48 +
- fs/bcachefs/tests.c | 919 +++++
- fs/bcachefs/tests.h | 15 +
- fs/bcachefs/trace.c | 17 +
- fs/bcachefs/trace.h | 1334 +++++++
- fs/bcachefs/two_state_shared_lock.c | 8 +
- fs/bcachefs/two_state_shared_lock.h | 59 +
- fs/bcachefs/util.c | 1159 ++++++
- fs/bcachefs/util.h | 833 +++++
- fs/bcachefs/varint.c | 129 +
- fs/bcachefs/varint.h | 11 +
- fs/bcachefs/vstructs.h | 63 +
- fs/bcachefs/xattr.c | 643 ++++
- fs/bcachefs/xattr.h | 50 +
- fs/dcache.c | 12 +-
- .../md/bcache => include/linux}/closure.h | 56 +-
- include/linux/dcache.h | 1 +
- include/linux/exportfs.h | 6 +
- include/linux/generic-radix-tree.h | 68 +-
- include/linux/sched.h | 1 +
- include/linux/string_helpers.h | 4 +-
- init/init_task.c | 1 +
- kernel/locking/mutex.c | 3 +
- kernel/stacktrace.c | 2 +
- lib/Kconfig | 3 +
- lib/Kconfig.debug | 9 +
- lib/Makefile | 2 +
- {drivers/md/bcache => lib}/closure.c | 45 +-
- lib/errname.c | 1 +
- lib/generic-radix-tree.c | 76 +-
- lib/string_helpers.c | 10 +-
- tools/objtool/noreturns.h | 2 +
- 228 files changed, 96727 insertions(+), 60 deletions(-)
- create mode 100644 fs/bcachefs/Kconfig
- create mode 100644 fs/bcachefs/Makefile
- create mode 100644 fs/bcachefs/acl.c
- create mode 100644 fs/bcachefs/acl.h
- create mode 100644 fs/bcachefs/alloc_background.c
- create mode 100644 fs/bcachefs/alloc_background.h
- create mode 100644 fs/bcachefs/alloc_foreground.c
- create mode 100644 fs/bcachefs/alloc_foreground.h
- create mode 100644 fs/bcachefs/alloc_types.h
- create mode 100644 fs/bcachefs/backpointers.c
- create mode 100644 fs/bcachefs/backpointers.h
- create mode 100644 fs/bcachefs/bbpos.h
- create mode 100644 fs/bcachefs/bbpos_types.h
- create mode 100644 fs/bcachefs/bcachefs.h
- create mode 100644 fs/bcachefs/bcachefs_format.h
- create mode 100644 fs/bcachefs/bcachefs_ioctl.h
- create mode 100644 fs/bcachefs/bkey.c
- create mode 100644 fs/bcachefs/bkey.h
- create mode 100644 fs/bcachefs/bkey_buf.h
- create mode 100644 fs/bcachefs/bkey_cmp.h
- create mode 100644 fs/bcachefs/bkey_methods.c
- create mode 100644 fs/bcachefs/bkey_methods.h
- create mode 100644 fs/bcachefs/bkey_sort.c
- create mode 100644 fs/bcachefs/bkey_sort.h
- create mode 100644 fs/bcachefs/bset.c
- create mode 100644 fs/bcachefs/bset.h
- create mode 100644 fs/bcachefs/btree_cache.c
- create mode 100644 fs/bcachefs/btree_cache.h
- create mode 100644 fs/bcachefs/btree_gc.c
- create mode 100644 fs/bcachefs/btree_gc.h
- create mode 100644 fs/bcachefs/btree_io.c
- create mode 100644 fs/bcachefs/btree_io.h
- create mode 100644 fs/bcachefs/btree_iter.c
- create mode 100644 fs/bcachefs/btree_iter.h
- create mode 100644 fs/bcachefs/btree_journal_iter.c
- create mode 100644 fs/bcachefs/btree_journal_iter.h
- create mode 100644 fs/bcachefs/btree_key_cache.c
- create mode 100644 fs/bcachefs/btree_key_cache.h
- create mode 100644 fs/bcachefs/btree_locking.c
- create mode 100644 fs/bcachefs/btree_locking.h
- create mode 100644 fs/bcachefs/btree_trans_commit.c
- create mode 100644 fs/bcachefs/btree_types.h
- create mode 100644 fs/bcachefs/btree_update.c
- create mode 100644 fs/bcachefs/btree_update.h
- create mode 100644 fs/bcachefs/btree_update_interior.c
- create mode 100644 fs/bcachefs/btree_update_interior.h
- create mode 100644 fs/bcachefs/btree_write_buffer.c
- create mode 100644 fs/bcachefs/btree_write_buffer.h
- create mode 100644 fs/bcachefs/btree_write_buffer_types.h
- create mode 100644 fs/bcachefs/buckets.c
- create mode 100644 fs/bcachefs/buckets.h
- create mode 100644 fs/bcachefs/buckets_types.h
- create mode 100644 fs/bcachefs/buckets_waiting_for_journal.c
- create mode 100644 fs/bcachefs/buckets_waiting_for_journal.h
- create mode 100644 fs/bcachefs/buckets_waiting_for_journal_types.h
- create mode 100644 fs/bcachefs/chardev.c
- create mode 100644 fs/bcachefs/chardev.h
- create mode 100644 fs/bcachefs/checksum.c
- create mode 100644 fs/bcachefs/checksum.h
- create mode 100644 fs/bcachefs/clock.c
- create mode 100644 fs/bcachefs/clock.h
- create mode 100644 fs/bcachefs/clock_types.h
- create mode 100644 fs/bcachefs/compress.c
- create mode 100644 fs/bcachefs/compress.h
- create mode 100644 fs/bcachefs/counters.c
- create mode 100644 fs/bcachefs/counters.h
- create mode 100644 fs/bcachefs/darray.h
- create mode 100644 fs/bcachefs/data_update.c
- create mode 100644 fs/bcachefs/data_update.h
- create mode 100644 fs/bcachefs/debug.c
- create mode 100644 fs/bcachefs/debug.h
- create mode 100644 fs/bcachefs/dirent.c
- create mode 100644 fs/bcachefs/dirent.h
- create mode 100644 fs/bcachefs/disk_groups.c
- create mode 100644 fs/bcachefs/disk_groups.h
- create mode 100644 fs/bcachefs/disk_groups_types.h
- create mode 100644 fs/bcachefs/ec.c
- create mode 100644 fs/bcachefs/ec.h
- create mode 100644 fs/bcachefs/ec_types.h
- create mode 100644 fs/bcachefs/errcode.c
- create mode 100644 fs/bcachefs/errcode.h
- create mode 100644 fs/bcachefs/error.c
- create mode 100644 fs/bcachefs/error.h
- create mode 100644 fs/bcachefs/extent_update.c
- create mode 100644 fs/bcachefs/extent_update.h
- create mode 100644 fs/bcachefs/extents.c
- create mode 100644 fs/bcachefs/extents.h
- create mode 100644 fs/bcachefs/extents_types.h
- create mode 100644 fs/bcachefs/eytzinger.h
- create mode 100644 fs/bcachefs/fifo.h
- create mode 100644 fs/bcachefs/fs-common.c
- create mode 100644 fs/bcachefs/fs-common.h
- create mode 100644 fs/bcachefs/fs-io-buffered.c
- create mode 100644 fs/bcachefs/fs-io-buffered.h
- create mode 100644 fs/bcachefs/fs-io-direct.c
- create mode 100644 fs/bcachefs/fs-io-direct.h
- create mode 100644 fs/bcachefs/fs-io-pagecache.c
- create mode 100644 fs/bcachefs/fs-io-pagecache.h
- create mode 100644 fs/bcachefs/fs-io.c
- create mode 100644 fs/bcachefs/fs-io.h
- create mode 100644 fs/bcachefs/fs-ioctl.c
- create mode 100644 fs/bcachefs/fs-ioctl.h
- create mode 100644 fs/bcachefs/fs.c
- create mode 100644 fs/bcachefs/fs.h
- create mode 100644 fs/bcachefs/fsck.c
- create mode 100644 fs/bcachefs/fsck.h
- create mode 100644 fs/bcachefs/inode.c
- create mode 100644 fs/bcachefs/inode.h
- create mode 100644 fs/bcachefs/io_misc.c
- create mode 100644 fs/bcachefs/io_misc.h
- create mode 100644 fs/bcachefs/io_read.c
- create mode 100644 fs/bcachefs/io_read.h
- create mode 100644 fs/bcachefs/io_write.c
- create mode 100644 fs/bcachefs/io_write.h
- create mode 100644 fs/bcachefs/io_write_types.h
- create mode 100644 fs/bcachefs/journal.c
- create mode 100644 fs/bcachefs/journal.h
- create mode 100644 fs/bcachefs/journal_io.c
- create mode 100644 fs/bcachefs/journal_io.h
- create mode 100644 fs/bcachefs/journal_reclaim.c
- create mode 100644 fs/bcachefs/journal_reclaim.h
- create mode 100644 fs/bcachefs/journal_sb.c
- create mode 100644 fs/bcachefs/journal_sb.h
- create mode 100644 fs/bcachefs/journal_seq_blacklist.c
- create mode 100644 fs/bcachefs/journal_seq_blacklist.h
- create mode 100644 fs/bcachefs/journal_types.h
- create mode 100644 fs/bcachefs/keylist.c
- create mode 100644 fs/bcachefs/keylist.h
- create mode 100644 fs/bcachefs/keylist_types.h
- create mode 100644 fs/bcachefs/logged_ops.c
- create mode 100644 fs/bcachefs/logged_ops.h
- create mode 100644 fs/bcachefs/lru.c
- create mode 100644 fs/bcachefs/lru.h
- create mode 100644 fs/bcachefs/mean_and_variance.c
- create mode 100644 fs/bcachefs/mean_and_variance.h
- create mode 100644 fs/bcachefs/mean_and_variance_test.c
- create mode 100644 fs/bcachefs/migrate.c
- create mode 100644 fs/bcachefs/migrate.h
- create mode 100644 fs/bcachefs/move.c
- create mode 100644 fs/bcachefs/move.h
- create mode 100644 fs/bcachefs/move_types.h
- create mode 100644 fs/bcachefs/movinggc.c
- create mode 100644 fs/bcachefs/movinggc.h
- create mode 100644 fs/bcachefs/nocow_locking.c
- create mode 100644 fs/bcachefs/nocow_locking.h
- create mode 100644 fs/bcachefs/nocow_locking_types.h
- create mode 100644 fs/bcachefs/opts.c
- create mode 100644 fs/bcachefs/opts.h
- create mode 100644 fs/bcachefs/printbuf.c
- create mode 100644 fs/bcachefs/printbuf.h
- create mode 100644 fs/bcachefs/quota.c
- create mode 100644 fs/bcachefs/quota.h
- create mode 100644 fs/bcachefs/quota_types.h
- create mode 100644 fs/bcachefs/rebalance.c
- create mode 100644 fs/bcachefs/rebalance.h
- create mode 100644 fs/bcachefs/rebalance_types.h
- create mode 100644 fs/bcachefs/recovery.c
- create mode 100644 fs/bcachefs/recovery.h
- create mode 100644 fs/bcachefs/recovery_types.h
- create mode 100644 fs/bcachefs/reflink.c
- create mode 100644 fs/bcachefs/reflink.h
- create mode 100644 fs/bcachefs/replicas.c
- create mode 100644 fs/bcachefs/replicas.h
- create mode 100644 fs/bcachefs/replicas_types.h
- create mode 100644 fs/bcachefs/sb-clean.c
- create mode 100644 fs/bcachefs/sb-clean.h
- create mode 100644 fs/bcachefs/sb-errors.c
- create mode 100644 fs/bcachefs/sb-errors.h
- create mode 100644 fs/bcachefs/sb-errors_types.h
- create mode 100644 fs/bcachefs/sb-members.c
- create mode 100644 fs/bcachefs/sb-members.h
- create mode 100644 fs/bcachefs/seqmutex.h
- create mode 100644 fs/bcachefs/siphash.c
- create mode 100644 fs/bcachefs/siphash.h
- create mode 100644 fs/bcachefs/six.c
- create mode 100644 fs/bcachefs/six.h
- create mode 100644 fs/bcachefs/snapshot.c
- create mode 100644 fs/bcachefs/snapshot.h
- create mode 100644 fs/bcachefs/str_hash.h
- create mode 100644 fs/bcachefs/subvolume.c
- create mode 100644 fs/bcachefs/subvolume.h
- create mode 100644 fs/bcachefs/subvolume_types.h
- create mode 100644 fs/bcachefs/super-io.c
- create mode 100644 fs/bcachefs/super-io.h
- create mode 100644 fs/bcachefs/super.c
- create mode 100644 fs/bcachefs/super.h
- create mode 100644 fs/bcachefs/super_types.h
- create mode 100644 fs/bcachefs/sysfs.c
- create mode 100644 fs/bcachefs/sysfs.h
- create mode 100644 fs/bcachefs/tests.c
- create mode 100644 fs/bcachefs/tests.h
- create mode 100644 fs/bcachefs/trace.c
- create mode 100644 fs/bcachefs/trace.h
- create mode 100644 fs/bcachefs/two_state_shared_lock.c
- create mode 100644 fs/bcachefs/two_state_shared_lock.h
- create mode 100644 fs/bcachefs/util.c
- create mode 100644 fs/bcachefs/util.h
- create mode 100644 fs/bcachefs/varint.c
- create mode 100644 fs/bcachefs/varint.h
- create mode 100644 fs/bcachefs/vstructs.h
- create mode 100644 fs/bcachefs/xattr.c
- create mode 100644 fs/bcachefs/xattr.h
- rename {drivers/md/bcache => include/linux}/closure.h (91%)
- rename {drivers/md/bcache => lib}/closure.c (83%)
-
-diff --git a/MAINTAINERS b/MAINTAINERS
-index 2894f0777537..ce1c7073f40c 100644
---- a/MAINTAINERS
-+++ b/MAINTAINERS
-@@ -3482,6 +3482,14 @@ W: http://bcache.evilpiepirate.org
- C: irc://irc.oftc.net/bcache
- F: drivers/md/bcache/
-
-+BCACHEFS
-+M: Kent Overstreet <kent.overstreet@linux.dev>
-+R: Brian Foster <bfoster@redhat.com>
-+L: linux-bcachefs@vger.kernel.org
-+S: Supported
-+C: irc://irc.oftc.net/bcache
-+F: fs/bcachefs/
-+
- BDISP ST MEDIA DRIVER
- M: Fabien Dessenne <fabien.dessenne@foss.st.com>
- L: linux-media@vger.kernel.org
-@@ -5068,6 +5076,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
- F: Documentation/devicetree/bindings/timer/
- F: drivers/clocksource/
-
-+CLOSURES
-+M: Kent Overstreet <kent.overstreet@linux.dev>
-+L: linux-bcachefs@vger.kernel.org
-+S: Supported
-+C: irc://irc.oftc.net/bcache
-+F: include/linux/closure.h
-+F: lib/closure.c
-+
- CMPC ACPI DRIVER
- M: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
- M: Daniel Oliveira Nascimento <don@syst.com.br>
-@@ -8748,6 +8764,13 @@ S: Supported
- T: git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm.git
- F: drivers/pmdomain/
-
-+GENERIC RADIX TREE
-+M: Kent Overstreet <kent.overstreet@linux.dev>
-+S: Supported
-+C: irc://irc.oftc.net/bcache
-+F: include/linux/generic-radix-tree.h
-+F: lib/generic-radix-tree.c
-+
- GENERIC RESISTIVE TOUCHSCREEN ADC DRIVER
- M: Eugen Hristev <eugen.hristev@microchip.com>
- L: linux-input@vger.kernel.org
-diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
-index 529c9d04e9a4..b2d10063d35f 100644
---- a/drivers/md/bcache/Kconfig
-+++ b/drivers/md/bcache/Kconfig
-@@ -4,6 +4,7 @@ config BCACHE
- tristate "Block device as cache"
- select BLOCK_HOLDER_DEPRECATED if SYSFS
- select CRC64
-+ select CLOSURES
- help
- Allows a block device to be used as cache for other devices; uses
- a btree for indexing and the layout is optimized for SSDs.
-@@ -19,15 +20,6 @@ config BCACHE_DEBUG
- Enables extra debugging tools, allows expensive runtime checks to be
- turned on.
-
--config BCACHE_CLOSURES_DEBUG
-- bool "Debug closures"
-- depends on BCACHE
-- select DEBUG_FS
-- help
-- Keeps all active closures in a linked list and provides a debugfs
-- interface to list them, which makes it possible to see asynchronous
-- operations that get stuck.
--
- config BCACHE_ASYNC_REGISTRATION
- bool "Asynchronous device registration"
- depends on BCACHE
-diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
-index 5b87e59676b8..054e8a33a7ab 100644
---- a/drivers/md/bcache/Makefile
-+++ b/drivers/md/bcache/Makefile
-@@ -2,6 +2,6 @@
-
- obj-$(CONFIG_BCACHE) += bcache.o
-
--bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\
-- io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
-+bcache-y := alloc.o bset.o btree.o debug.o extents.o io.o\
-+ journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
- util.o writeback.o features.o
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 5a79bb3c272f..7c0d00432612 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -179,6 +179,7 @@
- #define pr_fmt(fmt) "bcache: %s() " fmt, __func__
-
- #include <linux/bio.h>
-+#include <linux/closure.h>
- #include <linux/kobject.h>
- #include <linux/list.h>
- #include <linux/mutex.h>
-@@ -192,7 +193,6 @@
- #include "bcache_ondisk.h"
- #include "bset.h"
- #include "util.h"
--#include "closure.h"
-
- struct bucket {
- atomic_t pin;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 0ae2b3676293..4affe587586c 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -2905,7 +2905,6 @@ static int __init bcache_init(void)
- goto err;
-
- bch_debug_init();
-- closure_debug_init();
-
- bcache_is_reboot = false;
-
-diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
-index 6f3cb7c92130..f61ab1bada6c 100644
---- a/drivers/md/bcache/util.h
-+++ b/drivers/md/bcache/util.h
-@@ -4,6 +4,7 @@
- #define _BCACHE_UTIL_H
-
- #include <linux/blkdev.h>
-+#include <linux/closure.h>
- #include <linux/errno.h>
- #include <linux/kernel.h>
- #include <linux/sched/clock.h>
-@@ -13,8 +14,6 @@
- #include <linux/workqueue.h>
- #include <linux/crc64.h>
-
--#include "closure.h"
--
- struct closure;
-
- #ifdef CONFIG_BCACHE_DEBUG
-diff --git a/fs/Kconfig b/fs/Kconfig
-index aa7e03cc1941..0d6cb927872a 100644
---- a/fs/Kconfig
-+++ b/fs/Kconfig
-@@ -48,6 +48,7 @@ source "fs/ocfs2/Kconfig"
- source "fs/btrfs/Kconfig"
- source "fs/nilfs2/Kconfig"
- source "fs/f2fs/Kconfig"
-+source "fs/bcachefs/Kconfig"
- source "fs/zonefs/Kconfig"
-
- endif # BLOCK
-diff --git a/fs/Makefile b/fs/Makefile
-index f9541f40be4e..75522f88e763 100644
---- a/fs/Makefile
-+++ b/fs/Makefile
-@@ -123,6 +123,7 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/
- obj-$(CONFIG_BTRFS_FS) += btrfs/
- obj-$(CONFIG_GFS2_FS) += gfs2/
- obj-$(CONFIG_F2FS_FS) += f2fs/
-+obj-$(CONFIG_BCACHEFS_FS) += bcachefs/
- obj-$(CONFIG_CEPH_FS) += ceph/
- obj-$(CONFIG_PSTORE) += pstore/
- obj-$(CONFIG_EFIVAR_FS) += efivarfs/
-diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
-new file mode 100644
-index 000000000000..c08c2c7d6fbb
---- /dev/null
-+++ b/fs/bcachefs/Kconfig
-@@ -0,0 +1,83 @@
-+
-+config BCACHEFS_FS
-+ tristate "bcachefs filesystem support (EXPERIMENTAL)"
-+ depends on BLOCK
-+ select EXPORTFS
-+ select CLOSURES
-+ select LIBCRC32C
-+ select CRC64
-+ select FS_POSIX_ACL
-+ select LZ4_COMPRESS
-+ select LZ4_DECOMPRESS
-+ select LZ4HC_COMPRESS
-+ select LZ4HC_DECOMPRESS
-+ select ZLIB_DEFLATE
-+ select ZLIB_INFLATE
-+ select ZSTD_COMPRESS
-+ select ZSTD_DECOMPRESS
-+ select CRYPTO_SHA256
-+ select CRYPTO_CHACHA20
-+ select CRYPTO_POLY1305
-+ select KEYS
-+ select RAID6_PQ
-+ select XOR_BLOCKS
-+ select XXHASH
-+ select SRCU
-+ select SYMBOLIC_ERRNAME
-+ help
-+ The bcachefs filesystem - a modern, copy on write filesystem, with
-+ support for multiple devices, compression, checksumming, etc.
-+
-+config BCACHEFS_QUOTA
-+ bool "bcachefs quota support"
-+ depends on BCACHEFS_FS
-+ select QUOTACTL
-+
-+config BCACHEFS_POSIX_ACL
-+ bool "bcachefs POSIX ACL support"
-+ depends on BCACHEFS_FS
-+ select FS_POSIX_ACL
-+
-+config BCACHEFS_DEBUG_TRANSACTIONS
-+ bool "bcachefs runtime info"
-+ depends on BCACHEFS_FS
-+ help
-+ This makes the list of running btree transactions available in debugfs.
-+
-+ This is a highly useful debugging feature but does add a small amount of overhead.
-+
-+config BCACHEFS_DEBUG
-+ bool "bcachefs debugging"
-+ depends on BCACHEFS_FS
-+ help
-+ Enables many extra debugging checks and assertions.
-+
-+ The resulting code will be significantly slower than normal; you
-+ probably shouldn't select this option unless you're a developer.
-+
-+config BCACHEFS_TESTS
-+ bool "bcachefs unit and performance tests"
-+ depends on BCACHEFS_FS
-+ help
-+ Include some unit and performance tests for the core btree code
-+
-+config BCACHEFS_LOCK_TIME_STATS
-+ bool "bcachefs lock time statistics"
-+ depends on BCACHEFS_FS
-+ help
-+ Expose statistics for how long we held a lock in debugfs
-+
-+config BCACHEFS_NO_LATENCY_ACCT
-+ bool "disable latency accounting and time stats"
-+ depends on BCACHEFS_FS
-+ help
-+ This disables device latency tracking and time stats, only for performance testing
-+
-+config MEAN_AND_VARIANCE_UNIT_TEST
-+ tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
-+ depends on KUNIT
-+ depends on BCACHEFS_FS
-+ default KUNIT_ALL_TESTS
-+ help
-+ This option enables the kunit tests for mean_and_variance module.
-+ If unsure, say N.
-diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
-new file mode 100644
-index 000000000000..45b64f89258c
---- /dev/null
-+++ b/fs/bcachefs/Makefile
-@@ -0,0 +1,89 @@
-+
-+obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o
-+
-+bcachefs-y := \
-+ acl.o \
-+ alloc_background.o \
-+ alloc_foreground.o \
-+ backpointers.o \
-+ bkey.o \
-+ bkey_methods.o \
-+ bkey_sort.o \
-+ bset.o \
-+ btree_cache.o \
-+ btree_gc.o \
-+ btree_io.o \
-+ btree_iter.o \
-+ btree_journal_iter.o \
-+ btree_key_cache.o \
-+ btree_locking.o \
-+ btree_trans_commit.o \
-+ btree_update.o \
-+ btree_update_interior.o \
-+ btree_write_buffer.o \
-+ buckets.o \
-+ buckets_waiting_for_journal.o \
-+ chardev.o \
-+ checksum.o \
-+ clock.o \
-+ compress.o \
-+ counters.o \
-+ debug.o \
-+ dirent.o \
-+ disk_groups.o \
-+ data_update.o \
-+ ec.o \
-+ errcode.o \
-+ error.o \
-+ extents.o \
-+ extent_update.o \
-+ fs.o \
-+ fs-common.o \
-+ fs-ioctl.o \
-+ fs-io.o \
-+ fs-io-buffered.o \
-+ fs-io-direct.o \
-+ fs-io-pagecache.o \
-+ fsck.o \
-+ inode.o \
-+ io_read.o \
-+ io_misc.o \
-+ io_write.o \
-+ journal.o \
-+ journal_io.o \
-+ journal_reclaim.o \
-+ journal_sb.o \
-+ journal_seq_blacklist.o \
-+ keylist.o \
-+ logged_ops.o \
-+ lru.o \
-+ mean_and_variance.o \
-+ migrate.o \
-+ move.o \
-+ movinggc.o \
-+ nocow_locking.o \
-+ opts.o \
-+ printbuf.o \
-+ quota.o \
-+ rebalance.o \
-+ recovery.o \
-+ reflink.o \
-+ replicas.o \
-+ sb-clean.o \
-+ sb-errors.o \
-+ sb-members.o \
-+ siphash.o \
-+ six.o \
-+ snapshot.o \
-+ subvolume.o \
-+ super.o \
-+ super-io.o \
-+ sysfs.o \
-+ tests.o \
-+ trace.o \
-+ two_state_shared_lock.o \
-+ util.o \
-+ varint.o \
-+ xattr.o
-+
-+obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o
-diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
-new file mode 100644
-index 000000000000..f3809897f00a
---- /dev/null
-+++ b/fs/bcachefs/acl.c
-@@ -0,0 +1,463 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+
-+#include "acl.h"
-+#include "xattr.h"
-+
-+#include <linux/posix_acl.h>
-+
-+static const char * const acl_types[] = {
-+ [ACL_USER_OBJ] = "user_obj",
-+ [ACL_USER] = "user",
-+ [ACL_GROUP_OBJ] = "group_obj",
-+ [ACL_GROUP] = "group",
-+ [ACL_MASK] = "mask",
-+ [ACL_OTHER] = "other",
-+ NULL,
-+};
-+
-+void bch2_acl_to_text(struct printbuf *out, const void *value, size_t size)
-+{
-+ const void *p, *end = value + size;
-+
-+ if (!value ||
-+ size < sizeof(bch_acl_header) ||
-+ ((bch_acl_header *)value)->a_version != cpu_to_le32(BCH_ACL_VERSION))
-+ return;
-+
-+ p = value + sizeof(bch_acl_header);
-+ while (p < end) {
-+ const bch_acl_entry *in = p;
-+ unsigned tag = le16_to_cpu(in->e_tag);
-+
-+ prt_str(out, acl_types[tag]);
-+
-+ switch (tag) {
-+ case ACL_USER_OBJ:
-+ case ACL_GROUP_OBJ:
-+ case ACL_MASK:
-+ case ACL_OTHER:
-+ p += sizeof(bch_acl_entry_short);
-+ break;
-+ case ACL_USER:
-+ prt_printf(out, " uid %u", le32_to_cpu(in->e_id));
-+ p += sizeof(bch_acl_entry);
-+ break;
-+ case ACL_GROUP:
-+ prt_printf(out, " gid %u", le32_to_cpu(in->e_id));
-+ p += sizeof(bch_acl_entry);
-+ break;
-+ }
-+
-+ prt_printf(out, " %o", le16_to_cpu(in->e_perm));
-+
-+ if (p != end)
-+ prt_char(out, ' ');
-+ }
-+}
-+
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+
-+#include "fs.h"
-+
-+#include <linux/fs.h>
-+#include <linux/posix_acl_xattr.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+
-+static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long)
-+{
-+ return sizeof(bch_acl_header) +
-+ sizeof(bch_acl_entry_short) * nr_short +
-+ sizeof(bch_acl_entry) * nr_long;
-+}
-+
-+static inline int acl_to_xattr_type(int type)
-+{
-+ switch (type) {
-+ case ACL_TYPE_ACCESS:
-+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS;
-+ case ACL_TYPE_DEFAULT:
-+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+/*
-+ * Convert from filesystem to in-memory representation.
-+ */
-+static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans,
-+ const void *value, size_t size)
-+{
-+ const void *p, *end = value + size;
-+ struct posix_acl *acl;
-+ struct posix_acl_entry *out;
-+ unsigned count = 0;
-+ int ret;
-+
-+ if (!value)
-+ return NULL;
-+ if (size < sizeof(bch_acl_header))
-+ goto invalid;
-+ if (((bch_acl_header *)value)->a_version !=
-+ cpu_to_le32(BCH_ACL_VERSION))
-+ goto invalid;
-+
-+ p = value + sizeof(bch_acl_header);
-+ while (p < end) {
-+ const bch_acl_entry *entry = p;
-+
-+ if (p + sizeof(bch_acl_entry_short) > end)
-+ goto invalid;
-+
-+ switch (le16_to_cpu(entry->e_tag)) {
-+ case ACL_USER_OBJ:
-+ case ACL_GROUP_OBJ:
-+ case ACL_MASK:
-+ case ACL_OTHER:
-+ p += sizeof(bch_acl_entry_short);
-+ break;
-+ case ACL_USER:
-+ case ACL_GROUP:
-+ p += sizeof(bch_acl_entry);
-+ break;
-+ default:
-+ goto invalid;
-+ }
-+
-+ count++;
-+ }
-+
-+ if (p > end)
-+ goto invalid;
-+
-+ if (!count)
-+ return NULL;
-+
-+ acl = allocate_dropping_locks(trans, ret,
-+ posix_acl_alloc(count, _gfp));
-+ if (!acl)
-+ return ERR_PTR(-ENOMEM);
-+ if (ret) {
-+ kfree(acl);
-+ return ERR_PTR(ret);
-+ }
-+
-+ out = acl->a_entries;
-+
-+ p = value + sizeof(bch_acl_header);
-+ while (p < end) {
-+ const bch_acl_entry *in = p;
-+
-+ out->e_tag = le16_to_cpu(in->e_tag);
-+ out->e_perm = le16_to_cpu(in->e_perm);
-+
-+ switch (out->e_tag) {
-+ case ACL_USER_OBJ:
-+ case ACL_GROUP_OBJ:
-+ case ACL_MASK:
-+ case ACL_OTHER:
-+ p += sizeof(bch_acl_entry_short);
-+ break;
-+ case ACL_USER:
-+ out->e_uid = make_kuid(&init_user_ns,
-+ le32_to_cpu(in->e_id));
-+ p += sizeof(bch_acl_entry);
-+ break;
-+ case ACL_GROUP:
-+ out->e_gid = make_kgid(&init_user_ns,
-+ le32_to_cpu(in->e_id));
-+ p += sizeof(bch_acl_entry);
-+ break;
-+ }
-+
-+ out++;
-+ }
-+
-+ BUG_ON(out != acl->a_entries + acl->a_count);
-+
-+ return acl;
-+invalid:
-+ pr_err("invalid acl entry");
-+ return ERR_PTR(-EINVAL);
-+}
-+
-+#define acl_for_each_entry(acl, acl_e) \
-+ for (acl_e = acl->a_entries; \
-+ acl_e < acl->a_entries + acl->a_count; \
-+ acl_e++)
-+
-+/*
-+ * Convert from in-memory to filesystem representation.
-+ */
-+static struct bkey_i_xattr *
-+bch2_acl_to_xattr(struct btree_trans *trans,
-+ const struct posix_acl *acl,
-+ int type)
-+{
-+ struct bkey_i_xattr *xattr;
-+ bch_acl_header *acl_header;
-+ const struct posix_acl_entry *acl_e;
-+ void *outptr;
-+ unsigned nr_short = 0, nr_long = 0, acl_len, u64s;
-+
-+ acl_for_each_entry(acl, acl_e) {
-+ switch (acl_e->e_tag) {
-+ case ACL_USER:
-+ case ACL_GROUP:
-+ nr_long++;
-+ break;
-+ case ACL_USER_OBJ:
-+ case ACL_GROUP_OBJ:
-+ case ACL_MASK:
-+ case ACL_OTHER:
-+ nr_short++;
-+ break;
-+ default:
-+ return ERR_PTR(-EINVAL);
-+ }
-+ }
-+
-+ acl_len = bch2_acl_size(nr_short, nr_long);
-+ u64s = BKEY_U64s + xattr_val_u64s(0, acl_len);
-+
-+ if (u64s > U8_MAX)
-+ return ERR_PTR(-E2BIG);
-+
-+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
-+ if (IS_ERR(xattr))
-+ return xattr;
-+
-+ bkey_xattr_init(&xattr->k_i);
-+ xattr->k.u64s = u64s;
-+ xattr->v.x_type = acl_to_xattr_type(type);
-+ xattr->v.x_name_len = 0;
-+ xattr->v.x_val_len = cpu_to_le16(acl_len);
-+
-+ acl_header = xattr_val(&xattr->v);
-+ acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION);
-+
-+ outptr = (void *) acl_header + sizeof(*acl_header);
-+
-+ acl_for_each_entry(acl, acl_e) {
-+ bch_acl_entry *entry = outptr;
-+
-+ entry->e_tag = cpu_to_le16(acl_e->e_tag);
-+ entry->e_perm = cpu_to_le16(acl_e->e_perm);
-+ switch (acl_e->e_tag) {
-+ case ACL_USER:
-+ entry->e_id = cpu_to_le32(
-+ from_kuid(&init_user_ns, acl_e->e_uid));
-+ outptr += sizeof(bch_acl_entry);
-+ break;
-+ case ACL_GROUP:
-+ entry->e_id = cpu_to_le32(
-+ from_kgid(&init_user_ns, acl_e->e_gid));
-+ outptr += sizeof(bch_acl_entry);
-+ break;
-+
-+ case ACL_USER_OBJ:
-+ case ACL_GROUP_OBJ:
-+ case ACL_MASK:
-+ case ACL_OTHER:
-+ outptr += sizeof(bch_acl_entry_short);
-+ break;
-+ }
-+ }
-+
-+ BUG_ON(outptr != xattr_val(&xattr->v) + acl_len);
-+
-+ return xattr;
-+}
-+
-+struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
-+ struct dentry *dentry, int type)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
-+ struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter = { NULL };
-+ struct bkey_s_c_xattr xattr;
-+ struct posix_acl *acl = NULL;
-+ struct bkey_s_c k;
-+ int ret;
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
-+ &hash, inode_inum(inode), &search, 0);
-+ if (ret) {
-+ if (!bch2_err_matches(ret, ENOENT))
-+ acl = ERR_PTR(ret);
-+ goto out;
-+ }
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret) {
-+ acl = ERR_PTR(ret);
-+ goto out;
-+ }
-+
-+ xattr = bkey_s_c_to_xattr(k);
-+ acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
-+ le16_to_cpu(xattr.v->x_val_len));
-+
-+ if (!IS_ERR(acl))
-+ set_cached_acl(&inode->v, type, acl);
-+out:
-+ if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return acl;
-+}
-+
-+int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum,
-+ struct bch_inode_unpacked *inode_u,
-+ struct posix_acl *acl, int type)
-+{
-+ struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode_u);
-+ int ret;
-+
-+ if (type == ACL_TYPE_DEFAULT &&
-+ !S_ISDIR(inode_u->bi_mode))
-+ return acl ? -EACCES : 0;
-+
-+ if (acl) {
-+ struct bkey_i_xattr *xattr =
-+ bch2_acl_to_xattr(trans, acl, type);
-+ if (IS_ERR(xattr))
-+ return PTR_ERR(xattr);
-+
-+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, &hash_info,
-+ inum, &xattr->k_i, 0);
-+ } else {
-+ struct xattr_search_key search =
-+ X_SEARCH(acl_to_xattr_type(type), "", 0);
-+
-+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, &hash_info,
-+ inum, &search);
-+ }
-+
-+ return bch2_err_matches(ret, ENOENT) ? 0 : ret;
-+}
-+
-+int bch2_set_acl(struct mnt_idmap *idmap,
-+ struct dentry *dentry,
-+ struct posix_acl *_acl, int type)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter inode_iter = { NULL };
-+ struct bch_inode_unpacked inode_u;
-+ struct posix_acl *acl;
-+ umode_t mode;
-+ int ret;
-+
-+ mutex_lock(&inode->ei_update_lock);
-+retry:
-+ bch2_trans_begin(trans);
-+ acl = _acl;
-+
-+ ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
-+ BTREE_ITER_INTENT);
-+ if (ret)
-+ goto btree_err;
-+
-+ mode = inode_u.bi_mode;
-+
-+ if (type == ACL_TYPE_ACCESS) {
-+ ret = posix_acl_update_mode(idmap, &inode->v, &mode, &acl);
-+ if (ret)
-+ goto btree_err;
-+ }
-+
-+ ret = bch2_set_acl_trans(trans, inode_inum(inode), &inode_u, acl, type);
-+ if (ret)
-+ goto btree_err;
-+
-+ inode_u.bi_ctime = bch2_current_time(c);
-+ inode_u.bi_mode = mode;
-+
-+ ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
-+ bch2_trans_commit(trans, NULL, NULL, 0);
-+btree_err:
-+ bch2_trans_iter_exit(trans, &inode_iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+ if (unlikely(ret))
-+ goto err;
-+
-+ bch2_inode_update_after_write(trans, inode, &inode_u,
-+ ATTR_CTIME|ATTR_MODE);
-+
-+ set_cached_acl(&inode->v, type, acl);
-+err:
-+ mutex_unlock(&inode->ei_update_lock);
-+ bch2_trans_put(trans);
-+
-+ return ret;
-+}
-+
-+int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
-+ struct bch_inode_unpacked *inode,
-+ umode_t mode,
-+ struct posix_acl **new_acl)
-+{
-+ struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode);
-+ struct xattr_search_key search = X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0);
-+ struct btree_iter iter;
-+ struct bkey_s_c_xattr xattr;
-+ struct bkey_i_xattr *new;
-+ struct posix_acl *acl = NULL;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
-+ &hash_info, inum, &search, BTREE_ITER_INTENT);
-+ if (ret)
-+ return bch2_err_matches(ret, ENOENT) ? 0 : ret;
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+ xattr = bkey_s_c_to_xattr(k);
-+
-+ acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
-+ le16_to_cpu(xattr.v->x_val_len));
-+ ret = PTR_ERR_OR_ZERO(acl);
-+ if (IS_ERR_OR_NULL(acl))
-+ goto err;
-+
-+ ret = allocate_dropping_locks_errcode(trans,
-+ __posix_acl_chmod(&acl, _gfp, mode));
-+ if (ret)
-+ goto err;
-+
-+ new = bch2_acl_to_xattr(trans, acl, ACL_TYPE_ACCESS);
-+ if (IS_ERR(new)) {
-+ ret = PTR_ERR(new);
-+ goto err;
-+ }
-+
-+ new->k.p = iter.pos;
-+ ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
-+ *new_acl = acl;
-+ acl = NULL;
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ if (!IS_ERR_OR_NULL(acl))
-+ kfree(acl);
-+ return ret;
-+}
-+
-+#endif /* CONFIG_BCACHEFS_POSIX_ACL */
-diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h
-new file mode 100644
-index 000000000000..27e7eec0f278
---- /dev/null
-+++ b/fs/bcachefs/acl.h
-@@ -0,0 +1,60 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_ACL_H
-+#define _BCACHEFS_ACL_H
-+
-+struct bch_inode_unpacked;
-+struct bch_hash_info;
-+struct bch_inode_info;
-+struct posix_acl;
-+
-+#define BCH_ACL_VERSION 0x0001
-+
-+typedef struct {
-+ __le16 e_tag;
-+ __le16 e_perm;
-+ __le32 e_id;
-+} bch_acl_entry;
-+
-+typedef struct {
-+ __le16 e_tag;
-+ __le16 e_perm;
-+} bch_acl_entry_short;
-+
-+typedef struct {
-+ __le32 a_version;
-+} bch_acl_header;
-+
-+void bch2_acl_to_text(struct printbuf *, const void *, size_t);
-+
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+
-+struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int);
-+
-+int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
-+ struct bch_inode_unpacked *,
-+ struct posix_acl *, int);
-+int bch2_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int);
-+int bch2_acl_chmod(struct btree_trans *, subvol_inum,
-+ struct bch_inode_unpacked *,
-+ umode_t, struct posix_acl **);
-+
-+#else
-+
-+static inline int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum,
-+ struct bch_inode_unpacked *inode_u,
-+ struct posix_acl *acl, int type)
-+{
-+ return 0;
-+}
-+
-+static inline int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
-+ struct bch_inode_unpacked *inode,
-+ umode_t mode,
-+ struct posix_acl **new_acl)
-+{
-+ return 0;
-+}
-+
-+#endif /* CONFIG_BCACHEFS_POSIX_ACL */
-+
-+#endif /* _BCACHEFS_ACL_H */
-diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
-new file mode 100644
-index 000000000000..1fec0e67891f
---- /dev/null
-+++ b/fs/bcachefs/alloc_background.c
-@@ -0,0 +1,2159 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "backpointers.h"
-+#include "btree_cache.h"
-+#include "btree_io.h"
-+#include "btree_key_cache.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "btree_gc.h"
-+#include "btree_write_buffer.h"
-+#include "buckets.h"
-+#include "buckets_waiting_for_journal.h"
-+#include "clock.h"
-+#include "debug.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "lru.h"
-+#include "recovery.h"
-+#include "trace.h"
-+#include "varint.h"
-+
-+#include <linux/kthread.h>
-+#include <linux/math64.h>
-+#include <linux/random.h>
-+#include <linux/rculist.h>
-+#include <linux/rcupdate.h>
-+#include <linux/sched/task.h>
-+#include <linux/sort.h>
-+
-+/* Persistent alloc info: */
-+
-+static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
-+#define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8,
-+ BCH_ALLOC_FIELDS_V1()
-+#undef x
-+};
-+
-+struct bkey_alloc_unpacked {
-+ u64 journal_seq;
-+ u8 gen;
-+ u8 oldest_gen;
-+ u8 data_type;
-+ bool need_discard:1;
-+ bool need_inc_gen:1;
-+#define x(_name, _bits) u##_bits _name;
-+ BCH_ALLOC_FIELDS_V2()
-+#undef x
-+};
-+
-+static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
-+ const void **p, unsigned field)
-+{
-+ unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field];
-+ u64 v;
-+
-+ if (!(a->fields & (1 << field)))
-+ return 0;
-+
-+ switch (bytes) {
-+ case 1:
-+ v = *((const u8 *) *p);
-+ break;
-+ case 2:
-+ v = le16_to_cpup(*p);
-+ break;
-+ case 4:
-+ v = le32_to_cpup(*p);
-+ break;
-+ case 8:
-+ v = le64_to_cpup(*p);
-+ break;
-+ default:
-+ BUG();
-+ }
-+
-+ *p += bytes;
-+ return v;
-+}
-+
-+static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out,
-+ struct bkey_s_c k)
-+{
-+ const struct bch_alloc *in = bkey_s_c_to_alloc(k).v;
-+ const void *d = in->data;
-+ unsigned idx = 0;
-+
-+ out->gen = in->gen;
-+
-+#define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++);
-+ BCH_ALLOC_FIELDS_V1()
-+#undef x
-+}
-+
-+static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k);
-+ const u8 *in = a.v->data;
-+ const u8 *end = bkey_val_end(a);
-+ unsigned fieldnr = 0;
-+ int ret;
-+ u64 v;
-+
-+ out->gen = a.v->gen;
-+ out->oldest_gen = a.v->oldest_gen;
-+ out->data_type = a.v->data_type;
-+
-+#define x(_name, _bits) \
-+ if (fieldnr < a.v->nr_fields) { \
-+ ret = bch2_varint_decode_fast(in, end, &v); \
-+ if (ret < 0) \
-+ return ret; \
-+ in += ret; \
-+ } else { \
-+ v = 0; \
-+ } \
-+ out->_name = v; \
-+ if (v != out->_name) \
-+ return -1; \
-+ fieldnr++;
-+
-+ BCH_ALLOC_FIELDS_V2()
-+#undef x
-+ return 0;
-+}
-+
-+static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k);
-+ const u8 *in = a.v->data;
-+ const u8 *end = bkey_val_end(a);
-+ unsigned fieldnr = 0;
-+ int ret;
-+ u64 v;
-+
-+ out->gen = a.v->gen;
-+ out->oldest_gen = a.v->oldest_gen;
-+ out->data_type = a.v->data_type;
-+ out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v);
-+ out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v);
-+ out->journal_seq = le64_to_cpu(a.v->journal_seq);
-+
-+#define x(_name, _bits) \
-+ if (fieldnr < a.v->nr_fields) { \
-+ ret = bch2_varint_decode_fast(in, end, &v); \
-+ if (ret < 0) \
-+ return ret; \
-+ in += ret; \
-+ } else { \
-+ v = 0; \
-+ } \
-+ out->_name = v; \
-+ if (v != out->_name) \
-+ return -1; \
-+ fieldnr++;
-+
-+ BCH_ALLOC_FIELDS_V2()
-+#undef x
-+ return 0;
-+}
-+
-+static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
-+{
-+ struct bkey_alloc_unpacked ret = { .gen = 0 };
-+
-+ switch (k.k->type) {
-+ case KEY_TYPE_alloc:
-+ bch2_alloc_unpack_v1(&ret, k);
-+ break;
-+ case KEY_TYPE_alloc_v2:
-+ bch2_alloc_unpack_v2(&ret, k);
-+ break;
-+ case KEY_TYPE_alloc_v3:
-+ bch2_alloc_unpack_v3(&ret, k);
-+ break;
-+ }
-+
-+ return ret;
-+}
-+
-+static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
-+{
-+ unsigned i, bytes = offsetof(struct bch_alloc, data);
-+
-+ for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++)
-+ if (a->fields & (1 << i))
-+ bytes += BCH_ALLOC_V1_FIELD_BYTES[i];
-+
-+ return DIV_ROUND_UP(bytes, sizeof(u64));
-+}
-+
-+int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
-+ int ret = 0;
-+
-+ /* allow for unknown fields */
-+ bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err,
-+ alloc_v1_val_size_bad,
-+ "incorrect value size (%zu < %u)",
-+ bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v));
-+fsck_err:
-+ return ret;
-+}
-+
-+int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_alloc_unpacked u;
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err,
-+ alloc_v2_unpack_error,
-+ "unpack error");
-+fsck_err:
-+ return ret;
-+}
-+
-+int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_alloc_unpacked u;
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err,
-+ alloc_v2_unpack_error,
-+ "unpack error");
-+fsck_err:
-+ return ret;
-+}
-+
-+int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+ struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err,
-+ alloc_v4_val_size_bad,
-+ "bad val size (%u > %zu)",
-+ alloc_v4_u64s(a.v), bkey_val_u64s(k.k));
-+
-+ bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
-+ BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err,
-+ alloc_v4_backpointers_start_bad,
-+ "invalid backpointers_start");
-+
-+ bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err,
-+ alloc_key_data_type_bad,
-+ "invalid data type (got %u should be %u)",
-+ a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
-+
-+ switch (a.v->data_type) {
-+ case BCH_DATA_free:
-+ case BCH_DATA_need_gc_gens:
-+ case BCH_DATA_need_discard:
-+ bkey_fsck_err_on(a.v->dirty_sectors ||
-+ a.v->cached_sectors ||
-+ a.v->stripe, c, err,
-+ alloc_key_empty_but_have_data,
-+ "empty data type free but have data");
-+ break;
-+ case BCH_DATA_sb:
-+ case BCH_DATA_journal:
-+ case BCH_DATA_btree:
-+ case BCH_DATA_user:
-+ case BCH_DATA_parity:
-+ bkey_fsck_err_on(!a.v->dirty_sectors, c, err,
-+ alloc_key_dirty_sectors_0,
-+ "data_type %s but dirty_sectors==0",
-+ bch2_data_types[a.v->data_type]);
-+ break;
-+ case BCH_DATA_cached:
-+ bkey_fsck_err_on(!a.v->cached_sectors ||
-+ a.v->dirty_sectors ||
-+ a.v->stripe, c, err,
-+ alloc_key_cached_inconsistency,
-+ "data type inconsistency");
-+
-+ bkey_fsck_err_on(!a.v->io_time[READ] &&
-+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
-+ c, err,
-+ alloc_key_cached_but_read_time_zero,
-+ "cached bucket with read_time == 0");
-+ break;
-+ case BCH_DATA_stripe:
-+ break;
-+ }
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_alloc_v4_swab(struct bkey_s k)
-+{
-+ struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
-+ struct bch_backpointer *bp, *bps;
-+
-+ a->journal_seq = swab64(a->journal_seq);
-+ a->flags = swab32(a->flags);
-+ a->dirty_sectors = swab32(a->dirty_sectors);
-+ a->cached_sectors = swab32(a->cached_sectors);
-+ a->io_time[0] = swab64(a->io_time[0]);
-+ a->io_time[1] = swab64(a->io_time[1]);
-+ a->stripe = swab32(a->stripe);
-+ a->nr_external_backpointers = swab32(a->nr_external_backpointers);
-+ a->fragmentation_lru = swab64(a->fragmentation_lru);
-+
-+ bps = alloc_v4_backpointers(a);
-+ for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
-+ bp->bucket_offset = swab40(bp->bucket_offset);
-+ bp->bucket_len = swab32(bp->bucket_len);
-+ bch2_bpos_swab(&bp->pos);
-+ }
-+}
-+
-+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
-+{
-+ struct bch_alloc_v4 _a;
-+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
-+ unsigned i;
-+
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+
-+ prt_printf(out, "gen %u oldest_gen %u data_type %s",
-+ a->gen, a->oldest_gen,
-+ a->data_type < BCH_DATA_NR
-+ ? bch2_data_types[a->data_type]
-+ : "(invalid data type)");
-+ prt_newline(out);
-+ prt_printf(out, "journal_seq %llu", a->journal_seq);
-+ prt_newline(out);
-+ prt_printf(out, "need_discard %llu", BCH_ALLOC_V4_NEED_DISCARD(a));
-+ prt_newline(out);
-+ prt_printf(out, "need_inc_gen %llu", BCH_ALLOC_V4_NEED_INC_GEN(a));
-+ prt_newline(out);
-+ prt_printf(out, "dirty_sectors %u", a->dirty_sectors);
-+ prt_newline(out);
-+ prt_printf(out, "cached_sectors %u", a->cached_sectors);
-+ prt_newline(out);
-+ prt_printf(out, "stripe %u", a->stripe);
-+ prt_newline(out);
-+ prt_printf(out, "stripe_redundancy %u", a->stripe_redundancy);
-+ prt_newline(out);
-+ prt_printf(out, "io_time[READ] %llu", a->io_time[READ]);
-+ prt_newline(out);
-+ prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]);
-+ prt_newline(out);
-+ prt_printf(out, "fragmentation %llu", a->fragmentation_lru);
-+ prt_newline(out);
-+ prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a));
-+ prt_newline(out);
-+
-+ if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) {
-+ struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k);
-+ const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v);
-+
-+ prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v));
-+ printbuf_indent_add(out, 2);
-+
-+ for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) {
-+ prt_newline(out);
-+ bch2_backpointer_to_text(out, &bps[i]);
-+ }
-+
-+ printbuf_indent_sub(out, 2);
-+ }
-+
-+ printbuf_indent_sub(out, 2);
-+}
-+
-+void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
-+{
-+ if (k.k->type == KEY_TYPE_alloc_v4) {
-+ void *src, *dst;
-+
-+ *out = *bkey_s_c_to_alloc_v4(k).v;
-+
-+ src = alloc_v4_backpointers(out);
-+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
-+ dst = alloc_v4_backpointers(out);
-+
-+ if (src < dst)
-+ memset(src, 0, dst - src);
-+
-+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(out, 0);
-+ } else {
-+ struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
-+
-+ *out = (struct bch_alloc_v4) {
-+ .journal_seq = u.journal_seq,
-+ .flags = u.need_discard,
-+ .gen = u.gen,
-+ .oldest_gen = u.oldest_gen,
-+ .data_type = u.data_type,
-+ .stripe_redundancy = u.stripe_redundancy,
-+ .dirty_sectors = u.dirty_sectors,
-+ .cached_sectors = u.cached_sectors,
-+ .io_time[READ] = u.read_time,
-+ .io_time[WRITE] = u.write_time,
-+ .stripe = u.stripe,
-+ };
-+
-+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
-+ }
-+}
-+
-+static noinline struct bkey_i_alloc_v4 *
-+__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
-+{
-+ struct bkey_i_alloc_v4 *ret;
-+
-+ ret = bch2_trans_kmalloc(trans, max(bkey_bytes(k.k), sizeof(struct bkey_i_alloc_v4)));
-+ if (IS_ERR(ret))
-+ return ret;
-+
-+ if (k.k->type == KEY_TYPE_alloc_v4) {
-+ void *src, *dst;
-+
-+ bkey_reassemble(&ret->k_i, k);
-+
-+ src = alloc_v4_backpointers(&ret->v);
-+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
-+ dst = alloc_v4_backpointers(&ret->v);
-+
-+ if (src < dst)
-+ memset(src, 0, dst - src);
-+
-+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v, 0);
-+ set_alloc_v4_u64s(ret);
-+ } else {
-+ bkey_alloc_v4_init(&ret->k_i);
-+ ret->k.p = k.k->p;
-+ bch2_alloc_to_v4(k, &ret->v);
-+ }
-+ return ret;
-+}
-+
-+static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
-+{
-+ struct bkey_s_c_alloc_v4 a;
-+
-+ if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
-+ ((a = bkey_s_c_to_alloc_v4(k), true) &&
-+ BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) == 0))
-+ return bch2_bkey_make_mut_noupdate_typed(trans, k, alloc_v4);
-+
-+ return __bch2_alloc_to_v4_mut(trans, k);
-+}
-+
-+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
-+{
-+ return bch2_alloc_to_v4_mut_inlined(trans, k);
-+}
-+
-+struct bkey_i_alloc_v4 *
-+bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bpos pos)
-+{
-+ struct bkey_s_c k;
-+ struct bkey_i_alloc_v4 *a;
-+ int ret;
-+
-+ k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos,
-+ BTREE_ITER_WITH_UPDATES|
-+ BTREE_ITER_CACHED|
-+ BTREE_ITER_INTENT);
-+ ret = bkey_err(k);
-+ if (unlikely(ret))
-+ return ERR_PTR(ret);
-+
-+ a = bch2_alloc_to_v4_mut_inlined(trans, k);
-+ ret = PTR_ERR_OR_ZERO(a);
-+ if (unlikely(ret))
-+ goto err;
-+ return a;
-+err:
-+ bch2_trans_iter_exit(trans, iter);
-+ return ERR_PTR(ret);
-+}
-+
-+static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset)
-+{
-+ *offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK;
-+
-+ pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS;
-+ return pos;
-+}
-+
-+static struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset)
-+{
-+ pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS;
-+ pos.offset += offset;
-+ return pos;
-+}
-+
-+static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
-+{
-+ return k.k->type == KEY_TYPE_bucket_gens
-+ ? bkey_s_c_to_bucket_gens(k).v->gens[offset]
-+ : 0;
-+}
-+
-+int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err,
-+ bucket_gens_val_size_bad,
-+ "bad val size (%zu != %zu)",
-+ bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
-+{
-+ struct bkey_s_c_bucket_gens g = bkey_s_c_to_bucket_gens(k);
-+ unsigned i;
-+
-+ for (i = 0; i < ARRAY_SIZE(g.v->gens); i++) {
-+ if (i)
-+ prt_char(out, ' ');
-+ prt_printf(out, "%u", g.v->gens[i]);
-+ }
-+}
-+
-+int bch2_bucket_gens_init(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_alloc_v4 a;
-+ struct bkey_i_bucket_gens g;
-+ bool have_bucket_gens_key = false;
-+ unsigned offset;
-+ struct bpos pos;
-+ u8 gen;
-+ int ret;
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-+ BTREE_ITER_PREFETCH, k, ret) {
-+ /*
-+ * Not a fsck error because this is checked/repaired by
-+ * bch2_check_alloc_key() which runs later:
-+ */
-+ if (!bch2_dev_bucket_exists(c, k.k->p))
-+ continue;
-+
-+ gen = bch2_alloc_to_v4(k, &a)->gen;
-+ pos = alloc_gens_pos(iter.pos, &offset);
-+
-+ if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
-+ ret = commit_do(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_LAZY_RW,
-+ bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
-+ if (ret)
-+ break;
-+ have_bucket_gens_key = false;
-+ }
-+
-+ if (!have_bucket_gens_key) {
-+ bkey_bucket_gens_init(&g.k_i);
-+ g.k.p = pos;
-+ have_bucket_gens_key = true;
-+ }
-+
-+ g.v.gens[offset] = gen;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (have_bucket_gens_key && !ret)
-+ ret = commit_do(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_LAZY_RW,
-+ bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
-+
-+ bch2_trans_put(trans);
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+int bch2_alloc_read(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_dev *ca;
-+ int ret;
-+
-+ down_read(&c->gc_lock);
-+
-+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
-+ const struct bch_bucket_gens *g;
-+ u64 b;
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
-+ BTREE_ITER_PREFETCH, k, ret) {
-+ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
-+ u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
-+
-+ if (k.k->type != KEY_TYPE_bucket_gens)
-+ continue;
-+
-+ g = bkey_s_c_to_bucket_gens(k).v;
-+
-+ /*
-+ * Not a fsck error because this is checked/repaired by
-+ * bch2_check_alloc_key() which runs later:
-+ */
-+ if (!bch2_dev_exists2(c, k.k->p.inode))
-+ continue;
-+
-+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
-+
-+ for (b = max_t(u64, ca->mi.first_bucket, start);
-+ b < min_t(u64, ca->mi.nbuckets, end);
-+ b++)
-+ *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+ } else {
-+ struct bch_alloc_v4 a;
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-+ BTREE_ITER_PREFETCH, k, ret) {
-+ /*
-+ * Not a fsck error because this is checked/repaired by
-+ * bch2_check_alloc_key() which runs later:
-+ */
-+ if (!bch2_dev_bucket_exists(c, k.k->p))
-+ continue;
-+
-+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
-+
-+ *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+ }
-+
-+ bch2_trans_put(trans);
-+ up_read(&c->gc_lock);
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+
-+ return ret;
-+}
-+
-+/* Free space/discard btree: */
-+
-+/*
-+ * Stage the index-btree update that goes with an alloc key: a key in
-+ * BTREE_ID_freespace for BCH_DATA_free buckets, or in BTREE_ID_need_discard
-+ * for BCH_DATA_need_discard buckets. For any other data type there is no
-+ * index entry and 0 is returned without touching anything.
-+ *
-+ * @alloc_k: alloc key the index entry is derived from
-+ * @a: alloc_v4 view of @alloc_k
-+ * @set: true to write KEY_TYPE_set, false to write KEY_TYPE_deleted; the
-+ * key currently in the index is expected to be of the opposite type
-+ *
-+ * Returns 0 on success, -EIO if the existing index key has an unexpected type
-+ * (see the conditions below), or the error from a helper.
-+ */
-+static int bch2_bucket_do_index(struct btree_trans *trans,
-+ struct bkey_s_c alloc_k,
-+ const struct bch_alloc_v4 *a,
-+ bool set)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode);
-+ struct btree_iter iter;
-+ struct bkey_s_c old;
-+ struct bkey_i *k;
-+ enum btree_id btree;
-+ enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted;
-+ enum bch_bkey_type new_type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ /* Only free and need_discard buckets have an index entry */
-+ if (a->data_type != BCH_DATA_free &&
-+ a->data_type != BCH_DATA_need_discard)
-+ return 0;
-+
-+ k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
-+ if (IS_ERR(k))
-+ return PTR_ERR(k);
-+
-+ bkey_init(&k->k);
-+ k->k.type = new_type;
-+
-+ switch (a->data_type) {
-+ case BCH_DATA_free:
-+ btree = BTREE_ID_freespace;
-+ k->k.p = alloc_freespace_pos(alloc_k.k->p, *a);
-+ bch2_key_resize(&k->k, 1);
-+ break;
-+ case BCH_DATA_need_discard:
-+ btree = BTREE_ID_need_discard;
-+ k->k.p = alloc_k.k->p;
-+ break;
-+ default:
-+ return 0;
-+ }
-+
-+ old = bch2_bkey_get_iter(trans, &iter, btree,
-+ bkey_start_pos(&k->k),
-+ BTREE_ITER_INTENT);
-+ ret = bkey_err(old);
-+ if (ret)
-+ return ret;
-+
-+ /*
-+ * A wrong existing key type is only reported as an inconsistency once
-+ * this device's freespace is initialized and recovery has moved past
-+ * the check_alloc_info pass:
-+ */
-+ if (ca->mi.freespace_initialized &&
-+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info &&
-+ bch2_trans_inconsistent_on(old.k->type != old_type, trans,
-+ "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n"
-+ " for %s",
-+ set ? "setting" : "clearing",
-+ bch2_btree_id_str(btree),
-+ iter.pos.inode,
-+ iter.pos.offset,
-+ bch2_bkey_types[old.k->type],
-+ bch2_bkey_types[old_type],
-+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ ret = bch2_trans_update(trans, &iter, k, 0);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * Store @gen for @bucket in the bucket_gens btree.
-+ *
-+ * The bucket_gens key covering @bucket is read (or freshly initialized at the
-+ * iterator position if no bucket_gens key exists there), the slot for @bucket
-+ * is overwritten with @gen, and the result is queued as an update in @trans.
-+ *
-+ * Returns 0 on success or the error from a helper.
-+ */
-+static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
-+					   struct bpos bucket, u8 gen)
-+{
-+	struct btree_iter gens_iter;
-+	unsigned slot;
-+	struct bpos gens_pos = alloc_gens_pos(bucket, &slot);
-+	struct bkey_i_bucket_gens *new_gens;
-+	struct bkey_s_c cur;
-+	int ret;
-+
-+	new_gens = bch2_trans_kmalloc(trans, sizeof(*new_gens));
-+	if (IS_ERR(new_gens))
-+		return PTR_ERR(new_gens);
-+
-+	cur = bch2_bkey_get_iter(trans, &gens_iter, BTREE_ID_bucket_gens, gens_pos,
-+				 BTREE_ITER_INTENT|BTREE_ITER_WITH_UPDATES);
-+	ret = bkey_err(cur);
-+	if (ret)
-+		return ret;
-+
-+	if (cur.k->type == KEY_TYPE_bucket_gens) {
-+		/* Start from the existing generations so other slots are kept */
-+		bkey_reassemble(&new_gens->k_i, cur);
-+	} else {
-+		bkey_bucket_gens_init(&new_gens->k_i);
-+		new_gens->k.p = gens_iter.pos;
-+	}
-+
-+	new_gens->v.gens[slot] = gen;
-+
-+	ret = bch2_trans_update(trans, &gens_iter, &new_gens->k_i, 0);
-+	bch2_trans_iter_exit(trans, &gens_iter);
-+	return ret;
-+}
-+
-+/*
-+ * Bring derived state in sync when an alloc key changes from @old to @new.
-+ *
-+ * @new is modified in place (data_type, io_time, gen, the NEED_INC_GEN and
-+ * NEED_DISCARD flags, fragmentation_lru), and matching updates are staged in
-+ * @trans for the freespace/need_discard index, the lru btree and the
-+ * bucket_gens btree. @new must be a KEY_TYPE_alloc_v4 key.
-+ *
-+ * @btree_id, @level and @flags are not used by this function body.
-+ *
-+ * Returns 0 on success or the first error returned by a helper.
-+ */
-+int bch2_trans_mark_alloc(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_i *new,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_alloc_v4 old_a_convert, *new_a;
-+ const struct bch_alloc_v4 *old_a;
-+ u64 old_lru, new_lru;
-+ int ret = 0;
-+
-+ /*
-+ * Deletion only happens in the device removal path, with
-+ * BTREE_TRIGGER_NORUN:
-+ */
-+ BUG_ON(new->k.type != KEY_TYPE_alloc_v4);
-+
-+ old_a = bch2_alloc_to_v4(old, &old_a_convert);
-+ new_a = &bkey_i_to_alloc_v4(new)->v;
-+
-+ new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
-+
-+ /*
-+ * Sector counts grew: stamp both io_times with the current clock
-+ * (clamped to at least 1) and flag the bucket as needing a gen
-+ * increment and a discard.
-+ */
-+ if (new_a->dirty_sectors > old_a->dirty_sectors ||
-+ new_a->cached_sectors > old_a->cached_sectors) {
-+ new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
-+ new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
-+ SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
-+ SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
-+ }
-+
-+ /* Apply a pending gen increment once the bucket is empty and not open */
-+ if (data_type_is_empty(new_a->data_type) &&
-+ BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
-+ !bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) {
-+ new_a->gen++;
-+ SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
-+ }
-+
-+ /*
-+ * Index entry must move if the data type changed, or if the bucket is
-+ * free and its freespace genbits changed: clear the old entry, then
-+ * set the new one.
-+ */
-+ if (old_a->data_type != new_a->data_type ||
-+ (new_a->data_type == BCH_DATA_free &&
-+ alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
-+ ret = bch2_bucket_do_index(trans, old, old_a, false) ?:
-+ bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ /* Cached buckets always get a nonzero read time */
-+ if (new_a->data_type == BCH_DATA_cached &&
-+ !new_a->io_time[READ])
-+ new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
-+
-+ old_lru = alloc_lru_idx_read(*old_a);
-+ new_lru = alloc_lru_idx_read(*new_a);
-+
-+ if (old_lru != new_lru) {
-+ ret = bch2_lru_change(trans, new->k.p.inode,
-+ bucket_to_u64(new->k.p),
-+ old_lru, new_lru);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a,
-+ bch_dev_bkey_exists(c, new->k.p.inode));
-+
-+ if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
-+ ret = bch2_lru_change(trans,
-+ BCH_LRU_FRAGMENTATION_START,
-+ bucket_to_u64(new->k.p),
-+ old_a->fragmentation_lru, new_a->fragmentation_lru);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ /* Keep the bucket_gens btree in step with the alloc key's gen */
-+ if (old_a->gen != new_a->gen) {
-+ ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * This synthesizes deleted extents for holes, similar to BTREE_ITER_SLOTS for
-+ * extents style btrees, but works on non-extents btrees:
-+ *
-+ * @iter: iterator positioned where the lookup should happen
-+ * @end: upper bound for how far a synthesized hole may extend
-+ * @hole: caller-provided storage for the synthesized key
-+ *
-+ * If a key with nonzero type exists at iter->pos it is returned as is.
-+ * Otherwise @hole is initialized to start at iter->pos and sized to reach the
-+ * position where the bounded forward search stopped, and a bkey_s_c pointing
-+ * at @hole (with a NULL value) is returned. Lookup errors are returned as an
-+ * error key; check with bkey_err().
-+ */
-+static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole)
-+{
-+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
-+
-+ if (bkey_err(k))
-+ return k;
-+
-+ if (k.k->type) {
-+ return k;
-+ } else {
-+ struct btree_iter iter2;
-+ struct bpos next;
-+
-+ bch2_trans_copy_iter(&iter2, iter);
-+
-+ /* Don't search past the end of the current leaf node */
-+ if (!bpos_eq(iter->path->l[0].b->key.k.p, SPOS_MAX))
-+ end = bkey_min(end, bpos_nosnap_successor(iter->path->l[0].b->key.k.p));
-+
-+ /* Keep the hole length strictly below U32_MAX (checked by the BUG_ON below) */
-+ end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1));
-+
-+ /*
-+ * btree node min/max is a closed interval, upto takes a half
-+ * open interval:
-+ */
-+ k = bch2_btree_iter_peek_upto(&iter2, end);
-+ next = iter2.pos;
-+ bch2_trans_iter_exit(iter->trans, &iter2);
-+
-+ BUG_ON(next.offset >= iter->pos.offset + U32_MAX);
-+
-+ if (bkey_err(k))
-+ return k;
-+
-+ bkey_init(hole);
-+ hole->p = iter->pos;
-+
-+ bch2_key_resize(hole, next.offset - iter->pos.offset);
-+ return (struct bkey_s_c) { hole, NULL };
-+ }
-+}
-+
-+/*
-+ * Advance *@bucket to a position that refers to a real bucket.
-+ *
-+ * If *@bucket already exists it is left alone. If it lies before its device's
-+ * first_bucket it is moved up to first_bucket. Otherwise the search moves on
-+ * to the next member device returned by __bch2_next_dev() and *@bucket is set
-+ * to that device's first_bucket.
-+ *
-+ * Returns true if *@bucket now names a bucket, false if no further device
-+ * was found.
-+ */
-+static bool next_bucket(struct bch_fs *c, struct bpos *bucket)
-+{
-+	struct bch_dev *dev;
-+	unsigned dev_idx;
-+	bool found;
-+
-+	if (bch2_dev_bucket_exists(c, *bucket))
-+		return true;
-+
-+	if (bch2_dev_exists2(c, bucket->inode)) {
-+		dev = bch_dev_bkey_exists(c, bucket->inode);
-+
-+		if (bucket->offset < dev->mi.first_bucket) {
-+			bucket->offset = dev->mi.first_bucket;
-+			return true;
-+		}
-+
-+		/* Not below first_bucket: restart from the next device index */
-+		bucket->offset = 0;
-+		bucket->inode++;
-+	}
-+
-+	rcu_read_lock();
-+	dev_idx = bucket->inode;
-+	dev = __bch2_next_dev(c, &dev_idx, NULL);
-+	found = dev != NULL;
-+	if (found)
-+		*bucket = POS(dev->dev_idx, dev->mi.first_bucket);
-+	rcu_read_unlock();
-+
-+	return found;
-+}
-+
-+/*
-+ * Like bch2_get_key_or_hole(), but synthesized holes are restricted to
-+ * buckets that actually exist.
-+ *
-+ * A hole that starts at a nonexistent bucket makes the iterator skip ahead to
-+ * the next real bucket (via next_bucket()) and the lookup is retried; when no
-+ * bucket is left, bkey_s_c_null is returned. A hole whose end position is not
-+ * an existing bucket is resized to end at the device's nbuckets.
-+ */
-+static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, struct bkey *hole)
-+{
-+	struct bch_fs *c = iter->trans->c;
-+
-+	for (;;) {
-+		struct bkey_s_c k = bch2_get_key_or_hole(iter, POS_MAX, hole);
-+		struct bpos hole_start;
-+
-+		/* Errors and real keys are handed straight back */
-+		if (bkey_err(k) || k.k->type)
-+			return k;
-+
-+		hole_start = bkey_start_pos(k.k);
-+
-+		if (!bch2_dev_bucket_exists(c, hole_start)) {
-+			if (!next_bucket(c, &hole_start))
-+				return bkey_s_c_null;
-+
-+			bch2_btree_iter_set_pos(iter, hole_start);
-+			continue;
-+		}
-+
-+		if (!bch2_dev_bucket_exists(c, k.k->p)) {
-+			struct bch_dev *ca = bch_dev_bkey_exists(c, hole_start.inode);
-+
-+			bch2_key_resize(hole, ca->mi.nbuckets - hole_start.offset);
-+		}
-+
-+		return k;
-+	}
-+}
-+
-+/*
-+ * fsck check for one alloc key: verify that the need_discard, freespace and
-+ * bucket_gens btrees agree with @alloc_k and stage repairs in @trans.
-+ *
-+ * An alloc key for a nonexistent device:bucket is deleted. Nothing further is
-+ * checked for devices whose freespace has not been initialized yet.
-+ *
-+ * The three secondary iterators are repositioned by this function; the caller
-+ * owns them. Repairs are applied either because the reconstruct_alloc option
-+ * is set or because fsck_err() returned true.
-+ *
-+ * NOTE(review): fsck_err()/fsck_err_on() appear to assign ret and jump to the
-+ * fsck_err label on failure; that is why the label exists and why ret is not
-+ * initialized here. Confirm against the macro definitions.
-+ *
-+ * Returns 0 on success or an error code.
-+ */
-+static noinline_for_stack
-+int bch2_check_alloc_key(struct btree_trans *trans,
-+ struct bkey_s_c alloc_k,
-+ struct btree_iter *alloc_iter,
-+ struct btree_iter *discard_iter,
-+ struct btree_iter *freespace_iter,
-+ struct btree_iter *bucket_gens_iter)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_dev *ca;
-+ struct bch_alloc_v4 a_convert;
-+ const struct bch_alloc_v4 *a;
-+ unsigned discard_key_type, freespace_key_type;
-+ unsigned gens_offset;
-+ struct bkey_s_c k;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c,
-+ alloc_key_to_missing_dev_bucket,
-+ "alloc key for invalid device:bucket %llu:%llu",
-+ alloc_k.k->p.inode, alloc_k.k->p.offset))
-+ return bch2_btree_delete_at(trans, alloc_iter, 0);
-+
-+ ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode);
-+ if (!ca->mi.freespace_initialized)
-+ return 0;
-+
-+ a = bch2_alloc_to_v4(alloc_k, &a_convert);
-+
-+ /* need_discard btree: a set key must exist iff the bucket needs a discard */
-+ discard_key_type = a->data_type == BCH_DATA_need_discard ? KEY_TYPE_set : 0;
-+ bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p);
-+ k = bch2_btree_iter_peek_slot(discard_iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (k.k->type != discard_key_type &&
-+ (c->opts.reconstruct_alloc ||
-+ fsck_err(c, need_discard_key_wrong,
-+ "incorrect key in need_discard btree (got %s should be %s)\n"
-+ " %s",
-+ bch2_bkey_types[k.k->type],
-+ bch2_bkey_types[discard_key_type],
-+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
-+ struct bkey_i *update =
-+ bch2_trans_kmalloc(trans, sizeof(*update));
-+
-+ ret = PTR_ERR_OR_ZERO(update);
-+ if (ret)
-+ goto err;
-+
-+ bkey_init(&update->k);
-+ update->k.type = discard_key_type;
-+ update->k.p = discard_iter->pos;
-+
-+ ret = bch2_trans_update(trans, discard_iter, update, 0);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ /* freespace btree: a set key must exist iff the bucket is free */
-+ freespace_key_type = a->data_type == BCH_DATA_free ? KEY_TYPE_set : 0;
-+ bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
-+ k = bch2_btree_iter_peek_slot(freespace_iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (k.k->type != freespace_key_type &&
-+ (c->opts.reconstruct_alloc ||
-+ fsck_err(c, freespace_key_wrong,
-+ "incorrect key in freespace btree (got %s should be %s)\n"
-+ " %s",
-+ bch2_bkey_types[k.k->type],
-+ bch2_bkey_types[freespace_key_type],
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
-+ struct bkey_i *update =
-+ bch2_trans_kmalloc(trans, sizeof(*update));
-+
-+ ret = PTR_ERR_OR_ZERO(update);
-+ if (ret)
-+ goto err;
-+
-+ bkey_init(&update->k);
-+ update->k.type = freespace_key_type;
-+ update->k.p = freespace_iter->pos;
-+ bch2_key_resize(&update->k, 1);
-+
-+ ret = bch2_trans_update(trans, freespace_iter, update, 0);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ /* bucket_gens btree: the stored gen must match the alloc key's gen */
-+ bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
-+ k = bch2_btree_iter_peek_slot(bucket_gens_iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (a->gen != alloc_gen(k, gens_offset) &&
-+ (c->opts.reconstruct_alloc ||
-+ fsck_err(c, bucket_gens_key_wrong,
-+ "incorrect gen in bucket_gens btree (got %u should be %u)\n"
-+ " %s",
-+ alloc_gen(k, gens_offset), a->gen,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
-+ struct bkey_i_bucket_gens *g =
-+ bch2_trans_kmalloc(trans, sizeof(*g));
-+
-+ ret = PTR_ERR_OR_ZERO(g);
-+ if (ret)
-+ goto err;
-+
-+ if (k.k->type == KEY_TYPE_bucket_gens) {
-+ bkey_reassemble(&g->k_i, k);
-+ } else {
-+ bkey_bucket_gens_init(&g->k_i);
-+ g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset);
-+ }
-+
-+ g->v.gens[gens_offset] = a->gen;
-+
-+ ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0);
-+ if (ret)
-+ goto err;
-+ }
-+err:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * fsck check for a hole in the alloc btree: the freespace btree is expected
-+ * to have a KEY_TYPE_set key at @start.
-+ *
-+ * @start: first position of the hole
-+ * @end: in/out; end of the hole. It is lowered to the end of the freespace
-+ * key found at @start so the caller only advances over the range that
-+ * was actually checked.
-+ * @freespace_iter: caller-owned iterator on the freespace btree
-+ *
-+ * Skipped (returns 0) if the device's freespace is not initialized. When the
-+ * freespace key is missing and repair is allowed, a set key starting at the
-+ * iterator position and covering up to *@end (at most U32_MAX buckets) is
-+ * staged in @trans.
-+ *
-+ * Returns 0 on success or an error code.
-+ */
-+static noinline_for_stack
-+int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
-+ struct bpos start,
-+ struct bpos *end,
-+ struct btree_iter *freespace_iter)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_dev *ca;
-+ struct bkey_s_c k;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ ca = bch_dev_bkey_exists(c, start.inode);
-+ if (!ca->mi.freespace_initialized)
-+ return 0;
-+
-+ bch2_btree_iter_set_pos(freespace_iter, start);
-+
-+ k = bch2_btree_iter_peek_slot(freespace_iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ *end = bkey_min(k.k->p, *end);
-+
-+ if (k.k->type != KEY_TYPE_set &&
-+ (c->opts.reconstruct_alloc ||
-+ fsck_err(c, freespace_hole_missing,
-+ "hole in alloc btree missing in freespace btree\n"
-+ " device %llu buckets %llu-%llu",
-+ freespace_iter->pos.inode,
-+ freespace_iter->pos.offset,
-+ end->offset))) {
-+ struct bkey_i *update =
-+ bch2_trans_kmalloc(trans, sizeof(*update));
-+
-+ ret = PTR_ERR_OR_ZERO(update);
-+ if (ret)
-+ goto err;
-+
-+ bkey_init(&update->k);
-+ update->k.type = KEY_TYPE_set;
-+ update->k.p = freespace_iter->pos;
-+ bch2_key_resize(&update->k,
-+ min_t(u64, U32_MAX, end->offset -
-+ freespace_iter->pos.offset));
-+
-+ ret = bch2_trans_update(trans, freespace_iter, update, 0);
-+ if (ret)
-+ goto err;
-+ }
-+err:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * fsck check for a hole in the alloc btree: every bucket in the hole is
-+ * expected to have gen 0 in the bucket_gens btree.
-+ *
-+ * @start: first position of the hole
-+ * @end: in/out; end of the hole. It is lowered to the start of the next
-+ * bucket_gens key so each call handles a single bucket_gens key.
-+ * @bucket_gens_iter: caller-owned iterator on the bucket_gens btree
-+ *
-+ * Skipped (returns 0) on filesystems older than
-+ * bcachefs_metadata_version_bucket_gens. Nonzero gens inside the hole are
-+ * zeroed, when fsck_err_on() allows the repair, and the corrected key is
-+ * staged in @trans.
-+ *
-+ * Returns 0 on success or an error code.
-+ */
-+static noinline_for_stack
-+int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
-+ struct bpos start,
-+ struct bpos *end,
-+ struct btree_iter *bucket_gens_iter)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_s_c k;
-+ struct printbuf buf = PRINTBUF;
-+ unsigned i, gens_offset, gens_end_offset;
-+ int ret;
-+
-+ if (c->sb.version < bcachefs_metadata_version_bucket_gens)
-+ return 0;
-+
-+ bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
-+
-+ k = bch2_btree_iter_peek_slot(bucket_gens_iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * If the hole ends in a different bucket_gens key than it starts in,
-+ * check through to the end of the current key only:
-+ */
-+ if (bkey_cmp(alloc_gens_pos(start, &gens_offset),
-+ alloc_gens_pos(*end, &gens_end_offset)))
-+ gens_end_offset = KEY_TYPE_BUCKET_GENS_NR;
-+
-+ if (k.k->type == KEY_TYPE_bucket_gens) {
-+ struct bkey_i_bucket_gens g;
-+ bool need_update = false;
-+
-+ /* Work on a stack copy; it is only written back if something changed */
-+ bkey_reassemble(&g.k_i, k);
-+
-+ for (i = gens_offset; i < gens_end_offset; i++) {
-+ if (fsck_err_on(g.v.gens[i], c,
-+ bucket_gens_hole_wrong,
-+ "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)",
-+ bucket_gens_pos_to_alloc(k.k->p, i).inode,
-+ bucket_gens_pos_to_alloc(k.k->p, i).offset,
-+ g.v.gens[i])) {
-+ g.v.gens[i] = 0;
-+ need_update = true;
-+ }
-+ }
-+
-+ if (need_update) {
-+ struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
-+
-+ ret = PTR_ERR_OR_ZERO(u);
-+ if (ret)
-+ goto err;
-+
-+ memcpy(u, &g, sizeof(g));
-+
-+ ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
-+ if (ret)
-+ goto err;
-+ }
-+ }
-+
-+ *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0));
-+err:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * fsck check for the need_discard or freespace entry at iter->pos: look up
-+ * the alloc key it refers to and verify that the entry is justified.
-+ *
-+ * The entry is deleted (and the deletion committed immediately) if it refers
-+ * to a nonexistent device:bucket, if the alloc key's data_type does not match
-+ * the btree being checked, or, for the freespace btree, if the genbits
-+ * encoded in the position disagree with the alloc key.
-+ *
-+ * Returns 0 on success or an error code.
-+ */
-+static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_trans *trans,
-+ struct btree_iter *iter)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter alloc_iter;
-+ struct bkey_s_c alloc_k;
-+ struct bch_alloc_v4 a_convert;
-+ const struct bch_alloc_v4 *a;
-+ u64 genbits;
-+ struct bpos pos;
-+ enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
-+ ? BCH_DATA_need_discard
-+ : BCH_DATA_free;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ /*
-+ * Split the offset: the low 56 bits are used as the bucket position
-+ * for the alloc lookup, the top 8 bits are kept as genbits.
-+ */
-+ pos = iter->pos;
-+ pos.offset &= ~(~0ULL << 56);
-+ genbits = iter->pos.offset & (~0ULL << 56);
-+
-+ alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, pos, 0);
-+ ret = bkey_err(alloc_k);
-+ if (ret)
-+ return ret;
-+
-+ if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
-+ need_discard_freespace_key_to_invalid_dev_bucket,
-+ "entry in %s btree for nonexistant dev:bucket %llu:%llu",
-+ bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset))
-+ goto delete;
-+
-+ a = bch2_alloc_to_v4(alloc_k, &a_convert);
-+
-+ if (fsck_err_on(a->data_type != state ||
-+ (state == BCH_DATA_free &&
-+ genbits != alloc_freespace_genbits(*a)), c,
-+ need_discard_freespace_key_bad,
-+ "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
-+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
-+ bch2_btree_id_str(iter->btree_id),
-+ iter->pos.inode,
-+ iter->pos.offset,
-+ a->data_type == state,
-+ genbits >> 56, alloc_freespace_genbits(*a) >> 56))
-+ goto delete;
-+out:
-+fsck_err:
-+ set_btree_iter_dontneed(&alloc_iter);
-+ bch2_trans_iter_exit(trans, &alloc_iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+delete:
-+ /* Freespace entries are deleted with length 1, need_discard entries with length 0 */
-+ ret = bch2_btree_delete_extent_at(trans, iter,
-+ iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW);
-+ goto out;
-+}
-+
-+/*
-+ * Check the need_discard/freespace entry at iter->pos.
-+ *
-+ * For a non-extents btree this is a single check. For an extents btree the
-+ * key may span several positions, so every position from iter->pos up to (but
-+ * not including) @end is checked in turn, stopping at the first error.
-+ *
-+ * Returns 0 on success or the first error encountered.
-+ */
-+static int bch2_check_discard_freespace_key(struct btree_trans *trans,
-+					    struct btree_iter *iter,
-+					    struct bpos end)
-+{
-+	int ret = 0;
-+
-+	if (!btree_id_is_extents(iter->btree_id))
-+		return __bch2_check_discard_freespace_key(trans, iter);
-+
-+	while (!bkey_eq(iter->pos, end)) {
-+		ret = btree_trans_too_many_iters(trans);
-+		if (!ret)
-+			ret = __bch2_check_discard_freespace_key(trans, iter);
-+		if (ret)
-+			break;
-+
-+		bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * We've already checked that generation numbers in the bucket_gens btree are
-+ * valid for buckets that exist; this just checks for keys for nonexistent
-+ * buckets.
-+ *
-+ * @iter: iterator positioned at @k in the bucket_gens btree
-+ * @k: the bucket_gens key to check; must be KEY_TYPE_bucket_gens
-+ *
-+ * A key for a nonexistent device, or one whose whole bucket range lies
-+ * outside [first_bucket, nbuckets), is deleted. A key that only partly
-+ * overlaps the valid range has the gens of its out-of-range buckets zeroed.
-+ *
-+ * Returns 0 on success or an error code.
-+ */
-+static noinline_for_stack
-+int bch2_check_bucket_gens_key(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_i_bucket_gens g;
-+ struct bch_dev *ca;
-+ /* [start, end) is the range of alloc-btree buckets covered by this key */
-+ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
-+ u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
-+ u64 b;
-+ bool need_update = false, dev_exists;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ BUG_ON(k.k->type != KEY_TYPE_bucket_gens);
-+ bkey_reassemble(&g.k_i, k);
-+
-+ /* if no bch_dev, skip out whether we repair or not */
-+ dev_exists = bch2_dev_exists2(c, k.k->p.inode);
-+ if (!dev_exists) {
-+ if (fsck_err_on(!dev_exists, c,
-+ bucket_gens_to_invalid_dev,
-+ "bucket_gens key for invalid device:\n %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ ret = bch2_btree_delete_at(trans, iter, 0);
-+ }
-+ goto out;
-+ }
-+
-+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
-+ if (fsck_err_on(end <= ca->mi.first_bucket ||
-+ start >= ca->mi.nbuckets, c,
-+ bucket_gens_to_invalid_buckets,
-+ "bucket_gens key for invalid buckets:\n %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ ret = bch2_btree_delete_at(trans, iter, 0);
-+ goto out;
-+ }
-+
-+ /* Buckets covered by the key that lie before the device's first bucket */
-+ for (b = start; b < ca->mi.first_bucket; b++)
-+ if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
-+ bucket_gens_nonzero_for_invalid_buckets,
-+ "bucket_gens key has nonzero gen for invalid bucket")) {
-+ g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
-+ need_update = true;
-+ }
-+
-+ /* Buckets covered by the key that lie at or past nbuckets */
-+ for (b = ca->mi.nbuckets; b < end; b++)
-+ if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
-+ bucket_gens_nonzero_for_invalid_buckets,
-+ "bucket_gens key has nonzero gen for invalid bucket")) {
-+ g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
-+ need_update = true;
-+ }
-+
-+ if (need_update) {
-+ struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
-+
-+ ret = PTR_ERR_OR_ZERO(u);
-+ if (ret)
-+ goto out;
-+
-+ memcpy(u, &g, sizeof(g));
-+ ret = bch2_trans_update(trans, iter, u, 0);
-+ }
-+out:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * fsck pass over the allocation metadata.
-+ *
-+ * First walks the alloc btree, treating gaps between keys as holes, and
-+ * cross-checks each key or hole against the need_discard, freespace and
-+ * bucket_gens btrees, committing repairs after each step. Then walks the
-+ * need_discard, freespace and bucket_gens btrees themselves to catch entries
-+ * that do not correspond to a valid alloc key or bucket.
-+ *
-+ * Returns 0 on success or an error code; errors are also logged via
-+ * bch_err_fn().
-+ */
-+int bch2_check_alloc_info(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
-+ struct bkey hole;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
-+ BTREE_ITER_PREFETCH);
-+ bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
-+ BTREE_ITER_PREFETCH);
-+ bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
-+ BTREE_ITER_PREFETCH);
-+ bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
-+ BTREE_ITER_PREFETCH);
-+
-+ while (1) {
-+ struct bpos next;
-+
-+ bch2_trans_begin(trans);
-+
-+ k = bch2_get_key_or_real_bucket_hole(&iter, &hole);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto bkey_err;
-+
-+ /* Null key: no more buckets to check */
-+ if (!k.k)
-+ break;
-+
-+ if (k.k->type) {
-+ next = bpos_nosnap_successor(k.k->p);
-+
-+ ret = bch2_check_alloc_key(trans,
-+ k, &iter,
-+ &discard_iter,
-+ &freespace_iter,
-+ &bucket_gens_iter);
-+ if (ret)
-+ goto bkey_err;
-+ } else {
-+ /* The hole checks may lower next to the range they actually covered */
-+ next = k.k->p;
-+
-+ ret = bch2_check_alloc_hole_freespace(trans,
-+ bkey_start_pos(k.k),
-+ &next,
-+ &freespace_iter) ?:
-+ bch2_check_alloc_hole_bucket_gens(trans,
-+ bkey_start_pos(k.k),
-+ &next,
-+ &bucket_gens_iter);
-+ if (ret)
-+ goto bkey_err;
-+ }
-+
-+ ret = bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_LAZY_RW);
-+ if (ret)
-+ goto bkey_err;
-+
-+ bch2_btree_iter_set_pos(&iter, next);
-+bkey_err:
-+ /* On a transaction restart, retry the same position */
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ continue;
-+ if (ret)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &bucket_gens_iter);
-+ bch2_trans_iter_exit(trans, &freespace_iter);
-+ bch2_trans_iter_exit(trans, &discard_iter);
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (ret < 0)
-+ goto err;
-+
-+ ret = for_each_btree_key2(trans, iter,
-+ BTREE_ID_need_discard, POS_MIN,
-+ BTREE_ITER_PREFETCH, k,
-+ bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?:
-+ for_each_btree_key2(trans, iter,
-+ BTREE_ID_freespace, POS_MIN,
-+ BTREE_ITER_PREFETCH, k,
-+ bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?:
-+ for_each_btree_key_commit(trans, iter,
-+ BTREE_ID_bucket_gens, POS_MIN,
-+ BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
-+ bch2_check_bucket_gens_key(trans, &iter, k));
-+err:
-+ bch2_trans_put(trans);
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/*
-+ * fsck check that a cached bucket has a matching entry in the lru btree.
-+ *
-+ * Looks at the next alloc key from @alloc_iter. Buckets that are not
-+ * BCH_DATA_cached are ignored. For a cached bucket, a zero read time or a
-+ * missing lru entry is repaired by setting the lru entry and, if the read
-+ * time had to be replaced with the current clock value, rewriting the alloc
-+ * key with the new read time (with triggers disabled).
-+ *
-+ * Returns 0 on success (including when there is no further key) or an error
-+ * code.
-+ */
-+static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
-+ struct btree_iter *alloc_iter)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter lru_iter;
-+ struct bch_alloc_v4 a_convert;
-+ const struct bch_alloc_v4 *a;
-+ struct bkey_s_c alloc_k, lru_k;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ alloc_k = bch2_btree_iter_peek(alloc_iter);
-+ if (!alloc_k.k)
-+ return 0;
-+
-+ ret = bkey_err(alloc_k);
-+ if (ret)
-+ return ret;
-+
-+ a = bch2_alloc_to_v4(alloc_k, &a_convert);
-+
-+ if (a->data_type != BCH_DATA_cached)
-+ return 0;
-+
-+ lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
-+ lru_pos(alloc_k.k->p.inode,
-+ bucket_to_u64(alloc_k.k->p),
-+ a->io_time[READ]), 0);
-+ ret = bkey_err(lru_k);
-+ if (ret)
-+ return ret;
-+
-+ if (fsck_err_on(!a->io_time[READ], c,
-+ alloc_key_cached_but_read_time_zero,
-+ "cached bucket with read_time 0\n"
-+ " %s",
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
-+ fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
-+ alloc_key_to_missing_lru_entry,
-+ "missing lru entry\n"
-+ " %s",
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
-+ /* Fall back to the current read clock when the stored time is 0 */
-+ u64 read_time = a->io_time[READ] ?:
-+ atomic64_read(&c->io_clock[READ].now);
-+
-+ ret = bch2_lru_set(trans,
-+ alloc_k.k->p.inode,
-+ bucket_to_u64(alloc_k.k->p),
-+ read_time);
-+ if (ret)
-+ goto err;
-+
-+ if (a->io_time[READ] != read_time) {
-+ struct bkey_i_alloc_v4 *a_mut =
-+ bch2_alloc_to_v4_mut(trans, alloc_k);
-+ ret = PTR_ERR_OR_ZERO(a_mut);
-+ if (ret)
-+ goto err;
-+
-+ a_mut->v.io_time[READ] = read_time;
-+ ret = bch2_trans_update(trans, alloc_iter,
-+ &a_mut->k_i, BTREE_TRIGGER_NORUN);
-+ if (ret)
-+ goto err;
-+ }
-+ }
-+err:
-+fsck_err:
-+ bch2_trans_iter_exit(trans, &lru_iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * fsck pass: run bch2_check_alloc_to_lru_ref() over every key in the alloc
-+ * btree, committing after each key.
-+ *
-+ * Returns 0 on success or an error code; errors are also logged via
-+ * bch_err_fn().
-+ */
-+int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
-+{
-+	struct btree_iter alloc_iter;
-+	struct bkey_s_c alloc_k;
-+	int ret = bch2_trans_run(c,
-+		for_each_btree_key_commit(trans, alloc_iter, BTREE_ID_alloc,
-+				POS_MIN, BTREE_ITER_PREFETCH, alloc_k,
-+				NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
-+			bch2_check_alloc_to_lru_ref(trans, &alloc_iter)));
-+
-+	if (!ret)
-+		return 0;
-+
-+	bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+/*
-+ * Process one entry of the need_discard btree: issue the discard for the
-+ * bucket (if the device has discards enabled) and clear its need_discard
-+ * state in the alloc btree, committing the change.
-+ *
-+ * @need_discard_iter: iterator positioned at the need_discard entry
-+ * @discard_pos_done: in/out; position of the last bucket a discard was issued
-+ * for, so the discard is not issued twice for one bucket
-+ * @seen, @open, @need_journal_commit, @discarded: statistics counters,
-+ * incremented as the bucket is classified
-+ *
-+ * Buckets that are currently open or still waiting on a journal flush are
-+ * counted and skipped. If the device's io_ref cannot be taken, the iterator
-+ * is advanced to the next device and nothing is counted.
-+ *
-+ * Returns 0 on success or skip, or an error code.
-+ */
-+static int bch2_discard_one_bucket(struct btree_trans *trans,
-+ struct btree_iter *need_discard_iter,
-+ struct bpos *discard_pos_done,
-+ u64 *seen,
-+ u64 *open,
-+ u64 *need_journal_commit,
-+ u64 *discarded)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bpos pos = need_discard_iter->pos;
-+ struct btree_iter iter = { NULL };
-+ struct bkey_s_c k;
-+ struct bch_dev *ca;
-+ struct bkey_i_alloc_v4 *a;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ ca = bch_dev_bkey_exists(c, pos.inode);
-+ if (!percpu_ref_tryget(&ca->io_ref)) {
-+ bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
-+ return 0;
-+ }
-+
-+ if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
-+ (*open)++;
-+ goto out;
-+ }
-+
-+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-+ c->journal.flushed_seq_ondisk,
-+ pos.inode, pos.offset)) {
-+ (*need_journal_commit)++;
-+ goto out;
-+ }
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
-+ need_discard_iter->pos,
-+ BTREE_ITER_CACHED);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto out;
-+
-+ a = bch2_alloc_to_v4_mut(trans, k);
-+ ret = PTR_ERR_OR_ZERO(a);
-+ if (ret)
-+ goto out;
-+
-+ /* A pending gen increment is applied and written on its own, without discarding */
-+ if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
-+ a->v.gen++;
-+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
-+ goto write;
-+ }
-+
-+ /* Only an inconsistency once recovery is past check_alloc_info */
-+ if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
-+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
-+ bch2_trans_inconsistent(trans,
-+ "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
-+ "%s",
-+ a->v.journal_seq,
-+ c->journal.flushed_seq_ondisk,
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ ret = -EIO;
-+ }
-+ goto out;
-+ }
-+
-+ if (a->v.data_type != BCH_DATA_need_discard) {
-+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
-+ bch2_trans_inconsistent(trans,
-+ "bucket incorrectly set in need_discard btree\n"
-+ "%s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ ret = -EIO;
-+ }
-+
-+ goto out;
-+ }
-+
-+ if (!bkey_eq(*discard_pos_done, iter.pos) &&
-+ ca->mi.discard && !c->opts.nochanges) {
-+ /*
-+ * This works without any other locks because this is the only
-+ * thread that removes items from the need_discard tree
-+ */
-+ bch2_trans_unlock(trans);
-+ /* The return value of blkdev_issue_discard() is not checked */
-+ blkdev_issue_discard(ca->disk_sb.bdev,
-+ k.k->p.offset * ca->mi.bucket_size,
-+ ca->mi.bucket_size,
-+ GFP_KERNEL);
-+ *discard_pos_done = iter.pos;
-+
-+ ret = bch2_trans_relock_notrace(trans);
-+ if (ret)
-+ goto out;
-+ }
-+
-+ SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
-+ a->v.data_type = alloc_data_type(a->v, a->v.data_type);
-+write:
-+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BCH_WATERMARK_btree|
-+ BTREE_INSERT_NOFAIL);
-+ if (ret)
-+ goto out;
-+
-+ this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
-+ (*discarded)++;
-+out:
-+ (*seen)++;
-+ bch2_trans_iter_exit(trans, &iter);
-+ percpu_ref_put(&ca->io_ref);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * Work item body for c->discard_work: walk the need_discard btree and try to
-+ * discard each bucket found there, then release the BCH_WRITE_REF_discard
-+ * reference and emit the discard_buckets tracepoint with the totals.
-+ */
-+static void bch2_do_discards_work(struct work_struct *work)
-+{
-+	struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
-+	struct bpos last_discarded = POS_MAX;
-+	u64 nr_seen = 0, nr_open = 0, nr_need_commit = 0, nr_discarded = 0;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	/*
-+	 * bch2_discard_one_bucket() does its own commit (rather than us using
-+	 * for_each_btree_key_commit()) so that counters are only incremented
-+	 * after a successful commit:
-+	 */
-+	ret = bch2_trans_run(c,
-+		for_each_btree_key2(trans, iter,
-+				BTREE_ID_need_discard, POS_MIN, 0, k,
-+			bch2_discard_one_bucket(trans, &iter, &last_discarded,
-+						&nr_seen,
-+						&nr_open,
-+						&nr_need_commit,
-+						&nr_discarded)));
-+
-+	/* Over half of the buckets seen were waiting on a journal flush */
-+	if (nr_need_commit * 2 > nr_seen)
-+		bch2_journal_flush_async(&c->journal, NULL);
-+
-+	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
-+
-+	trace_discard_buckets(c, nr_seen, nr_open, nr_need_commit, nr_discarded,
-+			      bch2_err_str(ret));
-+}
-+
-+/*
-+ * Kick off the discard worker. A BCH_WRITE_REF_discard reference is taken
-+ * for the queued work; bch2_do_discards_work() releases it when done.
-+ */
-+void bch2_do_discards(struct bch_fs *c)
-+{
-+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
-+		return;
-+
-+	/* queue_work() returned false: nothing new was queued, drop our ref */
-+	if (!queue_work(c->write_ref_wq, &c->discard_work))
-+		bch2_write_ref_put(c, BCH_WRITE_REF_discard);
-+}
-+
-+/*
-+ * Invalidate the cached bucket that the lru entry @lru_k points to: bump its
-+ * gen, clear its data type and sector counts, reset its io_times and commit.
-+ *
-+ * @lru_iter: iterator positioned at @lru_k in the lru btree
-+ * @nr_to_invalidate: in/out; remaining number of buckets to invalidate,
-+ * decremented after each successful commit
-+ *
-+ * Returns 1 once *@nr_to_invalidate has dropped to zero or below
-+ * (NOTE(review): presumably to stop the caller's iteration; confirm against
-+ * for_each_btree_key2_upto()), 0 when the bucket was invalidated or skipped,
-+ * or a negative error code.
-+ */
-+static int invalidate_one_bucket(struct btree_trans *trans,
-+ struct btree_iter *lru_iter,
-+ struct bkey_s_c lru_k,
-+ s64 *nr_to_invalidate)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter alloc_iter = { NULL };
-+ struct bkey_i_alloc_v4 *a = NULL;
-+ struct printbuf buf = PRINTBUF;
-+ struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
-+ unsigned cached_sectors;
-+ int ret = 0;
-+
-+ if (*nr_to_invalidate <= 0)
-+ return 1;
-+
-+ if (!bch2_dev_bucket_exists(c, bucket)) {
-+ prt_str(&buf, "lru entry points to invalid bucket");
-+ goto err;
-+ }
-+
-+ if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
-+ return 0;
-+
-+ a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
-+ ret = PTR_ERR_OR_ZERO(a);
-+ if (ret)
-+ goto out;
-+
-+ /* We expect harmless races here due to the btree write buffer: */
-+ if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v))
-+ goto out;
-+
-+ BUG_ON(a->v.data_type != BCH_DATA_cached);
-+
-+ if (!a->v.cached_sectors)
-+ bch_err(c, "invalidating empty bucket, confused");
-+
-+ /* Saved for the tracepoint; the field is zeroed below */
-+ cached_sectors = a->v.cached_sectors;
-+
-+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
-+ a->v.gen++;
-+ a->v.data_type = 0;
-+ a->v.dirty_sectors = 0;
-+ a->v.cached_sectors = 0;
-+ a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
-+ a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
-+
-+ ret = bch2_trans_update(trans, &alloc_iter, &a->k_i,
-+ BTREE_TRIGGER_BUCKET_INVALIDATE) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BCH_WATERMARK_btree|
-+ BTREE_INSERT_NOFAIL);
-+ if (ret)
-+ goto out;
-+
-+ trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors);
-+ --*nr_to_invalidate;
-+out:
-+ bch2_trans_iter_exit(trans, &alloc_iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+err:
-+ /* Build and log a description of the bad lru entry, then clean up via out */
-+ prt_str(&buf, "\n lru key: ");
-+ bch2_bkey_val_to_text(&buf, c, lru_k);
-+
-+ prt_str(&buf, "\n lru entry: ");
-+ bch2_lru_pos_to_text(&buf, lru_iter->pos);
-+
-+ prt_str(&buf, "\n alloc key: ");
-+ if (!a)
-+ bch2_bpos_to_text(&buf, bucket);
-+ else
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
-+
-+ bch_err(c, "%s", buf.buf);
-+ /* Only escalate to an inconsistency once recovery is past check_lrus */
-+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) {
-+ bch2_inconsistent_error(c);
-+ ret = -EINVAL;
-+ }
-+
-+ goto out;
-+}
-+
-+/*
-+ * Work item body for c->invalidate_work: flush the btree write buffer, then
-+ * for each member device walk its range of the lru btree and invalidate
-+ * cached buckets until should_invalidate_buckets() worth have been done.
-+ * Releases the BCH_WRITE_REF_invalidate reference before returning.
-+ */
-+static void bch2_do_invalidates_work(struct work_struct *work)
-+{
-+	struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter lru_iter;
-+	struct bkey_s_c lru_k;
-+	struct bch_dev *ca;
-+	unsigned dev_idx;
-+	int ret;
-+
-+	ret = bch2_btree_write_buffer_flush(trans);
-+	if (ret)
-+		goto err;
-+
-+	for_each_member_device(ca, c, dev_idx) {
-+		s64 nr_to_invalidate =
-+			should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
-+
-+		ret = for_each_btree_key2_upto(trans, lru_iter, BTREE_ID_lru,
-+				lru_pos(ca->dev_idx, 0, 0),
-+				lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
-+				BTREE_ITER_INTENT, lru_k,
-+			invalidate_one_bucket(trans, &lru_iter, lru_k, &nr_to_invalidate));
-+
-+		if (ret < 0) {
-+			/* Leaving the device loop early: drop the device ref ourselves */
-+			percpu_ref_put(&ca->ref);
-+			break;
-+		}
-+	}
-+err:
-+	bch2_trans_put(trans);
-+	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
-+}
-+
-+/*
-+ * Kick off the invalidate worker. A BCH_WRITE_REF_invalidate reference is
-+ * taken for the queued work; bch2_do_invalidates_work() releases it when done.
-+ */
-+void bch2_do_invalidates(struct bch_fs *c)
-+{
-+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
-+		return;
-+
-+	/* queue_work() returned false: nothing new was queued, drop our ref */
-+	if (!queue_work(c->write_ref_wq, &c->invalidate_work))
-+		bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
-+}
-+
-+/*
-+ * Build the freespace and need_discard index entries for the buckets
-+ * [@bucket_start, @bucket_end) of device @ca, then mark the member as
-+ * freespace-initialized in the in-memory superblock.
-+ *
-+ * Existing alloc keys are indexed one at a time through
-+ * bch2_bucket_do_index(); gaps in the alloc btree are inserted into the
-+ * freespace btree as a single KEY_TYPE_set key per gap. Each step is
-+ * committed on its own and retried on transaction restart.
-+ *
-+ * The scan starts at max(@bucket_start, ca->mi.first_bucket). @bucket_end
-+ * must not exceed ca->mi.nbuckets and must not be below @bucket_start.
-+ *
-+ * This function only sets the FREESPACE_INITIALIZED member flag under
-+ * sb_lock; it does not call bch2_write_super() itself.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
-+			    u64 bucket_start, u64 bucket_end)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	struct bkey hole;
-+	struct bpos end = POS(ca->dev_idx, bucket_end);
-+	struct bch_member *m;
-+	unsigned long last_updated = jiffies;
-+	int ret;
-+
-+	BUG_ON(bucket_start > bucket_end);
-+	BUG_ON(bucket_end > ca->mi.nbuckets);
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
-+		POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
-+		BTREE_ITER_PREFETCH);
-+	/*
-+	 * Scan the alloc btree for every bucket on @ca, and add buckets to the
-+	 * freespace/need_discard btrees as needed:
-+	 */
-+	while (1) {
-+		/*
-+		 * Progress message every 10 seconds. time_after() keeps the
-+		 * comparison correct when jiffies wraps around.
-+		 */
-+		if (time_after(jiffies, last_updated + HZ * 10)) {
-+			bch_info(ca, "%s: currently at %llu/%llu",
-+				 __func__, iter.pos.offset, ca->mi.nbuckets);
-+			last_updated = jiffies;
-+		}
-+
-+		bch2_trans_begin(trans);
-+
-+		if (bkey_ge(iter.pos, end)) {
-+			ret = 0;
-+			break;
-+		}
-+
-+		k = bch2_get_key_or_hole(&iter, end, &hole);
-+		ret = bkey_err(k);
-+		if (ret)
-+			goto bkey_err;
-+
-+		if (k.k->type) {
-+			/*
-+			 * We process live keys in the alloc btree one at a
-+			 * time:
-+			 */
-+			struct bch_alloc_v4 a_convert;
-+			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
-+
-+			ret =   bch2_bucket_do_index(trans, k, a, true) ?:
-+				bch2_trans_commit(trans, NULL, NULL,
-+						  BTREE_INSERT_LAZY_RW|
-+						  BTREE_INSERT_NOFAIL);
-+			if (ret)
-+				goto bkey_err;
-+
-+			bch2_btree_iter_advance(&iter);
-+		} else {
-+			/* A hole: the whole range goes in as one freespace key */
-+			struct bkey_i *freespace;
-+
-+			freespace = bch2_trans_kmalloc(trans, sizeof(*freespace));
-+			ret = PTR_ERR_OR_ZERO(freespace);
-+			if (ret)
-+				goto bkey_err;
-+
-+			bkey_init(&freespace->k);
-+			freespace->k.type	= KEY_TYPE_set;
-+			freespace->k.p		= k.k->p;
-+			freespace->k.size	= k.k->size;
-+
-+			ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?:
-+				bch2_trans_commit(trans, NULL, NULL,
-+						  BTREE_INSERT_LAZY_RW|
-+						  BTREE_INSERT_NOFAIL);
-+			if (ret)
-+				goto bkey_err;
-+
-+			bch2_btree_iter_set_pos(&iter, k.k->p);
-+		}
-+bkey_err:
-+		/* On a transaction restart, retry from the current position */
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			continue;
-+		if (ret)
-+			break;
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	bch2_trans_put(trans);
-+
-+	if (ret < 0) {
-+		bch_err_msg(ca, ret, "initializing free space");
-+		return ret;
-+	}
-+
-+	mutex_lock(&c->sb_lock);
-+	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-+	SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
-+	mutex_unlock(&c->sb_lock);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Run bch2_dev_freespace_init() over buckets [0, nbuckets) of every member
-+ * device that is not yet flagged freespace_initialized, then write the
-+ * superblock if at least one device needed the work.
-+ *
-+ * Returns 0 on success, or the first error from bch2_dev_freespace_init().
-+ */
-+int bch2_fs_freespace_init(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+ int ret = 0;
-+ bool doing_init = false;
-+
-+ /*
-+ * We can crash during the device add path, so we need to check this on
-+ * every mount:
-+ */
-+
-+ for_each_member_device(ca, c, i) {
-+ if (ca->mi.freespace_initialized)
-+ continue;
-+
-+ /* log the start message only once, for the first device that needs it */
-+ if (!doing_init) {
-+ bch_info(c, "initializing freespace");
-+ doing_init = true;
-+ }
-+
-+ ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
-+ if (ret) {
-+ /*
-+ * Leaving the iteration early: presumably
-+ * for_each_member_device() holds a ref on @ca that must be
-+ * dropped by hand here - confirm against the macro.
-+ */
-+ percpu_ref_put(&ca->ref);
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+ }
-+
-+ /* persist the per-device flags set by bch2_dev_freespace_init() */
-+ if (doing_init) {
-+ mutex_lock(&c->sb_lock);
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+ bch_verbose(c, "done initializing freespace");
-+ }
-+
-+ return 0;
-+}
-+
-+/* Bucket IO clocks: */
-+
-+/*
-+ * Set io_time[@rw] of bucket (@dev, @bucket_nr) to the current value of
-+ * c->io_clock[@rw] and commit the change.
-+ *
-+ * If the stored time already equals the clock value no update is issued.
-+ * Returns 0 on success or the error from starting, updating or committing
-+ * the alloc key.
-+ */
-+int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
-+			      size_t bucket_nr, int rw)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter iter;
-+	struct bkey_i_alloc_v4 *a;
-+	u64 now;
-+	int ret = 0;
-+
-+	a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
-+	ret = PTR_ERR_OR_ZERO(a);
-+	if (ret)
-+		return ret;
-+
-+	now = atomic64_read(&c->io_clock[rw].now);
-+
-+	/* only dirty the key when the timestamp actually changes */
-+	if (a->v.io_time[rw] != now) {
-+		a->v.io_time[rw] = now;
-+
-+		ret   = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
-+			bch2_trans_commit(trans, NULL, NULL, 0);
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/* Startup/shutdown (ro/rw): */
-+
-+/*
-+ * Recompute c->capacity and c->bucket_size_max from the current rw member
-+ * devices, and the readahead page count from the online member devices.
-+ *
-+ * c->state_lock must be held (checked with lockdep_assert_held()).
-+ */
-+void bch2_recalc_capacity(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ u64 capacity = 0, reserved_sectors = 0, gc_reserve;
-+ unsigned bucket_size_max = 0;
-+ unsigned long ra_pages = 0;
-+ unsigned i;
-+
-+ lockdep_assert_held(&c->state_lock);
-+
-+ /* readahead: sum of the backing devices' ra_pages */
-+ for_each_online_member(ca, c, i) {
-+ struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;
-+
-+ ra_pages += bdi->ra_pages;
-+ }
-+
-+ bch2_set_ra_pages(c, ra_pages);
-+
-+ for_each_rw_member(ca, c, i) {
-+ u64 dev_reserve = 0;
-+
-+ /*
-+ * We need to reserve buckets (from the number
-+ * of currently available buckets) against
-+ * foreground writes so that mainly copygc can
-+ * make forward progress.
-+ *
-+ * We need enough to refill the various reserves
-+ * from scratch - copygc will use its entire
-+ * reserve all at once, then run again when
-+ * its reserve is refilled (from the formerly
-+ * available buckets).
-+ *
-+ * This reserve is just used when considering if
-+ * allocations for foreground writes must wait -
-+ * not -ENOSPC calculations.
-+ */
-+
-+ dev_reserve += ca->nr_btree_reserve * 2;
-+ dev_reserve += ca->mi.nbuckets >> 6; /* copygc reserve */
-+
-+ dev_reserve += 1; /* btree write point */
-+ dev_reserve += 1; /* copygc write point */
-+ dev_reserve += 1; /* rebalance write point */
-+
-+ /* convert the bucket count into the unit used by reserved_sectors */
-+ dev_reserve *= ca->mi.bucket_size;
-+
-+ capacity += bucket_to_sector(ca, ca->mi.nbuckets -
-+ ca->mi.first_bucket);
-+
-+ reserved_sectors += dev_reserve * 2;
-+
-+ bucket_size_max = max_t(unsigned, bucket_size_max,
-+ ca->mi.bucket_size);
-+ }
-+
-+ /* explicit byte reserve (>> 9 converts bytes to 512-byte units) wins over the percentage */
-+ gc_reserve = c->opts.gc_reserve_bytes
-+ ? c->opts.gc_reserve_bytes >> 9
-+ : div64_u64(capacity * c->opts.gc_reserve_percent, 100);
-+
-+ reserved_sectors = max(gc_reserve, reserved_sectors);
-+
-+ /* never reserve more than exists, so the subtraction below cannot underflow */
-+ reserved_sectors = min(reserved_sectors, capacity);
-+
-+ c->capacity = capacity - reserved_sectors;
-+
-+ c->bucket_size_max = bucket_size_max;
-+
-+ /* Wake up in case someone was waiting for buckets */
-+ closure_wake_up(&c->freelist_wait);
-+}
-+
-+/*
-+ * Smallest nbuckets * bucket_size over all rw member devices, or U64_MAX when
-+ * the iteration visits no device.
-+ */
-+u64 bch2_min_rw_member_capacity(struct bch_fs *c)
-+{
-+	struct bch_dev *ca;
-+	unsigned i;
-+	u64 smallest = U64_MAX;
-+
-+	for_each_rw_member(ca, c, i) {
-+		u64 dev_size = ca->mi.nbuckets * ca->mi.bucket_size;
-+
-+		smallest = min(smallest, dev_size);
-+	}
-+
-+	return smallest;
-+}
-+
-+/*
-+ * True if any entry of c->open_buckets is valid, not on the partial list and
-+ * belongs to @ca.  Every entry is visited and inspected under its own
-+ * ob->lock; the scan does not stop at the first match.
-+ */
-+static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
-+{
-+	bool found = false;
-+	size_t idx;
-+
-+	for (idx = 0; idx < ARRAY_SIZE(c->open_buckets); idx++) {
-+		struct open_bucket *ob = c->open_buckets + idx;
-+
-+		spin_lock(&ob->lock);
-+		if (ob->valid && !ob->on_partial_list &&
-+		    ob->dev == ca->dev_idx)
-+			found = true;
-+		spin_unlock(&ob->lock);
-+	}
-+
-+	return found;
-+}
-+
-+/*
-+ * device goes ro:
-+ *
-+ * Take @ca out of every c->rw_devs mask, recompute capacity, stop its open
-+ * buckets, wake anything blocked on allocation or journal space, then wait
-+ * until bch2_dev_has_open_write_point() reports no open write point on @ca.
-+ */
-+void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ unsigned i;
-+
-+ /* First, remove device from allocation groups: */
-+
-+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
-+ clear_bit(ca->dev_idx, c->rw_devs[i].d);
-+
-+ /*
-+ * Capacity is calculated based off of devices in allocation groups:
-+ */
-+ bch2_recalc_capacity(c);
-+
-+ bch2_open_buckets_stop(c, ca, false);
-+
-+ /*
-+ * Wake up threads that were blocked on allocation, so they can notice
-+ * the device can no longer be removed and the capacity has changed:
-+ */
-+ closure_wake_up(&c->freelist_wait);
-+
-+ /*
-+ * journal_res_get() can block waiting for free space in the journal -
-+ * it needs to notice there may not be devices to allocate from anymore:
-+ */
-+ wake_up(&c->journal.wait);
-+
-+ /* Now wait for any in flight writes: */
-+
-+ closure_wait_event(&c->open_buckets_wait,
-+ !bch2_dev_has_open_write_point(c, ca));
-+}
-+
-+/*
-+ * device goes rw:
-+ *
-+ * Set @ca's bit in c->rw_devs[i] for every index i whose bit is set in
-+ * ca->mi.data_allowed.
-+ */
-+void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
-+{
-+	unsigned i;
-+
-+	for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) {
-+		if (!(ca->mi.data_allowed & (1 << i)))
-+			continue;
-+
-+		set_bit(ca->dev_idx, c->rw_devs[i].d);
-+	}
-+}
-+
-+/*
-+ * One-time setup of background allocator state in @c: the freelist spinlock
-+ * and the work items that run bch2_do_discards_work() and
-+ * bch2_do_invalidates_work().
-+ */
-+void bch2_fs_allocator_background_init(struct bch_fs *c)
-+{
-+ spin_lock_init(&c->freelist_lock);
-+ INIT_WORK(&c->discard_work, bch2_do_discards_work);
-+ INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
-+}
-diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
-new file mode 100644
-index 000000000000..73faf99a222a
---- /dev/null
-+++ b/fs/bcachefs/alloc_background.h
-@@ -0,0 +1,259 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_ALLOC_BACKGROUND_H
-+#define _BCACHEFS_ALLOC_BACKGROUND_H
-+
-+#include "bcachefs.h"
-+#include "alloc_types.h"
-+#include "buckets.h"
-+#include "debug.h"
-+#include "super.h"
-+
-+enum bkey_invalid_flags;
-+
-+/* How out of date a pointer gen is allowed to be: */
-+#define BUCKET_GC_GEN_MAX 96U
-+
-+/*
-+ * True if @pos names a bucket that exists: pos.inode must be an existing
-+ * device and pos.offset must lie in [first_bucket, nbuckets) of that device.
-+ */
-+static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
-+{
-+	struct bch_dev *dev;
-+
-+	if (!bch2_dev_exists2(c, pos.inode))
-+		return false;
-+
-+	dev = bch_dev_bkey_exists(c, pos.inode);
-+
-+	if (pos.offset < dev->mi.first_bucket)
-+		return false;
-+
-+	return pos.offset < dev->mi.nbuckets;
-+}
-+
-+/*
-+ * Pack a bucket position into one u64: bucket.inode shifted left by 48 bits,
-+ * OR-ed with bucket.offset.
-+ */
-+static inline u64 bucket_to_u64(struct bpos bucket)
-+{
-+	u64 dev_bits = bucket.inode << 48;
-+
-+	return dev_bits | bucket.offset;
-+}
-+
-+/*
-+ * Inverse of the 48-bit packing: the bits above bit 47 become the inode, the
-+ * low 48 bits become the offset.
-+ */
-+static inline struct bpos u64_to_bucket(u64 bucket)
-+{
-+	const u64 offset_mask = ~(~0ULL << 48);
-+
-+	return POS(bucket >> 48, bucket & offset_mask);
-+}
-+
-+/*
-+ * Difference between the bucket's gen and its oldest_gen, truncated to u8 by
-+ * the return type.
-+ */
-+static inline u8 alloc_gc_gen(struct bch_alloc_v4 a)
-+{
-+ return a.gen - a.oldest_gen;
-+}
-+
-+/*
-+ * Derive a bucket's effective data type from its counters.  The checks are
-+ * applied in priority order:
-+ *   stripe set       -> @data_type if it is BCH_DATA_parity, else BCH_DATA_stripe
-+ *   dirty sectors    -> @data_type
-+ *   cached sectors   -> BCH_DATA_cached
-+ *   NEED_DISCARD set -> BCH_DATA_need_discard
-+ *   gc gen too old   -> BCH_DATA_need_gc_gens
-+ *   otherwise        -> BCH_DATA_free
-+ */
-+static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors,
-+						   u32 cached_sectors,
-+						   u32 stripe,
-+						   struct bch_alloc_v4 a,
-+						   enum bch_data_type data_type)
-+{
-+	enum bch_data_type result;
-+
-+	if (stripe) {
-+		if (data_type == BCH_DATA_parity)
-+			result = data_type;
-+		else
-+			result = BCH_DATA_stripe;
-+	} else if (dirty_sectors) {
-+		result = data_type;
-+	} else if (cached_sectors) {
-+		result = BCH_DATA_cached;
-+	} else if (BCH_ALLOC_V4_NEED_DISCARD(&a)) {
-+		result = BCH_DATA_need_discard;
-+	} else if (alloc_gc_gen(a) >= BUCKET_GC_GEN_MAX) {
-+		result = BCH_DATA_need_gc_gens;
-+	} else {
-+		result = BCH_DATA_free;
-+	}
-+
-+	return result;
-+}
-+
-+/*
-+ * Convenience wrapper: __alloc_data_type() fed with the sector counts and
-+ * stripe field taken from @a itself.
-+ */
-+static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
-+ enum bch_data_type data_type)
-+{
-+ return __alloc_data_type(a.dirty_sectors, a.cached_sectors,
-+ a.stripe, a, data_type);
-+}
-+
-+/* Map BCH_DATA_stripe to BCH_DATA_user; every other type is returned unchanged. */
-+static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
-+{
-+	if (data_type == BCH_DATA_stripe)
-+		return BCH_DATA_user;
-+
-+	return data_type;
-+}
-+
-+/* io_time[READ] for buckets whose data_type is BCH_DATA_cached, 0 for all others. */
-+static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
-+{
-+	if (a.data_type != BCH_DATA_cached)
-+		return 0;
-+
-+	return a.io_time[READ];
-+}
-+
-+/* Bitmask (one bit per enum bch_data_type value) of the types counted as movable. */
-+#define DATA_TYPES_MOVABLE \
-+ ((1U << BCH_DATA_btree)| \
-+ (1U << BCH_DATA_user)| \
-+ (1U << BCH_DATA_stripe))
-+
-+/* True if @type's bit is set in DATA_TYPES_MOVABLE. */
-+static inline bool data_type_movable(enum bch_data_type type)
-+{
-+ return (1U << type) & DATA_TYPES_MOVABLE;
-+}
-+
-+/*
-+ * Fragmentation index of a bucket: dirty_sectors / bucket_size scaled by 2^31.
-+ *
-+ * Returns 0 when the bucket's data type is not movable or when dirty_sectors
-+ * is at least the bucket size.
-+ */
-+static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
-+					      struct bch_dev *ca)
-+{
-+	u64 scaled;
-+
-+	if (!data_type_movable(a.data_type))
-+		return 0;
-+
-+	if (a.dirty_sectors >= ca->mi.bucket_size)
-+		return 0;
-+
-+	scaled = (u64) a.dirty_sectors * (1ULL << 31);
-+	return div_u64(scaled, ca->mi.bucket_size);
-+}
-+
-+/*
-+ * Top nibble of alloc_gc_gen(@a) (the value shifted right by 4), placed at
-+ * bit 56 and above of the result.
-+ */
-+static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
-+{
-+	u64 gen_high = (u64) alloc_gc_gen(a) >> 4;
-+
-+	return gen_high << 56;
-+}
-+
-+/*
-+ * Return @pos with alloc_freespace_genbits(@a) OR-ed into pos.offset; @pos is
-+ * passed by value, so the caller's copy is not modified.
-+ */
-+static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_v4 a)
-+{
-+ pos.offset |= alloc_freespace_genbits(a);
-+ return pos;
-+}
-+
-+/*
-+ * Size of an alloc_v4 value in u64s: the backpointers start offset (falling
-+ * back to BCH_ALLOC_V4_U64s_V0 when the stored start is 0) plus the space
-+ * taken by NR_BACKPOINTERS backpointer entries.
-+ *
-+ * BUGs if the result would not fit in a key (greater than U8_MAX - BKEY_U64s).
-+ */
-+static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
-+{
-+	unsigned bp_start = BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
-+			    BCH_ALLOC_V4_U64s_V0;
-+	unsigned bp_u64s = BCH_ALLOC_V4_NR_BACKPOINTERS(a) *
-+			   (sizeof(struct bch_backpointer) / sizeof(u64));
-+	unsigned total = bp_start + bp_u64s;
-+
-+	BUG_ON(total > U8_MAX - BKEY_U64s);
-+	return total;
-+}
-+
-+/* Set the value size of key @a to what alloc_v4_u64s() computes from its value. */
-+static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
-+{
-+ set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v));
-+}
-+
-+struct bkey_i_alloc_v4 *
-+bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos);
-+
-+void __bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);
-+
-+/*
-+ * Get an alloc_v4 view of key @k.
-+ *
-+ * Fast path: @k is already KEY_TYPE_alloc_v4 and its backpointers start at
-+ * BCH_ALLOC_V4_U64s, so the value inside @k is returned directly.  Otherwise
-+ * the key is converted into @convert by __bch2_alloc_to_v4() and @convert is
-+ * returned.  The result therefore points either into @k or at @convert.
-+ */
-+static inline const struct bch_alloc_v4 *bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *convert)
-+{
-+	if (likely(k.k->type == KEY_TYPE_alloc_v4)) {
-+		const struct bch_alloc_v4 *v4 = bkey_s_c_to_alloc_v4(k).v;
-+
-+		if (BCH_ALLOC_V4_BACKPOINTERS_START(v4) == BCH_ALLOC_V4_U64s)
-+			return v4;
-+	}
-+
-+	__bch2_alloc_to_v4(k, convert);
-+	return convert;
-+}
-+
-+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s_c);
-+
-+int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
-+
-+int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_alloc_v4_swab(struct bkey_s);
-+void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+/*
-+ * bkey_ops tables for the four alloc key versions.  All share the same
-+ * to_text and trigger callbacks; they differ in the key_invalid callback and
-+ * in min_val_size (presumably bytes - confirm against struct bkey_ops).
-+ * Only alloc_v4 supplies a swab callback.
-+ */
-+#define bch2_bkey_ops_alloc ((struct bkey_ops) { \
-+ .key_invalid = bch2_alloc_v1_invalid, \
-+ .val_to_text = bch2_alloc_to_text, \
-+ .trans_trigger = bch2_trans_mark_alloc, \
-+ .atomic_trigger = bch2_mark_alloc, \
-+ .min_val_size = 8, \
-+})
-+
-+#define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \
-+ .key_invalid = bch2_alloc_v2_invalid, \
-+ .val_to_text = bch2_alloc_to_text, \
-+ .trans_trigger = bch2_trans_mark_alloc, \
-+ .atomic_trigger = bch2_mark_alloc, \
-+ .min_val_size = 8, \
-+})
-+
-+#define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \
-+ .key_invalid = bch2_alloc_v3_invalid, \
-+ .val_to_text = bch2_alloc_to_text, \
-+ .trans_trigger = bch2_trans_mark_alloc, \
-+ .atomic_trigger = bch2_mark_alloc, \
-+ .min_val_size = 16, \
-+})
-+
-+#define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \
-+ .key_invalid = bch2_alloc_v4_invalid, \
-+ .val_to_text = bch2_alloc_to_text, \
-+ .swab = bch2_alloc_v4_swab, \
-+ .trans_trigger = bch2_trans_mark_alloc, \
-+ .atomic_trigger = bch2_mark_alloc, \
-+ .min_val_size = 48, \
-+})
-+
-+int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+/* bkey_ops table for bucket_gens keys: validation and to_text callbacks only. */
-+#define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \
-+ .key_invalid = bch2_bucket_gens_invalid, \
-+ .val_to_text = bch2_bucket_gens_to_text, \
-+})
-+
-+int bch2_bucket_gens_init(struct bch_fs *);
-+
-+/*
-+ * True for the alloc key types v1, v2 and v3.  KEY_TYPE_alloc_v4 is not in
-+ * the list and yields false.
-+ */
-+static inline bool bkey_is_alloc(const struct bkey *k)
-+{
-+	switch (k->type) {
-+	case KEY_TYPE_alloc:
-+	case KEY_TYPE_alloc_v2:
-+	case KEY_TYPE_alloc_v3:
-+		return true;
-+	default:
-+		return false;
-+	}
-+}
-+
-+int bch2_alloc_read(struct bch_fs *);
-+
-+int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_i *, unsigned);
-+int bch2_check_alloc_info(struct bch_fs *);
-+int bch2_check_alloc_to_lru_refs(struct bch_fs *);
-+void bch2_do_discards(struct bch_fs *);
-+
-+/*
-+ * How many buckets on @ca should be invalidated, given usage snapshot @u.
-+ *
-+ * The target is nbuckets / 128 free buckets.  "free" counts free plus
-+ * need_discard buckets minus the BCH_WATERMARK_stripe reserve, floored at 0.
-+ * The shortfall is clamped to [0, number of cached buckets], so the result is
-+ * 0 when the target is already met.
-+ */
-+static inline u64 should_invalidate_buckets(struct bch_dev *ca,
-+ struct bch_dev_usage u)
-+{
-+ u64 want_free = ca->mi.nbuckets >> 7;
-+ u64 free = max_t(s64, 0,
-+ u.d[BCH_DATA_free].buckets
-+ + u.d[BCH_DATA_need_discard].buckets
-+ - bch2_dev_buckets_reserved(ca, BCH_WATERMARK_stripe));
-+
-+ /* clamp_t(s64, ...) reinterprets the u64 difference as signed, so a surplus clamps to 0 */
-+ return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
-+}
-+
-+void bch2_do_invalidates(struct bch_fs *);
-+
-+/*
-+ * Pointer to the backpointer array inside alloc_v4 value @a: the address of
-+ * a->v advanced by BACKPOINTERS_START u64s, or by BCH_ALLOC_V4_U64s_V0 when
-+ * the stored start is 0.
-+ */
-+static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
-+{
-+ return (void *) ((u64 *) &a->v +
-+ (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
-+ BCH_ALLOC_V4_U64s_V0));
-+}
-+
-+/*
-+ * Const variant of alloc_v4_backpointers(): pointer to the backpointer array
-+ * inside alloc_v4 value @a.
-+ *
-+ * Uses the same start offset as alloc_v4_backpointers() and alloc_v4_u64s():
-+ * BACKPOINTERS_START, or BCH_ALLOC_V4_U64s_V0 when the stored start is 0, so
-+ * all three helpers agree on where the array begins.  The arithmetic is done
-+ * on a const-qualified pointer so constness is not cast away.
-+ */
-+static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a)
-+{
-+	return (const void *) ((const u64 *) &a->v +
-+			       (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
-+				BCH_ALLOC_V4_U64s_V0));
-+}
-+
-+int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64);
-+int bch2_fs_freespace_init(struct bch_fs *);
-+
-+void bch2_recalc_capacity(struct bch_fs *);
-+u64 bch2_min_rw_member_capacity(struct bch_fs *);
-+
-+void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
-+void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
-+
-+void bch2_fs_allocator_background_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
-diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
-new file mode 100644
-index 000000000000..b85c7765272f
---- /dev/null
-+++ b/fs/bcachefs/alloc_foreground.c
-@@ -0,0 +1,1600 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Copyright 2012 Google, Inc.
-+ *
-+ * Foreground allocator code: allocate buckets from freelist, and allocate in
-+ * sector granularity from writepoints.
-+ *
-+ * bch2_bucket_alloc() allocates a single bucket from a specific device.
-+ *
-+ * bch2_bucket_alloc_set() allocates one or more buckets from different devices
-+ * in a given filesystem.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "backpointers.h"
-+#include "btree_iter.h"
-+#include "btree_update.h"
-+#include "btree_gc.h"
-+#include "buckets.h"
-+#include "buckets_waiting_for_journal.h"
-+#include "clock.h"
-+#include "debug.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "io_write.h"
-+#include "journal.h"
-+#include "movinggc.h"
-+#include "nocow_locking.h"
-+#include "trace.h"
-+
-+#include <linux/math64.h>
-+#include <linux/rculist.h>
-+#include <linux/rcupdate.h>
-+
-+/*
-+ * Acquire @lock.  If it cannot be taken without blocking, the transaction's
-+ * btree locks are dropped with bch2_trans_unlock() before sleeping on the
-+ * mutex; they are not re-taken here.
-+ */
-+static void bch2_trans_mutex_lock_norelock(struct btree_trans *trans,
-+					   struct mutex *lock)
-+{
-+	if (mutex_trylock(lock))
-+		return;
-+
-+	bch2_trans_unlock(trans);
-+	mutex_lock(lock);
-+}
-+
-+/*
-+ * NULL-terminated table of watermark names, generated by stringifying each
-+ * entry of the BCH_WATERMARKS() x-macro.
-+ */
-+const char * const bch2_watermarks[] = {
-+#define x(t) #t,
-+ BCH_WATERMARKS()
-+#undef x
-+ NULL
-+};
-+
-+/*
-+ * Open buckets represent a bucket that's currently being allocated from. They
-+ * serve two purposes:
-+ *
-+ * - They track buckets that have been partially allocated, allowing for
-+ * sub-bucket sized allocations - they're used by the sector allocator below
-+ *
-+ * - They provide a reference to the buckets they own that mark and sweep GC
-+ * can find, until the new allocation has a pointer to it inserted into the
-+ * btree
-+ *
-+ * When allocating some space with the sector allocator, the allocation comes
-+ * with a reference to an open bucket - the caller is required to put that
-+ * reference _after_ doing the index update that makes its allocation reachable.
-+ */
-+
-+/* Reset alloc_cursor to 0 on every member device, iterating under rcu_read_lock(). */
-+void bch2_reset_alloc_cursors(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ rcu_read_lock();
-+ for_each_member_device_rcu(ca, c, i, NULL)
-+ ca->alloc_cursor = 0;
-+ rcu_read_unlock();
-+}
-+
-+/*
-+ * Insert @ob at the head of its hash chain: the slot for (ob->dev, ob->bucket)
-+ * is made to hold @ob's index, and ob->hash takes the slot's previous content.
-+ */
-+static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
-+{
-+	open_bucket_idx_t *head = open_bucket_hashslot(c, ob->dev, ob->bucket);
-+	open_bucket_idx_t self = ob - c->open_buckets;
-+
-+	ob->hash = *head;
-+	*head = self;
-+}
-+
-+/*
-+ * Unlink @ob from its hash chain.  The chain is a singly linked list of
-+ * open bucket indices threaded through ob->hash; index 0 terminates it, so
-+ * reaching 0 before finding @ob is a BUG.
-+ */
-+static void bch2_open_bucket_hash_remove(struct bch_fs *c, struct open_bucket *ob)
-+{
-+ open_bucket_idx_t idx = ob - c->open_buckets;
-+ open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);
-+
-+ /* walk link by link until @slot is the link that points at @ob */
-+ while (*slot != idx) {
-+ BUG_ON(!*slot);
-+ slot = &c->open_buckets[*slot].hash;
-+ }
-+
-+ *slot = ob->hash;
-+ ob->hash = 0;
-+}
-+
-+/*
-+ * Release open bucket @ob.
-+ *
-+ * If @ob belongs to an erasure-coded stripe (ob->ec set) only the stripe's
-+ * STRIPE_REF_io reference is dropped and the function returns.  Otherwise
-+ * @ob is marked invalid, removed from the hash, pushed onto
-+ * c->open_buckets_freelist and waiters on c->open_buckets_wait are woken.
-+ */
-+void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-+
-+ if (ob->ec) {
-+ ec_stripe_new_put(c, ob->ec, STRIPE_REF_io);
-+ return;
-+ }
-+
-+ /* invalidate under mark_lock (read side) and the bucket's own lock */
-+ percpu_down_read(&c->mark_lock);
-+ spin_lock(&ob->lock);
-+
-+ ob->valid = false;
-+ ob->data_type = 0;
-+
-+ spin_unlock(&ob->lock);
-+ percpu_up_read(&c->mark_lock);
-+
-+ /* hash table, freelist and counters are all updated under freelist_lock */
-+ spin_lock(&c->freelist_lock);
-+ bch2_open_bucket_hash_remove(c, ob);
-+
-+ ob->freelist = c->open_buckets_freelist;
-+ c->open_buckets_freelist = ob - c->open_buckets;
-+
-+ c->open_buckets_nr_free++;
-+ ca->nr_open_buckets--;
-+ spin_unlock(&c->freelist_lock);
-+
-+ closure_wake_up(&c->open_buckets_wait);
-+}
-+
-+/*
-+ * For every open bucket in @obs that is on device @dev and has ob->ec set,
-+ * call bch2_ec_bucket_cancel().
-+ */
-+void bch2_open_bucket_write_error(struct bch_fs *c,
-+ struct open_buckets *obs,
-+ unsigned dev)
-+{
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ open_bucket_for_each(c, obs, ob, i)
-+ if (ob->dev == dev && ob->ec)
-+ bch2_ec_bucket_cancel(c, ob);
-+}
-+
-+/*
-+ * Pop the head of c->open_buckets_freelist and return it with pin set to 1
-+ * and data_type cleared.  The freelist must be non-empty (index 0 means
-+ * empty) - callers check open_buckets_nr_free first; violating that is a BUG.
-+ * __try_alloc_bucket() calls this with c->freelist_lock held.
-+ */
-+static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
-+{
-+ struct open_bucket *ob;
-+
-+ BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free);
-+
-+ ob = c->open_buckets + c->open_buckets_freelist;
-+ c->open_buckets_freelist = ob->freelist;
-+ atomic_set(&ob->pin, 1);
-+ ob->data_type = 0;
-+
-+ c->open_buckets_nr_free--;
-+ return ob;
-+}
-+
-+/*
-+ * Park @ob on c->open_buckets_partial (setting ob->on_partial_list) and wake
-+ * waiters on both open_buckets_wait and freelist_wait.
-+ *
-+ * NOTE(review): the BUG_ON reads open_buckets_partial_nr before freelist_lock
-+ * is taken, while the append itself happens under the lock.
-+ */
-+static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
-+{
-+ BUG_ON(c->open_buckets_partial_nr >=
-+ ARRAY_SIZE(c->open_buckets_partial));
-+
-+ spin_lock(&c->freelist_lock);
-+ ob->on_partial_list = true;
-+ c->open_buckets_partial[c->open_buckets_partial_nr++] =
-+ ob - c->open_buckets;
-+ spin_unlock(&c->freelist_lock);
-+
-+ closure_wake_up(&c->open_buckets_wait);
-+ closure_wake_up(&c->freelist_wait);
-+}
-+
-+/*
-+ * _only_ for allocating the journal on a new device:
-+ *
-+ * Advance ca->new_fs_bucket_idx and return the first bucket that is neither a
-+ * superblock bucket nor set in ca->buckets_nouse (when that bitmap exists).
-+ * Returns -1 once the cursor reaches ca->mi.nbuckets.
-+ */
-+long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
-+{
-+	while (ca->new_fs_bucket_idx < ca->mi.nbuckets) {
-+		u64 candidate = ca->new_fs_bucket_idx++;
-+
-+		if (is_superblock_bucket(ca, candidate))
-+			continue;
-+
-+		if (ca->buckets_nouse && test_bit(candidate, ca->buckets_nouse))
-+			continue;
-+
-+		return candidate;
-+	}
-+
-+	return -1;
-+}
-+
-+/*
-+ * Number of open buckets held back from allocations at @watermark:
-+ *   reclaim              -> 0
-+ *   btree, btree_copygc  -> OPEN_BUCKETS_COUNT / 4
-+ *   copygc               -> OPEN_BUCKETS_COUNT / 3
-+ *   anything else        -> OPEN_BUCKETS_COUNT / 2
-+ */
-+static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
-+{
-+	if (watermark == BCH_WATERMARK_reclaim)
-+		return 0;
-+
-+	if (watermark == BCH_WATERMARK_btree ||
-+	    watermark == BCH_WATERMARK_btree_copygc)
-+		return OPEN_BUCKETS_COUNT / 4;
-+
-+	if (watermark == BCH_WATERMARK_copygc)
-+		return OPEN_BUCKETS_COUNT / 3;
-+
-+	return OPEN_BUCKETS_COUNT / 2;
-+}
-+
-+/*
-+ * Try to turn bucket @bucket on @ca into an open bucket.
-+ *
-+ * Returns:
-+ * - NULL if the bucket must be skipped (marked nouse, already open, waiting
-+ * on a journal commit, or nocow-locked); the matching counter in @s is
-+ * bumped,
-+ * - ERR_PTR(-BCH_ERR_open_buckets_empty) if the number of free open buckets
-+ * is at or below the reserve for @watermark (@cl, if given, is put on
-+ * c->open_buckets_wait),
-+ * - otherwise the newly initialised open_bucket, with gen taken from @a.
-+ */
-+static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
-+ u64 bucket,
-+ enum bch_watermark watermark,
-+ const struct bch_alloc_v4 *a,
-+ struct bucket_alloc_state *s,
-+ struct closure *cl)
-+{
-+ struct open_bucket *ob;
-+
-+ if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
-+ s->skipped_nouse++;
-+ return NULL;
-+ }
-+
-+ /* unlocked check; repeated below once freelist_lock is held */
-+ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
-+ s->skipped_open++;
-+ return NULL;
-+ }
-+
-+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-+ c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) {
-+ s->skipped_need_journal_commit++;
-+ return NULL;
-+ }
-+
-+ if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) {
-+ s->skipped_nocow++;
-+ return NULL;
-+ }
-+
-+ spin_lock(&c->freelist_lock);
-+
-+ if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) {
-+ if (cl)
-+ closure_wait(&c->open_buckets_wait, cl);
-+
-+ /* remember when we first blocked, for the time stats updated below */
-+ if (!c->blocked_allocate_open_bucket)
-+ c->blocked_allocate_open_bucket = local_clock();
-+
-+ spin_unlock(&c->freelist_lock);
-+ return ERR_PTR(-BCH_ERR_open_buckets_empty);
-+ }
-+
-+ /* Recheck under lock: */
-+ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
-+ spin_unlock(&c->freelist_lock);
-+ s->skipped_open++;
-+ return NULL;
-+ }
-+
-+ ob = bch2_open_bucket_alloc(c);
-+
-+ spin_lock(&ob->lock);
-+
-+ ob->valid = true;
-+ ob->sectors_free = ca->mi.bucket_size;
-+ ob->dev = ca->dev_idx;
-+ ob->gen = a->gen;
-+ ob->bucket = bucket;
-+ spin_unlock(&ob->lock);
-+
-+ ca->nr_open_buckets++;
-+ bch2_open_bucket_hash_add(c, ob);
-+
-+ /* an allocation succeeded: account any time spent blocked and reset the stamps */
-+ if (c->blocked_allocate_open_bucket) {
-+ bch2_time_stats_update(
-+ &c->times[BCH_TIME_blocked_allocate_open_bucket],
-+ c->blocked_allocate_open_bucket);
-+ c->blocked_allocate_open_bucket = 0;
-+ }
-+
-+ if (c->blocked_allocate) {
-+ bch2_time_stats_update(
-+ &c->times[BCH_TIME_blocked_allocate],
-+ c->blocked_allocate);
-+ c->blocked_allocate = 0;
-+ }
-+
-+ spin_unlock(&c->freelist_lock);
-+ return ob;
-+}
-+
-+/*
-+ * Validate one freespace btree entry against the alloc btree and, if it
-+ * checks out, allocate the bucket via __try_alloc_bucket().
-+ *
-+ * @free_entry encodes the bucket number in its low 56 bits and the expected
-+ * genbits in its top 8 bits; @freespace_k is the freespace key it came from
-+ * and is only used in error messages.
-+ *
-+ * Returns an open_bucket, NULL if this bucket should be skipped, or an
-+ * ERR_PTR: -EIO after reporting an inconsistency, or the error from the
-+ * btree lookups / __try_alloc_bucket().
-+ */
-+static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
-+ enum bch_watermark watermark, u64 free_entry,
-+ struct bucket_alloc_state *s,
-+ struct bkey_s_c freespace_k,
-+ struct closure *cl)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter = { NULL };
-+ struct bkey_s_c k;
-+ struct open_bucket *ob;
-+ struct bch_alloc_v4 a_convert;
-+ const struct bch_alloc_v4 *a;
-+ u64 b = free_entry & ~(~0ULL << 56);
-+ unsigned genbits = free_entry >> 56;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) {
-+ prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n"
-+ " freespace key ",
-+ ca->mi.first_bucket, ca->mi.nbuckets);
-+ bch2_bkey_val_to_text(&buf, c, freespace_k);
-+ bch2_trans_inconsistent(trans, "%s", buf.buf);
-+ ob = ERR_PTR(-EIO);
-+ goto err;
-+ }
-+
-+ k = bch2_bkey_get_iter(trans, &iter,
-+ BTREE_ID_alloc, POS(ca->dev_idx, b),
-+ BTREE_ITER_CACHED);
-+ ret = bkey_err(k);
-+ if (ret) {
-+ ob = ERR_PTR(ret);
-+ goto err;
-+ }
-+
-+ a = bch2_alloc_to_v4(k, &a_convert);
-+
-+ if (a->data_type != BCH_DATA_free) {
-+ /* up to and including the check_alloc_info pass a mismatch is skipped silently */
-+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) {
-+ ob = NULL;
-+ goto err;
-+ }
-+
-+ prt_printf(&buf, "non free bucket in freespace btree\n"
-+ " freespace key ");
-+ bch2_bkey_val_to_text(&buf, c, freespace_k);
-+ prt_printf(&buf, "\n ");
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ bch2_trans_inconsistent(trans, "%s", buf.buf);
-+ ob = ERR_PTR(-EIO);
-+ goto err;
-+ }
-+
-+ if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
-+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
-+ prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
-+ " freespace key ",
-+ genbits, alloc_freespace_genbits(*a) >> 56);
-+ bch2_bkey_val_to_text(&buf, c, freespace_k);
-+ prt_printf(&buf, "\n ");
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ bch2_trans_inconsistent(trans, "%s", buf.buf);
-+ ob = ERR_PTR(-EIO);
-+ goto err;
-+ }
-+
-+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) {
-+ struct bch_backpointer bp;
-+ struct bpos bp_pos = POS_MIN;
-+
-+ ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1,
-+ &bp_pos, &bp,
-+ BTREE_ITER_NOPRESERVE);
-+ if (ret) {
-+ ob = ERR_PTR(ret);
-+ goto err;
-+ }
-+
-+ if (!bkey_eq(bp_pos, POS_MAX)) {
-+ /*
-+ * Bucket may have data in it - we don't call
-+ * bch2_trans_inconsistent() because fsck hasn't
-+ * finished yet
-+ */
-+ ob = NULL;
-+ goto err;
-+ }
-+ }
-+
-+ ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl);
-+ if (!ob)
-+ iter.path->preserve = false;
-+err:
-+ /* @iter is only touched if bch2_bkey_get_iter() actually initialised it */
-+ if (iter.trans && iter.path)
-+ set_btree_iter_dontneed(&iter);
-+ bch2_trans_iter_exit(trans, &iter);
-+ printbuf_exit(&buf);
-+ return ob;
-+}
-+
-+/*
-+ * This path is for before the freespace btree is initialized:
-+ *
-+ * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
-+ * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
-+ *
-+ * The alloc btree is scanned directly, starting at ca->alloc_cursor (but not
-+ * below first_bucket / new_fs_bucket_idx).  If nothing is found and the scan
-+ * did not start at the first usable bucket, it is repeated once from there.
-+ *
-+ * Returns an open_bucket, NULL if no bucket was found, or an ERR_PTR.
-+ */
-+static noinline struct open_bucket *
-+bch2_bucket_alloc_early(struct btree_trans *trans,
-+ struct bch_dev *ca,
-+ enum bch_watermark watermark,
-+ struct bucket_alloc_state *s,
-+ struct closure *cl)
-+{
-+ struct btree_iter iter, citer;
-+ struct bkey_s_c k, ck;
-+ struct open_bucket *ob = NULL;
-+ u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
-+ u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor));
-+ u64 alloc_cursor = alloc_start;
-+ int ret;
-+
-+ /*
-+ * Scan with an uncached iterator to avoid polluting the key cache. An
-+ * uncached iter will return a cached key if one exists, but if not
-+ * there is no other underlying protection for the associated key cache
-+ * slot. To avoid racing bucket allocations, look up the cached key slot
-+ * of any likely allocation candidate before attempting to proceed with
-+ * the allocation. This provides proper exclusion on the associated
-+ * bucket.
-+ */
-+again:
-+ for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
-+ BTREE_ITER_SLOTS, k, ret) {
-+ struct bch_alloc_v4 a_convert;
-+ const struct bch_alloc_v4 *a;
-+
-+ /* stop at the end of this device's buckets */
-+ if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
-+ break;
-+
-+ if (ca->new_fs_bucket_idx &&
-+ is_superblock_bucket(ca, k.k->p.offset))
-+ continue;
-+
-+ a = bch2_alloc_to_v4(k, &a_convert);
-+ if (a->data_type != BCH_DATA_free)
-+ continue;
-+
-+ /* now check the cached key to serialize concurrent allocs of the bucket */
-+ ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED);
-+ ret = bkey_err(ck);
-+ if (ret)
-+ break;
-+
-+ a = bch2_alloc_to_v4(ck, &a_convert);
-+ if (a->data_type != BCH_DATA_free)
-+ goto next;
-+
-+ s->buckets_seen++;
-+
-+ ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
-+next:
-+ citer.path->preserve = false;
-+ bch2_trans_iter_exit(trans, &citer);
-+ /* both a real bucket and an ERR_PTR end the scan */
-+ if (ob)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ /* remember where the scan stopped for the next call */
-+ alloc_cursor = iter.pos.offset;
-+ ca->alloc_cursor = alloc_cursor;
-+
-+ if (!ob && ret)
-+ ob = ERR_PTR(ret);
-+
-+ /* nothing found and we started mid-device: wrap around once */
-+ if (!ob && alloc_start > first_bucket) {
-+ alloc_cursor = alloc_start = first_bucket;
-+ goto again;
-+ }
-+
-+ return ob;
-+}
-+
-+/*
-+ * Allocate a bucket on @ca by walking the freespace btree from
-+ * ca->alloc_cursor (but not below ca->mi.first_bucket) and offering every
-+ * position covered by each key to try_alloc_bucket().  If nothing is found
-+ * and the walk did not start at first_bucket, it is repeated once from there.
-+ *
-+ * Must not be used while ca->new_fs_bucket_idx is nonzero (BUG otherwise).
-+ * Returns an open_bucket, NULL if no bucket was found, or an ERR_PTR.
-+ */
-+static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
-+ struct bch_dev *ca,
-+ enum bch_watermark watermark,
-+ struct bucket_alloc_state *s,
-+ struct closure *cl)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct open_bucket *ob = NULL;
-+ u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(ca->alloc_cursor));
-+ u64 alloc_cursor = alloc_start;
-+ int ret;
-+
-+ BUG_ON(ca->new_fs_bucket_idx);
-+again:
-+ for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
-+ POS(ca->dev_idx, alloc_cursor), 0, k, ret) {
-+ /* ran past this device's keys */
-+ if (k.k->p.inode != ca->dev_idx)
-+ break;
-+
-+ /* a freespace key is an extent: try each position from its start up to its end */
-+ for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
-+ alloc_cursor < k.k->p.offset;
-+ alloc_cursor++) {
-+ ret = btree_trans_too_many_iters(trans);
-+ if (ret) {
-+ ob = ERR_PTR(ret);
-+ break;
-+ }
-+
-+ s->buckets_seen++;
-+
-+ ob = try_alloc_bucket(trans, ca, watermark,
-+ alloc_cursor, s, k, cl);
-+ if (ob) {
-+ iter.path->preserve = false;
-+ break;
-+ }
-+ }
-+
-+ if (ob || ret)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ /* remember where the walk stopped for the next call */
-+ ca->alloc_cursor = alloc_cursor;
-+
-+ if (!ob && ret)
-+ ob = ERR_PTR(ret);
-+
-+ /* nothing found and we started mid-device: wrap around once */
-+ if (!ob && alloc_start > ca->mi.first_bucket) {
-+ alloc_cursor = alloc_start = ca->mi.first_bucket;
-+ goto again;
-+ }
-+
-+ return ob;
-+}
-+
-+/**
-+ * bch2_bucket_alloc_trans - allocate a single bucket from a specific device
-+ * @trans: transaction object
-+ * @ca: device to allocate from
-+ * @watermark: how important is this allocation?
-+ * @cl: if not NULL, closure to be used to wait if buckets not available
-+ * @usage: for secondarily also returning the current device usage
-+ *
-+ * Before allocating, background discards, gc_gens and invalidates are kicked
-+ * when the usage counters suggest they would free up buckets.
-+ *
-+ * Returns: an open_bucket on success, or an ERR_PTR() on failure.
-+ */
-+static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
-+ struct bch_dev *ca,
-+ enum bch_watermark watermark,
-+ struct closure *cl,
-+ struct bch_dev_usage *usage)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct open_bucket *ob = NULL;
-+ bool freespace = READ_ONCE(ca->mi.freespace_initialized);
-+ u64 avail;
-+ struct bucket_alloc_state s = { 0 };
-+ bool waiting = false;
-+again:
-+ bch2_dev_usage_read_fast(ca, usage);
-+ avail = dev_buckets_free(ca, *usage, watermark);
-+
-+ if (usage->d[BCH_DATA_need_discard].buckets > avail)
-+ bch2_do_discards(c);
-+
-+ if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
-+ bch2_do_gc_gens(c);
-+
-+ if (should_invalidate_buckets(ca, *usage))
-+ bch2_do_invalidates(c);
-+
-+ if (!avail) {
-+ /*
-+ * Register on the wait list first, then re-read usage once
-+ * before giving up.
-+ */
-+ if (cl && !waiting) {
-+ closure_wait(&c->freelist_wait, cl);
-+ waiting = true;
-+ goto again;
-+ }
-+
-+ if (!c->blocked_allocate)
-+ c->blocked_allocate = local_clock();
-+
-+ ob = ERR_PTR(-BCH_ERR_freelist_empty);
-+ goto err;
-+ }
-+
-+ /* buckets turned up after we queued ourselves: wake the wait list */
-+ if (waiting)
-+ closure_wake_up(&c->freelist_wait);
-+alloc:
-+ ob = likely(freespace)
-+ ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
-+ : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
-+
-+ /* many candidates were skipped pending a journal commit: start a flush */
-+ if (s.skipped_need_journal_commit * 2 > avail)
-+ bch2_journal_flush_async(&c->journal, NULL);
-+
-+ /* up to the check_alloc_info pass, retry once with the alloc btree scan */
-+ if (!ob && freespace && c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) {
-+ freespace = false;
-+ goto alloc;
-+ }
-+err:
-+ if (!ob)
-+ ob = ERR_PTR(-BCH_ERR_no_buckets_found);
-+
-+ /* tracepoints: success, or failure other than a transaction restart */
-+ if (!IS_ERR(ob))
-+ trace_and_count(c, bucket_alloc, ca,
-+ bch2_watermarks[watermark],
-+ ob->bucket,
-+ usage->d[BCH_DATA_free].buckets,
-+ avail,
-+ bch2_copygc_wait_amount(c),
-+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
-+ &s,
-+ cl == NULL,
-+ "");
-+ else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
-+ trace_and_count(c, bucket_alloc_fail, ca,
-+ bch2_watermarks[watermark],
-+ 0,
-+ usage->d[BCH_DATA_free].buckets,
-+ avail,
-+ bch2_copygc_wait_amount(c),
-+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
-+ &s,
-+ cl == NULL,
-+ bch2_err_str(PTR_ERR(ob)));
-+
-+ return ob;
-+}
-+
-+/*
-+ * Non-transactional entry point: runs bch2_bucket_alloc_trans() inside
-+ * bch2_trans_do() and returns the open_bucket (or ERR_PTR) it produced.  The
-+ * device usage filled in along the way is discarded, as is the return value
-+ * of bch2_trans_do() itself.
-+ */
-+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
-+ enum bch_watermark watermark,
-+ struct closure *cl)
-+{
-+ struct bch_dev_usage usage;
-+ struct open_bucket *ob;
-+
-+ /* PTR_ERR_OR_ZERO() turns the pointer result into the int that bch2_trans_do() evaluates */
-+ bch2_trans_do(c, NULL, NULL, 0,
-+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
-+ cl, &usage)));
-+ return ob;
-+}
-+
-+static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
-+ unsigned l, unsigned r)
-+{
-+ return ((stripe->next_alloc[l] > stripe->next_alloc[r]) -
-+ (stripe->next_alloc[l] < stripe->next_alloc[r]));
-+}
-+
-+#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r)
-+
-+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
-+ struct dev_stripe_state *stripe,
-+ struct bch_devs_mask *devs)
-+{
-+ struct dev_alloc_list ret = { .nr = 0 };
-+ unsigned i;
-+
-+ for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
-+ ret.devs[ret.nr++] = i;
-+
-+ bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
-+ return ret;
-+}
-+
-+static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
-+ struct dev_stripe_state *stripe,
-+ struct bch_dev_usage *usage)
-+{
-+ u64 *v = stripe->next_alloc + ca->dev_idx;
-+ u64 free_space = dev_buckets_available(ca, BCH_WATERMARK_normal);
-+ u64 free_space_inv = free_space
-+ ? div64_u64(1ULL << 48, free_space)
-+ : 1ULL << 48;
-+ u64 scale = *v / 4;
-+
-+ if (*v + free_space_inv >= *v)
-+ *v += free_space_inv;
-+ else
-+ *v = U64_MAX;
-+
-+ for (v = stripe->next_alloc;
-+ v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++)
-+ *v = *v < scale ? 0 : *v - scale;
-+}
-+
-+void bch2_dev_stripe_increment(struct bch_dev *ca,
-+ struct dev_stripe_state *stripe)
-+{
-+ struct bch_dev_usage usage;
-+
-+ bch2_dev_usage_read_fast(ca, &usage);
-+ bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
-+}
-+
-+static int add_new_bucket(struct bch_fs *c,
-+ struct open_buckets *ptrs,
-+ struct bch_devs_mask *devs_may_alloc,
-+ unsigned nr_replicas,
-+ unsigned *nr_effective,
-+ bool *have_cache,
-+ unsigned flags,
-+ struct open_bucket *ob)
-+{
-+ unsigned durability =
-+ bch_dev_bkey_exists(c, ob->dev)->mi.durability;
-+
-+ BUG_ON(*nr_effective >= nr_replicas);
-+ BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
-+
-+ __clear_bit(ob->dev, devs_may_alloc->d);
-+ *nr_effective += (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
-+ ? durability : 1;
-+ *have_cache |= !durability;
-+
-+ ob_push(c, ptrs, ob);
-+
-+ if (*nr_effective >= nr_replicas)
-+ return 1;
-+ if (ob->ec)
-+ return 1;
-+ return 0;
-+}
-+
-+int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
-+ struct open_buckets *ptrs,
-+ struct dev_stripe_state *stripe,
-+ struct bch_devs_mask *devs_may_alloc,
-+ unsigned nr_replicas,
-+ unsigned *nr_effective,
-+ bool *have_cache,
-+ unsigned flags,
-+ enum bch_data_type data_type,
-+ enum bch_watermark watermark,
-+ struct closure *cl)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct dev_alloc_list devs_sorted =
-+ bch2_dev_alloc_list(c, stripe, devs_may_alloc);
-+ unsigned dev;
-+ struct bch_dev *ca;
-+ int ret = -BCH_ERR_insufficient_devices;
-+ unsigned i;
-+
-+ BUG_ON(*nr_effective >= nr_replicas);
-+
-+ for (i = 0; i < devs_sorted.nr; i++) {
-+ struct bch_dev_usage usage;
-+ struct open_bucket *ob;
-+
-+ dev = devs_sorted.devs[i];
-+
-+ rcu_read_lock();
-+ ca = rcu_dereference(c->devs[dev]);
-+ if (ca)
-+ percpu_ref_get(&ca->ref);
-+ rcu_read_unlock();
-+
-+ if (!ca)
-+ continue;
-+
-+ if (!ca->mi.durability && *have_cache) {
-+ percpu_ref_put(&ca->ref);
-+ continue;
-+ }
-+
-+ ob = bch2_bucket_alloc_trans(trans, ca, watermark, cl, &usage);
-+ if (!IS_ERR(ob))
-+ bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
-+ percpu_ref_put(&ca->ref);
-+
-+ if (IS_ERR(ob)) {
-+ ret = PTR_ERR(ob);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl)
-+ break;
-+ continue;
-+ }
-+
-+ ob->data_type = data_type;
-+
-+ if (add_new_bucket(c, ptrs, devs_may_alloc,
-+ nr_replicas, nr_effective,
-+ have_cache, flags, ob)) {
-+ ret = 0;
-+ break;
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+/* Allocate from stripes: */
-+
-+/*
-+ * if we can't allocate a new stripe because there are already too many
-+ * partially filled stripes, force allocating from an existing stripe even when
-+ * it's to a device we don't want:
-+ */
-+
-+static int bucket_alloc_from_stripe(struct btree_trans *trans,
-+ struct open_buckets *ptrs,
-+ struct write_point *wp,
-+ struct bch_devs_mask *devs_may_alloc,
-+ u16 target,
-+ unsigned nr_replicas,
-+ unsigned *nr_effective,
-+ bool *have_cache,
-+ enum bch_watermark watermark,
-+ unsigned flags,
-+ struct closure *cl)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct dev_alloc_list devs_sorted;
-+ struct ec_stripe_head *h;
-+ struct open_bucket *ob;
-+ unsigned i, ec_idx;
-+ int ret = 0;
-+
-+ if (nr_replicas < 2)
-+ return 0;
-+
-+ if (ec_open_bucket(c, ptrs))
-+ return 0;
-+
-+ h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
-+ if (IS_ERR(h))
-+ return PTR_ERR(h);
-+ if (!h)
-+ return 0;
-+
-+ devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
-+
-+ for (i = 0; i < devs_sorted.nr; i++)
-+ for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
-+ if (!h->s->blocks[ec_idx])
-+ continue;
-+
-+ ob = c->open_buckets + h->s->blocks[ec_idx];
-+ if (ob->dev == devs_sorted.devs[i] &&
-+ !test_and_set_bit(ec_idx, h->s->blocks_allocated))
-+ goto got_bucket;
-+ }
-+ goto out_put_head;
-+got_bucket:
-+ ob->ec_idx = ec_idx;
-+ ob->ec = h->s;
-+ ec_stripe_new_get(h->s, STRIPE_REF_io);
-+
-+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
-+ nr_replicas, nr_effective,
-+ have_cache, flags, ob);
-+out_put_head:
-+ bch2_ec_stripe_head_put(c, h);
-+ return ret;
-+}
-+
-+/* Sector allocator */
-+
-+static bool want_bucket(struct bch_fs *c,
-+ struct write_point *wp,
-+ struct bch_devs_mask *devs_may_alloc,
-+ bool *have_cache, bool ec,
-+ struct open_bucket *ob)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-+
-+ if (!test_bit(ob->dev, devs_may_alloc->d))
-+ return false;
-+
-+ if (ob->data_type != wp->data_type)
-+ return false;
-+
-+ if (!ca->mi.durability &&
-+ (wp->data_type == BCH_DATA_btree || ec || *have_cache))
-+ return false;
-+
-+ if (ec != (ob->ec != NULL))
-+ return false;
-+
-+ return true;
-+}
-+
-+static int bucket_alloc_set_writepoint(struct bch_fs *c,
-+ struct open_buckets *ptrs,
-+ struct write_point *wp,
-+ struct bch_devs_mask *devs_may_alloc,
-+ unsigned nr_replicas,
-+ unsigned *nr_effective,
-+ bool *have_cache,
-+ bool ec, unsigned flags)
-+{
-+ struct open_buckets ptrs_skip = { .nr = 0 };
-+ struct open_bucket *ob;
-+ unsigned i;
-+ int ret = 0;
-+
-+ open_bucket_for_each(c, &wp->ptrs, ob, i) {
-+ if (!ret && want_bucket(c, wp, devs_may_alloc,
-+ have_cache, ec, ob))
-+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
-+ nr_replicas, nr_effective,
-+ have_cache, flags, ob);
-+ else
-+ ob_push(c, &ptrs_skip, ob);
-+ }
-+ wp->ptrs = ptrs_skip;
-+
-+ return ret;
-+}
-+
-+static int bucket_alloc_set_partial(struct bch_fs *c,
-+ struct open_buckets *ptrs,
-+ struct write_point *wp,
-+ struct bch_devs_mask *devs_may_alloc,
-+ unsigned nr_replicas,
-+ unsigned *nr_effective,
-+ bool *have_cache, bool ec,
-+ enum bch_watermark watermark,
-+ unsigned flags)
-+{
-+ int i, ret = 0;
-+
-+ if (!c->open_buckets_partial_nr)
-+ return 0;
-+
-+ spin_lock(&c->freelist_lock);
-+
-+ if (!c->open_buckets_partial_nr)
-+ goto unlock;
-+
-+ for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
-+ struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
-+
-+ if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-+ struct bch_dev_usage usage;
-+ u64 avail;
-+
-+ bch2_dev_usage_read_fast(ca, &usage);
-+ avail = dev_buckets_free(ca, usage, watermark);
-+ if (!avail)
-+ continue;
-+
-+ array_remove_item(c->open_buckets_partial,
-+ c->open_buckets_partial_nr,
-+ i);
-+ ob->on_partial_list = false;
-+
-+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
-+ nr_replicas, nr_effective,
-+ have_cache, flags, ob);
-+ if (ret)
-+ break;
-+ }
-+ }
-+unlock:
-+ spin_unlock(&c->freelist_lock);
-+ return ret;
-+}
-+
-+static int __open_bucket_add_buckets(struct btree_trans *trans,
-+ struct open_buckets *ptrs,
-+ struct write_point *wp,
-+ struct bch_devs_list *devs_have,
-+ u16 target,
-+ bool erasure_code,
-+ unsigned nr_replicas,
-+ unsigned *nr_effective,
-+ bool *have_cache,
-+ enum bch_watermark watermark,
-+ unsigned flags,
-+ struct closure *_cl)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_devs_mask devs;
-+ struct open_bucket *ob;
-+ struct closure *cl = NULL;
-+ unsigned i;
-+ int ret;
-+
-+ devs = target_rw_devs(c, wp->data_type, target);
-+
-+ /* Don't allocate from devices we already have pointers to: */
-+ for (i = 0; i < devs_have->nr; i++)
-+ __clear_bit(devs_have->devs[i], devs.d);
-+
-+ open_bucket_for_each(c, ptrs, ob, i)
-+ __clear_bit(ob->dev, devs.d);
-+
-+ if (erasure_code && ec_open_bucket(c, ptrs))
-+ return 0;
-+
-+ ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
-+ nr_replicas, nr_effective,
-+ have_cache, erasure_code, flags);
-+ if (ret)
-+ return ret;
-+
-+ ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
-+ nr_replicas, nr_effective,
-+ have_cache, erasure_code, watermark, flags);
-+ if (ret)
-+ return ret;
-+
-+ if (erasure_code) {
-+ ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
-+ target,
-+ nr_replicas, nr_effective,
-+ have_cache,
-+ watermark, flags, _cl);
-+ } else {
-+retry_blocking:
-+ /*
-+ * Try nonblocking first, so that if one device is full we'll try from
-+ * other devices:
-+ */
-+ ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
-+ nr_replicas, nr_effective, have_cache,
-+ flags, wp->data_type, watermark, cl);
-+ if (ret &&
-+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
-+ !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
-+ !cl && _cl) {
-+ cl = _cl;
-+ goto retry_blocking;
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+static int open_bucket_add_buckets(struct btree_trans *trans,
-+ struct open_buckets *ptrs,
-+ struct write_point *wp,
-+ struct bch_devs_list *devs_have,
-+ u16 target,
-+ unsigned erasure_code,
-+ unsigned nr_replicas,
-+ unsigned *nr_effective,
-+ bool *have_cache,
-+ enum bch_watermark watermark,
-+ unsigned flags,
-+ struct closure *cl)
-+{
-+ int ret;
-+
-+ if (erasure_code) {
-+ ret = __open_bucket_add_buckets(trans, ptrs, wp,
-+ devs_have, target, erasure_code,
-+ nr_replicas, nr_effective, have_cache,
-+ watermark, flags, cl);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-+ bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
-+ bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
-+ bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
-+ return ret;
-+ if (*nr_effective >= nr_replicas)
-+ return 0;
-+ }
-+
-+ ret = __open_bucket_add_buckets(trans, ptrs, wp,
-+ devs_have, target, false,
-+ nr_replicas, nr_effective, have_cache,
-+ watermark, flags, cl);
-+ return ret < 0 ? ret : 0;
-+}
-+
-+/**
-+ * should_drop_bucket - check if this is open_bucket should go away
-+ * @ob: open_bucket to predicate on
-+ * @c: filesystem handle
-+ * @ca: if set, we're killing buckets for a particular device
-+ * @ec: if true, we're shutting down erasure coding and killing all ec
-+ * open_buckets
-+ * otherwise, return true
-+ * Returns: true if we should kill this open_bucket
-+ *
-+ * We're killing open_buckets because we're shutting down a device, erasure
-+ * coding, or the entire filesystem - check if this open_bucket matches:
-+ */
-+static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
-+ struct bch_dev *ca, bool ec)
-+{
-+ if (ec) {
-+ return ob->ec != NULL;
-+ } else if (ca) {
-+ bool drop = ob->dev == ca->dev_idx;
-+ struct open_bucket *ob2;
-+ unsigned i;
-+
-+ if (!drop && ob->ec) {
-+ unsigned nr_blocks;
-+
-+ mutex_lock(&ob->ec->lock);
-+ nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks;
-+
-+ for (i = 0; i < nr_blocks; i++) {
-+ if (!ob->ec->blocks[i])
-+ continue;
-+
-+ ob2 = c->open_buckets + ob->ec->blocks[i];
-+ drop |= ob2->dev == ca->dev_idx;
-+ }
-+ mutex_unlock(&ob->ec->lock);
-+ }
-+
-+ return drop;
-+ } else {
-+ return true;
-+ }
-+}
-+
-+static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
-+ bool ec, struct write_point *wp)
-+{
-+ struct open_buckets ptrs = { .nr = 0 };
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ mutex_lock(&wp->lock);
-+ open_bucket_for_each(c, &wp->ptrs, ob, i)
-+ if (should_drop_bucket(ob, c, ca, ec))
-+ bch2_open_bucket_put(c, ob);
-+ else
-+ ob_push(c, &ptrs, ob);
-+ wp->ptrs = ptrs;
-+ mutex_unlock(&wp->lock);
-+}
-+
-+void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
-+ bool ec)
-+{
-+ unsigned i;
-+
-+ /* Next, close write points that point to this device... */
-+ for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
-+ bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
-+
-+ bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
-+ bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
-+ bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
-+
-+ mutex_lock(&c->btree_reserve_cache_lock);
-+ while (c->btree_reserve_cache_nr) {
-+ struct btree_alloc *a =
-+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
-+
-+ bch2_open_buckets_put(c, &a->ob);
-+ }
-+ mutex_unlock(&c->btree_reserve_cache_lock);
-+
-+ spin_lock(&c->freelist_lock);
-+ i = 0;
-+ while (i < c->open_buckets_partial_nr) {
-+ struct open_bucket *ob =
-+ c->open_buckets + c->open_buckets_partial[i];
-+
-+ if (should_drop_bucket(ob, c, ca, ec)) {
-+ --c->open_buckets_partial_nr;
-+ swap(c->open_buckets_partial[i],
-+ c->open_buckets_partial[c->open_buckets_partial_nr]);
-+ ob->on_partial_list = false;
-+ spin_unlock(&c->freelist_lock);
-+ bch2_open_bucket_put(c, ob);
-+ spin_lock(&c->freelist_lock);
-+ } else {
-+ i++;
-+ }
-+ }
-+ spin_unlock(&c->freelist_lock);
-+
-+ bch2_ec_stop_dev(c, ca);
-+}
-+
-+static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
-+ unsigned long write_point)
-+{
-+ unsigned hash =
-+ hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
-+
-+ return &c->write_points_hash[hash];
-+}
-+
-+static struct write_point *__writepoint_find(struct hlist_head *head,
-+ unsigned long write_point)
-+{
-+ struct write_point *wp;
-+
-+ rcu_read_lock();
-+ hlist_for_each_entry_rcu(wp, head, node)
-+ if (wp->write_point == write_point)
-+ goto out;
-+ wp = NULL;
-+out:
-+ rcu_read_unlock();
-+ return wp;
-+}
-+
-+static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
-+{
-+ u64 stranded = c->write_points_nr * c->bucket_size_max;
-+ u64 free = bch2_fs_usage_read_short(c).free;
-+
-+ return stranded * factor > free;
-+}
-+
-+static bool try_increase_writepoints(struct bch_fs *c)
-+{
-+ struct write_point *wp;
-+
-+ if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
-+ too_many_writepoints(c, 32))
-+ return false;
-+
-+ wp = c->write_points + c->write_points_nr++;
-+ hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
-+ return true;
-+}
-+
-+static bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct write_point *wp;
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ mutex_lock(&c->write_points_hash_lock);
-+ if (c->write_points_nr < old_nr) {
-+ mutex_unlock(&c->write_points_hash_lock);
-+ return true;
-+ }
-+
-+ if (c->write_points_nr == 1 ||
-+ !too_many_writepoints(c, 8)) {
-+ mutex_unlock(&c->write_points_hash_lock);
-+ return false;
-+ }
-+
-+ wp = c->write_points + --c->write_points_nr;
-+
-+ hlist_del_rcu(&wp->node);
-+ mutex_unlock(&c->write_points_hash_lock);
-+
-+ bch2_trans_mutex_lock_norelock(trans, &wp->lock);
-+ open_bucket_for_each(c, &wp->ptrs, ob, i)
-+ open_bucket_free_unused(c, ob);
-+ wp->ptrs.nr = 0;
-+ mutex_unlock(&wp->lock);
-+ return true;
-+}
-+
-+static struct write_point *writepoint_find(struct btree_trans *trans,
-+ unsigned long write_point)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct write_point *wp, *oldest;
-+ struct hlist_head *head;
-+
-+ if (!(write_point & 1UL)) {
-+ wp = (struct write_point *) write_point;
-+ bch2_trans_mutex_lock_norelock(trans, &wp->lock);
-+ return wp;
-+ }
-+
-+ head = writepoint_hash(c, write_point);
-+restart_find:
-+ wp = __writepoint_find(head, write_point);
-+ if (wp) {
-+lock_wp:
-+ bch2_trans_mutex_lock_norelock(trans, &wp->lock);
-+ if (wp->write_point == write_point)
-+ goto out;
-+ mutex_unlock(&wp->lock);
-+ goto restart_find;
-+ }
-+restart_find_oldest:
-+ oldest = NULL;
-+ for (wp = c->write_points;
-+ wp < c->write_points + c->write_points_nr; wp++)
-+ if (!oldest || time_before64(wp->last_used, oldest->last_used))
-+ oldest = wp;
-+
-+ bch2_trans_mutex_lock_norelock(trans, &oldest->lock);
-+ bch2_trans_mutex_lock_norelock(trans, &c->write_points_hash_lock);
-+ if (oldest >= c->write_points + c->write_points_nr ||
-+ try_increase_writepoints(c)) {
-+ mutex_unlock(&c->write_points_hash_lock);
-+ mutex_unlock(&oldest->lock);
-+ goto restart_find_oldest;
-+ }
-+
-+ wp = __writepoint_find(head, write_point);
-+ if (wp && wp != oldest) {
-+ mutex_unlock(&c->write_points_hash_lock);
-+ mutex_unlock(&oldest->lock);
-+ goto lock_wp;
-+ }
-+
-+ wp = oldest;
-+ hlist_del_rcu(&wp->node);
-+ wp->write_point = write_point;
-+ hlist_add_head_rcu(&wp->node, head);
-+ mutex_unlock(&c->write_points_hash_lock);
-+out:
-+ wp->last_used = local_clock();
-+ return wp;
-+}
-+
-+/*
-+ * Get us an open_bucket we can allocate from, return with it locked:
-+ */
-+int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
-+ unsigned target,
-+ unsigned erasure_code,
-+ struct write_point_specifier write_point,
-+ struct bch_devs_list *devs_have,
-+ unsigned nr_replicas,
-+ unsigned nr_replicas_required,
-+ enum bch_watermark watermark,
-+ unsigned flags,
-+ struct closure *cl,
-+ struct write_point **wp_ret)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct write_point *wp;
-+ struct open_bucket *ob;
-+ struct open_buckets ptrs;
-+ unsigned nr_effective, write_points_nr;
-+ bool have_cache;
-+ int ret;
-+ int i;
-+
-+ BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
-+
-+ BUG_ON(!nr_replicas || !nr_replicas_required);
-+retry:
-+ ptrs.nr = 0;
-+ nr_effective = 0;
-+ write_points_nr = c->write_points_nr;
-+ have_cache = false;
-+
-+ *wp_ret = wp = writepoint_find(trans, write_point.v);
-+
-+ /* metadata may not allocate on cache devices: */
-+ if (wp->data_type != BCH_DATA_user)
-+ have_cache = true;
-+
-+ if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
-+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
-+ target, erasure_code,
-+ nr_replicas, &nr_effective,
-+ &have_cache, watermark,
-+ flags, NULL);
-+ if (!ret ||
-+ bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto alloc_done;
-+
-+ /* Don't retry from all devices if we're out of open buckets: */
-+ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
-+ goto allocate_blocking;
-+
-+ /*
-+ * Only try to allocate cache (durability = 0 devices) from the
-+ * specified target:
-+ */
-+ have_cache = true;
-+
-+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
-+ 0, erasure_code,
-+ nr_replicas, &nr_effective,
-+ &have_cache, watermark,
-+ flags, cl);
-+ } else {
-+allocate_blocking:
-+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
-+ target, erasure_code,
-+ nr_replicas, &nr_effective,
-+ &have_cache, watermark,
-+ flags, cl);
-+ }
-+alloc_done:
-+ BUG_ON(!ret && nr_effective < nr_replicas);
-+
-+ if (erasure_code && !ec_open_bucket(c, &ptrs))
-+ pr_debug("failed to get ec bucket: ret %u", ret);
-+
-+ if (ret == -BCH_ERR_insufficient_devices &&
-+ nr_effective >= nr_replicas_required)
-+ ret = 0;
-+
-+ if (ret)
-+ goto err;
-+
-+ /* Free buckets we didn't use: */
-+ open_bucket_for_each(c, &wp->ptrs, ob, i)
-+ open_bucket_free_unused(c, ob);
-+
-+ wp->ptrs = ptrs;
-+
-+ wp->sectors_free = UINT_MAX;
-+
-+ open_bucket_for_each(c, &wp->ptrs, ob, i)
-+ wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
-+
-+ BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
-+
-+ return 0;
-+err:
-+ open_bucket_for_each(c, &wp->ptrs, ob, i)
-+ if (ptrs.nr < ARRAY_SIZE(ptrs.v))
-+ ob_push(c, &ptrs, ob);
-+ else
-+ open_bucket_free_unused(c, ob);
-+ wp->ptrs = ptrs;
-+
-+ mutex_unlock(&wp->lock);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_freelist_empty) &&
-+ try_decrease_writepoints(trans, write_points_nr))
-+ goto retry;
-+
-+ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
-+ bch2_err_matches(ret, BCH_ERR_freelist_empty))
-+ return cl
-+ ? -BCH_ERR_bucket_alloc_blocked
-+ : -BCH_ERR_ENOSPC_bucket_alloc;
-+
-+ return ret;
-+}
-+
-+struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-+
-+ return (struct bch_extent_ptr) {
-+ .type = 1 << BCH_EXTENT_ENTRY_ptr,
-+ .gen = ob->gen,
-+ .dev = ob->dev,
-+ .offset = bucket_to_sector(ca, ob->bucket) +
-+ ca->mi.bucket_size -
-+ ob->sectors_free,
-+ };
-+}
-+
-+void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
-+ struct bkey_i *k, unsigned sectors,
-+ bool cached)
-+{
-+ bch2_alloc_sectors_append_ptrs_inlined(c, wp, k, sectors, cached);
-+}
-+
-+/*
-+ * Append pointers to the space we just allocated to @k, and mark @sectors space
-+ * as allocated out of @ob
-+ */
-+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
-+{
-+ bch2_alloc_sectors_done_inlined(c, wp);
-+}
-+
-+static inline void writepoint_init(struct write_point *wp,
-+ enum bch_data_type type)
-+{
-+ mutex_init(&wp->lock);
-+ wp->data_type = type;
-+
-+ INIT_WORK(&wp->index_update_work, bch2_write_point_do_index_updates);
-+ INIT_LIST_HEAD(&wp->writes);
-+ spin_lock_init(&wp->writes_lock);
-+}
-+
-+void bch2_fs_allocator_foreground_init(struct bch_fs *c)
-+{
-+ struct open_bucket *ob;
-+ struct write_point *wp;
-+
-+ mutex_init(&c->write_points_hash_lock);
-+ c->write_points_nr = ARRAY_SIZE(c->write_points);
-+
-+ /* open bucket 0 is a sentinal NULL: */
-+ spin_lock_init(&c->open_buckets[0].lock);
-+
-+ for (ob = c->open_buckets + 1;
-+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
-+ spin_lock_init(&ob->lock);
-+ c->open_buckets_nr_free++;
-+
-+ ob->freelist = c->open_buckets_freelist;
-+ c->open_buckets_freelist = ob - c->open_buckets;
-+ }
-+
-+ writepoint_init(&c->btree_write_point, BCH_DATA_btree);
-+ writepoint_init(&c->rebalance_write_point, BCH_DATA_user);
-+ writepoint_init(&c->copygc_write_point, BCH_DATA_user);
-+
-+ for (wp = c->write_points;
-+ wp < c->write_points + c->write_points_nr; wp++) {
-+ writepoint_init(wp, BCH_DATA_user);
-+
-+ wp->last_used = local_clock();
-+ wp->write_point = (unsigned long) wp;
-+ hlist_add_head_rcu(&wp->node,
-+ writepoint_hash(c, wp->write_point));
-+ }
-+}
-+
-+static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-+ unsigned data_type = ob->data_type;
-+ barrier(); /* READ_ONCE() doesn't work on bitfields */
-+
-+ prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u",
-+ ob - c->open_buckets,
-+ atomic_read(&ob->pin),
-+ data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
-+ ob->dev, ob->bucket, ob->gen,
-+ ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size);
-+ if (ob->ec)
-+ prt_printf(out, " ec idx %llu", ob->ec->idx);
-+ if (ob->on_partial_list)
-+ prt_str(out, " partial");
-+ prt_newline(out);
-+}
-+
-+void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+ struct open_bucket *ob;
-+
-+ out->atomic++;
-+
-+ for (ob = c->open_buckets;
-+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
-+ ob++) {
-+ spin_lock(&ob->lock);
-+ if (ob->valid && !ob->on_partial_list)
-+ bch2_open_bucket_to_text(out, c, ob);
-+ spin_unlock(&ob->lock);
-+ }
-+
-+ --out->atomic;
-+}
-+
-+void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+ unsigned i;
-+
-+ out->atomic++;
-+ spin_lock(&c->freelist_lock);
-+
-+ for (i = 0; i < c->open_buckets_partial_nr; i++)
-+ bch2_open_bucket_to_text(out, c,
-+ c->open_buckets + c->open_buckets_partial[i]);
-+
-+ spin_unlock(&c->freelist_lock);
-+ --out->atomic;
-+}
-+
-+static const char * const bch2_write_point_states[] = {
-+#define x(n) #n,
-+ WRITE_POINT_STATES()
-+#undef x
-+ NULL
-+};
-+
-+static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct write_point *wp)
-+{
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ prt_printf(out, "%lu: ", wp->write_point);
-+ prt_human_readable_u64(out, wp->sectors_allocated);
-+
-+ prt_printf(out, " last wrote: ");
-+ bch2_pr_time_units(out, sched_clock() - wp->last_used);
-+
-+ for (i = 0; i < WRITE_POINT_STATE_NR; i++) {
-+ prt_printf(out, " %s: ", bch2_write_point_states[i]);
-+ bch2_pr_time_units(out, wp->time[i]);
-+ }
-+
-+ prt_newline(out);
-+
-+ printbuf_indent_add(out, 2);
-+ open_bucket_for_each(c, &wp->ptrs, ob, i)
-+ bch2_open_bucket_to_text(out, c, ob);
-+ printbuf_indent_sub(out, 2);
-+}
-+
-+void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+ struct write_point *wp;
-+
-+ prt_str(out, "Foreground write points\n");
-+ for (wp = c->write_points;
-+ wp < c->write_points + ARRAY_SIZE(c->write_points);
-+ wp++)
-+ bch2_write_point_to_text(out, c, wp);
-+
-+ prt_str(out, "Copygc write point\n");
-+ bch2_write_point_to_text(out, c, &c->copygc_write_point);
-+
-+ prt_str(out, "Rebalance write point\n");
-+ bch2_write_point_to_text(out, c, &c->rebalance_write_point);
-+
-+ prt_str(out, "Btree write point\n");
-+ bch2_write_point_to_text(out, c, &c->btree_write_point);
-+}
-diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
-new file mode 100644
-index 000000000000..7aaeec44c746
---- /dev/null
-+++ b/fs/bcachefs/alloc_foreground.h
-@@ -0,0 +1,224 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_ALLOC_FOREGROUND_H
-+#define _BCACHEFS_ALLOC_FOREGROUND_H
-+
-+#include "bcachefs.h"
-+#include "alloc_types.h"
-+#include "extents.h"
-+#include "sb-members.h"
-+
-+#include <linux/hash.h>
-+
-+struct bkey;
-+struct bch_dev;
-+struct bch_fs;
-+struct bch_devs_List;
-+
-+extern const char * const bch2_watermarks[];
-+
-+void bch2_reset_alloc_cursors(struct bch_fs *);
-+
-+struct dev_alloc_list {
-+ unsigned nr;
-+ u8 devs[BCH_SB_MEMBERS_MAX];
-+};
-+
-+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
-+ struct dev_stripe_state *,
-+ struct bch_devs_mask *);
-+void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
-+
-+long bch2_bucket_alloc_new_fs(struct bch_dev *);
-+
-+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
-+ enum bch_watermark, struct closure *);
-+
-+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
-+ struct open_bucket *ob)
-+{
-+ BUG_ON(obs->nr >= ARRAY_SIZE(obs->v));
-+
-+ obs->v[obs->nr++] = ob - c->open_buckets;
-+}
-+
-+#define open_bucket_for_each(_c, _obs, _ob, _i) \
-+ for ((_i) = 0; \
-+ (_i) < (_obs)->nr && \
-+ ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true); \
-+ (_i)++)
-+
-+static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
-+ struct open_buckets *obs)
-+{
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ open_bucket_for_each(c, obs, ob, i)
-+ if (ob->ec)
-+ return ob;
-+
-+ return NULL;
-+}
-+
-+void bch2_open_bucket_write_error(struct bch_fs *,
-+ struct open_buckets *, unsigned);
-+
-+void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
-+
-+static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
-+{
-+ if (atomic_dec_and_test(&ob->pin))
-+ __bch2_open_bucket_put(c, ob);
-+}
-+
-+static inline void bch2_open_buckets_put(struct bch_fs *c,
-+ struct open_buckets *ptrs)
-+{
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ open_bucket_for_each(c, ptrs, ob, i)
-+ bch2_open_bucket_put(c, ob);
-+ ptrs->nr = 0;
-+}
-+
-+static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct write_point *wp)
-+{
-+ struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 };
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ open_bucket_for_each(c, &wp->ptrs, ob, i)
-+ ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
-+ wp->ptrs = keep;
-+
-+ mutex_unlock(&wp->lock);
-+
-+ bch2_open_buckets_put(c, &ptrs);
-+}
-+
-+static inline void bch2_open_bucket_get(struct bch_fs *c,
-+ struct write_point *wp,
-+ struct open_buckets *ptrs)
-+{
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ open_bucket_for_each(c, &wp->ptrs, ob, i) {
-+ ob->data_type = wp->data_type;
-+ atomic_inc(&ob->pin);
-+ ob_push(c, ptrs, ob);
-+ }
-+}
-+
-+static inline open_bucket_idx_t *open_bucket_hashslot(struct bch_fs *c,
-+ unsigned dev, u64 bucket)
-+{
-+ return c->open_buckets_hash +
-+ (jhash_3words(dev, bucket, bucket >> 32, 0) &
-+ (OPEN_BUCKETS_COUNT - 1));
-+}
-+
-+static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucket)
-+{
-+ open_bucket_idx_t slot = *open_bucket_hashslot(c, dev, bucket);
-+
-+ while (slot) {
-+ struct open_bucket *ob = &c->open_buckets[slot];
-+
-+ if (ob->dev == dev && ob->bucket == bucket)
-+ return true;
-+
-+ slot = ob->hash;
-+ }
-+
-+ return false;
-+}
-+
-+static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 bucket)
-+{
-+ bool ret;
-+
-+ if (bch2_bucket_is_open(c, dev, bucket))
-+ return true;
-+
-+ spin_lock(&c->freelist_lock);
-+ ret = bch2_bucket_is_open(c, dev, bucket);
-+ spin_unlock(&c->freelist_lock);
-+
-+ return ret;
-+}
-+
-+int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
-+ struct dev_stripe_state *, struct bch_devs_mask *,
-+ unsigned, unsigned *, bool *, unsigned,
-+ enum bch_data_type, enum bch_watermark,
-+ struct closure *);
-+
-+int bch2_alloc_sectors_start_trans(struct btree_trans *,
-+ unsigned, unsigned,
-+ struct write_point_specifier,
-+ struct bch_devs_list *,
-+ unsigned, unsigned,
-+ enum bch_watermark,
-+ unsigned,
-+ struct closure *,
-+ struct write_point **);
-+
-+struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *, struct open_bucket *);
-+
-+/*
-+ * Append pointers to the space we just allocated to @k, and mark @sectors space
-+ * as allocated out of @ob
-+ */
-+static inline void
-+bch2_alloc_sectors_append_ptrs_inlined(struct bch_fs *c, struct write_point *wp,
-+ struct bkey_i *k, unsigned sectors,
-+ bool cached)
-+{
-+ struct open_bucket *ob;
-+ unsigned i;
-+
-+ BUG_ON(sectors > wp->sectors_free);
-+ wp->sectors_free -= sectors;
-+ wp->sectors_allocated += sectors;
-+
-+ open_bucket_for_each(c, &wp->ptrs, ob, i) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-+ struct bch_extent_ptr ptr = bch2_ob_ptr(c, ob);
-+
-+ ptr.cached = cached ||
-+ (!ca->mi.durability &&
-+ wp->data_type == BCH_DATA_user);
-+
-+ bch2_bkey_append_ptr(k, ptr);
-+
-+ BUG_ON(sectors > ob->sectors_free);
-+ ob->sectors_free -= sectors;
-+ }
-+}
-+
-+void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
-+ struct bkey_i *, unsigned, bool);
-+void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
-+
-+void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool);
-+
-+static inline struct write_point_specifier writepoint_hashed(unsigned long v)
-+{
-+ return (struct write_point_specifier) { .v = v | 1 };
-+}
-+
-+static inline struct write_point_specifier writepoint_ptr(struct write_point *wp)
-+{
-+ return (struct write_point_specifier) { .v = (unsigned long) wp };
-+}
-+
-+void bch2_fs_allocator_foreground_init(struct bch_fs *);
-+
-+void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
-+void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
-+
-+void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
-+
-+#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
-diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
-new file mode 100644
-index 000000000000..b91b7a461056
---- /dev/null
-+++ b/fs/bcachefs/alloc_types.h
-@@ -0,0 +1,126 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_ALLOC_TYPES_H
-+#define _BCACHEFS_ALLOC_TYPES_H
-+
-+#include <linux/mutex.h>
-+#include <linux/spinlock.h>
-+
-+#include "clock_types.h"
-+#include "fifo.h"
-+
-+struct bucket_alloc_state {
-+ u64 buckets_seen;
-+ u64 skipped_open;
-+ u64 skipped_need_journal_commit;
-+ u64 skipped_nocow;
-+ u64 skipped_nouse;
-+};
-+
-+#define BCH_WATERMARKS() \
-+ x(stripe) \
-+ x(normal) \
-+ x(copygc) \
-+ x(btree) \
-+ x(btree_copygc) \
-+ x(reclaim)
-+
-+enum bch_watermark {
-+#define x(name) BCH_WATERMARK_##name,
-+ BCH_WATERMARKS()
-+#undef x
-+ BCH_WATERMARK_NR,
-+};
-+
-+#define BCH_WATERMARK_BITS 3
-+#define BCH_WATERMARK_MASK ~(~0U << BCH_WATERMARK_BITS)
-+
-+#define OPEN_BUCKETS_COUNT 1024
-+
-+#define WRITE_POINT_HASH_NR 32
-+#define WRITE_POINT_MAX 32
-+
-+/*
-+ * 0 is never a valid open_bucket_idx_t:
-+ */
-+typedef u16 open_bucket_idx_t;
-+
-+struct open_bucket {
-+ spinlock_t lock;
-+ atomic_t pin;
-+ open_bucket_idx_t freelist;
-+ open_bucket_idx_t hash;
-+
-+ /*
-+ * When an open bucket has an ec_stripe attached, this is the index of
-+ * the block in the stripe this open_bucket corresponds to:
-+ */
-+ u8 ec_idx;
-+ enum bch_data_type data_type:6;
-+ unsigned valid:1;
-+ unsigned on_partial_list:1;
-+
-+ u8 dev;
-+ u8 gen;
-+ u32 sectors_free;
-+ u64 bucket;
-+ struct ec_stripe_new *ec;
-+};
-+
-+#define OPEN_BUCKET_LIST_MAX 15
-+
-+struct open_buckets {
-+ open_bucket_idx_t nr;
-+ open_bucket_idx_t v[OPEN_BUCKET_LIST_MAX];
-+};
-+
-+struct dev_stripe_state {
-+ u64 next_alloc[BCH_SB_MEMBERS_MAX];
-+};
-+
-+#define WRITE_POINT_STATES() \
-+ x(stopped) \
-+ x(waiting_io) \
-+ x(waiting_work) \
-+ x(running)
-+
-+enum write_point_state {
-+#define x(n) WRITE_POINT_##n,
-+ WRITE_POINT_STATES()
-+#undef x
-+ WRITE_POINT_STATE_NR
-+};
-+
-+struct write_point {
-+ struct {
-+ struct hlist_node node;
-+ struct mutex lock;
-+ u64 last_used;
-+ unsigned long write_point;
-+ enum bch_data_type data_type;
-+
-+ /* calculated based on how many pointers we're actually going to use: */
-+ unsigned sectors_free;
-+
-+ struct open_buckets ptrs;
-+ struct dev_stripe_state stripe;
-+
-+ u64 sectors_allocated;
-+ } __aligned(SMP_CACHE_BYTES);
-+
-+ struct {
-+ struct work_struct index_update_work;
-+
-+ struct list_head writes;
-+ spinlock_t writes_lock;
-+
-+ enum write_point_state state;
-+ u64 last_state_change;
-+ u64 time[WRITE_POINT_STATE_NR];
-+ } __aligned(SMP_CACHE_BYTES);
-+};
-+
-+struct write_point_specifier {
-+ unsigned long v;
-+};
-+
-+#endif /* _BCACHEFS_ALLOC_TYPES_H */
-diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
-new file mode 100644
-index 000000000000..ef02c9bb0354
---- /dev/null
-+++ b/fs/bcachefs/backpointers.c
-@@ -0,0 +1,860 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "bbpos.h"
-+#include "alloc_background.h"
-+#include "backpointers.h"
-+#include "btree_cache.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "btree_write_buffer.h"
-+#include "error.h"
-+
-+#include <linux/mm.h>
-+
-+static bool extent_matches_bp(struct bch_fs *c,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k,
-+ struct bpos bucket,
-+ struct bch_backpointer bp)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ struct bpos bucket2;
-+ struct bch_backpointer bp2;
-+
-+ if (p.ptr.cached)
-+ continue;
-+
-+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
-+ &bucket2, &bp2);
-+ if (bpos_eq(bucket, bucket2) &&
-+ !memcmp(&bp, &bp2, sizeof(bp)))
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
-+int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
-+ struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
-+ c, err,
-+ backpointer_pos_wrong,
-+ "backpointer at wrong pos");
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
-+{
-+ prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
-+ bch2_btree_id_str(bp->btree_id),
-+ bp->level,
-+ (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
-+ (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
-+ bp->bucket_len);
-+ bch2_bpos_to_text(out, bp->pos);
-+}
-+
-+void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
-+{
-+ prt_str(out, "bucket=");
-+ bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
-+ prt_str(out, " ");
-+
-+ bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
-+}
-+
-+void bch2_backpointer_swab(struct bkey_s k)
-+{
-+ struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
-+
-+ bp.v->bucket_offset = swab40(bp.v->bucket_offset);
-+ bp.v->bucket_len = swab32(bp.v->bucket_len);
-+ bch2_bpos_swab(&bp.v->pos);
-+}
-+
-+static noinline int backpointer_mod_err(struct btree_trans *trans,
-+ struct bch_backpointer bp,
-+ struct bkey_s_c bp_k,
-+ struct bkey_s_c orig_k,
-+ bool insert)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct printbuf buf = PRINTBUF;
-+
-+ if (insert) {
-+ prt_printf(&buf, "existing backpointer found when inserting ");
-+ bch2_backpointer_to_text(&buf, &bp);
-+ prt_newline(&buf);
-+ printbuf_indent_add(&buf, 2);
-+
-+ prt_printf(&buf, "found ");
-+ bch2_bkey_val_to_text(&buf, c, bp_k);
-+ prt_newline(&buf);
-+
-+ prt_printf(&buf, "for ");
-+ bch2_bkey_val_to_text(&buf, c, orig_k);
-+
-+ bch_err(c, "%s", buf.buf);
-+ } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
-+ prt_printf(&buf, "backpointer not found when deleting");
-+ prt_newline(&buf);
-+ printbuf_indent_add(&buf, 2);
-+
-+ prt_printf(&buf, "searching for ");
-+ bch2_backpointer_to_text(&buf, &bp);
-+ prt_newline(&buf);
-+
-+ prt_printf(&buf, "got ");
-+ bch2_bkey_val_to_text(&buf, c, bp_k);
-+ prt_newline(&buf);
-+
-+ prt_printf(&buf, "for ");
-+ bch2_bkey_val_to_text(&buf, c, orig_k);
-+
-+ bch_err(c, "%s", buf.buf);
-+ }
-+
-+ printbuf_exit(&buf);
-+
-+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
-+ bch2_inconsistent_error(c);
-+ return -EIO;
-+ } else {
-+ return 0;
-+ }
-+}
-+
-+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
-+ struct bkey_i_backpointer *bp_k,
-+ struct bch_backpointer bp,
-+ struct bkey_s_c orig_k,
-+ bool insert)
-+{
-+ struct btree_iter bp_iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
-+ bp_k->k.p,
-+ BTREE_ITER_INTENT|
-+ BTREE_ITER_SLOTS|
-+ BTREE_ITER_WITH_UPDATES);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (insert
-+ ? k.k->type
-+ : (k.k->type != KEY_TYPE_backpointer ||
-+ memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
-+ ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
-+err:
-+ bch2_trans_iter_exit(trans, &bp_iter);
-+ return ret;
-+}
-+
-+/*
-+ * Find the next backpointer >= *bp_offset:
-+ */
-+int bch2_get_next_backpointer(struct btree_trans *trans,
-+ struct bpos bucket, int gen,
-+ struct bpos *bp_pos,
-+ struct bch_backpointer *bp,
-+ unsigned iter_flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
-+ struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ if (bpos_ge(*bp_pos, bp_end_pos))
-+ goto done;
-+
-+ if (gen >= 0) {
-+ k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
-+ bucket, BTREE_ITER_CACHED|iter_flags);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto out;
-+
-+ if (k.k->type != KEY_TYPE_alloc_v4 ||
-+ bkey_s_c_to_alloc_v4(k).v->gen != gen)
-+ goto done;
-+ }
-+
-+ *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));
-+
-+ for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
-+ *bp_pos, iter_flags, k, ret) {
-+ if (bpos_ge(k.k->p, bp_end_pos))
-+ break;
-+
-+ *bp_pos = k.k->p;
-+ *bp = *bkey_s_c_to_backpointer(k).v;
-+ goto out;
-+ }
-+done:
-+ *bp_pos = SPOS_MAX;
-+out:
-+ bch2_trans_iter_exit(trans, &bp_iter);
-+ bch2_trans_iter_exit(trans, &alloc_iter);
-+ return ret;
-+}
-+
-+static void backpointer_not_found(struct btree_trans *trans,
-+ struct bpos bp_pos,
-+ struct bch_backpointer bp,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct printbuf buf = PRINTBUF;
-+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
-+
-+ /*
-+ * If we're using the btree write buffer, the backpointer we were
-+ * looking at may have already been deleted - failure to find what it
-+ * pointed to is not an error:
-+ */
-+ if (likely(!bch2_backpointers_no_use_write_buffer))
-+ return;
-+
-+ prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
-+ bp.level ? "btree node" : "extent");
-+ prt_printf(&buf, "bucket: ");
-+ bch2_bpos_to_text(&buf, bucket);
-+ prt_printf(&buf, "\n ");
-+
-+ prt_printf(&buf, "backpointer pos: ");
-+ bch2_bpos_to_text(&buf, bp_pos);
-+ prt_printf(&buf, "\n ");
-+
-+ bch2_backpointer_to_text(&buf, &bp);
-+ prt_printf(&buf, "\n ");
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
-+ bch_err_ratelimited(c, "%s", buf.buf);
-+ else
-+ bch2_trans_inconsistent(trans, "%s", buf.buf);
-+
-+ printbuf_exit(&buf);
-+}
-+
-+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bpos bp_pos,
-+ struct bch_backpointer bp,
-+ unsigned iter_flags)
-+{
-+ if (likely(!bp.level)) {
-+ struct bch_fs *c = trans->c;
-+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
-+ struct bkey_s_c k;
-+
-+ bch2_trans_node_iter_init(trans, iter,
-+ bp.btree_id,
-+ bp.pos,
-+ 0, 0,
-+ iter_flags);
-+ k = bch2_btree_iter_peek_slot(iter);
-+ if (bkey_err(k)) {
-+ bch2_trans_iter_exit(trans, iter);
-+ return k;
-+ }
-+
-+ if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
-+ return k;
-+
-+ bch2_trans_iter_exit(trans, iter);
-+ backpointer_not_found(trans, bp_pos, bp, k);
-+ return bkey_s_c_null;
-+ } else {
-+ struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
-+
-+ if (IS_ERR_OR_NULL(b)) {
-+ bch2_trans_iter_exit(trans, iter);
-+ return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
-+ }
-+ return bkey_i_to_s_c(&b->key);
-+ }
-+}
-+
-+struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bpos bp_pos,
-+ struct bch_backpointer bp)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
-+ struct btree *b;
-+
-+ BUG_ON(!bp.level);
-+
-+ bch2_trans_node_iter_init(trans, iter,
-+ bp.btree_id,
-+ bp.pos,
-+ 0,
-+ bp.level - 1,
-+ 0);
-+ b = bch2_btree_iter_peek_node(iter);
-+ if (IS_ERR(b))
-+ goto err;
-+
-+ BUG_ON(b->c.level != bp.level - 1);
-+
-+ if (b && extent_matches_bp(c, bp.btree_id, bp.level,
-+ bkey_i_to_s_c(&b->key),
-+ bucket, bp))
-+ return b;
-+
-+ if (b && btree_node_will_make_reachable(b)) {
-+ b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
-+ } else {
-+ backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
-+ b = NULL;
-+ }
-+err:
-+ bch2_trans_iter_exit(trans, iter);
-+ return b;
-+}
-+
-+static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter alloc_iter = { NULL };
-+ struct bkey_s_c alloc_k;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
-+ backpointer_to_missing_device,
-+ "backpointer for missing device:\n%s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ ret = bch2_btree_delete_at(trans, bp_iter, 0);
-+ goto out;
-+ }
-+
-+ alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
-+ bp_pos_to_bucket(c, k.k->p), 0);
-+ ret = bkey_err(alloc_k);
-+ if (ret)
-+ goto out;
-+
-+ if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
-+ backpointer_to_missing_alloc,
-+ "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
-+ alloc_iter.pos.inode, alloc_iter.pos.offset,
-+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
-+ ret = bch2_btree_delete_at(trans, bp_iter, 0);
-+ goto out;
-+ }
-+out:
-+fsck_err:
-+ bch2_trans_iter_exit(trans, &alloc_iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/* verify that every backpointer has a corresponding alloc key */
-+int bch2_check_btree_backpointers(struct bch_fs *c)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ ret = bch2_trans_run(c,
-+ for_each_btree_key_commit(trans, iter,
-+ BTREE_ID_backpointers, POS_MIN, 0, k,
-+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+ bch2_check_btree_backpointer(trans, &iter, k)));
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+struct bpos_level {
-+ unsigned level;
-+ struct bpos pos;
-+};
-+
-+static int check_bp_exists(struct btree_trans *trans,
-+ struct bpos bucket,
-+ struct bch_backpointer bp,
-+ struct bkey_s_c orig_k,
-+ struct bpos bucket_start,
-+ struct bpos bucket_end,
-+ struct bpos_level *last_flushed)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter bp_iter = { NULL };
-+ struct printbuf buf = PRINTBUF;
-+ struct bkey_s_c bp_k;
-+ int ret;
-+
-+ if (bpos_lt(bucket, bucket_start) ||
-+ bpos_gt(bucket, bucket_end))
-+ return 0;
-+
-+ if (!bch2_dev_bucket_exists(c, bucket))
-+ goto missing;
-+
-+ bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
-+ bucket_pos_to_bp(c, bucket, bp.bucket_offset),
-+ 0);
-+ ret = bkey_err(bp_k);
-+ if (ret)
-+ goto err;
-+
-+ if (bp_k.k->type != KEY_TYPE_backpointer ||
-+ memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
-+ if (last_flushed->level != bp.level ||
-+ !bpos_eq(last_flushed->pos, orig_k.k->p)) {
-+ last_flushed->level = bp.level;
-+ last_flushed->pos = orig_k.k->p;
-+
-+ ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-+ -BCH_ERR_transaction_restart_write_buffer_flush;
-+ goto out;
-+ }
-+ goto missing;
-+ }
-+out:
-+err:
-+fsck_err:
-+ bch2_trans_iter_exit(trans, &bp_iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+missing:
-+ prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
-+ bch2_btree_id_str(bp.btree_id), bp.level);
-+ bch2_bkey_val_to_text(&buf, c, orig_k);
-+ prt_printf(&buf, "\nbp pos ");
-+ bch2_bpos_to_text(&buf, bp_iter.pos);
-+
-+ if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers ||
-+ c->opts.reconstruct_alloc ||
-+ fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
-+ ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
-+
-+ goto out;
-+}
-+
-+static int check_extent_to_backpointers(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bpos bucket_start,
-+ struct bpos bucket_end,
-+ struct bpos_level *last_flushed)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_ptrs_c ptrs;
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ k = bch2_btree_iter_peek_all_levels(iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ return ret;
-+ if (!k.k)
-+ return 0;
-+
-+ ptrs = bch2_bkey_ptrs_c(k);
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ struct bpos bucket_pos;
-+ struct bch_backpointer bp;
-+
-+ if (p.ptr.cached)
-+ continue;
-+
-+ bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
-+ k, p, &bucket_pos, &bp);
-+
-+ ret = check_bp_exists(trans, bucket_pos, bp, k,
-+ bucket_start, bucket_end,
-+ last_flushed);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+static int check_btree_root_to_backpointers(struct btree_trans *trans,
-+ enum btree_id btree_id,
-+ struct bpos bucket_start,
-+ struct bpos bucket_end,
-+ struct bpos_level *last_flushed)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_root *r = bch2_btree_id_root(c, btree_id);
-+ struct btree_iter iter;
-+ struct btree *b;
-+ struct bkey_s_c k;
-+ struct bkey_ptrs_c ptrs;
-+ struct extent_ptr_decoded p;
-+ const union bch_extent_entry *entry;
-+ int ret;
-+
-+ bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, r->level, 0);
-+ b = bch2_btree_iter_peek_node(&iter);
-+ ret = PTR_ERR_OR_ZERO(b);
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(b != btree_node_root(c, b));
-+
-+ k = bkey_i_to_s_c(&b->key);
-+ ptrs = bch2_bkey_ptrs_c(k);
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ struct bpos bucket_pos;
-+ struct bch_backpointer bp;
-+
-+ if (p.ptr.cached)
-+ continue;
-+
-+ bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1,
-+ k, p, &bucket_pos, &bp);
-+
-+ ret = check_bp_exists(trans, bucket_pos, bp, k,
-+ bucket_start, bucket_end,
-+ last_flushed);
-+ if (ret)
-+ goto err;
-+ }
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
-+{
-+ return (struct bbpos) {
-+ .btree = bp.btree_id,
-+ .pos = bp.pos,
-+ };
-+}
-+
-+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
-+{
-+ struct sysinfo i;
-+ u64 mem_bytes;
-+
-+ si_meminfo(&i);
-+ mem_bytes = i.totalram * i.mem_unit;
-+ return div_u64(mem_bytes >> 1, btree_bytes(c));
-+}
-+
-+static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
-+ unsigned btree_leaf_mask,
-+ unsigned btree_interior_mask,
-+ struct bbpos start, struct bbpos *end)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
-+ enum btree_id btree;
-+ int ret = 0;
-+
-+ for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
-+ unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
-+
-+ if (!((1U << btree) & btree_leaf_mask) &&
-+ !((1U << btree) & btree_interior_mask))
-+ continue;
-+
-+ bch2_trans_node_iter_init(trans, &iter, btree,
-+ btree == start.btree ? start.pos : POS_MIN,
-+ 0, depth, 0);
-+ /*
-+ * for_each_btree_key_contineu() doesn't check the return value
-+ * from bch2_btree_iter_advance(), which is needed when
-+ * iterating over interior nodes where we'll see keys at
-+ * SPOS_MAX:
-+ */
-+ do {
-+ k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
-+ ret = bkey_err(k);
-+ if (!k.k || ret)
-+ break;
-+
-+ --btree_nodes;
-+ if (!btree_nodes) {
-+ *end = BBPOS(btree, k.k->p);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return 0;
-+ }
-+ } while (bch2_btree_iter_advance(&iter));
-+ bch2_trans_iter_exit(trans, &iter);
-+ }
-+
-+ *end = BBPOS_MAX;
-+ return ret;
-+}
-+
-+static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
-+ struct bpos bucket_start,
-+ struct bpos bucket_end)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ enum btree_id btree_id;
-+ struct bpos_level last_flushed = { UINT_MAX, POS_MIN };
-+ int ret = 0;
-+
-+ for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
-+ unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
-+
-+ bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
-+ depth,
-+ BTREE_ITER_ALL_LEVELS|
-+ BTREE_ITER_PREFETCH);
-+
-+ do {
-+ ret = commit_do(trans, NULL, NULL,
-+ BTREE_INSERT_LAZY_RW|
-+ BTREE_INSERT_NOFAIL,
-+ check_extent_to_backpointers(trans, &iter,
-+ bucket_start, bucket_end,
-+ &last_flushed));
-+ if (ret)
-+ break;
-+ } while (!bch2_btree_iter_advance(&iter));
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (ret)
-+ break;
-+
-+ ret = commit_do(trans, NULL, NULL,
-+ BTREE_INSERT_LAZY_RW|
-+ BTREE_INSERT_NOFAIL,
-+ check_btree_root_to_backpointers(trans, btree_id,
-+ bucket_start, bucket_end,
-+ &last_flushed));
-+ if (ret)
-+ break;
-+ }
-+ return ret;
-+}
-+
-+static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
-+ struct bpos bucket)
-+{
-+ return bch2_dev_exists2(c, bucket.inode)
-+ ? bucket_pos_to_bp(c, bucket, 0)
-+ : bucket;
-+}
-+
-+static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
-+ struct bpos start, struct bpos *end)
-+{
-+ struct btree_iter alloc_iter;
-+ struct btree_iter bp_iter;
-+ struct bkey_s_c alloc_k, bp_k;
-+ size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
-+ bool alloc_end = false, bp_end = false;
-+ int ret = 0;
-+
-+ bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
-+ start, 0, 1, 0);
-+ bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
-+ bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
-+ while (1) {
-+ alloc_k = !alloc_end
-+ ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
-+ : bkey_s_c_null;
-+ bp_k = !bp_end
-+ ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
-+ : bkey_s_c_null;
-+
-+ ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
-+ if ((!alloc_k.k && !bp_k.k) || ret) {
-+ *end = SPOS_MAX;
-+ break;
-+ }
-+
-+ --btree_nodes;
-+ if (!btree_nodes) {
-+ *end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
-+ break;
-+ }
-+
-+ if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
-+ bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
-+ if (!bch2_btree_iter_advance(&alloc_iter))
-+ alloc_end = true;
-+ } else {
-+ if (!bch2_btree_iter_advance(&bp_iter))
-+ bp_end = true;
-+ }
-+ }
-+ bch2_trans_iter_exit(trans, &bp_iter);
-+ bch2_trans_iter_exit(trans, &alloc_iter);
-+ return ret;
-+}
-+
-+int bch2_check_extents_to_backpointers(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct bpos start = POS_MIN, end;
-+ int ret;
-+
-+ while (1) {
-+ ret = bch2_get_alloc_in_memory_pos(trans, start, &end);
-+ if (ret)
-+ break;
-+
-+ if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX))
-+ bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
-+ __func__, btree_nodes_fit_in_ram(c));
-+
-+ if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ prt_str(&buf, "check_extents_to_backpointers(): ");
-+ bch2_bpos_to_text(&buf, start);
-+ prt_str(&buf, "-");
-+ bch2_bpos_to_text(&buf, end);
-+
-+ bch_verbose(c, "%s", buf.buf);
-+ printbuf_exit(&buf);
-+ }
-+
-+ ret = bch2_check_extents_to_backpointers_pass(trans, start, end);
-+ if (ret || bpos_eq(end, SPOS_MAX))
-+ break;
-+
-+ start = bpos_successor(end);
-+ }
-+ bch2_trans_put(trans);
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+static int check_one_backpointer(struct btree_trans *trans,
-+ struct bbpos start,
-+ struct bbpos end,
-+ struct bkey_s_c_backpointer bp,
-+ struct bpos *last_flushed_pos)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bbpos pos = bp_to_bbpos(*bp.v);
-+ struct bkey_s_c k;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ if (bbpos_cmp(pos, start) < 0 ||
-+ bbpos_cmp(pos, end) > 0)
-+ return 0;
-+
-+ k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
-+ ret = bkey_err(k);
-+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
-+ return 0;
-+ if (ret)
-+ return ret;
-+
-+ if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
-+ *last_flushed_pos = bp.k->p;
-+ ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-+ -BCH_ERR_transaction_restart_write_buffer_flush;
-+ goto out;
-+ }
-+
-+ if (fsck_err_on(!k.k, c,
-+ backpointer_to_missing_ptr,
-+ "backpointer for missing %s\n %s",
-+ bp.v->level ? "btree node" : "extent",
-+ (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
-+ ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
-+ goto out;
-+ }
-+out:
-+fsck_err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
-+ struct bbpos start,
-+ struct bbpos end)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bpos last_flushed_pos = SPOS_MAX;
-+
-+ return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
-+ POS_MIN, BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+ check_one_backpointer(trans, start, end,
-+ bkey_s_c_to_backpointer(k),
-+ &last_flushed_pos));
-+}
-+
-+int bch2_check_backpointers_to_extents(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
-+ int ret;
-+
-+ while (1) {
-+ ret = bch2_get_btree_in_memory_pos(trans,
-+ (1U << BTREE_ID_extents)|
-+ (1U << BTREE_ID_reflink),
-+ ~0,
-+ start, &end);
-+ if (ret)
-+ break;
-+
-+ if (!bbpos_cmp(start, BBPOS_MIN) &&
-+ bbpos_cmp(end, BBPOS_MAX))
-+ bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
-+ __func__, btree_nodes_fit_in_ram(c));
-+
-+ if (bbpos_cmp(start, BBPOS_MIN) ||
-+ bbpos_cmp(end, BBPOS_MAX)) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ prt_str(&buf, "check_backpointers_to_extents(): ");
-+ bch2_bbpos_to_text(&buf, start);
-+ prt_str(&buf, "-");
-+ bch2_bbpos_to_text(&buf, end);
-+
-+ bch_verbose(c, "%s", buf.buf);
-+ printbuf_exit(&buf);
-+ }
-+
-+ ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
-+ if (ret || !bbpos_cmp(end, BBPOS_MAX))
-+ break;
-+
-+ start = bbpos_successor(end);
-+ }
-+ bch2_trans_put(trans);
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
-new file mode 100644
-index 000000000000..ab866feeaf66
---- /dev/null
-+++ b/fs/bcachefs/backpointers.h
-@@ -0,0 +1,140 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
-+#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
-+
-+#include "btree_iter.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "super.h"
-+
-+static inline u64 swab40(u64 x)
-+{
-+ return (((x & 0x00000000ffULL) << 32)|
-+ ((x & 0x000000ff00ULL) << 16)|
-+ ((x & 0x0000ff0000ULL) >> 0)|
-+ ((x & 0x00ff000000ULL) >> 16)|
-+ ((x & 0xff00000000ULL) >> 32));
-+}
-+
-+int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
-+void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+void bch2_backpointer_swab(struct bkey_s);
-+
-+#define bch2_bkey_ops_backpointer ((struct bkey_ops) { \
-+ .key_invalid = bch2_backpointer_invalid, \
-+ .val_to_text = bch2_backpointer_k_to_text, \
-+ .swab = bch2_backpointer_swab, \
-+ .min_val_size = 32, \
-+})
-+
-+#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10
-+
-+/*
-+ * Convert from pos in backpointer btree to pos of corresponding bucket in alloc
-+ * btree:
-+ */
-+static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
-+ struct bpos bp_pos)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode);
-+ u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
-+
-+ return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
-+}
-+
-+/*
-+ * Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
-+ */
-+static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
-+ struct bpos bucket,
-+ u64 bucket_offset)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
-+ struct bpos ret;
-+
-+ ret = POS(bucket.inode,
-+ (bucket_to_sector(ca, bucket.offset) <<
-+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
-+
-+ EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
-+
-+ return ret;
-+}
-+
-+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bkey_i_backpointer *,
-+ struct bch_backpointer, struct bkey_s_c, bool);
-+
-+static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
-+ struct bpos bucket,
-+ struct bch_backpointer bp,
-+ struct bkey_s_c orig_k,
-+ bool insert)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_i_backpointer *bp_k;
-+ int ret;
-+
-+ bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
-+ ret = PTR_ERR_OR_ZERO(bp_k);
-+ if (ret)
-+ return ret;
-+
-+ bkey_backpointer_init(&bp_k->k_i);
-+ bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset);
-+ bp_k->v = bp;
-+
-+ if (!insert) {
-+ bp_k->k.type = KEY_TYPE_deleted;
-+ set_bkey_val_u64s(&bp_k->k, 0);
-+ }
-+
-+ if (unlikely(bch2_backpointers_no_use_write_buffer))
-+ return bch2_bucket_backpointer_mod_nowritebuffer(trans, bp_k, bp, orig_k, insert);
-+
-+ return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i);
-+}
-+
-+static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, struct extent_ptr_decoded p)
-+{
-+ return level ? BCH_DATA_btree :
-+ p.has_ec ? BCH_DATA_stripe :
-+ BCH_DATA_user;
-+}
-+
-+static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, struct extent_ptr_decoded p,
-+ struct bpos *bucket_pos, struct bch_backpointer *bp)
-+{
-+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
-+ s64 sectors = level ? btree_sectors(c) : k.k->size;
-+ u32 bucket_offset;
-+
-+ *bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset);
-+ *bp = (struct bch_backpointer) {
-+ .btree_id = btree_id,
-+ .level = level,
-+ .data_type = data_type,
-+ .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
-+ p.crc.offset,
-+ .bucket_len = ptr_disk_sectors(sectors, p),
-+ .pos = k.k->p,
-+ };
-+}
-+
-+int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int,
-+ struct bpos *, struct bch_backpointer *, unsigned);
-+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
-+ struct bpos, struct bch_backpointer,
-+ unsigned);
-+struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *,
-+ struct bpos, struct bch_backpointer);
-+
-+int bch2_check_btree_backpointers(struct bch_fs *);
-+int bch2_check_extents_to_backpointers(struct bch_fs *);
-+int bch2_check_backpointers_to_extents(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */
-diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h
-new file mode 100644
-index 000000000000..be2edced5213
---- /dev/null
-+++ b/fs/bcachefs/bbpos.h
-@@ -0,0 +1,37 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BBPOS_H
-+#define _BCACHEFS_BBPOS_H
-+
-+#include "bbpos_types.h"
-+#include "bkey_methods.h"
-+#include "btree_cache.h"
-+
-+static inline int bbpos_cmp(struct bbpos l, struct bbpos r)
-+{
-+ return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos);
-+}
-+
-+static inline struct bbpos bbpos_successor(struct bbpos pos)
-+{
-+ if (bpos_cmp(pos.pos, SPOS_MAX)) {
-+ pos.pos = bpos_successor(pos.pos);
-+ return pos;
-+ }
-+
-+ if (pos.btree != BTREE_ID_NR) {
-+ pos.btree++;
-+ pos.pos = POS_MIN;
-+ return pos;
-+ }
-+
-+ BUG();
-+}
-+
-+static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos)
-+{
-+ prt_str(out, bch2_btree_id_str(pos.btree));
-+ prt_char(out, ':');
-+ bch2_bpos_to_text(out, pos.pos);
-+}
-+
-+#endif /* _BCACHEFS_BBPOS_H */
-diff --git a/fs/bcachefs/bbpos_types.h b/fs/bcachefs/bbpos_types.h
-new file mode 100644
-index 000000000000..5198e94cf3b8
---- /dev/null
-+++ b/fs/bcachefs/bbpos_types.h
-@@ -0,0 +1,18 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BBPOS_TYPES_H
-+#define _BCACHEFS_BBPOS_TYPES_H
-+
-+struct bbpos {
-+ enum btree_id btree;
-+ struct bpos pos;
-+};
-+
-+static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
-+{
-+ return (struct bbpos) { btree, pos };
-+}
-+
-+#define BBPOS_MIN BBPOS(0, POS_MIN)
-+#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX)
-+
-+#endif /* _BCACHEFS_BBPOS_TYPES_H */
-diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
-new file mode 100644
-index 000000000000..9cb8684959ee
---- /dev/null
-+++ b/fs/bcachefs/bcachefs.h
-@@ -0,0 +1,1161 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_H
-+#define _BCACHEFS_H
-+
-+/*
-+ * SOME HIGH LEVEL CODE DOCUMENTATION:
-+ *
-+ * Bcache mostly works with cache sets, cache devices, and backing devices.
-+ *
-+ * Support for multiple cache devices hasn't quite been finished off yet, but
-+ * it's about 95% plumbed through. A cache set and its cache devices is sort of
-+ * like a md raid array and its component devices. Most of the code doesn't care
-+ * about individual cache devices, the main abstraction is the cache set.
-+ *
-+ * Multiple cache devices is intended to give us the ability to mirror dirty
-+ * cached data and metadata, without mirroring clean cached data.
-+ *
-+ * Backing devices are different, in that they have a lifetime independent of a
-+ * cache set. When you register a newly formatted backing device it'll come up
-+ * in passthrough mode, and then you can attach and detach a backing device from
-+ * a cache set at runtime - while it's mounted and in use. Detaching implicitly
-+ * invalidates any cached data for that backing device.
-+ *
-+ * A cache set can have multiple (many) backing devices attached to it.
-+ *
-+ * There's also flash only volumes - this is the reason for the distinction
-+ * between struct cached_dev and struct bcache_device. A flash only volume
-+ * works much like a bcache device that has a backing device, except the
-+ * "cached" data is always dirty. The end result is that we get thin
-+ * provisioning with very little additional code.
-+ *
-+ * Flash only volumes work but they're not production ready because the moving
-+ * garbage collector needs more work. More on that later.
-+ *
-+ * BUCKETS/ALLOCATION:
-+ *
-+ * Bcache is primarily designed for caching, which means that in normal
-+ * operation all of our available space will be allocated. Thus, we need an
-+ * efficient way of deleting things from the cache so we can write new things to
-+ * it.
-+ *
-+ * To do this, we first divide the cache device up into buckets. A bucket is the
-+ * unit of allocation; they're typically around 1 MB - anywhere from 128k to 2M+
-+ * works efficiently.
-+ *
-+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with
-+ * it. The gens and priorities for all the buckets are stored contiguously and
-+ * packed on disk (in a linked list of buckets - aside from the superblock, all
-+ * of bcache's metadata is stored in buckets).
-+ *
-+ * The priority is used to implement an LRU. We reset a bucket's priority when
-+ * we allocate it or on a cache hit, and every so often we decrement the priority
-+ * of each bucket. It could be used to implement something more sophisticated,
-+ * if anyone ever gets around to it.
-+ *
-+ * The generation is used for invalidating buckets. Each pointer also has an 8
-+ * bit generation embedded in it; for a pointer to be considered valid, its gen
-+ * must match the gen of the bucket it points into. Thus, to reuse a bucket all
-+ * we have to do is increment its gen (and write its new gen to disk; we batch
-+ * this up).
-+ *
-+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that
-+ * contain metadata (including btree nodes).
-+ *
-+ * THE BTREE:
-+ *
-+ * Bcache is in large part designed around the btree.
-+ *
-+ * At a high level, the btree is just an index of key -> ptr tuples.
-+ *
-+ * Keys represent extents, and thus have a size field. Keys also have a variable
-+ * number of pointers attached to them (potentially zero, which is handy for
-+ * invalidating the cache).
-+ *
-+ * The key itself is an inode:offset pair. The inode number corresponds to a
-+ * backing device or a flash only volume. The offset is the ending offset of the
-+ * extent within the inode - not the starting offset; this makes lookups
-+ * slightly more convenient.
-+ *
-+ * Pointers contain the cache device id, the offset on that device, and an 8 bit
-+ * generation number. More on the gen later.
-+ *
-+ * Index lookups are not fully abstracted - cache lookups in particular are
-+ * still somewhat mixed in with the btree code, but things are headed in that
-+ * direction.
-+ *
-+ * Updates are fairly well abstracted, though. There are two different ways of
-+ * updating the btree; insert and replace.
-+ *
-+ * BTREE_INSERT will just take a list of keys and insert them into the btree -
-+ * overwriting (possibly only partially) any extents they overlap with. This is
-+ * used to update the index after a write.
-+ *
-+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is
-+ * overwriting a key that matches another given key. This is used for inserting
-+ * data into the cache after a cache miss, and for background writeback, and for
-+ * the moving garbage collector.
-+ *
-+ * There is no "delete" operation; deleting things from the index is
-+ * accomplished either by invalidating pointers (by incrementing a bucket's
-+ * gen) or by inserting a key with 0 pointers - which will overwrite anything
-+ * previously present at that location in the index.
-+ *
-+ * This means that there are always stale/invalid keys in the btree. They're
-+ * filtered out by the code that iterates through a btree node, and removed when
-+ * a btree node is rewritten.
-+ *
-+ * BTREE NODES:
-+ *
-+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
-+ * free smaller than a bucket - so, that's how big our btree nodes are.
-+ *
-+ * (If buckets are really big we'll only use part of the bucket for a btree node
-+ * - no less than 1/4th - but a bucket still contains no more than a single
-+ * btree node. I'd actually like to change this, but for now we rely on the
-+ * bucket's gen for deleting btree nodes when we rewrite/split a node.)
-+ *
-+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook
-+ * btree implementation.
-+ *
-+ * The way this is solved is that btree nodes are internally log structured; we
-+ * can append new keys to an existing btree node without rewriting it. This
-+ * means each set of keys we write is sorted, but the node is not.
-+ *
-+ * We maintain this log structure in memory - keeping 1MB of keys sorted would
-+ * be expensive, and we have to distinguish between the keys we have written and
-+ * the keys we haven't. So to do a lookup in a btree node, we have to search
-+ * each sorted set. But we do merge written sets together lazily, so the cost of
-+ * these extra searches is quite low (normally most of the keys in a btree node
-+ * will be in one big set, and then there'll be one or two sets that are much
-+ * smaller).
-+ *
-+ * This log structure makes bcache's btree more of a hybrid between a
-+ * conventional btree and a compacting data structure, with some of the
-+ * advantages of both.
-+ *
-+ * GARBAGE COLLECTION:
-+ *
-+ * We can't just invalidate any bucket - it might contain dirty data or
-+ * metadata. If it once contained dirty data, other writes might overwrite it
-+ * later, leaving no valid pointers into that bucket in the index.
-+ *
-+ * Thus, the primary purpose of garbage collection is to find buckets to reuse.
-+ * It also counts how much valid data each bucket currently contains, so that
-+ * allocation can reuse buckets sooner when they've been mostly overwritten.
-+ *
-+ * It also does some things that are really internal to the btree
-+ * implementation. If a btree node contains pointers that are stale by more than
-+ * some threshold, it rewrites the btree node to avoid the bucket's generation
-+ * wrapping around. It also merges adjacent btree nodes if they're empty enough.
-+ *
-+ * THE JOURNAL:
-+ *
-+ * Bcache's journal is not necessary for consistency; we always strictly
-+ * order metadata writes so that the btree and everything else is consistent on
-+ * disk in the event of an unclean shutdown, and in fact bcache had writeback
-+ * caching (with recovery from unclean shutdown) before journalling was
-+ * implemented.
-+ *
-+ * Rather, the journal is purely a performance optimization; we can't complete a
-+ * write until we've updated the index on disk, otherwise the cache would be
-+ * inconsistent in the event of an unclean shutdown. This means that without the
-+ * journal, on random write workloads we constantly have to update all the leaf
-+ * nodes in the btree, and those writes will be mostly empty (appending at most
-+ * a few keys each) - highly inefficient in terms of amount of metadata writes,
-+ * and it puts more strain on the various btree resorting/compacting code.
-+ *
-+ * The journal is just a log of keys we've inserted; on startup we just reinsert
-+ * all the keys in the open journal entries. That means that when we're updating
-+ * a node in the btree, we can wait until a 4k block of keys fills up before
-+ * writing them out.
-+ *
-+ * For simplicity, we only journal updates to leaf nodes; updates to parent
-+ * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth
-+ * the complexity to deal with journalling them (in particular, journal replay)
-+ * - updates to non leaf nodes just happen synchronously (see btree_split()).
-+ */
-+
-+#undef pr_fmt
-+#ifdef __KERNEL__
-+#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__
-+#else
-+#define pr_fmt(fmt) "%s() " fmt "\n", __func__
-+#endif
-+
-+#include <linux/backing-dev-defs.h>
-+#include <linux/bug.h>
-+#include <linux/bio.h>
-+#include <linux/closure.h>
-+#include <linux/kobject.h>
-+#include <linux/list.h>
-+#include <linux/math64.h>
-+#include <linux/mutex.h>
-+#include <linux/percpu-refcount.h>
-+#include <linux/percpu-rwsem.h>
-+#include <linux/rhashtable.h>
-+#include <linux/rwsem.h>
-+#include <linux/semaphore.h>
-+#include <linux/seqlock.h>
-+#include <linux/shrinker.h>
-+#include <linux/srcu.h>
-+#include <linux/types.h>
-+#include <linux/workqueue.h>
-+#include <linux/zstd.h>
-+
-+#include "bcachefs_format.h"
-+#include "errcode.h"
-+#include "fifo.h"
-+#include "nocow_locking_types.h"
-+#include "opts.h"
-+#include "recovery_types.h"
-+#include "sb-errors_types.h"
-+#include "seqmutex.h"
-+#include "util.h"
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+#define BCH_WRITE_REF_DEBUG
-+#endif
-+
-+#ifndef dynamic_fault
-+#define dynamic_fault(...) 0
-+#endif
-+
-+#define race_fault(...) dynamic_fault("bcachefs:race")
-+
-+#define trace_and_count(_c, _name, ...) \
-+do { \
-+ this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]); \
-+ trace_##_name(__VA_ARGS__); \
-+} while (0)
-+
-+#define bch2_fs_init_fault(name) \
-+ dynamic_fault("bcachefs:bch_fs_init:" name)
-+#define bch2_meta_read_fault(name) \
-+ dynamic_fault("bcachefs:meta:read:" name)
-+#define bch2_meta_write_fault(name) \
-+ dynamic_fault("bcachefs:meta:write:" name)
-+
-+#ifdef __KERNEL__
-+#define BCACHEFS_LOG_PREFIX
-+#endif
-+
-+#ifdef BCACHEFS_LOG_PREFIX
-+
-+#define bch2_log_msg(_c, fmt) "bcachefs (%s): " fmt, ((_c)->name)
-+#define bch2_fmt_dev(_ca, fmt) "bcachefs (%s): " fmt "\n", ((_ca)->name)
-+#define bch2_fmt_dev_offset(_ca, _offset, fmt) "bcachefs (%s sector %llu): " fmt "\n", ((_ca)->name), (_offset)
-+#define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
-+#define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \
-+ "bcachefs (%s inum %llu offset %llu): " fmt "\n", ((_c)->name), (_inum), (_offset)
-+
-+#else
-+
-+#define bch2_log_msg(_c, fmt) fmt
-+#define bch2_fmt_dev(_ca, fmt) "%s: " fmt "\n", ((_ca)->name)
-+#define bch2_fmt_dev_offset(_ca, _offset, fmt) "%s sector %llu: " fmt "\n", ((_ca)->name), (_offset)
-+#define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum)
-+#define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \
-+ "inum %llu offset %llu: " fmt "\n", (_inum), (_offset)
-+
-+#endif
-+
-+#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
-+
-+#define bch_info(c, fmt, ...) \
-+ printk(KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
-+#define bch_notice(c, fmt, ...) \
-+ printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
-+#define bch_warn(c, fmt, ...) \
-+ printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
-+#define bch_warn_ratelimited(c, fmt, ...) \
-+ printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
-+
-+#define bch_err(c, fmt, ...) \
-+ printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
-+#define bch_err_dev(ca, fmt, ...) \
-+ printk(KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
-+#define bch_err_dev_offset(ca, _offset, fmt, ...) \
-+ printk(KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
-+#define bch_err_inum(c, _inum, fmt, ...) \
-+ printk(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
-+#define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \
-+ printk(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
-+
-+#define bch_err_ratelimited(c, fmt, ...) \
-+ printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
-+#define bch_err_dev_ratelimited(ca, fmt, ...) \
-+ printk_ratelimited(KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
-+#define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \
-+ printk_ratelimited(KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
-+#define bch_err_inum_ratelimited(c, _inum, fmt, ...) \
-+ printk_ratelimited(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
-+#define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \
-+ printk_ratelimited(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
-+
-+#define bch_err_fn(_c, _ret) \
-+do { \
-+ if (_ret && !bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
-+ bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\
-+} while (0)
-+
-+#define bch_err_msg(_c, _ret, _msg, ...) \
-+do { \
-+ if (_ret && !bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
-+ bch_err(_c, "%s(): error " _msg " %s", __func__, \
-+ ##__VA_ARGS__, bch2_err_str(_ret)); \
-+} while (0)
-+
-+#define bch_verbose(c, fmt, ...) \
-+do { \
-+ if ((c)->opts.verbose) \
-+ bch_info(c, fmt, ##__VA_ARGS__); \
-+} while (0)
-+
-+#define pr_verbose_init(opts, fmt, ...) \
-+do { \
-+ if (opt_get(opts, verbose)) \
-+ pr_info(fmt, ##__VA_ARGS__); \
-+} while (0)
-+
-+/* Parameters that are useful for debugging, but should always be compiled in: */
-+#define BCH_DEBUG_PARAMS_ALWAYS() \
-+ BCH_DEBUG_PARAM(key_merging_disabled, \
-+ "Disables merging of extents") \
-+ BCH_DEBUG_PARAM(btree_gc_always_rewrite, \
-+ "Causes mark and sweep to compact and rewrite every " \
-+ "btree node it traverses") \
-+ BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \
-+ "Disables rewriting of btree nodes during mark and sweep")\
-+ BCH_DEBUG_PARAM(btree_shrinker_disabled, \
-+ "Disables the shrinker callback for the btree node cache")\
-+ BCH_DEBUG_PARAM(verify_btree_ondisk, \
-+ "Reread btree nodes at various points to verify the " \
-+ "mergesort in the read path against modifications " \
-+ "done in memory") \
-+ BCH_DEBUG_PARAM(verify_all_btree_replicas, \
-+ "When reading btree nodes, read all replicas and " \
-+ "compare them") \
-+ BCH_DEBUG_PARAM(backpointers_no_use_write_buffer, \
-+ "Don't use the write buffer for backpointers, enabling "\
-+ "extra runtime checks")
-+
-+/* Parameters that should only be compiled in debug mode (NOTE(review): inject_invalid_keys repeats the journal_seq_verify description -- confirm the intended text): */
-+#define BCH_DEBUG_PARAMS_DEBUG() \
-+ BCH_DEBUG_PARAM(expensive_debug_checks, \
-+ "Enables various runtime debugging checks that " \
-+ "significantly affect performance") \
-+ BCH_DEBUG_PARAM(debug_check_iterators, \
-+ "Enables extra verification for btree iterators") \
-+ BCH_DEBUG_PARAM(debug_check_btree_accounting, \
-+ "Verify btree accounting for keys within a node") \
-+ BCH_DEBUG_PARAM(journal_seq_verify, \
-+ "Store the journal sequence number in the version " \
-+ "number of every btree key, and verify that btree " \
-+ "update ordering is preserved during recovery") \
-+ BCH_DEBUG_PARAM(inject_invalid_keys, \
-+ "Store the journal sequence number in the version " \
-+ "number of every btree key, and verify that btree " \
-+ "update ordering is preserved during recovery") \
-+ BCH_DEBUG_PARAM(test_alloc_startup, \
-+ "Force allocator startup to use the slowpath where it " \
-+ "can't find enough free buckets without invalidating " \
-+ "cached data") \
-+ BCH_DEBUG_PARAM(force_reconstruct_read, \
-+ "Force reads to use the reconstruct path, when reading " \
-+ "from erasure coded extents") \
-+ BCH_DEBUG_PARAM(test_restart_gc, \
-+ "Test restarting mark and sweep gc when bucket gens change")
-+
-+#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL()
-+#else
-+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS()
-+#endif
-+
-+#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name;
-+BCH_DEBUG_PARAMS()
-+#undef BCH_DEBUG_PARAM
-+
-+#ifndef CONFIG_BCACHEFS_DEBUG
-+#define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name;
-+BCH_DEBUG_PARAMS_DEBUG()
-+#undef BCH_DEBUG_PARAM
-+#endif
-+
-+#define BCH_TIME_STATS() \
-+ x(btree_node_mem_alloc) \
-+ x(btree_node_split) \
-+ x(btree_node_compact) \
-+ x(btree_node_merge) \
-+ x(btree_node_sort) \
-+ x(btree_node_read) \
-+ x(btree_interior_update_foreground) \
-+ x(btree_interior_update_total) \
-+ x(btree_gc) \
-+ x(data_write) \
-+ x(data_read) \
-+ x(data_promote) \
-+ x(journal_flush_write) \
-+ x(journal_noflush_write) \
-+ x(journal_flush_seq) \
-+ x(blocked_journal) \
-+ x(blocked_allocate) \
-+ x(blocked_allocate_open_bucket) \
-+ x(nocow_lock_contended)
-+
-+enum bch_time_stats {
-+#define x(name) BCH_TIME_##name,
-+ BCH_TIME_STATS()
-+#undef x
-+ BCH_TIME_STAT_NR
-+};
-+
-+#include "alloc_types.h"
-+#include "btree_types.h"
-+#include "btree_write_buffer_types.h"
-+#include "buckets_types.h"
-+#include "buckets_waiting_for_journal_types.h"
-+#include "clock_types.h"
-+#include "disk_groups_types.h"
-+#include "ec_types.h"
-+#include "journal_types.h"
-+#include "keylist_types.h"
-+#include "quota_types.h"
-+#include "rebalance_types.h"
-+#include "replicas_types.h"
-+#include "subvolume_types.h"
-+#include "super_types.h"
-+
-+/* Number of nodes btree coalesce will try to coalesce at once */
-+#define GC_MERGE_NODES 4U
-+
-+/* Maximum number of nodes we might need to allocate atomically: */
-+#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))
-+
-+/* Size of the freelist we allocate btree nodes from: */
-+#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)
-+
-+#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)
-+
-+struct btree;
-+
-+enum gc_phase {
-+ GC_PHASE_NOT_RUNNING,
-+ GC_PHASE_START,
-+ GC_PHASE_SB,
-+
-+ GC_PHASE_BTREE_stripes,
-+ GC_PHASE_BTREE_extents,
-+ GC_PHASE_BTREE_inodes,
-+ GC_PHASE_BTREE_dirents,
-+ GC_PHASE_BTREE_xattrs,
-+ GC_PHASE_BTREE_alloc,
-+ GC_PHASE_BTREE_quotas,
-+ GC_PHASE_BTREE_reflink,
-+ GC_PHASE_BTREE_subvolumes,
-+ GC_PHASE_BTREE_snapshots,
-+ GC_PHASE_BTREE_lru,
-+ GC_PHASE_BTREE_freespace,
-+ GC_PHASE_BTREE_need_discard,
-+ GC_PHASE_BTREE_backpointers,
-+ GC_PHASE_BTREE_bucket_gens,
-+ GC_PHASE_BTREE_snapshot_trees,
-+ GC_PHASE_BTREE_deleted_inodes,
-+ GC_PHASE_BTREE_logged_ops,
-+ GC_PHASE_BTREE_rebalance_work,
-+
-+ GC_PHASE_PENDING_DELETE,
-+};
-+
-+struct gc_pos {
-+ enum gc_phase phase;
-+ struct bpos pos;
-+ unsigned level;
-+};
-+
-+struct reflink_gc {
-+ u64 offset;
-+ u32 size;
-+ u32 refcount;
-+};
-+
-+typedef GENRADIX(struct reflink_gc) reflink_gc_table;
-+
-+struct io_count {
-+ u64 sectors[2][BCH_DATA_NR];
-+};
-+
-+struct bch_dev {
-+ struct kobject kobj;
-+ struct percpu_ref ref;
-+ struct completion ref_completion;
-+ struct percpu_ref io_ref;
-+ struct completion io_ref_completion;
-+
-+ struct bch_fs *fs;
-+
-+ u8 dev_idx;
-+ /*
-+ * Cached version of this device's member info from superblock
-+ * Committed by bch2_write_super() -> bch_fs_mi_update()
-+ */
-+ struct bch_member_cpu mi;
-+ atomic64_t errors[BCH_MEMBER_ERROR_NR];
-+
-+ __uuid_t uuid;
-+ char name[BDEVNAME_SIZE];
-+
-+ struct bch_sb_handle disk_sb;
-+ struct bch_sb *sb_read_scratch;
-+ int sb_write_error;
-+ dev_t dev;
-+ atomic_t flush_seq;
-+
-+ struct bch_devs_mask self;
-+
-+ /* biosets used in cloned bios for writing multiple replicas */
-+ struct bio_set replica_set;
-+
-+ /*
-+ * Buckets:
-+ * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
-+ * gc_lock, for device resize - holding any is sufficient for access:
-+ * Or rcu_read_lock(), but only for ptr_stale():
-+ */
-+ struct bucket_array __rcu *buckets_gc;
-+ struct bucket_gens __rcu *bucket_gens;
-+ u8 *oldest_gen;
-+ unsigned long *buckets_nouse;
-+ struct rw_semaphore bucket_lock;
-+
-+ struct bch_dev_usage *usage_base;
-+ struct bch_dev_usage __percpu *usage[JOURNAL_BUF_NR];
-+ struct bch_dev_usage __percpu *usage_gc;
-+
-+ /* Allocator: */
-+ u64 new_fs_bucket_idx;
-+ u64 alloc_cursor;
-+
-+ unsigned nr_open_buckets;
-+ unsigned nr_btree_reserve;
-+
-+ size_t inc_gen_needs_gc;
-+ size_t inc_gen_really_needs_gc;
-+ size_t buckets_waiting_on_journal;
-+
-+ atomic64_t rebalance_work;
-+
-+ struct journal_device journal;
-+ u64 prev_journal_sector;
-+
-+ struct work_struct io_error_work;
-+
-+ /* The rest of this all shows up in sysfs */
-+ atomic64_t cur_latency[2];
-+ struct bch2_time_stats io_latency[2];
-+
-+#define CONGESTED_MAX 1024
-+ atomic_t congested;
-+ u64 congested_last;
-+
-+ struct io_count __percpu *io_done;
-+};
-+
-+enum {
-+ /* startup: */
-+ BCH_FS_STARTED,
-+ BCH_FS_MAY_GO_RW,
-+ BCH_FS_RW,
-+ BCH_FS_WAS_RW,
-+
-+ /* shutdown: */
-+ BCH_FS_STOPPING,
-+ BCH_FS_EMERGENCY_RO,
-+ BCH_FS_GOING_RO,
-+ BCH_FS_WRITE_DISABLE_COMPLETE,
-+ BCH_FS_CLEAN_SHUTDOWN,
-+
-+ /* fsck passes: */
-+ BCH_FS_FSCK_DONE,
-+ BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */
-+ BCH_FS_NEED_ANOTHER_GC,
-+
-+ BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS,
-+
-+ /* errors: */
-+ BCH_FS_ERROR,
-+ BCH_FS_TOPOLOGY_ERROR,
-+ BCH_FS_ERRORS_FIXED,
-+ BCH_FS_ERRORS_NOT_FIXED,
-+};
-+
-+struct btree_debug {
-+ unsigned id;
-+};
-+
-+#define BCH_TRANSACTIONS_NR 128
-+
-+struct btree_transaction_stats {
-+ struct bch2_time_stats lock_hold_times;
-+ struct mutex lock;
-+ unsigned nr_max_paths;
-+ unsigned wb_updates_size;
-+ unsigned max_mem;
-+ char *max_paths_text;
-+};
-+
-+struct bch_fs_pcpu {
-+ u64 sectors_available;
-+};
-+
-+struct journal_seq_blacklist_table { /* header followed by a variable-length array of blacklist entries */
-+ size_t nr; /* presumably the number of elements in entries[] -- confirm against the allocator */
-+ struct journal_seq_blacklist_table_entry {
-+ u64 start;
-+ u64 end;
-+ bool dirty;
-+ } entries[]; /* C99 flexible array member (was a zero-length array); sizeof(struct) is unchanged */
-+};
-+
-+struct journal_keys {
-+ struct journal_key {
-+ u64 journal_seq;
-+ u32 journal_offset;
-+ enum btree_id btree_id:8;
-+ unsigned level:8;
-+ bool allocated;
-+ bool overwritten;
-+ struct bkey_i *k;
-+ } *d;
-+ /*
-+ * Gap buffer: instead of all the empty space in the array being at the
-+ * end of the buffer - from @nr to @size - the empty space is at @gap.
-+ * This means that sequential insertions are O(n) instead of O(n^2).
-+ */
-+ size_t gap;
-+ size_t nr;
-+ size_t size;
-+};
-+
-+struct btree_trans_buf {
-+ struct btree_trans *trans;
-+};
-+
-+#define REPLICAS_DELTA_LIST_MAX (1U << 16)
-+
-+#define BCACHEFS_ROOT_SUBVOL_INUM \
-+ ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
-+
-+#define BCH_WRITE_REFS() \
-+ x(trans) \
-+ x(write) \
-+ x(promote) \
-+ x(node_rewrite) \
-+ x(stripe_create) \
-+ x(stripe_delete) \
-+ x(reflink) \
-+ x(fallocate) \
-+ x(discard) \
-+ x(invalidate) \
-+ x(delete_dead_snapshots) \
-+ x(snapshot_delete_pagecache) \
-+ x(sysfs)
-+
-+enum bch_write_ref {
-+#define x(n) BCH_WRITE_REF_##n,
-+ BCH_WRITE_REFS()
-+#undef x
-+ BCH_WRITE_REF_NR,
-+};
-+
-+struct bch_fs {
-+ struct closure cl;
-+
-+ struct list_head list;
-+ struct kobject kobj;
-+ struct kobject counters_kobj;
-+ struct kobject internal;
-+ struct kobject opts_dir;
-+ struct kobject time_stats;
-+ unsigned long flags;
-+
-+ int minor;
-+ struct device *chardev;
-+ struct super_block *vfs_sb;
-+ dev_t dev;
-+ char name[40];
-+
-+ /* ro/rw, add/remove/resize devices: */
-+ struct rw_semaphore state_lock;
-+
-+ /* Counts outstanding writes, for clean transition to read-only */
-+#ifdef BCH_WRITE_REF_DEBUG
-+ atomic_long_t writes[BCH_WRITE_REF_NR];
-+#else
-+ struct percpu_ref writes;
-+#endif
-+ struct work_struct read_only_work;
-+
-+ struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
-+
-+ struct bch_replicas_cpu replicas;
-+ struct bch_replicas_cpu replicas_gc;
-+ struct mutex replicas_gc_lock;
-+ mempool_t replicas_delta_pool;
-+
-+ struct journal_entry_res btree_root_journal_res;
-+ struct journal_entry_res replicas_journal_res;
-+ struct journal_entry_res clock_journal_res;
-+ struct journal_entry_res dev_usage_journal_res;
-+
-+ struct bch_disk_groups_cpu __rcu *disk_groups;
-+
-+ struct bch_opts opts;
-+
-+ /* Updated by bch2_sb_update():*/
-+ struct {
-+ __uuid_t uuid;
-+ __uuid_t user_uuid;
-+
-+ u16 version;
-+ u16 version_min;
-+ u16 version_upgrade_complete;
-+
-+ u8 nr_devices;
-+ u8 clean;
-+
-+ u8 encryption_type;
-+
-+ u64 time_base_lo;
-+ u32 time_base_hi;
-+ unsigned time_units_per_sec;
-+ unsigned nsec_per_time_unit;
-+ u64 features;
-+ u64 compat;
-+ } sb;
-+
-+
-+ struct bch_sb_handle disk_sb;
-+
-+ unsigned short block_bits; /* ilog2(block_size) */
-+
-+ u16 btree_foreground_merge_threshold;
-+
-+ struct closure sb_write;
-+ struct mutex sb_lock;
-+
-+ /* snapshot.c: */
-+ struct snapshot_table __rcu *snapshots;
-+ size_t snapshot_table_size;
-+ struct mutex snapshot_table_lock;
-+ struct rw_semaphore snapshot_create_lock;
-+
-+ struct work_struct snapshot_delete_work;
-+ struct work_struct snapshot_wait_for_pagecache_and_delete_work;
-+ snapshot_id_list snapshots_unlinked;
-+ struct mutex snapshots_unlinked_lock;
-+
-+ /* BTREE CACHE */
-+ struct bio_set btree_bio;
-+ struct workqueue_struct *io_complete_wq;
-+
-+ struct btree_root btree_roots_known[BTREE_ID_NR];
-+ DARRAY(struct btree_root) btree_roots_extra;
-+ struct mutex btree_root_lock;
-+
-+ struct btree_cache btree_cache;
-+
-+ /*
-+ * Cache of allocated btree nodes - if we allocate a btree node and
-+ * don't use it, if we free it that space can't be reused until going
-+ * _all_ the way through the allocator (which exposes us to a livelock
-+ * when allocating btree reserves fail halfway through) - instead, we
-+ * can stick them here:
-+ */
-+ struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2];
-+ unsigned btree_reserve_cache_nr;
-+ struct mutex btree_reserve_cache_lock;
-+
-+ mempool_t btree_interior_update_pool;
-+ struct list_head btree_interior_update_list;
-+ struct list_head btree_interior_updates_unwritten;
-+ struct mutex btree_interior_update_lock;
-+ struct closure_waitlist btree_interior_update_wait;
-+
-+ struct workqueue_struct *btree_interior_update_worker;
-+ struct work_struct btree_interior_update_work;
-+
-+ struct list_head pending_node_rewrites;
-+ struct mutex pending_node_rewrites_lock;
-+
-+ /* btree_io.c: */
-+ spinlock_t btree_write_error_lock;
-+ struct btree_write_stats {
-+ atomic64_t nr;
-+ atomic64_t bytes;
-+ } btree_write_stats[BTREE_WRITE_TYPE_NR];
-+
-+ /* btree_iter.c: */
-+ struct seqmutex btree_trans_lock;
-+ struct list_head btree_trans_list;
-+ mempool_t btree_trans_pool;
-+ mempool_t btree_trans_mem_pool;
-+ struct btree_trans_buf __percpu *btree_trans_bufs;
-+
-+ struct srcu_struct btree_trans_barrier;
-+ bool btree_trans_barrier_initialized;
-+
-+ struct btree_key_cache btree_key_cache;
-+ unsigned btree_key_cache_btrees;
-+
-+ struct btree_write_buffer btree_write_buffer;
-+
-+ struct workqueue_struct *btree_update_wq;
-+ struct workqueue_struct *btree_io_complete_wq;
-+ /* copygc needs its own workqueue for index updates.. */
-+ struct workqueue_struct *copygc_wq;
-+ /*
-+ * Use a dedicated wq for write ref holder tasks. Required to avoid
-+ * dependency problems with other wq tasks that can block on ref
-+ * draining, such as read-only transition.
-+ */
-+ struct workqueue_struct *write_ref_wq;
-+
-+ /* ALLOCATION */
-+ struct bch_devs_mask rw_devs[BCH_DATA_NR];
-+
-+ u64 capacity; /* sectors */
-+
-+ /*
-+ * When capacity _decreases_ (due to a disk being removed), we
-+ * increment capacity_gen - this invalidates outstanding reservations
-+ * and forces them to be revalidated
-+ */
-+ u32 capacity_gen;
-+ unsigned bucket_size_max;
-+
-+ atomic64_t sectors_available;
-+ struct mutex sectors_available_lock;
-+
-+ struct bch_fs_pcpu __percpu *pcpu;
-+
-+ struct percpu_rw_semaphore mark_lock;
-+
-+ seqcount_t usage_lock;
-+ struct bch_fs_usage *usage_base;
-+ struct bch_fs_usage __percpu *usage[JOURNAL_BUF_NR];
-+ struct bch_fs_usage __percpu *usage_gc;
-+ u64 __percpu *online_reserved;
-+
-+ /* single element mempool: */
-+ struct mutex usage_scratch_lock;
-+ struct bch_fs_usage_online *usage_scratch;
-+
-+ struct io_clock io_clock[2];
-+
-+ /* JOURNAL SEQ BLACKLIST */
-+ struct journal_seq_blacklist_table *
-+ journal_seq_blacklist_table;
-+ struct work_struct journal_seq_blacklist_gc_work;
-+
-+ /* ALLOCATOR */
-+ spinlock_t freelist_lock;
-+ struct closure_waitlist freelist_wait;
-+ u64 blocked_allocate;
-+ u64 blocked_allocate_open_bucket;
-+
-+ open_bucket_idx_t open_buckets_freelist;
-+ open_bucket_idx_t open_buckets_nr_free;
-+ struct closure_waitlist open_buckets_wait;
-+ struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
-+ open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];
-+
-+ open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
-+ open_bucket_idx_t open_buckets_partial_nr;
-+
-+ struct write_point btree_write_point;
-+ struct write_point rebalance_write_point;
-+
-+ struct write_point write_points[WRITE_POINT_MAX];
-+ struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
-+ struct mutex write_points_hash_lock;
-+ unsigned write_points_nr;
-+
-+ struct buckets_waiting_for_journal buckets_waiting_for_journal;
-+ struct work_struct discard_work;
-+ struct work_struct invalidate_work;
-+
-+ /* GARBAGE COLLECTION */
-+ struct task_struct *gc_thread;
-+ atomic_t kick_gc;
-+ unsigned long gc_count;
-+
-+ enum btree_id gc_gens_btree;
-+ struct bpos gc_gens_pos;
-+
-+ /*
-+ * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos]
-+ * has been marked by GC.
-+ *
-+ * gc_cur_phase is a superset of btree_ids (BTREE_ID_extents etc.)
-+ *
-+ * Protected by gc_pos_lock. Only written to by GC thread, so GC thread
-+ * can read without a lock.
-+ */
-+ seqcount_t gc_pos_lock;
-+ struct gc_pos gc_pos;
-+
-+ /*
-+ * The allocation code needs gc_mark in struct bucket to be correct, but
-+ * it's not while a gc is in progress.
-+ */
-+ struct rw_semaphore gc_lock;
-+ struct mutex gc_gens_lock;
-+
-+ /* IO PATH */
-+ struct semaphore io_in_flight;
-+ struct bio_set bio_read;
-+ struct bio_set bio_read_split;
-+ struct bio_set bio_write;
-+ struct mutex bio_bounce_pages_lock;
-+ mempool_t bio_bounce_pages;
-+ struct bucket_nocow_lock_table
-+ nocow_locks;
-+ struct rhashtable promote_table;
-+
-+ mempool_t compression_bounce[2];
-+ mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
-+ mempool_t decompress_workspace;
-+ ZSTD_parameters zstd_params;
-+
-+ struct crypto_shash *sha256;
-+ struct crypto_sync_skcipher *chacha20;
-+ struct crypto_shash *poly1305;
-+
-+ atomic64_t key_version;
-+
-+ mempool_t large_bkey_pool;
-+
-+ /* MOVE.C */
-+ struct list_head moving_context_list;
-+ struct mutex moving_context_lock;
-+
-+ /* REBALANCE */
-+ struct bch_fs_rebalance rebalance;
-+
-+ /* COPYGC */
-+ struct task_struct *copygc_thread;
-+ struct write_point copygc_write_point;
-+ s64 copygc_wait_at;
-+ s64 copygc_wait;
-+ bool copygc_running;
-+ wait_queue_head_t copygc_running_wq;
-+
-+ /* STRIPES: */
-+ GENRADIX(struct stripe) stripes;
-+ GENRADIX(struct gc_stripe) gc_stripes;
-+
-+ struct hlist_head ec_stripes_new[32];
-+ spinlock_t ec_stripes_new_lock;
-+
-+ ec_stripes_heap ec_stripes_heap;
-+ struct mutex ec_stripes_heap_lock;
-+
-+ /* ERASURE CODING */
-+ struct list_head ec_stripe_head_list;
-+ struct mutex ec_stripe_head_lock;
-+
-+ struct list_head ec_stripe_new_list;
-+ struct mutex ec_stripe_new_lock;
-+ wait_queue_head_t ec_stripe_new_wait;
-+
-+ struct work_struct ec_stripe_create_work;
-+ u64 ec_stripe_hint;
-+
-+ struct work_struct ec_stripe_delete_work;
-+
-+ struct bio_set ec_bioset;
-+
-+ /* REFLINK */
-+ reflink_gc_table reflink_gc_table;
-+ size_t reflink_gc_nr;
-+
-+ /* fs.c */
-+ struct list_head vfs_inodes_list;
-+ struct mutex vfs_inodes_lock;
-+
-+ /* VFS IO PATH - fs-io.c */
-+ struct bio_set writepage_bioset;
-+ struct bio_set dio_write_bioset;
-+ struct bio_set dio_read_bioset;
-+ struct bio_set nocow_flush_bioset;
-+
-+ /* QUOTAS */
-+ struct bch_memquota_type quotas[QTYP_NR];
-+
-+ /* RECOVERY */
-+ u64 journal_replay_seq_start;
-+ u64 journal_replay_seq_end;
-+ enum bch_recovery_pass curr_recovery_pass;
-+ /* bitmap of explicitly enabled recovery passes: */
-+ u64 recovery_passes_explicit;
-+ u64 recovery_passes_complete;
-+
-+ /* DEBUG JUNK */
-+ struct dentry *fs_debug_dir;
-+ struct dentry *btree_debug_dir;
-+ struct btree_debug btree_debug[BTREE_ID_NR];
-+ struct btree *verify_data;
-+ struct btree_node *verify_ondisk;
-+ struct mutex verify_lock;
-+
-+ u64 *unused_inode_hints;
-+ unsigned inode_shard_bits;
-+
-+ /*
-+ * A btree node on disk could have too many bsets for an iterator to fit
-+ * on the stack - have to dynamically allocate them
-+ */
-+ mempool_t fill_iter;
-+
-+ mempool_t btree_bounce_pool;
-+
-+ struct journal journal;
-+ GENRADIX(struct journal_replay *) journal_entries;
-+ u64 journal_entries_base_seq;
-+ struct journal_keys journal_keys;
-+ struct list_head journal_iters;
-+
-+ u64 last_bucket_seq_cleanup;
-+
-+ u64 counters_on_mount[BCH_COUNTER_NR];
-+ u64 __percpu *counters;
-+
-+ unsigned btree_gc_periodic:1;
-+ unsigned copy_gc_enabled:1;
-+ bool promote_whole_extents;
-+
-+ struct bch2_time_stats times[BCH_TIME_STAT_NR];
-+
-+ struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
-+
-+ /* ERRORS */
-+ struct list_head fsck_error_msgs;
-+ struct mutex fsck_error_msgs_lock;
-+ bool fsck_alloc_msgs_err;
-+
-+ bch_sb_errors_cpu fsck_error_counts;
-+ struct mutex fsck_error_counts_lock;
-+};
-+
-+extern struct wait_queue_head bch2_read_only_wait;
-+
-+static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
-+{
-+#ifdef BCH_WRITE_REF_DEBUG
-+ atomic_long_inc(&c->writes[ref]);
-+#else
-+ percpu_ref_get(&c->writes);
-+#endif
-+}
-+
-+static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
-+{
-+#ifdef BCH_WRITE_REF_DEBUG
-+ return !test_bit(BCH_FS_GOING_RO, &c->flags) &&
-+ atomic_long_inc_not_zero(&c->writes[ref]);
-+#else
-+ return percpu_ref_tryget_live(&c->writes);
-+#endif
-+}
-+
-+static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
-+{
-+#ifdef BCH_WRITE_REF_DEBUG
-+ long v = atomic_long_dec_return(&c->writes[ref]);
-+
-+ BUG_ON(v < 0);
-+ if (v)
-+ return;
-+ for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
-+ if (atomic_long_read(&c->writes[i]))
-+ return;
-+
-+ set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
-+ wake_up(&bch2_read_only_wait);
-+#else
-+ percpu_ref_put(&c->writes);
-+#endif
-+}
-+
-+static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
-+{
-+#ifndef NO_BCACHEFS_FS
-+ if (c->vfs_sb)
-+ c->vfs_sb->s_bdi->ra_pages = ra_pages;
-+#endif
-+}
-+
-+static inline unsigned bucket_bytes(const struct bch_dev *ca)
-+{
-+ return ca->mi.bucket_size << 9;
-+}
-+
-+static inline unsigned block_bytes(const struct bch_fs *c)
-+{
-+ return c->opts.block_size;
-+}
-+
-+static inline unsigned block_sectors(const struct bch_fs *c)
-+{
-+ return c->opts.block_size >> 9;
-+}
-+
-+static inline size_t btree_sectors(const struct bch_fs *c)
-+{
-+ return c->opts.btree_node_size >> 9;
-+}
-+
-+static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
-+{
-+ return c->btree_key_cache_btrees & (1U << btree);
-+}
-+
-+static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time)
-+{
-+ struct timespec64 t;
-+ s32 rem;
-+
-+ time += c->sb.time_base_lo;
-+
-+ t.tv_sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem);
-+ t.tv_nsec = rem * c->sb.nsec_per_time_unit;
-+ return t;
-+}
-+
-+static inline s64 timespec_to_bch2_time(const struct bch_fs *c, struct timespec64 ts)
-+{
-+ return (ts.tv_sec * c->sb.time_units_per_sec +
-+ (int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo;
-+}
-+
-+static inline s64 bch2_current_time(const struct bch_fs *c)
-+{
-+ struct timespec64 now;
-+
-+ ktime_get_coarse_real_ts64(&now);
-+ return timespec_to_bch2_time(c, now);
-+}
-+
-+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
-+{
-+ return dev < c->sb.nr_devices && c->devs[dev];
-+}
-+
-+#define BKEY_PADDED_ONSTACK(key, pad) \
-+ struct { struct bkey_i key; __u64 key ## _pad[pad]; }
-+
-+#endif /* _BCACHEFS_H */
-diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
-new file mode 100644
-index 000000000000..0a750953ff92
---- /dev/null
-+++ b/fs/bcachefs/bcachefs_format.h
-@@ -0,0 +1,2425 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FORMAT_H
-+#define _BCACHEFS_FORMAT_H
-+
-+/*
-+ * bcachefs on disk data structures
-+ *
-+ * OVERVIEW:
-+ *
-+ * There are three main types of on disk data structures in bcachefs (this is
-+ * reduced from 5 in bcache)
-+ *
-+ * - superblock
-+ * - journal
-+ * - btree
-+ *
-+ * The btree is the primary structure; most metadata exists as keys in the
-+ * various btrees. There are only a small number of btrees, they're not
-+ * sharded - we have one btree for extents, another for inodes, et cetera.
-+ *
-+ * SUPERBLOCK:
-+ *
-+ * The superblock contains the location of the journal, the list of devices in
-+ * the filesystem, and in general any metadata we need in order to decide
-+ * whether we can start a filesystem or prior to reading the journal/btree
-+ * roots.
-+ *
-+ * The superblock is extensible, and most of the contents of the superblock are
-+ * in variable length, type tagged fields; see struct bch_sb_field.
-+ *
-+ * Backup superblocks do not reside in a fixed location; also, superblocks do
-+ * not have a fixed size. To locate backup superblocks we have struct
-+ * bch_sb_layout; we store a copy of this inside every superblock, and also
-+ * before the first superblock.
-+ *
-+ * JOURNAL:
-+ *
-+ * The journal primarily records btree updates in the order they occurred;
-+ * journal replay consists of just iterating over all the keys in the open
-+ * journal entries and re-inserting them into the btrees.
-+ *
-+ * The journal also contains entry types for the btree roots, and blacklisted
-+ * journal sequence numbers (see journal_seq_blacklist.c).
-+ *
-+ * BTREE:
-+ *
-+ * bcachefs btrees are copy on write b+ trees, where nodes are big (typically
-+ * 128k-256k) and log structured. We use struct btree_node for writing the first
-+ * entry in a given node (offset 0), and struct btree_node_entry for all
-+ * subsequent writes.
-+ *
-+ * After the header, btree node entries contain a list of keys in sorted order.
-+ * Values are stored inline with the keys; since values are variable length (and
-+ * keys effectively are variable length too, due to packing) we can't do random
-+ * access without building up additional in memory tables in the btree node read
-+ * path.
-+ *
-+ * BTREE KEYS (struct bkey):
-+ *
-+ * The various btrees share a common format for the key - so as to avoid
-+ * switching in fastpath lookup/comparison code - but define their own
-+ * structures for the key values.
-+ *
-+ * The size of a key/value pair is stored as a u8 in units of u64s, so the max
-+ * size is just under 2k. The common part also contains a type tag for the
-+ * value, and a format field indicating whether the key is packed or not (and
-+ * also meant to allow adding new key fields in the future, if desired).
-+ *
-+ * bkeys, when stored within a btree node, may also be packed. In that case, the
-+ * bkey_format in that node is used to unpack it. Packed bkeys mean that we can
-+ * be generous with field sizes in the common part of the key format (64 bit
-+ * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost.
-+ */
-+
-+#include <asm/types.h>
-+#include <asm/byteorder.h>
-+#include <linux/kernel.h>
-+#include <linux/uuid.h>
-+#include "vstructs.h"
-+
-+#ifdef __KERNEL__
-+typedef uuid_t __uuid_t;
-+#endif
-+
-+#define BITMASK(name, type, field, offset, end) \
-+static const __maybe_unused unsigned name##_OFFSET = offset; \
-+static const __maybe_unused unsigned name##_BITS = (end - offset); \
-+ \
-+static inline __u64 name(const type *k) \
-+{ \
-+ return (k->field >> offset) & ~(~0ULL << (end - offset)); \
-+} \
-+ \
-+static inline void SET_##name(type *k, __u64 v) \
-+{ \
-+ k->field &= ~(~(~0ULL << (end - offset)) << offset); \
-+ k->field |= (v & ~(~0ULL << (end - offset))) << offset; \
-+}
-+
-+#define LE_BITMASK(_bits, name, type, field, offset, end) \
-+static const __maybe_unused unsigned name##_OFFSET = offset; \
-+static const __maybe_unused unsigned name##_BITS = (end - offset); \
-+static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\
-+ \
-+static inline __u64 name(const type *k) \
-+{ \
-+ return (__le##_bits##_to_cpu(k->field) >> offset) & \
-+ ~(~0ULL << (end - offset)); \
-+} \
-+ \
-+static inline void SET_##name(type *k, __u64 v) \
-+{ \
-+ __u##_bits new = __le##_bits##_to_cpu(k->field); \
-+ \
-+ new &= ~(~(~0ULL << (end - offset)) << offset); \
-+ new |= (v & ~(~0ULL << (end - offset))) << offset; \
-+ k->field = __cpu_to_le##_bits(new); \
-+}
-+
-+#define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e)
-+#define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e)
-+#define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e)
-+
-+struct bkey_format {
-+ __u8 key_u64s;
-+ __u8 nr_fields;
-+ /* One unused slot for now: */
-+ __u8 bits_per_field[6];
-+ __le64 field_offset[6];
-+};
-+
-+/* Btree keys - all units are in sectors */
-+
-+struct bpos {
-+ /*
-+ * Word order matches machine byte order - btree code treats a bpos as a
-+ * single large integer, for search/comparison purposes
-+ *
-+ * Note that wherever a bpos is embedded in another on disk data
-+ * structure, it has to be byte swabbed when reading in metadata that
-+ * wasn't written in native endian order:
-+ */
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+ __u32 snapshot;
-+ __u64 offset;
-+ __u64 inode;
-+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-+ __u64 inode;
-+ __u64 offset; /* Points to end of extent - sectors */
-+ __u32 snapshot;
-+#else
-+#error edit for your odd byteorder.
-+#endif
-+} __packed __aligned(4);
-+
-+#define KEY_INODE_MAX ((__u64)~0ULL)
-+#define KEY_OFFSET_MAX ((__u64)~0ULL)
-+#define KEY_SNAPSHOT_MAX ((__u32)~0U)
-+#define KEY_SIZE_MAX ((__u32)~0U)
-+
-+static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot)
-+{
-+ return (struct bpos) {
-+ .inode = inode,
-+ .offset = offset,
-+ .snapshot = snapshot,
-+ };
-+}
-+
-+#define POS_MIN SPOS(0, 0, 0)
-+#define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, 0)
-+#define SPOS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX)
-+#define POS(_inode, _offset) SPOS(_inode, _offset, 0)
-+
-+/* Empty placeholder struct, for container_of() */
-+struct bch_val {
-+ __u64 __nothing[0];
-+};
-+
-+struct bversion {
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+ __u64 lo;
-+ __u32 hi;
-+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-+ __u32 hi;
-+ __u64 lo;
-+#endif
-+} __packed __aligned(4);
-+
-+struct bkey {
-+ /* Size of combined key and value, in u64s */
-+ __u8 u64s;
-+
-+ /* Format of key (0 for format local to btree node) */
-+#if defined(__LITTLE_ENDIAN_BITFIELD)
-+ __u8 format:7,
-+ needs_whiteout:1;
-+#elif defined (__BIG_ENDIAN_BITFIELD)
-+ __u8 needs_whiteout:1,
-+ format:7;
-+#else
-+#error edit for your odd byteorder.
-+#endif
-+
-+ /* Type of the value */
-+ __u8 type;
-+
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+ __u8 pad[1];
-+
-+ struct bversion version;
-+ __u32 size; /* extent size, in sectors */
-+ struct bpos p;
-+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-+ struct bpos p;
-+ __u32 size; /* extent size, in sectors */
-+ struct bversion version;
-+
-+ __u8 pad[1];
-+#endif
-+} __packed __aligned(8);
-+
-+struct bkey_packed {
-+ __u64 _data[0];
-+
-+ /* Size of combined key and value, in u64s */
-+ __u8 u64s;
-+
-+ /* Format of key (0 for format local to btree node) */
-+
-+ /*
-+ * XXX: next incompat on disk format change, switch format and
-+ * needs_whiteout - bkey_packed() will be cheaper if format is the high
-+ * bits of the bitfield
-+ */
-+#if defined(__LITTLE_ENDIAN_BITFIELD)
-+ __u8 format:7,
-+ needs_whiteout:1;
-+#elif defined (__BIG_ENDIAN_BITFIELD)
-+ __u8 needs_whiteout:1,
-+ format:7;
-+#endif
-+
-+ /* Type of the value */
-+ __u8 type;
-+ __u8 key_start[0];
-+
-+ /*
-+ * We copy bkeys with struct assignment in various places, and while
-+ * that shouldn't be done with packed bkeys we can't disallow it in C,
-+ * and it's legal to cast a bkey to a bkey_packed - so padding it out
-+ * to the same size as struct bkey should hopefully be safest.
-+ */
-+ __u8 pad[sizeof(struct bkey) - 3];
-+} __packed __aligned(8);
-+
-+typedef struct {
-+ __le64 lo;
-+ __le64 hi;
-+} bch_le128;
-+
-+#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64))
-+#define BKEY_U64s_MAX U8_MAX
-+#define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s)
-+
-+#define KEY_PACKED_BITS_START 24
-+
-+#define KEY_FORMAT_LOCAL_BTREE 0
-+#define KEY_FORMAT_CURRENT 1
-+
-+enum bch_bkey_fields {
-+ BKEY_FIELD_INODE,
-+ BKEY_FIELD_OFFSET,
-+ BKEY_FIELD_SNAPSHOT,
-+ BKEY_FIELD_SIZE,
-+ BKEY_FIELD_VERSION_HI,
-+ BKEY_FIELD_VERSION_LO,
-+ BKEY_NR_FIELDS,
-+};
-+
-+#define bkey_format_field(name, field) \
-+ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8)
-+
-+#define BKEY_FORMAT_CURRENT \
-+((struct bkey_format) { \
-+ .key_u64s = BKEY_U64s, \
-+ .nr_fields = BKEY_NR_FIELDS, \
-+ .bits_per_field = { \
-+ bkey_format_field(INODE, p.inode), \
-+ bkey_format_field(OFFSET, p.offset), \
-+ bkey_format_field(SNAPSHOT, p.snapshot), \
-+ bkey_format_field(SIZE, size), \
-+ bkey_format_field(VERSION_HI, version.hi), \
-+ bkey_format_field(VERSION_LO, version.lo), \
-+ }, \
-+})
-+
-+/* bkey with inline value */
-+struct bkey_i {
-+ __u64 _data[0];
-+
-+ struct bkey k;
-+ struct bch_val v;
-+};
-+
-+#define KEY(_inode, _offset, _size) \
-+((struct bkey) { \
-+ .u64s = BKEY_U64s, \
-+ .format = KEY_FORMAT_CURRENT, \
-+ .p = POS(_inode, _offset), \
-+ .size = _size, \
-+})
-+
-+static inline void bkey_init(struct bkey *k)
-+{
-+ *k = KEY(0, 0, 0);
-+}
-+
-+#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64))
-+
-+#define __BKEY_PADDED(key, pad) \
-+ struct bkey_i key; __u64 key ## _pad[pad]
-+
-+/*
-+ * - DELETED keys are used internally to mark keys that should be ignored but
-+ * override keys in composition order. Their version number is ignored.
-+ *
-+ * - DISCARDED keys indicate that the data is all 0s because it has been
-+ * discarded. DISCARDs may have a version; if the version is nonzero the key
-+ * will be persistent, otherwise the key will be dropped whenever the btree
-+ * node is rewritten (like DELETED keys).
-+ *
-+ * - ERROR: any read of the data returns a read error, as the data was lost due
-+ * to a failing device. Like DISCARDED keys, they can be removed (overridden)
-+ * by new writes or cluster-wide GC. Node repair can also overwrite them with
-+ * the same or a more recent version number, but not with an older version
-+ * number.
-+ *
-+ * - WHITEOUT: for hash table btrees
-+ */
-+#define BCH_BKEY_TYPES() \
-+ x(deleted, 0) \
-+ x(whiteout, 1) \
-+ x(error, 2) \
-+ x(cookie, 3) \
-+ x(hash_whiteout, 4) \
-+ x(btree_ptr, 5) \
-+ x(extent, 6) \
-+ x(reservation, 7) \
-+ x(inode, 8) \
-+ x(inode_generation, 9) \
-+ x(dirent, 10) \
-+ x(xattr, 11) \
-+ x(alloc, 12) \
-+ x(quota, 13) \
-+ x(stripe, 14) \
-+ x(reflink_p, 15) \
-+ x(reflink_v, 16) \
-+ x(inline_data, 17) \
-+ x(btree_ptr_v2, 18) \
-+ x(indirect_inline_data, 19) \
-+ x(alloc_v2, 20) \
-+ x(subvolume, 21) \
-+ x(snapshot, 22) \
-+ x(inode_v2, 23) \
-+ x(alloc_v3, 24) \
-+ x(set, 25) \
-+ x(lru, 26) \
-+ x(alloc_v4, 27) \
-+ x(backpointer, 28) \
-+ x(inode_v3, 29) \
-+ x(bucket_gens, 30) \
-+ x(snapshot_tree, 31) \
-+ x(logged_op_truncate, 32) \
-+ x(logged_op_finsert, 33)
-+
-+enum bch_bkey_type {
-+#define x(name, nr) KEY_TYPE_##name = nr,
-+ BCH_BKEY_TYPES()
-+#undef x
-+ KEY_TYPE_MAX,
-+};
-+
-+struct bch_deleted {
-+ struct bch_val v;
-+};
-+
-+struct bch_whiteout {
-+ struct bch_val v;
-+};
-+
-+struct bch_error {
-+ struct bch_val v;
-+};
-+
-+struct bch_cookie {
-+ struct bch_val v;
-+ __le64 cookie;
-+};
-+
-+struct bch_hash_whiteout {
-+ struct bch_val v;
-+};
-+
-+struct bch_set {
-+ struct bch_val v;
-+};
-+
-+/* Extents */
-+
-+/*
-+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
-+ * preceded by checksum/compression information (bch_extent_crc32 or
-+ * bch_extent_crc64).
-+ *
-+ * One major determining factor in the format of extents is how we handle and
-+ * represent extents that have been partially overwritten and thus trimmed:
-+ *
-+ * If an extent is not checksummed or compressed, when the extent is trimmed we
-+ * don't have to remember the extent we originally allocated and wrote: we can
-+ * merely adjust ptr->offset to point to the start of the data that is currently
-+ * live. The size field in struct bkey records the current (live) size of the
-+ * extent, and is also used to mean "size of region on disk that we point to" in
-+ * this case.
-+ *
-+ * Thus an extent that is not checksummed or compressed will consist only of a
-+ * list of bch_extent_ptrs, with none of the fields in
-+ * bch_extent_crc32/bch_extent_crc64.
-+ *
-+ * When an extent is checksummed or compressed, it's not possible to read only
-+ * the data that is currently live: we have to read the entire extent that was
-+ * originally written, and then return only the part of the extent that is
-+ * currently live.
-+ *
-+ * Thus, in addition to the current size of the extent in struct bkey, we need
-+ * to store the size of the originally allocated space - this is the
-+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
-+ * when the extent is trimmed, instead of modifying the offset field of the
-+ * pointer, we keep a second smaller offset field - "offset into the original
-+ * extent of the currently live region".
-+ *
-+ * The other major determining factor is replication and data migration:
-+ *
-+ * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
-+ * write, we will initially write all the replicas in the same format, with the
-+ * same checksum type and compression format - however, when copygc runs later (or
-+ * tiering/cache promotion, anything that moves data), it is not in general
-+ * going to rewrite all the pointers at once - one of the replicas may be in a
-+ * bucket on one device that has very little fragmentation while another lives
-+ * in a bucket that has become heavily fragmented, and thus is being rewritten
-+ * sooner than the rest.
-+ *
-+ * Thus it will only move a subset of the pointers (or in the case of
-+ * tiering/cache promotion perhaps add a single pointer without dropping any
-+ * current pointers), and if the extent has been partially overwritten it must
-+ * write only the currently live portion (or copygc would not be able to reduce
-+ * fragmentation!) - which necessitates a different bch_extent_crc format for
-+ * the new pointer.
-+ *
-+ * But in the interests of space efficiency, we don't want to store one
-+ * bch_extent_crc for each pointer if we don't have to.
-+ *
-+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
-+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the
-+ * type of a given entry with a scheme similar to utf8 (except we're encoding a
-+ * type, not a size), encoding the type in the position of the first set bit:
-+ *
-+ * bch_extent_crc32 - 0b1
-+ * bch_extent_ptr - 0b10
-+ * bch_extent_crc64 - 0b100
-+ *
-+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
-+ * bch_extent_crc64 is the least constrained).
-+ *
-+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
-+ * until the next bch_extent_crc32/64.
-+ *
-+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
-+ * is neither checksummed nor compressed.
-+ */
-+
-+/* 128 bits, sufficient for cryptographic MACs: */
-+struct bch_csum {
-+ __le64 lo;
-+ __le64 hi;
-+} __packed __aligned(8);
-+
-+#define BCH_EXTENT_ENTRY_TYPES() \
-+ x(ptr, 0) \
-+ x(crc32, 1) \
-+ x(crc64, 2) \
-+ x(crc128, 3) \
-+ x(stripe_ptr, 4) \
-+ x(rebalance, 5)
-+#define BCH_EXTENT_ENTRY_MAX 6
-+
-+enum bch_extent_entry_type {
-+#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
-+ BCH_EXTENT_ENTRY_TYPES()
-+#undef x
-+};
-+
-+/* Compressed/uncompressed size are stored biased by 1: */
-+struct bch_extent_crc32 {
-+#if defined(__LITTLE_ENDIAN_BITFIELD)
-+ __u32 type:2,
-+ _compressed_size:7,
-+ _uncompressed_size:7,
-+ offset:7,
-+ _unused:1,
-+ csum_type:4,
-+ compression_type:4;
-+ __u32 csum;
-+#elif defined (__BIG_ENDIAN_BITFIELD)
-+ __u32 csum;
-+ __u32 compression_type:4,
-+ csum_type:4,
-+ _unused:1,
-+ offset:7,
-+ _uncompressed_size:7,
-+ _compressed_size:7,
-+ type:2;
-+#endif
-+} __packed __aligned(8);
-+
-+#define CRC32_SIZE_MAX (1U << 7)
-+#define CRC32_NONCE_MAX 0
-+
-+struct bch_extent_crc64 {
-+#if defined(__LITTLE_ENDIAN_BITFIELD)
-+ __u64 type:3,
-+ _compressed_size:9,
-+ _uncompressed_size:9,
-+ offset:9,
-+ nonce:10,
-+ csum_type:4,
-+ compression_type:4,
-+ csum_hi:16;
-+#elif defined (__BIG_ENDIAN_BITFIELD)
-+ __u64 csum_hi:16,
-+ compression_type:4,
-+ csum_type:4,
-+ nonce:10,
-+ offset:9,
-+ _uncompressed_size:9,
-+ _compressed_size:9,
-+ type:3;
-+#endif
-+ __u64 csum_lo;
-+} __packed __aligned(8);
-+
-+#define CRC64_SIZE_MAX (1U << 9)
-+#define CRC64_NONCE_MAX ((1U << 10) - 1)
-+
-+struct bch_extent_crc128 {
-+#if defined(__LITTLE_ENDIAN_BITFIELD)
-+ __u64 type:4,
-+ _compressed_size:13,
-+ _uncompressed_size:13,
-+ offset:13,
-+ nonce:13,
-+ csum_type:4,
-+ compression_type:4;
-+#elif defined (__BIG_ENDIAN_BITFIELD)
-+ __u64 compression_type:4,
-+ csum_type:4,
-+ nonce:13,
-+ offset:13,
-+ _uncompressed_size:13,
-+ _compressed_size:13,
-+ type:4;
-+#endif
-+ struct bch_csum csum;
-+} __packed __aligned(8);
-+
-+#define CRC128_SIZE_MAX (1U << 13)
-+#define CRC128_NONCE_MAX ((1U << 13) - 1)
-+
-+/*
-+ * @reservation - pointer hasn't been written to, just reserved
-+ */
-+struct bch_extent_ptr {
-+#if defined(__LITTLE_ENDIAN_BITFIELD)
-+ __u64 type:1,
-+ cached:1,
-+ unused:1,
-+ unwritten:1,
-+ offset:44, /* 8 petabytes */
-+ dev:8,
-+ gen:8;
-+#elif defined (__BIG_ENDIAN_BITFIELD)
-+ __u64 gen:8,
-+ dev:8,
-+ offset:44,
-+ unwritten:1,
-+ unused:1,
-+ cached:1,
-+ type:1;
-+#endif
-+} __packed __aligned(8);
-+
-+struct bch_extent_stripe_ptr {
-+#if defined(__LITTLE_ENDIAN_BITFIELD)
-+ __u64 type:5,
-+ block:8,
-+ redundancy:4,
-+ idx:47;
-+#elif defined (__BIG_ENDIAN_BITFIELD)
-+ __u64 idx:47,
-+ redundancy:4,
-+ block:8,
-+ type:5;
-+#endif
-+};
-+
-+struct bch_extent_rebalance {
-+#if defined(__LITTLE_ENDIAN_BITFIELD)
-+ __u64 type:6,
-+ unused:34,
-+ compression:8, /* enum bch_compression_opt */
-+ target:16;
-+#elif defined (__BIG_ENDIAN_BITFIELD)
-+ __u64 target:16,
-+ compression:8,
-+ unused:34,
-+ type:6;
-+#endif
-+};
-+
-+union bch_extent_entry {
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
-+ unsigned long type;
-+#elif __BITS_PER_LONG == 32
-+ struct {
-+ unsigned long pad;
-+ unsigned long type;
-+ };
-+#else
-+#error edit for your odd byteorder.
-+#endif
-+
-+#define x(f, n) struct bch_extent_##f f;
-+ BCH_EXTENT_ENTRY_TYPES()
-+#undef x
-+};
-+
-+struct bch_btree_ptr {
-+ struct bch_val v;
-+
-+ __u64 _data[0];
-+ struct bch_extent_ptr start[];
-+} __packed __aligned(8);
-+
-+struct bch_btree_ptr_v2 {
-+ struct bch_val v;
-+
-+ __u64 mem_ptr;
-+ __le64 seq;
-+ __le16 sectors_written;
-+ __le16 flags;
-+ struct bpos min_key;
-+ __u64 _data[0];
-+ struct bch_extent_ptr start[];
-+} __packed __aligned(8);
-+
-+LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1);
-+
-+struct bch_extent {
-+ struct bch_val v;
-+
-+ __u64 _data[0];
-+ union bch_extent_entry start[];
-+} __packed __aligned(8);
-+
-+struct bch_reservation {
-+ struct bch_val v;
-+
-+ __le32 generation;
-+ __u8 nr_replicas;
-+ __u8 pad[3];
-+} __packed __aligned(8);
-+
-+/* Maximum size (in u64s) a single pointer could be: */
-+#define BKEY_EXTENT_PTR_U64s_MAX\
-+ ((sizeof(struct bch_extent_crc128) + \
-+ sizeof(struct bch_extent_ptr)) / sizeof(__u64))
-+
-+/* Maximum possible size of an entire extent value: */
-+#define BKEY_EXTENT_VAL_U64s_MAX \
-+ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
-+
-+/* * Maximum possible size of an entire extent, key + value: */
-+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
-+
-+/* Btree pointers don't carry around checksums: */
-+#define BKEY_BTREE_PTR_VAL_U64s_MAX \
-+ ((sizeof(struct bch_btree_ptr_v2) + \
-+ sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
-+#define BKEY_BTREE_PTR_U64s_MAX \
-+ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
-+
-+/* Inodes */
-+
-+#define BLOCKDEV_INODE_MAX 4096
-+
-+#define BCACHEFS_ROOT_INO 4096
-+
-+struct bch_inode {
-+ struct bch_val v;
-+
-+ __le64 bi_hash_seed;
-+ __le32 bi_flags;
-+ __le16 bi_mode;
-+ __u8 fields[];
-+} __packed __aligned(8);
-+
-+struct bch_inode_v2 {
-+ struct bch_val v;
-+
-+ __le64 bi_journal_seq;
-+ __le64 bi_hash_seed;
-+ __le64 bi_flags;
-+ __le16 bi_mode;
-+ __u8 fields[];
-+} __packed __aligned(8);
-+
-+struct bch_inode_v3 {
-+ struct bch_val v;
-+
-+ __le64 bi_journal_seq;
-+ __le64 bi_hash_seed;
-+ __le64 bi_flags;
-+ __le64 bi_sectors;
-+ __le64 bi_size;
-+ __le64 bi_version;
-+ __u8 fields[];
-+} __packed __aligned(8);
-+
-+#define INODEv3_FIELDS_START_INITIAL 6
-+#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
-+
-+struct bch_inode_generation {
-+ struct bch_val v;
-+
-+ __le32 bi_generation;
-+ __le32 pad;
-+} __packed __aligned(8);
-+
-+/*
-+ * bi_subvol and bi_parent_subvol are only set for subvolume roots:
-+ */
-+
-+#define BCH_INODE_FIELDS_v2() \
-+ x(bi_atime, 96) \
-+ x(bi_ctime, 96) \
-+ x(bi_mtime, 96) \
-+ x(bi_otime, 96) \
-+ x(bi_size, 64) \
-+ x(bi_sectors, 64) \
-+ x(bi_uid, 32) \
-+ x(bi_gid, 32) \
-+ x(bi_nlink, 32) \
-+ x(bi_generation, 32) \
-+ x(bi_dev, 32) \
-+ x(bi_data_checksum, 8) \
-+ x(bi_compression, 8) \
-+ x(bi_project, 32) \
-+ x(bi_background_compression, 8) \
-+ x(bi_data_replicas, 8) \
-+ x(bi_promote_target, 16) \
-+ x(bi_foreground_target, 16) \
-+ x(bi_background_target, 16) \
-+ x(bi_erasure_code, 16) \
-+ x(bi_fields_set, 16) \
-+ x(bi_dir, 64) \
-+ x(bi_dir_offset, 64) \
-+ x(bi_subvol, 32) \
-+ x(bi_parent_subvol, 32)
-+
-+#define BCH_INODE_FIELDS_v3() \
-+ x(bi_atime, 96) \
-+ x(bi_ctime, 96) \
-+ x(bi_mtime, 96) \
-+ x(bi_otime, 96) \
-+ x(bi_uid, 32) \
-+ x(bi_gid, 32) \
-+ x(bi_nlink, 32) \
-+ x(bi_generation, 32) \
-+ x(bi_dev, 32) \
-+ x(bi_data_checksum, 8) \
-+ x(bi_compression, 8) \
-+ x(bi_project, 32) \
-+ x(bi_background_compression, 8) \
-+ x(bi_data_replicas, 8) \
-+ x(bi_promote_target, 16) \
-+ x(bi_foreground_target, 16) \
-+ x(bi_background_target, 16) \
-+ x(bi_erasure_code, 16) \
-+ x(bi_fields_set, 16) \
-+ x(bi_dir, 64) \
-+ x(bi_dir_offset, 64) \
-+ x(bi_subvol, 32) \
-+ x(bi_parent_subvol, 32) \
-+ x(bi_nocow, 8)
-+
-+/* subset of BCH_INODE_FIELDS */
-+#define BCH_INODE_OPTS() \
-+ x(data_checksum, 8) \
-+ x(compression, 8) \
-+ x(project, 32) \
-+ x(background_compression, 8) \
-+ x(data_replicas, 8) \
-+ x(promote_target, 16) \
-+ x(foreground_target, 16) \
-+ x(background_target, 16) \
-+ x(erasure_code, 16) \
-+ x(nocow, 8)
-+
-+enum inode_opt_id {
-+#define x(name, ...) \
-+ Inode_opt_##name,
-+ BCH_INODE_OPTS()
-+#undef x
-+ Inode_opt_nr,
-+};
-+
-+#define BCH_INODE_FLAGS() \
-+ x(sync, 0) \
-+ x(immutable, 1) \
-+ x(append, 2) \
-+ x(nodump, 3) \
-+ x(noatime, 4) \
-+ x(i_size_dirty, 5) \
-+ x(i_sectors_dirty, 6) \
-+ x(unlinked, 7) \
-+ x(backptr_untrusted, 8)
-+
-+/* bits 20+ reserved for packed fields below: */
-+
-+enum bch_inode_flags {
-+#define x(t, n) BCH_INODE_##t = 1U << n,
-+ BCH_INODE_FLAGS()
-+#undef x
-+};
-+
-+enum __bch_inode_flags {
-+#define x(t, n) __BCH_INODE_##t = n,
-+ BCH_INODE_FLAGS()
-+#undef x
-+};
-+
-+LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
-+LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
-+LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
-+
-+LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
-+LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
-+
-+LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
-+LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31);
-+
-+LE64_BITMASK(INODEv3_FIELDS_START,
-+ struct bch_inode_v3, bi_flags, 31, 36);
-+LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52);
-+
-+/* Dirents */
-+
-+/*
-+ * Dirents (and xattrs) have to implement string lookups; since our b-tree
-+ * doesn't support arbitrary length strings for the key, we instead index by a
-+ * 64 bit hash (currently truncated sha1) of the string, stored in the offset
-+ * field of the key - using linear probing to resolve hash collisions. This also
-+ * provides us with the readdir cookie posix requires.
-+ *
-+ * Linear probing requires us to use whiteouts for deletions, in the event of a
-+ * collision:
-+ */
-+
-+struct bch_dirent {
-+ struct bch_val v;
-+
-+ /* Target inode number: */
-+ union {
-+ __le64 d_inum;
-+ struct { /* DT_SUBVOL */
-+ __le32 d_child_subvol;
-+ __le32 d_parent_subvol;
-+ };
-+ };
-+
-+ /*
-+ * Copy of mode bits 12-15 from the target inode - so userspace can get
-+ * the filetype without having to do a stat()
-+ */
-+ __u8 d_type;
-+
-+ __u8 d_name[];
-+} __packed __aligned(8);
-+
-+#define DT_SUBVOL 16
-+#define BCH_DT_MAX 17
-+
-+#define BCH_NAME_MAX 512
-+
-+/* Xattrs */
-+
-+#define KEY_TYPE_XATTR_INDEX_USER 0
-+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1
-+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2
-+#define KEY_TYPE_XATTR_INDEX_TRUSTED 3
-+#define KEY_TYPE_XATTR_INDEX_SECURITY 4
-+
-+struct bch_xattr {
-+ struct bch_val v;
-+ __u8 x_type;
-+ __u8 x_name_len;
-+ __le16 x_val_len;
-+ __u8 x_name[];
-+} __packed __aligned(8);
-+
-+/* Bucket/allocation information: */
-+
-+struct bch_alloc {
-+ struct bch_val v;
-+ __u8 fields;
-+ __u8 gen;
-+ __u8 data[];
-+} __packed __aligned(8);
-+
-+#define BCH_ALLOC_FIELDS_V1() \
-+ x(read_time, 16) \
-+ x(write_time, 16) \
-+ x(data_type, 8) \
-+ x(dirty_sectors, 16) \
-+ x(cached_sectors, 16) \
-+ x(oldest_gen, 8) \
-+ x(stripe, 32) \
-+ x(stripe_redundancy, 8)
-+
-+enum {
-+#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
-+ BCH_ALLOC_FIELDS_V1()
-+#undef x
-+};
-+
-+struct bch_alloc_v2 {
-+ struct bch_val v;
-+ __u8 nr_fields;
-+ __u8 gen;
-+ __u8 oldest_gen;
-+ __u8 data_type;
-+ __u8 data[];
-+} __packed __aligned(8);
-+
-+#define BCH_ALLOC_FIELDS_V2() \
-+ x(read_time, 64) \
-+ x(write_time, 64) \
-+ x(dirty_sectors, 32) \
-+ x(cached_sectors, 32) \
-+ x(stripe, 32) \
-+ x(stripe_redundancy, 8)
-+
-+struct bch_alloc_v3 {
-+ struct bch_val v;
-+ __le64 journal_seq;
-+ __le32 flags;
-+ __u8 nr_fields;
-+ __u8 gen;
-+ __u8 oldest_gen;
-+ __u8 data_type;
-+ __u8 data[];
-+} __packed __aligned(8);
-+
-+LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
-+LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
-+
-+struct bch_alloc_v4 {
-+ struct bch_val v;
-+ __u64 journal_seq;
-+ __u32 flags;
-+ __u8 gen;
-+ __u8 oldest_gen;
-+ __u8 data_type;
-+ __u8 stripe_redundancy;
-+ __u32 dirty_sectors;
-+ __u32 cached_sectors;
-+ __u64 io_time[2];
-+ __u32 stripe;
-+ __u32 nr_external_backpointers;
-+ __u64 fragmentation_lru;
-+} __packed __aligned(8);
-+
-+#define BCH_ALLOC_V4_U64s_V0 6
-+#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64))
-+
-+BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
-+BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
-+BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
-+BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)
-+
-+#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40
-+
-+struct bch_backpointer {
-+ struct bch_val v;
-+ __u8 btree_id;
-+ __u8 level;
-+ __u8 data_type;
-+ __u64 bucket_offset:40;
-+ __u32 bucket_len;
-+ struct bpos pos;
-+} __packed __aligned(8);
-+
-+#define KEY_TYPE_BUCKET_GENS_BITS 8
-+#define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS)
-+#define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1)
-+
-+struct bch_bucket_gens {
-+ struct bch_val v;
-+ u8 gens[KEY_TYPE_BUCKET_GENS_NR];
-+} __packed __aligned(8);
-+
-+/* Quotas: */
-+
-+enum quota_types {
-+ QTYP_USR = 0,
-+ QTYP_GRP = 1,
-+ QTYP_PRJ = 2,
-+ QTYP_NR = 3,
-+};
-+
-+enum quota_counters {
-+ Q_SPC = 0,
-+ Q_INO = 1,
-+ Q_COUNTERS = 2,
-+};
-+
-+struct bch_quota_counter {
-+ __le64 hardlimit;
-+ __le64 softlimit;
-+};
-+
-+struct bch_quota {
-+ struct bch_val v;
-+ struct bch_quota_counter c[Q_COUNTERS];
-+} __packed __aligned(8);
-+
-+/* Erasure coding */
-+
-+struct bch_stripe {
-+ struct bch_val v;
-+ __le16 sectors;
-+ __u8 algorithm;
-+ __u8 nr_blocks;
-+ __u8 nr_redundant;
-+
-+ __u8 csum_granularity_bits;
-+ __u8 csum_type;
-+ __u8 pad;
-+
-+ struct bch_extent_ptr ptrs[];
-+} __packed __aligned(8);
-+
-+/* Reflink: */
-+
-+struct bch_reflink_p {
-+ struct bch_val v;
-+ __le64 idx;
-+ /*
-+ * A reflink pointer might point to an indirect extent which is then
-+ * later split (by copygc or rebalance). If we only pointed to part of
-+ * the original indirect extent, and then one of the fragments is
-+ * outside the range we point to, we'd leak a refcount: so when creating
-+ * reflink pointers, we need to store pad values to remember the full
-+ * range we were taking a reference on.
-+ */
-+ __le32 front_pad;
-+ __le32 back_pad;
-+} __packed __aligned(8);
-+
-+struct bch_reflink_v {
-+ struct bch_val v;
-+ __le64 refcount;
-+ union bch_extent_entry start[0];
-+ __u64 _data[];
-+} __packed __aligned(8);
-+
-+struct bch_indirect_inline_data {
-+ struct bch_val v;
-+ __le64 refcount;
-+ u8 data[];
-+};
-+
-+/* Inline data */
-+
-+struct bch_inline_data {
-+ struct bch_val v;
-+ u8 data[];
-+};
-+
-+/* Subvolumes: */
-+
-+#define SUBVOL_POS_MIN POS(0, 1)
-+#define SUBVOL_POS_MAX POS(0, S32_MAX)
-+#define BCACHEFS_ROOT_SUBVOL 1
-+
-+struct bch_subvolume {
-+ struct bch_val v;
-+ __le32 flags;
-+ __le32 snapshot;
-+ __le64 inode;
-+ /*
-+ * Snapshot subvolumes form a tree, separate from the snapshot nodes
-+ * tree - if this subvolume is a snapshot, this is the ID of the
-+ * subvolume it was created from:
-+ */
-+ __le32 parent;
-+ __le32 pad;
-+ bch_le128 otime;
-+};
-+
-+LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1)
-+/*
-+ * We need to know whether a subvolume is a snapshot so we can know whether we
-+ * can delete it (or whether it should just be rm -rf'd)
-+ */
-+LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2)
-+LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3)
-+
-+/* Snapshots */
-+
-+struct bch_snapshot {
-+ struct bch_val v;
-+ __le32 flags;
-+ __le32 parent;
-+ __le32 children[2];
-+ __le32 subvol;
-+ /* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */
-+ __le32 tree;
-+ __le32 depth;
-+ __le32 skip[3];
-+};
-+
-+LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
-+
-+/* True if a subvolume points to this snapshot node: */
-+LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
-+
-+/*
-+ * Snapshot trees:
-+ *
-+ * The snapshot_trees btree gives us a persistent identifier for each tree of
-+ * bch_snapshot nodes, and allows us to record and easily find the root/master
-+ * subvolume that other snapshots were created from:
-+ */
-+struct bch_snapshot_tree {
-+ struct bch_val v;
-+ __le32 master_subvol;
-+ __le32 root_snapshot;
-+};
-+
-+/* LRU btree: */
-+
-+struct bch_lru {
-+ struct bch_val v;
-+ __le64 idx;
-+} __packed __aligned(8);
-+
-+#define LRU_ID_STRIPES (1U << 16)
-+
-+/* Logged operations btree: */
-+
-+struct bch_logged_op_truncate {
-+ struct bch_val v;
-+ __le32 subvol;
-+ __le32 pad;
-+ __le64 inum;
-+ __le64 new_i_size;
-+};
-+
-+enum logged_op_finsert_state {
-+ LOGGED_OP_FINSERT_start,
-+ LOGGED_OP_FINSERT_shift_extents,
-+ LOGGED_OP_FINSERT_finish,
-+};
-+
-+struct bch_logged_op_finsert {
-+ struct bch_val v;
-+ __u8 state;
-+ __u8 pad[3];
-+ __le32 subvol;
-+ __le64 inum;
-+ __le64 dst_offset;
-+ __le64 src_offset;
-+ __le64 pos;
-+};
-+
-+/* Optional/variable size superblock sections: */
-+
-+struct bch_sb_field { /* common header embedded as the first member of every bch_sb_field_* struct in this file */
-+ __u64 _data[0]; /* zero-length array at offset 0: a __u64 view that starts at the header itself */
-+ __le32 u64s; /* NOTE(review): presumably the size of the field in units of u64 - confirm */
-+ __le32 type; /* NOTE(review): presumably an enum bch_sb_field_type value - confirm */
-+};
-+
-+#define BCH_SB_FIELDS() \
-+ x(journal, 0) \
-+ x(members_v1, 1) \
-+ x(crypt, 2) \
-+ x(replicas_v0, 3) \
-+ x(quota, 4) \
-+ x(disk_groups, 5) \
-+ x(clean, 6) \
-+ x(replicas, 7) \
-+ x(journal_seq_blacklist, 8) \
-+ x(journal_v2, 9) \
-+ x(counters, 10) \
-+ x(members_v2, 11) \
-+ x(errors, 12)
-+
-+enum bch_sb_field_type {
-+#define x(f, nr) BCH_SB_FIELD_##f = nr,
-+ BCH_SB_FIELDS()
-+#undef x
-+ BCH_SB_FIELD_NR
-+};
-+
-+/*
-+ * Most superblock fields are replicated in all devices' superblocks - a few are
-+ * not:
-+ */
-+#define BCH_SINGLE_DEVICE_SB_FIELDS \
-+ ((1U << BCH_SB_FIELD_journal)| \
-+ (1U << BCH_SB_FIELD_journal_v2))
-+
-+/* BCH_SB_FIELD_journal: */
-+
-+struct bch_sb_field_journal {
-+ struct bch_sb_field field;
-+ __le64 buckets[];
-+};
-+
-+struct bch_sb_field_journal_v2 {
-+ struct bch_sb_field field;
-+
-+ struct bch_sb_field_journal_v2_entry {
-+ __le64 start;
-+ __le64 nr;
-+ } d[];
-+};
-+
-+/* BCH_SB_FIELD_members_v1: */
-+
-+#define BCH_MIN_NR_NBUCKETS (1 << 6)
-+
-+#define BCH_IOPS_MEASUREMENTS() \
-+ x(seqread, 0) \
-+ x(seqwrite, 1) \
-+ x(randread, 2) \
-+ x(randwrite, 3)
-+
-+enum bch_iops_measurement {
-+#define x(t, n) BCH_IOPS_##t = n,
-+ BCH_IOPS_MEASUREMENTS()
-+#undef x
-+ BCH_IOPS_NR
-+};
-+
-+#define BCH_MEMBER_ERROR_TYPES() \
-+ x(read, 0) \
-+ x(write, 1) \
-+ x(checksum, 2)
-+
-+enum bch_member_error_type {
-+#define x(t, n) BCH_MEMBER_ERROR_##t = n,
-+ BCH_MEMBER_ERROR_TYPES()
-+#undef x
-+ BCH_MEMBER_ERROR_NR
-+};
-+
-+struct bch_member {
-+ __uuid_t uuid;
-+ __le64 nbuckets; /* device size */
-+ __le16 first_bucket; /* index of first bucket used */
-+ __le16 bucket_size; /* sectors */
-+ __le32 pad;
-+ __le64 last_mount; /* time_t */
-+
-+ __le64 flags;
-+ __le32 iops[4];
-+ __le64 errors[BCH_MEMBER_ERROR_NR];
-+ __le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
-+ __le64 errors_reset_time;
-+};
-+
-+#define BCH_MEMBER_V1_BYTES 56
-+
-+LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4)
-+/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
-+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15)
-+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags, 15, 20)
-+LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28)
-+LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30)
-+LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
-+ struct bch_member, flags, 30, 31)
-+
-+#if 0
-+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
-+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
-+#endif
-+
-+#define BCH_MEMBER_STATES() \
-+ x(rw, 0) \
-+ x(ro, 1) \
-+ x(failed, 2) \
-+ x(spare, 3)
-+
-+enum bch_member_state {
-+#define x(t, n) BCH_MEMBER_STATE_##t = n,
-+ BCH_MEMBER_STATES()
-+#undef x
-+ BCH_MEMBER_STATE_NR
-+};
-+
-+struct bch_sb_field_members_v1 {
-+ struct bch_sb_field field;
-+ struct bch_member _members[]; //Members are now variable size
-+};
-+
-+struct bch_sb_field_members_v2 {
-+ struct bch_sb_field field;
-+ __le16 member_bytes; //size of single member entry
-+ u8 pad[6];
-+ struct bch_member _members[];
-+};
-+
-+/* BCH_SB_FIELD_crypt: */
-+
-+struct nonce {
-+ __le32 d[4];
-+};
-+
-+struct bch_key {
-+ __le64 key[4];
-+};
-+
-+#define BCH_KEY_MAGIC /* the ASCII bytes "bch**key" packed into a __u64, 'b' in the least significant byte */ \
-+ (((__u64) 'b' << 0)|((__u64) 'c' << 8)| \
-+ ((__u64) 'h' << 16)|((__u64) '*' << 24)| \
-+ ((__u64) '*' << 32)|((__u64) 'k' << 40)| \
-+ ((__u64) 'e' << 48)|((__u64) 'y' << 56))
-+
-+struct bch_encrypted_key {
-+ __le64 magic;
-+ struct bch_key key;
-+};
-+
-+/*
-+ * If this field is present in the superblock, it stores an encryption key which
-+ * is used to encrypt all other data/metadata. The key will normally be encrypted
-+ * with the key userspace provides, but if encryption has been turned off we'll
-+ * just store the master key unencrypted in the superblock so we can access the
-+ * previously encrypted data.
-+ */
-+struct bch_sb_field_crypt {
-+ struct bch_sb_field field;
-+
-+ __le64 flags;
-+ __le64 kdf_flags;
-+ struct bch_encrypted_key key;
-+};
-+
-+LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4);
-+
-+enum bch_kdf_types {
-+ BCH_KDF_SCRYPT = 0,
-+ BCH_KDF_NR = 1,
-+};
-+
-+/* stored as base 2 log of scrypt params: */
-+LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16);
-+LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
-+LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
-+
-+/* BCH_SB_FIELD_replicas: */
-+
-+#define BCH_DATA_TYPES() \
-+ x(free, 0) \
-+ x(sb, 1) \
-+ x(journal, 2) \
-+ x(btree, 3) \
-+ x(user, 4) \
-+ x(cached, 5) \
-+ x(parity, 6) \
-+ x(stripe, 7) \
-+ x(need_gc_gens, 8) \
-+ x(need_discard, 9)
-+
-+enum bch_data_type { /* one BCH_DATA_<name> enumerator per BCH_DATA_TYPES() entry */
-+#define x(t, n) BCH_DATA_##t, /* n is ignored; the list is numbered 0-9 in order, so position and n agree */
-+ BCH_DATA_TYPES()
-+#undef x
-+ BCH_DATA_NR /* count of the enumerators above */
-+};
-+
-+/*
-+ * Returns true when @type is BCH_DATA_free, BCH_DATA_need_gc_gens or
-+ * BCH_DATA_need_discard, and false for every other value.
-+ */
-+static inline bool data_type_is_empty(enum bch_data_type type)
-+{
-+	return type == BCH_DATA_free ||
-+	       type == BCH_DATA_need_gc_gens ||
-+	       type == BCH_DATA_need_discard;
-+}
-+
-+/*
-+ * Returns true when @type is BCH_DATA_sb or BCH_DATA_journal, and false for
-+ * every other value.
-+ */
-+static inline bool data_type_is_hidden(enum bch_data_type type)
-+{
-+	return type == BCH_DATA_sb || type == BCH_DATA_journal;
-+}
-+
-+struct bch_replicas_entry_v0 {
-+ __u8 data_type;
-+ __u8 nr_devs;
-+ __u8 devs[];
-+} __packed;
-+
-+struct bch_sb_field_replicas_v0 {
-+ struct bch_sb_field field;
-+ struct bch_replicas_entry_v0 entries[];
-+} __packed __aligned(8);
-+
-+struct bch_replicas_entry {
-+ __u8 data_type;
-+ __u8 nr_devs;
-+ __u8 nr_required;
-+ __u8 devs[];
-+} __packed;
-+
-+#define replicas_entry_bytes(_i) \
-+ (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
-+
-+struct bch_sb_field_replicas {
-+ struct bch_sb_field field;
-+ struct bch_replicas_entry entries[];
-+} __packed __aligned(8);
-+
-+/* BCH_SB_FIELD_quota: */
-+
-+struct bch_sb_quota_counter {
-+ __le32 timelimit;
-+ __le32 warnlimit;
-+};
-+
-+struct bch_sb_quota_type {
-+ __le64 flags;
-+ struct bch_sb_quota_counter c[Q_COUNTERS];
-+};
-+
-+struct bch_sb_field_quota {
-+ struct bch_sb_field field;
-+ struct bch_sb_quota_type q[QTYP_NR];
-+} __packed __aligned(8);
-+
-+/* BCH_SB_FIELD_disk_groups: */
-+
-+#define BCH_SB_LABEL_SIZE 32
-+
-+struct bch_disk_group {
-+ __u8 label[BCH_SB_LABEL_SIZE];
-+ __le64 flags[2];
-+} __packed __aligned(8);
-+
-+LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1)
-+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6)
-+LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
-+
-+struct bch_sb_field_disk_groups {
-+ struct bch_sb_field field;
-+ struct bch_disk_group entries[];
-+} __packed __aligned(8);
-+
-+/* BCH_SB_FIELD_counters */
-+
-+#define BCH_PERSISTENT_COUNTERS() \
-+ x(io_read, 0) \
-+ x(io_write, 1) \
-+ x(io_move, 2) \
-+ x(bucket_invalidate, 3) \
-+ x(bucket_discard, 4) \
-+ x(bucket_alloc, 5) \
-+ x(bucket_alloc_fail, 6) \
-+ x(btree_cache_scan, 7) \
-+ x(btree_cache_reap, 8) \
-+ x(btree_cache_cannibalize, 9) \
-+ x(btree_cache_cannibalize_lock, 10) \
-+ x(btree_cache_cannibalize_lock_fail, 11) \
-+ x(btree_cache_cannibalize_unlock, 12) \
-+ x(btree_node_write, 13) \
-+ x(btree_node_read, 14) \
-+ x(btree_node_compact, 15) \
-+ x(btree_node_merge, 16) \
-+ x(btree_node_split, 17) \
-+ x(btree_node_rewrite, 18) \
-+ x(btree_node_alloc, 19) \
-+ x(btree_node_free, 20) \
-+ x(btree_node_set_root, 21) \
-+ x(btree_path_relock_fail, 22) \
-+ x(btree_path_upgrade_fail, 23) \
-+ x(btree_reserve_get_fail, 24) \
-+ x(journal_entry_full, 25) \
-+ x(journal_full, 26) \
-+ x(journal_reclaim_finish, 27) \
-+ x(journal_reclaim_start, 28) \
-+ x(journal_write, 29) \
-+ x(read_promote, 30) \
-+ x(read_bounce, 31) \
-+ x(read_split, 33) \
-+ x(read_retry, 32) \
-+ x(read_reuse_race, 34) \
-+ x(move_extent_read, 35) \
-+ x(move_extent_write, 36) \
-+ x(move_extent_finish, 37) \
-+ x(move_extent_fail, 38) \
-+ x(move_extent_alloc_mem_fail, 39) \
-+ x(copygc, 40) \
-+ x(copygc_wait, 41) \
-+ x(gc_gens_end, 42) \
-+ x(gc_gens_start, 43) \
-+ x(trans_blocked_journal_reclaim, 44) \
-+ x(trans_restart_btree_node_reused, 45) \
-+ x(trans_restart_btree_node_split, 46) \
-+ x(trans_restart_fault_inject, 47) \
-+ x(trans_restart_iter_upgrade, 48) \
-+ x(trans_restart_journal_preres_get, 49) \
-+ x(trans_restart_journal_reclaim, 50) \
-+ x(trans_restart_journal_res_get, 51) \
-+ x(trans_restart_key_cache_key_realloced, 52) \
-+ x(trans_restart_key_cache_raced, 53) \
-+ x(trans_restart_mark_replicas, 54) \
-+ x(trans_restart_mem_realloced, 55) \
-+ x(trans_restart_memory_allocation_failure, 56) \
-+ x(trans_restart_relock, 57) \
-+ x(trans_restart_relock_after_fill, 58) \
-+ x(trans_restart_relock_key_cache_fill, 59) \
-+ x(trans_restart_relock_next_node, 60) \
-+ x(trans_restart_relock_parent_for_fill, 61) \
-+ x(trans_restart_relock_path, 62) \
-+ x(trans_restart_relock_path_intent, 63) \
-+ x(trans_restart_too_many_iters, 64) \
-+ x(trans_restart_traverse, 65) \
-+ x(trans_restart_upgrade, 66) \
-+ x(trans_restart_would_deadlock, 67) \
-+ x(trans_restart_would_deadlock_write, 68) \
-+ x(trans_restart_injected, 69) \
-+ x(trans_restart_key_cache_upgrade, 70) \
-+ x(trans_traverse_all, 71) \
-+ x(transaction_commit, 72) \
-+ x(write_super, 73) \
-+ x(trans_restart_would_deadlock_recursion_limit, 74) \
-+ x(trans_restart_write_buffer_flush, 75) \
-+ x(trans_restart_split_race, 76)
-+
-+enum bch_persistent_counters { /* one BCH_COUNTER_<name> enumerator per BCH_PERSISTENT_COUNTERS() entry */
-+#define x(t, n, ...) BCH_COUNTER_##t, /* n is ignored: values are assigned by list position, starting at 0 */
-+ BCH_PERSISTENT_COUNTERS() /* NOTE: read_split/read_retry are listed as 33/32, but by position they get 32/33 */
-+#undef x
-+ BCH_COUNTER_NR /* count of the enumerators above */
-+};
-+
-+struct bch_sb_field_counters {
-+ struct bch_sb_field field;
-+ __le64 d[];
-+};
-+
-+/*
-+ * On clean shutdown, store btree roots and current journal sequence number in
-+ * the superblock:
-+ */
-+struct jset_entry {
-+ __le16 u64s;
-+ __u8 btree_id;
-+ __u8 level;
-+ __u8 type; /* designates what this jset holds */
-+ __u8 pad[3];
-+
-+ struct bkey_i start[0];
-+ __u64 _data[];
-+};
-+
-+struct bch_sb_field_clean {
-+ struct bch_sb_field field;
-+
-+ __le32 flags;
-+ __le16 _read_clock; /* no longer used */
-+ __le16 _write_clock;
-+ __le64 journal_seq;
-+
-+ struct jset_entry start[0];
-+ __u64 _data[];
-+};
-+
-+struct journal_seq_blacklist_entry {
-+ __le64 start;
-+ __le64 end;
-+};
-+
-+struct bch_sb_field_journal_seq_blacklist {
-+ struct bch_sb_field field;
-+ struct journal_seq_blacklist_entry start[];
-+};
-+
-+struct bch_sb_field_errors {
-+ struct bch_sb_field field;
-+ struct bch_sb_field_error_entry {
-+ __le64 v;
-+ __le64 last_error_time;
-+ } entries[];
-+};
-+
-+LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
-+LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
-+
-+/* Superblock: */
-+
-+/*
-+ * New versioning scheme:
-+ * One common version number for all on disk data structures - superblock, btree
-+ * nodes, journal entries
-+ */
-+#define BCH_VERSION_MAJOR(_v) ((__u16) ((_v) >> 10)) /* major number: bits 10 and up of a packed version */
-+#define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) /* minor number: the low 10 bits */
-+#define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) /* packs major above the 10-bit minor; << binds tighter than | */
-+
-+#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
-+
-+#define BCH_METADATA_VERSIONS() \
-+ x(bkey_renumber, BCH_VERSION(0, 10), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(inode_btree_change, BCH_VERSION(0, 11), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(snapshot, BCH_VERSION(0, 12), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(inode_backpointers, BCH_VERSION(0, 13), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(snapshot_2, BCH_VERSION(0, 15), \
-+ BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \
-+ BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(reflink_p_fix, BCH_VERSION(0, 16), \
-+ BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \
-+ x(subvol_dirent, BCH_VERSION(0, 17), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(inode_v2, BCH_VERSION(0, 18), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(freespace, BCH_VERSION(0, 19), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(alloc_v4, BCH_VERSION(0, 20), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(new_data_types, BCH_VERSION(0, 21), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(backpointers, BCH_VERSION(0, 22), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(inode_v3, BCH_VERSION(0, 23), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(unwritten_extents, BCH_VERSION(0, 24), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(bucket_gens, BCH_VERSION(0, 25), \
-+ BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(lru_v2, BCH_VERSION(0, 26), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(fragmentation_lru, BCH_VERSION(0, 27), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(snapshot_trees, BCH_VERSION(0, 29), \
-+ RECOVERY_PASS_ALL_FSCK) \
-+ x(major_minor, BCH_VERSION(1, 0), \
-+ 0) \
-+ x(snapshot_skiplists, BCH_VERSION(1, 1), \
-+ BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \
-+ x(deleted_inodes, BCH_VERSION(1, 2), \
-+ BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \
-+ x(rebalance_work, BCH_VERSION(1, 3), \
-+ BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
-+
-+enum bcachefs_metadata_version { /* one explicitly numbered enumerator per BCH_METADATA_VERSIONS() entry */
-+ bcachefs_metadata_version_min = 9,
-+#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n, /* upgrade_passes is not used in this expansion */
-+ BCH_METADATA_VERSIONS()
-+#undef x
-+ bcachefs_metadata_version_max /* no initializer: one more than the last entry of BCH_METADATA_VERSIONS() */
-+};
-+
-+static const __maybe_unused
-+unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work;
-+
-+#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
-+
-+#define BCH_SB_SECTOR 8
-+#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
-+
-+struct bch_sb_layout { /* NOTE(review): 16 + 3 + 5 + 61 * 8 = 512 bytes if __uuid_t is 16 bytes - confirm */
-+ __uuid_t magic; /* bcachefs superblock UUID */
-+ __u8 layout_type;
-+ __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */
-+ __u8 nr_superblocks; /* NOTE(review): presumably how many sb_offset[] slots are in use - confirm */
-+ __u8 pad[5];
-+ __le64 sb_offset[61];
-+} __packed __aligned(8);
-+
-+#define BCH_SB_LAYOUT_SECTOR 7
-+
-+/*
-+ * @offset - sector where this sb was written
-+ * @version - on disk format version
-+ * @version_min - Oldest metadata version this filesystem contains; so we can
-+ * safely drop compatibility code and refuse to mount filesystems
-+ * we'd need it for
-+ * @magic - identifies as a bcachefs superblock (BCHFS_MAGIC)
-+ * @seq - identifies most recent superblock, incremented each time
-+ * superblock is written
-+ * @uuid - used for generating various magic numbers and identifying
-+ * member devices, never changes
-+ * @user_uuid - user visible UUID, may be changed
-+ * @label - filesystem label
-+ * @features - enabled incompatible features
-+ */
-+struct bch_sb {
-+ struct bch_csum csum;
-+ __le16 version;
-+ __le16 version_min;
-+ __le16 pad[2];
-+ __uuid_t magic;
-+ __uuid_t uuid;
-+ __uuid_t user_uuid;
-+ __u8 label[BCH_SB_LABEL_SIZE];
-+ __le64 offset;
-+ __le64 seq;
-+
-+ __le16 block_size;
-+ __u8 dev_idx;
-+ __u8 nr_devices;
-+ __le32 u64s;
-+
-+ __le64 time_base_lo;
-+ __le32 time_base_hi;
-+ __le32 time_precision;
-+
-+ __le64 flags[8];
-+ __le64 features[2];
-+ __le64 compat[2];
-+
-+ struct bch_sb_layout layout;
-+
-+ struct bch_sb_field start[0];
-+ __le64 _data[];
-+} __packed __aligned(8);
-+
-+/*
-+ * Flags:
-+ * BCH_SB_INITIALIZED - set on first mount
-+ * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect
-+ * behaviour of mount/recovery path:
-+ * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits
-+ * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80
-+ * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides
-+ * DATA/META_CSUM_TYPE. Also indicates encryption
-+ * algorithm in use, if/when we get more than one
-+ */
-+
-+LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16);
-+
-+LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1);
-+LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2);
-+LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8);
-+LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12);
-+
-+LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28);
-+
-+LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33);
-+LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40);
-+
-+LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44);
-+LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48);
-+
-+LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52);
-+LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56);
-+
-+LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57);
-+LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58);
-+LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59);
-+LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);
-+
-+LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61);
-+LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62);
-+
-+LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
-+
-+LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
-+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8);
-+LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9);
-+
-+LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
-+LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14);
-+
-+/*
-+ * Max size of an extent that may require bouncing to read or write
-+ * (checksummed, compressed): 64k
-+ */
-+LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS,
-+ struct bch_sb, flags[1], 14, 20);
-+
-+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24);
-+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
-+
-+LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40);
-+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52);
-+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
-+
-+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO,
-+ struct bch_sb, flags[2], 0, 4);
-+LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
-+
-+LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
-+LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28);
-+LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
-+LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
-+LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
-+LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
-+LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
-+LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
-+LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
-+LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54);
-+LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56);
-+
-+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60);
-+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
-+ struct bch_sb, flags[4], 60, 64);
-+
-+LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
-+ struct bch_sb, flags[5], 0, 16);
-+
-+/*
-+ * Combined compression type: the value of the _LO bitfield ORed with the
-+ * value of the _HI bitfield shifted left by 4.
-+ */
-+static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
-+{
-+	__u64 lo = BCH_SB_COMPRESSION_TYPE_LO(sb);
-+	__u64 hi = BCH_SB_COMPRESSION_TYPE_HI(sb);
-+
-+	return (hi << 4) | lo;
-+}
-+
-+/*
-+ * Stores @v split across the two bitfields: @v itself is handed to the _LO
-+ * setter and @v >> 4 to the _HI setter.
-+ * NOTE(review): this relies on the generated _LO setter discarding the bits
-+ * above its field width - LE64_BITMASK is not defined in this view; confirm.
-+ */
-+static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
-+{
-+	const __u64 hi = v >> 4;
-+
-+	SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v);
-+	SET_BCH_SB_COMPRESSION_TYPE_HI(sb, hi);
-+}
-+
-+/*
-+ * Combined background compression type: the value of the _LO bitfield ORed
-+ * with the value of the _HI bitfield shifted left by 4.
-+ */
-+static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb)
-+{
-+	__u64 lo = BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb);
-+	__u64 hi = BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb);
-+
-+	return (hi << 4) | lo;
-+}
-+
-+/*
-+ * Stores @v split across the two bitfields: @v itself is handed to the _LO
-+ * setter and @v >> 4 to the _HI setter.
-+ * NOTE(review): this relies on the generated _LO setter discarding the bits
-+ * above its field width - LE64_BITMASK is not defined in this view; confirm.
-+ */
-+static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
-+{
-+	const __u64 hi = v >> 4;
-+
-+	SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v);
-+	SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, hi);
-+}
-+
-+/*
-+ * Features:
-+ *
-+ * journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist
-+ * reflink: gates KEY_TYPE_reflink
-+ * inline_data: gates KEY_TYPE_inline_data
-+ * new_siphash: gates BCH_STR_HASH_siphash
-+ * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE
-+ */
-+#define BCH_SB_FEATURES() \
-+ x(lz4, 0) \
-+ x(gzip, 1) \
-+ x(zstd, 2) \
-+ x(atomic_nlink, 3) \
-+ x(ec, 4) \
-+ x(journal_seq_blacklist_v3, 5) \
-+ x(reflink, 6) \
-+ x(new_siphash, 7) \
-+ x(inline_data, 8) \
-+ x(new_extent_overwrite, 9) \
-+ x(incompressible, 10) \
-+ x(btree_ptr_v2, 11) \
-+ x(extents_above_btree_updates, 12) \
-+ x(btree_updates_journalled, 13) \
-+ x(reflink_inline_data, 14) \
-+ x(new_varint, 15) \
-+ x(journal_no_flush, 16) \
-+ x(alloc_v2, 17) \
-+ x(extents_across_btree_nodes, 18)
-+
-+#define BCH_SB_FEATURES_ALWAYS \
-+ ((1ULL << BCH_FEATURE_new_extent_overwrite)| \
-+ (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
-+ (1ULL << BCH_FEATURE_btree_updates_journalled)|\
-+ (1ULL << BCH_FEATURE_alloc_v2)|\
-+ (1ULL << BCH_FEATURE_extents_across_btree_nodes))
-+
-+#define BCH_SB_FEATURES_ALL \
-+ (BCH_SB_FEATURES_ALWAYS| \
-+ (1ULL << BCH_FEATURE_new_siphash)| \
-+ (1ULL << BCH_FEATURE_btree_ptr_v2)| \
-+ (1ULL << BCH_FEATURE_new_varint)| \
-+ (1ULL << BCH_FEATURE_journal_no_flush))
-+
-+enum bch_sb_feature { /* one BCH_FEATURE_<name> enumerator per BCH_SB_FEATURES() entry */
-+#define x(f, n) BCH_FEATURE_##f, /* n is ignored; the list is numbered 0-18 in order, so position and n agree */
-+ BCH_SB_FEATURES()
-+#undef x
-+ BCH_FEATURE_NR, /* count of the enumerators above */
-+};
-+
-+#define BCH_SB_COMPAT() \
-+ x(alloc_info, 0) \
-+ x(alloc_metadata, 1) \
-+ x(extents_above_btree_updates_done, 2) \
-+ x(bformat_overflow_done, 3)
-+
-+enum bch_sb_compat { /* one BCH_COMPAT_<name> enumerator per BCH_SB_COMPAT() entry */
-+#define x(f, n) BCH_COMPAT_##f, /* n is ignored; the list is numbered 0-3 in order, so position and n agree */
-+ BCH_SB_COMPAT()
-+#undef x
-+ BCH_COMPAT_NR, /* count of the enumerators above */
-+};
-+
-+/* options: */
-+
-+#define BCH_VERSION_UPGRADE_OPTS() \
-+ x(compatible, 0) \
-+ x(incompatible, 1) \
-+ x(none, 2)
-+
-+enum bch_version_upgrade_opts {
-+#define x(t, n) BCH_VERSION_UPGRADE_##t = n,
-+ BCH_VERSION_UPGRADE_OPTS()
-+#undef x
-+};
-+
-+#define BCH_REPLICAS_MAX 4U
-+
-+#define BCH_BKEY_PTRS_MAX 16U
-+
-+#define BCH_ERROR_ACTIONS() \
-+ x(continue, 0) \
-+ x(ro, 1) \
-+ x(panic, 2)
-+
-+enum bch_error_actions {
-+#define x(t, n) BCH_ON_ERROR_##t = n,
-+ BCH_ERROR_ACTIONS()
-+#undef x
-+ BCH_ON_ERROR_NR
-+};
-+
-+#define BCH_STR_HASH_TYPES() \
-+ x(crc32c, 0) \
-+ x(crc64, 1) \
-+ x(siphash_old, 2) \
-+ x(siphash, 3)
-+
-+enum bch_str_hash_type {
-+#define x(t, n) BCH_STR_HASH_##t = n,
-+ BCH_STR_HASH_TYPES()
-+#undef x
-+ BCH_STR_HASH_NR
-+};
-+
-+#define BCH_STR_HASH_OPTS() \
-+ x(crc32c, 0) \
-+ x(crc64, 1) \
-+ x(siphash, 2)
-+
-+enum bch_str_hash_opts {
-+#define x(t, n) BCH_STR_HASH_OPT_##t = n,
-+ BCH_STR_HASH_OPTS()
-+#undef x
-+ BCH_STR_HASH_OPT_NR
-+};
-+
-+#define BCH_CSUM_TYPES() \
-+ x(none, 0) \
-+ x(crc32c_nonzero, 1) \
-+ x(crc64_nonzero, 2) \
-+ x(chacha20_poly1305_80, 3) \
-+ x(chacha20_poly1305_128, 4) \
-+ x(crc32c, 5) \
-+ x(crc64, 6) \
-+ x(xxhash, 7)
-+
-+enum bch_csum_type {
-+#define x(t, n) BCH_CSUM_##t = n,
-+ BCH_CSUM_TYPES()
-+#undef x
-+ BCH_CSUM_NR
-+};
-+
-+static const __maybe_unused unsigned bch_crc_bytes[] = { /* indexed by enum bch_csum_type; NOTE(review): presumably the stored checksum/MAC size in bytes - confirm */
-+ [BCH_CSUM_none] = 0,
-+ [BCH_CSUM_crc32c_nonzero] = 4,
-+ [BCH_CSUM_crc32c] = 4,
-+ [BCH_CSUM_crc64_nonzero] = 8,
-+ [BCH_CSUM_crc64] = 8,
-+ [BCH_CSUM_xxhash] = 8,
-+ [BCH_CSUM_chacha20_poly1305_80] = 10, /* 10 * 8 = 80 bits */
-+ [BCH_CSUM_chacha20_poly1305_128] = 16, /* 16 * 8 = 128 bits */
-+};
-+
-+/*
-+ * Returns true when @type is one of the two chacha20_poly1305 checksum types
-+ * (80 or 128), and false for every other value.
-+ */
-+static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type)
-+{
-+	return type == BCH_CSUM_chacha20_poly1305_80 ||
-+	       type == BCH_CSUM_chacha20_poly1305_128;
-+}
-+
-+#define BCH_CSUM_OPTS() \
-+ x(none, 0) \
-+ x(crc32c, 1) \
-+ x(crc64, 2) \
-+ x(xxhash, 3)
-+
-+enum bch_csum_opts {
-+#define x(t, n) BCH_CSUM_OPT_##t = n,
-+ BCH_CSUM_OPTS()
-+#undef x
-+ BCH_CSUM_OPT_NR
-+};
-+
-+#define BCH_COMPRESSION_TYPES() \
-+ x(none, 0) \
-+ x(lz4_old, 1) \
-+ x(gzip, 2) \
-+ x(lz4, 3) \
-+ x(zstd, 4) \
-+ x(incompressible, 5)
-+
-+enum bch_compression_type {
-+#define x(t, n) BCH_COMPRESSION_TYPE_##t = n,
-+ BCH_COMPRESSION_TYPES()
-+#undef x
-+ BCH_COMPRESSION_TYPE_NR
-+};
-+
-+#define BCH_COMPRESSION_OPTS() \
-+ x(none, 0) \
-+ x(lz4, 1) \
-+ x(gzip, 2) \
-+ x(zstd, 3)
-+
-+enum bch_compression_opts {
-+#define x(t, n) BCH_COMPRESSION_OPT_##t = n,
-+ BCH_COMPRESSION_OPTS()
-+#undef x
-+ BCH_COMPRESSION_OPT_NR
-+};
-+
-+/*
-+ * Magic numbers
-+ *
-+ * The various other data structures have their own magic numbers, which are
-+ * xored with the first part of the cache set's UUID
-+ */
-+
-+#define BCACHE_MAGIC \
-+ UUID_INIT(0xc68573f6, 0x4e1a, 0x45ca, \
-+ 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81)
-+#define BCHFS_MAGIC \
-+ UUID_INIT(0xc68573f6, 0x66ce, 0x90a9, \
-+ 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
-+
-+#define BCACHEFS_STATFS_MAGIC 0xca451a4e
-+
-+#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL)
-+#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL)
-+
-+/*
-+ * Returns the leading sizeof(__le64) bytes of the superblock's uuid field,
-+ * copied out verbatim (no byte-order conversion is applied here).
-+ */
-+static inline __le64 __bch2_sb_magic(struct bch_sb *sb)
-+{
-+	__le64 magic;
-+
-+	memcpy(&magic, &sb->uuid, sizeof(magic));
-+	return magic;
-+}
-+
-+/*
-+ * jset magic for this superblock: the value from __bch2_sb_magic() XORed with
-+ * JSET_MAGIC, then converted from little endian to CPU byte order.
-+ */
-+static inline __u64 __jset_magic(struct bch_sb *sb)
-+{
-+	__le64 magic = __bch2_sb_magic(sb) ^ JSET_MAGIC;
-+
-+	return __le64_to_cpu(magic);
-+}
-+
-+/*
-+ * bset magic for this superblock: the value from __bch2_sb_magic() XORed with
-+ * BSET_MAGIC, then converted from little endian to CPU byte order.
-+ */
-+static inline __u64 __bset_magic(struct bch_sb *sb)
-+{
-+	__le64 magic = __bch2_sb_magic(sb) ^ BSET_MAGIC;
-+
-+	return __le64_to_cpu(magic);
-+}
-+
-+/* Journal */
-+
-+#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
-+
-+#define BCH_JSET_ENTRY_TYPES() \
-+ x(btree_keys, 0) \
-+ x(btree_root, 1) \
-+ x(prio_ptrs, 2) \
-+ x(blacklist, 3) \
-+ x(blacklist_v2, 4) \
-+ x(usage, 5) \
-+ x(data_usage, 6) \
-+ x(clock, 7) \
-+ x(dev_usage, 8) \
-+ x(log, 9) \
-+ x(overwrite, 10)
-+
-+enum {
-+#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
-+ BCH_JSET_ENTRY_TYPES()
-+#undef x
-+ BCH_JSET_ENTRY_NR
-+};
-+
-+/*
-+ * Journal sequence numbers can be blacklisted: bsets record the max sequence
-+ * number of all the journal entries they contain updates for, so that on
-+ * recovery we can ignore those bsets that contain index updates newer than what
-+ * made it into the journal.
-+ *
-+ * This means that we can't reuse that journal_seq - we have to skip it, and
-+ * then record that we skipped it so that the next time we crash and recover we
-+ * don't think there was a missing journal entry.
-+ */
-+struct jset_entry_blacklist {
-+ struct jset_entry entry;
-+ __le64 seq;
-+};
-+
-+struct jset_entry_blacklist_v2 {
-+ struct jset_entry entry;
-+ __le64 start;
-+ __le64 end;
-+};
-+
-+#define BCH_FS_USAGE_TYPES() \
-+ x(reserved, 0) \
-+ x(inodes, 1) \
-+ x(key_version, 2)
-+
-+enum {
-+#define x(f, nr) BCH_FS_USAGE_##f = nr,
-+ BCH_FS_USAGE_TYPES()
-+#undef x
-+ BCH_FS_USAGE_NR
-+};
-+
-+struct jset_entry_usage {
-+ struct jset_entry entry;
-+ __le64 v;
-+} __packed;
-+
-+struct jset_entry_data_usage {
-+ struct jset_entry entry;
-+ __le64 v;
-+ struct bch_replicas_entry r;
-+} __packed;
-+
-+struct jset_entry_clock {
-+ struct jset_entry entry;
-+ __u8 rw;
-+ __u8 pad[7];
-+ __le64 time;
-+} __packed;
-+
-+struct jset_entry_dev_usage_type {
-+ __le64 buckets;
-+ __le64 sectors;
-+ __le64 fragmented;
-+} __packed;
-+
-+struct jset_entry_dev_usage {
-+ struct jset_entry entry;
-+ __le32 dev;
-+ __u32 pad;
-+
-+ __le64 buckets_ec;
-+ __le64 _buckets_unavailable; /* No longer used */
-+
-+ struct jset_entry_dev_usage_type d[];
-+};
-+
-+static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage *u)
-+{
-+ return (vstruct_bytes(&u->entry) - sizeof(struct jset_entry_dev_usage)) /
-+ sizeof(struct jset_entry_dev_usage_type);
-+}
-+
-+struct jset_entry_log {
-+ struct jset_entry entry;
-+ u8 d[];
-+} __packed;
-+
-+/*
-+ * On disk format for a journal entry:
-+ * seq is monotonically increasing; every journal entry has its own unique
-+ * sequence number.
-+ *
-+ * last_seq is the oldest journal entry that still has keys the btree hasn't
-+ * flushed to disk yet.
-+ *
-+ * version is for on disk format changes.
-+ */
-+struct jset {
-+ struct bch_csum csum;
-+
-+ __le64 magic;
-+ __le64 seq;
-+ __le32 version;
-+ __le32 flags;
-+
-+ __le32 u64s; /* size of d[] in u64s */
-+
-+ __u8 encrypted_start[0];
-+
-+ __le16 _read_clock; /* no longer used */
-+ __le16 _write_clock;
-+
-+ /* Sequence number of oldest dirty journal entry */
-+ __le64 last_seq;
-+
-+
-+ struct jset_entry start[0];
-+ __u64 _data[];
-+} __packed __aligned(8);
-+
-+LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
-+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
-+LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
-+
-+#define BCH_JOURNAL_BUCKETS_MIN 8
-+
-+/* Btree: */
-+
-+enum btree_id_flags {
-+ BTREE_ID_EXTENTS = BIT(0),
-+ BTREE_ID_SNAPSHOTS = BIT(1),
-+ BTREE_ID_SNAPSHOT_FIELD = BIT(2),
-+ BTREE_ID_DATA = BIT(3),
-+};
-+
-+#define BCH_BTREE_IDS() \
-+ x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\
-+ BIT_ULL(KEY_TYPE_whiteout)| \
-+ BIT_ULL(KEY_TYPE_error)| \
-+ BIT_ULL(KEY_TYPE_cookie)| \
-+ BIT_ULL(KEY_TYPE_extent)| \
-+ BIT_ULL(KEY_TYPE_reservation)| \
-+ BIT_ULL(KEY_TYPE_reflink_p)| \
-+ BIT_ULL(KEY_TYPE_inline_data)) \
-+ x(inodes, 1, BTREE_ID_SNAPSHOTS, \
-+ BIT_ULL(KEY_TYPE_whiteout)| \
-+ BIT_ULL(KEY_TYPE_inode)| \
-+ BIT_ULL(KEY_TYPE_inode_v2)| \
-+ BIT_ULL(KEY_TYPE_inode_v3)| \
-+ BIT_ULL(KEY_TYPE_inode_generation)) \
-+ x(dirents, 2, BTREE_ID_SNAPSHOTS, \
-+ BIT_ULL(KEY_TYPE_whiteout)| \
-+ BIT_ULL(KEY_TYPE_hash_whiteout)| \
-+ BIT_ULL(KEY_TYPE_dirent)) \
-+ x(xattrs, 3, BTREE_ID_SNAPSHOTS, \
-+ BIT_ULL(KEY_TYPE_whiteout)| \
-+ BIT_ULL(KEY_TYPE_cookie)| \
-+ BIT_ULL(KEY_TYPE_hash_whiteout)| \
-+ BIT_ULL(KEY_TYPE_xattr)) \
-+ x(alloc, 4, 0, \
-+ BIT_ULL(KEY_TYPE_alloc)| \
-+ BIT_ULL(KEY_TYPE_alloc_v2)| \
-+ BIT_ULL(KEY_TYPE_alloc_v3)| \
-+ BIT_ULL(KEY_TYPE_alloc_v4)) \
-+ x(quotas, 5, 0, \
-+ BIT_ULL(KEY_TYPE_quota)) \
-+ x(stripes, 6, 0, \
-+ BIT_ULL(KEY_TYPE_stripe)) \
-+ x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
-+ BIT_ULL(KEY_TYPE_reflink_v)| \
-+ BIT_ULL(KEY_TYPE_indirect_inline_data)) \
-+ x(subvolumes, 8, 0, \
-+ BIT_ULL(KEY_TYPE_subvolume)) \
-+ x(snapshots, 9, 0, \
-+ BIT_ULL(KEY_TYPE_snapshot)) \
-+ x(lru, 10, 0, \
-+ BIT_ULL(KEY_TYPE_set)) \
-+ x(freespace, 11, BTREE_ID_EXTENTS, \
-+ BIT_ULL(KEY_TYPE_set)) \
-+ x(need_discard, 12, 0, \
-+ BIT_ULL(KEY_TYPE_set)) \
-+ x(backpointers, 13, 0, \
-+ BIT_ULL(KEY_TYPE_backpointer)) \
-+ x(bucket_gens, 14, 0, \
-+ BIT_ULL(KEY_TYPE_bucket_gens)) \
-+ x(snapshot_trees, 15, 0, \
-+ BIT_ULL(KEY_TYPE_snapshot_tree)) \
-+ x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \
-+ BIT_ULL(KEY_TYPE_set)) \
-+ x(logged_ops, 17, 0, \
-+ BIT_ULL(KEY_TYPE_logged_op_truncate)| \
-+ BIT_ULL(KEY_TYPE_logged_op_finsert)) \
-+ x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
-+ BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
-+
-+enum btree_id {
-+#define x(name, nr, ...) BTREE_ID_##name = nr,
-+ BCH_BTREE_IDS()
-+#undef x
-+ BTREE_ID_NR
-+};
-+
-+#define BTREE_MAX_DEPTH 4U
-+
-+/* Btree nodes */
-+
-+/*
-+ * Btree nodes
-+ *
-+ * On disk a btree node is a list/log of these; within each set the keys are
-+ * sorted
-+ */
-+struct bset {
-+ __le64 seq;
-+
-+ /*
-+ * Highest journal entry this bset contains keys for.
-+ * If on recovery we don't see that journal entry, this bset is ignored:
-+ * this allows us to preserve the order of all index updates after a
-+ * crash, since the journal records a total order of all index updates
-+ * and anything that didn't make it to the journal doesn't get used.
-+ */
-+ __le64 journal_seq;
-+
-+ __le32 flags;
-+ __le16 version;
-+ __le16 u64s; /* count of d[] in u64s */
-+
-+ struct bkey_packed start[0];
-+ __u64 _data[];
-+} __packed __aligned(8);
-+
-+LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);
-+
-+LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5);
-+LE32_BITMASK(BSET_SEPARATE_WHITEOUTS,
-+ struct bset, flags, 5, 6);
-+
-+/* Sector offset within the btree node: */
-+LE32_BITMASK(BSET_OFFSET, struct bset, flags, 16, 32);
-+
-+struct btree_node {
-+ struct bch_csum csum;
-+ __le64 magic;
-+
-+ /* this flags field is encrypted, unlike bset->flags: */
-+ __le64 flags;
-+
-+ /* Closed interval: */
-+ struct bpos min_key;
-+ struct bpos max_key;
-+ struct bch_extent_ptr _ptr; /* not used anymore */
-+ struct bkey_format format;
-+
-+ union {
-+ struct bset keys;
-+ struct {
-+ __u8 pad[22];
-+ __le16 u64s;
-+ __u64 _data[0];
-+
-+ };
-+ };
-+} __packed __aligned(8);
-+
-+LE64_BITMASK(BTREE_NODE_ID_LO, struct btree_node, flags, 0, 4);
-+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
-+LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE,
-+ struct btree_node, flags, 8, 9);
-+LE64_BITMASK(BTREE_NODE_ID_HI, struct btree_node, flags, 9, 25);
-+/* 25-32 unused */
-+LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64);
-+
-+static inline __u64 BTREE_NODE_ID(struct btree_node *n)
-+{
-+ return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4);
-+}
-+
-+static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v)
-+{
-+ SET_BTREE_NODE_ID_LO(n, v);
-+ SET_BTREE_NODE_ID_HI(n, v >> 4);
-+}
-+
-+struct btree_node_entry {
-+ struct bch_csum csum;
-+
-+ union {
-+ struct bset keys;
-+ struct {
-+ __u8 pad[22];
-+ __le16 u64s;
-+ __u64 _data[0];
-+ };
-+ };
-+} __packed __aligned(8);
-+
-+#endif /* _BCACHEFS_FORMAT_H */
-diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h
-new file mode 100644
-index 000000000000..f05881f7e113
---- /dev/null
-+++ b/fs/bcachefs/bcachefs_ioctl.h
-@@ -0,0 +1,368 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_IOCTL_H
-+#define _BCACHEFS_IOCTL_H
-+
-+#include <linux/uuid.h>
-+#include <asm/ioctl.h>
-+#include "bcachefs_format.h"
-+
-+/*
-+ * Flags common to multiple ioctls:
-+ */
-+#define BCH_FORCE_IF_DATA_LOST (1 << 0)
-+#define BCH_FORCE_IF_METADATA_LOST (1 << 1)
-+#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2)
-+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
-+
-+#define BCH_FORCE_IF_LOST \
-+ (BCH_FORCE_IF_DATA_LOST| \
-+ BCH_FORCE_IF_METADATA_LOST)
-+#define BCH_FORCE_IF_DEGRADED \
-+ (BCH_FORCE_IF_DATA_DEGRADED| \
-+ BCH_FORCE_IF_METADATA_DEGRADED)
-+
-+/*
-+ * If cleared, ioctls that refer to a device pass it as a pointer to a pathname
-+ * (e.g. /dev/sda1); if set, the dev field is the device's index within the
-+ * filesystem:
-+ */
-+#define BCH_BY_INDEX (1 << 4)
-+
-+/*
-+ * For BCH_IOCTL_READ_SUPER: get superblock of a specific device, not filesystem
-+ * wide superblock:
-+ */
-+#define BCH_READ_DEV (1 << 5)
-+
-+/* global control dev: */
-+
-+/* These are currently broken, and probably unnecessary: */
-+#if 0
-+#define BCH_IOCTL_ASSEMBLE _IOW(0xbc, 1, struct bch_ioctl_assemble)
-+#define BCH_IOCTL_INCREMENTAL _IOW(0xbc, 2, struct bch_ioctl_incremental)
-+
-+struct bch_ioctl_assemble {
-+ __u32 flags;
-+ __u32 nr_devs;
-+ __u64 pad;
-+ __u64 devs[];
-+};
-+
-+struct bch_ioctl_incremental {
-+ __u32 flags;
-+ __u64 pad;
-+ __u64 dev;
-+};
-+#endif
-+
-+/* filesystem ioctls: */
-+
-+#define BCH_IOCTL_QUERY_UUID _IOR(0xbc, 1, struct bch_ioctl_query_uuid)
-+
-+/* These only make sense when we also have incremental assembly */
-+#if 0
-+#define BCH_IOCTL_START _IOW(0xbc, 2, struct bch_ioctl_start)
-+#define BCH_IOCTL_STOP _IO(0xbc, 3)
-+#endif
-+
-+#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk)
-+#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk)
-+#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk)
-+#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk)
-+#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state)
-+#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data)
-+#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage)
-+#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage)
-+#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super)
-+#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx)
-+#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize)
-+#define BCH_IOCTL_DISK_RESIZE_JOURNAL _IOW(0xbc,15, struct bch_ioctl_disk_resize_journal)
-+
-+#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume)
-+#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume)
-+
-+/* ioctls below act on a particular file, not the filesystem as a whole: */
-+
-+#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *)
-+
-+/*
-+ * BCH_IOCTL_QUERY_UUID: get filesystem UUID
-+ *
-+ * Returns user visible UUID, not internal UUID (which may not ever be changed);
-+ * the filesystem's sysfs directory may be found under /sys/fs/bcachefs with
-+ * this UUID.
-+ */
-+struct bch_ioctl_query_uuid {
-+ __uuid_t uuid;
-+};
-+
-+#if 0
-+struct bch_ioctl_start {
-+ __u32 flags;
-+ __u32 pad;
-+};
-+#endif
-+
-+/*
-+ * BCH_IOCTL_DISK_ADD: add a new device to an existing filesystem
-+ *
-+ * The specified device must not be open or in use. On success, the new device
-+ * will be an online member of the filesystem just like any other member.
-+ *
-+ * The device must first be prepared by userspace by formatting with a bcachefs
-+ * superblock, which is only used for passing in superblock options/parameters
-+ * for that device (in struct bch_member). The new device's superblock should
-+ * not claim to be a member of any existing filesystem - UUIDs on it will be
-+ * ignored.
-+ */
-+
-+/*
-+ * BCH_IOCTL_DISK_REMOVE: permanently remove a member device from a filesystem
-+ *
-+ * Any data present on @dev will be permanently deleted, and @dev will be
-+ * removed from its slot in the filesystem's list of member devices. The device
-+ * may be either online or offline.
-+ *
-+ * Will fail if removing @dev would leave us with insufficient read write
-+ * devices or degraded/unavailable data, unless the appropriate BCH_FORCE_IF_*
-+ * flags are set.
-+ */
-+
-+/*
-+ * BCH_IOCTL_DISK_ONLINE: given a disk that is already a member of a filesystem
-+ * but is not open (e.g. because we started in degraded mode), bring it online
-+ *
-+ * all existing data on @dev will be available once the device is online,
-+ * exactly as if @dev was present when the filesystem was first mounted
-+ */
-+
-+/*
-+ * BCH_IOCTL_DISK_OFFLINE: offline a disk, causing the kernel to close that
-+ * block device, without removing it from the filesystem (so it can be brought
-+ * back online later)
-+ *
-+ * Data present on @dev will be unavailable while @dev is offline (unless
-+ * replicated), but will still be intact and untouched if @dev is brought back
-+ * online
-+ *
-+ * Will fail (similarly to BCH_IOCTL_DISK_SET_STATE) if offlining @dev would
-+ * leave us with insufficient read write devices or degraded/unavailable data,
-+ * unless the appropriate BCH_FORCE_IF_* flags are set.
-+ */
-+
-+struct bch_ioctl_disk {
-+ __u32 flags;
-+ __u32 pad;
-+ __u64 dev;
-+};
-+
-+/*
-+ * BCH_IOCTL_DISK_SET_STATE: modify state of a member device of a filesystem
-+ *
-+ * @new_state - one of the bch_member_state states (rw, ro, failed,
-+ * spare)
-+ *
-+ * Will refuse to change member state if we would then have insufficient devices
-+ * to write to, or if it would result in degraded data (when @new_state is
-+ * failed or spare) unless the appropriate BCH_FORCE_IF_* flags are set.
-+ */
-+struct bch_ioctl_disk_set_state {
-+ __u32 flags;
-+ __u8 new_state;
-+ __u8 pad[3];
-+ __u64 dev;
-+};
-+
-+enum bch_data_ops {
-+ BCH_DATA_OP_SCRUB = 0,
-+ BCH_DATA_OP_REREPLICATE = 1,
-+ BCH_DATA_OP_MIGRATE = 2,
-+ BCH_DATA_OP_REWRITE_OLD_NODES = 3,
-+ BCH_DATA_OP_NR = 4,
-+};
-+
-+/*
-+ * BCH_IOCTL_DATA: operations that walk and manipulate filesystem data (e.g.
-+ * scrub, rereplicate, migrate).
-+ *
-+ * This ioctl kicks off a job in the background, and returns a file descriptor.
-+ * Reading from the file descriptor returns a struct bch_ioctl_data_event,
-+ * indicating current progress, and closing the file descriptor will stop the
-+ * job. The file descriptor is O_CLOEXEC.
-+ */
-+struct bch_ioctl_data {
-+ __u16 op;
-+ __u8 start_btree;
-+ __u8 end_btree;
-+ __u32 flags;
-+
-+ struct bpos start_pos;
-+ struct bpos end_pos;
-+
-+ union {
-+ struct {
-+ __u32 dev;
-+ __u32 pad;
-+ } migrate;
-+ struct {
-+ __u64 pad[8];
-+ };
-+ };
-+} __packed __aligned(8);
-+
-+enum bch_data_event {
-+ BCH_DATA_EVENT_PROGRESS = 0,
-+ /* XXX: add an event for reporting errors */
-+ BCH_DATA_EVENT_NR = 1,
-+};
-+
-+struct bch_ioctl_data_progress {
-+ __u8 data_type;
-+ __u8 btree_id;
-+ __u8 pad[2];
-+ struct bpos pos;
-+
-+ __u64 sectors_done;
-+ __u64 sectors_total;
-+} __packed __aligned(8);
-+
-+struct bch_ioctl_data_event {
-+ __u8 type;
-+ __u8 pad[7];
-+ union {
-+ struct bch_ioctl_data_progress p;
-+ __u64 pad2[15];
-+ };
-+} __packed __aligned(8);
-+
-+struct bch_replicas_usage {
-+ __u64 sectors;
-+ struct bch_replicas_entry r;
-+} __packed;
-+
-+static inline struct bch_replicas_usage *
-+replicas_usage_next(struct bch_replicas_usage *u)
-+{
-+ return (void *) u + replicas_entry_bytes(&u->r) + 8;
-+}
-+
-+/*
-+ * BCH_IOCTL_FS_USAGE: query filesystem disk space usage
-+ *
-+ * Returns disk space usage broken out by data type, number of replicas, and
-+ * by component device
-+ *
-+ * @replica_entries_bytes - size, in bytes, allocated for replica usage entries
-+ *
-+ * On success, @replica_entries_bytes will be changed to indicate the number of
-+ * bytes actually used.
-+ *
-+ * Returns -ERANGE if @replica_entries_bytes was too small
-+ */
-+struct bch_ioctl_fs_usage {
-+ __u64 capacity;
-+ __u64 used;
-+ __u64 online_reserved;
-+ __u64 persistent_reserved[BCH_REPLICAS_MAX];
-+
-+ __u32 replica_entries_bytes;
-+ __u32 pad;
-+
-+ struct bch_replicas_usage replicas[0];
-+};
-+
-+/*
-+ * BCH_IOCTL_DEV_USAGE: query device disk space usage
-+ *
-+ * Returns disk space usage broken out by data type - both by buckets and
-+ * sectors.
-+ */
-+struct bch_ioctl_dev_usage {
-+ __u64 dev;
-+ __u32 flags;
-+ __u8 state;
-+ __u8 pad[7];
-+
-+ __u32 bucket_size;
-+ __u64 nr_buckets;
-+
-+ __u64 buckets_ec;
-+
-+ struct bch_ioctl_dev_usage_type {
-+ __u64 buckets;
-+ __u64 sectors;
-+ __u64 fragmented;
-+ } d[BCH_DATA_NR];
-+};
-+
-+/*
-+ * BCH_IOCTL_READ_SUPER: read filesystem superblock
-+ *
-+ * Equivalent to reading the superblock directly from the block device, except
-+ * avoids racing with the kernel writing the superblock or having to figure out
-+ * which block device to read
-+ *
-+ * @sb - buffer to read into
-+ * @size - size of userspace allocated buffer
-+ * @dev - device to read superblock for, if BCH_READ_DEV flag is
-+ * specified
-+ *
-+ * Returns -ERANGE if buffer provided is too small
-+ */
-+struct bch_ioctl_read_super {
-+ __u32 flags;
-+ __u32 pad;
-+ __u64 dev;
-+ __u64 size;
-+ __u64 sb;
-+};
-+
-+/*
-+ * BCH_IOCTL_DISK_GET_IDX: given a path to a block device, query filesystem to
-+ * determine if disk is a (online) member - if so, returns device's index
-+ *
-+ * Returns -ENOENT if not found
-+ */
-+struct bch_ioctl_disk_get_idx {
-+ __u64 dev;
-+};
-+
-+/*
-+ * BCH_IOCTL_DISK_RESIZE: resize filesystem on a device
-+ *
-+ * @dev - member to resize
-+ * @nbuckets - new number of buckets
-+ */
-+struct bch_ioctl_disk_resize {
-+ __u32 flags;
-+ __u32 pad;
-+ __u64 dev;
-+ __u64 nbuckets;
-+};
-+
-+/*
-+ * BCH_IOCTL_DISK_RESIZE_JOURNAL: resize journal on a device
-+ *
-+ * @dev - member to resize
-+ * @nbuckets - new number of buckets
-+ */
-+struct bch_ioctl_disk_resize_journal {
-+ __u32 flags;
-+ __u32 pad;
-+ __u64 dev;
-+ __u64 nbuckets;
-+};
-+
-+struct bch_ioctl_subvolume {
-+ __u32 flags;
-+ __u32 dirfd;
-+ __u16 mode;
-+ __u16 pad[3];
-+ __u64 dst_ptr;
-+ __u64 src_ptr;
-+};
-+
-+#define BCH_SUBVOL_SNAPSHOT_CREATE (1U << 0)
-+#define BCH_SUBVOL_SNAPSHOT_RO (1U << 1)
-+
-+#endif /* _BCACHEFS_IOCTL_H */
-diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
-new file mode 100644
-index 000000000000..abdb05507d16
---- /dev/null
-+++ b/fs/bcachefs/bkey.c
-@@ -0,0 +1,1120 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey.h"
-+#include "bkey_cmp.h"
-+#include "bkey_methods.h"
-+#include "bset.h"
-+#include "util.h"
-+
-+const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT;
-+
-+void bch2_bkey_packed_to_binary_text(struct printbuf *out,
-+ const struct bkey_format *f,
-+ const struct bkey_packed *k)
-+{
-+ const u64 *p = high_word(f, k);
-+ unsigned word_bits = 64 - high_bit_offset;
-+ unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset;
-+ u64 v = *p & (~0ULL >> high_bit_offset);
-+
-+ if (!nr_key_bits) {
-+ prt_str(out, "(empty)");
-+ return;
-+ }
-+
-+ while (1) {
-+ unsigned next_key_bits = nr_key_bits;
-+
-+ if (nr_key_bits < 64) {
-+ v >>= 64 - nr_key_bits;
-+ next_key_bits = 0;
-+ } else {
-+ next_key_bits -= 64;
-+ }
-+
-+ bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits));
-+
-+ if (!next_key_bits)
-+ break;
-+
-+ prt_char(out, ' ');
-+
-+ p = next_word(p);
-+ v = *p;
-+ word_bits = 64;
-+ nr_key_bits = next_key_bits;
-+ }
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+
-+static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
-+ const struct bkey *unpacked,
-+ const struct bkey_format *format)
-+{
-+ struct bkey tmp;
-+
-+ BUG_ON(bkeyp_val_u64s(format, packed) !=
-+ bkey_val_u64s(unpacked));
-+
-+ BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed));
-+
-+ tmp = __bch2_bkey_unpack_key(format, packed);
-+
-+ if (memcmp(&tmp, unpacked, sizeof(struct bkey))) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n",
-+ format->key_u64s,
-+ format->bits_per_field[0],
-+ format->bits_per_field[1],
-+ format->bits_per_field[2],
-+ format->bits_per_field[3],
-+ format->bits_per_field[4]);
-+
-+ prt_printf(&buf, "compiled unpack: ");
-+ bch2_bkey_to_text(&buf, unpacked);
-+ prt_newline(&buf);
-+
-+ prt_printf(&buf, "c unpack: ");
-+ bch2_bkey_to_text(&buf, &tmp);
-+ prt_newline(&buf);
-+
-+ prt_printf(&buf, "compiled unpack: ");
-+ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
-+ (struct bkey_packed *) unpacked);
-+ prt_newline(&buf);
-+
-+ prt_printf(&buf, "c unpack: ");
-+ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
-+ (struct bkey_packed *) &tmp);
-+ prt_newline(&buf);
-+
-+ panic("%s", buf.buf);
-+ }
-+}
-+
-+#else
-+static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed,
-+ const struct bkey *unpacked,
-+ const struct bkey_format *format) {}
-+#endif
-+
-+struct pack_state {
-+ const struct bkey_format *format;
-+ unsigned bits; /* bits remaining in current word */
-+ u64 w; /* current word */
-+ u64 *p; /* pointer to next word */
-+};
-+
-+__always_inline
-+static struct pack_state pack_state_init(const struct bkey_format *format,
-+ struct bkey_packed *k)
-+{
-+ u64 *p = high_word(format, k);
-+
-+ return (struct pack_state) {
-+ .format = format,
-+ .bits = 64 - high_bit_offset,
-+ .w = 0,
-+ .p = p,
-+ };
-+}
-+
-+__always_inline
-+static void pack_state_finish(struct pack_state *state,
-+ struct bkey_packed *k)
-+{
-+ EBUG_ON(state->p < k->_data);
-+ EBUG_ON(state->p >= (u64 *) k->_data + state->format->key_u64s);
-+
-+ *state->p = state->w;
-+}
-+
-+struct unpack_state {
-+ const struct bkey_format *format;
-+ unsigned bits; /* bits remaining in current word */
-+ u64 w; /* current word */
-+ const u64 *p; /* pointer to next word */
-+};
-+
-+__always_inline
-+static struct unpack_state unpack_state_init(const struct bkey_format *format,
-+ const struct bkey_packed *k)
-+{
-+ const u64 *p = high_word(format, k);
-+
-+ return (struct unpack_state) {
-+ .format = format,
-+ .bits = 64 - high_bit_offset,
-+ .w = *p << high_bit_offset,
-+ .p = p,
-+ };
-+}
-+
-+__always_inline
-+static u64 get_inc_field(struct unpack_state *state, unsigned field)
-+{
-+ unsigned bits = state->format->bits_per_field[field];
-+ u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]);
-+
-+ if (bits >= state->bits) {
-+ v = state->w >> (64 - bits);
-+ bits -= state->bits;
-+
-+ state->p = next_word(state->p);
-+ state->w = *state->p;
-+ state->bits = 64;
-+ }
-+
-+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: */
-+ v |= (state->w >> 1) >> (63 - bits);
-+ state->w <<= bits;
-+ state->bits -= bits;
-+
-+ return v + offset;
-+}
-+
-+__always_inline
-+static void __set_inc_field(struct pack_state *state, unsigned field, u64 v)
-+{
-+ unsigned bits = state->format->bits_per_field[field];
-+
-+ if (bits) {
-+ if (bits > state->bits) {
-+ bits -= state->bits;
-+ /* avoid shift by 64 if bits is 64 - bits is never 0 here: */
-+ state->w |= (v >> 1) >> (bits - 1);
-+
-+ *state->p = state->w;
-+ state->p = next_word(state->p);
-+ state->w = 0;
-+ state->bits = 64;
-+ }
-+
-+ state->bits -= bits;
-+ state->w |= v << state->bits;
-+ }
-+}
-+
-+__always_inline
-+static bool set_inc_field(struct pack_state *state, unsigned field, u64 v)
-+{
-+ unsigned bits = state->format->bits_per_field[field];
-+ u64 offset = le64_to_cpu(state->format->field_offset[field]);
-+
-+ if (v < offset)
-+ return false;
-+
-+ v -= offset;
-+
-+ if (fls64(v) > bits)
-+ return false;
-+
-+ __set_inc_field(state, field, v);
-+ return true;
-+}
-+
-+/*
-+ * Note: does NOT set out->format (we don't know what it should be here!)
-+ *
-+ * Also: doesn't work on extents - it doesn't preserve the invariant that
-+ * if k is packed bkey_start_pos(k) will successfully pack
-+ */
-+static bool bch2_bkey_transform_key(const struct bkey_format *out_f,
-+ struct bkey_packed *out,
-+ const struct bkey_format *in_f,
-+ const struct bkey_packed *in)
-+{
-+ struct pack_state out_s = pack_state_init(out_f, out);
-+ struct unpack_state in_s = unpack_state_init(in_f, in);
-+ u64 *w = out->_data;
-+ unsigned i;
-+
-+ *w = 0;
-+
-+ for (i = 0; i < BKEY_NR_FIELDS; i++)
-+ if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i)))
-+ return false;
-+
-+ /* Can't happen because the val would be too big to unpack: */
-+ EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX);
-+
-+ pack_state_finish(&out_s, out);
-+ out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s;
-+ out->needs_whiteout = in->needs_whiteout;
-+ out->type = in->type;
-+
-+ return true;
-+}
-+
-+bool bch2_bkey_transform(const struct bkey_format *out_f,
-+ struct bkey_packed *out,
-+ const struct bkey_format *in_f,
-+ const struct bkey_packed *in)
-+{
-+ if (!bch2_bkey_transform_key(out_f, out, in_f, in))
-+ return false;
-+
-+ memcpy_u64s((u64 *) out + out_f->key_u64s,
-+ (u64 *) in + in_f->key_u64s,
-+ (in->u64s - in_f->key_u64s));
-+ return true;
-+}
-+
-+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format,
-+ const struct bkey_packed *in)
-+{
-+ struct unpack_state state = unpack_state_init(format, in);
-+ struct bkey out;
-+
-+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
-+ EBUG_ON(in->u64s < format->key_u64s);
-+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
-+ EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX);
-+
-+ out.u64s = BKEY_U64s + in->u64s - format->key_u64s;
-+ out.format = KEY_FORMAT_CURRENT;
-+ out.needs_whiteout = in->needs_whiteout;
-+ out.type = in->type;
-+ out.pad[0] = 0;
-+
-+#define x(id, field) out.field = get_inc_field(&state, id);
-+ bkey_fields()
-+#undef x
-+
-+ return out;
-+}
-+
-+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK
-+struct bpos __bkey_unpack_pos(const struct bkey_format *format,
-+ const struct bkey_packed *in)
-+{
-+ struct unpack_state state = unpack_state_init(format, in);
-+ struct bpos out;
-+
-+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
-+ EBUG_ON(in->u64s < format->key_u64s);
-+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
-+
-+ out.inode = get_inc_field(&state, BKEY_FIELD_INODE);
-+ out.offset = get_inc_field(&state, BKEY_FIELD_OFFSET);
-+ out.snapshot = get_inc_field(&state, BKEY_FIELD_SNAPSHOT);
-+
-+ return out;
-+}
-+#endif
-+
-+/**
-+ * bch2_bkey_pack_key -- pack just the key, not the value
-+ * @out: packed result
-+ * @in: key to pack
-+ * @format: format of packed result
-+ *
-+ * Returns: true on success, false on failure
-+ */
-+bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
-+ const struct bkey_format *format)
-+{
-+ struct pack_state state = pack_state_init(format, out);
-+ u64 *w = out->_data;
-+
-+ EBUG_ON((void *) in == (void *) out);
-+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
-+ EBUG_ON(in->format != KEY_FORMAT_CURRENT);
-+
-+ *w = 0;
-+
-+#define x(id, field) if (!set_inc_field(&state, id, in->field)) return false;
-+ bkey_fields()
-+#undef x
-+ pack_state_finish(&state, out);
-+ out->u64s = format->key_u64s + in->u64s - BKEY_U64s;
-+ out->format = KEY_FORMAT_LOCAL_BTREE;
-+ out->needs_whiteout = in->needs_whiteout;
-+ out->type = in->type;
-+
-+ bch2_bkey_pack_verify(out, in, format);
-+ return true;
-+}
-+
-+/**
-+ * bch2_bkey_unpack -- unpack the key and the value
-+ * @b: btree node of @src key (for packed format)
-+ * @dst: unpacked result
-+ * @src: packed input
-+ */
-+void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
-+ const struct bkey_packed *src)
-+{
-+ __bkey_unpack_key(b, &dst->k, src);
-+
-+ memcpy_u64s(&dst->v,
-+ bkeyp_val(&b->format, src),
-+ bkeyp_val_u64s(&b->format, src));
-+}
-+
-+/**
-+ * bch2_bkey_pack -- pack the key and the value
-+ * @dst: packed result
-+ * @src: unpacked input
-+ * @format: format of packed result
-+ *
-+ * Returns: true on success, false on failure
-+ */
-+bool bch2_bkey_pack(struct bkey_packed *dst, const struct bkey_i *src,
-+ const struct bkey_format *format)
-+{
-+ struct bkey_packed tmp;
-+
-+ if (!bch2_bkey_pack_key(&tmp, &src->k, format))
-+ return false;
-+
-+ memmove_u64s((u64 *) dst + format->key_u64s,
-+ &src->v,
-+ bkey_val_u64s(&src->k));
-+ memcpy_u64s_small(dst, &tmp, format->key_u64s);
-+
-+ return true;
-+}
-+
-+__always_inline
-+static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v)
-+{
-+ unsigned bits = state->format->bits_per_field[field];
-+ u64 offset = le64_to_cpu(state->format->field_offset[field]);
-+ bool ret = true;
-+
-+ EBUG_ON(v < offset);
-+ v -= offset;
-+
-+ if (fls64(v) > bits) {
-+ v = ~(~0ULL << bits);
-+ ret = false;
-+ }
-+
-+ __set_inc_field(state, field, v);
-+ return ret;
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+static bool bkey_packed_successor(struct bkey_packed *out,
-+ const struct btree *b,
-+ struct bkey_packed k)
-+{
-+ const struct bkey_format *f = &b->format;
-+ unsigned nr_key_bits = b->nr_key_bits;
-+ unsigned first_bit, offset;
-+ u64 *p;
-+
-+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
-+
-+ if (!nr_key_bits)
-+ return false;
-+
-+ *out = k;
-+
-+ first_bit = high_bit_offset + nr_key_bits - 1;
-+ p = nth_word(high_word(f, out), first_bit >> 6);
-+ offset = 63 - (first_bit & 63);
-+
-+ while (nr_key_bits) {
-+ unsigned bits = min(64 - offset, nr_key_bits);
-+ u64 mask = (~0ULL >> (64 - bits)) << offset;
-+
-+ if ((*p & mask) != mask) {
-+ *p += 1ULL << offset;
-+ EBUG_ON(bch2_bkey_cmp_packed(b, out, &k) <= 0);
-+ return true;
-+ }
-+
-+ *p &= ~mask;
-+ p = prev_word(p);
-+ nr_key_bits -= bits;
-+ offset = 0;
-+ }
-+
-+ return false;
-+}
-+
-+static bool bkey_format_has_too_big_fields(const struct bkey_format *f)
-+{
-+ for (unsigned i = 0; i < f->nr_fields; i++) {
-+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
-+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
-+ u64 packed_max = f->bits_per_field[i]
-+ ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
-+ : 0;
-+ u64 field_offset = le64_to_cpu(f->field_offset[i]);
-+
-+ if (packed_max + field_offset < packed_max ||
-+ packed_max + field_offset > unpacked_max)
-+ return true;
-+ }
-+
-+ return false;
-+}
-+#endif
-+
-+/*
-+ * Returns a packed key that compares <= in
-+ *
-+ * This is used in bset_search_tree(), where we need a packed pos in order to be
-+ * able to compare against the keys in the auxiliary search tree - and it's
-+ * legal to use a packed pos that isn't equivalent to the original pos,
-+ * _provided_ it compares <= to the original pos.
-+ */
-+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
-+ struct bpos in,
-+ const struct btree *b)
-+{
-+ const struct bkey_format *f = &b->format;
-+ struct pack_state state = pack_state_init(f, out);
-+ u64 *w = out->_data;
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ struct bpos orig = in;
-+#endif
-+ bool exact = true;
-+ unsigned i;
-+
-+ /*
-+ * bch2_bkey_pack_key() will write to all of f->key_u64s, minus the 3
-+ * byte header, but pack_pos() won't if the len/version fields are big
-+ * enough - we need to make sure to zero them out:
-+ */
-+ for (i = 0; i < f->key_u64s; i++)
-+ w[i] = 0;
-+
-+ if (unlikely(in.snapshot <
-+ le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) {
-+ if (!in.offset-- &&
-+ !in.inode--)
-+ return BKEY_PACK_POS_FAIL;
-+ in.snapshot = KEY_SNAPSHOT_MAX;
-+ exact = false;
-+ }
-+
-+ if (unlikely(in.offset <
-+ le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) {
-+ if (!in.inode--)
-+ return BKEY_PACK_POS_FAIL;
-+ in.offset = KEY_OFFSET_MAX;
-+ in.snapshot = KEY_SNAPSHOT_MAX;
-+ exact = false;
-+ }
-+
-+ if (unlikely(in.inode <
-+ le64_to_cpu(f->field_offset[BKEY_FIELD_INODE])))
-+ return BKEY_PACK_POS_FAIL;
-+
-+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode))) {
-+ in.offset = KEY_OFFSET_MAX;
-+ in.snapshot = KEY_SNAPSHOT_MAX;
-+ exact = false;
-+ }
-+
-+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset))) {
-+ in.snapshot = KEY_SNAPSHOT_MAX;
-+ exact = false;
-+ }
-+
-+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot)))
-+ exact = false;
-+
-+ pack_state_finish(&state, out);
-+ out->u64s = f->key_u64s;
-+ out->format = KEY_FORMAT_LOCAL_BTREE;
-+ out->type = KEY_TYPE_deleted;
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ if (exact) {
-+ BUG_ON(bkey_cmp_left_packed(b, out, &orig));
-+ } else {
-+ struct bkey_packed successor;
-+
-+ BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0);
-+ BUG_ON(bkey_packed_successor(&successor, b, *out) &&
-+ bkey_cmp_left_packed(b, &successor, &orig) < 0 &&
-+ !bkey_format_has_too_big_fields(f));
-+ }
-+#endif
-+
-+ return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER;
-+}
-+
-+void bch2_bkey_format_init(struct bkey_format_state *s)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++)
-+ s->field_min[i] = U64_MAX;
-+
-+ for (i = 0; i < ARRAY_SIZE(s->field_max); i++)
-+ s->field_max[i] = 0;
-+
-+ /* Make sure we can store a size of 0: */
-+ s->field_min[BKEY_FIELD_SIZE] = 0;
-+}
-+
-+void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p)
-+{
-+ unsigned field = 0;
-+
-+ __bkey_format_add(s, field++, p.inode);
-+ __bkey_format_add(s, field++, p.offset);
-+ __bkey_format_add(s, field++, p.snapshot);
-+}
-+
-+/*
-+ * We don't want it to be possible for the packed format to represent fields
-+ * bigger than a u64... that will cause confusion and issues (like with
-+ * bkey_packed_successor())
-+ */
-+static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i,
-+ unsigned bits, u64 offset)
-+{
-+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
-+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
-+
-+ bits = min(bits, unpacked_bits);
-+
-+ offset = bits == unpacked_bits ? 0 : min(offset, unpacked_max - ((1ULL << bits) - 1));
-+
-+ f->bits_per_field[i] = bits;
-+ f->field_offset[i] = cpu_to_le64(offset);
-+}
-+
-+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s)
-+{
-+ unsigned i, bits = KEY_PACKED_BITS_START;
-+ struct bkey_format ret = {
-+ .nr_fields = BKEY_NR_FIELDS,
-+ };
-+
-+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) {
-+ s->field_min[i] = min(s->field_min[i], s->field_max[i]);
-+
-+ set_format_field(&ret, i,
-+ fls64(s->field_max[i] - s->field_min[i]),
-+ s->field_min[i]);
-+
-+ bits += ret.bits_per_field[i];
-+ }
-+
-+ /* allow for extent merging: */
-+ if (ret.bits_per_field[BKEY_FIELD_SIZE]) {
-+ unsigned b = min(4U, 32U - ret.bits_per_field[BKEY_FIELD_SIZE]);
-+
-+ ret.bits_per_field[BKEY_FIELD_SIZE] += b;
-+ bits += b;
-+ }
-+
-+ ret.key_u64s = DIV_ROUND_UP(bits, 64);
-+
-+ /* if we have enough spare bits, round fields up to nearest byte */
-+ bits = ret.key_u64s * 64 - bits;
-+
-+ for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) {
-+ unsigned r = round_up(ret.bits_per_field[i], 8) -
-+ ret.bits_per_field[i];
-+
-+ if (r <= bits) {
-+ set_format_field(&ret, i,
-+ ret.bits_per_field[i] + r,
-+ le64_to_cpu(ret.field_offset[i]));
-+ bits -= r;
-+ }
-+ }
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ {
-+ struct printbuf buf = PRINTBUF;
-+
-+ BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf));
-+ printbuf_exit(&buf);
-+ }
-+#endif
-+ return ret;
-+}
-+
-+int bch2_bkey_format_invalid(struct bch_fs *c,
-+ struct bkey_format *f,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ unsigned i, bits = KEY_PACKED_BITS_START;
-+
-+ if (f->nr_fields != BKEY_NR_FIELDS) {
-+ prt_printf(err, "incorrect number of fields: got %u, should be %u",
-+ f->nr_fields, BKEY_NR_FIELDS);
-+ return -BCH_ERR_invalid;
-+ }
-+
-+ /*
-+ * Verify that the packed format can't represent fields larger than the
-+ * unpacked format:
-+ */
-+ for (i = 0; i < f->nr_fields; i++) {
-+ if (!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) {
-+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
-+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
-+ u64 packed_max = f->bits_per_field[i]
-+ ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
-+ : 0;
-+ u64 field_offset = le64_to_cpu(f->field_offset[i]);
-+
-+ if (packed_max + field_offset < packed_max ||
-+ packed_max + field_offset > unpacked_max) {
-+ prt_printf(err, "field %u too large: %llu + %llu > %llu",
-+ i, packed_max, field_offset, unpacked_max);
-+ return -BCH_ERR_invalid;
-+ }
-+ }
-+
-+ bits += f->bits_per_field[i];
-+ }
-+
-+ if (f->key_u64s != DIV_ROUND_UP(bits, 64)) {
-+ prt_printf(err, "incorrect key_u64s: got %u, should be %u",
-+ f->key_u64s, DIV_ROUND_UP(bits, 64));
-+ return -BCH_ERR_invalid;
-+ }
-+
-+ return 0;
-+}
-+
-+void bch2_bkey_format_to_text(struct printbuf *out, const struct bkey_format *f)
-+{
-+ prt_printf(out, "u64s %u fields ", f->key_u64s);
-+
-+ for (unsigned i = 0; i < ARRAY_SIZE(f->bits_per_field); i++) {
-+ if (i)
-+ prt_str(out, ", ");
-+ prt_printf(out, "%u:%llu",
-+ f->bits_per_field[i],
-+ le64_to_cpu(f->field_offset[i]));
-+ }
-+}
-+
-+/*
-+ * Most significant differing bit
-+ * Bits are indexed from 0 - return is [0, nr_key_bits)
-+ */
-+__pure
-+unsigned bch2_bkey_greatest_differing_bit(const struct btree *b,
-+ const struct bkey_packed *l_k,
-+ const struct bkey_packed *r_k)
-+{
-+ const u64 *l = high_word(&b->format, l_k);
-+ const u64 *r = high_word(&b->format, r_k);
-+ unsigned nr_key_bits = b->nr_key_bits;
-+ unsigned word_bits = 64 - high_bit_offset;
-+ u64 l_v, r_v;
-+
-+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));
-+
-+ /* for big endian, skip past header */
-+ l_v = *l & (~0ULL >> high_bit_offset);
-+ r_v = *r & (~0ULL >> high_bit_offset);
-+
-+ while (nr_key_bits) {
-+ if (nr_key_bits < word_bits) {
-+ l_v >>= word_bits - nr_key_bits;
-+ r_v >>= word_bits - nr_key_bits;
-+ nr_key_bits = 0;
-+ } else {
-+ nr_key_bits -= word_bits;
-+ }
-+
-+ if (l_v != r_v)
-+ return fls64(l_v ^ r_v) - 1 + nr_key_bits;
-+
-+ l = next_word(l);
-+ r = next_word(r);
-+
-+ l_v = *l;
-+ r_v = *r;
-+ word_bits = 64;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * First set bit
-+ * Bits are indexed from 0 - return is [0, nr_key_bits)
-+ */
-+__pure
-+unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
-+{
-+ const u64 *p = high_word(&b->format, k);
-+ unsigned nr_key_bits = b->nr_key_bits;
-+ unsigned ret = 0, offset;
-+
-+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));
-+
-+ offset = nr_key_bits;
-+ while (offset > 64) {
-+ p = next_word(p);
-+ offset -= 64;
-+ }
-+
-+ offset = 64 - offset;
-+
-+ while (nr_key_bits) {
-+ unsigned bits = nr_key_bits + offset < 64
-+ ? nr_key_bits
-+ : 64 - offset;
-+
-+ u64 mask = (~0ULL >> (64 - bits)) << offset;
-+
-+ if (*p & mask)
-+ return ret + __ffs64(*p & mask) - offset;
-+
-+ p = prev_word(p);
-+ nr_key_bits -= bits;
-+ ret += bits;
-+ offset = 0;
-+ }
-+
-+ return 0;
-+}
-+
-+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
-+
-+#define I(_x) (*(out)++ = (_x))
-+#define I1(i0) I(i0)
-+#define I2(i0, i1) (I1(i0), I(i1))
-+#define I3(i0, i1, i2) (I2(i0, i1), I(i2))
-+#define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3))
-+#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4))
-+
-+static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
-+ enum bch_bkey_fields field,
-+ unsigned dst_offset, unsigned dst_size,
-+ bool *eax_zeroed)
-+{
-+ unsigned bits = format->bits_per_field[field];
-+ u64 offset = le64_to_cpu(format->field_offset[field]);
-+ unsigned i, byte, bit_offset, align, shl, shr;
-+
-+ if (!bits && !offset) {
-+ if (!*eax_zeroed) {
-+ /* xor eax, eax */
-+ I2(0x31, 0xc0);
-+ }
-+
-+ *eax_zeroed = true;
-+ goto set_field;
-+ }
-+
-+ if (!bits) {
-+ /* just return offset: */
-+
-+ switch (dst_size) {
-+ case 8:
-+ if (offset > S32_MAX) {
-+ /* mov [rdi + dst_offset], offset */
-+ I3(0xc7, 0x47, dst_offset);
-+ memcpy(out, &offset, 4);
-+ out += 4;
-+
-+ I3(0xc7, 0x47, dst_offset + 4);
-+ memcpy(out, (void *) &offset + 4, 4);
-+ out += 4;
-+ } else {
-+ /* mov [rdi + dst_offset], offset */
-+ /* sign extended */
-+ I4(0x48, 0xc7, 0x47, dst_offset);
-+ memcpy(out, &offset, 4);
-+ out += 4;
-+ }
-+ break;
-+ case 4:
-+ /* mov [rdi + dst_offset], offset */
-+ I3(0xc7, 0x47, dst_offset);
-+ memcpy(out, &offset, 4);
-+ out += 4;
-+ break;
-+ default:
-+ BUG();
-+ }
-+
-+ return out;
-+ }
-+
-+ bit_offset = format->key_u64s * 64;
-+ for (i = 0; i <= field; i++)
-+ bit_offset -= format->bits_per_field[i];
-+
-+ byte = bit_offset / 8;
-+ bit_offset -= byte * 8;
-+
-+ *eax_zeroed = false;
-+
-+ if (bit_offset == 0 && bits == 8) {
-+ /* movzx eax, BYTE PTR [rsi + imm8] */
-+ I4(0x0f, 0xb6, 0x46, byte);
-+ } else if (bit_offset == 0 && bits == 16) {
-+ /* movzx eax, WORD PTR [rsi + imm8] */
-+ I4(0x0f, 0xb7, 0x46, byte);
-+ } else if (bit_offset + bits <= 32) {
-+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
-+ byte -= align;
-+ bit_offset += align * 8;
-+
-+ BUG_ON(bit_offset + bits > 32);
-+
-+ /* mov eax, [rsi + imm8] */
-+ I3(0x8b, 0x46, byte);
-+
-+ if (bit_offset) {
-+ /* shr eax, imm8 */
-+ I3(0xc1, 0xe8, bit_offset);
-+ }
-+
-+ if (bit_offset + bits < 32) {
-+ unsigned mask = ~0U >> (32 - bits);
-+
-+ /* and eax, imm32 */
-+ I1(0x25);
-+ memcpy(out, &mask, 4);
-+ out += 4;
-+ }
-+ } else if (bit_offset + bits <= 64) {
-+ align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7);
-+ byte -= align;
-+ bit_offset += align * 8;
-+
-+ BUG_ON(bit_offset + bits > 64);
-+
-+ /* mov rax, [rsi + imm8] */
-+ I4(0x48, 0x8b, 0x46, byte);
-+
-+ shl = 64 - bit_offset - bits;
-+ shr = bit_offset + shl;
-+
-+ if (shl) {
-+ /* shl rax, imm8 */
-+ I4(0x48, 0xc1, 0xe0, shl);
-+ }
-+
-+ if (shr) {
-+ /* shr rax, imm8 */
-+ I4(0x48, 0xc1, 0xe8, shr);
-+ }
-+ } else {
-+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
-+ byte -= align;
-+ bit_offset += align * 8;
-+
-+ BUG_ON(bit_offset + bits > 96);
-+
-+ /* mov rax, [rsi + byte] */
-+ I4(0x48, 0x8b, 0x46, byte);
-+
-+ /* mov edx, [rsi + byte + 8] */
-+ I3(0x8b, 0x56, byte + 8);
-+
-+ /* bits from next word: */
-+ shr = bit_offset + bits - 64;
-+ BUG_ON(shr > bit_offset);
-+
-+ /* shr rax, bit_offset */
-+ I4(0x48, 0xc1, 0xe8, shr);
-+
-+ /* shl rdx, imm8 */
-+ I4(0x48, 0xc1, 0xe2, 64 - shr);
-+
-+ /* or rax, rdx */
-+ I3(0x48, 0x09, 0xd0);
-+
-+ shr = bit_offset - shr;
-+
-+ if (shr) {
-+ /* shr rax, imm8 */
-+ I4(0x48, 0xc1, 0xe8, shr);
-+ }
-+ }
-+
-+ /* rax += offset: */
-+ if (offset > S32_MAX) {
-+ /* mov rdx, imm64 */
-+ I2(0x48, 0xba);
-+ memcpy(out, &offset, 8);
-+ out += 8;
-+ /* add %rdx, %rax */
-+ I3(0x48, 0x01, 0xd0);
-+ } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) {
-+ /* add rax, imm32 */
-+ I2(0x48, 0x05);
-+ memcpy(out, &offset, 4);
-+ out += 4;
-+ } else if (offset) {
-+ /* add eax, imm32 */
-+ I1(0x05);
-+ memcpy(out, &offset, 4);
-+ out += 4;
-+ }
-+set_field:
-+ switch (dst_size) {
-+ case 8:
-+ /* mov [rdi + dst_offset], rax */
-+ I4(0x48, 0x89, 0x47, dst_offset);
-+ break;
-+ case 4:
-+ /* mov [rdi + dst_offset], eax */
-+ I3(0x89, 0x47, dst_offset);
-+ break;
-+ default:
-+ BUG();
-+ }
-+
-+ return out;
-+}
-+
-+int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
-+{
-+ bool eax_zeroed = false;
-+ u8 *out = _out;
-+
-+ /*
-+ * rdi: dst - unpacked key
-+ * rsi: src - packed key
-+ */
-+
-+ /* k->u64s, k->format, k->type */
-+
-+ /* mov eax, [rsi] */
-+ I2(0x8b, 0x06);
-+
-+ /* add eax, BKEY_U64s - format->key_u64s */
-+ I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0);
-+
-+ /* and eax, imm32: mask out k->pad: */
-+ I5(0x25, 0xff, 0xff, 0xff, 0);
-+
-+ /* mov [rdi], eax */
-+ I2(0x89, 0x07);
-+
-+#define x(id, field) \
-+ out = compile_bkey_field(format, out, id, \
-+ offsetof(struct bkey, field), \
-+ sizeof(((struct bkey *) NULL)->field), \
-+ &eax_zeroed);
-+ bkey_fields()
-+#undef x
-+
-+ /* retq */
-+ I1(0xc3);
-+
-+ return (void *) out - _out;
-+}
-+
-+#else
-+#endif
-+
-+__pure
-+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
-+ const struct bkey_packed *r,
-+ const struct btree *b)
-+{
-+ return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
-+}
-+
-+__pure __flatten
-+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b,
-+ const struct bkey_packed *l,
-+ const struct bpos *r)
-+{
-+ return bpos_cmp(bkey_unpack_pos_format_checked(b, l), *r);
-+}
-+
-+__pure __flatten
-+int bch2_bkey_cmp_packed(const struct btree *b,
-+ const struct bkey_packed *l,
-+ const struct bkey_packed *r)
-+{
-+ return bch2_bkey_cmp_packed_inlined(b, l, r);
-+}
-+
-+__pure __flatten
-+int __bch2_bkey_cmp_left_packed(const struct btree *b,
-+ const struct bkey_packed *l,
-+ const struct bpos *r)
-+{
-+ const struct bkey *l_unpacked;
-+
-+ return unlikely(l_unpacked = packed_to_bkey_c(l))
-+ ? bpos_cmp(l_unpacked->p, *r)
-+ : __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
-+}
-+
-+void bch2_bpos_swab(struct bpos *p)
-+{
-+ u8 *l = (u8 *) p;
-+ u8 *h = ((u8 *) &p[1]) - 1;
-+
-+ while (l < h) {
-+ swap(*l, *h);
-+ l++;
-+ --h;
-+ }
-+}
-+
-+void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
-+{
-+ const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current;
-+ u8 *l = k->key_start;
-+ u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
-+
-+ while (l < h) {
-+ swap(*l, *h);
-+ l++;
-+ --h;
-+ }
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+void bch2_bkey_pack_test(void)
-+{
-+ struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0);
-+ struct bkey_packed p;
-+
-+ struct bkey_format test_format = {
-+ .key_u64s = 3,
-+ .nr_fields = BKEY_NR_FIELDS,
-+ .bits_per_field = {
-+ 13,
-+ 64,
-+ 32,
-+ },
-+ };
-+
-+ struct unpack_state in_s =
-+ unpack_state_init(&bch2_bkey_format_current, (void *) &t);
-+ struct pack_state out_s = pack_state_init(&test_format, &p);
-+ unsigned i;
-+
-+ for (i = 0; i < out_s.format->nr_fields; i++) {
-+ u64 a, v = get_inc_field(&in_s, i);
-+
-+ switch (i) {
-+#define x(id, field) case id: a = t.field; break;
-+ bkey_fields()
-+#undef x
-+ default:
-+ BUG();
-+ }
-+
-+ if (a != v)
-+ panic("got %llu actual %llu i %u\n", v, a, i);
-+
-+ if (!set_inc_field(&out_s, i, v))
-+ panic("failed at %u\n", i);
-+ }
-+
-+ BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format));
-+}
-+#endif
-diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
-new file mode 100644
-index 000000000000..831be01809f2
---- /dev/null
-+++ b/fs/bcachefs/bkey.h
-@@ -0,0 +1,778 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_H
-+#define _BCACHEFS_BKEY_H
-+
-+#include <linux/bug.h>
-+#include "bcachefs_format.h"
-+
-+#include "btree_types.h"
-+#include "util.h"
-+#include "vstructs.h"
-+
-+enum bkey_invalid_flags {
-+ BKEY_INVALID_WRITE = (1U << 0),
-+ BKEY_INVALID_COMMIT = (1U << 1),
-+ BKEY_INVALID_JOURNAL = (1U << 2),
-+};
-+
-+#if 0
-+
-+/*
-+ * compiled unpack functions are disabled, pending a new interface for
-+ * dynamically allocating executable memory:
-+ */
-+
-+#ifdef CONFIG_X86_64
-+#define HAVE_BCACHEFS_COMPILED_UNPACK 1
-+#endif
-+#endif
-+
-+void bch2_bkey_packed_to_binary_text(struct printbuf *,
-+ const struct bkey_format *,
-+ const struct bkey_packed *);
-+
-+/* bkey with split value, const */
-+struct bkey_s_c {
-+ const struct bkey *k;
-+ const struct bch_val *v;
-+};
-+
-+/* bkey with split value */
-+struct bkey_s {
-+ union {
-+ struct {
-+ struct bkey *k;
-+ struct bch_val *v;
-+ };
-+ struct bkey_s_c s_c;
-+ };
-+};
-+
-+#define bkey_p_next(_k) vstruct_next(_k)
-+
-+static inline struct bkey_i *bkey_next(struct bkey_i *k)
-+{
-+ return (struct bkey_i *) ((u64 *) k->_data + k->k.u64s);
-+}
-+
-+#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s)
-+
-+static inline size_t bkey_val_bytes(const struct bkey *k)
-+{
-+ return bkey_val_u64s(k) * sizeof(u64);
-+}
-+
-+static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
-+{
-+ unsigned u64s = BKEY_U64s + val_u64s;
-+
-+ BUG_ON(u64s > U8_MAX);
-+ k->u64s = u64s;
-+}
-+
-+static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
-+{
-+ set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
-+}
-+
-+#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
-+
-+#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted)
-+
-+#define bkey_whiteout(_k) \
-+ ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
-+
-+enum bkey_lr_packed {
-+ BKEY_PACKED_BOTH,
-+ BKEY_PACKED_RIGHT,
-+ BKEY_PACKED_LEFT,
-+ BKEY_PACKED_NONE,
-+};
-+
-+#define bkey_lr_packed(_l, _r) \
-+ ((_l)->format + ((_r)->format << 1))
-+
-+static inline void bkey_p_copy(struct bkey_packed *dst, const struct bkey_packed *src)
-+{
-+ memcpy_u64s_small(dst, src, src->u64s);
-+}
-+
-+static inline void bkey_copy(struct bkey_i *dst, const struct bkey_i *src)
-+{
-+ memcpy_u64s_small(dst, src, src->k.u64s);
-+}
-+
-+struct btree;
-+
-+__pure
-+unsigned bch2_bkey_greatest_differing_bit(const struct btree *,
-+ const struct bkey_packed *,
-+ const struct bkey_packed *);
-+__pure
-+unsigned bch2_bkey_ffs(const struct btree *, const struct bkey_packed *);
-+
-+__pure
-+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *,
-+ const struct bkey_packed *,
-+ const struct btree *);
-+
-+__pure
-+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *,
-+ const struct bkey_packed *,
-+ const struct bpos *);
-+
-+__pure
-+int bch2_bkey_cmp_packed(const struct btree *,
-+ const struct bkey_packed *,
-+ const struct bkey_packed *);
-+
-+__pure
-+int __bch2_bkey_cmp_left_packed(const struct btree *,
-+ const struct bkey_packed *,
-+ const struct bpos *);
-+
-+static inline __pure
-+int bkey_cmp_left_packed(const struct btree *b,
-+ const struct bkey_packed *l, const struct bpos *r)
-+{
-+ return __bch2_bkey_cmp_left_packed(b, l, r);
-+}
-+
-+/*
-+ * The compiler generates better code when we pass bpos by ref, but it's often
-+ * enough terribly convenient to pass it by val... as much as I hate c++, const
-+ * ref would be nice here:
-+ */
-+__pure __flatten
-+static inline int bkey_cmp_left_packed_byval(const struct btree *b,
-+ const struct bkey_packed *l,
-+ struct bpos r)
-+{
-+ return bkey_cmp_left_packed(b, l, &r);
-+}
-+
-+static __always_inline bool bpos_eq(struct bpos l, struct bpos r)
-+{
-+ return !((l.inode ^ r.inode) |
-+ (l.offset ^ r.offset) |
-+ (l.snapshot ^ r.snapshot));
-+}
-+
-+static __always_inline bool bpos_lt(struct bpos l, struct bpos r)
-+{
-+ return l.inode != r.inode ? l.inode < r.inode :
-+ l.offset != r.offset ? l.offset < r.offset :
-+ l.snapshot != r.snapshot ? l.snapshot < r.snapshot : false;
-+}
-+
-+static __always_inline bool bpos_le(struct bpos l, struct bpos r)
-+{
-+ return l.inode != r.inode ? l.inode < r.inode :
-+ l.offset != r.offset ? l.offset < r.offset :
-+ l.snapshot != r.snapshot ? l.snapshot < r.snapshot : true;
-+}
-+
-+static __always_inline bool bpos_gt(struct bpos l, struct bpos r)
-+{
-+ return bpos_lt(r, l);
-+}
-+
-+static __always_inline bool bpos_ge(struct bpos l, struct bpos r)
-+{
-+ return bpos_le(r, l);
-+}
-+
-+static __always_inline int bpos_cmp(struct bpos l, struct bpos r)
-+{
-+ return cmp_int(l.inode, r.inode) ?:
-+ cmp_int(l.offset, r.offset) ?:
-+ cmp_int(l.snapshot, r.snapshot);
-+}
-+
-+static inline struct bpos bpos_min(struct bpos l, struct bpos r)
-+{
-+ return bpos_lt(l, r) ? l : r;
-+}
-+
-+static inline struct bpos bpos_max(struct bpos l, struct bpos r)
-+{
-+ return bpos_gt(l, r) ? l : r;
-+}
-+
-+static __always_inline bool bkey_eq(struct bpos l, struct bpos r)
-+{
-+ return !((l.inode ^ r.inode) |
-+ (l.offset ^ r.offset));
-+}
-+
-+static __always_inline bool bkey_lt(struct bpos l, struct bpos r)
-+{
-+ return l.inode != r.inode
-+ ? l.inode < r.inode
-+ : l.offset < r.offset;
-+}
-+
-+static __always_inline bool bkey_le(struct bpos l, struct bpos r)
-+{
-+ return l.inode != r.inode
-+ ? l.inode < r.inode
-+ : l.offset <= r.offset;
-+}
-+
-+static __always_inline bool bkey_gt(struct bpos l, struct bpos r)
-+{
-+ return bkey_lt(r, l);
-+}
-+
-+static __always_inline bool bkey_ge(struct bpos l, struct bpos r)
-+{
-+ return bkey_le(r, l);
-+}
-+
-+static __always_inline int bkey_cmp(struct bpos l, struct bpos r)
-+{
-+ return cmp_int(l.inode, r.inode) ?:
-+ cmp_int(l.offset, r.offset);
-+}
-+
-+static inline struct bpos bkey_min(struct bpos l, struct bpos r)
-+{
-+ return bkey_lt(l, r) ? l : r;
-+}
-+
-+static inline struct bpos bkey_max(struct bpos l, struct bpos r)
-+{
-+ return bkey_gt(l, r) ? l : r;
-+}
-+
-+void bch2_bpos_swab(struct bpos *);
-+void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
-+
-+static __always_inline int bversion_cmp(struct bversion l, struct bversion r)
-+{
-+ return cmp_int(l.hi, r.hi) ?:
-+ cmp_int(l.lo, r.lo);
-+}
-+
-+#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 })
-+#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL })
-+
-+static __always_inline int bversion_zero(struct bversion v)
-+{
-+ return !bversion_cmp(v, ZERO_VERSION);
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+/* statement expressions confusing unlikely()? */
-+#define bkey_packed(_k) \
-+ ({ EBUG_ON((_k)->format > KEY_FORMAT_CURRENT); \
-+ (_k)->format != KEY_FORMAT_CURRENT; })
-+#else
-+#define bkey_packed(_k) ((_k)->format != KEY_FORMAT_CURRENT)
-+#endif
-+
-+/*
-+ * It's safe to treat an unpacked bkey as a packed one, but not the reverse
-+ */
-+static inline struct bkey_packed *bkey_to_packed(struct bkey_i *k)
-+{
-+ return (struct bkey_packed *) k;
-+}
-+
-+static inline const struct bkey_packed *bkey_to_packed_c(const struct bkey_i *k)
-+{
-+ return (const struct bkey_packed *) k;
-+}
-+
-+static inline struct bkey_i *packed_to_bkey(struct bkey_packed *k)
-+{
-+ return bkey_packed(k) ? NULL : (struct bkey_i *) k;
-+}
-+
-+static inline const struct bkey *packed_to_bkey_c(const struct bkey_packed *k)
-+{
-+ return bkey_packed(k) ? NULL : (const struct bkey *) k;
-+}
-+
-+static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
-+{
-+ return format->bits_per_field[BKEY_FIELD_INODE] +
-+ format->bits_per_field[BKEY_FIELD_OFFSET] +
-+ format->bits_per_field[BKEY_FIELD_SNAPSHOT];
-+}
-+
-+static inline struct bpos bpos_successor(struct bpos p)
-+{
-+ if (!++p.snapshot &&
-+ !++p.offset &&
-+ !++p.inode)
-+ BUG();
-+
-+ return p;
-+}
-+
-+static inline struct bpos bpos_predecessor(struct bpos p)
-+{
-+ if (!p.snapshot-- &&
-+ !p.offset-- &&
-+ !p.inode--)
-+ BUG();
-+
-+ return p;
-+}
-+
-+static inline struct bpos bpos_nosnap_successor(struct bpos p)
-+{
-+ p.snapshot = 0;
-+
-+ if (!++p.offset &&
-+ !++p.inode)
-+ BUG();
-+
-+ return p;
-+}
-+
-+static inline struct bpos bpos_nosnap_predecessor(struct bpos p)
-+{
-+ p.snapshot = 0;
-+
-+ if (!p.offset-- &&
-+ !p.inode--)
-+ BUG();
-+
-+ return p;
-+}
-+
-+static inline u64 bkey_start_offset(const struct bkey *k)
-+{
-+ return k->p.offset - k->size;
-+}
-+
-+static inline struct bpos bkey_start_pos(const struct bkey *k)
-+{
-+ return (struct bpos) {
-+ .inode = k->p.inode,
-+ .offset = bkey_start_offset(k),
-+ .snapshot = k->p.snapshot,
-+ };
-+}
-+
-+/* Packed helpers */
-+
-+static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
-+ const struct bkey_packed *k)
-+{
-+ unsigned ret = bkey_packed(k) ? format->key_u64s : BKEY_U64s;
-+
-+ EBUG_ON(k->u64s < ret);
-+ return ret;
-+}
-+
-+static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
-+ const struct bkey_packed *k)
-+{
-+ return bkeyp_key_u64s(format, k) * sizeof(u64);
-+}
-+
-+static inline unsigned bkeyp_val_u64s(const struct bkey_format *format,
-+ const struct bkey_packed *k)
-+{
-+ return k->u64s - bkeyp_key_u64s(format, k);
-+}
-+
-+static inline size_t bkeyp_val_bytes(const struct bkey_format *format,
-+ const struct bkey_packed *k)
-+{
-+ return bkeyp_val_u64s(format, k) * sizeof(u64);
-+}
-+
-+static inline void set_bkeyp_val_u64s(const struct bkey_format *format,
-+ struct bkey_packed *k, unsigned val_u64s)
-+{
-+ k->u64s = bkeyp_key_u64s(format, k) + val_u64s;
-+}
-+
-+#define bkeyp_val(_format, _k) \
-+ ((struct bch_val *) ((u64 *) (_k)->_data + bkeyp_key_u64s(_format, _k)))
-+
-+extern const struct bkey_format bch2_bkey_format_current;
-+
-+bool bch2_bkey_transform(const struct bkey_format *,
-+ struct bkey_packed *,
-+ const struct bkey_format *,
-+ const struct bkey_packed *);
-+
-+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *,
-+ const struct bkey_packed *);
-+
-+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK
-+struct bpos __bkey_unpack_pos(const struct bkey_format *,
-+ const struct bkey_packed *);
-+#endif
-+
-+bool bch2_bkey_pack_key(struct bkey_packed *, const struct bkey *,
-+ const struct bkey_format *);
-+
-+enum bkey_pack_pos_ret {
-+ BKEY_PACK_POS_EXACT,
-+ BKEY_PACK_POS_SMALLER,
-+ BKEY_PACK_POS_FAIL,
-+};
-+
-+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *, struct bpos,
-+ const struct btree *);
-+
-+static inline bool bkey_pack_pos(struct bkey_packed *out, struct bpos in,
-+ const struct btree *b)
-+{
-+ return bch2_bkey_pack_pos_lossy(out, in, b) == BKEY_PACK_POS_EXACT;
-+}
-+
-+void bch2_bkey_unpack(const struct btree *, struct bkey_i *,
-+ const struct bkey_packed *);
-+bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *,
-+ const struct bkey_format *);
-+
-+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
-+
-+static inline void
-+__bkey_unpack_key_format_checked(const struct btree *b,
-+ struct bkey *dst,
-+ const struct bkey_packed *src)
-+{
-+ if (IS_ENABLED(HAVE_BCACHEFS_COMPILED_UNPACK)) {
-+ compiled_unpack_fn unpack_fn = b->aux_data;
-+ unpack_fn(dst, src);
-+
-+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
-+ bch2_expensive_debug_checks) {
-+ struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
-+
-+ BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
-+ }
-+ } else {
-+ *dst = __bch2_bkey_unpack_key(&b->format, src);
-+ }
-+}
-+
-+static inline struct bkey
-+bkey_unpack_key_format_checked(const struct btree *b,
-+ const struct bkey_packed *src)
-+{
-+ struct bkey dst;
-+
-+ __bkey_unpack_key_format_checked(b, &dst, src);
-+ return dst;
-+}
-+
-+static inline void __bkey_unpack_key(const struct btree *b,
-+ struct bkey *dst,
-+ const struct bkey_packed *src)
-+{
-+ if (likely(bkey_packed(src)))
-+ __bkey_unpack_key_format_checked(b, dst, src);
-+ else
-+ *dst = *packed_to_bkey_c(src);
-+}
-+
-+/**
-+ * bkey_unpack_key -- unpack just the key, not the value
-+ */
-+static inline struct bkey bkey_unpack_key(const struct btree *b,
-+ const struct bkey_packed *src)
-+{
-+ return likely(bkey_packed(src))
-+ ? bkey_unpack_key_format_checked(b, src)
-+ : *packed_to_bkey_c(src);
-+}
-+
-+static inline struct bpos
-+bkey_unpack_pos_format_checked(const struct btree *b,
-+ const struct bkey_packed *src)
-+{
-+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
-+ return bkey_unpack_key_format_checked(b, src).p;
-+#else
-+ return __bkey_unpack_pos(&b->format, src);
-+#endif
-+}
-+
-+static inline struct bpos bkey_unpack_pos(const struct btree *b,
-+ const struct bkey_packed *src)
-+{
-+ return likely(bkey_packed(src))
-+ ? bkey_unpack_pos_format_checked(b, src)
-+ : packed_to_bkey_c(src)->p;
-+}
-+
-+/* Disassembled bkeys */
-+
-+static inline struct bkey_s_c bkey_disassemble(const struct btree *b,
-+ const struct bkey_packed *k,
-+ struct bkey *u)
-+{
-+ __bkey_unpack_key(b, u, k);
-+
-+ return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
-+}
-+
-+/* non const version: */
-+static inline struct bkey_s __bkey_disassemble(const struct btree *b,
-+ struct bkey_packed *k,
-+ struct bkey *u)
-+{
-+ __bkey_unpack_key(b, u, k);
-+
-+ return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
-+}
-+
-+static inline u64 bkey_field_max(const struct bkey_format *f,
-+ enum bch_bkey_fields nr)
-+{
-+ return f->bits_per_field[nr] < 64
-+ ? (le64_to_cpu(f->field_offset[nr]) +
-+ ~(~0ULL << f->bits_per_field[nr]))
-+ : U64_MAX;
-+}
-+
-+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
-+
-+int bch2_compile_bkey_format(const struct bkey_format *, void *);
-+
-+#else
-+
-+static inline int bch2_compile_bkey_format(const struct bkey_format *format,
-+ void *out) { return 0; }
-+
-+#endif
-+
-+static inline void bkey_reassemble(struct bkey_i *dst,
-+ struct bkey_s_c src)
-+{
-+ dst->k = *src.k;
-+ memcpy_u64s_small(&dst->v, src.v, bkey_val_u64s(src.k));
-+}
-+
-+#define bkey_s_null ((struct bkey_s) { .k = NULL })
-+#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL })
-+
-+#define bkey_s_err(err) ((struct bkey_s) { .k = ERR_PTR(err) })
-+#define bkey_s_c_err(err) ((struct bkey_s_c) { .k = ERR_PTR(err) })
-+
-+static inline struct bkey_s bkey_to_s(struct bkey *k)
-+{
-+ return (struct bkey_s) { .k = k, .v = NULL };
-+}
-+
-+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k)
-+{
-+ return (struct bkey_s_c) { .k = k, .v = NULL };
-+}
-+
-+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k)
-+{
-+ return (struct bkey_s) { .k = &k->k, .v = &k->v };
-+}
-+
-+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
-+{
-+ return (struct bkey_s_c) { .k = &k->k, .v = &k->v };
-+}
-+
-+/*
-+ * For a given type of value (e.g. struct bch_extent), generates the types for
-+ * bkey + bch_extent - inline, split, split const - and also all the conversion
-+ * functions, which also check that the value is of the correct type.
-+ *
-+ * We use anonymous unions for upcasting - e.g. converting from e.g. a
-+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
-+ * functions.
-+ */
-+#define x(name, ...) \
-+struct bkey_i_##name { \
-+ union { \
-+ struct bkey k; \
-+ struct bkey_i k_i; \
-+ }; \
-+ struct bch_##name v; \
-+}; \
-+ \
-+struct bkey_s_c_##name { \
-+ union { \
-+ struct { \
-+ const struct bkey *k; \
-+ const struct bch_##name *v; \
-+ }; \
-+ struct bkey_s_c s_c; \
-+ }; \
-+}; \
-+ \
-+struct bkey_s_##name { \
-+ union { \
-+ struct { \
-+ struct bkey *k; \
-+ struct bch_##name *v; \
-+ }; \
-+ struct bkey_s_c_##name c; \
-+ struct bkey_s s; \
-+ struct bkey_s_c s_c; \
-+ }; \
-+}; \
-+ \
-+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \
-+{ \
-+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \
-+ return container_of(&k->k, struct bkey_i_##name, k); \
-+} \
-+ \
-+static inline const struct bkey_i_##name * \
-+bkey_i_to_##name##_c(const struct bkey_i *k) \
-+{ \
-+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \
-+ return container_of(&k->k, struct bkey_i_##name, k); \
-+} \
-+ \
-+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \
-+{ \
-+ EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name); \
-+ return (struct bkey_s_##name) { \
-+ .k = k.k, \
-+ .v = container_of(k.v, struct bch_##name, v), \
-+ }; \
-+} \
-+ \
-+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
-+{ \
-+ EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name); \
-+ return (struct bkey_s_c_##name) { \
-+ .k = k.k, \
-+ .v = container_of(k.v, struct bch_##name, v), \
-+ }; \
-+} \
-+ \
-+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\
-+{ \
-+ return (struct bkey_s_##name) { \
-+ .k = &k->k, \
-+ .v = &k->v, \
-+ }; \
-+} \
-+ \
-+static inline struct bkey_s_c_##name \
-+name##_i_to_s_c(const struct bkey_i_##name *k) \
-+{ \
-+ return (struct bkey_s_c_##name) { \
-+ .k = &k->k, \
-+ .v = &k->v, \
-+ }; \
-+} \
-+ \
-+static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \
-+{ \
-+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \
-+ return (struct bkey_s_##name) { \
-+ .k = &k->k, \
-+ .v = container_of(&k->v, struct bch_##name, v), \
-+ }; \
-+} \
-+ \
-+static inline struct bkey_s_c_##name \
-+bkey_i_to_s_c_##name(const struct bkey_i *k) \
-+{ \
-+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \
-+ return (struct bkey_s_c_##name) { \
-+ .k = &k->k, \
-+ .v = container_of(&k->v, struct bch_##name, v), \
-+ }; \
-+} \
-+ \
-+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
-+{ \
-+ struct bkey_i_##name *k = \
-+ container_of(&_k->k, struct bkey_i_##name, k); \
-+ \
-+ bkey_init(&k->k); \
-+ memset(&k->v, 0, sizeof(k->v)); \
-+ k->k.type = KEY_TYPE_##name; \
-+ set_bkey_val_bytes(&k->k, sizeof(k->v)); \
-+ \
-+ return k; \
-+}
-+
-+BCH_BKEY_TYPES();
-+#undef x
-+
-+/* byte order helpers */
-+
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+
-+static inline unsigned high_word_offset(const struct bkey_format *f)
-+{
-+ return f->key_u64s - 1;
-+}
-+
-+#define high_bit_offset 0
-+#define nth_word(p, n) ((p) - (n))
-+
-+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-+
-+static inline unsigned high_word_offset(const struct bkey_format *f)
-+{
-+ return 0;
-+}
-+
-+#define high_bit_offset KEY_PACKED_BITS_START
-+#define nth_word(p, n) ((p) + (n))
-+
-+#else
-+#error edit for your odd byteorder.
-+#endif
-+
-+#define high_word(f, k) ((u64 *) (k)->_data + high_word_offset(f))
-+#define next_word(p) nth_word(p, 1)
-+#define prev_word(p) nth_word(p, -1)
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+void bch2_bkey_pack_test(void);
-+#else
-+static inline void bch2_bkey_pack_test(void) {}
-+#endif
-+
-+#define bkey_fields() \
-+ x(BKEY_FIELD_INODE, p.inode) \
-+ x(BKEY_FIELD_OFFSET, p.offset) \
-+ x(BKEY_FIELD_SNAPSHOT, p.snapshot) \
-+ x(BKEY_FIELD_SIZE, size) \
-+ x(BKEY_FIELD_VERSION_HI, version.hi) \
-+ x(BKEY_FIELD_VERSION_LO, version.lo)
-+
-+struct bkey_format_state {
-+ u64 field_min[BKEY_NR_FIELDS];
-+ u64 field_max[BKEY_NR_FIELDS];
-+};
-+
-+void bch2_bkey_format_init(struct bkey_format_state *);
-+
-+static inline void __bkey_format_add(struct bkey_format_state *s, unsigned field, u64 v)
-+{
-+ s->field_min[field] = min(s->field_min[field], v);
-+ s->field_max[field] = max(s->field_max[field], v);
-+}
-+
-+/*
-+ * Widens the per-field min/max in @s to cover @k, so that @k can be packed
-+ */
-+static inline void bch2_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k)
-+{
-+#define x(id, field) __bkey_format_add(s, id, k->field);
-+ bkey_fields()
-+#undef x
-+}
-+
-+void bch2_bkey_format_add_pos(struct bkey_format_state *, struct bpos);
-+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *);
-+int bch2_bkey_format_invalid(struct bch_fs *, struct bkey_format *,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_bkey_format_to_text(struct printbuf *, const struct bkey_format *);
-+
-+#endif /* _BCACHEFS_BKEY_H */
-diff --git a/fs/bcachefs/bkey_buf.h b/fs/bcachefs/bkey_buf.h
-new file mode 100644
-index 000000000000..a30c4ae8eb36
---- /dev/null
-+++ b/fs/bcachefs/bkey_buf.h
-@@ -0,0 +1,61 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_BUF_H
-+#define _BCACHEFS_BKEY_BUF_H
-+
-+#include "bcachefs.h"
-+#include "bkey.h"
-+
-+struct bkey_buf {
-+ struct bkey_i *k;
-+ u64 onstack[12];
-+};
-+
-+static inline void bch2_bkey_buf_realloc(struct bkey_buf *s,
-+ struct bch_fs *c, unsigned u64s)
-+{
-+ if (s->k == (void *) s->onstack &&
-+ u64s > ARRAY_SIZE(s->onstack)) {
-+ s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
-+ memcpy(s->k, s->onstack, sizeof(s->onstack));
-+ }
-+}
-+
-+static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s,
-+ struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ bch2_bkey_buf_realloc(s, c, k.k->u64s);
-+ bkey_reassemble(s->k, k);
-+}
-+
-+static inline void bch2_bkey_buf_copy(struct bkey_buf *s,
-+ struct bch_fs *c,
-+ struct bkey_i *src)
-+{
-+ bch2_bkey_buf_realloc(s, c, src->k.u64s);
-+ bkey_copy(s->k, src);
-+}
-+
-+static inline void bch2_bkey_buf_unpack(struct bkey_buf *s,
-+ struct bch_fs *c,
-+ struct btree *b,
-+ struct bkey_packed *src)
-+{
-+ bch2_bkey_buf_realloc(s, c, BKEY_U64s +
-+ bkeyp_val_u64s(&b->format, src));
-+ bch2_bkey_unpack(b, s->k, src);
-+}
-+
-+static inline void bch2_bkey_buf_init(struct bkey_buf *s)
-+{
-+ s->k = (void *) s->onstack;
-+}
-+
-+static inline void bch2_bkey_buf_exit(struct bkey_buf *s, struct bch_fs *c)
-+{
-+ if (s->k != (void *) s->onstack)
-+ mempool_free(s->k, &c->large_bkey_pool);
-+ s->k = NULL;
-+}
-+
-+#endif /* _BCACHEFS_BKEY_BUF_H */
-diff --git a/fs/bcachefs/bkey_cmp.h b/fs/bcachefs/bkey_cmp.h
-new file mode 100644
-index 000000000000..5f42a6e69360
---- /dev/null
-+++ b/fs/bcachefs/bkey_cmp.h
-@@ -0,0 +1,129 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_CMP_H
-+#define _BCACHEFS_BKEY_CMP_H
-+
-+#include "bkey.h"
-+
-+#ifdef CONFIG_X86_64
-+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-+ unsigned nr_key_bits)
-+{
-+ long d0, d1, d2, d3;
-+ int cmp;
-+
-+ /* we shouldn't need asm for this, but gcc handles the C version poorly: */
-+
-+ asm(".intel_syntax noprefix;"
-+ "xor eax, eax;"
-+ "xor edx, edx;"
-+ "1:;"
-+ "mov r8, [rdi];"
-+ "mov r9, [rsi];"
-+ "sub ecx, 64;"
-+ "jl 2f;"
-+
-+ "cmp r8, r9;"
-+ "jnz 3f;"
-+
-+ "lea rdi, [rdi - 8];"
-+ "lea rsi, [rsi - 8];"
-+ "jmp 1b;"
-+
-+ "2:;"
-+ "not ecx;"
-+ "shr r8, 1;"
-+ "shr r9, 1;"
-+ "shr r8, cl;"
-+ "shr r9, cl;"
-+ "cmp r8, r9;"
-+
-+ "3:\n"
-+ "seta al;"
-+ "setb dl;"
-+ "sub eax, edx;"
-+ ".att_syntax prefix;"
-+ : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
-+ : "0" (l), "1" (r), "3" (nr_key_bits)
-+ : "r8", "r9", "cc", "memory");
-+
-+ return cmp;
-+}
-+#else
-+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-+ unsigned nr_key_bits)
-+{
-+ u64 l_v, r_v;
-+
-+ if (!nr_key_bits)
-+ return 0;
-+
-+ /* for big endian, skip past header */
-+ nr_key_bits += high_bit_offset;
-+ l_v = *l & (~0ULL >> high_bit_offset);
-+ r_v = *r & (~0ULL >> high_bit_offset);
-+
-+ while (1) {
-+ if (nr_key_bits < 64) {
-+ l_v >>= 64 - nr_key_bits;
-+ r_v >>= 64 - nr_key_bits;
-+ nr_key_bits = 0;
-+ } else {
-+ nr_key_bits -= 64;
-+ }
-+
-+ if (!nr_key_bits || l_v != r_v)
-+ break;
-+
-+ l = next_word(l);
-+ r = next_word(r);
-+
-+ l_v = *l;
-+ r_v = *r;
-+ }
-+
-+ return cmp_int(l_v, r_v);
-+}
-+#endif
-+
-+static inline __pure __flatten
-+int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l,
-+ const struct bkey_packed *r,
-+ const struct btree *b)
-+{
-+ const struct bkey_format *f = &b->format;
-+ int ret;
-+
-+ EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
-+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
-+
-+ ret = __bkey_cmp_bits(high_word(f, l),
-+ high_word(f, r),
-+ b->nr_key_bits);
-+
-+ EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
-+ bkey_unpack_pos(b, r)));
-+ return ret;
-+}
-+
-+static inline __pure __flatten
-+int bch2_bkey_cmp_packed_inlined(const struct btree *b,
-+ const struct bkey_packed *l,
-+ const struct bkey_packed *r)
-+{
-+ struct bkey unpacked;
-+
-+ if (likely(bkey_packed(l) && bkey_packed(r)))
-+ return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
-+
-+ if (bkey_packed(l)) {
-+ __bkey_unpack_key_format_checked(b, &unpacked, l);
-+ l = (void *) &unpacked;
-+ } else if (bkey_packed(r)) {
-+ __bkey_unpack_key_format_checked(b, &unpacked, r);
-+ r = (void *) &unpacked;
-+ }
-+
-+ return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
-+}
-+
-+#endif /* _BCACHEFS_BKEY_CMP_H */
-diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
-new file mode 100644
-index 000000000000..761f5e33b1e6
---- /dev/null
-+++ b/fs/bcachefs/bkey_methods.c
-@@ -0,0 +1,459 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "backpointers.h"
-+#include "bkey_methods.h"
-+#include "btree_cache.h"
-+#include "btree_types.h"
-+#include "alloc_background.h"
-+#include "dirent.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "inode.h"
-+#include "io_misc.h"
-+#include "lru.h"
-+#include "quota.h"
-+#include "reflink.h"
-+#include "snapshot.h"
-+#include "subvolume.h"
-+#include "xattr.h"
-+
-+const char * const bch2_bkey_types[] = {
-+#define x(name, nr) #name,
-+ BCH_BKEY_TYPES()
-+#undef x
-+ NULL
-+};
-+
-+static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+ return 0;
-+}
-+
-+#define bch2_bkey_ops_deleted ((struct bkey_ops) { \
-+ .key_invalid = deleted_key_invalid, \
-+})
-+
-+#define bch2_bkey_ops_whiteout ((struct bkey_ops) { \
-+ .key_invalid = deleted_key_invalid, \
-+})
-+
-+static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_val_bytes(k.k), c, err,
-+ bkey_val_size_nonzero,
-+ "incorrect value size (%zu != 0)",
-+ bkey_val_bytes(k.k));
-+fsck_err:
-+ return ret;
-+}
-+
-+#define bch2_bkey_ops_error ((struct bkey_ops) { \
-+ .key_invalid = empty_val_key_invalid, \
-+})
-+
-+static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+ return 0;
-+}
-+
-+#define bch2_bkey_ops_cookie ((struct bkey_ops) { \
-+ .key_invalid = key_type_cookie_invalid, \
-+ .min_val_size = 8, \
-+})
-+
-+#define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\
-+ .key_invalid = empty_val_key_invalid, \
-+})
-+
-+static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+ return 0;
-+}
-+
-+static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k);
-+ unsigned datalen = bkey_inline_data_bytes(k.k);
-+
-+ prt_printf(out, "datalen %u: %*phN",
-+ datalen, min(datalen, 32U), d.v->data);
-+}
-+
-+#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \
-+ .key_invalid = key_type_inline_data_invalid, \
-+ .val_to_text = key_type_inline_data_to_text, \
-+})
-+
-+static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
-+{
-+ bch2_key_resize(l.k, l.k->size + r.k->size);
-+ return true;
-+}
-+
-+#define bch2_bkey_ops_set ((struct bkey_ops) { \
-+ .key_invalid = empty_val_key_invalid, \
-+ .key_merge = key_type_set_merge, \
-+})
-+
-+const struct bkey_ops bch2_bkey_ops[] = {
-+#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
-+ BCH_BKEY_TYPES()
-+#undef x
-+};
-+
-+const struct bkey_ops bch2_bkey_null_ops = {
-+};
-+
-+int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err,
-+ bkey_val_size_too_small,
-+ "bad val size (%zu < %u)",
-+ bkey_val_bytes(k.k), ops->min_val_size);
-+
-+ if (!ops->key_invalid)
-+ return 0;
-+
-+ ret = ops->key_invalid(c, k, flags, err);
-+fsck_err:
-+ return ret;
-+}
-+
-+static u64 bch2_key_types_allowed[] = {
-+ [BKEY_TYPE_btree] =
-+ BIT_ULL(KEY_TYPE_deleted)|
-+ BIT_ULL(KEY_TYPE_btree_ptr)|
-+ BIT_ULL(KEY_TYPE_btree_ptr_v2),
-+#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys,
-+ BCH_BTREE_IDS()
-+#undef x
-+};
-+
-+const char *bch2_btree_node_type_str(enum btree_node_type type)
-+{
-+ return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
-+}
-+
-+int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum btree_node_type type,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err,
-+ bkey_u64s_too_small,
-+ "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s);
-+
-+ if (type >= BKEY_TYPE_NR)
-+ return 0;
-+
-+ bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
-+ !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
-+ bkey_invalid_type_for_btree,
-+ "invalid key type for btree %s (%s)",
-+ bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
-+
-+ if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
-+ bkey_fsck_err_on(k.k->size == 0, c, err,
-+ bkey_extent_size_zero,
-+ "size == 0");
-+
-+ bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err,
-+ bkey_extent_size_greater_than_offset,
-+ "size greater than offset (%u > %llu)",
-+ k.k->size, k.k->p.offset);
-+ } else {
-+ bkey_fsck_err_on(k.k->size, c, err,
-+ bkey_size_nonzero,
-+ "size != 0");
-+ }
-+
-+ if (type != BKEY_TYPE_btree) {
-+ enum btree_id btree = type - 1;
-+
-+ if (btree_type_has_snapshots(btree)) {
-+ bkey_fsck_err_on(!k.k->p.snapshot, c, err,
-+ bkey_snapshot_zero,
-+ "snapshot == 0");
-+ } else if (!btree_type_has_snapshot_field(btree)) {
-+ bkey_fsck_err_on(k.k->p.snapshot, c, err,
-+ bkey_snapshot_nonzero,
-+ "nonzero snapshot");
-+ } else {
-+ /*
-+ * btree uses snapshot field but it's not required to be
-+ * nonzero
-+ */
-+ }
-+
-+ bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err,
-+ bkey_at_pos_max,
-+ "key at POS_MAX");
-+ }
-+fsck_err:
-+ return ret;
-+}
-+
-+int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum btree_node_type type,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ return __bch2_bkey_invalid(c, k, type, flags, err) ?:
-+ bch2_bkey_val_invalid(c, k, flags, err);
-+}
-+
-+int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b,
-+ struct bkey_s_c k, struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err,
-+ bkey_before_start_of_btree_node,
-+ "key before start of btree node");
-+
-+ bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err,
-+ bkey_after_end_of_btree_node,
-+ "key past end of btree node");
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
-+{
-+ if (bpos_eq(pos, POS_MIN))
-+ prt_printf(out, "POS_MIN");
-+ else if (bpos_eq(pos, POS_MAX))
-+ prt_printf(out, "POS_MAX");
-+ else if (bpos_eq(pos, SPOS_MAX))
-+ prt_printf(out, "SPOS_MAX");
-+ else {
-+ if (pos.inode == U64_MAX)
-+ prt_printf(out, "U64_MAX");
-+ else
-+ prt_printf(out, "%llu", pos.inode);
-+ prt_printf(out, ":");
-+ if (pos.offset == U64_MAX)
-+ prt_printf(out, "U64_MAX");
-+ else
-+ prt_printf(out, "%llu", pos.offset);
-+ prt_printf(out, ":");
-+ if (pos.snapshot == U32_MAX)
-+ prt_printf(out, "U32_MAX");
-+ else
-+ prt_printf(out, "%u", pos.snapshot);
-+ }
-+}
-+
-+void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
-+{
-+ if (k) {
-+ prt_printf(out, "u64s %u type ", k->u64s);
-+
-+ if (k->type < KEY_TYPE_MAX)
-+ prt_printf(out, "%s ", bch2_bkey_types[k->type]);
-+ else
-+ prt_printf(out, "%u ", k->type);
-+
-+ bch2_bpos_to_text(out, k->p);
-+
-+ prt_printf(out, " len %u ver %llu", k->size, k->version.lo);
-+ } else {
-+ prt_printf(out, "(null)");
-+ }
-+}
-+
-+void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-+
-+ if (likely(ops->val_to_text))
-+ ops->val_to_text(out, c, k);
-+}
-+
-+void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ bch2_bkey_to_text(out, k.k);
-+
-+ if (bkey_val_bytes(k.k)) {
-+ prt_printf(out, ": ");
-+ bch2_val_to_text(out, c, k);
-+ }
-+}
-+
-+void bch2_bkey_swab_val(struct bkey_s k)
-+{
-+ const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-+
-+ if (ops->swab)
-+ ops->swab(k);
-+}
-+
-+bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
-+{
-+ const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-+
-+ return ops->key_normalize
-+ ? ops->key_normalize(c, k)
-+ : false;
-+}
-+
-+bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
-+{
-+ const struct bkey_ops *ops = bch2_bkey_type_ops(l.k->type);
-+
-+ return ops->key_merge &&
-+ bch2_bkey_maybe_mergable(l.k, r.k) &&
-+ (u64) l.k->size + r.k->size <= KEY_SIZE_MAX &&
-+ !bch2_key_merging_disabled &&
-+ ops->key_merge(c, l, r);
-+}
-+
-+static const struct old_bkey_type {
-+ u8 btree_node_type;
-+ u8 old;
-+ u8 new;
-+} bkey_renumber_table[] = {
-+ {BKEY_TYPE_btree, 128, KEY_TYPE_btree_ptr },
-+ {BKEY_TYPE_extents, 128, KEY_TYPE_extent },
-+ {BKEY_TYPE_extents, 129, KEY_TYPE_extent },
-+ {BKEY_TYPE_extents, 130, KEY_TYPE_reservation },
-+ {BKEY_TYPE_inodes, 128, KEY_TYPE_inode },
-+ {BKEY_TYPE_inodes, 130, KEY_TYPE_inode_generation },
-+ {BKEY_TYPE_dirents, 128, KEY_TYPE_dirent },
-+ {BKEY_TYPE_dirents, 129, KEY_TYPE_hash_whiteout },
-+ {BKEY_TYPE_xattrs, 128, KEY_TYPE_xattr },
-+ {BKEY_TYPE_xattrs, 129, KEY_TYPE_hash_whiteout },
-+ {BKEY_TYPE_alloc, 128, KEY_TYPE_alloc },
-+ {BKEY_TYPE_quotas, 128, KEY_TYPE_quota },
-+};
-+
-+void bch2_bkey_renumber(enum btree_node_type btree_node_type,
-+ struct bkey_packed *k,
-+ int write)
-+{
-+ const struct old_bkey_type *i;
-+
-+ for (i = bkey_renumber_table;
-+ i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table);
-+ i++)
-+ if (btree_node_type == i->btree_node_type &&
-+ k->type == (write ? i->new : i->old)) {
-+ k->type = write ? i->old : i->new;
-+ break;
-+ }
-+}
-+
-+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
-+ unsigned version, unsigned big_endian,
-+ int write,
-+ struct bkey_format *f,
-+ struct bkey_packed *k)
-+{
-+ const struct bkey_ops *ops;
-+ struct bkey uk;
-+ unsigned nr_compat = 5;
-+ int i;
-+
-+ /*
-+ * Do these operations in reverse order in the write path:
-+ */
-+
-+ for (i = 0; i < nr_compat; i++)
-+ switch (!write ? i : nr_compat - 1 - i) {
-+ case 0:
-+ if (big_endian != CPU_BIG_ENDIAN)
-+ bch2_bkey_swab_key(f, k);
-+ break;
-+ case 1:
-+ if (version < bcachefs_metadata_version_bkey_renumber)
-+ bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
-+ break;
-+ case 2:
-+ if (version < bcachefs_metadata_version_inode_btree_change &&
-+ btree_id == BTREE_ID_inodes) {
-+ if (!bkey_packed(k)) {
-+ struct bkey_i *u = packed_to_bkey(k);
-+
-+ swap(u->k.p.inode, u->k.p.offset);
-+ } else if (f->bits_per_field[BKEY_FIELD_INODE] &&
-+ f->bits_per_field[BKEY_FIELD_OFFSET]) {
-+ struct bkey_format tmp = *f, *in = f, *out = &tmp;
-+
-+ swap(tmp.bits_per_field[BKEY_FIELD_INODE],
-+ tmp.bits_per_field[BKEY_FIELD_OFFSET]);
-+ swap(tmp.field_offset[BKEY_FIELD_INODE],
-+ tmp.field_offset[BKEY_FIELD_OFFSET]);
-+
-+ if (!write)
-+ swap(in, out);
-+
-+ uk = __bch2_bkey_unpack_key(in, k);
-+ swap(uk.p.inode, uk.p.offset);
-+ BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
-+ }
-+ }
-+ break;
-+ case 3:
-+ if (version < bcachefs_metadata_version_snapshot &&
-+ (level || btree_type_has_snapshots(btree_id))) {
-+ struct bkey_i *u = packed_to_bkey(k);
-+
-+ if (u) {
-+ u->k.p.snapshot = write
-+ ? 0 : U32_MAX;
-+ } else {
-+ u64 min_packed = le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]);
-+ u64 max_packed = min_packed +
-+ ~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
-+
-+ uk = __bch2_bkey_unpack_key(f, k);
-+ uk.p.snapshot = write
-+ ? min_packed : min_t(u64, U32_MAX, max_packed);
-+
-+ BUG_ON(!bch2_bkey_pack_key(k, &uk, f));
-+ }
-+ }
-+
-+ break;
-+ case 4: {
-+ struct bkey_s u;
-+
-+ if (!bkey_packed(k)) {
-+ u = bkey_i_to_s(packed_to_bkey(k));
-+ } else {
-+ uk = __bch2_bkey_unpack_key(f, k);
-+ u.k = &uk;
-+ u.v = bkeyp_val(f, k);
-+ }
-+
-+ if (big_endian != CPU_BIG_ENDIAN)
-+ bch2_bkey_swab_val(u);
-+
-+ ops = bch2_bkey_type_ops(k->type);
-+
-+ if (ops->compat)
-+ ops->compat(btree_id, version, big_endian, write, u);
-+ break;
-+ }
-+ default:
-+ BUG();
-+ }
-+}
-diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
-new file mode 100644
-index 000000000000..3a370b7087ac
---- /dev/null
-+++ b/fs/bcachefs/bkey_methods.h
-@@ -0,0 +1,179 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_METHODS_H
-+#define _BCACHEFS_BKEY_METHODS_H
-+
-+#include "bkey.h"
-+
-+struct bch_fs;
-+struct btree;
-+struct btree_trans;
-+struct bkey;
-+enum btree_node_type;
-+
-+extern const char * const bch2_bkey_types[];
-+extern const struct bkey_ops bch2_bkey_null_ops;
-+
-+/*
-+ * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If
-+ * invalid, entire key will be deleted.
-+ *
-+ * When invalid, error string is returned via @err. @flags carries the
-+ * enum bkey_invalid_flags bits for this check (e.g. BKEY_INVALID_COMMIT).
-+ */
-+struct bkey_ops {
-+ int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags, struct printbuf *err);
-+ void (*val_to_text)(struct printbuf *, struct bch_fs *,
-+ struct bkey_s_c);
-+ void (*swab)(struct bkey_s);
-+ bool (*key_normalize)(struct bch_fs *, struct bkey_s);
-+ bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-+ int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_i *, unsigned);
-+ int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_s_c, unsigned);
-+ void (*compat)(enum btree_id id, unsigned version,
-+ unsigned big_endian, int write,
-+ struct bkey_s);
-+
-+ /* Size of value type when first created: */
-+ unsigned min_val_size;
-+};
-+
-+extern const struct bkey_ops bch2_bkey_ops[];
-+
-+static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type)
-+{
-+ return likely(type < KEY_TYPE_MAX)
-+ ? &bch2_bkey_ops[type]
-+ : &bch2_bkey_null_ops;
-+}
-+
-+int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *,
-+ struct bkey_s_c, struct printbuf *);
-+
-+void bch2_bpos_to_text(struct printbuf *, struct bpos);
-+void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
-+void bch2_val_to_text(struct printbuf *, struct bch_fs *,
-+ struct bkey_s_c);
-+void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
-+ struct bkey_s_c);
-+
-+void bch2_bkey_swab_val(struct bkey_s);
-+
-+bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
-+
-+static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
-+{
-+ return l->type == r->type &&
-+ !bversion_cmp(l->version, r->version) &&
-+ bpos_eq(l->p, bkey_start_pos(r));
-+}
-+
-+bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-+
-+static inline int bch2_mark_key(struct btree_trans *trans,
-+ enum btree_id btree, unsigned level,
-+ struct bkey_s_c old, struct bkey_s_c new,
-+ unsigned flags)
-+{
-+ const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
-+
-+ return ops->atomic_trigger
-+ ? ops->atomic_trigger(trans, btree, level, old, new, flags)
-+ : 0;
-+}
-+
-+enum btree_update_flags {
-+ __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
-+ __BTREE_UPDATE_NOJOURNAL,
-+ __BTREE_UPDATE_PREJOURNAL,
-+ __BTREE_UPDATE_KEY_CACHE_RECLAIM,
-+
-+ __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
-+
-+ __BTREE_TRIGGER_INSERT,
-+ __BTREE_TRIGGER_OVERWRITE,
-+
-+ __BTREE_TRIGGER_GC,
-+ __BTREE_TRIGGER_BUCKET_INVALIDATE,
-+ __BTREE_TRIGGER_NOATOMIC,
-+};
-+
-+#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
-+#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
-+#define BTREE_UPDATE_PREJOURNAL (1U << __BTREE_UPDATE_PREJOURNAL)
-+#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
-+
-+#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
-+
-+#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
-+#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
-+
-+#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
-+#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
-+#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
-+
-+static inline int bch2_trans_mark_key(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_i *new,
-+ unsigned flags)
-+{
-+ const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new->k.type);
-+
-+ return ops->trans_trigger
-+ ? ops->trans_trigger(trans, btree_id, level, old, new, flags)
-+ : 0;
-+}
-+
-+static inline int bch2_trans_mark_old(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, unsigned flags)
-+{
-+ struct bkey_i deleted;
-+
-+ bkey_init(&deleted.k);
-+ deleted.k.p = old.k->p;
-+
-+ return bch2_trans_mark_key(trans, btree_id, level, old, &deleted,
-+ BTREE_TRIGGER_OVERWRITE|flags);
-+}
-+
-+static inline int bch2_trans_mark_new(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_i *new, unsigned flags)
-+{
-+ struct bkey_i deleted;
-+
-+ bkey_init(&deleted.k);
-+ deleted.k.p = new->k.p;
-+
-+ return bch2_trans_mark_key(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
-+ BTREE_TRIGGER_INSERT|flags);
-+}
-+
-+void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
-+
-+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,
-+ int, struct bkey_format *, struct bkey_packed *);
-+
-+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
-+ unsigned version, unsigned big_endian,
-+ int write,
-+ struct bkey_format *f,
-+ struct bkey_packed *k)
-+{
-+ if (version < bcachefs_metadata_version_current ||
-+ big_endian != CPU_BIG_ENDIAN)
-+ __bch2_bkey_compat(level, btree_id, version,
-+ big_endian, write, f, k);
-+
-+}
-+
-+#endif /* _BCACHEFS_BKEY_METHODS_H */
-diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c
-new file mode 100644
-index 000000000000..bcca9e76a0b4
---- /dev/null
-+++ b/fs/bcachefs/bkey_sort.c
-@@ -0,0 +1,201 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "bkey_cmp.h"
-+#include "bkey_sort.h"
-+#include "bset.h"
-+#include "extents.h"
-+
-+typedef int (*sort_cmp_fn)(struct btree *,
-+ struct bkey_packed *,
-+ struct bkey_packed *);
-+
-+static inline bool sort_iter_end(struct sort_iter *iter)
-+{
-+ return !iter->used;
-+}
-+
-+static inline void sort_iter_sift(struct sort_iter *iter, unsigned from,
-+ sort_cmp_fn cmp)
-+{
-+ unsigned i;
-+
-+ for (i = from;
-+ i + 1 < iter->used &&
-+ cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
-+ i++)
-+ swap(iter->data[i], iter->data[i + 1]);
-+}
-+
-+static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
-+{
-+ unsigned i = iter->used;
-+
-+ while (i--)
-+ sort_iter_sift(iter, i, cmp);
-+}
-+
-+static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
-+{
-+ return !sort_iter_end(iter) ? iter->data->k : NULL;
-+}
-+
-+static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
-+{
-+ struct sort_iter_set *i = iter->data;
-+
-+ BUG_ON(!iter->used);
-+
-+ i->k = bkey_p_next(i->k);
-+
-+ BUG_ON(i->k > i->end);
-+
-+ if (i->k == i->end)
-+ array_remove_item(iter->data, iter->used, 0);
-+ else
-+ sort_iter_sift(iter, 0, cmp);
-+}
-+
-+static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
-+ sort_cmp_fn cmp)
-+{
-+ struct bkey_packed *ret = sort_iter_peek(iter);
-+
-+ if (ret)
-+ sort_iter_advance(iter, cmp);
-+
-+ return ret;
-+}
-+
-+/*
-+ * If keys compare equal, compare by pointer order:
-+ */
-+static inline int key_sort_fix_overlapping_cmp(struct btree *b,
-+ struct bkey_packed *l,
-+ struct bkey_packed *r)
-+{
-+ return bch2_bkey_cmp_packed(b, l, r) ?:
-+ cmp_int((unsigned long) l, (unsigned long) r);
-+}
-+
-+static inline bool should_drop_next_key(struct sort_iter *iter)
-+{
-+ /*
-+ * key_sort_fix_overlapping_cmp() breaks ties by pointer order, so when
-+ * iter->data[0].k compares equal to iter->data[1].k the former is taken
-+ * to be the older key and should be dropped.
-+ */
-+ return iter->used >= 2 &&
-+ !bch2_bkey_cmp_packed(iter->b,
-+ iter->data[0].k,
-+ iter->data[1].k);
-+}
-+
-+struct btree_nr_keys
-+bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
-+ struct sort_iter *iter)
-+{
-+ struct bkey_packed *out = dst->start;
-+ struct bkey_packed *k;
-+ struct btree_nr_keys nr;
-+
-+ memset(&nr, 0, sizeof(nr));
-+
-+ sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
-+
-+ while ((k = sort_iter_peek(iter))) {
-+ if (!bkey_deleted(k) &&
-+ !should_drop_next_key(iter)) {
-+ bkey_p_copy(out, k);
-+ btree_keys_account_key_add(&nr, 0, out);
-+ out = bkey_p_next(out);
-+ }
-+
-+ sort_iter_advance(iter, key_sort_fix_overlapping_cmp);
-+ }
-+
-+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
-+ return nr;
-+}
-+
-+/* Sort + repack in a new format: */
-+struct btree_nr_keys
-+bch2_sort_repack(struct bset *dst, struct btree *src,
-+ struct btree_node_iter *src_iter,
-+ struct bkey_format *out_f,
-+ bool filter_whiteouts)
-+{
-+ struct bkey_format *in_f = &src->format;
-+ struct bkey_packed *in, *out = vstruct_last(dst);
-+ struct btree_nr_keys nr;
-+ bool transform = memcmp(out_f, &src->format, sizeof(*out_f));
-+
-+ memset(&nr, 0, sizeof(nr));
-+
-+ while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
-+ if (filter_whiteouts && bkey_deleted(in))
-+ continue;
-+
-+ if (!transform)
-+ bkey_p_copy(out, in);
-+ else if (bch2_bkey_transform(out_f, out, bkey_packed(in)
-+ ? in_f : &bch2_bkey_format_current, in))
-+ out->format = KEY_FORMAT_LOCAL_BTREE;
-+ else
-+ bch2_bkey_unpack(src, (void *) out, in);
-+
-+ out->needs_whiteout = false;
-+
-+ btree_keys_account_key_add(&nr, 0, out);
-+ out = bkey_p_next(out);
-+ }
-+
-+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
-+ return nr;
-+}
-+
-+static inline int sort_keys_cmp(struct btree *b,
-+ struct bkey_packed *l,
-+ struct bkey_packed *r)
-+{
-+ return bch2_bkey_cmp_packed_inlined(b, l, r) ?:
-+ (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
-+ (int) l->needs_whiteout - (int) r->needs_whiteout;
-+}
-+
-+unsigned bch2_sort_keys(struct bkey_packed *dst,
-+ struct sort_iter *iter,
-+ bool filter_whiteouts)
-+{
-+ const struct bkey_format *f = &iter->b->format;
-+ struct bkey_packed *in, *next, *out = dst;
-+
-+ sort_iter_sort(iter, sort_keys_cmp);
-+
-+ while ((in = sort_iter_next(iter, sort_keys_cmp))) {
-+ bool needs_whiteout = false;
-+
-+ if (bkey_deleted(in) &&
-+ (filter_whiteouts || !in->needs_whiteout))
-+ continue;
-+
-+ while ((next = sort_iter_peek(iter)) &&
-+ !bch2_bkey_cmp_packed_inlined(iter->b, in, next)) {
-+ BUG_ON(in->needs_whiteout &&
-+ next->needs_whiteout);
-+ needs_whiteout |= in->needs_whiteout;
-+ in = sort_iter_next(iter, sort_keys_cmp);
-+ }
-+
-+ if (bkey_deleted(in)) {
-+ memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in));
-+ set_bkeyp_val_u64s(f, out, 0);
-+ } else {
-+ bkey_p_copy(out, in);
-+ }
-+ out->needs_whiteout |= needs_whiteout;
-+ out = bkey_p_next(out);
-+ }
-+
-+ return (u64 *) out - (u64 *) dst;
-+}
-diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h
-new file mode 100644
-index 000000000000..7c0f0b160f18
---- /dev/null
-+++ b/fs/bcachefs/bkey_sort.h
-@@ -0,0 +1,54 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_SORT_H
-+#define _BCACHEFS_BKEY_SORT_H
-+
-+struct sort_iter {
-+ struct btree *b;
-+ unsigned used;
-+ unsigned size;
-+
-+ struct sort_iter_set {
-+ struct bkey_packed *k, *end;
-+ } data[];
-+};
-+
-+static inline void sort_iter_init(struct sort_iter *iter, struct btree *b, unsigned size)
-+{
-+ iter->b = b;
-+ iter->used = 0;
-+ iter->size = size;
-+}
-+
-+struct sort_iter_stack {
-+ struct sort_iter iter;
-+ struct sort_iter_set sets[MAX_BSETS + 1];
-+};
-+
-+static inline void sort_iter_stack_init(struct sort_iter_stack *iter, struct btree *b)
-+{
-+ sort_iter_init(&iter->iter, b, ARRAY_SIZE(iter->sets));
-+}
-+
-+static inline void sort_iter_add(struct sort_iter *iter,
-+ struct bkey_packed *k,
-+ struct bkey_packed *end)
-+{
-+ BUG_ON(iter->used >= iter->size);
-+
-+ if (k != end)
-+ iter->data[iter->used++] = (struct sort_iter_set) { k, end };
-+}
-+
-+struct btree_nr_keys
-+bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *,
-+ struct sort_iter *);
-+
-+struct btree_nr_keys
-+bch2_sort_repack(struct bset *, struct btree *,
-+ struct btree_node_iter *,
-+ struct bkey_format *, bool);
-+
-+unsigned bch2_sort_keys(struct bkey_packed *,
-+ struct sort_iter *, bool);
-+
-+#endif /* _BCACHEFS_BKEY_SORT_H */
-diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
-new file mode 100644
-index 000000000000..bb73ba9017b0
---- /dev/null
-+++ b/fs/bcachefs/bset.c
-@@ -0,0 +1,1592 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Code for working with individual keys, and sorted sets of keys within a
-+ * btree node
-+ *
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "btree_cache.h"
-+#include "bset.h"
-+#include "eytzinger.h"
-+#include "trace.h"
-+#include "util.h"
-+
-+#include <asm/unaligned.h>
-+#include <linux/console.h>
-+#include <linux/random.h>
-+#include <linux/prefetch.h>
-+
-+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
-+ struct btree *);
-+
-+static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
-+{
-+ unsigned n = ARRAY_SIZE(iter->data);
-+
-+ while (n && __btree_node_iter_set_end(iter, n - 1))
-+ --n;
-+
-+ return n;
-+}
-+
-+struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
-+{
-+ return bch2_bkey_to_bset_inlined(b, k);
-+}
-+
-+/*
-+ * There are never duplicate live keys in the btree - but including keys that
-+ * have been flagged as deleted (and will be cleaned up later) we _will_ see
-+ * duplicates.
-+ *
-+ * Thus the sort order is: usual key comparison first, but for keys that compare
-+ * equal the deleted key(s) come first, and the (at most one) live version comes
-+ * last.
-+ *
-+ * The main reason for this is insertion: to handle overwrites, we first iterate
-+ * over keys that compare equal to our insert key, and then insert immediately
-+ * prior to the first key greater than the key we're inserting - our insert
-+ * position will be after all keys that compare equal to our insert key, which
-+ * by the time we actually do the insert will all be deleted.
-+ */
-+
-+void bch2_dump_bset(struct bch_fs *c, struct btree *b,
-+ struct bset *i, unsigned set)
-+{
-+ struct bkey_packed *_k, *_n;
-+ struct bkey uk, n;
-+ struct bkey_s_c k;
-+ struct printbuf buf = PRINTBUF;
-+
-+ if (!i->u64s)
-+ return;
-+
-+ for (_k = i->start;
-+ _k < vstruct_last(i);
-+ _k = _n) {
-+ _n = bkey_p_next(_k);
-+
-+ k = bkey_disassemble(b, _k, &uk);
-+
-+ printbuf_reset(&buf);
-+ if (c)
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ else
-+ bch2_bkey_to_text(&buf, k.k);
-+ printk(KERN_ERR "block %u key %5zu: %s\n", set,
-+ _k->_data - i->_data, buf.buf);
-+
-+ if (_n == vstruct_last(i))
-+ continue;
-+
-+ n = bkey_unpack_key(b, _n);
-+
-+ if (bpos_lt(n.p, k.k->p)) {
-+ printk(KERN_ERR "Key skipped backwards\n");
-+ continue;
-+ }
-+
-+ if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p))
-+ printk(KERN_ERR "Duplicate keys\n");
-+ }
-+
-+ printbuf_exit(&buf);
-+}
-+
-+void bch2_dump_btree_node(struct bch_fs *c, struct btree *b)
-+{
-+ struct bset_tree *t;
-+
-+ console_lock();
-+ for_each_bset(b, t)
-+ bch2_dump_bset(c, b, bset(b, t), t - b->set);
-+ console_unlock();
-+}
-+
-+void bch2_dump_btree_node_iter(struct btree *b,
-+ struct btree_node_iter *iter)
-+{
-+ struct btree_node_iter_set *set;
-+ struct printbuf buf = PRINTBUF;
-+
-+ printk(KERN_ERR "btree node iter with %u/%u sets:\n",
-+ __btree_node_iter_used(iter), b->nsets);
-+
-+ btree_node_iter_for_each(iter, set) {
-+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
-+ struct bset_tree *t = bch2_bkey_to_bset(b, k);
-+ struct bkey uk = bkey_unpack_key(b, k);
-+
-+ printbuf_reset(&buf);
-+ bch2_bkey_to_text(&buf, &uk);
-+ printk(KERN_ERR "set %zu key %u: %s\n",
-+ t - b->set, set->k, buf.buf);
-+ }
-+
-+ printbuf_exit(&buf);
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+
-+void __bch2_verify_btree_nr_keys(struct btree *b)
-+{
-+ struct bset_tree *t;
-+ struct bkey_packed *k;
-+ struct btree_nr_keys nr = { 0 };
-+
-+ for_each_bset(b, t)
-+ bset_tree_for_each_key(b, t, k)
-+ if (!bkey_deleted(k))
-+ btree_keys_account_key_add(&nr, t - b->set, k);
-+
-+ BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
-+}
-+
-+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
-+ struct btree *b)
-+{
-+ struct btree_node_iter iter = *_iter;
-+ const struct bkey_packed *k, *n;
-+
-+ k = bch2_btree_node_iter_peek_all(&iter, b);
-+ __bch2_btree_node_iter_advance(&iter, b);
-+ n = bch2_btree_node_iter_peek_all(&iter, b);
-+
-+ bkey_unpack_key(b, k);
-+
-+ if (n &&
-+ bkey_iter_cmp(b, k, n) > 0) {
-+ struct btree_node_iter_set *set;
-+ struct bkey ku = bkey_unpack_key(b, k);
-+ struct bkey nu = bkey_unpack_key(b, n);
-+ struct printbuf buf1 = PRINTBUF;
-+ struct printbuf buf2 = PRINTBUF;
-+
-+ bch2_dump_btree_node(NULL, b);
-+ bch2_bkey_to_text(&buf1, &ku);
-+ bch2_bkey_to_text(&buf2, &nu);
-+ printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
-+ buf1.buf, buf2.buf);
-+ printk(KERN_ERR "iter was:");
-+
-+ btree_node_iter_for_each(_iter, set) {
-+ struct bkey_packed *k2 = __btree_node_offset_to_key(b, set->k);
-+ struct bset_tree *t = bch2_bkey_to_bset(b, k2);
-+ printk(" [%zi %zi]", t - b->set,
-+ k2->_data - bset(b, t)->_data);
-+ }
-+ panic("\n");
-+ }
-+}
-+
-+void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
-+ struct btree *b)
-+{
-+ struct btree_node_iter_set *set, *s2;
-+ struct bkey_packed *k, *p;
-+ struct bset_tree *t;
-+
-+ if (bch2_btree_node_iter_end(iter))
-+ return;
-+
-+ /* Verify no duplicates: */
-+ btree_node_iter_for_each(iter, set) {
-+ BUG_ON(set->k > set->end);
-+ btree_node_iter_for_each(iter, s2)
-+ BUG_ON(set != s2 && set->end == s2->end);
-+ }
-+
-+ /* Verify that set->end is correct: */
-+ btree_node_iter_for_each(iter, set) {
-+ for_each_bset(b, t)
-+ if (set->end == t->end_offset)
-+ goto found;
-+ BUG();
-+found:
-+ BUG_ON(set->k < btree_bkey_first_offset(t) ||
-+ set->k >= t->end_offset);
-+ }
-+
-+ /* Verify iterator is sorted: */
-+ btree_node_iter_for_each(iter, set)
-+ BUG_ON(set != iter->data &&
-+ btree_node_iter_cmp(b, set[-1], set[0]) > 0);
-+
-+ k = bch2_btree_node_iter_peek_all(iter, b);
-+
-+ for_each_bset(b, t) {
-+ if (iter->data[0].end == t->end_offset)
-+ continue;
-+
-+ p = bch2_bkey_prev_all(b, t,
-+ bch2_btree_node_iter_bset_pos(iter, b, t));
-+
-+ BUG_ON(p && bkey_iter_cmp(b, k, p) < 0);
-+ }
-+}
-+
-+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
-+ struct bkey_packed *insert, unsigned clobber_u64s)
-+{
-+ struct bset_tree *t = bch2_bkey_to_bset(b, where);
-+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
-+ struct bkey_packed *next = (void *) ((u64 *) where->_data + clobber_u64s);
-+ struct printbuf buf1 = PRINTBUF;
-+ struct printbuf buf2 = PRINTBUF;
-+#if 0
-+ BUG_ON(prev &&
-+ bkey_iter_cmp(b, prev, insert) > 0);
-+#else
-+ if (prev &&
-+ bkey_iter_cmp(b, prev, insert) > 0) {
-+ struct bkey k1 = bkey_unpack_key(b, prev);
-+ struct bkey k2 = bkey_unpack_key(b, insert);
-+
-+ bch2_dump_btree_node(NULL, b);
-+ bch2_bkey_to_text(&buf1, &k1);
-+ bch2_bkey_to_text(&buf2, &k2);
-+
-+ panic("prev > insert:\n"
-+ "prev key %s\n"
-+ "insert key %s\n",
-+ buf1.buf, buf2.buf);
-+ }
-+#endif
-+#if 0
-+ BUG_ON(next != btree_bkey_last(b, t) &&
-+ bkey_iter_cmp(b, insert, next) > 0);
-+#else
-+ if (next != btree_bkey_last(b, t) &&
-+ bkey_iter_cmp(b, insert, next) > 0) {
-+ struct bkey k1 = bkey_unpack_key(b, insert);
-+ struct bkey k2 = bkey_unpack_key(b, next);
-+
-+ bch2_dump_btree_node(NULL, b);
-+ bch2_bkey_to_text(&buf1, &k1);
-+ bch2_bkey_to_text(&buf2, &k2);
-+
-+ panic("insert > next:\n"
-+ "insert key %s\n"
-+ "next key %s\n",
-+ buf1.buf, buf2.buf);
-+ }
-+#endif
-+}
-+
-+#else
-+
-+static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
-+ struct btree *b) {}
-+
-+#endif
-+
-+/* Auxiliary search trees */
-+
-+#define BFLOAT_FAILED_UNPACKED U8_MAX
-+#define BFLOAT_FAILED U8_MAX
-+
-+struct bkey_float {
-+ u8 exponent;
-+ u8 key_offset;
-+ u16 mantissa;
-+};
-+#define BKEY_MANTISSA_BITS 16
-+
-+static unsigned bkey_float_byte_offset(unsigned idx)
-+{
-+ return idx * sizeof(struct bkey_float);
-+}
-+
-+struct ro_aux_tree {
-+ u8 nothing[0];
-+ struct bkey_float f[];
-+};
-+
-+struct rw_aux_tree {
-+ u16 offset;
-+ struct bpos k;
-+};
-+
-+static unsigned bset_aux_tree_buf_end(const struct bset_tree *t)
-+{
-+ BUG_ON(t->aux_data_offset == U16_MAX);
-+
-+ switch (bset_aux_tree_type(t)) {
-+ case BSET_NO_AUX_TREE:
-+ return t->aux_data_offset;
-+ case BSET_RO_AUX_TREE:
-+ return t->aux_data_offset +
-+ DIV_ROUND_UP(t->size * sizeof(struct bkey_float) +
-+ t->size * sizeof(u8), 8);
-+ case BSET_RW_AUX_TREE:
-+ return t->aux_data_offset +
-+ DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8);
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static unsigned bset_aux_tree_buf_start(const struct btree *b,
-+ const struct bset_tree *t)
-+{
-+ return t == b->set
-+ ? DIV_ROUND_UP(b->unpack_fn_len, 8)
-+ : bset_aux_tree_buf_end(t - 1);
-+}
-+
-+static void *__aux_tree_base(const struct btree *b,
-+ const struct bset_tree *t)
-+{
-+ return b->aux_data + t->aux_data_offset * 8;
-+}
-+
-+static struct ro_aux_tree *ro_aux_tree_base(const struct btree *b,
-+ const struct bset_tree *t)
-+{
-+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
-+
-+ return __aux_tree_base(b, t);
-+}
-+
-+static u8 *ro_aux_tree_prev(const struct btree *b,
-+ const struct bset_tree *t)
-+{
-+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
-+
-+ return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size);
-+}
-+
-+static struct bkey_float *bkey_float(const struct btree *b,
-+ const struct bset_tree *t,
-+ unsigned idx)
-+{
-+ return ro_aux_tree_base(b, t)->f + idx;
-+}
-+
-+static void bset_aux_tree_verify(const struct btree *b)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ const struct bset_tree *t;
-+
-+ for_each_bset(b, t) {
-+ if (t->aux_data_offset == U16_MAX)
-+ continue;
-+
-+ BUG_ON(t != b->set &&
-+ t[-1].aux_data_offset == U16_MAX);
-+
-+ BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t));
-+ BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
-+ BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
-+ }
-+#endif
-+}
-+
-+void bch2_btree_keys_init(struct btree *b)
-+{
-+ unsigned i;
-+
-+ b->nsets = 0;
-+ memset(&b->nr, 0, sizeof(b->nr));
-+
-+ for (i = 0; i < MAX_BSETS; i++)
-+ b->set[i].data_offset = U16_MAX;
-+
-+ bch2_bset_set_no_aux_tree(b, b->set);
-+}
-+
-+/* Binary tree stuff for auxiliary search trees */
-+
-+/*
-+ * Cacheline/offset <-> bkey pointer arithmetic:
-+ *
-+ * t->tree is a binary search tree in an array; each node corresponds to a key
-+ * in one cacheline in t->set (BSET_CACHELINE bytes).
-+ *
-+ * This means we don't have to store the full index of the key that a node in
-+ * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and
-+ * then bkey_float->m gives us the offset within that cacheline, in units of 8
-+ * bytes.
-+ *
-+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
-+ * make this work.
-+ *
-+ * To construct the bfloat for an arbitrary key we need to know what the key
-+ * immediately preceding it is: we have to check if the two keys differ in the
-+ * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size
-+ * of the previous key so we can walk backwards to it from t->tree[j]'s key.
-+ */
-+
-+static inline void *bset_cacheline(const struct btree *b,
-+ const struct bset_tree *t,
-+ unsigned cacheline)
-+{
-+ return (void *) round_down((unsigned long) btree_bkey_first(b, t),
-+ L1_CACHE_BYTES) +
-+ cacheline * BSET_CACHELINE;
-+}
-+
-+static struct bkey_packed *cacheline_to_bkey(const struct btree *b,
-+ const struct bset_tree *t,
-+ unsigned cacheline,
-+ unsigned offset)
-+{
-+ return bset_cacheline(b, t, cacheline) + offset * 8;
-+}
-+
-+static unsigned bkey_to_cacheline(const struct btree *b,
-+ const struct bset_tree *t,
-+ const struct bkey_packed *k)
-+{
-+ return ((void *) k - bset_cacheline(b, t, 0)) / BSET_CACHELINE;
-+}
-+
-+static ssize_t __bkey_to_cacheline_offset(const struct btree *b,
-+ const struct bset_tree *t,
-+ unsigned cacheline,
-+ const struct bkey_packed *k)
-+{
-+ return (u64 *) k - (u64 *) bset_cacheline(b, t, cacheline);
-+}
-+
-+static unsigned bkey_to_cacheline_offset(const struct btree *b,
-+ const struct bset_tree *t,
-+ unsigned cacheline,
-+ const struct bkey_packed *k)
-+{
-+ size_t m = __bkey_to_cacheline_offset(b, t, cacheline, k);
-+
-+ EBUG_ON(m > U8_MAX);
-+ return m;
-+}
-+
-+static inline struct bkey_packed *tree_to_bkey(const struct btree *b,
-+ const struct bset_tree *t,
-+ unsigned j)
-+{
-+ return cacheline_to_bkey(b, t,
-+ __eytzinger1_to_inorder(j, t->size - 1, t->extra),
-+ bkey_float(b, t, j)->key_offset);
-+}
-+
-+static struct bkey_packed *tree_to_prev_bkey(const struct btree *b,
-+ const struct bset_tree *t,
-+ unsigned j)
-+{
-+ unsigned prev_u64s = ro_aux_tree_prev(b, t)[j];
-+
-+ return (void *) ((u64 *) tree_to_bkey(b, t, j)->_data - prev_u64s);
-+}
-+
-+static struct rw_aux_tree *rw_aux_tree(const struct btree *b,
-+ const struct bset_tree *t)
-+{
-+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE);
-+
-+ return __aux_tree_base(b, t);
-+}
-+
-+/*
-+ * For the write set - the one we're currently inserting keys into - we don't
-+ * maintain a full search tree, we just keep a simple lookup table in t->prev.
-+ */
-+static struct bkey_packed *rw_aux_to_bkey(const struct btree *b,
-+ struct bset_tree *t,
-+ unsigned j)
-+{
-+ return __btree_node_offset_to_key(b, rw_aux_tree(b, t)[j].offset);
-+}
-+
-+static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t,
-+ unsigned j, struct bkey_packed *k)
-+{
-+ EBUG_ON(k >= btree_bkey_last(b, t));
-+
-+ rw_aux_tree(b, t)[j] = (struct rw_aux_tree) {
-+ .offset = __btree_node_key_to_offset(b, k),
-+ .k = bkey_unpack_pos(b, k),
-+ };
-+}
-+
-+static void bch2_bset_verify_rw_aux_tree(struct btree *b,
-+ struct bset_tree *t)
-+{
-+ struct bkey_packed *k = btree_bkey_first(b, t);
-+ unsigned j = 0;
-+
-+ if (!bch2_expensive_debug_checks)
-+ return;
-+
-+ BUG_ON(bset_has_ro_aux_tree(t));
-+
-+ if (!bset_has_rw_aux_tree(t))
-+ return;
-+
-+ BUG_ON(t->size < 1);
-+ BUG_ON(rw_aux_to_bkey(b, t, j) != k);
-+
-+ goto start;
-+ while (1) {
-+ if (rw_aux_to_bkey(b, t, j) == k) {
-+ BUG_ON(!bpos_eq(rw_aux_tree(b, t)[j].k,
-+ bkey_unpack_pos(b, k)));
-+start:
-+ if (++j == t->size)
-+ break;
-+
-+ BUG_ON(rw_aux_tree(b, t)[j].offset <=
-+ rw_aux_tree(b, t)[j - 1].offset);
-+ }
-+
-+ k = bkey_p_next(k);
-+ BUG_ON(k >= btree_bkey_last(b, t));
-+ }
-+}
-+
-+/* returns idx of first entry >= offset: */
-+static unsigned rw_aux_tree_bsearch(struct btree *b,
-+ struct bset_tree *t,
-+ unsigned offset)
-+{
-+ unsigned bset_offs = offset - btree_bkey_first_offset(t);
-+ unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t);
-+ unsigned idx = bset_u64s ? bset_offs * t->size / bset_u64s : 0;
-+
-+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE);
-+ EBUG_ON(!t->size);
-+ EBUG_ON(idx > t->size);
-+
-+ while (idx < t->size &&
-+ rw_aux_tree(b, t)[idx].offset < offset)
-+ idx++;
-+
-+ while (idx &&
-+ rw_aux_tree(b, t)[idx - 1].offset >= offset)
-+ idx--;
-+
-+ EBUG_ON(idx < t->size &&
-+ rw_aux_tree(b, t)[idx].offset < offset);
-+ EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset);
-+ EBUG_ON(idx + 1 < t->size &&
-+ rw_aux_tree(b, t)[idx].offset ==
-+ rw_aux_tree(b, t)[idx + 1].offset);
-+
-+ return idx;
-+}
-+
-+static inline unsigned bkey_mantissa(const struct bkey_packed *k,
-+ const struct bkey_float *f,
-+ unsigned idx)
-+{
-+ u64 v;
-+
-+ EBUG_ON(!bkey_packed(k));
-+
-+ v = get_unaligned((u64 *) (((u8 *) k->_data) + (f->exponent >> 3)));
-+
-+ /*
-+ * In little endian, we're shifting off low bits (and then the bits we
-+ * want are at the low end), in big endian we're shifting off high bits
-+ * (and then the bits we want are at the high end, so we shift them
-+ * back down):
-+ */
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+ v >>= f->exponent & 7;
-+#else
-+ v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS;
-+#endif
-+ return (u16) v;
-+}
-+
-+static __always_inline void make_bfloat(struct btree *b, struct bset_tree *t,
-+ unsigned j,
-+ struct bkey_packed *min_key,
-+ struct bkey_packed *max_key)
-+{
-+ struct bkey_float *f = bkey_float(b, t, j);
-+ struct bkey_packed *m = tree_to_bkey(b, t, j);
-+ struct bkey_packed *l = is_power_of_2(j)
-+ ? min_key
-+ : tree_to_prev_bkey(b, t, j >> ffs(j));
-+ struct bkey_packed *r = is_power_of_2(j + 1)
-+ ? max_key
-+ : tree_to_bkey(b, t, j >> (ffz(j) + 1));
-+ unsigned mantissa;
-+ int shift, exponent, high_bit;
-+
-+ /*
-+ * for failed bfloats, the lookup code falls back to comparing against
-+ * the original key.
-+ */
-+
-+ if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) ||
-+ !b->nr_key_bits) {
-+ f->exponent = BFLOAT_FAILED_UNPACKED;
-+ return;
-+ }
-+
-+ /*
-+ * The greatest differing bit of l and r is the first bit we must
-+ * include in the bfloat mantissa we're creating in order to do
-+ * comparisons - that bit always becomes the high bit of
-+ * bfloat->mantissa, and thus the exponent we're calculating here is
-+ * the position of what will become the low bit in bfloat->mantissa:
-+ *
-+ * Note that this may be negative - we may be running off the low end
-+ * of the key: we handle this later:
-+ */
-+ high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r),
-+ min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1);
-+ exponent = high_bit - (BKEY_MANTISSA_BITS - 1);
-+
-+ /*
-+ * Then we calculate the actual shift value, from the start of the key
-+ * (k->_data), to get the key bits starting at exponent:
-+ */
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+ shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent;
-+
-+ EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64);
-+#else
-+ shift = high_bit_offset +
-+ b->nr_key_bits -
-+ exponent -
-+ BKEY_MANTISSA_BITS;
-+
-+ EBUG_ON(shift < KEY_PACKED_BITS_START);
-+#endif
-+ EBUG_ON(shift < 0 || shift >= BFLOAT_FAILED);
-+
-+ f->exponent = shift;
-+ mantissa = bkey_mantissa(m, f, j);
-+
-+ /*
-+ * If we've got garbage bits, set them to all 1s - it's legal for the
-+ * bfloat to compare larger than the original key, but not smaller:
-+ */
-+ if (exponent < 0)
-+ mantissa |= ~(~0U << -exponent);
-+
-+ f->mantissa = mantissa;
-+}
-+
-+/* bytes remaining - only valid for last bset: */
-+static unsigned __bset_tree_capacity(const struct btree *b, const struct bset_tree *t)
-+{
-+ bset_aux_tree_verify(b);
-+
-+ return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64);
-+}
-+
-+static unsigned bset_ro_tree_capacity(const struct btree *b, const struct bset_tree *t)
-+{
-+ return __bset_tree_capacity(b, t) /
-+ (sizeof(struct bkey_float) + sizeof(u8));
-+}
-+
-+static unsigned bset_rw_tree_capacity(const struct btree *b, const struct bset_tree *t)
-+{
-+ return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree);
-+}
-+
-+static noinline void __build_rw_aux_tree(struct btree *b, struct bset_tree *t)
-+{
-+ struct bkey_packed *k;
-+
-+ t->size = 1;
-+ t->extra = BSET_RW_AUX_TREE_VAL;
-+ rw_aux_tree(b, t)[0].offset =
-+ __btree_node_key_to_offset(b, btree_bkey_first(b, t));
-+
-+ bset_tree_for_each_key(b, t, k) {
-+ if (t->size == bset_rw_tree_capacity(b, t))
-+ break;
-+
-+ if ((void *) k - (void *) rw_aux_to_bkey(b, t, t->size - 1) >
-+ L1_CACHE_BYTES)
-+ rw_aux_tree_set(b, t, t->size++, k);
-+ }
-+}
-+
-+static noinline void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
-+{
-+ struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t);
-+ struct bkey_i min_key, max_key;
-+ unsigned j, cacheline = 1;
-+
-+ t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)),
-+ bset_ro_tree_capacity(b, t));
-+retry:
-+ if (t->size < 2) {
-+ t->size = 0;
-+ t->extra = BSET_NO_AUX_TREE_VAL;
-+ return;
-+ }
-+
-+ t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;
-+
-+ /* First we figure out where the first key in each cacheline is */
-+ eytzinger1_for_each(j, t->size - 1) {
-+ while (bkey_to_cacheline(b, t, k) < cacheline)
-+ prev = k, k = bkey_p_next(k);
-+
-+ if (k >= btree_bkey_last(b, t)) {
-+ /* XXX: this path sucks */
-+ t->size--;
-+ goto retry;
-+ }
-+
-+ ro_aux_tree_prev(b, t)[j] = prev->u64s;
-+ bkey_float(b, t, j)->key_offset =
-+ bkey_to_cacheline_offset(b, t, cacheline++, k);
-+
-+ EBUG_ON(tree_to_prev_bkey(b, t, j) != prev);
-+ EBUG_ON(tree_to_bkey(b, t, j) != k);
-+ }
-+
-+ while (k != btree_bkey_last(b, t))
-+ prev = k, k = bkey_p_next(k);
-+
-+ if (!bkey_pack_pos(bkey_to_packed(&min_key), b->data->min_key, b)) {
-+ bkey_init(&min_key.k);
-+ min_key.k.p = b->data->min_key;
-+ }
-+
-+ if (!bkey_pack_pos(bkey_to_packed(&max_key), b->data->max_key, b)) {
-+ bkey_init(&max_key.k);
-+ max_key.k.p = b->data->max_key;
-+ }
-+
-+ /* Then we build the tree */
-+ eytzinger1_for_each(j, t->size - 1)
-+ make_bfloat(b, t, j,
-+ bkey_to_packed(&min_key),
-+ bkey_to_packed(&max_key));
-+}
-+
-+static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
-+{
-+ struct bset_tree *i;
-+
-+ for (i = b->set; i != t; i++)
-+ BUG_ON(bset_has_rw_aux_tree(i));
-+
-+ bch2_bset_set_no_aux_tree(b, t);
-+
-+ /* round up to next cacheline: */
-+ t->aux_data_offset = round_up(bset_aux_tree_buf_start(b, t),
-+ SMP_CACHE_BYTES / sizeof(u64));
-+
-+ bset_aux_tree_verify(b);
-+}
-+
-+void bch2_bset_build_aux_tree(struct btree *b, struct bset_tree *t,
-+ bool writeable)
-+{
-+ if (writeable
-+ ? bset_has_rw_aux_tree(t)
-+ : bset_has_ro_aux_tree(t))
-+ return;
-+
-+ bset_alloc_tree(b, t);
-+
-+ if (!__bset_tree_capacity(b, t))
-+ return;
-+
-+ if (writeable)
-+ __build_rw_aux_tree(b, t);
-+ else
-+ __build_ro_aux_tree(b, t);
-+
-+ bset_aux_tree_verify(b);
-+}
-+
-+void bch2_bset_init_first(struct btree *b, struct bset *i)
-+{
-+ struct bset_tree *t;
-+
-+ BUG_ON(b->nsets);
-+
-+ memset(i, 0, sizeof(*i));
-+ get_random_bytes(&i->seq, sizeof(i->seq));
-+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
-+
-+ t = &b->set[b->nsets++];
-+ set_btree_bset(b, t, i);
-+}
-+
-+void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
-+ struct btree_node_entry *bne)
-+{
-+ struct bset *i = &bne->keys;
-+ struct bset_tree *t;
-+
-+ BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c));
-+ BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b)));
-+ BUG_ON(b->nsets >= MAX_BSETS);
-+
-+ memset(i, 0, sizeof(*i));
-+ i->seq = btree_bset_first(b)->seq;
-+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
-+
-+ t = &b->set[b->nsets++];
-+ set_btree_bset(b, t, i);
-+}
-+
-+/*
-+ * find _some_ key in the same bset as @k that precedes @k - not necessarily the
-+ * immediate predecessor:
-+ */
-+static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
-+ struct bkey_packed *k)
-+{
-+ struct bkey_packed *p;
-+ unsigned offset;
-+ int j;
-+
-+ EBUG_ON(k < btree_bkey_first(b, t) ||
-+ k > btree_bkey_last(b, t));
-+
-+ if (k == btree_bkey_first(b, t))
-+ return NULL;
-+
-+ switch (bset_aux_tree_type(t)) {
-+ case BSET_NO_AUX_TREE:
-+ p = btree_bkey_first(b, t);
-+ break;
-+ case BSET_RO_AUX_TREE:
-+ j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k));
-+
-+ do {
-+ p = j ? tree_to_bkey(b, t,
-+ __inorder_to_eytzinger1(j--,
-+ t->size - 1, t->extra))
-+ : btree_bkey_first(b, t);
-+ } while (p >= k);
-+ break;
-+ case BSET_RW_AUX_TREE:
-+ offset = __btree_node_key_to_offset(b, k);
-+ j = rw_aux_tree_bsearch(b, t, offset);
-+ p = j ? rw_aux_to_bkey(b, t, j - 1)
-+ : btree_bkey_first(b, t);
-+ break;
-+ }
-+
-+ return p;
-+}
-+
-+struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
-+ struct bset_tree *t,
-+ struct bkey_packed *k,
-+ unsigned min_key_type)
-+{
-+ struct bkey_packed *p, *i, *ret = NULL, *orig_k = k;
-+
-+ while ((p = __bkey_prev(b, t, k)) && !ret) {
-+ for (i = p; i != k; i = bkey_p_next(i))
-+ if (i->type >= min_key_type)
-+ ret = i;
-+
-+ k = p;
-+ }
-+
-+ if (bch2_expensive_debug_checks) {
-+ BUG_ON(ret >= orig_k);
-+
-+ for (i = ret
-+ ? bkey_p_next(ret)
-+ : btree_bkey_first(b, t);
-+ i != orig_k;
-+ i = bkey_p_next(i))
-+ BUG_ON(i->type >= min_key_type);
-+ }
-+
-+ return ret;
-+}
-+
-+/* Insert */
-+
-+static void bch2_bset_fix_lookup_table(struct btree *b,
-+ struct bset_tree *t,
-+ struct bkey_packed *_where,
-+ unsigned clobber_u64s,
-+ unsigned new_u64s)
-+{
-+ int shift = new_u64s - clobber_u64s;
-+ unsigned l, j, where = __btree_node_key_to_offset(b, _where);
-+
-+ EBUG_ON(bset_has_ro_aux_tree(t));
-+
-+ if (!bset_has_rw_aux_tree(t))
-+ return;
-+
-+ /* returns first entry >= where */
-+ l = rw_aux_tree_bsearch(b, t, where);
-+
-+ if (!l) /* never delete first entry */
-+ l++;
-+ else if (l < t->size &&
-+ where < t->end_offset &&
-+ rw_aux_tree(b, t)[l].offset == where)
-+ rw_aux_tree_set(b, t, l++, _where);
-+
-+ /* l now > where */
-+
-+ for (j = l;
-+ j < t->size &&
-+ rw_aux_tree(b, t)[j].offset < where + clobber_u64s;
-+ j++)
-+ ;
-+
-+ if (j < t->size &&
-+ rw_aux_tree(b, t)[j].offset + shift ==
-+ rw_aux_tree(b, t)[l - 1].offset)
-+ j++;
-+
-+ memmove(&rw_aux_tree(b, t)[l],
-+ &rw_aux_tree(b, t)[j],
-+ (void *) &rw_aux_tree(b, t)[t->size] -
-+ (void *) &rw_aux_tree(b, t)[j]);
-+ t->size -= j - l;
-+
-+ for (j = l; j < t->size; j++)
-+ rw_aux_tree(b, t)[j].offset += shift;
-+
-+ EBUG_ON(l < t->size &&
-+ rw_aux_tree(b, t)[l].offset ==
-+ rw_aux_tree(b, t)[l - 1].offset);
-+
-+ if (t->size < bset_rw_tree_capacity(b, t) &&
-+ (l < t->size
-+ ? rw_aux_tree(b, t)[l].offset
-+ : t->end_offset) -
-+ rw_aux_tree(b, t)[l - 1].offset >
-+ L1_CACHE_BYTES / sizeof(u64)) {
-+ struct bkey_packed *start = rw_aux_to_bkey(b, t, l - 1);
-+ struct bkey_packed *end = l < t->size
-+ ? rw_aux_to_bkey(b, t, l)
-+ : btree_bkey_last(b, t);
-+ struct bkey_packed *k = start;
-+
-+ while (1) {
-+ k = bkey_p_next(k);
-+ if (k == end)
-+ break;
-+
-+ if ((void *) k - (void *) start >= L1_CACHE_BYTES) {
-+ memmove(&rw_aux_tree(b, t)[l + 1],
-+ &rw_aux_tree(b, t)[l],
-+ (void *) &rw_aux_tree(b, t)[t->size] -
-+ (void *) &rw_aux_tree(b, t)[l]);
-+ t->size++;
-+ rw_aux_tree_set(b, t, l, k);
-+ break;
-+ }
-+ }
-+ }
-+
-+ bch2_bset_verify_rw_aux_tree(b, t);
-+ bset_aux_tree_verify(b);
-+}
-+
-+void bch2_bset_insert(struct btree *b,
-+ struct btree_node_iter *iter,
-+ struct bkey_packed *where,
-+ struct bkey_i *insert,
-+ unsigned clobber_u64s)
-+{
-+ struct bkey_format *f = &b->format;
-+ struct bset_tree *t = bset_tree_last(b);
-+ struct bkey_packed packed, *src = bkey_to_packed(insert);
-+
-+ bch2_bset_verify_rw_aux_tree(b, t);
-+ bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s);
-+
-+ if (bch2_bkey_pack_key(&packed, &insert->k, f))
-+ src = &packed;
-+
-+ if (!bkey_deleted(&insert->k))
-+ btree_keys_account_key_add(&b->nr, t - b->set, src);
-+
-+ if (src->u64s != clobber_u64s) {
-+ u64 *src_p = (u64 *) where->_data + clobber_u64s;
-+ u64 *dst_p = (u64 *) where->_data + src->u64s;
-+
-+ EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) <
-+ (int) clobber_u64s - src->u64s);
-+
-+ memmove_u64s(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p);
-+ le16_add_cpu(&bset(b, t)->u64s, src->u64s - clobber_u64s);
-+ set_btree_bset_end(b, t);
-+ }
-+
-+ memcpy_u64s_small(where, src,
-+ bkeyp_key_u64s(f, src));
-+ memcpy_u64s(bkeyp_val(f, where), &insert->v,
-+ bkeyp_val_u64s(f, src));
-+
-+ if (src->u64s != clobber_u64s)
-+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
-+
-+ bch2_verify_btree_nr_keys(b);
-+}
-+
-+void bch2_bset_delete(struct btree *b,
-+ struct bkey_packed *where,
-+ unsigned clobber_u64s)
-+{
-+ struct bset_tree *t = bset_tree_last(b);
-+ u64 *src_p = (u64 *) where->_data + clobber_u64s;
-+ u64 *dst_p = where->_data;
-+
-+ bch2_bset_verify_rw_aux_tree(b, t);
-+
-+ EBUG_ON(le16_to_cpu(bset(b, t)->u64s) < clobber_u64s);
-+
-+ memmove_u64s_down(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p);
-+ le16_add_cpu(&bset(b, t)->u64s, -clobber_u64s);
-+ set_btree_bset_end(b, t);
-+
-+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, 0);
-+}
-+
-+/* Lookup */
-+
-+__flatten
-+static struct bkey_packed *bset_search_write_set(const struct btree *b,
-+ struct bset_tree *t,
-+ struct bpos *search)
-+{
-+ unsigned l = 0, r = t->size;
-+
-+ while (l + 1 != r) {
-+ unsigned m = (l + r) >> 1;
-+
-+ if (bpos_lt(rw_aux_tree(b, t)[m].k, *search))
-+ l = m;
-+ else
-+ r = m;
-+ }
-+
-+ return rw_aux_to_bkey(b, t, l);
-+}
-+
-+static inline void prefetch_four_cachelines(void *p)
-+{
-+#ifdef CONFIG_X86_64
-+ asm("prefetcht0 (-127 + 64 * 0)(%0);"
-+ "prefetcht0 (-127 + 64 * 1)(%0);"
-+ "prefetcht0 (-127 + 64 * 2)(%0);"
-+ "prefetcht0 (-127 + 64 * 3)(%0);"
-+ :
-+ : "r" (p + 127));
-+#else
-+ prefetch(p + L1_CACHE_BYTES * 0);
-+ prefetch(p + L1_CACHE_BYTES * 1);
-+ prefetch(p + L1_CACHE_BYTES * 2);
-+ prefetch(p + L1_CACHE_BYTES * 3);
-+#endif
-+}
-+
-+static inline bool bkey_mantissa_bits_dropped(const struct btree *b,
-+ const struct bkey_float *f,
-+ unsigned idx)
-+{
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+ unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits;
-+
-+ return f->exponent > key_bits_start;
-+#else
-+ unsigned key_bits_end = high_bit_offset + b->nr_key_bits;
-+
-+ return f->exponent + BKEY_MANTISSA_BITS < key_bits_end;
-+#endif
-+}
-+
-+__flatten
-+static struct bkey_packed *bset_search_tree(const struct btree *b,
-+ const struct bset_tree *t,
-+ const struct bpos *search,
-+ const struct bkey_packed *packed_search)
-+{
-+ struct ro_aux_tree *base = ro_aux_tree_base(b, t);
-+ struct bkey_float *f;
-+ struct bkey_packed *k;
-+ unsigned inorder, n = 1, l, r;
-+ int cmp;
-+
-+ do {
-+ if (likely(n << 4 < t->size))
-+ prefetch(&base->f[n << 4]);
-+
-+ f = &base->f[n];
-+ if (unlikely(f->exponent >= BFLOAT_FAILED))
-+ goto slowpath;
-+
-+ l = f->mantissa;
-+ r = bkey_mantissa(packed_search, f, n);
-+
-+ if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n))
-+ goto slowpath;
-+
-+ n = n * 2 + (l < r);
-+ continue;
-+slowpath:
-+ k = tree_to_bkey(b, t, n);
-+ cmp = bkey_cmp_p_or_unp(b, k, packed_search, search);
-+ if (!cmp)
-+ return k;
-+
-+ n = n * 2 + (cmp < 0);
-+ } while (n < t->size);
-+
-+ inorder = __eytzinger1_to_inorder(n >> 1, t->size - 1, t->extra);
-+
-+ /*
-+ * n would have been the node we recursed to - the low bit tells us if
-+ * we recursed left or recursed right.
-+ */
-+ if (likely(!(n & 1))) {
-+ --inorder;
-+ if (unlikely(!inorder))
-+ return btree_bkey_first(b, t);
-+
-+ f = &base->f[eytzinger1_prev(n >> 1, t->size - 1)];
-+ }
-+
-+ return cacheline_to_bkey(b, t, inorder, f->key_offset);
-+}
-+
-+static __always_inline __flatten
-+struct bkey_packed *__bch2_bset_search(struct btree *b,
-+ struct bset_tree *t,
-+ struct bpos *search,
-+ const struct bkey_packed *lossy_packed_search)
-+{
-+
-+ /*
-+ * First, we search for a cacheline, then lastly we do a linear search
-+ * within that cacheline.
-+ *
-+ * To search for the cacheline, there's three different possibilities:
-+ * * The set is too small to have a search tree, so we just do a linear
-+ * search over the whole set.
-+ * * The set is the one we're currently inserting into; keeping a full
-+ * auxiliary search tree up to date would be too expensive, so we
-+ * use a much simpler lookup table to do a binary search -
-+ * bset_search_write_set().
-+ * * Or we use the auxiliary search tree we constructed earlier -
-+ * bset_search_tree()
-+ */
-+
-+ switch (bset_aux_tree_type(t)) {
-+ case BSET_NO_AUX_TREE:
-+ return btree_bkey_first(b, t);
-+ case BSET_RW_AUX_TREE:
-+ return bset_search_write_set(b, t, search);
-+ case BSET_RO_AUX_TREE:
-+ return bset_search_tree(b, t, search, lossy_packed_search);
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static __always_inline __flatten
-+struct bkey_packed *bch2_bset_search_linear(struct btree *b,
-+ struct bset_tree *t,
-+ struct bpos *search,
-+ struct bkey_packed *packed_search,
-+ const struct bkey_packed *lossy_packed_search,
-+ struct bkey_packed *m)
-+{
-+ if (lossy_packed_search)
-+ while (m != btree_bkey_last(b, t) &&
-+ bkey_iter_cmp_p_or_unp(b, m,
-+ lossy_packed_search, search) < 0)
-+ m = bkey_p_next(m);
-+
-+ if (!packed_search)
-+ while (m != btree_bkey_last(b, t) &&
-+ bkey_iter_pos_cmp(b, m, search) < 0)
-+ m = bkey_p_next(m);
-+
-+ if (bch2_expensive_debug_checks) {
-+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
-+
-+ BUG_ON(prev &&
-+ bkey_iter_cmp_p_or_unp(b, prev,
-+ packed_search, search) >= 0);
-+ }
-+
-+ return m;
-+}
-+
-+/* Btree node iterator */
-+
-+static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
-+ struct btree *b,
-+ const struct bkey_packed *k,
-+ const struct bkey_packed *end)
-+{
-+ if (k != end) {
-+ struct btree_node_iter_set *pos;
-+
-+ btree_node_iter_for_each(iter, pos)
-+ ;
-+
-+ BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data));
-+ *pos = (struct btree_node_iter_set) {
-+ __btree_node_key_to_offset(b, k),
-+ __btree_node_key_to_offset(b, end)
-+ };
-+ }
-+}
-+
-+void bch2_btree_node_iter_push(struct btree_node_iter *iter,
-+ struct btree *b,
-+ const struct bkey_packed *k,
-+ const struct bkey_packed *end)
-+{
-+ __bch2_btree_node_iter_push(iter, b, k, end);
-+ bch2_btree_node_iter_sort(iter, b);
-+}
-+
-+noinline __flatten __cold
-+static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
-+ struct btree *b, struct bpos *search)
-+{
-+ struct bkey_packed *k;
-+
-+ trace_bkey_pack_pos_fail(search);
-+
-+ bch2_btree_node_iter_init_from_start(iter, b);
-+
-+ while ((k = bch2_btree_node_iter_peek(iter, b)) &&
-+ bkey_iter_pos_cmp(b, k, search) < 0)
-+ bch2_btree_node_iter_advance(iter, b);
-+}
-+
-+/**
-+ * bch2_btree_node_iter_init - initialize a btree node iterator, starting from a
-+ * given position
-+ *
-+ * @iter: iterator to initialize
-+ * @b: btree node to search
-+ * @search: search key
-+ *
-+ * Main entry point to the lookup code for individual btree nodes:
-+ *
-+ * NOTE:
-+ *
-+ * When you don't filter out deleted keys, btree nodes _do_ contain duplicate
-+ * keys. This doesn't matter for most code, but it does matter for lookups.
-+ *
-+ * Some adjacent keys with a string of equal keys:
-+ * i j k k k k l m
-+ *
-+ * If you search for k, the lookup code isn't guaranteed to return you any
-+ * specific k. The lookup code is conceptually doing a binary search and
-+ * iterating backwards is very expensive so if the pivot happens to land at the
-+ * last k that's what you'll get.
-+ *
-+ * This works out ok, but it's something to be aware of:
-+ *
-+ * - For non extents, we guarantee that the live key comes last - see
-+ * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't
-+ * see will only be deleted keys you don't care about.
-+ *
-+ * - For extents, deleted keys sort last (see the comment at the top of this
-+ * file). But when you're searching for extents, you actually want the first
-+ * key strictly greater than your search key - an extent that compares equal
-+ * to the search key is going to have 0 sectors after the search key.
-+ *
-+ * But this does mean that we can't just search for
-+ * bpos_successor(start_of_range) to get the first extent that overlaps with
-+ * the range we want - if we're unlucky and there's an extent that ends
-+ * exactly where we searched, then there could be a deleted key at the same
-+ * position and we'd get that when we search instead of the preceding extent
-+ * we needed.
-+ *
-+ * So we've got to search for start_of_range, then after the lookup iterate
-+ * past any extents that compare equal to the position we searched for.
-+ */
-+__flatten
-+void bch2_btree_node_iter_init(struct btree_node_iter *iter,
-+ struct btree *b, struct bpos *search)
-+{
-+ struct bkey_packed p, *packed_search = NULL;
-+ struct btree_node_iter_set *pos = iter->data;
-+ struct bkey_packed *k[MAX_BSETS];
-+ unsigned i;
-+
-+ EBUG_ON(bpos_lt(*search, b->data->min_key));
-+ EBUG_ON(bpos_gt(*search, b->data->max_key));
-+ bset_aux_tree_verify(b);
-+
-+ memset(iter, 0, sizeof(*iter));
-+
-+ switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) {
-+ case BKEY_PACK_POS_EXACT:
-+ packed_search = &p;
-+ break;
-+ case BKEY_PACK_POS_SMALLER:
-+ packed_search = NULL;
-+ break;
-+ case BKEY_PACK_POS_FAIL:
-+ btree_node_iter_init_pack_failed(iter, b, search);
-+ return;
-+ }
-+
-+ for (i = 0; i < b->nsets; i++) {
-+ k[i] = __bch2_bset_search(b, b->set + i, search, &p);
-+ prefetch_four_cachelines(k[i]);
-+ }
-+
-+ for (i = 0; i < b->nsets; i++) {
-+ struct bset_tree *t = b->set + i;
-+ struct bkey_packed *end = btree_bkey_last(b, t);
-+
-+ k[i] = bch2_bset_search_linear(b, t, search,
-+ packed_search, &p, k[i]);
-+ if (k[i] != end)
-+ *pos++ = (struct btree_node_iter_set) {
-+ __btree_node_key_to_offset(b, k[i]),
-+ __btree_node_key_to_offset(b, end)
-+ };
-+ }
-+
-+ bch2_btree_node_iter_sort(iter, b);
-+}
-+
-+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter,
-+ struct btree *b)
-+{
-+ struct bset_tree *t;
-+
-+ memset(iter, 0, sizeof(*iter));
-+
-+ for_each_bset(b, t)
-+ __bch2_btree_node_iter_push(iter, b,
-+ btree_bkey_first(b, t),
-+ btree_bkey_last(b, t));
-+ bch2_btree_node_iter_sort(iter, b);
-+}
-+
-+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter,
-+ struct btree *b,
-+ struct bset_tree *t)
-+{
-+ struct btree_node_iter_set *set;
-+
-+ btree_node_iter_for_each(iter, set)
-+ if (set->end == t->end_offset)
-+ return __btree_node_offset_to_key(b, set->k);
-+
-+ return btree_bkey_last(b, t);
-+}
-+
-+static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter,
-+ struct btree *b,
-+ unsigned first)
-+{
-+ bool ret;
-+
-+ if ((ret = (btree_node_iter_cmp(b,
-+ iter->data[first],
-+ iter->data[first + 1]) > 0)))
-+ swap(iter->data[first], iter->data[first + 1]);
-+ return ret;
-+}
-+
-+void bch2_btree_node_iter_sort(struct btree_node_iter *iter,
-+ struct btree *b)
-+{
-+ /* unrolled bubble sort: */
-+
-+ if (!__btree_node_iter_set_end(iter, 2)) {
-+ btree_node_iter_sort_two(iter, b, 0);
-+ btree_node_iter_sort_two(iter, b, 1);
-+ }
-+
-+ if (!__btree_node_iter_set_end(iter, 1))
-+ btree_node_iter_sort_two(iter, b, 0);
-+}
-+
-+void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter,
-+ struct btree_node_iter_set *set)
-+{
-+ struct btree_node_iter_set *last =
-+ iter->data + ARRAY_SIZE(iter->data) - 1;
-+
-+ memmove(&set[0], &set[1], (void *) last - (void *) set);
-+ *last = (struct btree_node_iter_set) { 0, 0 };
-+}
-+
-+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
-+ struct btree *b)
-+{
-+ iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s;
-+
-+ EBUG_ON(iter->data->k > iter->data->end);
-+
-+ if (unlikely(__btree_node_iter_set_end(iter, 0))) {
-+ /* avoid an expensive memmove call: */
-+ iter->data[0] = iter->data[1];
-+ iter->data[1] = iter->data[2];
-+ iter->data[2] = (struct btree_node_iter_set) { 0, 0 };
-+ return;
-+ }
-+
-+ if (__btree_node_iter_set_end(iter, 1))
-+ return;
-+
-+ if (!btree_node_iter_sort_two(iter, b, 0))
-+ return;
-+
-+ if (__btree_node_iter_set_end(iter, 2))
-+ return;
-+
-+ btree_node_iter_sort_two(iter, b, 1);
-+}
-+
-+void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
-+ struct btree *b)
-+{
-+ if (bch2_expensive_debug_checks) {
-+ bch2_btree_node_iter_verify(iter, b);
-+ bch2_btree_node_iter_next_check(iter, b);
-+ }
-+
-+ __bch2_btree_node_iter_advance(iter, b);
-+}
-+
-+/*
-+ * Expensive:
-+ */
-+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
-+ struct btree *b)
-+{
-+ struct bkey_packed *k, *prev = NULL;
-+ struct btree_node_iter_set *set;
-+ struct bset_tree *t;
-+ unsigned end = 0;
-+
-+ if (bch2_expensive_debug_checks)
-+ bch2_btree_node_iter_verify(iter, b);
-+
-+ for_each_bset(b, t) {
-+ k = bch2_bkey_prev_all(b, t,
-+ bch2_btree_node_iter_bset_pos(iter, b, t));
-+ if (k &&
-+ (!prev || bkey_iter_cmp(b, k, prev) > 0)) {
-+ prev = k;
-+ end = t->end_offset;
-+ }
-+ }
-+
-+ if (!prev)
-+ return NULL;
-+
-+ /*
-+ * We're manually memmoving instead of just calling sort() to ensure the
-+ * prev we picked ends up in slot 0 - sort won't necessarily put it
-+ * there because of duplicate deleted keys:
-+ */
-+ btree_node_iter_for_each(iter, set)
-+ if (set->end == end)
-+ goto found;
-+
-+ BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]);
-+found:
-+ BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data));
-+
-+ memmove(&iter->data[1],
-+ &iter->data[0],
-+ (void *) set - (void *) &iter->data[0]);
-+
-+ iter->data[0].k = __btree_node_key_to_offset(b, prev);
-+ iter->data[0].end = end;
-+
-+ if (bch2_expensive_debug_checks)
-+ bch2_btree_node_iter_verify(iter, b);
-+ return prev;
-+}
-+
-+struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *iter,
-+ struct btree *b)
-+{
-+ struct bkey_packed *prev;
-+
-+ do {
-+ prev = bch2_btree_node_iter_prev_all(iter, b);
-+ } while (prev && bkey_deleted(prev));
-+
-+ return prev;
-+}
-+
-+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter,
-+ struct btree *b,
-+ struct bkey *u)
-+{
-+ struct bkey_packed *k = bch2_btree_node_iter_peek(iter, b);
-+
-+ return k ? bkey_disassemble(b, k, u) : bkey_s_c_null;
-+}
-+
-+/* Mergesort */
-+
-+void bch2_btree_keys_stats(const struct btree *b, struct bset_stats *stats)
-+{
-+ const struct bset_tree *t;
-+
-+ for_each_bset(b, t) {
-+ enum bset_aux_tree_type type = bset_aux_tree_type(t);
-+ size_t j;
-+
-+ stats->sets[type].nr++;
-+ stats->sets[type].bytes += le16_to_cpu(bset(b, t)->u64s) *
-+ sizeof(u64);
-+
-+ if (bset_has_ro_aux_tree(t)) {
-+ stats->floats += t->size - 1;
-+
-+ for (j = 1; j < t->size; j++)
-+ stats->failed +=
-+ bkey_float(b, t, j)->exponent ==
-+ BFLOAT_FAILED;
-+ }
-+ }
-+}
-+
-+void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
-+ struct bkey_packed *k)
-+{
-+ struct bset_tree *t = bch2_bkey_to_bset(b, k);
-+ struct bkey uk;
-+ unsigned j, inorder;
-+
-+ if (!bset_has_ro_aux_tree(t))
-+ return;
-+
-+ inorder = bkey_to_cacheline(b, t, k);
-+ if (!inorder || inorder >= t->size)
-+ return;
-+
-+ j = __inorder_to_eytzinger1(inorder, t->size - 1, t->extra);
-+ if (k != tree_to_bkey(b, t, j))
-+ return;
-+
-+ switch (bkey_float(b, t, j)->exponent) {
-+ case BFLOAT_FAILED:
-+ uk = bkey_unpack_key(b, k);
-+ prt_printf(out,
-+ " failed unpacked at depth %u\n"
-+ "\t",
-+ ilog2(j));
-+ bch2_bpos_to_text(out, uk.p);
-+ prt_printf(out, "\n");
-+ break;
-+ }
-+}
-diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h
-new file mode 100644
-index 000000000000..632c2b8c5460
---- /dev/null
-+++ b/fs/bcachefs/bset.h
-@@ -0,0 +1,541 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BSET_H
-+#define _BCACHEFS_BSET_H
-+
-+#include <linux/kernel.h>
-+#include <linux/types.h>
-+
-+#include "bcachefs.h"
-+#include "bkey.h"
-+#include "bkey_methods.h"
-+#include "btree_types.h"
-+#include "util.h" /* for time_stats */
-+#include "vstructs.h"
-+
-+/*
-+ * BKEYS:
-+ *
-+ * A bkey contains a key, a size field, a variable number of pointers, and some
-+ * ancillary flag bits.
-+ *
-+ * We use two different functions for validating bkeys, bkey_invalid and
-+ * bkey_deleted().
-+ *
-+ * The one exception to the rule that ptr_invalid() filters out invalid keys is
-+ * that it also filters out keys of size 0 - these are keys that have been
-+ * completely overwritten. It'd be safe to delete these in memory while leaving
-+ * them on disk, just unnecessary work - so we filter them out when resorting
-+ * instead.
-+ *
-+ * We can't filter out stale keys when we're resorting, because garbage
-+ * collection needs to find them to ensure bucket gens don't wrap around -
-+ * unless we're rewriting the btree node those stale keys still exist on disk.
-+ *
-+ * We also implement functions here for removing some number of sectors from the
-+ * front or the back of a bkey - this is mainly used for fixing overlapping
-+ * extents, by removing the overlapping sectors from the older key.
-+ *
-+ * BSETS:
-+ *
-+ * A bset is an array of bkeys laid out contiguously in memory in sorted order,
-+ * along with a header. A btree node is made up of a number of these, written at
-+ * different times.
-+ *
-+ * There could be many of them on disk, but we never allow there to be more than
-+ * 4 in memory - we lazily resort as needed.
-+ *
-+ * We implement code here for creating and maintaining auxiliary search trees
-+ * (described below) for searching an individial bset, and on top of that we
-+ * implement a btree iterator.
-+ *
-+ * BTREE ITERATOR:
-+ *
-+ * Most of the code in bcache doesn't care about an individual bset - it needs
-+ * to search entire btree nodes and iterate over them in sorted order.
-+ *
-+ * The btree iterator code serves both functions; it iterates through the keys
-+ * in a btree node in sorted order, starting from either keys after a specific
-+ * point (if you pass it a search key) or the start of the btree node.
-+ *
-+ * AUXILIARY SEARCH TREES:
-+ *
-+ * Since keys are variable length, we can't use a binary search on a bset - we
-+ * wouldn't be able to find the start of the next key. But binary searches are
-+ * slow anyways, due to terrible cache behaviour; bcache originally used binary
-+ * searches and that code topped out at under 50k lookups/second.
-+ *
-+ * So we need to construct some sort of lookup table. Since we only insert keys
-+ * into the last (unwritten) set, most of the keys within a given btree node are
-+ * usually in sets that are mostly constant. We use two different types of
-+ * lookup tables to take advantage of this.
-+ *
-+ * Both lookup tables share in common that they don't index every key in the
-+ * set; they index one key every BSET_CACHELINE bytes, and then a linear search
-+ * is used for the rest.
-+ *
-+ * For sets that have been written to disk and are no longer being inserted
-+ * into, we construct a binary search tree in an array - traversing a binary
-+ * search tree in an array gives excellent locality of reference and is very
-+ * fast, since both children of any node are adjacent to each other in memory
-+ * (and their grandchildren, and great grandchildren...) - this means
-+ * prefetching can be used to great effect.
-+ *
-+ * It's quite useful performance wise to keep these nodes small - not just
-+ * because they're more likely to be in L2, but also because we can prefetch
-+ * more nodes on a single cacheline and thus prefetch more iterations in advance
-+ * when traversing this tree.
-+ *
-+ * Nodes in the auxiliary search tree must contain both a key to compare against
-+ * (we don't want to fetch the key from the set, that would defeat the purpose),
-+ * and a pointer to the key. We use a few tricks to compress both of these.
-+ *
-+ * To compress the pointer, we take advantage of the fact that one node in the
-+ * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have
-+ * a function (to_inorder()) that takes the index of a node in a binary tree and
-+ * returns what its index would be in an inorder traversal, so we only have to
-+ * store the low bits of the offset.
-+ *
-+ * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To
-+ * compress that, we take advantage of the fact that when we're traversing the
-+ * search tree at every iteration we know that both our search key and the key
-+ * we're looking for lie within some range - bounded by our previous
-+ * comparisons. (We special case the start of a search so that this is true even
-+ * at the root of the tree).
-+ *
-+ * So we know the key we're looking for is between a and b, and a and b don't
-+ * differ higher than bit 50, we don't need to check anything higher than bit
-+ * 50.
-+ *
-+ * We don't usually need the rest of the bits, either; we only need enough bits
-+ * to partition the key range we're currently checking. Consider key n - the
-+ * key our auxiliary search tree node corresponds to, and key p, the key
-+ * immediately preceding n. The lowest bit we need to store in the auxiliary
-+ * search tree is the highest bit that differs between n and p.
-+ *
-+ * Note that this could be bit 0 - we might sometimes need all 80 bits to do the
-+ * comparison. But we'd really like our nodes in the auxiliary search tree to be
-+ * of fixed size.
-+ *
-+ * The solution is to make them fixed size, and when we're constructing a node
-+ * check if p and n differed in the bits we needed them to. If they don't we
-+ * flag that node, and when doing lookups we fallback to comparing against the
-+ * real key. As long as this doesn't happen to often (and it seems to reliably
-+ * happen a bit less than 1% of the time), we win - even on failures, that key
-+ * is then more likely to be in cache than if we were doing binary searches all
-+ * the way, since we're touching so much less memory.
-+ *
-+ * The keys in the auxiliary search tree are stored in (software) floating
-+ * point, with an exponent and a mantissa. The exponent needs to be big enough
-+ * to address all the bits in the original key, but the number of bits in the
-+ * mantissa is somewhat arbitrary; more bits just gets us fewer failures.
-+ *
-+ * We need 7 bits for the exponent and 3 bits for the key's offset (since keys
-+ * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes.
-+ * We need one node per 128 bytes in the btree node, which means the auxiliary
-+ * search trees take up 3% as much memory as the btree itself.
-+ *
-+ * Constructing these auxiliary search trees is moderately expensive, and we
-+ * don't want to be constantly rebuilding the search tree for the last set
-+ * whenever we insert another key into it. For the unwritten set, we use a much
-+ * simpler lookup table - it's just a flat array, so index i in the lookup table
-+ * corresponds to the i range of BSET_CACHELINE bytes in the set. Indexing
-+ * within each byte range works the same as with the auxiliary search trees.
-+ *
-+ * These are much easier to keep up to date when we insert a key - we do it
-+ * somewhat lazily; when we shift a key up we usually just increment the pointer
-+ * to it, only when it would overflow do we go to the trouble of finding the
-+ * first key in that range of bytes again.
-+ */
-+
-+enum bset_aux_tree_type {
-+ BSET_NO_AUX_TREE,
-+ BSET_RO_AUX_TREE,
-+ BSET_RW_AUX_TREE,
-+};
-+
-+#define BSET_TREE_NR_TYPES 3
-+
-+#define BSET_NO_AUX_TREE_VAL (U16_MAX)
-+#define BSET_RW_AUX_TREE_VAL (U16_MAX - 1)
-+
-+static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree *t)
-+{
-+ switch (t->extra) {
-+ case BSET_NO_AUX_TREE_VAL:
-+ EBUG_ON(t->size);
-+ return BSET_NO_AUX_TREE;
-+ case BSET_RW_AUX_TREE_VAL:
-+ EBUG_ON(!t->size);
-+ return BSET_RW_AUX_TREE;
-+ default:
-+ EBUG_ON(!t->size);
-+ return BSET_RO_AUX_TREE;
-+ }
-+}
-+
-+/*
-+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
-+ * it used to be 64, but I realized the lookup code would touch slightly less
-+ * memory if it was 128.
-+ *
-+ * It definites the number of bytes (in struct bset) per struct bkey_float in
-+ * the auxiliar search tree - when we're done searching the bset_float tree we
-+ * have this many bytes left that we do a linear search over.
-+ *
-+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
-+ * we're touching one fewer cacheline in the bset tree in exchange for one more
-+ * cacheline in the linear search - but the linear search might stop before it
-+ * gets to the second cacheline.
-+ */
-+
-+#define BSET_CACHELINE 256
-+
-+static inline size_t btree_keys_cachelines(const struct btree *b)
-+{
-+ return (1U << b->byte_order) / BSET_CACHELINE;
-+}
-+
-+static inline size_t btree_aux_data_bytes(const struct btree *b)
-+{
-+ return btree_keys_cachelines(b) * 8;
-+}
-+
-+static inline size_t btree_aux_data_u64s(const struct btree *b)
-+{
-+ return btree_aux_data_bytes(b) / sizeof(u64);
-+}
-+
-+#define for_each_bset(_b, _t) \
-+ for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)
-+
-+#define bset_tree_for_each_key(_b, _t, _k) \
-+ for (_k = btree_bkey_first(_b, _t); \
-+ _k != btree_bkey_last(_b, _t); \
-+ _k = bkey_p_next(_k))
-+
-+static inline bool bset_has_ro_aux_tree(const struct bset_tree *t)
-+{
-+ return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
-+}
-+
-+static inline bool bset_has_rw_aux_tree(struct bset_tree *t)
-+{
-+ return bset_aux_tree_type(t) == BSET_RW_AUX_TREE;
-+}
-+
-+static inline void bch2_bset_set_no_aux_tree(struct btree *b,
-+ struct bset_tree *t)
-+{
-+ BUG_ON(t < b->set);
-+
-+ for (; t < b->set + ARRAY_SIZE(b->set); t++) {
-+ t->size = 0;
-+ t->extra = BSET_NO_AUX_TREE_VAL;
-+ t->aux_data_offset = U16_MAX;
-+ }
-+}
-+
-+static inline void btree_node_set_format(struct btree *b,
-+ struct bkey_format f)
-+{
-+ int len;
-+
-+ b->format = f;
-+ b->nr_key_bits = bkey_format_key_bits(&f);
-+
-+ len = bch2_compile_bkey_format(&b->format, b->aux_data);
-+ BUG_ON(len < 0 || len > U8_MAX);
-+
-+ b->unpack_fn_len = len;
-+
-+ bch2_bset_set_no_aux_tree(b, b->set);
-+}
-+
-+static inline struct bset *bset_next_set(struct btree *b,
-+ unsigned block_bytes)
-+{
-+ struct bset *i = btree_bset_last(b);
-+
-+ EBUG_ON(!is_power_of_2(block_bytes));
-+
-+ return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
-+}
-+
-+void bch2_btree_keys_init(struct btree *);
-+
-+void bch2_bset_init_first(struct btree *, struct bset *);
-+void bch2_bset_init_next(struct bch_fs *, struct btree *,
-+ struct btree_node_entry *);
-+void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
-+
-+void bch2_bset_insert(struct btree *, struct btree_node_iter *,
-+ struct bkey_packed *, struct bkey_i *, unsigned);
-+void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned);
-+
-+/* Bkey utility code */
-+
-+/* packed or unpacked */
-+static inline int bkey_cmp_p_or_unp(const struct btree *b,
-+ const struct bkey_packed *l,
-+ const struct bkey_packed *r_packed,
-+ const struct bpos *r)
-+{
-+ EBUG_ON(r_packed && !bkey_packed(r_packed));
-+
-+ if (unlikely(!bkey_packed(l)))
-+ return bpos_cmp(packed_to_bkey_c(l)->p, *r);
-+
-+ if (likely(r_packed))
-+ return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b);
-+
-+ return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
-+}
-+
-+static inline struct bset_tree *
-+bch2_bkey_to_bset_inlined(struct btree *b, struct bkey_packed *k)
-+{
-+ unsigned offset = __btree_node_key_to_offset(b, k);
-+ struct bset_tree *t;
-+
-+ for_each_bset(b, t)
-+ if (offset <= t->end_offset) {
-+ EBUG_ON(offset < btree_bkey_first_offset(t));
-+ return t;
-+ }
-+
-+ BUG();
-+}
-+
-+struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);
-+
-+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *,
-+ struct bkey_packed *, unsigned);
-+
-+static inline struct bkey_packed *
-+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
-+{
-+ return bch2_bkey_prev_filter(b, t, k, 0);
-+}
-+
-+static inline struct bkey_packed *
-+bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
-+{
-+ return bch2_bkey_prev_filter(b, t, k, 1);
-+}
-+
-+/* Btree key iteration */
-+
-+void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
-+ const struct bkey_packed *,
-+ const struct bkey_packed *);
-+void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
-+ struct bpos *);
-+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
-+ struct btree *);
-+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
-+ struct btree *,
-+ struct bset_tree *);
-+
-+void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *);
-+void bch2_btree_node_iter_set_drop(struct btree_node_iter *,
-+ struct btree_node_iter_set *);
-+void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *);
-+
-+#define btree_node_iter_for_each(_iter, _set) \
-+ for (_set = (_iter)->data; \
-+ _set < (_iter)->data + ARRAY_SIZE((_iter)->data) && \
-+ (_set)->k != (_set)->end; \
-+ _set++)
-+
-+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter,
-+ unsigned i)
-+{
-+ return iter->data[i].k == iter->data[i].end;
-+}
-+
-+static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
-+{
-+ return __btree_node_iter_set_end(iter, 0);
-+}
-+
-+/*
-+ * When keys compare equal, deleted keys compare first:
-+ *
-+ * XXX: only need to compare pointers for keys that are both within a
-+ * btree_node_iterator - we need to break ties for prev() to work correctly
-+ */
-+static inline int bkey_iter_cmp(const struct btree *b,
-+ const struct bkey_packed *l,
-+ const struct bkey_packed *r)
-+{
-+ return bch2_bkey_cmp_packed(b, l, r)
-+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
-+ ?: cmp_int(l, r);
-+}
-+
-+static inline int btree_node_iter_cmp(const struct btree *b,
-+ struct btree_node_iter_set l,
-+ struct btree_node_iter_set r)
-+{
-+ return bkey_iter_cmp(b,
-+ __btree_node_offset_to_key(b, l.k),
-+ __btree_node_offset_to_key(b, r.k));
-+}
-+
-+/* These assume r (the search key) is not a deleted key: */
-+static inline int bkey_iter_pos_cmp(const struct btree *b,
-+ const struct bkey_packed *l,
-+ const struct bpos *r)
-+{
-+ return bkey_cmp_left_packed(b, l, r)
-+ ?: -((int) bkey_deleted(l));
-+}
-+
-+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b,
-+ const struct bkey_packed *l,
-+ const struct bkey_packed *r_packed,
-+ const struct bpos *r)
-+{
-+ return bkey_cmp_p_or_unp(b, l, r_packed, r)
-+ ?: -((int) bkey_deleted(l));
-+}
-+
-+static inline struct bkey_packed *
-+__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
-+ struct btree *b)
-+{
-+ return __btree_node_offset_to_key(b, iter->data->k);
-+}
-+
-+static inline struct bkey_packed *
-+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, struct btree *b)
-+{
-+ return !bch2_btree_node_iter_end(iter)
-+ ? __btree_node_offset_to_key(b, iter->data->k)
-+ : NULL;
-+}
-+
-+static inline struct bkey_packed *
-+bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
-+{
-+ struct bkey_packed *k;
-+
-+ while ((k = bch2_btree_node_iter_peek_all(iter, b)) &&
-+ bkey_deleted(k))
-+ bch2_btree_node_iter_advance(iter, b);
-+
-+ return k;
-+}
-+
-+static inline struct bkey_packed *
-+bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b)
-+{
-+ struct bkey_packed *ret = bch2_btree_node_iter_peek_all(iter, b);
-+
-+ if (ret)
-+ bch2_btree_node_iter_advance(iter, b);
-+
-+ return ret;
-+}
-+
-+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
-+ struct btree *);
-+struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *,
-+ struct btree *);
-+
-+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
-+ struct btree *,
-+ struct bkey *);
-+
-+#define for_each_btree_node_key(b, k, iter) \
-+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \
-+ (k = bch2_btree_node_iter_peek((iter), (b))); \
-+ bch2_btree_node_iter_advance(iter, b))
-+
-+#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
-+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \
-+ (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
-+ bch2_btree_node_iter_advance(iter, b))
-+
-+/* Accounting: */
-+
-+static inline void btree_keys_account_key(struct btree_nr_keys *n,
-+ unsigned bset,
-+ struct bkey_packed *k,
-+ int sign)
-+{
-+ n->live_u64s += k->u64s * sign;
-+ n->bset_u64s[bset] += k->u64s * sign;
-+
-+ if (bkey_packed(k))
-+ n->packed_keys += sign;
-+ else
-+ n->unpacked_keys += sign;
-+}
-+
-+static inline void btree_keys_account_val_delta(struct btree *b,
-+ struct bkey_packed *k,
-+ int delta)
-+{
-+ struct bset_tree *t = bch2_bkey_to_bset(b, k);
-+
-+ b->nr.live_u64s += delta;
-+ b->nr.bset_u64s[t - b->set] += delta;
-+}
-+
-+#define btree_keys_account_key_add(_nr, _bset_idx, _k) \
-+ btree_keys_account_key(_nr, _bset_idx, _k, 1)
-+#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \
-+ btree_keys_account_key(_nr, _bset_idx, _k, -1)
-+
-+#define btree_account_key_add(_b, _k) \
-+ btree_keys_account_key(&(_b)->nr, \
-+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1)
-+#define btree_account_key_drop(_b, _k) \
-+ btree_keys_account_key(&(_b)->nr, \
-+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1)
-+
-+struct bset_stats {
-+ struct {
-+ size_t nr, bytes;
-+ } sets[BSET_TREE_NR_TYPES];
-+
-+ size_t floats;
-+ size_t failed;
-+};
-+
-+void bch2_btree_keys_stats(const struct btree *, struct bset_stats *);
-+void bch2_bfloat_to_text(struct printbuf *, struct btree *,
-+ struct bkey_packed *);
-+
-+/* Debug stuff */
-+
-+void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned);
-+void bch2_dump_btree_node(struct bch_fs *, struct btree *);
-+void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+
-+void __bch2_verify_btree_nr_keys(struct btree *);
-+void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
-+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
-+ struct bkey_packed *, unsigned);
-+
-+#else
-+
-+static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
-+static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
-+ struct btree *b) {}
-+static inline void bch2_verify_insert_pos(struct btree *b,
-+ struct bkey_packed *where,
-+ struct bkey_packed *insert,
-+ unsigned clobber_u64s) {}
-+#endif
-+
-+static inline void bch2_verify_btree_nr_keys(struct btree *b)
-+{
-+ if (bch2_debug_check_btree_accounting)
-+ __bch2_verify_btree_nr_keys(b);
-+}
-+
-+#endif /* _BCACHEFS_BSET_H */
-diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
-new file mode 100644
-index 000000000000..0b084fbc478a
---- /dev/null
-+++ b/fs/bcachefs/btree_cache.c
-@@ -0,0 +1,1215 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "btree_cache.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "btree_locking.h"
-+#include "debug.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "trace.h"
-+
-+#include <linux/prefetch.h>
-+#include <linux/sched/mm.h>
-+
-+const char * const bch2_btree_node_flags[] = {
-+#define x(f) #f,
-+ BTREE_FLAGS()
-+#undef x
-+ NULL
-+};
-+
-+void bch2_recalc_btree_reserve(struct bch_fs *c)
-+{
-+ unsigned i, reserve = 16;
-+
-+ if (!c->btree_roots_known[0].b)
-+ reserve += 8;
-+
-+ for (i = 0; i < btree_id_nr_alive(c); i++) {
-+ struct btree_root *r = bch2_btree_id_root(c, i);
-+
-+ if (r->b)
-+ reserve += min_t(unsigned, 1, r->b->c.level) * 8;
-+ }
-+
-+ c->btree_cache.reserve = reserve;
-+}
-+
-+static inline unsigned btree_cache_can_free(struct btree_cache *bc)
-+{
-+ return max_t(int, 0, bc->used - bc->reserve);
-+}
-+
-+static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
-+{
-+ if (b->c.lock.readers)
-+ list_move(&b->list, &bc->freed_pcpu);
-+ else
-+ list_move(&b->list, &bc->freed_nonpcpu);
-+}
-+
-+static void btree_node_data_free(struct bch_fs *c, struct btree *b)
-+{
-+ struct btree_cache *bc = &c->btree_cache;
-+
-+ EBUG_ON(btree_node_write_in_flight(b));
-+
-+ clear_btree_node_just_written(b);
-+
-+ kvpfree(b->data, btree_bytes(c));
-+ b->data = NULL;
-+#ifdef __KERNEL__
-+ kvfree(b->aux_data);
-+#else
-+ munmap(b->aux_data, btree_aux_data_bytes(b));
-+#endif
-+ b->aux_data = NULL;
-+
-+ bc->used--;
-+
-+ btree_node_to_freedlist(bc, b);
-+}
-+
-+static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
-+ const void *obj)
-+{
-+ const struct btree *b = obj;
-+ const u64 *v = arg->key;
-+
-+ return b->hash_val == *v ? 0 : 1;
-+}
-+
-+static const struct rhashtable_params bch_btree_cache_params = {
-+ .head_offset = offsetof(struct btree, hash),
-+ .key_offset = offsetof(struct btree, hash_val),
-+ .key_len = sizeof(u64),
-+ .obj_cmpfn = bch2_btree_cache_cmp_fn,
-+};
-+
-+static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
-+{
-+ BUG_ON(b->data || b->aux_data);
-+
-+ b->data = kvpmalloc(btree_bytes(c), gfp);
-+ if (!b->data)
-+ return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
-+#ifdef __KERNEL__
-+ b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
-+#else
-+ b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
-+ PROT_READ|PROT_WRITE|PROT_EXEC,
-+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
-+ if (b->aux_data == MAP_FAILED)
-+ b->aux_data = NULL;
-+#endif
-+ if (!b->aux_data) {
-+ kvpfree(b->data, btree_bytes(c));
-+ b->data = NULL;
-+ return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
-+ }
-+
-+ return 0;
-+}
-+
-+static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
-+{
-+ struct btree *b;
-+
-+ b = kzalloc(sizeof(struct btree), gfp);
-+ if (!b)
-+ return NULL;
-+
-+ bkey_btree_ptr_init(&b->key);
-+ INIT_LIST_HEAD(&b->list);
-+ INIT_LIST_HEAD(&b->write_blocked);
-+ b->byte_order = ilog2(btree_bytes(c));
-+ return b;
-+}
-+
-+struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
-+{
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b;
-+
-+ b = __btree_node_mem_alloc(c, GFP_KERNEL);
-+ if (!b)
-+ return NULL;
-+
-+ if (btree_node_data_alloc(c, b, GFP_KERNEL)) {
-+ kfree(b);
-+ return NULL;
-+ }
-+
-+ bch2_btree_lock_init(&b->c, 0);
-+
-+ bc->used++;
-+ list_add(&b->list, &bc->freeable);
-+ return b;
-+}
-+
-+/* Btree in memory cache - hash table */
-+
-+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
-+{
-+ int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
-+
-+ BUG_ON(ret);
-+
-+ /* Cause future lookups for this node to fail: */
-+ b->hash_val = 0;
-+}
-+
-+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
-+{
-+ BUG_ON(b->hash_val);
-+ b->hash_val = btree_ptr_hash_val(&b->key);
-+
-+ return rhashtable_lookup_insert_fast(&bc->table, &b->hash,
-+ bch_btree_cache_params);
-+}
-+
-+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
-+ unsigned level, enum btree_id id)
-+{
-+ int ret;
-+
-+ b->c.level = level;
-+ b->c.btree_id = id;
-+
-+ mutex_lock(&bc->lock);
-+ ret = __bch2_btree_node_hash_insert(bc, b);
-+ if (!ret)
-+ list_add_tail(&b->list, &bc->live);
-+ mutex_unlock(&bc->lock);
-+
-+ return ret;
-+}
-+
-+__flatten
-+static inline struct btree *btree_cache_find(struct btree_cache *bc,
-+ const struct bkey_i *k)
-+{
-+ u64 v = btree_ptr_hash_val(k);
-+
-+ return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
-+}
-+
-+/*
-+ * this version is for btree nodes that have already been freed (we're not
-+ * reaping a real btree node)
-+ */
-+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
-+{
-+ struct btree_cache *bc = &c->btree_cache;
-+ int ret = 0;
-+
-+ lockdep_assert_held(&bc->lock);
-+wait_on_io:
-+ if (b->flags & ((1U << BTREE_NODE_dirty)|
-+ (1U << BTREE_NODE_read_in_flight)|
-+ (1U << BTREE_NODE_write_in_flight))) {
-+ if (!flush)
-+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
-+
-+ /* XXX: waiting on IO with btree cache lock held */
-+ bch2_btree_node_wait_on_read(b);
-+ bch2_btree_node_wait_on_write(b);
-+ }
-+
-+ if (!six_trylock_intent(&b->c.lock))
-+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
-+
-+ if (!six_trylock_write(&b->c.lock))
-+ goto out_unlock_intent;
-+
-+ /* recheck under lock */
-+ if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
-+ (1U << BTREE_NODE_write_in_flight))) {
-+ if (!flush)
-+ goto out_unlock;
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+ goto wait_on_io;
-+ }
-+
-+ if (btree_node_noevict(b) ||
-+ btree_node_write_blocked(b) ||
-+ btree_node_will_make_reachable(b))
-+ goto out_unlock;
-+
-+ if (btree_node_dirty(b)) {
-+ if (!flush)
-+ goto out_unlock;
-+ /*
-+ * Using the underscore version because we don't want to compact
-+ * bsets after the write, since this node is about to be evicted
-+ * - unless btree verify mode is enabled, since it runs out of
-+ * the post write cleanup:
-+ */
-+ if (bch2_verify_btree_ondisk)
-+ bch2_btree_node_write(c, b, SIX_LOCK_intent,
-+ BTREE_WRITE_cache_reclaim);
-+ else
-+ __bch2_btree_node_write(c, b,
-+ BTREE_WRITE_cache_reclaim);
-+
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+ goto wait_on_io;
-+ }
-+out:
-+ if (b->hash_val && !ret)
-+ trace_and_count(c, btree_cache_reap, c, b);
-+ return ret;
-+out_unlock:
-+ six_unlock_write(&b->c.lock);
-+out_unlock_intent:
-+ six_unlock_intent(&b->c.lock);
-+ ret = -BCH_ERR_ENOMEM_btree_node_reclaim;
-+ goto out;
-+}
-+
-+static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
-+{
-+ return __btree_node_reclaim(c, b, false);
-+}
-+
-+static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
-+{
-+ return __btree_node_reclaim(c, b, true);
-+}
-+
-+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
-+ struct shrink_control *sc)
-+{
-+ struct bch_fs *c = container_of(shrink, struct bch_fs,
-+ btree_cache.shrink);
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b, *t;
-+ unsigned long nr = sc->nr_to_scan;
-+ unsigned long can_free = 0;
-+ unsigned long freed = 0;
-+ unsigned long touched = 0;
-+ unsigned i, flags;
-+ unsigned long ret = SHRINK_STOP;
-+ bool trigger_writes = atomic_read(&bc->dirty) + nr >=
-+ bc->used * 3 / 4;
-+
-+ if (bch2_btree_shrinker_disabled)
-+ return SHRINK_STOP;
-+
-+ mutex_lock(&bc->lock);
-+ flags = memalloc_nofs_save();
-+
-+ /*
-+ * It's _really_ critical that we don't free too many btree nodes - we
-+ * have to always leave ourselves a reserve. The reserve is how we
-+ * guarantee that allocating memory for a new btree node can always
-+ * succeed, so that inserting keys into the btree can always succeed and
-+ * IO can always make forward progress:
-+ */
-+ can_free = btree_cache_can_free(bc);
-+ nr = min_t(unsigned long, nr, can_free);
-+
-+ i = 0;
-+ list_for_each_entry_safe(b, t, &bc->freeable, list) {
-+ /*
-+ * Leave a few nodes on the freeable list, so that a btree split
-+ * won't have to hit the system allocator:
-+ */
-+ if (++i <= 3)
-+ continue;
-+
-+ touched++;
-+
-+ if (touched >= nr)
-+ goto out;
-+
-+ if (!btree_node_reclaim(c, b)) {
-+ btree_node_data_free(c, b);
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+ freed++;
-+ }
-+ }
-+restart:
-+ list_for_each_entry_safe(b, t, &bc->live, list) {
-+ touched++;
-+
-+ if (btree_node_accessed(b)) {
-+ clear_btree_node_accessed(b);
-+ } else if (!btree_node_reclaim(c, b)) {
-+ freed++;
-+ btree_node_data_free(c, b);
-+
-+ bch2_btree_node_hash_remove(bc, b);
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+
-+ if (freed == nr)
-+ goto out_rotate;
-+ } else if (trigger_writes &&
-+ btree_node_dirty(b) &&
-+ !btree_node_will_make_reachable(b) &&
-+ !btree_node_write_blocked(b) &&
-+ six_trylock_read(&b->c.lock)) {
-+ list_move(&bc->live, &b->list);
-+ mutex_unlock(&bc->lock);
-+ __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
-+ six_unlock_read(&b->c.lock);
-+ if (touched >= nr)
-+ goto out_nounlock;
-+ mutex_lock(&bc->lock);
-+ goto restart;
-+ }
-+
-+ if (touched >= nr)
-+ break;
-+ }
-+out_rotate:
-+ if (&t->list != &bc->live)
-+ list_move_tail(&bc->live, &t->list);
-+out:
-+ mutex_unlock(&bc->lock);
-+out_nounlock:
-+ ret = freed;
-+ memalloc_nofs_restore(flags);
-+ trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret);
-+ return ret;
-+}
-+
-+static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
-+ struct shrink_control *sc)
-+{
-+ struct bch_fs *c = container_of(shrink, struct bch_fs,
-+ btree_cache.shrink);
-+ struct btree_cache *bc = &c->btree_cache;
-+
-+ if (bch2_btree_shrinker_disabled)
-+ return 0;
-+
-+ return btree_cache_can_free(bc);
-+}
-+
-+void bch2_fs_btree_cache_exit(struct bch_fs *c)
-+{
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b;
-+ unsigned i, flags;
-+
-+ unregister_shrinker(&bc->shrink);
-+
-+ /* vfree() can allocate memory: */
-+ flags = memalloc_nofs_save();
-+ mutex_lock(&bc->lock);
-+
-+ if (c->verify_data)
-+ list_move(&c->verify_data->list, &bc->live);
-+
-+ kvpfree(c->verify_ondisk, btree_bytes(c));
-+
-+ for (i = 0; i < btree_id_nr_alive(c); i++) {
-+ struct btree_root *r = bch2_btree_id_root(c, i);
-+
-+ if (r->b)
-+ list_add(&r->b->list, &bc->live);
-+ }
-+
-+ list_splice(&bc->freeable, &bc->live);
-+
-+ while (!list_empty(&bc->live)) {
-+ b = list_first_entry(&bc->live, struct btree, list);
-+
-+ BUG_ON(btree_node_read_in_flight(b) ||
-+ btree_node_write_in_flight(b));
-+
-+ if (btree_node_dirty(b))
-+ bch2_btree_complete_write(c, b, btree_current_write(b));
-+ clear_btree_node_dirty_acct(c, b);
-+
-+ btree_node_data_free(c, b);
-+ }
-+
-+ BUG_ON(atomic_read(&c->btree_cache.dirty));
-+
-+ list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
-+
-+ while (!list_empty(&bc->freed_nonpcpu)) {
-+ b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
-+ list_del(&b->list);
-+ six_lock_exit(&b->c.lock);
-+ kfree(b);
-+ }
-+
-+ mutex_unlock(&bc->lock);
-+ memalloc_nofs_restore(flags);
-+
-+ if (bc->table_init_done)
-+ rhashtable_destroy(&bc->table);
-+}
-+
-+int bch2_fs_btree_cache_init(struct bch_fs *c)
-+{
-+ struct btree_cache *bc = &c->btree_cache;
-+ unsigned i;
-+ int ret = 0;
-+
-+ ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
-+ if (ret)
-+ goto err;
-+
-+ bc->table_init_done = true;
-+
-+ bch2_recalc_btree_reserve(c);
-+
-+ for (i = 0; i < bc->reserve; i++)
-+ if (!__bch2_btree_node_mem_alloc(c))
-+ goto err;
-+
-+ list_splice_init(&bc->live, &bc->freeable);
-+
-+ mutex_init(&c->verify_lock);
-+
-+ bc->shrink.count_objects = bch2_btree_cache_count;
-+ bc->shrink.scan_objects = bch2_btree_cache_scan;
-+ bc->shrink.seeks = 4;
-+ ret = register_shrinker(&bc->shrink, "%s-btree_cache", c->name);
-+ if (ret)
-+ goto err;
-+
-+ return 0;
-+err:
-+ return -BCH_ERR_ENOMEM_fs_btree_cache_init;
-+}
-+
-+void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
-+{
-+ mutex_init(&bc->lock);
-+ INIT_LIST_HEAD(&bc->live);
-+ INIT_LIST_HEAD(&bc->freeable);
-+ INIT_LIST_HEAD(&bc->freed_pcpu);
-+ INIT_LIST_HEAD(&bc->freed_nonpcpu);
-+}
-+
-+/*
-+ * We can only have one thread cannibalizing other cached btree nodes at a time,
-+ * or we'll deadlock. We use an open coded mutex to ensure that, which a
-+ * cannibalize_bucket() will take. This means every time we unlock the root of
-+ * the btree, we need to release this lock if we have it held.
-+ */
-+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
-+{
-+ struct btree_cache *bc = &c->btree_cache;
-+
-+ if (bc->alloc_lock == current) {
-+ trace_and_count(c, btree_cache_cannibalize_unlock, c);
-+ bc->alloc_lock = NULL;
-+ closure_wake_up(&bc->alloc_wait);
-+ }
-+}
-+
-+int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
-+{
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct task_struct *old;
-+
-+ old = cmpxchg(&bc->alloc_lock, NULL, current);
-+ if (old == NULL || old == current)
-+ goto success;
-+
-+ if (!cl) {
-+ trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
-+ return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
-+ }
-+
-+ closure_wait(&bc->alloc_wait, cl);
-+
-+ /* Try again, after adding ourselves to waitlist */
-+ old = cmpxchg(&bc->alloc_lock, NULL, current);
-+ if (old == NULL || old == current) {
-+ /* We raced */
-+ closure_wake_up(&bc->alloc_wait);
-+ goto success;
-+ }
-+
-+ trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
-+ return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
-+
-+success:
-+ trace_and_count(c, btree_cache_cannibalize_lock, c);
-+ return 0;
-+}
-+
-+static struct btree *btree_node_cannibalize(struct bch_fs *c)
-+{
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b;
-+
-+ list_for_each_entry_reverse(b, &bc->live, list)
-+ if (!btree_node_reclaim(c, b))
-+ return b;
-+
-+ while (1) {
-+ list_for_each_entry_reverse(b, &bc->live, list)
-+ if (!btree_node_write_and_reclaim(c, b))
-+ return b;
-+
-+ /*
-+ * Rare case: all nodes were intent-locked.
-+ * Just busy-wait.
-+ */
-+ WARN_ONCE(1, "btree cache cannibalize failed\n");
-+ cond_resched();
-+ }
-+}
-+
-+struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct list_head *freed = pcpu_read_locks
-+ ? &bc->freed_pcpu
-+ : &bc->freed_nonpcpu;
-+ struct btree *b, *b2;
-+ u64 start_time = local_clock();
-+ unsigned flags;
-+
-+ flags = memalloc_nofs_save();
-+ mutex_lock(&bc->lock);
-+
-+ /*
-+ * We never free struct btree itself, just the memory that holds the on
-+ * disk node. Check the freed list before allocating a new one:
-+ */
-+ list_for_each_entry(b, freed, list)
-+ if (!btree_node_reclaim(c, b)) {
-+ list_del_init(&b->list);
-+ goto got_node;
-+ }
-+
-+ b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
-+ if (!b) {
-+ mutex_unlock(&bc->lock);
-+ bch2_trans_unlock(trans);
-+ b = __btree_node_mem_alloc(c, GFP_KERNEL);
-+ if (!b)
-+ goto err;
-+ mutex_lock(&bc->lock);
-+ }
-+
-+ bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);
-+
-+ BUG_ON(!six_trylock_intent(&b->c.lock));
-+ BUG_ON(!six_trylock_write(&b->c.lock));
-+got_node:
-+
-+ /*
-+ * btree_free() doesn't free memory; it sticks the node on the end of
-+ * the list. Check if there's any freed nodes there:
-+ */
-+ list_for_each_entry(b2, &bc->freeable, list)
-+ if (!btree_node_reclaim(c, b2)) {
-+ swap(b->data, b2->data);
-+ swap(b->aux_data, b2->aux_data);
-+ btree_node_to_freedlist(bc, b2);
-+ six_unlock_write(&b2->c.lock);
-+ six_unlock_intent(&b2->c.lock);
-+ goto got_mem;
-+ }
-+
-+ mutex_unlock(&bc->lock);
-+
-+ if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) {
-+ bch2_trans_unlock(trans);
-+ if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
-+ goto err;
-+ }
-+
-+ mutex_lock(&bc->lock);
-+ bc->used++;
-+got_mem:
-+ mutex_unlock(&bc->lock);
-+
-+ BUG_ON(btree_node_hashed(b));
-+ BUG_ON(btree_node_dirty(b));
-+ BUG_ON(btree_node_write_in_flight(b));
-+out:
-+ b->flags = 0;
-+ b->written = 0;
-+ b->nsets = 0;
-+ b->sib_u64s[0] = 0;
-+ b->sib_u64s[1] = 0;
-+ b->whiteout_u64s = 0;
-+ bch2_btree_keys_init(b);
-+ set_btree_node_accessed(b);
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
-+ start_time);
-+
-+ memalloc_nofs_restore(flags);
-+ return b;
-+err:
-+ mutex_lock(&bc->lock);
-+
-+ /* Try to cannibalize another cached btree node: */
-+ if (bc->alloc_lock == current) {
-+ b2 = btree_node_cannibalize(c);
-+ clear_btree_node_just_written(b2);
-+ bch2_btree_node_hash_remove(bc, b2);
-+
-+ if (b) {
-+ swap(b->data, b2->data);
-+ swap(b->aux_data, b2->aux_data);
-+ btree_node_to_freedlist(bc, b2);
-+ six_unlock_write(&b2->c.lock);
-+ six_unlock_intent(&b2->c.lock);
-+ } else {
-+ b = b2;
-+ list_del_init(&b->list);
-+ }
-+
-+ mutex_unlock(&bc->lock);
-+
-+ trace_and_count(c, btree_cache_cannibalize, c);
-+ goto out;
-+ }
-+
-+ mutex_unlock(&bc->lock);
-+ memalloc_nofs_restore(flags);
-+ return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc);
-+}
-+
-+/* Slowpath, don't want it inlined into btree_iter_traverse() */
-+static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
-+ struct btree_path *path,
-+ const struct bkey_i *k,
-+ enum btree_id btree_id,
-+ unsigned level,
-+ enum six_lock_type lock_type,
-+ bool sync)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b;
-+ u32 seq;
-+
-+ BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
-+ /*
-+ * Parent node must be locked, else we could read in a btree node that's
-+ * been freed:
-+ */
-+ if (path && !bch2_btree_node_relock(trans, path, level + 1)) {
-+ trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path);
-+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
-+ }
-+
-+ b = bch2_btree_node_mem_alloc(trans, level != 0);
-+
-+ if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) {
-+ trans->memory_allocation_failure = true;
-+ trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
-+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
-+ }
-+
-+ if (IS_ERR(b))
-+ return b;
-+
-+ /*
-+ * Btree nodes read in from disk should not have the accessed bit set
-+ * initially, so that linear scans don't thrash the cache:
-+ */
-+ clear_btree_node_accessed(b);
-+
-+ bkey_copy(&b->key, k);
-+ if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
-+ /* raced with another fill: */
-+
-+ /* mark as unhashed... */
-+ b->hash_val = 0;
-+
-+ mutex_lock(&bc->lock);
-+ list_add(&b->list, &bc->freeable);
-+ mutex_unlock(&bc->lock);
-+
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+ return NULL;
-+ }
-+
-+ set_btree_node_read_in_flight(b);
-+
-+ six_unlock_write(&b->c.lock);
-+ seq = six_lock_seq(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+
-+ /* Unlock before doing IO: */
-+ if (path && sync)
-+ bch2_trans_unlock_noassert(trans);
-+
-+ bch2_btree_node_read(c, b, sync);
-+
-+ if (!sync)
-+ return NULL;
-+
-+ if (path) {
-+ int ret = bch2_trans_relock(trans) ?:
-+ bch2_btree_path_relock_intent(trans, path);
-+ if (ret) {
-+ BUG_ON(!trans->restarted);
-+ return ERR_PTR(ret);
-+ }
-+ }
-+
-+ if (!six_relock_type(&b->c.lock, lock_type, seq)) {
-+ if (path)
-+ trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
-+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
-+ }
-+
-+ return b;
-+}
-+
-+static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
-+{
-+ struct printbuf buf = PRINTBUF;
-+
-+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
-+ return;
-+
-+ prt_printf(&buf,
-+ "btree node header doesn't match ptr\n"
-+ "btree %s level %u\n"
-+ "ptr: ",
-+ bch2_btree_id_str(b->c.btree_id), b->c.level);
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-+
-+ prt_printf(&buf, "\nheader: btree %s level %llu\n"
-+ "min ",
-+ bch2_btree_id_str(BTREE_NODE_ID(b->data)),
-+ BTREE_NODE_LEVEL(b->data));
-+ bch2_bpos_to_text(&buf, b->data->min_key);
-+
-+ prt_printf(&buf, "\nmax ");
-+ bch2_bpos_to_text(&buf, b->data->max_key);
-+
-+ bch2_fs_inconsistent(c, "%s", buf.buf);
-+ printbuf_exit(&buf);
-+}
-+
-+static inline void btree_check_header(struct bch_fs *c, struct btree *b)
-+{
-+ if (b->c.btree_id != BTREE_NODE_ID(b->data) ||
-+ b->c.level != BTREE_NODE_LEVEL(b->data) ||
-+ !bpos_eq(b->data->max_key, b->key.k.p) ||
-+ (b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
-+ !bpos_eq(b->data->min_key,
-+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)))
-+ btree_bad_header(c, b);
-+}
-+
-+static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
-+ const struct bkey_i *k, unsigned level,
-+ enum six_lock_type lock_type,
-+ unsigned long trace_ip)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b;
-+ struct bset_tree *t;
-+ bool need_relock = false;
-+ int ret;
-+
-+ EBUG_ON(level >= BTREE_MAX_DEPTH);
-+retry:
-+ b = btree_cache_find(bc, k);
-+ if (unlikely(!b)) {
-+ /*
-+ * We must have the parent locked to call bch2_btree_node_fill(),
-+ * else we could read in a btree node from disk that's been
-+ * freed:
-+ */
-+ b = bch2_btree_node_fill(trans, path, k, path->btree_id,
-+ level, lock_type, true);
-+ need_relock = true;
-+
-+ /* We raced and found the btree node in the cache */
-+ if (!b)
-+ goto retry;
-+
-+ if (IS_ERR(b))
-+ return b;
-+ } else {
-+ if (btree_node_read_locked(path, level + 1))
-+ btree_node_unlock(trans, path, level + 1);
-+
-+ ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ return ERR_PTR(ret);
-+
-+ BUG_ON(ret);
-+
-+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
-+ b->c.level != level ||
-+ race_fault())) {
-+ six_unlock_type(&b->c.lock, lock_type);
-+ if (bch2_btree_node_relock(trans, path, level + 1))
-+ goto retry;
-+
-+ trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
-+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
-+ }
-+
-+ /* avoid atomic set bit if it's not needed: */
-+ if (!btree_node_accessed(b))
-+ set_btree_node_accessed(b);
-+ }
-+
-+ if (unlikely(btree_node_read_in_flight(b))) {
-+ u32 seq = six_lock_seq(&b->c.lock);
-+
-+ six_unlock_type(&b->c.lock, lock_type);
-+ bch2_trans_unlock(trans);
-+ need_relock = true;
-+
-+ bch2_btree_node_wait_on_read(b);
-+
-+ /*
-+ * should_be_locked is not set on this path yet, so we need to
-+ * relock it specifically:
-+ */
-+ if (!six_relock_type(&b->c.lock, lock_type, seq))
-+ goto retry;
-+ }
-+
-+ if (unlikely(need_relock)) {
-+ ret = bch2_trans_relock(trans) ?:
-+ bch2_btree_path_relock_intent(trans, path);
-+ if (ret) {
-+ six_unlock_type(&b->c.lock, lock_type);
-+ return ERR_PTR(ret);
-+ }
-+ }
-+
-+ prefetch(b->aux_data);
-+
-+ for_each_bset(b, t) {
-+ void *p = (u64 *) b->aux_data + t->aux_data_offset;
-+
-+ prefetch(p + L1_CACHE_BYTES * 0);
-+ prefetch(p + L1_CACHE_BYTES * 1);
-+ prefetch(p + L1_CACHE_BYTES * 2);
-+ }
-+
-+ if (unlikely(btree_node_read_error(b))) {
-+ six_unlock_type(&b->c.lock, lock_type);
-+ return ERR_PTR(-EIO);
-+ }
-+
-+ EBUG_ON(b->c.btree_id != path->btree_id);
-+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
-+ btree_check_header(c, b);
-+
-+ return b;
-+}
-+
-+/**
-+ * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
-+ * in from disk if necessary.
-+ *
-+ * @trans: btree transaction object
-+ * @path: btree_path being traversed
-+ * @k: pointer to btree node (generally KEY_TYPE_btree_ptr_v2)
-+ * @level: level of btree node being looked up (0 == leaf node)
-+ * @lock_type: SIX_LOCK_read or SIX_LOCK_intent
-+ * @trace_ip: ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek())
-+ *
-+ * The btree node will have either a read or a write lock held, depending on
-+ * the @write parameter.
-+ *
-+ * Returns: btree node or ERR_PTR()
-+ */
-+struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
-+ const struct bkey_i *k, unsigned level,
-+ enum six_lock_type lock_type,
-+ unsigned long trace_ip)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree *b;
-+ struct bset_tree *t;
-+ int ret;
-+
-+ EBUG_ON(level >= BTREE_MAX_DEPTH);
-+
-+ b = btree_node_mem_ptr(k);
-+
-+ /*
-+ * Check b->hash_val _before_ calling btree_node_lock() - this might not
-+ * be the node we want anymore, and trying to lock the wrong node could
-+ * cause an unneccessary transaction restart:
-+ */
-+ if (unlikely(!c->opts.btree_node_mem_ptr_optimization ||
-+ !b ||
-+ b->hash_val != btree_ptr_hash_val(k)))
-+ return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
-+
-+ if (btree_node_read_locked(path, level + 1))
-+ btree_node_unlock(trans, path, level + 1);
-+
-+ ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ return ERR_PTR(ret);
-+
-+ BUG_ON(ret);
-+
-+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
-+ b->c.level != level ||
-+ race_fault())) {
-+ six_unlock_type(&b->c.lock, lock_type);
-+ if (bch2_btree_node_relock(trans, path, level + 1))
-+ return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
-+
-+ trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
-+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
-+ }
-+
-+ if (unlikely(btree_node_read_in_flight(b))) {
-+ six_unlock_type(&b->c.lock, lock_type);
-+ return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
-+ }
-+
-+ prefetch(b->aux_data);
-+
-+ for_each_bset(b, t) {
-+ void *p = (u64 *) b->aux_data + t->aux_data_offset;
-+
-+ prefetch(p + L1_CACHE_BYTES * 0);
-+ prefetch(p + L1_CACHE_BYTES * 1);
-+ prefetch(p + L1_CACHE_BYTES * 2);
-+ }
-+
-+ /* avoid atomic set bit if it's not needed: */
-+ if (!btree_node_accessed(b))
-+ set_btree_node_accessed(b);
-+
-+ if (unlikely(btree_node_read_error(b))) {
-+ six_unlock_type(&b->c.lock, lock_type);
-+ return ERR_PTR(-EIO);
-+ }
-+
-+ EBUG_ON(b->c.btree_id != path->btree_id);
-+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
-+ btree_check_header(c, b);
-+
-+ return b;
-+}
-+
-+struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
-+ const struct bkey_i *k,
-+ enum btree_id btree_id,
-+ unsigned level,
-+ bool nofill)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b;
-+ struct bset_tree *t;
-+ int ret;
-+
-+ EBUG_ON(level >= BTREE_MAX_DEPTH);
-+
-+ if (c->opts.btree_node_mem_ptr_optimization) {
-+ b = btree_node_mem_ptr(k);
-+ if (b)
-+ goto lock_node;
-+ }
-+retry:
-+ b = btree_cache_find(bc, k);
-+ if (unlikely(!b)) {
-+ if (nofill)
-+ goto out;
-+
-+ b = bch2_btree_node_fill(trans, NULL, k, btree_id,
-+ level, SIX_LOCK_read, true);
-+
-+ /* We raced and found the btree node in the cache */
-+ if (!b)
-+ goto retry;
-+
-+ if (IS_ERR(b) &&
-+ !bch2_btree_cache_cannibalize_lock(c, NULL))
-+ goto retry;
-+
-+ if (IS_ERR(b))
-+ goto out;
-+ } else {
-+lock_node:
-+ ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ return ERR_PTR(ret);
-+
-+ BUG_ON(ret);
-+
-+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
-+ b->c.btree_id != btree_id ||
-+ b->c.level != level)) {
-+ six_unlock_read(&b->c.lock);
-+ goto retry;
-+ }
-+ }
-+
-+ /* XXX: waiting on IO with btree locks held: */
-+ __bch2_btree_node_wait_on_read(b);
-+
-+ prefetch(b->aux_data);
-+
-+ for_each_bset(b, t) {
-+ void *p = (u64 *) b->aux_data + t->aux_data_offset;
-+
-+ prefetch(p + L1_CACHE_BYTES * 0);
-+ prefetch(p + L1_CACHE_BYTES * 1);
-+ prefetch(p + L1_CACHE_BYTES * 2);
-+ }
-+
-+ /* avoid atomic set bit if it's not needed: */
-+ if (!btree_node_accessed(b))
-+ set_btree_node_accessed(b);
-+
-+ if (unlikely(btree_node_read_error(b))) {
-+ six_unlock_read(&b->c.lock);
-+ b = ERR_PTR(-EIO);
-+ goto out;
-+ }
-+
-+ EBUG_ON(b->c.btree_id != btree_id);
-+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
-+ btree_check_header(c, b);
-+out:
-+ bch2_btree_cache_cannibalize_unlock(c);
-+ return b;
-+}
-+
-+int bch2_btree_node_prefetch(struct btree_trans *trans,
-+ struct btree_path *path,
-+ const struct bkey_i *k,
-+ enum btree_id btree_id, unsigned level)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b;
-+
-+ BUG_ON(trans && !btree_node_locked(path, level + 1));
-+ BUG_ON(level >= BTREE_MAX_DEPTH);
-+
-+ b = btree_cache_find(bc, k);
-+ if (b)
-+ return 0;
-+
-+ b = bch2_btree_node_fill(trans, path, k, btree_id,
-+ level, SIX_LOCK_read, false);
-+ return PTR_ERR_OR_ZERO(b);
-+}
-+
-+void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_cache *bc = &c->btree_cache;
-+ struct btree *b;
-+
-+ b = btree_cache_find(bc, k);
-+ if (!b)
-+ return;
-+wait_on_io:
-+ /* not allowed to wait on io with btree locks held: */
-+
-+ /* XXX we're called from btree_gc which will be holding other btree
-+ * nodes locked
-+ */
-+ __bch2_btree_node_wait_on_read(b);
-+ __bch2_btree_node_wait_on_write(b);
-+
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
-+
-+ if (btree_node_dirty(b)) {
-+ __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+ goto wait_on_io;
-+ }
-+
-+ BUG_ON(btree_node_dirty(b));
-+
-+ mutex_lock(&bc->lock);
-+ btree_node_data_free(c, b);
-+ bch2_btree_node_hash_remove(bc, b);
-+ mutex_unlock(&bc->lock);
-+
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+}
-+
-+const char *bch2_btree_id_str(enum btree_id btree)
-+{
-+ return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)";
-+}
-+
-+void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
-+{
-+ prt_printf(out, "%s level %u/%u\n ",
-+ bch2_btree_id_str(b->c.btree_id),
-+ b->c.level,
-+ bch2_btree_id_root(c, b->c.btree_id)->level);
-+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
-+}
-+
-+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
-+{
-+ struct bset_stats stats;
-+
-+ memset(&stats, 0, sizeof(stats));
-+
-+ bch2_btree_keys_stats(b, &stats);
-+
-+ prt_printf(out, "l %u ", b->c.level);
-+ bch2_bpos_to_text(out, b->data->min_key);
-+ prt_printf(out, " - ");
-+ bch2_bpos_to_text(out, b->data->max_key);
-+ prt_printf(out, ":\n"
-+ " ptrs: ");
-+ bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
-+ prt_newline(out);
-+
-+ prt_printf(out,
-+ " format: ");
-+ bch2_bkey_format_to_text(out, &b->format);
-+
-+ prt_printf(out,
-+ " unpack fn len: %u\n"
-+ " bytes used %zu/%zu (%zu%% full)\n"
-+ " sib u64s: %u, %u (merge threshold %u)\n"
-+ " nr packed keys %u\n"
-+ " nr unpacked keys %u\n"
-+ " floats %zu\n"
-+ " failed unpacked %zu\n",
-+ b->unpack_fn_len,
-+ b->nr.live_u64s * sizeof(u64),
-+ btree_bytes(c) - sizeof(struct btree_node),
-+ b->nr.live_u64s * 100 / btree_max_u64s(c),
-+ b->sib_u64s[0],
-+ b->sib_u64s[1],
-+ c->btree_foreground_merge_threshold,
-+ b->nr.packed_keys,
-+ b->nr.unpacked_keys,
-+ stats.floats,
-+ stats.failed);
-+}
-+
-+void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
-+{
-+ prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
-+ prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
-+ prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
-+}
-diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
-new file mode 100644
-index 000000000000..cfb80b201d61
---- /dev/null
-+++ b/fs/bcachefs/btree_cache.h
-@@ -0,0 +1,131 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_CACHE_H
-+#define _BCACHEFS_BTREE_CACHE_H
-+
-+#include "bcachefs.h"
-+#include "btree_types.h"
-+#include "bkey_methods.h"
-+
-+extern const char * const bch2_btree_node_flags[];
-+
-+struct btree_iter;
-+
-+void bch2_recalc_btree_reserve(struct bch_fs *);
-+
-+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
-+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
-+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
-+ unsigned, enum btree_id);
-+
-+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
-+int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
-+
-+struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
-+struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);
-+
-+struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
-+ const struct bkey_i *, unsigned,
-+ enum six_lock_type, unsigned long);
-+
-+struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *,
-+ enum btree_id, unsigned, bool);
-+
-+int bch2_btree_node_prefetch(struct btree_trans *, struct btree_path *,
-+ const struct bkey_i *, enum btree_id, unsigned);
-+
-+void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
-+
-+void bch2_fs_btree_cache_exit(struct bch_fs *);
-+int bch2_fs_btree_cache_init(struct bch_fs *);
-+void bch2_fs_btree_cache_init_early(struct btree_cache *);
-+
-+static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
-+{
-+ switch (k->k.type) {
-+ case KEY_TYPE_btree_ptr:
-+ return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start);
-+ case KEY_TYPE_btree_ptr_v2:
-+ /*
-+ * The cast/deref is only necessary to avoid sparse endianness
-+ * warnings:
-+ */
-+ return *((u64 *) &bkey_i_to_btree_ptr_v2_c(k)->v.seq);
-+ default:
-+ return 0;
-+ }
-+}
-+
-+static inline struct btree *btree_node_mem_ptr(const struct bkey_i *k)
-+{
-+ return k->k.type == KEY_TYPE_btree_ptr_v2
-+ ? (void *)(unsigned long)bkey_i_to_btree_ptr_v2_c(k)->v.mem_ptr
-+ : NULL;
-+}
-+
-+/* is btree node in hash table? */
-+static inline bool btree_node_hashed(struct btree *b)
-+{
-+ return b->hash_val != 0;
-+}
-+
-+#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \
-+ for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl, \
-+ &(_c)->btree_cache.table), \
-+ _iter = 0; _iter < (_tbl)->size; _iter++) \
-+ rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)
-+
-+static inline size_t btree_bytes(struct bch_fs *c)
-+{
-+ return c->opts.btree_node_size;
-+}
-+
-+static inline size_t btree_max_u64s(struct bch_fs *c)
-+{
-+ return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
-+}
-+
-+static inline size_t btree_pages(struct bch_fs *c)
-+{
-+ return btree_bytes(c) / PAGE_SIZE;
-+}
-+
-+static inline unsigned btree_blocks(struct bch_fs *c)
-+{
-+ return btree_sectors(c) >> c->block_bits;
-+}
-+
-+#define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 2 / 3)
-+
-+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
-+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
-+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \
-+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) >> 2))
-+
-+static inline unsigned btree_id_nr_alive(struct bch_fs *c)
-+{
-+ return BTREE_ID_NR + c->btree_roots_extra.nr;
-+}
-+
-+static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned id)
-+{
-+ if (likely(id < BTREE_ID_NR)) {
-+ return &c->btree_roots_known[id];
-+ } else {
-+ unsigned idx = id - BTREE_ID_NR;
-+
-+ EBUG_ON(idx >= c->btree_roots_extra.nr);
-+ return &c->btree_roots_extra.data[idx];
-+ }
-+}
-+
-+static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
-+{
-+ return bch2_btree_id_root(c, b->c.btree_id)->b;
-+}
-+
-+const char *bch2_btree_id_str(enum btree_id);
-+void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
-+void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
-+void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *);
-+
-+#endif /* _BCACHEFS_BTREE_CACHE_H */
-diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
-new file mode 100644
-index 000000000000..0b5d09c8475d
---- /dev/null
-+++ b/fs/bcachefs/btree_gc.c
-@@ -0,0 +1,2145 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
-+ * Copyright (C) 2014 Datera Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "bkey_methods.h"
-+#include "bkey_buf.h"
-+#include "btree_journal_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_locking.h"
-+#include "btree_update_interior.h"
-+#include "btree_io.h"
-+#include "btree_gc.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "debug.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "journal.h"
-+#include "keylist.h"
-+#include "move.h"
-+#include "recovery.h"
-+#include "reflink.h"
-+#include "replicas.h"
-+#include "super-io.h"
-+#include "trace.h"
-+
-+#include <linux/slab.h>
-+#include <linux/bitops.h>
-+#include <linux/freezer.h>
-+#include <linux/kthread.h>
-+#include <linux/preempt.h>
-+#include <linux/rcupdate.h>
-+#include <linux/sched/task.h>
-+
-+#define DROP_THIS_NODE 10
-+#define DROP_PREV_NODE 11
-+
-+static bool should_restart_for_topology_repair(struct bch_fs *c)
-+{
-+ return c->opts.fix_errors != FSCK_FIX_no &&
-+ !(c->recovery_passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology));
-+}
-+
-+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-+{
-+ preempt_disable();
-+ write_seqcount_begin(&c->gc_pos_lock);
-+ c->gc_pos = new_pos;
-+ write_seqcount_end(&c->gc_pos_lock);
-+ preempt_enable();
-+}
-+
-+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-+{
-+ BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
-+ __gc_pos_set(c, new_pos);
-+}
-+
-+/*
-+ * Missing: if an interior btree node is empty, we need to do something -
-+ * perhaps just kill it
-+ */
-+static int bch2_gc_check_topology(struct bch_fs *c,
-+ struct btree *b,
-+ struct bkey_buf *prev,
-+ struct bkey_buf cur,
-+ bool is_last)
-+{
-+ struct bpos node_start = b->data->min_key;
-+ struct bpos node_end = b->data->max_key;
-+ struct bpos expected_start = bkey_deleted(&prev->k->k)
-+ ? node_start
-+ : bpos_successor(prev->k->k.p);
-+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-+ int ret = 0;
-+
-+ if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
-+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
-+
-+ if (!bpos_eq(expected_start, bp->v.min_key)) {
-+ bch2_topology_error(c);
-+
-+ if (bkey_deleted(&prev->k->k)) {
-+ prt_printf(&buf1, "start of node: ");
-+ bch2_bpos_to_text(&buf1, node_start);
-+ } else {
-+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k));
-+ }
-+ bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k));
-+
-+ if (__fsck_err(c,
-+ FSCK_CAN_FIX|
-+ FSCK_CAN_IGNORE|
-+ FSCK_NO_RATELIMIT,
-+ btree_node_topology_bad_min_key,
-+ "btree node with incorrect min_key at btree %s level %u:\n"
-+ " prev %s\n"
-+ " cur %s",
-+ bch2_btree_id_str(b->c.btree_id), b->c.level,
-+ buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) {
-+ bch_info(c, "Halting mark and sweep to start topology repair pass");
-+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
-+ goto err;
-+ } else {
-+ set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
-+ }
-+ }
-+ }
-+
-+ if (is_last && !bpos_eq(cur.k->k.p, node_end)) {
-+ bch2_topology_error(c);
-+
-+ printbuf_reset(&buf1);
-+ printbuf_reset(&buf2);
-+
-+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k));
-+ bch2_bpos_to_text(&buf2, node_end);
-+
-+ if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT,
-+ btree_node_topology_bad_max_key,
-+ "btree node with incorrect max_key at btree %s level %u:\n"
-+ " %s\n"
-+ " expected %s",
-+ bch2_btree_id_str(b->c.btree_id), b->c.level,
-+ buf1.buf, buf2.buf) &&
-+ should_restart_for_topology_repair(c)) {
-+ bch_info(c, "Halting mark and sweep to start topology repair pass");
-+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
-+ goto err;
-+ } else {
-+ set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
-+ }
-+ }
-+
-+ bch2_bkey_buf_copy(prev, c, cur.k);
-+err:
-+fsck_err:
-+ printbuf_exit(&buf2);
-+ printbuf_exit(&buf1);
-+ return ret;
-+}
-+
-+static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
-+{
-+ switch (b->key.k.type) {
-+ case KEY_TYPE_btree_ptr: {
-+ struct bkey_i_btree_ptr *src = bkey_i_to_btree_ptr(&b->key);
-+
-+ dst->k.p = src->k.p;
-+ dst->v.mem_ptr = 0;
-+ dst->v.seq = b->data->keys.seq;
-+ dst->v.sectors_written = 0;
-+ dst->v.flags = 0;
-+ dst->v.min_key = b->data->min_key;
-+ set_bkey_val_bytes(&dst->k, sizeof(dst->v) + bkey_val_bytes(&src->k));
-+ memcpy(dst->v.start, src->v.start, bkey_val_bytes(&src->k));
-+ break;
-+ }
-+ case KEY_TYPE_btree_ptr_v2:
-+ bkey_copy(&dst->k_i, &b->key);
-+ break;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static void bch2_btree_node_update_key_early(struct btree_trans *trans,
-+ enum btree_id btree, unsigned level,
-+ struct bkey_s_c old, struct bkey_i *new)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree *b;
-+ struct bkey_buf tmp;
-+ int ret;
-+
-+ bch2_bkey_buf_init(&tmp);
-+ bch2_bkey_buf_reassemble(&tmp, c, old);
-+
-+ b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
-+ if (!IS_ERR_OR_NULL(b)) {
-+ mutex_lock(&c->btree_cache.lock);
-+
-+ bch2_btree_node_hash_remove(&c->btree_cache, b);
-+
-+ bkey_copy(&b->key, new);
-+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
-+ BUG_ON(ret);
-+
-+ mutex_unlock(&c->btree_cache.lock);
-+ six_unlock_read(&b->c.lock);
-+ }
-+
-+ bch2_bkey_buf_exit(&tmp, c);
-+}
-+
-+static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
-+{
-+ struct bkey_i_btree_ptr_v2 *new;
-+ int ret;
-+
-+ new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
-+ if (!new)
-+ return -BCH_ERR_ENOMEM_gc_repair_key;
-+
-+ btree_ptr_to_v2(b, new);
-+ b->data->min_key = new_min;
-+ new->v.min_key = new_min;
-+ SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);
-+
-+ ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
-+ if (ret) {
-+ kfree(new);
-+ return ret;
-+ }
-+
-+ bch2_btree_node_drop_keys_outside_node(b);
-+ bkey_copy(&b->key, &new->k_i);
-+ return 0;
-+}
-+
-+static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
-+{
-+ struct bkey_i_btree_ptr_v2 *new;
-+ int ret;
-+
-+ ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
-+ if (ret)
-+ return ret;
-+
-+ new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
-+ if (!new)
-+ return -BCH_ERR_ENOMEM_gc_repair_key;
-+
-+ btree_ptr_to_v2(b, new);
-+ b->data->max_key = new_max;
-+ new->k.p = new_max;
-+ SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);
-+
-+ ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
-+ if (ret) {
-+ kfree(new);
-+ return ret;
-+ }
-+
-+ bch2_btree_node_drop_keys_outside_node(b);
-+
-+ mutex_lock(&c->btree_cache.lock);
-+ bch2_btree_node_hash_remove(&c->btree_cache, b);
-+
-+ bkey_copy(&b->key, &new->k_i);
-+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
-+ BUG_ON(ret);
-+ mutex_unlock(&c->btree_cache.lock);
-+ return 0;
-+}
-+
-+static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
-+ struct btree *prev, struct btree *cur)
-+{
-+ struct bpos expected_start = !prev
-+ ? b->data->min_key
-+ : bpos_successor(prev->key.k.p);
-+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-+ int ret = 0;
-+
-+ if (!prev) {
-+ prt_printf(&buf1, "start of node: ");
-+ bch2_bpos_to_text(&buf1, b->data->min_key);
-+ } else {
-+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key));
-+ }
-+
-+ bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key));
-+
-+ if (prev &&
-+ bpos_gt(expected_start, cur->data->min_key) &&
-+ BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
-+ /* cur overwrites prev: */
-+
-+ if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key,
-+ cur->data->min_key), c,
-+ btree_node_topology_overwritten_by_next_node,
-+ "btree node overwritten by next node at btree %s level %u:\n"
-+ " node %s\n"
-+ " next %s",
-+ bch2_btree_id_str(b->c.btree_id), b->c.level,
-+ buf1.buf, buf2.buf)) {
-+ ret = DROP_PREV_NODE;
-+ goto out;
-+ }
-+
-+ if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p,
-+ bpos_predecessor(cur->data->min_key)), c,
-+ btree_node_topology_bad_max_key,
-+ "btree node with incorrect max_key at btree %s level %u:\n"
-+ " node %s\n"
-+ " next %s",
-+ bch2_btree_id_str(b->c.btree_id), b->c.level,
-+ buf1.buf, buf2.buf))
-+ ret = set_node_max(c, prev,
-+ bpos_predecessor(cur->data->min_key));
-+ } else {
-+ /* prev overwrites cur: */
-+
-+ if (mustfix_fsck_err_on(bpos_ge(expected_start,
-+ cur->data->max_key), c,
-+ btree_node_topology_overwritten_by_prev_node,
-+ "btree node overwritten by prev node at btree %s level %u:\n"
-+ " prev %s\n"
-+ " node %s",
-+ bch2_btree_id_str(b->c.btree_id), b->c.level,
-+ buf1.buf, buf2.buf)) {
-+ ret = DROP_THIS_NODE;
-+ goto out;
-+ }
-+
-+ if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c,
-+ btree_node_topology_bad_min_key,
-+ "btree node with incorrect min_key at btree %s level %u:\n"
-+ " prev %s\n"
-+ " node %s",
-+ bch2_btree_id_str(b->c.btree_id), b->c.level,
-+ buf1.buf, buf2.buf))
-+ ret = set_node_min(c, cur, expected_start);
-+ }
-+out:
-+fsck_err:
-+ printbuf_exit(&buf2);
-+ printbuf_exit(&buf1);
-+ return ret;
-+}
-+
-+static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
-+ struct btree *child)
-+{
-+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-+ int ret = 0;
-+
-+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key));
-+ bch2_bpos_to_text(&buf2, b->key.k.p);
-+
-+ if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c,
-+ btree_node_topology_bad_max_key,
-+ "btree node with incorrect max_key at btree %s level %u:\n"
-+ " %s\n"
-+ " expected %s",
-+ bch2_btree_id_str(b->c.btree_id), b->c.level,
-+ buf1.buf, buf2.buf)) {
-+ ret = set_node_max(c, child, b->key.k.p);
-+ if (ret)
-+ goto err;
-+ }
-+err:
-+fsck_err:
-+ printbuf_exit(&buf2);
-+ printbuf_exit(&buf1);
-+ return ret;
-+}
-+
-+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_and_journal_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_buf prev_k, cur_k;
-+ struct btree *prev = NULL, *cur = NULL;
-+ bool have_child, dropped_children = false;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ if (!b->c.level)
-+ return 0;
-+again:
-+ prev = NULL;
-+ have_child = dropped_children = false;
-+ bch2_bkey_buf_init(&prev_k);
-+ bch2_bkey_buf_init(&cur_k);
-+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
-+
-+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-+ BUG_ON(bpos_lt(k.k->p, b->data->min_key));
-+ BUG_ON(bpos_gt(k.k->p, b->data->max_key));
-+
-+ bch2_btree_and_journal_iter_advance(&iter);
-+ bch2_bkey_buf_reassemble(&cur_k, c, k);
-+
-+ cur = bch2_btree_node_get_noiter(trans, cur_k.k,
-+ b->c.btree_id, b->c.level - 1,
-+ false);
-+ ret = PTR_ERR_OR_ZERO(cur);
-+
-+ printbuf_reset(&buf);
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
-+
-+ if (mustfix_fsck_err_on(ret == -EIO, c,
-+ btree_node_unreadable,
-+ "Topology repair: unreadable btree node at btree %s level %u:\n"
-+ " %s",
-+ bch2_btree_id_str(b->c.btree_id),
-+ b->c.level - 1,
-+ buf.buf)) {
-+ bch2_btree_node_evict(trans, cur_k.k);
-+ ret = bch2_journal_key_delete(c, b->c.btree_id,
-+ b->c.level, cur_k.k->k.p);
-+ cur = NULL;
-+ if (ret)
-+ break;
-+ continue;
-+ }
-+
-+ if (ret) {
-+ bch_err_msg(c, ret, "getting btree node");
-+ break;
-+ }
-+
-+ ret = btree_repair_node_boundaries(c, b, prev, cur);
-+
-+ if (ret == DROP_THIS_NODE) {
-+ six_unlock_read(&cur->c.lock);
-+ bch2_btree_node_evict(trans, cur_k.k);
-+ ret = bch2_journal_key_delete(c, b->c.btree_id,
-+ b->c.level, cur_k.k->k.p);
-+ cur = NULL;
-+ if (ret)
-+ break;
-+ continue;
-+ }
-+
-+ if (prev)
-+ six_unlock_read(&prev->c.lock);
-+ prev = NULL;
-+
-+ if (ret == DROP_PREV_NODE) {
-+ bch2_btree_node_evict(trans, prev_k.k);
-+ ret = bch2_journal_key_delete(c, b->c.btree_id,
-+ b->c.level, prev_k.k->k.p);
-+ if (ret)
-+ break;
-+
-+ bch2_btree_and_journal_iter_exit(&iter);
-+ bch2_bkey_buf_exit(&prev_k, c);
-+ bch2_bkey_buf_exit(&cur_k, c);
-+ goto again;
-+ } else if (ret)
-+ break;
-+
-+ prev = cur;
-+ cur = NULL;
-+ bch2_bkey_buf_copy(&prev_k, c, cur_k.k);
-+ }
-+
-+ if (!ret && !IS_ERR_OR_NULL(prev)) {
-+ BUG_ON(cur);
-+ ret = btree_repair_node_end(c, b, prev);
-+ }
-+
-+ if (!IS_ERR_OR_NULL(prev))
-+ six_unlock_read(&prev->c.lock);
-+ prev = NULL;
-+ if (!IS_ERR_OR_NULL(cur))
-+ six_unlock_read(&cur->c.lock);
-+ cur = NULL;
-+
-+ if (ret)
-+ goto err;
-+
-+ bch2_btree_and_journal_iter_exit(&iter);
-+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
-+
-+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-+ bch2_bkey_buf_reassemble(&cur_k, c, k);
-+ bch2_btree_and_journal_iter_advance(&iter);
-+
-+ cur = bch2_btree_node_get_noiter(trans, cur_k.k,
-+ b->c.btree_id, b->c.level - 1,
-+ false);
-+ ret = PTR_ERR_OR_ZERO(cur);
-+
-+ if (ret) {
-+ bch_err_msg(c, ret, "getting btree node");
-+ goto err;
-+ }
-+
-+ ret = bch2_btree_repair_topology_recurse(trans, cur);
-+ six_unlock_read(&cur->c.lock);
-+ cur = NULL;
-+
-+ if (ret == DROP_THIS_NODE) {
-+ bch2_btree_node_evict(trans, cur_k.k);
-+ ret = bch2_journal_key_delete(c, b->c.btree_id,
-+ b->c.level, cur_k.k->k.p);
-+ dropped_children = true;
-+ }
-+
-+ if (ret)
-+ goto err;
-+
-+ have_child = true;
-+ }
-+
-+ printbuf_reset(&buf);
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-+
-+ if (mustfix_fsck_err_on(!have_child, c,
-+ btree_node_topology_interior_node_empty,
-+ "empty interior btree node at btree %s level %u\n"
-+ " %s",
-+ bch2_btree_id_str(b->c.btree_id),
-+ b->c.level, buf.buf))
-+ ret = DROP_THIS_NODE;
-+err:
-+fsck_err:
-+ if (!IS_ERR_OR_NULL(prev))
-+ six_unlock_read(&prev->c.lock);
-+ if (!IS_ERR_OR_NULL(cur))
-+ six_unlock_read(&cur->c.lock);
-+
-+ bch2_btree_and_journal_iter_exit(&iter);
-+ bch2_bkey_buf_exit(&prev_k, c);
-+ bch2_bkey_buf_exit(&cur_k, c);
-+
-+ if (!ret && dropped_children)
-+ goto again;
-+
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+int bch2_check_topology(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree *b;
-+ unsigned i;
-+ int ret = 0;
-+
-+ for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
-+ struct btree_root *r = bch2_btree_id_root(c, i);
-+
-+ if (!r->alive)
-+ continue;
-+
-+ b = r->b;
-+ if (btree_node_fake(b))
-+ continue;
-+
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-+ ret = bch2_btree_repair_topology_recurse(trans, b);
-+ six_unlock_read(&b->c.lock);
-+
-+ if (ret == DROP_THIS_NODE) {
-+ bch_err(c, "empty btree root - repair unimplemented");
-+ ret = -BCH_ERR_fsck_repair_unimplemented;
-+ }
-+ }
-+
-+ bch2_trans_put(trans);
-+
-+ return ret;
-+}
-+
-+static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id,
-+ unsigned level, bool is_root,
-+ struct bkey_s_c *k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(*k);
-+ const union bch_extent_entry *entry_c;
-+ struct extent_ptr_decoded p = { 0 };
-+ bool do_update = false;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ /*
-+ * XXX
-+ * use check_bucket_ref here
-+ */
-+ bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-+ struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
-+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
-+
-+ if (!g->gen_valid &&
-+ (c->opts.reconstruct_alloc ||
-+ fsck_err(c, ptr_to_missing_alloc_key,
-+ "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
-+ "while marking %s",
-+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-+ p.ptr.gen,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
-+ if (!p.ptr.cached) {
-+ g->gen_valid = true;
-+ g->gen = p.ptr.gen;
-+ } else {
-+ do_update = true;
-+ }
-+ }
-+
-+ if (gen_cmp(p.ptr.gen, g->gen) > 0 &&
-+ (c->opts.reconstruct_alloc ||
-+ fsck_err(c, ptr_gen_newer_than_bucket_gen,
-+ "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
-+ "while marking %s",
-+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-+ p.ptr.gen, g->gen,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
-+ if (!p.ptr.cached) {
-+ g->gen_valid = true;
-+ g->gen = p.ptr.gen;
-+ g->data_type = 0;
-+ g->dirty_sectors = 0;
-+ g->cached_sectors = 0;
-+ set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
-+ } else {
-+ do_update = true;
-+ }
-+ }
-+
-+ if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX &&
-+ (c->opts.reconstruct_alloc ||
-+ fsck_err(c, ptr_gen_newer_than_bucket_gen,
-+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
-+ "while marking %s",
-+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
-+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-+ p.ptr.gen,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
-+ do_update = true;
-+
-+ if (!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0 &&
-+ (c->opts.reconstruct_alloc ||
-+ fsck_err(c, stale_dirty_ptr,
-+ "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
-+ "while marking %s",
-+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-+ p.ptr.gen, g->gen,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
-+ do_update = true;
-+
-+ if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
-+ continue;
-+
-+ if (fsck_err_on(bucket_data_type(g->data_type) &&
-+ bucket_data_type(g->data_type) != data_type, c,
-+ ptr_bucket_data_type_mismatch,
-+ "bucket %u:%zu different types of data in same bucket: %s, %s\n"
-+ "while marking %s",
-+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-+ bch2_data_types[g->data_type],
-+ bch2_data_types[data_type],
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
-+ if (data_type == BCH_DATA_btree) {
-+ g->data_type = data_type;
-+ set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
-+ } else {
-+ do_update = true;
-+ }
-+ }
-+
-+ if (p.has_ec) {
-+ struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx);
-+
-+ if (fsck_err_on(!m || !m->alive, c,
-+ ptr_to_missing_stripe,
-+ "pointer to nonexistent stripe %llu\n"
-+ "while marking %s",
-+ (u64) p.ec.idx,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
-+ do_update = true;
-+
-+ if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c,
-+ ptr_to_incorrect_stripe,
-+ "pointer does not match stripe %llu\n"
-+ "while marking %s",
-+ (u64) p.ec.idx,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
-+ do_update = true;
-+ }
-+ }
-+
-+ if (do_update) {
-+ struct bkey_ptrs ptrs;
-+ union bch_extent_entry *entry;
-+ struct bch_extent_ptr *ptr;
-+ struct bkey_i *new;
-+
-+ if (is_root) {
-+ bch_err(c, "cannot update btree roots yet");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
-+ if (!new) {
-+ bch_err_msg(c, ret, "allocating new key");
-+ ret = -BCH_ERR_ENOMEM_gc_repair_key;
-+ goto err;
-+ }
-+
-+ bkey_reassemble(new, *k);
-+
-+ if (level) {
-+ /*
-+ * We don't want to drop btree node pointers - if the
-+ * btree node isn't there anymore, the read path will
-+ * sort it out:
-+ */
-+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
-+ bkey_for_each_ptr(ptrs, ptr) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+ struct bucket *g = PTR_GC_BUCKET(ca, ptr);
-+
-+ ptr->gen = g->gen;
-+ }
-+ } else {
-+ bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+ struct bucket *g = PTR_GC_BUCKET(ca, ptr);
-+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
-+
-+ (ptr->cached &&
-+ (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
-+ (!ptr->cached &&
-+ gen_cmp(ptr->gen, g->gen) < 0) ||
-+ gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
-+ (g->data_type &&
-+ g->data_type != data_type);
-+ }));
-+again:
-+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
-+ bkey_extent_entry_for_each(ptrs, entry) {
-+ if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
-+ struct gc_stripe *m = genradix_ptr(&c->gc_stripes,
-+ entry->stripe_ptr.idx);
-+ union bch_extent_entry *next_ptr;
-+
-+ bkey_extent_entry_for_each_from(ptrs, next_ptr, entry)
-+ if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr)
-+ goto found;
-+ next_ptr = NULL;
-+found:
-+ if (!next_ptr) {
-+ bch_err(c, "aieee, found stripe ptr with no data ptr");
-+ continue;
-+ }
-+
-+ if (!m || !m->alive ||
-+ !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block],
-+ &next_ptr->ptr,
-+ m->sectors)) {
-+ bch2_bkey_extent_entry_drop(new, entry);
-+ goto again;
-+ }
-+ }
-+ }
-+ }
-+
-+ ret = bch2_journal_key_insert_take(c, btree_id, level, new);
-+ if (ret) {
-+ kfree(new);
-+ goto err;
-+ }
-+
-+ if (level)
-+ bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
-+
-+ if (0) {
-+ printbuf_reset(&buf);
-+ bch2_bkey_val_to_text(&buf, c, *k);
-+ bch_info(c, "updated %s", buf.buf);
-+
-+ printbuf_reset(&buf);
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new));
-+ bch_info(c, "new key %s", buf.buf);
-+ }
-+
-+ *k = bkey_i_to_s_c(new);
-+ }
-+err:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/* marking of btree keys/nodes: */
-+
-+static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
-+ unsigned level, bool is_root,
-+ struct bkey_s_c *k,
-+ bool initial)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey deleted = KEY(0, 0, 0);
-+ struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
-+ unsigned flags =
-+ BTREE_TRIGGER_GC|
-+ (initial ? BTREE_TRIGGER_NOATOMIC : 0);
-+ int ret = 0;
-+
-+ deleted.p = k->k->p;
-+
-+ if (initial) {
-+ BUG_ON(bch2_journal_seq_verify &&
-+ k->k->version.lo > atomic64_read(&c->journal.seq));
-+
-+ ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
-+ if (ret)
-+ goto err;
-+
-+ if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
-+ bkey_version_in_future,
-+ "key version number higher than recorded: %llu > %llu",
-+ k->k->version.lo,
-+ atomic64_read(&c->key_version)))
-+ atomic64_set(&c->key_version, k->k->version.lo);
-+ }
-+
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ bch2_mark_key(trans, btree_id, level, old, *k, flags));
-+fsck_err:
-+err:
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_node_iter iter;
-+ struct bkey unpacked;
-+ struct bkey_s_c k;
-+ struct bkey_buf prev, cur;
-+ int ret = 0;
-+
-+ if (!btree_node_type_needs_gc(btree_node_type(b)))
-+ return 0;
-+
-+ bch2_btree_node_iter_init_from_start(&iter, b);
-+ bch2_bkey_buf_init(&prev);
-+ bch2_bkey_buf_init(&cur);
-+ bkey_init(&prev.k->k);
-+
-+ while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
-+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
-+ &k, initial);
-+ if (ret)
-+ break;
-+
-+ bch2_btree_node_iter_advance(&iter, b);
-+
-+ if (b->c.level) {
-+ bch2_bkey_buf_reassemble(&cur, c, k);
-+
-+ ret = bch2_gc_check_topology(c, b, &prev, cur,
-+ bch2_btree_node_iter_end(&iter));
-+ if (ret)
-+ break;
-+ }
-+ }
-+
-+ bch2_bkey_buf_exit(&cur, c);
-+ bch2_bkey_buf_exit(&prev, c);
-+ return ret;
-+}
-+
-+static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
-+ bool initial, bool metadata_only)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct btree *b;
-+ unsigned depth = metadata_only ? 1 : 0;
-+ int ret = 0;
-+
-+ gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
-+
-+ __for_each_btree_node(trans, iter, btree_id, POS_MIN,
-+ 0, depth, BTREE_ITER_PREFETCH, b, ret) {
-+ bch2_verify_btree_nr_keys(b);
-+
-+ gc_pos_set(c, gc_pos_btree_node(b));
-+
-+ ret = btree_gc_mark_node(trans, b, initial);
-+ if (ret)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (ret)
-+ return ret;
-+
-+ mutex_lock(&c->btree_root_lock);
-+ b = bch2_btree_id_root(c, btree_id)->b;
-+ if (!btree_node_fake(b)) {
-+ struct bkey_s_c k = bkey_i_to_s_c(&b->key);
-+
-+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1,
-+ true, &k, initial);
-+ }
-+ gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
-+ mutex_unlock(&c->btree_root_lock);
-+
-+ return ret;
-+}
-+
-+static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b,
-+ unsigned target_depth)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_and_journal_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_buf cur, prev;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
-+ bch2_bkey_buf_init(&prev);
-+ bch2_bkey_buf_init(&cur);
-+ bkey_init(&prev.k->k);
-+
-+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-+ BUG_ON(bpos_lt(k.k->p, b->data->min_key));
-+ BUG_ON(bpos_gt(k.k->p, b->data->max_key));
-+
-+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
-+ false, &k, true);
-+ if (ret)
-+ goto fsck_err;
-+
-+ if (b->c.level) {
-+ bch2_bkey_buf_reassemble(&cur, c, k);
-+ k = bkey_i_to_s_c(cur.k);
-+
-+ bch2_btree_and_journal_iter_advance(&iter);
-+
-+ ret = bch2_gc_check_topology(c, b,
-+ &prev, cur,
-+ !bch2_btree_and_journal_iter_peek(&iter).k);
-+ if (ret)
-+ goto fsck_err;
-+ } else {
-+ bch2_btree_and_journal_iter_advance(&iter);
-+ }
-+ }
-+
-+ if (b->c.level > target_depth) {
-+ bch2_btree_and_journal_iter_exit(&iter);
-+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
-+
-+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-+ struct btree *child;
-+
-+ bch2_bkey_buf_reassemble(&cur, c, k);
-+ bch2_btree_and_journal_iter_advance(&iter);
-+
-+ child = bch2_btree_node_get_noiter(trans, cur.k,
-+ b->c.btree_id, b->c.level - 1,
-+ false);
-+ ret = PTR_ERR_OR_ZERO(child);
-+
-+ if (ret == -EIO) {
-+ bch2_topology_error(c);
-+
-+ if (__fsck_err(c,
-+ FSCK_CAN_FIX|
-+ FSCK_CAN_IGNORE|
-+ FSCK_NO_RATELIMIT,
-+ btree_node_read_error,
-+ "Unreadable btree node at btree %s level %u:\n"
-+ " %s",
-+ bch2_btree_id_str(b->c.btree_id),
-+ b->c.level - 1,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) &&
-+ should_restart_for_topology_repair(c)) {
-+ bch_info(c, "Halting mark and sweep to start topology repair pass");
-+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
-+ goto fsck_err;
-+ } else {
-+ /* Continue marking when opted to not
-+ * fix the error: */
-+ ret = 0;
-+ set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
-+ continue;
-+ }
-+ } else if (ret) {
-+ bch_err_msg(c, ret, "getting btree node");
-+ break;
-+ }
-+
-+ ret = bch2_gc_btree_init_recurse(trans, child,
-+ target_depth);
-+ six_unlock_read(&child->c.lock);
-+
-+ if (ret)
-+ break;
-+ }
-+ }
-+fsck_err:
-+ bch2_bkey_buf_exit(&cur, c);
-+ bch2_bkey_buf_exit(&prev, c);
-+ bch2_btree_and_journal_iter_exit(&iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int bch2_gc_btree_init(struct btree_trans *trans,
-+ enum btree_id btree_id,
-+ bool metadata_only)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree *b;
-+ unsigned target_depth = metadata_only ? 1 : 0;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ b = bch2_btree_id_root(c, btree_id)->b;
-+
-+ if (btree_node_fake(b))
-+ return 0;
-+
-+ six_lock_read(&b->c.lock, NULL, NULL);
-+ printbuf_reset(&buf);
-+ bch2_bpos_to_text(&buf, b->data->min_key);
-+ if (mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN), c,
-+ btree_root_bad_min_key,
-+ "btree root with incorrect min_key: %s", buf.buf)) {
-+ bch_err(c, "repair unimplemented");
-+ ret = -BCH_ERR_fsck_repair_unimplemented;
-+ goto fsck_err;
-+ }
-+
-+ printbuf_reset(&buf);
-+ bch2_bpos_to_text(&buf, b->data->max_key);
-+ if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX), c,
-+ btree_root_bad_max_key,
-+ "btree root with incorrect max_key: %s", buf.buf)) {
-+ bch_err(c, "repair unimplemented");
-+ ret = -BCH_ERR_fsck_repair_unimplemented;
-+ goto fsck_err;
-+ }
-+
-+ if (b->c.level >= target_depth)
-+ ret = bch2_gc_btree_init_recurse(trans, b, target_depth);
-+
-+ if (!ret) {
-+ struct bkey_s_c k = bkey_i_to_s_c(&b->key);
-+
-+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, true,
-+ &k, true);
-+ }
-+fsck_err:
-+ six_unlock_read(&b->c.lock);
-+
-+ if (ret < 0)
-+ bch_err_fn(c, ret);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
-+{
-+ return (int) btree_id_to_gc_phase(l) -
-+ (int) btree_id_to_gc_phase(r);
-+}
-+
-+static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ enum btree_id ids[BTREE_ID_NR];
-+ unsigned i;
-+ int ret = 0;
-+
-+ for (i = 0; i < BTREE_ID_NR; i++)
-+ ids[i] = i;
-+ bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);
-+
-+ for (i = 0; i < BTREE_ID_NR && !ret; i++)
-+ ret = initial
-+ ? bch2_gc_btree_init(trans, ids[i], metadata_only)
-+ : bch2_gc_btree(trans, ids[i], initial, metadata_only);
-+
-+ for (i = BTREE_ID_NR; i < btree_id_nr_alive(c) && !ret; i++) {
-+ if (!bch2_btree_id_root(c, i)->alive)
-+ continue;
-+
-+ ret = initial
-+ ? bch2_gc_btree_init(trans, i, metadata_only)
-+ : bch2_gc_btree(trans, i, initial, metadata_only);
-+ }
-+
-+ if (ret < 0)
-+ bch_err_fn(c, ret);
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
-+ u64 start, u64 end,
-+ enum bch_data_type type,
-+ unsigned flags)
-+{
-+ u64 b = sector_to_bucket(ca, start);
-+
-+ do {
-+ unsigned sectors =
-+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start;
-+
-+ bch2_mark_metadata_bucket(c, ca, b, type, sectors,
-+ gc_phase(GC_PHASE_SB), flags);
-+ b++;
-+ start += sectors;
-+ } while (start < end);
-+}
-+
-+static void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
-+ unsigned flags)
-+{
-+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
-+ unsigned i;
-+ u64 b;
-+
-+ for (i = 0; i < layout->nr_superblocks; i++) {
-+ u64 offset = le64_to_cpu(layout->sb_offset[i]);
-+
-+ if (offset == BCH_SB_SECTOR)
-+ mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
-+ BCH_DATA_sb, flags);
-+
-+ mark_metadata_sectors(c, ca, offset,
-+ offset + (1 << layout->sb_max_size_bits),
-+ BCH_DATA_sb, flags);
-+ }
-+
-+ for (i = 0; i < ca->journal.nr; i++) {
-+ b = ca->journal.buckets[i];
-+ bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
-+ ca->mi.bucket_size,
-+ gc_phase(GC_PHASE_SB), flags);
-+ }
-+}
-+
-+static void bch2_mark_superblocks(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ mutex_lock(&c->sb_lock);
-+ gc_pos_set(c, gc_phase(GC_PHASE_SB));
-+
-+ for_each_online_member(ca, c, i)
-+ bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC);
-+ mutex_unlock(&c->sb_lock);
-+}
-+
-+#if 0
-+/* Also see bch2_pending_btree_node_free_insert_done() */
-+static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
-+{
-+ struct btree_update *as;
-+ struct pending_btree_node_free *d;
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ gc_pos_set(c, gc_phase(GC_PHASE_PENDING_DELETE));
-+
-+ for_each_pending_btree_node_free(c, as, d)
-+ if (d->index_update_done)
-+ bch2_mark_key(c, bkey_i_to_s_c(&d->key), BTREE_TRIGGER_GC);
-+
-+ mutex_unlock(&c->btree_interior_update_lock);
-+}
-+#endif
-+
-+static void bch2_gc_free(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ genradix_free(&c->reflink_gc_table);
-+ genradix_free(&c->gc_stripes);
-+
-+ for_each_member_device(ca, c, i) {
-+ kvpfree(rcu_dereference_protected(ca->buckets_gc, 1),
-+ sizeof(struct bucket_array) +
-+ ca->mi.nbuckets * sizeof(struct bucket));
-+ ca->buckets_gc = NULL;
-+
-+ free_percpu(ca->usage_gc);
-+ ca->usage_gc = NULL;
-+ }
-+
-+ free_percpu(c->usage_gc);
-+ c->usage_gc = NULL;
-+}
-+
-+static int bch2_gc_done(struct bch_fs *c,
-+ bool initial, bool metadata_only)
-+{
-+ struct bch_dev *ca = NULL;
-+ struct printbuf buf = PRINTBUF;
-+ bool verify = !metadata_only &&
-+ !c->opts.reconstruct_alloc &&
-+ (!initial || (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)));
-+ unsigned i, dev;
-+ int ret = 0;
-+
-+ percpu_down_write(&c->mark_lock);
-+
-+#define copy_field(_err, _f, _msg, ...) \
-+ if (dst->_f != src->_f && \
-+ (!verify || \
-+ fsck_err(c, _err, _msg ": got %llu, should be %llu" \
-+ , ##__VA_ARGS__, dst->_f, src->_f))) \
-+ dst->_f = src->_f
-+#define copy_dev_field(_err, _f, _msg, ...) \
-+ copy_field(_err, _f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
-+#define copy_fs_field(_err, _f, _msg, ...) \
-+ copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__)
-+
-+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
-+ bch2_fs_usage_acc_to_base(c, i);
-+
-+ for_each_member_device(ca, c, dev) {
-+ struct bch_dev_usage *dst = ca->usage_base;
-+ struct bch_dev_usage *src = (void *)
-+ bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc,
-+ dev_usage_u64s());
-+
-+ for (i = 0; i < BCH_DATA_NR; i++) {
-+ copy_dev_field(dev_usage_buckets_wrong,
-+ d[i].buckets, "%s buckets", bch2_data_types[i]);
-+ copy_dev_field(dev_usage_sectors_wrong,
-+ d[i].sectors, "%s sectors", bch2_data_types[i]);
-+ copy_dev_field(dev_usage_fragmented_wrong,
-+ d[i].fragmented, "%s fragmented", bch2_data_types[i]);
-+ }
-+
-+ copy_dev_field(dev_usage_buckets_ec_wrong,
-+ buckets_ec, "buckets_ec");
-+ }
-+
-+ {
-+ unsigned nr = fs_usage_u64s(c);
-+ struct bch_fs_usage *dst = c->usage_base;
-+ struct bch_fs_usage *src = (void *)
-+ bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);
-+
-+ copy_fs_field(fs_usage_hidden_wrong,
-+ hidden, "hidden");
-+ copy_fs_field(fs_usage_btree_wrong,
-+ btree, "btree");
-+
-+ if (!metadata_only) {
-+ copy_fs_field(fs_usage_data_wrong,
-+ data, "data");
-+ copy_fs_field(fs_usage_cached_wrong,
-+ cached, "cached");
-+ copy_fs_field(fs_usage_reserved_wrong,
-+ reserved, "reserved");
-+ copy_fs_field(fs_usage_nr_inodes_wrong,
-+ nr_inodes,"nr_inodes");
-+
-+ for (i = 0; i < BCH_REPLICAS_MAX; i++)
-+ copy_fs_field(fs_usage_persistent_reserved_wrong,
-+ persistent_reserved[i],
-+ "persistent_reserved[%i]", i);
-+ }
-+
-+ for (i = 0; i < c->replicas.nr; i++) {
-+ struct bch_replicas_entry *e =
-+ cpu_replicas_entry(&c->replicas, i);
-+
-+ if (metadata_only &&
-+ (e->data_type == BCH_DATA_user ||
-+ e->data_type == BCH_DATA_cached))
-+ continue;
-+
-+ printbuf_reset(&buf);
-+ bch2_replicas_entry_to_text(&buf, e);
-+
-+ copy_fs_field(fs_usage_replicas_wrong,
-+ replicas[i], "%s", buf.buf);
-+ }
-+ }
-+
-+#undef copy_fs_field
-+#undef copy_dev_field
-+#undef copy_stripe_field
-+#undef copy_field
-+fsck_err:
-+ if (ca)
-+ percpu_ref_put(&ca->ref);
-+ if (ret)
-+ bch_err_fn(c, ret);
-+
-+ percpu_up_write(&c->mark_lock);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int bch2_gc_start(struct bch_fs *c)
-+{
-+ struct bch_dev *ca = NULL;
-+ unsigned i;
-+
-+ BUG_ON(c->usage_gc);
-+
-+ c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
-+ sizeof(u64), GFP_KERNEL);
-+ if (!c->usage_gc) {
-+ bch_err(c, "error allocating c->usage_gc");
-+ return -BCH_ERR_ENOMEM_gc_start;
-+ }
-+
-+ for_each_member_device(ca, c, i) {
-+ BUG_ON(ca->usage_gc);
-+
-+ ca->usage_gc = alloc_percpu(struct bch_dev_usage);
-+ if (!ca->usage_gc) {
-+ bch_err(c, "error allocating ca->usage_gc");
-+ percpu_ref_put(&ca->ref);
-+ return -BCH_ERR_ENOMEM_gc_start;
-+ }
-+
-+ this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
-+ ca->mi.nbuckets - ca->mi.first_bucket);
-+ }
-+
-+ return 0;
-+}
-+
-+static int bch2_gc_reset(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ for_each_member_device(ca, c, i) {
-+ free_percpu(ca->usage_gc);
-+ ca->usage_gc = NULL;
-+ }
-+
-+ free_percpu(c->usage_gc);
-+ c->usage_gc = NULL;
-+
-+ return bch2_gc_start(c);
-+}
-+
-+/* returns true if not equal */
-+static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
-+ struct bch_alloc_v4 r)
-+{
-+ return l.gen != r.gen ||
-+ l.oldest_gen != r.oldest_gen ||
-+ l.data_type != r.data_type ||
-+ l.dirty_sectors != r.dirty_sectors ||
-+ l.cached_sectors != r.cached_sectors ||
-+ l.stripe_redundancy != r.stripe_redundancy ||
-+ l.stripe != r.stripe;
-+}
-+
-+static int bch2_alloc_write_key(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ bool metadata_only)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
-+ struct bucket gc, *b;
-+ struct bkey_i_alloc_v4 *a;
-+ struct bch_alloc_v4 old_convert, new;
-+ const struct bch_alloc_v4 *old;
-+ enum bch_data_type type;
-+ int ret;
-+
-+ if (bkey_ge(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets)))
-+ return 1;
-+
-+ old = bch2_alloc_to_v4(k, &old_convert);
-+ new = *old;
-+
-+ percpu_down_read(&c->mark_lock);
-+ b = gc_bucket(ca, iter->pos.offset);
-+
-+ /*
-+ * b->data_type doesn't yet include need_discard & need_gc_gen states -
-+ * fix that here:
-+ */
-+ type = __alloc_data_type(b->dirty_sectors,
-+ b->cached_sectors,
-+ b->stripe,
-+ *old,
-+ b->data_type);
-+ if (b->data_type != type) {
-+ struct bch_dev_usage *u;
-+
-+ preempt_disable();
-+ u = this_cpu_ptr(ca->usage_gc);
-+ u->d[b->data_type].buckets--;
-+ b->data_type = type;
-+ u->d[b->data_type].buckets++;
-+ preempt_enable();
-+ }
-+
-+ gc = *b;
-+ percpu_up_read(&c->mark_lock);
-+
-+ if (metadata_only &&
-+ gc.data_type != BCH_DATA_sb &&
-+ gc.data_type != BCH_DATA_journal &&
-+ gc.data_type != BCH_DATA_btree)
-+ return 0;
-+
-+ if (gen_after(old->gen, gc.gen))
-+ return 0;
-+
-+ if (c->opts.reconstruct_alloc ||
-+ fsck_err_on(new.data_type != gc.data_type, c,
-+ alloc_key_data_type_wrong,
-+ "bucket %llu:%llu gen %u has wrong data_type"
-+ ": got %s, should be %s",
-+ iter->pos.inode, iter->pos.offset,
-+ gc.gen,
-+ bch2_data_types[new.data_type],
-+ bch2_data_types[gc.data_type]))
-+ new.data_type = gc.data_type;
-+
-+#define copy_bucket_field(_errtype, _f) \
-+ if (c->opts.reconstruct_alloc || \
-+ fsck_err_on(new._f != gc._f, c, _errtype, \
-+ "bucket %llu:%llu gen %u data type %s has wrong " #_f \
-+ ": got %u, should be %u", \
-+ iter->pos.inode, iter->pos.offset, \
-+ gc.gen, \
-+ bch2_data_types[gc.data_type], \
-+ new._f, gc._f)) \
-+ new._f = gc._f; \
-+
-+ copy_bucket_field(alloc_key_gen_wrong,
-+ gen);
-+ copy_bucket_field(alloc_key_dirty_sectors_wrong,
-+ dirty_sectors);
-+ copy_bucket_field(alloc_key_cached_sectors_wrong,
-+ cached_sectors);
-+ copy_bucket_field(alloc_key_stripe_wrong,
-+ stripe);
-+ copy_bucket_field(alloc_key_stripe_redundancy_wrong,
-+ stripe_redundancy);
-+#undef copy_bucket_field
-+
-+ if (!bch2_alloc_v4_cmp(*old, new))
-+ return 0;
-+
-+ a = bch2_alloc_to_v4_mut(trans, k);
-+ ret = PTR_ERR_OR_ZERO(a);
-+ if (ret)
-+ return ret;
-+
-+ a->v = new;
-+
-+ /*
-+ * The trigger normally makes sure this is set, but we're not running
-+ * triggers:
-+ */
-+ if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
-+ a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
-+
-+ ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN);
-+fsck_err:
-+ return ret;
-+}
-+
-+static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_dev *ca;
-+ unsigned i;
-+ int ret = 0;
-+
-+ for_each_member_device(ca, c, i) {
-+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
-+ POS(ca->dev_idx, ca->mi.first_bucket),
-+ BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_LAZY_RW,
-+ bch2_alloc_write_key(trans, &iter, k, metadata_only));
-+
-+ if (ret < 0) {
-+ bch_err_fn(c, ret);
-+ percpu_ref_put(&ca->ref);
-+ break;
-+ }
-+ }
-+
-+ bch2_trans_put(trans);
-+ return ret < 0 ? ret : 0;
-+}
-+
-+static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
-+{
-+ struct bch_dev *ca;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bucket *g;
-+ struct bch_alloc_v4 a_convert;
-+ const struct bch_alloc_v4 *a;
-+ unsigned i;
-+ int ret;
-+
-+ for_each_member_device(ca, c, i) {
-+ struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
-+ ca->mi.nbuckets * sizeof(struct bucket),
-+ GFP_KERNEL|__GFP_ZERO);
-+ if (!buckets) {
-+ percpu_ref_put(&ca->ref);
-+ bch_err(c, "error allocating ca->buckets[gc]");
-+ ret = -BCH_ERR_ENOMEM_gc_alloc_start;
-+ goto err;
-+ }
-+
-+ buckets->first_bucket = ca->mi.first_bucket;
-+ buckets->nbuckets = ca->mi.nbuckets;
-+ rcu_assign_pointer(ca->buckets_gc, buckets);
-+ }
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-+ BTREE_ITER_PREFETCH, k, ret) {
-+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
-+ g = gc_bucket(ca, k.k->p.offset);
-+
-+ a = bch2_alloc_to_v4(k, &a_convert);
-+
-+ g->gen_valid = 1;
-+ g->gen = a->gen;
-+
-+ if (metadata_only &&
-+ (a->data_type == BCH_DATA_user ||
-+ a->data_type == BCH_DATA_cached ||
-+ a->data_type == BCH_DATA_parity)) {
-+ g->data_type = a->data_type;
-+ g->dirty_sectors = a->dirty_sectors;
-+ g->cached_sectors = a->cached_sectors;
-+ g->stripe = a->stripe;
-+ g->stripe_redundancy = a->stripe_redundancy;
-+ }
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ bch2_trans_put(trans);
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ for_each_member_device(ca, c, i) {
-+ struct bucket_array *buckets = gc_bucket_array(ca);
-+ struct bucket *g;
-+
-+ for_each_bucket(g, buckets) {
-+ if (metadata_only &&
-+ (g->data_type == BCH_DATA_user ||
-+ g->data_type == BCH_DATA_cached ||
-+ g->data_type == BCH_DATA_parity))
-+ continue;
-+ g->data_type = 0;
-+ g->dirty_sectors = 0;
-+ g->cached_sectors = 0;
-+ }
-+ }
-+}
-+
-+static int bch2_gc_write_reflink_key(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ size_t *idx)
-+{
-+ struct bch_fs *c = trans->c;
-+ const __le64 *refcount = bkey_refcount_c(k);
-+ struct printbuf buf = PRINTBUF;
-+ struct reflink_gc *r;
-+ int ret = 0;
-+
-+ if (!refcount)
-+ return 0;
-+
-+ while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
-+ r->offset < k.k->p.offset)
-+ ++*idx;
-+
-+ if (!r ||
-+ r->offset != k.k->p.offset ||
-+ r->size != k.k->size) {
-+ bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
-+ return -EINVAL;
-+ }
-+
-+ if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
-+ reflink_v_refcount_wrong,
-+ "reflink key has wrong refcount:\n"
-+ " %s\n"
-+ " should be %u",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
-+ r->refcount)) {
-+ struct bkey_i *new = bch2_bkey_make_mut(trans, iter, &k, 0);
-+
-+ ret = PTR_ERR_OR_ZERO(new);
-+ if (ret)
-+ return ret;
-+
-+ if (!r->refcount)
-+ new->k.type = KEY_TYPE_deleted;
-+ else
-+ *bkey_refcount(new) = cpu_to_le64(r->refcount);
-+ }
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
-+{
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ size_t idx = 0;
-+ int ret = 0;
-+
-+ if (metadata_only)
-+ return 0;
-+
-+ trans = bch2_trans_get(c);
-+
-+ ret = for_each_btree_key_commit(trans, iter,
-+ BTREE_ID_reflink, POS_MIN,
-+ BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_NOFAIL,
-+ bch2_gc_write_reflink_key(trans, &iter, k, &idx));
-+
-+ c->reflink_gc_nr = 0;
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int bch2_gc_reflink_start(struct bch_fs *c,
-+ bool metadata_only)
-+{
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct reflink_gc *r;
-+ int ret = 0;
-+
-+ if (metadata_only)
-+ return 0;
-+
-+ trans = bch2_trans_get(c);
-+ c->reflink_gc_nr = 0;
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
-+ BTREE_ITER_PREFETCH, k, ret) {
-+ const __le64 *refcount = bkey_refcount_c(k);
-+
-+ if (!refcount)
-+ continue;
-+
-+ r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
-+ GFP_KERNEL);
-+ if (!r) {
-+ ret = -BCH_ERR_ENOMEM_gc_reflink_start;
-+ break;
-+ }
-+
-+ r->offset = k.k->p.offset;
-+ r->size = k.k->size;
-+ r->refcount = 0;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static void bch2_gc_reflink_reset(struct bch_fs *c, bool metadata_only)
-+{
-+ struct genradix_iter iter;
-+ struct reflink_gc *r;
-+
-+ genradix_for_each(&c->reflink_gc_table, iter, r)
-+ r->refcount = 0;
-+}
-+
-+static int bch2_gc_write_stripes_key(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct printbuf buf = PRINTBUF;
-+ const struct bch_stripe *s;
-+ struct gc_stripe *m;
-+ bool bad = false;
-+ unsigned i;
-+ int ret = 0;
-+
-+ if (k.k->type != KEY_TYPE_stripe)
-+ return 0;
-+
-+ s = bkey_s_c_to_stripe(k).v;
-+ m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
-+
-+ for (i = 0; i < s->nr_blocks; i++) {
-+ u32 old = stripe_blockcount_get(s, i);
-+ u32 new = (m ? m->block_sectors[i] : 0);
-+
-+ if (old != new) {
-+ prt_printf(&buf, "stripe block %u has wrong sector count: got %u, should be %u\n",
-+ i, old, new);
-+ bad = true;
-+ }
-+ }
-+
-+ if (bad)
-+ bch2_bkey_val_to_text(&buf, c, k);
-+
-+ if (fsck_err_on(bad, c, stripe_sector_count_wrong,
-+ "%s", buf.buf)) {
-+ struct bkey_i_stripe *new;
-+
-+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
-+ ret = PTR_ERR_OR_ZERO(new);
-+ if (ret)
-+ return ret;
-+
-+ bkey_reassemble(&new->k_i, k);
-+
-+ for (i = 0; i < new->v.nr_blocks; i++)
-+ stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
-+
-+ ret = bch2_trans_update(trans, iter, &new->k_i, 0);
-+ }
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
-+{
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ if (metadata_only)
-+ return 0;
-+
-+ trans = bch2_trans_get(c);
-+
-+ ret = for_each_btree_key_commit(trans, iter,
-+ BTREE_ID_stripes, POS_MIN,
-+ BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_NOFAIL,
-+ bch2_gc_write_stripes_key(trans, &iter, k));
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only)
-+{
-+ genradix_free(&c->gc_stripes);
-+}
-+
-+/**
-+ * bch2_gc - walk _all_ references to buckets, and recompute them:
-+ *
-+ * @c: filesystem object
-+ * @initial: are we in recovery?
-+ * @metadata_only: are we just checking metadata references, or everything?
-+ *
-+ * Returns: 0 on success, or standard errcode on failure
-+ *
-+ * Order matters here:
-+ * - Concurrent GC relies on the fact that we have a total ordering for
-+ * everything that GC walks - see gc_will_visit_node(),
-+ * gc_will_visit_root()
-+ *
-+ * - also, references move around in the course of index updates and
-+ * various other crap: everything needs to agree on the ordering
-+ * references are allowed to move around in - e.g., we're allowed to
-+ * start with a reference owned by an open_bucket (the allocator) and
-+ * move it to the btree, but not the reverse.
-+ *
-+ * This is necessary to ensure that gc doesn't miss references that
-+ * move around - if references move backwards in the ordering GC
-+ * uses, GC could skip past them
-+ */
-+int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
-+{
-+ unsigned iter = 0;
-+ int ret;
-+
-+ lockdep_assert_held(&c->state_lock);
-+
-+ down_write(&c->gc_lock);
-+
-+ bch2_btree_interior_updates_flush(c);
-+
-+ ret = bch2_gc_start(c) ?:
-+ bch2_gc_alloc_start(c, metadata_only) ?:
-+ bch2_gc_reflink_start(c, metadata_only);
-+ if (ret)
-+ goto out;
-+again:
-+ gc_pos_set(c, gc_phase(GC_PHASE_START));
-+
-+ bch2_mark_superblocks(c);
-+
-+ ret = bch2_gc_btrees(c, initial, metadata_only);
-+
-+ if (ret)
-+ goto out;
-+
-+#if 0
-+ bch2_mark_pending_btree_node_frees(c);
-+#endif
-+ c->gc_count++;
-+
-+ if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
-+ (!iter && bch2_test_restart_gc)) {
-+ if (iter++ > 2) {
-+ bch_info(c, "Unable to fix bucket gens, looping");
-+ ret = -EINVAL;
-+ goto out;
-+ }
-+
-+ /*
-+ * XXX: make sure gens we fixed got saved
-+ */
-+ bch_info(c, "Second GC pass needed, restarting:");
-+ clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
-+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
-+
-+ bch2_gc_stripes_reset(c, metadata_only);
-+ bch2_gc_alloc_reset(c, metadata_only);
-+ bch2_gc_reflink_reset(c, metadata_only);
-+ ret = bch2_gc_reset(c);
-+ if (ret)
-+ goto out;
-+
-+ /* flush fsck errors, reset counters */
-+ bch2_flush_fsck_errs(c);
-+ goto again;
-+ }
-+out:
-+ if (!ret) {
-+ bch2_journal_block(&c->journal);
-+
-+ ret = bch2_gc_stripes_done(c, metadata_only) ?:
-+ bch2_gc_reflink_done(c, metadata_only) ?:
-+ bch2_gc_alloc_done(c, metadata_only) ?:
-+ bch2_gc_done(c, initial, metadata_only);
-+
-+ bch2_journal_unblock(&c->journal);
-+ }
-+
-+ percpu_down_write(&c->mark_lock);
-+ /* Indicates that gc is no longer in progress: */
-+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
-+
-+ bch2_gc_free(c);
-+ percpu_up_write(&c->mark_lock);
-+
-+ up_write(&c->gc_lock);
-+
-+ /*
-+ * At startup, allocations can happen directly instead of via the
-+ * allocator thread - issue wakeup in case they blocked on gc_lock:
-+ */
-+ closure_wake_up(&c->freelist_wait);
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+static int gc_btree_gens_key(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+ struct bkey_i *u;
-+ int ret;
-+
-+ percpu_down_read(&c->mark_lock);
-+ bkey_for_each_ptr(ptrs, ptr) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+
-+ if (ptr_stale(ca, ptr) > 16) {
-+ percpu_up_read(&c->mark_lock);
-+ goto update;
-+ }
-+ }
-+
-+ bkey_for_each_ptr(ptrs, ptr) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+ u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
-+
-+ if (gen_after(*gen, ptr->gen))
-+ *gen = ptr->gen;
-+ }
-+ percpu_up_read(&c->mark_lock);
-+ return 0;
-+update:
-+ u = bch2_bkey_make_mut(trans, iter, &k, 0);
-+ ret = PTR_ERR_OR_ZERO(u);
-+ if (ret)
-+ return ret;
-+
-+ bch2_extent_normalize(c, bkey_i_to_s(u));
-+ return 0;
-+}
-+
-+static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode);
-+ struct bch_alloc_v4 a_convert;
-+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
-+ struct bkey_i_alloc_v4 *a_mut;
-+ int ret;
-+
-+ if (a->oldest_gen == ca->oldest_gen[iter->pos.offset])
-+ return 0;
-+
-+ a_mut = bch2_alloc_to_v4_mut(trans, k);
-+ ret = PTR_ERR_OR_ZERO(a_mut);
-+ if (ret)
-+ return ret;
-+
-+ a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
-+ a_mut->v.data_type = alloc_data_type(a_mut->v, a_mut->v.data_type);
-+
-+ return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
-+}
-+
-+int bch2_gc_gens(struct bch_fs *c)
-+{
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_dev *ca;
-+ u64 b, start_time = local_clock();
-+ unsigned i;
-+ int ret;
-+
-+ /*
-+ * Ideally we would be using state_lock and not gc_lock here, but that
-+ * introduces a deadlock in the RO path - we currently take the state
-+ * lock at the start of going RO, thus the gc thread may get stuck:
-+ */
-+ if (!mutex_trylock(&c->gc_gens_lock))
-+ return 0;
-+
-+ trace_and_count(c, gc_gens_start, c);
-+ down_read(&c->gc_lock);
-+ trans = bch2_trans_get(c);
-+
-+ for_each_member_device(ca, c, i) {
-+ struct bucket_gens *gens = bucket_gens(ca);
-+
-+ BUG_ON(ca->oldest_gen);
-+
-+ ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL);
-+ if (!ca->oldest_gen) {
-+ percpu_ref_put(&ca->ref);
-+ ret = -BCH_ERR_ENOMEM_gc_gens;
-+ goto err;
-+ }
-+
-+ for (b = gens->first_bucket;
-+ b < gens->nbuckets; b++)
-+ ca->oldest_gen[b] = gens->b[b];
-+ }
-+
-+ for (i = 0; i < BTREE_ID_NR; i++)
-+ if (btree_type_has_ptrs(i)) {
-+ c->gc_gens_btree = i;
-+ c->gc_gens_pos = POS_MIN;
-+
-+ ret = for_each_btree_key_commit(trans, iter, i,
-+ POS_MIN,
-+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
-+ k,
-+ NULL, NULL,
-+ BTREE_INSERT_NOFAIL,
-+ gc_btree_gens_key(trans, &iter, k));
-+ if (ret && !bch2_err_matches(ret, EROFS))
-+ bch_err_fn(c, ret);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
-+ POS_MIN,
-+ BTREE_ITER_PREFETCH,
-+ k,
-+ NULL, NULL,
-+ BTREE_INSERT_NOFAIL,
-+ bch2_alloc_write_oldest_gen(trans, &iter, k));
-+ if (ret && !bch2_err_matches(ret, EROFS))
-+ bch_err_fn(c, ret);
-+ if (ret)
-+ goto err;
-+
-+ c->gc_gens_btree = 0;
-+ c->gc_gens_pos = POS_MIN;
-+
-+ c->gc_count++;
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
-+ trace_and_count(c, gc_gens_end, c);
-+err:
-+ for_each_member_device(ca, c, i) {
-+ kvfree(ca->oldest_gen);
-+ ca->oldest_gen = NULL;
-+ }
-+
-+ bch2_trans_put(trans);
-+ up_read(&c->gc_lock);
-+ mutex_unlock(&c->gc_gens_lock);
-+ return ret;
-+}
-+
-+static int bch2_gc_thread(void *arg)
-+{
-+ struct bch_fs *c = arg;
-+ struct io_clock *clock = &c->io_clock[WRITE];
-+ unsigned long last = atomic64_read(&clock->now);
-+ unsigned last_kick = atomic_read(&c->kick_gc);
-+ int ret;
-+
-+ set_freezable();
-+
-+ while (1) {
-+ while (1) {
-+ set_current_state(TASK_INTERRUPTIBLE);
-+
-+ if (kthread_should_stop()) {
-+ __set_current_state(TASK_RUNNING);
-+ return 0;
-+ }
-+
-+ if (atomic_read(&c->kick_gc) != last_kick)
-+ break;
-+
-+ if (c->btree_gc_periodic) {
-+ unsigned long next = last + c->capacity / 16;
-+
-+ if (atomic64_read(&clock->now) >= next)
-+ break;
-+
-+ bch2_io_clock_schedule_timeout(clock, next);
-+ } else {
-+ schedule();
-+ }
-+
-+ try_to_freeze();
-+ }
-+ __set_current_state(TASK_RUNNING);
-+
-+ last = atomic64_read(&clock->now);
-+ last_kick = atomic_read(&c->kick_gc);
-+
-+ /*
-+ * Full gc is currently incompatible with btree key cache:
-+ */
-+#if 0
-+ ret = bch2_gc(c, false, false);
-+#else
-+ ret = bch2_gc_gens(c);
-+#endif
-+ if (ret < 0)
-+ bch_err_fn(c, ret);
-+
-+ debug_check_no_locks_held();
-+ }
-+
-+ return 0;
-+}
-+
-+void bch2_gc_thread_stop(struct bch_fs *c)
-+{
-+ struct task_struct *p;
-+
-+ p = c->gc_thread;
-+ c->gc_thread = NULL;
-+
-+ if (p) {
-+ kthread_stop(p);
-+ put_task_struct(p);
-+ }
-+}
-+
-+int bch2_gc_thread_start(struct bch_fs *c)
-+{
-+ struct task_struct *p;
-+
-+ if (c->gc_thread)
-+ return 0;
-+
-+ p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
-+ if (IS_ERR(p)) {
-+ bch_err_fn(c, PTR_ERR(p));
-+ return PTR_ERR(p);
-+ }
-+
-+ get_task_struct(p);
-+ c->gc_thread = p;
-+ wake_up_process(p);
-+ return 0;
-+}
-diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
-new file mode 100644
-index 000000000000..607575f83a00
---- /dev/null
-+++ b/fs/bcachefs/btree_gc.h
-@@ -0,0 +1,114 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_GC_H
-+#define _BCACHEFS_BTREE_GC_H
-+
-+#include "bkey.h"
-+#include "btree_types.h"
-+
-+int bch2_check_topology(struct bch_fs *);
-+int bch2_gc(struct bch_fs *, bool, bool);
-+int bch2_gc_gens(struct bch_fs *);
-+void bch2_gc_thread_stop(struct bch_fs *);
-+int bch2_gc_thread_start(struct bch_fs *);
-+
-+/*
-+ * For concurrent mark and sweep (with other index updates), we define a total
-+ * ordering of _all_ references GC walks:
-+ *
-+ * Note that some references will have the same GC position as others - e.g.
-+ * everything within the same btree node; in those cases we're relying on
-+ * whatever locking exists for where those references live, i.e. the write lock
-+ * on a btree node.
-+ *
-+ * That locking is also required to ensure GC doesn't pass the updater in
-+ * between the updater adding/removing the reference and updating the GC marks;
-+ * without that, we would at best double count sometimes.
-+ *
-+ * That part is important - whenever calling bch2_mark_pointers(), a lock _must_
-+ * be held that prevents GC from passing the position the updater is at.
-+ *
-+ * (What about the start of gc, when we're clearing all the marks? GC clears the
-+ * mark with the gc pos seqlock held, and bch_mark_bucket checks against the gc
-+ * position inside its cmpxchg loop, so crap magically works).
-+ */
-+
-+/* Position of (the start of) a gc phase: */
-+static inline struct gc_pos gc_phase(enum gc_phase phase)
-+{
-+ return (struct gc_pos) {
-+ .phase = phase,
-+ .pos = POS_MIN,
-+ .level = 0,
-+ };
-+}
-+
-+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
-+{
-+ return cmp_int(l.phase, r.phase) ?:
-+ bpos_cmp(l.pos, r.pos) ?:
-+ cmp_int(l.level, r.level);
-+}
-+
-+static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
-+{
-+ switch (id) {
-+#define x(name, v, ...) case BTREE_ID_##name: return GC_PHASE_BTREE_##name;
-+ BCH_BTREE_IDS()
-+#undef x
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static inline struct gc_pos gc_pos_btree(enum btree_id id,
-+ struct bpos pos, unsigned level)
-+{
-+ return (struct gc_pos) {
-+ .phase = btree_id_to_gc_phase(id),
-+ .pos = pos,
-+ .level = level,
-+ };
-+}
-+
-+/*
-+ * GC position of the pointers within a btree node: note, _not_ for &b->key
-+ * itself, that lives in the parent node:
-+ */
-+static inline struct gc_pos gc_pos_btree_node(struct btree *b)
-+{
-+ return gc_pos_btree(b->c.btree_id, b->key.k.p, b->c.level);
-+}
-+
-+/*
-+ * GC position of the pointer to a btree root: we don't use
-+ * gc_pos_pointer_to_btree_node() here to avoid a potential race with
-+ * btree_split() increasing the tree depth - the new root will have level > the
-+ * old root and thus have a greater gc position than the old root, but that
-+ * would be incorrect since once gc has marked the root it's not coming back.
-+ */
-+static inline struct gc_pos gc_pos_btree_root(enum btree_id id)
-+{
-+ return gc_pos_btree(id, SPOS_MAX, BTREE_MAX_DEPTH);
-+}
-+
-+static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
-+{
-+ unsigned seq;
-+ bool ret;
-+
-+ do {
-+ seq = read_seqcount_begin(&c->gc_pos_lock);
-+ ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
-+ } while (read_seqcount_retry(&c->gc_pos_lock, seq));
-+
-+ return ret;
-+}
-+
-+static inline void bch2_do_gc_gens(struct bch_fs *c)
-+{
-+ atomic_inc(&c->kick_gc);
-+ if (c->gc_thread)
-+ wake_up_process(c->gc_thread);
-+}
-+
-+#endif /* _BCACHEFS_BTREE_GC_H */
-diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
-new file mode 100644
-index 000000000000..37d896edb06e
---- /dev/null
-+++ b/fs/bcachefs/btree_io.c
-@@ -0,0 +1,2298 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_methods.h"
-+#include "bkey_sort.h"
-+#include "btree_cache.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "btree_locking.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "buckets.h"
-+#include "checksum.h"
-+#include "debug.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "io_write.h"
-+#include "journal_reclaim.h"
-+#include "journal_seq_blacklist.h"
-+#include "recovery.h"
-+#include "super-io.h"
-+#include "trace.h"
-+
-+#include <linux/sched/mm.h>
-+
-+void bch2_btree_node_io_unlock(struct btree *b)
-+{
-+ EBUG_ON(!btree_node_write_in_flight(b));
-+
-+ clear_btree_node_write_in_flight_inner(b);
-+ clear_btree_node_write_in_flight(b);
-+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
-+}
-+
-+void bch2_btree_node_io_lock(struct btree *b)
-+{
-+ bch2_assert_btree_nodes_not_locked();
-+
-+ wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight,
-+ TASK_UNINTERRUPTIBLE);
-+}
-+
-+void __bch2_btree_node_wait_on_read(struct btree *b)
-+{
-+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
-+ TASK_UNINTERRUPTIBLE);
-+}
-+
-+void __bch2_btree_node_wait_on_write(struct btree *b)
-+{
-+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
-+ TASK_UNINTERRUPTIBLE);
-+}
-+
-+void bch2_btree_node_wait_on_read(struct btree *b)
-+{
-+ bch2_assert_btree_nodes_not_locked();
-+
-+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
-+ TASK_UNINTERRUPTIBLE);
-+}
-+
-+void bch2_btree_node_wait_on_write(struct btree *b)
-+{
-+ bch2_assert_btree_nodes_not_locked();
-+
-+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
-+ TASK_UNINTERRUPTIBLE);
-+}
-+
-+static void verify_no_dups(struct btree *b,
-+ struct bkey_packed *start,
-+ struct bkey_packed *end)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ struct bkey_packed *k, *p;
-+
-+ if (start == end)
-+ return;
-+
-+ for (p = start, k = bkey_p_next(start);
-+ k != end;
-+ p = k, k = bkey_p_next(k)) {
-+ struct bkey l = bkey_unpack_key(b, p);
-+ struct bkey r = bkey_unpack_key(b, k);
-+
-+ BUG_ON(bpos_ge(l.p, bkey_start_pos(&r)));
-+ }
-+#endif
-+}
-+
-+static void set_needs_whiteout(struct bset *i, int v)
-+{
-+ struct bkey_packed *k;
-+
-+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k))
-+ k->needs_whiteout = v;
-+}
-+
-+static void btree_bounce_free(struct bch_fs *c, size_t size,
-+ bool used_mempool, void *p)
-+{
-+ if (used_mempool)
-+ mempool_free(p, &c->btree_bounce_pool);
-+ else
-+ vpfree(p, size);
-+}
-+
-+static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
-+ bool *used_mempool)
-+{
-+ unsigned flags = memalloc_nofs_save();
-+ void *p;
-+
-+ BUG_ON(size > btree_bytes(c));
-+
-+ *used_mempool = false;
-+ p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
-+ if (!p) {
-+ *used_mempool = true;
-+ p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
-+ }
-+ memalloc_nofs_restore(flags);
-+ return p;
-+}
-+
-+static void sort_bkey_ptrs(const struct btree *bt,
-+ struct bkey_packed **ptrs, unsigned nr)
-+{
-+ unsigned n = nr, a = nr / 2, b, c, d;
-+
-+ if (!a)
-+ return;
-+
-+ /* Heap sort: see lib/sort.c: */
-+ while (1) {
-+ if (a)
-+ a--;
-+ else if (--n)
-+ swap(ptrs[0], ptrs[n]);
-+ else
-+ break;
-+
-+ for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
-+ b = bch2_bkey_cmp_packed(bt,
-+ ptrs[c],
-+ ptrs[d]) >= 0 ? c : d;
-+ if (d == n)
-+ b = c;
-+
-+ while (b != a &&
-+ bch2_bkey_cmp_packed(bt,
-+ ptrs[a],
-+ ptrs[b]) >= 0)
-+ b = (b - 1) / 2;
-+ c = b;
-+ while (b != a) {
-+ b = (b - 1) / 2;
-+ swap(ptrs[b], ptrs[c]);
-+ }
-+ }
-+}
-+
-+/*
-+ * Sort the unwritten whiteouts of @b, i.e. the keys between
-+ * unwritten_whiteouts_start() and unwritten_whiteouts_end().
-+ *
-+ * The sort is done through a bounce buffer of b->whiteout_u64s u64s: an array
-+ * of key pointers is built at the tail of the buffer, sorted, and the keys are
-+ * then copied in sorted order to the head of the buffer before being copied
-+ * back over the original whiteouts.  Nothing is done if there are no whiteouts.
-+ */
-+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
-+{
-+ struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k;
-+ bool used_mempool = false;
-+ size_t bytes = b->whiteout_u64s * sizeof(u64);
-+
-+ if (!b->whiteout_u64s)
-+ return;
-+
-+ new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
-+
-+ /* The pointer array grows downwards from the end of the bounce buffer: */
-+ ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
-+
-+ for (k = unwritten_whiteouts_start(c, b);
-+ k != unwritten_whiteouts_end(c, b);
-+ k = bkey_p_next(k))
-+ *--ptrs = k;
-+
-+ sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs);
-+
-+ k = new_whiteouts;
-+
-+ /*
-+  * NOTE(review): the sorted keys are written to the head of the same buffer
-+  * that still holds the unread pointers at its tail; presumably every key
-+  * is large enough that the output never overtakes them - confirm.
-+  */
-+ while (ptrs != ptrs_end) {
-+ bkey_p_copy(k, *ptrs);
-+ k = bkey_p_next(k);
-+ ptrs++;
-+ }
-+
-+ verify_no_dups(b, new_whiteouts,
-+ (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
-+
-+ memcpy_u64s(unwritten_whiteouts_start(c, b),
-+ new_whiteouts, b->whiteout_u64s);
-+
-+ btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
-+}
-+
-+/*
-+ * Decide whether the dead keys in bset @t of @b should be dropped.
-+ *
-+ * A bset without dead u64s is never compacted.  COMPACT_ALL compacts every
-+ * other bset; COMPACT_LAZY compacts when should_compact_bset_lazy() says so, or
-+ * when a compaction is already under way (@compacting) and the bset has not
-+ * been written yet.  Any other @mode is a bug.
-+ */
-+static bool should_compact_bset(struct btree *b, struct bset_tree *t,
-+				bool compacting, enum compact_mode mode)
-+{
-+	if (!bset_dead_u64s(b, t))
-+		return false;
-+
-+	if (mode == COMPACT_ALL)
-+		return true;
-+
-+	if (mode == COMPACT_LAZY) {
-+		if (should_compact_bset_lazy(b, t))
-+			return true;
-+
-+		return compacting && !bset_written(b, bset(b, t));
-+	}
-+
-+	BUG();
-+}
-+
-+/*
-+ * Walk every bset of @b and, for those selected by should_compact_bset() with
-+ * @mode, drop the deleted keys in place.
-+ *
-+ * Unwritten bsets other than the first are also slid down to
-+ * max(write_block(b), end of the previous bset) when that differs from their
-+ * current position.  Aux trees are rebuilt for the whole node afterwards.
-+ *
-+ * Returns true if any bset was moved or compacted.
-+ */
-+static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
-+{
-+ struct bset_tree *t;
-+ bool ret = false;
-+
-+ for_each_bset(b, t) {
-+ struct bset *i = bset(b, t);
-+ struct bkey_packed *k, *n, *out, *start, *end;
-+ struct btree_node_entry *src = NULL, *dst = NULL;
-+
-+ /* Only unwritten, non-first bsets are candidates for being moved: */
-+ if (t != b->set && !bset_written(b, i)) {
-+ src = container_of(i, struct btree_node_entry, keys);
-+ dst = max(write_block(b),
-+ (void *) btree_bkey_last(b, t - 1));
-+ }
-+
-+ if (src != dst)
-+ ret = true;
-+
-+ if (!should_compact_bset(b, t, ret, mode)) {
-+ /* Not compacting: move header and keys together, if moving at all */
-+ if (src != dst) {
-+ memmove(dst, src, sizeof(*src) +
-+ le16_to_cpu(src->keys.u64s) *
-+ sizeof(u64));
-+ i = &dst->keys;
-+ set_btree_bset(b, t, i);
-+ }
-+ continue;
-+ }
-+
-+ /* Remember where the keys currently are before the header is moved: */
-+ start = btree_bkey_first(b, t);
-+ end = btree_bkey_last(b, t);
-+
-+ if (src != dst) {
-+ memmove(dst, src, sizeof(*src)); /* header only; keys are copied below */
-+ i = &dst->keys;
-+ set_btree_bset(b, t, i);
-+ }
-+
-+ out = i->start;
-+
-+ /* n is fetched before the copy, since out may overwrite k: */
-+ for (k = start; k != end; k = n) {
-+ n = bkey_p_next(k);
-+
-+ if (!bkey_deleted(k)) {
-+ bkey_p_copy(out, k);
-+ out = bkey_p_next(out);
-+ } else {
-+ BUG_ON(k->needs_whiteout);
-+ }
-+ }
-+
-+ i->u64s = cpu_to_le16((u64 *) out - i->_data);
-+ set_btree_bset_end(b, t);
-+ bch2_bset_set_no_aux_tree(b, t);
-+ ret = true;
-+ }
-+
-+ bch2_verify_btree_nr_keys(b);
-+
-+ bch2_btree_build_aux_trees(b);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Exported entry point for whiteout compaction: forwards to
-+ * bch2_drop_whiteouts() and returns its result.  @c is not used here.
-+ */
-+bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
-+ enum compact_mode mode)
-+{
-+ return bch2_drop_whiteouts(b, mode);
-+}
-+
-+/*
-+ * Merge the bsets b->set[start_idx .. end_idx - 1] of @b into a single sorted
-+ * bset stored at index @start_idx.
-+ *
-+ * The keys are sorted into a bounce buffer with bch2_sort_keys()
-+ * (@filter_whiteouts is passed through to it).  When the whole node is being
-+ * sorted the bounce buffer is node-sized and is swapped with b->data;
-+ * otherwise the result is copied back over the first bset of the range.  The
-+ * remaining bsets and their u64 counts are then shifted down and b->nsets is
-+ * reduced accordingly.
-+ */
-+static void btree_node_sort(struct bch_fs *c, struct btree *b,
-+ unsigned start_idx,
-+ unsigned end_idx,
-+ bool filter_whiteouts)
-+{
-+ struct btree_node *out;
-+ struct sort_iter_stack sort_iter;
-+ struct bset_tree *t;
-+ struct bset *start_bset = bset(b, &b->set[start_idx]);
-+ bool used_mempool = false;
-+ u64 start_time, seq = 0;
-+ unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1; /* shift: bsets that go away */
-+ bool sorting_entire_node = start_idx == 0 &&
-+ end_idx == b->nsets;
-+
-+ sort_iter_stack_init(&sort_iter, b);
-+
-+ /* Total up the input size and feed every bset in the range to the iterator: */
-+ for (t = b->set + start_idx;
-+ t < b->set + end_idx;
-+ t++) {
-+ u64s += le16_to_cpu(bset(b, t)->u64s);
-+ sort_iter_add(&sort_iter.iter,
-+ btree_bkey_first(b, t),
-+ btree_bkey_last(b, t));
-+ }
-+
-+ /* A full-node sort needs a node-sized buffer so it can be swapped in below: */
-+ bytes = sorting_entire_node
-+ ? btree_bytes(c)
-+ : __vstruct_bytes(struct btree_node, u64s);
-+
-+ out = btree_bounce_alloc(c, bytes, &used_mempool);
-+
-+ start_time = local_clock();
-+
-+ u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter, filter_whiteouts);
-+
-+ out->keys.u64s = cpu_to_le16(u64s);
-+
-+ BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes);
-+
-+ if (sorting_entire_node)
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
-+ start_time);
-+
-+ /* Make sure we preserve bset journal_seq: */
-+ for (t = b->set + start_idx; t < b->set + end_idx; t++)
-+ seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq));
-+ start_bset->journal_seq = cpu_to_le64(seq);
-+
-+ if (sorting_entire_node) {
-+ u64s = le16_to_cpu(out->keys.u64s);
-+
-+ BUG_ON(bytes != btree_bytes(c));
-+
-+ /*
-+ * Our temporary buffer is the same size as the btree node's
-+ * buffer, we can just swap buffers instead of doing a big
-+ * memcpy()
-+ */
-+ *out = *b->data; /* copy the node header; this clobbers out->keys.u64s */
-+ out->keys.u64s = cpu_to_le16(u64s); /* ... so put the sorted count back */
-+ swap(out, b->data);
-+ set_btree_bset(b, b->set, &b->data->keys);
-+ } else {
-+ start_bset->u64s = out->keys.u64s;
-+ memcpy_u64s(start_bset->start,
-+ out->keys.start,
-+ le16_to_cpu(out->keys.u64s));
-+ }
-+
-+ /* Fold the u64 counts of the merged bsets into the first one: */
-+ for (i = start_idx + 1; i < end_idx; i++)
-+ b->nr.bset_u64s[start_idx] +=
-+ b->nr.bset_u64s[i];
-+
-+ b->nsets -= shift;
-+
-+ /* Close the gap left by the merged bsets: */
-+ for (i = start_idx + 1; i < b->nsets; i++) {
-+ b->nr.bset_u64s[i] = b->nr.bset_u64s[i + shift];
-+ b->set[i] = b->set[i + shift];
-+ }
-+
-+ for (i = b->nsets; i < MAX_BSETS; i++)
-+ b->nr.bset_u64s[i] = 0;
-+
-+ set_btree_bset_end(b, &b->set[start_idx]);
-+ bch2_bset_set_no_aux_tree(b, &b->set[start_idx]);
-+
-+ /* After a full-node swap, out points at the old node buffer: */
-+ btree_bounce_free(c, bytes, used_mempool, out);
-+
-+ bch2_verify_btree_nr_keys(b);
-+}
-+
-+/*
-+ * Sort and repack all keys of @src into the first bset of @dst, using @dst's
-+ * key format.
-+ *
-+ * @dst must have exactly one bset.  The key counts returned by
-+ * bch2_sort_repack() are added to dst->nr, and the elapsed time is recorded
-+ * under BCH_TIME_btree_node_sort.
-+ */
-+void bch2_btree_sort_into(struct bch_fs *c,
-+ struct btree *dst,
-+ struct btree *src)
-+{
-+ struct btree_nr_keys nr;
-+ struct btree_node_iter src_iter;
-+ u64 start_time = local_clock();
-+
-+ BUG_ON(dst->nsets != 1);
-+
-+ bch2_bset_set_no_aux_tree(dst, dst->set);
-+
-+ bch2_btree_node_iter_init_from_start(&src_iter, src);
-+
-+ nr = bch2_sort_repack(btree_bset_first(dst),
-+ src, &src_iter,
-+ &dst->format,
-+ true);
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
-+ start_time);
-+
-+ set_btree_bset_end(dst, dst->set);
-+
-+ /* Accumulate rather than assign: dst->nr may already hold counts */
-+ dst->nr.live_u64s += nr.live_u64s;
-+ dst->nr.bset_u64s[0] += nr.bset_u64s[0];
-+ dst->nr.packed_keys += nr.packed_keys;
-+ dst->nr.unpacked_keys += nr.unpacked_keys;
-+
-+ bch2_verify_btree_nr_keys(dst);
-+}
-+
-+/*
-+ * Another bset is about to be added to the btree node, so if it currently has
-+ * too many bsets, sort some of them together: the run of unwritten bsets and
-+ * the run of written bsets are each merged into one when they contain more
-+ * than a single bset.
-+ *
-+ * Returns true if anything was sorted.
-+ */
-+static bool btree_node_compact(struct bch_fs *c, struct btree *b)
-+{
-+	unsigned first_unwritten = 0;
-+	bool sorted = false;
-+
-+	/* Skip over the leading bsets that have already been written: */
-+	while (first_unwritten < b->nsets &&
-+	       bset_written(b, bset(b, &b->set[first_unwritten])))
-+		first_unwritten++;
-+
-+	if (b->nsets - first_unwritten > 1) {
-+		btree_node_sort(c, b, first_unwritten, b->nsets, false);
-+		sorted = true;
-+	}
-+
-+	if (first_unwritten > 1) {
-+		btree_node_sort(c, b, 0, first_unwritten, false);
-+		sorted = true;
-+	}
-+
-+	return sorted;
-+}
-+
-+/*
-+ * Build the auxiliary search tree of every bset in @b.  The last argument to
-+ * bch2_bset_build_aux_tree() is true only for the last bset of the node, and
-+ * only if that bset has not been written yet.
-+ */
-+void bch2_btree_build_aux_trees(struct btree *b)
-+{
-+	struct bset_tree *t;
-+
-+	for_each_bset(b, t) {
-+		bool unwritten_last = !bset_written(b, bset(b, t)) &&
-+				      t == bset_tree_last(b);
-+
-+		bch2_bset_build_aux_tree(b, t, unwritten_last);
-+	}
-+}
-+
-+/*
-+ * With MAX_BSETS (3) bsets in a node, should they all be sorted down to one?
-+ *
-+ * The first bset is of similar order to the size of the node; the last bset is
-+ * bounded by btree_write_set_buffer(), which is chosen to keep the memmove on
-+ * insert from being too expensive.  Ideally the middle bset is the geometric
-+ * mean of those two.
-+ *
-+ * Returns true if the middle bset is greater than that geometric mean:
-+ */
-+static inline bool should_compact_all(struct bch_fs *c, struct btree *b)
-+{
-+	/* Averaging the two log2 sizes gives log2 of the geometric mean: */
-+	int node_u64s_bits = ilog2(btree_max_u64s(c));
-+	unsigned mid_u64s_bits =
-+		(node_u64s_bits + BTREE_WRITE_SET_U64s_BITS) / 2;
-+	unsigned mid_bset_u64s = bset_u64s(&b->set[1]);
-+
-+	return mid_bset_u64s > 1U << mid_u64s_bits;
-+}
-+
-+/*
-+ * bch2_btree_init_next - initialize a new (unwritten) bset that can then be
-+ * inserted into
-+ *
-+ * Safe to call if there already is an unwritten bset - will only add a new bset
-+ * if @b doesn't already have one.
-+ *
-+ * Nothing is returned: if the node was written out or compacted here to make
-+ * room (i.e. iterators were invalidated), bch2_trans_node_reinit_iter() is
-+ * called for @b before returning.
-+ */
-+void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_node_entry *bne;
-+ bool reinit_iter = false;
-+
-+ EBUG_ON(!six_lock_counts(&b->c.lock).n[SIX_LOCK_write]);
-+ BUG_ON(bset_written(b, bset(b, &b->set[1])));
-+ BUG_ON(btree_node_just_written(b));
-+
-+ /* Node is full of bsets and the middle one is too big: write the node out */
-+ if (b->nsets == MAX_BSETS &&
-+ !btree_node_write_in_flight(b) &&
-+ should_compact_all(c, b)) {
-+ bch2_btree_node_write(c, b, SIX_LOCK_write,
-+ BTREE_WRITE_init_next_bset);
-+ reinit_iter = true;
-+ }
-+
-+ /* Still at the limit: merge bsets in memory instead */
-+ if (b->nsets == MAX_BSETS &&
-+ btree_node_compact(c, b))
-+ reinit_iter = true;
-+
-+ BUG_ON(b->nsets >= MAX_BSETS);
-+
-+ bne = want_new_bset(c, b);
-+ if (bne)
-+ bch2_bset_init_next(c, b, bne);
-+
-+ bch2_btree_build_aux_trees(b);
-+
-+ if (reinit_iter)
-+ bch2_trans_node_reinit_iter(trans, b);
-+}
-+
-+/*
-+ * Append the common prefix of a btree node validation message to @out: whether
-+ * the problem was found on read or before a write, the device name (when @ca is
-+ * given), the position of @b, the node's written offset and, when @i is given,
-+ * the size of the bset in u64s.
-+ *
-+ * Note that the offset printed is b->written; the @offset argument is unused.
-+ */
-+static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
-+			  struct bch_dev *ca,
-+			  struct btree *b, struct bset *i,
-+			  unsigned offset, int write)
-+{
-+	const char *what;
-+
-+	if (write == READ)
-+		what = "error validating btree node ";
-+	else
-+		what = "corrupt btree node before write ";
-+
-+	prt_printf(out, bch2_log_msg(c, "%s"), what);
-+
-+	if (ca)
-+		prt_printf(out, "on %s ", ca->name);
-+
-+	prt_printf(out, "at btree ");
-+	bch2_btree_pos_to_text(out, c, b);
-+
-+	prt_printf(out, "\n node offset %u", b->written);
-+
-+	if (i)
-+		prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
-+
-+	prt_str(out, ": ");
-+}
-+
-+/*
-+ * Report a btree node validation error and work out how the caller should
-+ * proceed.
-+ *
-+ * @ret is the proposed error class (one of the -BCH_ERR_btree_node_read_err_*
-+ * codes), @err_type the superblock error counter id, and @fmt plus the
-+ * variadic arguments the message, which is appended to the prefix produced by
-+ * btree_err_msg().
-+ *
-+ * On the write path the message is printed and the result depends only on
-+ * c->opts.errors.  On the read path, retry classes are downgraded when
-+ * @have_retry is false, and the resulting class selects between running the
-+ * error through bch2_fsck_err(), just printing it, or flagging a topology
-+ * error.
-+ *
-+ * Returns -BCH_ERR_fsck_fix when the caller may repair and carry on, otherwise
-+ * the error the caller should fail with (0 on the write path when errors are
-+ * set to continue).
-+ */
-+__printf(9, 10)
-+static int __btree_err(int ret,
-+ struct bch_fs *c,
-+ struct bch_dev *ca,
-+ struct btree *b,
-+ struct bset *i,
-+ int write,
-+ bool have_retry,
-+ enum bch_sb_error_id err_type,
-+ const char *fmt, ...)
-+{
-+ struct printbuf out = PRINTBUF;
-+ va_list args;
-+
-+ btree_err_msg(&out, c, ca, b, i, b->written, write);
-+
-+ va_start(args, fmt);
-+ prt_vprintf(&out, fmt, args);
-+ va_end(args);
-+
-+ if (write == WRITE) {
-+ bch2_print_string_as_lines(KERN_ERR, out.buf);
-+ ret = c->opts.errors == BCH_ON_ERROR_continue
-+ ? 0
-+ : -BCH_ERR_fsck_errors_not_fixed;
-+ goto out;
-+ }
-+
-+ /* With nothing left to retry from, retry classes fall back to repair / bad node: */
-+ if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
-+ ret = -BCH_ERR_btree_node_read_err_fixable;
-+ if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
-+ ret = -BCH_ERR_btree_node_read_err_bad_node;
-+
-+ /* Fixable errors are passed to bch2_fsck_err() with err_type below instead: */
-+ if (ret != -BCH_ERR_btree_node_read_err_fixable)
-+ bch2_sb_error_count(c, err_type);
-+
-+ switch (ret) {
-+ case -BCH_ERR_btree_node_read_err_fixable:
-+ ret = bch2_fsck_err(c, FSCK_CAN_FIX, err_type, "%s", out.buf);
-+ if (ret != -BCH_ERR_fsck_fix &&
-+ ret != -BCH_ERR_fsck_ignore)
-+ goto fsck_err;
-+ ret = -BCH_ERR_fsck_fix; /* "ignore" is reported to the caller as "fix" too */
-+ break;
-+ case -BCH_ERR_btree_node_read_err_want_retry:
-+ case -BCH_ERR_btree_node_read_err_must_retry:
-+ bch2_print_string_as_lines(KERN_ERR, out.buf);
-+ break;
-+ case -BCH_ERR_btree_node_read_err_bad_node:
-+ bch2_print_string_as_lines(KERN_ERR, out.buf);
-+ bch2_topology_error(c);
-+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO;
-+ break;
-+ case -BCH_ERR_btree_node_read_err_incompatible:
-+ bch2_print_string_as_lines(KERN_ERR, out.buf);
-+ ret = -BCH_ERR_fsck_errors_not_fixed;
-+ break;
-+ default:
-+ BUG();
-+ }
-+out:
-+fsck_err:
-+ printbuf_exit(&out);
-+ return ret;
-+}
-+
-+/*
-+ * btree_err() - report a validation error through __btree_err().
-+ *
-+ * This macro relies on names from the calling function's scope: the variables
-+ * 'write', 'have_retry', 'ret' and 'saw_error' (a bool pointer), and a label
-+ * 'fsck_err'.  If __btree_err() returns anything other than
-+ * -BCH_ERR_fsck_fix, that value is stored in 'ret' and control jumps to
-+ * 'fsck_err'; otherwise *saw_error is set and the expression evaluates to
-+ * true.
-+ */
-+#define btree_err(type, c, ca, b, i, _err_type, msg, ...) \
-+({ \
-+ int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \
-+ BCH_FSCK_ERR_##_err_type, \
-+ msg, ##__VA_ARGS__); \
-+ \
-+ if (_ret != -BCH_ERR_fsck_fix) { \
-+ ret = _ret; \
-+ goto fsck_err; \
-+ } \
-+ \
-+ *saw_error = true; \
-+})
-+
-+/*
-+ * btree_err_on() - btree_err() guarded by @cond; evaluates to false without
-+ * reporting anything when @cond does not hold.
-+ */
-+#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
-+
-+/*
-+ * When btree topology repair changes the start or end of a node, that might
-+ * mean we have to drop keys that are no longer inside the node:
-+ *
-+ * For every bset of @b, keys at the front that compare below
-+ * b->data->min_key and keys from the first one that compares above
-+ * b->data->max_key onwards are removed.  The aux trees are then rebuilt and
-+ * every remaining key is checked to lie within [min_key, max_key].
-+ */
-+__cold
-+void bch2_btree_node_drop_keys_outside_node(struct btree *b)
-+{
-+ struct bset_tree *t;
-+
-+ for_each_bset(b, t) {
-+ struct bset *i = bset(b, t);
-+ struct bkey_packed *k;
-+
-+ /* Find the first key that is not before min_key: */
-+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k))
-+ if (bkey_cmp_left_packed(b, k, &b->data->min_key) >= 0)
-+ break;
-+
-+ if (k != i->start) {
-+ unsigned shift = (u64 *) k - (u64 *) i->start;
-+
-+ /* Slide the surviving keys down to the start of the bset: */
-+ memmove_u64s_down(i->start, k,
-+ (u64 *) vstruct_end(i) - (u64 *) k);
-+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift);
-+ set_btree_bset_end(b, t);
-+ }
-+
-+ /* Find the first key that is past max_key and truncate the bset there: */
-+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k))
-+ if (bkey_cmp_left_packed(b, k, &b->data->max_key) > 0)
-+ break;
-+
-+ if (k != vstruct_last(i)) {
-+ i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start);
-+ set_btree_bset_end(b, t);
-+ }
-+ }
-+
-+ /*
-+ * Always rebuild search trees: eytzinger search tree nodes directly
-+ * depend on the values of min/max key:
-+ */
-+ bch2_bset_set_no_aux_tree(b, b->set);
-+ bch2_btree_build_aux_trees(b);
-+
-+ /* Sanity check: nothing outside the node's range may be left */
-+ struct bkey_s_c k;
-+ struct bkey unpacked;
-+ struct btree_node_iter iter;
-+ for_each_btree_node_key_unpack(b, k, &iter, &unpacked) {
-+ BUG_ON(bpos_lt(k.k->p, b->data->min_key));
-+ BUG_ON(bpos_gt(k.k->p, b->data->max_key));
-+ }
-+}
-+
-+/*
-+ * Validate the header of bset @i, found at sector @offset of btree node @b and
-+ * occupying @sectors sectors.
-+ *
-+ * Checks the bset version against the superblock (updating the superblock
-+ * version fields when the error is accepted as fixable), that the bset fits
-+ * inside the node, and its recorded sector offset.  For the first bset
-+ * (@offset == 0) the enclosing struct btree_node header is checked as well:
-+ * sequence number, btree id, level, min/max key and key format.
-+ *
-+ * @write, @have_retry and @saw_error are consumed by the btree_err() macros.
-+ *
-+ * Returns 0 if validation may continue, otherwise the error chosen by
-+ * btree_err().
-+ */
-+static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
-+ struct btree *b, struct bset *i,
-+ unsigned offset, unsigned sectors,
-+ int write, bool have_retry, bool *saw_error)
-+{
-+ unsigned version = le16_to_cpu(i->version);
-+ struct printbuf buf1 = PRINTBUF;
-+ struct printbuf buf2 = PRINTBUF;
-+ int ret = 0;
-+
-+ btree_err_on(!bch2_version_compatible(version),
-+ -BCH_ERR_btree_node_read_err_incompatible,
-+ c, ca, b, i,
-+ btree_node_unsupported_version,
-+ "unsupported bset version %u.%u",
-+ BCH_VERSION_MAJOR(version),
-+ BCH_VERSION_MINOR(version));
-+
-+ /* Fix: lower the superblock's version_min to this bset's version */
-+ if (btree_err_on(version < c->sb.version_min,
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, i,
-+ btree_node_bset_older_than_sb_min,
-+ "bset version %u older than superblock version_min %u",
-+ version, c->sb.version_min)) {
-+ mutex_lock(&c->sb_lock);
-+ c->disk_sb.sb->version_min = cpu_to_le16(version);
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+ }
-+
-+ /* Fix: raise the superblock's version to this bset's version */
-+ if (btree_err_on(BCH_VERSION_MAJOR(version) >
-+ BCH_VERSION_MAJOR(c->sb.version),
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, i,
-+ btree_node_bset_newer_than_sb,
-+ "bset version %u newer than superblock version %u",
-+ version, c->sb.version)) {
-+ mutex_lock(&c->sb_lock);
-+ c->disk_sb.sb->version = cpu_to_le16(version);
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+ }
-+
-+ btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
-+ -BCH_ERR_btree_node_read_err_incompatible,
-+ c, ca, b, i,
-+ btree_node_unsupported_version,
-+ "BSET_SEPARATE_WHITEOUTS no longer supported");
-+
-+ /* Fix: treat a bset that runs past the node as empty and stop validating it */
-+ if (btree_err_on(offset + sectors > btree_sectors(c),
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, ca, b, i,
-+ bset_past_end_of_btree_node,
-+ "bset past end of btree node")) {
-+ i->u64s = 0;
-+ ret = 0;
-+ goto out;
-+ }
-+
-+ btree_err_on(offset && !i->u64s,
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, ca, b, i,
-+ bset_empty,
-+ "empty bset");
-+
-+ /* A recorded offset of zero is not checked: */
-+ btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset,
-+ -BCH_ERR_btree_node_read_err_want_retry,
-+ c, ca, b, i,
-+ bset_wrong_sector_offset,
-+ "bset at wrong sector offset");
-+
-+ /* First bset: it is embedded in the btree node header, validate that too */
-+ if (!offset) {
-+ struct btree_node *bn =
-+ container_of(i, struct btree_node, keys);
-+ /* These indicate that we read the wrong btree node: */
-+
-+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
-+ struct bch_btree_ptr_v2 *bp =
-+ &bkey_i_to_btree_ptr_v2(&b->key)->v;
-+
-+ /* XXX endianness */
-+ btree_err_on(bp->seq != bn->keys.seq,
-+ -BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, NULL,
-+ bset_bad_seq,
-+ "incorrect sequence number (wrong btree node)");
-+ }
-+
-+ btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id,
-+ -BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, i,
-+ btree_node_bad_btree,
-+ "incorrect btree id");
-+
-+ btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level,
-+ -BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, i,
-+ btree_node_bad_level,
-+ "incorrect level");
-+
-+ /* On read, compat conversion runs before the key range checks ... */
-+ if (!write)
-+ compat_btree_node(b->c.level, b->c.btree_id, version,
-+ BSET_BIG_ENDIAN(i), write, bn);
-+
-+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
-+ struct bch_btree_ptr_v2 *bp =
-+ &bkey_i_to_btree_ptr_v2(&b->key)->v;
-+
-+ /* The pointer's range overrides what is stored in the node: */
-+ if (BTREE_PTR_RANGE_UPDATED(bp)) {
-+ b->data->min_key = bp->min_key;
-+ b->data->max_key = b->key.k.p;
-+ }
-+
-+ btree_err_on(!bpos_eq(b->data->min_key, bp->min_key),
-+ -BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, NULL,
-+ btree_node_bad_min_key,
-+ "incorrect min_key: got %s should be %s",
-+ (printbuf_reset(&buf1),
-+ bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf),
-+ (printbuf_reset(&buf2),
-+ bch2_bpos_to_text(&buf2, bp->min_key), buf2.buf));
-+ }
-+
-+ btree_err_on(!bpos_eq(bn->max_key, b->key.k.p),
-+ -BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, i,
-+ btree_node_bad_max_key,
-+ "incorrect max key %s",
-+ (printbuf_reset(&buf1),
-+ bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf));
-+
-+ /* ... and on write, after them */
-+ if (write)
-+ compat_btree_node(b->c.level, b->c.btree_id, version,
-+ BSET_BIG_ENDIAN(i), write, bn);
-+
-+ btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1),
-+ -BCH_ERR_btree_node_read_err_bad_node,
-+ c, ca, b, i,
-+ btree_node_bad_format,
-+ "invalid bkey format: %s\n %s", buf1.buf,
-+ (printbuf_reset(&buf2),
-+ bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf));
-+ printbuf_reset(&buf1);
-+
-+ compat_bformat(b->c.level, b->c.btree_id, version,
-+ BSET_BIG_ENDIAN(i), write,
-+ &bn->format);
-+ }
-+out:
-+fsck_err:
-+ printbuf_exit(&buf2);
-+ printbuf_exit(&buf1);
-+ return ret;
-+}
-+
-+/*
-+ * Run the per-key validity checks used while validating a bset and return the
-+ * first nonzero result, or 0 if all of them pass:
-+ *
-+ *  - __bch2_bkey_invalid() for the node's btree type;
-+ *  - bch2_bkey_in_btree_node(), skipped when @updated_range is set;
-+ *  - bch2_bkey_val_invalid(), only when @rw is WRITE.
-+ *
-+ * Problems are described in @err by the helpers.
-+ */
-+static int bset_key_invalid(struct bch_fs *c, struct btree *b,
-+			    struct bkey_s_c k,
-+			    bool updated_range, int rw,
-+			    struct printbuf *err)
-+{
-+	int ret;
-+
-+	ret = __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err);
-+	if (ret)
-+		return ret;
-+
-+	if (!updated_range) {
-+		ret = bch2_bkey_in_btree_node(c, b, k, err);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	if (rw == WRITE)
-+		return bch2_bkey_val_invalid(c, k, READ, err);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Validate every key in bset @i of node @b.
-+ *
-+ * Keys that extend past the end of the bset truncate it; keys with an unknown
-+ * format, keys rejected by bset_key_invalid() and keys that sort before their
-+ * predecessor are removed from the bset in place, provided the corresponding
-+ * btree_err() is accepted as fixable.
-+ *
-+ * @write, @have_retry and @saw_error are consumed by the btree_err() macros.
-+ *
-+ * Returns 0 on success, otherwise the error chosen by btree_err().
-+ */
-+static int validate_bset_keys(struct bch_fs *c, struct btree *b,
-+ struct bset *i, int write,
-+ bool have_retry, bool *saw_error)
-+{
-+ unsigned version = le16_to_cpu(i->version);
-+ struct bkey_packed *k, *prev = NULL;
-+ struct printbuf buf = PRINTBUF;
-+ bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
-+ BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
-+ int ret = 0;
-+
-+ /* No increment clause: k only advances when the current key is kept */
-+ for (k = i->start;
-+ k != vstruct_last(i);) {
-+ struct bkey_s u;
-+ struct bkey tmp;
-+
-+ if (btree_err_on(bkey_p_next(k) > vstruct_last(i),
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, i,
-+ btree_node_bkey_past_bset_end,
-+ "key extends past end of bset")) {
-+ i->u64s = cpu_to_le16((u64 *) k - i->_data); /* truncate the bset at k */
-+ break;
-+ }
-+
-+ if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, i,
-+ btree_node_bkey_bad_format,
-+ "invalid bkey format %u", k->format)) {
-+ /* Drop k: shrink the bset, then slide the following keys over it */
-+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
-+ memmove_u64s_down(k, bkey_p_next(k),
-+ (u64 *) vstruct_end(i) - (u64 *) k);
-+ continue;
-+ }
-+
-+ /* XXX: validate k->u64s */
-+ if (!write)
-+ bch2_bkey_compat(b->c.level, b->c.btree_id, version,
-+ BSET_BIG_ENDIAN(i), write,
-+ &b->format, k);
-+
-+ u = __bkey_disassemble(b, k, &tmp);
-+
-+ printbuf_reset(&buf);
-+ if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) {
-+ /* Run the check a second time into a clean buffer to build the message: */
-+ printbuf_reset(&buf);
-+ bset_key_invalid(c, b, u.s_c, updated_range, write, &buf);
-+ prt_printf(&buf, "\n ");
-+ bch2_bkey_val_to_text(&buf, c, u.s_c);
-+
-+ btree_err(-BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, i,
-+ btree_node_bad_bkey,
-+ "invalid bkey: %s", buf.buf);
-+
-+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
-+ memmove_u64s_down(k, bkey_p_next(k),
-+ (u64 *) vstruct_end(i) - (u64 *) k);
-+ continue;
-+ }
-+
-+ if (write)
-+ bch2_bkey_compat(b->c.level, b->c.btree_id, version,
-+ BSET_BIG_ENDIAN(i), write,
-+ &b->format, k);
-+
-+ if (prev && bkey_iter_cmp(b, prev, k) > 0) {
-+ struct bkey up = bkey_unpack_key(b, prev);
-+
-+ printbuf_reset(&buf);
-+ prt_printf(&buf, "keys out of order: ");
-+ bch2_bkey_to_text(&buf, &up);
-+ prt_printf(&buf, " > ");
-+ bch2_bkey_to_text(&buf, u.k);
-+
-+ bch2_dump_bset(c, b, i, 0);
-+
-+ if (btree_err(-BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, i,
-+ btree_node_bkey_out_of_order,
-+ "%s", buf.buf)) {
-+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
-+ memmove_u64s_down(k, bkey_p_next(k),
-+ (u64 *) vstruct_end(i) - (u64 *) k);
-+ continue;
-+ }
-+ }
-+
-+ prev = k;
-+ k = bkey_p_next(k);
-+ }
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * Validate and post-process a btree node whose data has just been read into
-+ * b->data from device @ca.
-+ *
-+ * Each bset in the node is checksummed, decrypted via bset_encrypt() and
-+ * validated in turn; the accepted bsets are then merged into a single sorted
-+ * bset (replacing b->data with the sorted buffer), invalid key values are
-+ * dropped, and the aux tree is rebuilt.
-+ *
-+ * @have_retry tells the error macros whether another replica can still be
-+ * tried; *@saw_error is set by them when an error was found and fixed.
-+ *
-+ * Returns nonzero if the read should be retried, 0 otherwise.  A failure that
-+ * is not retryable is reported by setting the node's read-error flag rather
-+ * than through the return value.
-+ */
-+int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
-+ struct btree *b, bool have_retry, bool *saw_error)
-+{
-+ struct btree_node_entry *bne;
-+ struct sort_iter *iter;
-+ struct btree_node *sorted;
-+ struct bkey_packed *k;
-+ struct bch_extent_ptr *ptr;
-+ struct bset *i;
-+ bool used_mempool, blacklisted;
-+ bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
-+ BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
-+ unsigned u64s;
-+ unsigned ptr_written = btree_ptr_sectors_written(&b->key);
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0, retry_read = 0, write = READ; /* 'write' is read by the btree_err() macros */
-+
-+ b->version_ondisk = U16_MAX;
-+ /* We might get called multiple times on read retry: */
-+ b->written = 0;
-+
-+ iter = mempool_alloc(&c->fill_iter, GFP_NOFS);
-+ sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2);
-+
-+ if (bch2_meta_read_fault("btree"))
-+ btree_err(-BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, NULL,
-+ btree_node_fault_injected,
-+ "dynamic fault");
-+
-+ btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
-+ -BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, NULL,
-+ btree_node_bad_magic,
-+ "bad magic: want %llx, got %llx",
-+ bset_magic(c), le64_to_cpu(b->data->magic));
-+
-+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
-+ struct bch_btree_ptr_v2 *bp =
-+ &bkey_i_to_btree_ptr_v2(&b->key)->v;
-+
-+ btree_err_on(b->data->keys.seq != bp->seq,
-+ -BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, NULL,
-+ btree_node_bad_seq,
-+ "got wrong btree node (seq %llx want %llx)",
-+ b->data->keys.seq, bp->seq);
-+ } else {
-+ btree_err_on(!b->data->keys.seq,
-+ -BCH_ERR_btree_node_read_err_must_retry,
-+ c, ca, b, NULL,
-+ btree_node_bad_seq,
-+ "bad btree header: seq 0");
-+ }
-+
-+ /* Walk the bsets; when the key records no written size, scan the whole node */
-+ while (b->written < (ptr_written ?: btree_sectors(c))) {
-+ unsigned sectors;
-+ struct nonce nonce;
-+ bool first = !b->written;
-+ bool csum_bad;
-+
-+ if (!b->written) {
-+ /* First bset: it lives in the btree node header */
-+ i = &b->data->keys;
-+
-+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
-+ -BCH_ERR_btree_node_read_err_want_retry,
-+ c, ca, b, i,
-+ bset_unknown_csum,
-+ "unknown checksum type %llu", BSET_CSUM_TYPE(i));
-+
-+ nonce = btree_nonce(i, b->written << 9);
-+
-+ csum_bad = bch2_crc_cmp(b->data->csum,
-+ csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data));
-+ if (csum_bad)
-+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-+
-+ btree_err_on(csum_bad,
-+ -BCH_ERR_btree_node_read_err_want_retry,
-+ c, ca, b, i,
-+ bset_bad_csum,
-+ "invalid checksum");
-+
-+ ret = bset_encrypt(c, i, b->written << 9);
-+ if (bch2_fs_fatal_err_on(ret, c,
-+ "error decrypting btree node: %i", ret))
-+ goto fsck_err;
-+
-+ btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
-+ !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
-+ -BCH_ERR_btree_node_read_err_incompatible,
-+ c, NULL, b, NULL,
-+ btree_node_unsupported_version,
-+ "btree node does not have NEW_EXTENT_OVERWRITE set");
-+
-+ sectors = vstruct_sectors(b->data, c->block_bits);
-+ } else {
-+ /* Later bsets: each is a btree_node_entry at the current write offset */
-+ bne = write_block(b);
-+ i = &bne->keys;
-+
-+ /* A different seq means this block is not part of this node: stop */
-+ if (i->seq != b->data->keys.seq)
-+ break;
-+
-+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
-+ -BCH_ERR_btree_node_read_err_want_retry,
-+ c, ca, b, i,
-+ bset_unknown_csum,
-+ "unknown checksum type %llu", BSET_CSUM_TYPE(i));
-+
-+ nonce = btree_nonce(i, b->written << 9);
-+ csum_bad = bch2_crc_cmp(bne->csum,
-+ csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne));
-+ if (csum_bad)
-+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-+
-+ btree_err_on(csum_bad,
-+ -BCH_ERR_btree_node_read_err_want_retry,
-+ c, ca, b, i,
-+ bset_bad_csum,
-+ "invalid checksum");
-+
-+ /* NOTE(review): unlike the first-bset message above, this one ends in "\n" - confirm which is intended */
-+ ret = bset_encrypt(c, i, b->written << 9);
-+ if (bch2_fs_fatal_err_on(ret, c,
-+ "error decrypting btree node: %i\n", ret))
-+ goto fsck_err;
-+
-+ sectors = vstruct_sectors(bne, c->block_bits);
-+ }
-+
-+ /* Track the oldest version seen across the node's bsets: */
-+ b->version_ondisk = min(b->version_ondisk,
-+ le16_to_cpu(i->version));
-+
-+ ret = validate_bset(c, ca, b, i, b->written, sectors,
-+ READ, have_retry, saw_error);
-+ if (ret)
-+ goto fsck_err;
-+
-+ if (!b->written)
-+ btree_node_set_format(b, b->data->format);
-+
-+ ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error);
-+ if (ret)
-+ goto fsck_err;
-+
-+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
-+
-+ blacklisted = bch2_journal_seq_is_blacklisted(c,
-+ le64_to_cpu(i->journal_seq),
-+ true);
-+
-+ /*
-+  * NOTE(review): the error ids of the next two checks look swapped
-+  * relative to their messages (the "first bset" message uses
-+  * bset_blacklisted_journal_seq and vice versa) - confirm against
-+  * the error id definitions before changing, since the ids feed
-+  * the error counters.
-+  */
-+ btree_err_on(blacklisted && first,
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, ca, b, i,
-+ bset_blacklisted_journal_seq,
-+ "first btree node bset has blacklisted journal seq (%llu)",
-+ le64_to_cpu(i->journal_seq));
-+
-+ btree_err_on(blacklisted && ptr_written,
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, ca, b, i,
-+ first_bset_blacklisted_journal_seq,
-+ "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u",
-+ le64_to_cpu(i->journal_seq),
-+ b->written, b->written + sectors, ptr_written);
-+
-+ b->written += sectors;
-+
-+ /* Blacklisted bsets other than the first are skipped, not sorted in: */
-+ if (blacklisted && !first)
-+ continue;
-+
-+ sort_iter_add(iter,
-+ vstruct_idx(i, 0),
-+ vstruct_last(i));
-+ }
-+
-+ if (ptr_written) {
-+ btree_err_on(b->written < ptr_written,
-+ -BCH_ERR_btree_node_read_err_want_retry,
-+ c, ca, b, NULL,
-+ btree_node_data_missing,
-+ "btree node data missing: expected %u sectors, found %u",
-+ ptr_written, b->written);
-+ } else {
-+ /* Scan the rest of the node, block by block, for stray bsets of this node: */
-+ for (bne = write_block(b);
-+ bset_byte_offset(b, bne) < btree_bytes(c);
-+ bne = (void *) bne + block_bytes(c))
-+ btree_err_on(bne->keys.seq == b->data->keys.seq &&
-+ !bch2_journal_seq_is_blacklisted(c,
-+ le64_to_cpu(bne->keys.journal_seq),
-+ true),
-+ -BCH_ERR_btree_node_read_err_want_retry,
-+ c, ca, b, NULL,
-+ btree_node_bset_after_end,
-+ "found bset signature after last bset");
-+ }
-+
-+ sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
-+ sorted->keys.u64s = 0;
-+
-+ set_btree_bset(b, b->set, &b->data->keys);
-+
-+ b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
-+
-+ /* Copy the node header over, keep the sorted key count, then swap buffers: */
-+ u64s = le16_to_cpu(sorted->keys.u64s);
-+ *sorted = *b->data;
-+ sorted->keys.u64s = cpu_to_le16(u64s);
-+ swap(sorted, b->data);
-+ set_btree_bset(b, b->set, &b->data->keys);
-+ b->nsets = 1;
-+
-+ BUG_ON(b->nr.live_u64s != u64s);
-+
-+ /* After the swap, sorted points at the buffer the node was read into: */
-+ btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
-+
-+ if (updated_range)
-+ bch2_btree_node_drop_keys_outside_node(b);
-+
-+ /* Second pass over the merged bset: validate key values, dropping bad keys */
-+ i = &b->data->keys;
-+ for (k = i->start; k != vstruct_last(i);) {
-+ struct bkey tmp;
-+ struct bkey_s u = __bkey_disassemble(b, k, &tmp);
-+
-+ printbuf_reset(&buf);
-+
-+ if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) ||
-+ (bch2_inject_invalid_keys &&
-+ !bversion_cmp(u.k->version, MAX_VERSION))) {
-+ printbuf_reset(&buf);
-+
-+ prt_printf(&buf, "invalid bkey: ");
-+ bch2_bkey_val_invalid(c, u.s_c, READ, &buf);
-+ prt_printf(&buf, "\n ");
-+ bch2_bkey_val_to_text(&buf, c, u.s_c);
-+
-+ btree_err(-BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, i,
-+ btree_node_bad_bkey,
-+ "%s", buf.buf);
-+
-+ btree_keys_account_key_drop(&b->nr, 0, k);
-+
-+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
-+ memmove_u64s_down(k, bkey_p_next(k),
-+ (u64 *) vstruct_end(i) - (u64 *) k);
-+ set_btree_bset_end(b, b->set);
-+ continue;
-+ }
-+
-+ if (u.k->type == KEY_TYPE_btree_ptr_v2) {
-+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u);
-+
-+ bp.v->mem_ptr = 0; /* clear whatever mem_ptr value came from disk */
-+ }
-+
-+ k = bkey_p_next(k);
-+ }
-+
-+ bch2_bset_build_aux_tree(b, b->set, false);
-+
-+ set_needs_whiteout(btree_bset_first(b), true);
-+
-+ btree_node_reset_sib_u64s(b);
-+
-+ /* Flag the node for rewrite if any device it points at is not read-write: */
-+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
-+ struct bch_dev *ca2 = bch_dev_bkey_exists(c, ptr->dev);
-+
-+ if (ca2->mi.state != BCH_MEMBER_STATE_rw)
-+ set_btree_node_need_rewrite(b);
-+ }
-+
-+ if (!ptr_written)
-+ set_btree_node_need_rewrite(b);
-+out:
-+ mempool_free(iter, &c->fill_iter);
-+ printbuf_exit(&buf);
-+ return retry_read;
-+fsck_err:
-+ /* Retryable classes ask the caller to retry; anything else marks the node bad */
-+ if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
-+ ret == -BCH_ERR_btree_node_read_err_must_retry)
-+ retry_read = 1;
-+ else
-+ set_btree_node_read_error(b);
-+ goto out;
-+}
-+
-+/*
-+ * Work item run after a btree node read bio has completed.
-+ *
-+ * The result of the read is checked and handed to
-+ * bch2_btree_node_read_done(); on failure the read is reissued synchronously
-+ * against the next device chosen by bch2_bkey_pick_read_device(), until one
-+ * succeeds or no candidate is left, in which case the node is marked with a
-+ * read error.  If an error was seen but the node was read successfully in the
-+ * end, an asynchronous rewrite of the node is requested.  Finally the
-+ * read-in-flight flag is cleared and waiters are woken.
-+ */
-+static void btree_node_read_work(struct work_struct *work)
-+{
-+ struct btree_read_bio *rb =
-+ container_of(work, struct btree_read_bio, work);
-+ struct bch_fs *c = rb->c;
-+ struct btree *b = rb->b;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
-+ struct bio *bio = &rb->bio;
-+ struct bch_io_failures failed = { .nr = 0 };
-+ struct printbuf buf = PRINTBUF;
-+ bool saw_error = false;
-+ bool retry = false;
-+ bool can_retry;
-+
-+ /* The first read was already submitted: enter the loop past the resubmit code */
-+ goto start;
-+ while (1) {
-+ retry = true;
-+ bch_info(c, "retrying read");
-+ ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
-+ rb->have_ioref = bch2_dev_get_ioref(ca, READ);
-+ bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
-+ bio->bi_iter.bi_sector = rb->pick.ptr.offset;
-+ bio->bi_iter.bi_size = btree_bytes(c);
-+
-+ if (rb->have_ioref) {
-+ bio_set_dev(bio, ca->disk_sb.bdev);
-+ submit_bio_wait(bio);
-+ } else {
-+ bio->bi_status = BLK_STS_REMOVED; /* no ioref on the device: fail this attempt */
-+ }
-+start:
-+ printbuf_reset(&buf);
-+ bch2_btree_pos_to_text(&buf, c, b);
-+ bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
-+ "btree read error %s for %s",
-+ bch2_blk_status_to_str(bio->bi_status), buf.buf);
-+ if (rb->have_ioref)
-+ percpu_ref_put(&ca->io_ref);
-+ rb->have_ioref = false;
-+
-+ /* Record this pick as failed so the next pick avoids it; rb->pick is updated */
-+ bch2_mark_io_failure(&failed, &rb->pick);
-+
-+ can_retry = bch2_bkey_pick_read_device(c,
-+ bkey_i_to_s_c(&b->key),
-+ &failed, &rb->pick) > 0;
-+
-+ if (!bio->bi_status &&
-+ !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
-+ if (retry)
-+ bch_info(c, "retry success");
-+ break;
-+ }
-+
-+ saw_error = true;
-+
-+ if (!can_retry) {
-+ set_btree_node_read_error(b);
-+ break;
-+ }
-+ }
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
-+ rb->start_time);
-+ bio_put(&rb->bio);
-+
-+ if (saw_error && !btree_node_read_error(b)) {
-+ printbuf_reset(&buf);
-+ bch2_bpos_to_text(&buf, b->key.k.p);
-+ bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
-+ __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
-+
-+ bch2_btree_node_rewrite_async(c, b);
-+ }
-+
-+ printbuf_exit(&buf);
-+ clear_btree_node_read_in_flight(b);
-+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
-+}
-+
-+/*
-+ * bio completion callback for btree node reads: accounts the read latency
-+ * against the device the read was issued to (only when an ioref on it is
-+ * held), then hands the rest of the completion to rb->work on
-+ * c->io_complete_wq.
-+ */
-+static void btree_node_read_endio(struct bio *bio)
-+{
-+	struct btree_read_bio *rb =
-+		container_of(bio, struct btree_read_bio, bio);
-+	struct bch_fs *c = rb->c;
-+
-+	if (rb->have_ioref)
-+		bch2_latency_acct(bch_dev_bkey_exists(c, rb->pick.ptr.dev),
-+				  rb->start_time, READ);
-+
-+	queue_work(c->io_complete_wq, &rb->work);
-+}
-+
-+/*
-+ * Per-replica buffers and completion status for one btree node, consumed by
-+ * btree_node_read_all_replicas_done().
-+ */
-+struct btree_node_read_all {
-+ struct closure cl;
-+ struct bch_fs *c;
-+ struct btree *b; /* the node being read */
-+ unsigned nr; /* number of entries in use in the arrays below */
-+ void *buf[BCH_REPLICAS_MAX]; /* data read from each replica; interpreted as struct btree_node */
-+ struct bio *bio[BCH_REPLICAS_MAX];
-+ blk_status_t err[BCH_REPLICAS_MAX]; /* status per replica; replicas with a nonzero status are skipped */
-+};
-+
-+/*
-+ * Work out how many sectors of the raw btree node buffer @data hold bsets
-+ * belonging to the node: the node header's bset first, then every following
-+ * btree_node_entry whose seq matches the header's.
-+ *
-+ * Returns 0 if the buffer does not start with the btree node magic.
-+ */
-+static unsigned btree_node_sectors_written(struct bch_fs *c, void *data)
-+{
-+	struct btree_node *bn = data;
-+	unsigned offset;
-+
-+	if (le64_to_cpu(bn->magic) != bset_magic(c))
-+		return 0;
-+
-+	for (offset = 0; offset < btree_sectors(c);) {
-+		struct btree_node_entry *bne;
-+
-+		if (!offset) {
-+			offset += vstruct_sectors(bn, c->block_bits);
-+			continue;
-+		}
-+
-+		/* offset is in 512-byte sectors: */
-+		bne = data + (offset << 9);
-+		if (bne->keys.seq != bn->keys.seq)
-+			break;
-+
-+		offset += vstruct_sectors(bne, c->block_bits);
-+	}
-+
-+	return offset;
-+}
-+
-+/*
-+ * Check whether the raw btree node buffer @data contains something that looks
-+ * like a bset of this node at or after sector @offset, i.e. a
-+ * btree_node_entry whose seq matches the node header's.  Every sector from
-+ * @offset up to the end of the node is probed.
-+ *
-+ * An @offset of 0 always yields false.
-+ */
-+static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *data)
-+{
-+	struct btree_node *bn = data;
-+	struct btree_node_entry *bne;
-+
-+	if (!offset)
-+		return false;
-+
-+	while (offset < btree_sectors(c)) {
-+		/* offset is in 512-byte sectors: */
-+		bne = data + (offset << 9);
-+		if (bne->keys.seq == bn->keys.seq)
-+			return true;
-+		offset++;
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Completion handler for btree_node_read_all_replicas(): compare the copies
-+ * that were read, report any differences, then finish the read using the best
-+ * copy.
-+ *
-+ * A replica is a candidate only if its read succeeded, it starts with this
-+ * filesystem's bset magic and (for btree_ptr_v2 keys with a nonzero seq) its
-+ * seq matches the key. Among candidates the one with the most sectors written
-+ * wins. If no replica qualifies, the node is marked with a read error.
-+ *
-+ * Frees the per-replica buffers and bios and @ra itself, then clears
-+ * BTREE_NODE_read_in_flight and wakes anyone waiting on that bit.
-+ */
-+static void btree_node_read_all_replicas_done(struct closure *cl)
-+{
-+ struct btree_node_read_all *ra =
-+ container_of(cl, struct btree_node_read_all, cl);
-+ struct bch_fs *c = ra->c;
-+ struct btree *b = ra->b;
-+ struct printbuf buf = PRINTBUF;
-+ bool dump_bset_maps = false;
-+ /*
-+ * NOTE(review): have_retry, write and the fsck_err label below are not
-+ * used directly in this function; presumably the btree_err_on() macro
-+ * refers to them - confirm against its definition.
-+ */
-+ bool have_retry = false;
-+ int ret = 0, best = -1, write = READ;
-+ unsigned i, written = 0, written2 = 0;
-+ __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
-+ ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
-+ bool _saw_error = false, *saw_error = &_saw_error;
-+
-+ /* Pass 1: pick the best replica and note any disagreement between copies */
-+ for (i = 0; i < ra->nr; i++) {
-+ struct btree_node *bn = ra->buf[i];
-+
-+ if (ra->err[i])
-+ continue;
-+
-+ if (le64_to_cpu(bn->magic) != bset_magic(c) ||
-+ (seq && seq != bn->keys.seq))
-+ continue;
-+
-+ /* First usable replica becomes the baseline for the comparisons below */
-+ if (best < 0) {
-+ best = i;
-+ written = btree_node_sectors_written(c, bn);
-+ continue;
-+ }
-+
-+ written2 = btree_node_sectors_written(c, ra->buf[i]);
-+ if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, NULL,
-+ btree_node_replicas_sectors_written_mismatch,
-+ "btree node sectors written mismatch: %u != %u",
-+ written, written2) ||
-+ btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]),
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, NULL,
-+ btree_node_bset_after_end,
-+ "found bset signature after last bset") ||
-+ btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9),
-+ -BCH_ERR_btree_node_read_err_fixable,
-+ c, NULL, b, NULL,
-+ btree_node_replicas_data_mismatch,
-+ "btree node replicas content mismatch"))
-+ dump_bset_maps = true;
-+
-+ /* Prefer whichever replica has more sectors written */
-+ if (written2 > written) {
-+ written = written2;
-+ best = i;
-+ }
-+ }
-+fsck_err:
-+ /* Pass 2 (only after a mismatch): log the bset layout of each readable replica */
-+ if (dump_bset_maps) {
-+ for (i = 0; i < ra->nr; i++) {
-+ struct btree_node *bn = ra->buf[i];
-+ struct btree_node_entry *bne = NULL;
-+ unsigned offset = 0, sectors;
-+ bool gap = false;
-+
-+ if (ra->err[i])
-+ continue;
-+
-+ printbuf_reset(&buf);
-+
-+ /* Contiguous bsets from the start; "*" marks a blacklisted journal seq */
-+ while (offset < btree_sectors(c)) {
-+ if (!offset) {
-+ sectors = vstruct_sectors(bn, c->block_bits);
-+ } else {
-+ bne = ra->buf[i] + (offset << 9);
-+ if (bne->keys.seq != bn->keys.seq)
-+ break;
-+ sectors = vstruct_sectors(bne, c->block_bits);
-+ }
-+
-+ prt_printf(&buf, " %u-%u", offset, offset + sectors);
-+ if (bne && bch2_journal_seq_is_blacklisted(c,
-+ le64_to_cpu(bne->keys.journal_seq), false))
-+ prt_printf(&buf, "*");
-+ offset += sectors;
-+ }
-+
-+ /* Anything with a matching seq past the first break is printed after "GAP" */
-+ while (offset < btree_sectors(c)) {
-+ bne = ra->buf[i] + (offset << 9);
-+ if (bne->keys.seq == bn->keys.seq) {
-+ if (!gap)
-+ prt_printf(&buf, " GAP");
-+ gap = true;
-+
-+ sectors = vstruct_sectors(bne, c->block_bits);
-+ prt_printf(&buf, " %u-%u", offset, offset + sectors);
-+ if (bch2_journal_seq_is_blacklisted(c,
-+ le64_to_cpu(bne->keys.journal_seq), false))
-+ prt_printf(&buf, "*");
-+ }
-+ offset++;
-+ }
-+
-+ bch_err(c, "replica %u:%s", i, buf.buf);
-+ }
-+ }
-+
-+ /* Finish the read from the chosen copy, or fail if there was none */
-+ if (best >= 0) {
-+ memcpy(b->data, ra->buf[best], btree_bytes(c));
-+ ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
-+ } else {
-+ ret = -1;
-+ }
-+
-+ if (ret)
-+ set_btree_node_read_error(b);
-+ else if (*saw_error)
-+ bch2_btree_node_rewrite_async(c, b);
-+
-+ for (i = 0; i < ra->nr; i++) {
-+ mempool_free(ra->buf[i], &c->btree_bounce_pool);
-+ bio_put(ra->bio[i]);
-+ }
-+
-+ closure_debug_destroy(&ra->cl);
-+ kfree(ra);
-+ printbuf_exit(&buf);
-+
-+ clear_btree_node_read_in_flight(b);
-+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
-+}
-+
-+/*
-+ * bio completion for one replica read issued by btree_node_read_all_replicas():
-+ * account read latency if a device ioref is held, record the bio status in the
-+ * replica's err[] slot and drop this bio's reference on the shared closure.
-+ */
-+static void btree_node_read_all_replicas_endio(struct bio *bio)
-+{
-+	struct btree_read_bio *rb =
-+		container_of(bio, struct btree_read_bio, bio);
-+	struct btree_node_read_all *ra = rb->ra;
-+
-+	if (rb->have_ioref)
-+		bch2_latency_acct(bch_dev_bkey_exists(rb->c, rb->pick.ptr.dev),
-+				  rb->start_time, READ);
-+
-+	ra->err[rb->idx] = bio->bi_status;
-+	closure_put(&ra->cl);
-+}
-+
-+/*
-+ * Read a btree node from every pointer in its key at once, so that
-+ * btree_node_read_all_replicas_done() can compare the copies.
-+ *
-+ * One bounce buffer and bio is allocated per pointer. Reads are submitted only
-+ * to devices for which an ioref could be taken; the others get
-+ * BLK_STS_REMOVED recorded as their status.
-+ *
-+ * If @sync is true this waits for all reads and runs the completion handler
-+ * before returning; otherwise the handler is scheduled on c->io_complete_wq.
-+ *
-+ * Returns 0 once the reads have been issued, or
-+ * -BCH_ERR_ENOMEM_btree_node_read_all_replicas if the tracking structure could
-+ * not be allocated (nothing has been started in that case).
-+ *
-+ * XXX This allocates multiple times from the same mempools, and can deadlock
-+ * under sufficient memory pressure (but is only a debug path)
-+ */
-+static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool sync)
-+{
-+ struct bkey_s_c k = bkey_i_to_s_c(&b->key);
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded pick;
-+ struct btree_node_read_all *ra;
-+ unsigned i;
-+
-+ ra = kzalloc(sizeof(*ra), GFP_NOFS);
-+ if (!ra)
-+ return -BCH_ERR_ENOMEM_btree_node_read_all_replicas;
-+
-+ closure_init(&ra->cl, NULL);
-+ ra->c = c;
-+ ra->b = b;
-+ ra->nr = bch2_bkey_nr_ptrs(k);
-+
-+ /* Allocate a buffer and bio for every pointer up front */
-+ for (i = 0; i < ra->nr; i++) {
-+ ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
-+ ra->bio[i] = bio_alloc_bioset(NULL,
-+ buf_pages(ra->buf[i], btree_bytes(c)),
-+ REQ_OP_READ|REQ_SYNC|REQ_META,
-+ GFP_NOFS,
-+ &c->btree_bio);
-+ }
-+
-+ /* i tracks the slot in ra->buf/bio/err that matches the current pointer */
-+ i = 0;
-+ bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-+ struct btree_read_bio *rb =
-+ container_of(ra->bio[i], struct btree_read_bio, bio);
-+ rb->c = c;
-+ rb->b = b;
-+ rb->ra = ra;
-+ rb->start_time = local_clock();
-+ rb->have_ioref = bch2_dev_get_ioref(ca, READ);
-+ rb->idx = i;
-+ rb->pick = pick;
-+ rb->bio.bi_iter.bi_sector = pick.ptr.offset;
-+ rb->bio.bi_end_io = btree_node_read_all_replicas_endio;
-+ bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
-+
-+ if (rb->have_ioref) {
-+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
-+ bio_sectors(&rb->bio));
-+ bio_set_dev(&rb->bio, ca->disk_sb.bdev);
-+
-+ /* Paired with the closure_put() in the endio handler */
-+ closure_get(&ra->cl);
-+ submit_bio(&rb->bio);
-+ } else {
-+ ra->err[i] = BLK_STS_REMOVED;
-+ }
-+
-+ i++;
-+ }
-+
-+ if (sync) {
-+ closure_sync(&ra->cl);
-+ btree_node_read_all_replicas_done(&ra->cl);
-+ } else {
-+ continue_at(&ra->cl, btree_node_read_all_replicas_done,
-+ c->io_complete_wq);
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Start reading btree node @b from disk.
-+ *
-+ * When bch2_verify_all_btree_replicas is set, all replicas are read via
-+ * btree_node_read_all_replicas(); if that fails to start, this falls through
-+ * to the normal single-device read.
-+ *
-+ * Otherwise one device is picked from the node's key. If none is available
-+ * the node is marked with a read error, BTREE_NODE_read_in_flight is cleared
-+ * and waiters are woken. If a device was picked, the read is completed by
-+ * btree_node_read_work(): called directly when @sync is true, or run
-+ * asynchronously otherwise.
-+ */
-+void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
-+ bool sync)
-+{
-+ struct extent_ptr_decoded pick;
-+ struct btree_read_bio *rb;
-+ struct bch_dev *ca;
-+ struct bio *bio;
-+ int ret;
-+
-+ trace_and_count(c, btree_node_read, c, b);
-+
-+ if (bch2_verify_all_btree_replicas &&
-+ !btree_node_read_all_replicas(c, b, sync))
-+ return;
-+
-+ ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
-+ NULL, &pick);
-+
-+ /* No usable device: fail the read here, nothing is submitted */
-+ if (ret <= 0) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ prt_str(&buf, "btree node read error: no device to read from\n at ");
-+ bch2_btree_pos_to_text(&buf, c, b);
-+ bch_err(c, "%s", buf.buf);
-+
-+ if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
-+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
-+ bch2_fatal_error(c);
-+
-+ set_btree_node_read_error(b);
-+ clear_btree_node_read_in_flight(b);
-+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
-+ printbuf_exit(&buf);
-+ return;
-+ }
-+
-+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-+
-+ bio = bio_alloc_bioset(NULL,
-+ buf_pages(b->data, btree_bytes(c)),
-+ REQ_OP_READ|REQ_SYNC|REQ_META,
-+ GFP_NOFS,
-+ &c->btree_bio);
-+ rb = container_of(bio, struct btree_read_bio, bio);
-+ rb->c = c;
-+ rb->b = b;
-+ rb->ra = NULL;
-+ rb->start_time = local_clock();
-+ rb->have_ioref = bch2_dev_get_ioref(ca, READ);
-+ rb->pick = pick;
-+ INIT_WORK(&rb->work, btree_node_read_work);
-+ bio->bi_iter.bi_sector = pick.ptr.offset;
-+ bio->bi_end_io = btree_node_read_endio;
-+ bch2_bio_map(bio, b->data, btree_bytes(c));
-+
-+ if (rb->have_ioref) {
-+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
-+ bio_sectors(bio));
-+ bio_set_dev(bio, ca->disk_sb.bdev);
-+
-+ if (sync) {
-+ submit_bio_wait(bio);
-+
-+ btree_node_read_work(&rb->work);
-+ } else {
-+ submit_bio(bio);
-+ }
-+ } else {
-+ /* Could not get an ioref: report the read as failed without submitting */
-+ bio->bi_status = BLK_STS_REMOVED;
-+
-+ if (sync)
-+ btree_node_read_work(&rb->work);
-+ else
-+ queue_work(c->io_complete_wq, &rb->work);
-+ }
-+}
-+
-+/*
-+ * Allocate an in-memory node for a btree root described by key @k, read it
-+ * synchronously and, on success, install it as the root via
-+ * bch2_btree_set_root_for_read().
-+ *
-+ * Returns 0 on success or -EIO if the node ended up flagged with a read
-+ * error, in which case it is removed from the hash table and moved to the
-+ * cache's freeable list. Allocation failure and hash insertion failure are
-+ * treated as bugs (BUG_ON).
-+ */
-+static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
-+ const struct bkey_i *k, unsigned level)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct closure cl;
-+ struct btree *b;
-+ int ret;
-+
-+ closure_init_stack(&cl);
-+
-+ /* Retry until the cannibalize lock is obtained; ret is 0 when the loop exits */
-+ do {
-+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
-+ closure_sync(&cl);
-+ } while (ret);
-+
-+ b = bch2_btree_node_mem_alloc(trans, level != 0);
-+ bch2_btree_cache_cannibalize_unlock(c);
-+
-+ BUG_ON(IS_ERR(b));
-+
-+ bkey_copy(&b->key, k);
-+ BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
-+
-+ set_btree_node_read_in_flight(b);
-+
-+ bch2_btree_node_read(c, b, true);
-+
-+ if (btree_node_read_error(b)) {
-+ bch2_btree_node_hash_remove(&c->btree_cache, b);
-+
-+ mutex_lock(&c->btree_cache.lock);
-+ list_move(&b->list, &c->btree_cache.freeable);
-+ mutex_unlock(&c->btree_cache.lock);
-+
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ bch2_btree_set_root_for_read(c, b);
-+err:
-+ /*
-+ * Both paths release the node's write and intent locks; presumably
-+ * bch2_btree_node_mem_alloc() returned it with those held - confirm.
-+ */
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Read the root node of btree @id, described by key @k at @level, by running
-+ * __bch2_btree_root_read() through bch2_trans_run(). Returns its result.
-+ */
-+int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
-+			const struct bkey_i *k, unsigned level)
-+{
-+	int ret;
-+
-+	ret = bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
-+
-+	return ret;
-+}
-+
-+/*
-+ * Bookkeeping after a write of @b has completed.
-+ *
-+ * b->will_make_reachable holds a pointer value with bit 0 used as a flag. If
-+ * the flag is set it is atomically cleared, and the remaining value is treated
-+ * as a struct btree_update whose closure reference is dropped. The journal pin
-+ * held by @w is then dropped.
-+ */
-+void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
-+ struct btree_write *w)
-+{
-+ unsigned long old, new, v = READ_ONCE(b->will_make_reachable);
-+
-+ /* cmpxchg loop: clear bit 0 if set; stop immediately if it is already clear */
-+ do {
-+ old = new = v;
-+ if (!(old & 1))
-+ break;
-+
-+ new &= ~1UL;
-+ } while ((v = cmpxchg(&b->will_make_reachable, old, new)) != old);
-+
-+ /* new is old with the flag bit stripped, i.e. the bare pointer */
-+ if (old & 1)
-+ closure_put(&((struct btree_update *) new)->cl);
-+
-+ bch2_journal_pin_drop(&c->journal, &w->journal);
-+}
-+
-+/*
-+ * Finish the previous write of @b and decide whether another one must start
-+ * straight away.
-+ *
-+ * After completing the previous btree_write, b->flags is updated atomically:
-+ * if the node is dirty and needs a write, and is not never_write,
-+ * write_blocked or will_make_reachable, the write is claimed (same flag
-+ * transitions as at the start of __bch2_btree_node_write()) and a new write is
-+ * issued with BTREE_WRITE_ALREADY_STARTED. Otherwise the write_in_flight
-+ * flags are cleared and waiters on BTREE_NODE_write_in_flight are woken.
-+ */
-+static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
-+{
-+ struct btree_write *w = btree_prev_write(b);
-+ unsigned long old, new, v;
-+ unsigned type = 0;
-+
-+ bch2_btree_complete_write(c, b, w);
-+
-+ v = READ_ONCE(b->flags);
-+ do {
-+ old = new = v;
-+
-+ if ((old & (1U << BTREE_NODE_dirty)) &&
-+ (old & (1U << BTREE_NODE_need_write)) &&
-+ !(old & (1U << BTREE_NODE_never_write)) &&
-+ !(old & (1U << BTREE_NODE_write_blocked)) &&
-+ !(old & (1U << BTREE_NODE_will_make_reachable))) {
-+ new &= ~(1U << BTREE_NODE_dirty);
-+ new &= ~(1U << BTREE_NODE_need_write);
-+ new |= (1U << BTREE_NODE_write_in_flight);
-+ new |= (1U << BTREE_NODE_write_in_flight_inner);
-+ new |= (1U << BTREE_NODE_just_written);
-+ new ^= (1U << BTREE_NODE_write_idx);
-+
-+ /* The write type is kept in the flag word; take it out for the new write */
-+ type = new & BTREE_WRITE_TYPE_MASK;
-+ new &= ~BTREE_WRITE_TYPE_MASK;
-+ } else {
-+ new &= ~(1U << BTREE_NODE_write_in_flight);
-+ new &= ~(1U << BTREE_NODE_write_in_flight_inner);
-+ }
-+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
-+
-+ /* write_in_flight still set in new means the branch above claimed another write */
-+ if (new & (1U << BTREE_NODE_write_in_flight))
-+ __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type);
-+ else
-+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
-+}
-+
-+/*
-+ * Run __btree_node_write_done() with a read lock held on the node. A
-+ * transaction is obtained only for the duration of the call so the lock can
-+ * be taken through btree_node_lock_nopath_nofail().
-+ */
-+static void btree_node_write_done(struct bch_fs *c, struct btree *b)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-+ __btree_node_write_done(c, b);
-+ six_unlock_read(&b->c.lock);
-+
-+ bch2_trans_put(trans);
-+}
-+
-+/*
-+ * Work item run after a btree node write's bio has completed (queued from
-+ * btree_node_write_endio()).
-+ *
-+ * Frees the bounce buffer, drops from the write's copy of the key every
-+ * pointer to a device recorded in the failed list, and, unless this was the
-+ * node's first write, updates the node's key via
-+ * bch2_btree_node_update_key_get_iter(). If no pointers are left or the key
-+ * update fails, the node is marked noevict and, unless the error matches
-+ * EROFS, a fatal filesystem error is raised. In every case the bio is released
-+ * and btree_node_write_done() is called.
-+ */
-+static void btree_node_write_work(struct work_struct *work)
-+{
-+	struct btree_write_bio *wbio =
-+		container_of(work, struct btree_write_bio, work);
-+	struct bch_fs *c = wbio->wbio.c;
-+	struct btree *b = wbio->wbio.bio.bi_private;
-+	struct bch_extent_ptr *ptr;
-+	int ret = 0;
-+
-+	btree_bounce_free(c,
-+			  wbio->data_bytes,
-+			  wbio->wbio.used_mempool,
-+			  wbio->data);
-+
-+	bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr,
-+		bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
-+
-+	/* Every replica failed: nothing left that points at valid data */
-+	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key)))
-+		goto err;
-+
-+	/* The first write of a node does not update the key here */
-+	if (!wbio->wbio.first_btree_write) {
-+		ret = bch2_trans_do(c, NULL, NULL, 0,
-+			bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
-+					BCH_WATERMARK_reclaim|
-+					BTREE_INSERT_JOURNAL_RECLAIM|
-+					BTREE_INSERT_NOFAIL|
-+					BTREE_INSERT_NOCHECK_RW,
-+					!wbio->wbio.failed.nr));
-+		if (ret)
-+			goto err;
-+	}
-+out:
-+	bio_put(&wbio->wbio.bio);
-+	btree_node_write_done(c, b);
-+	return;
-+err:
-+	set_btree_node_noevict(b);
-+	if (!bch2_err_matches(ret, EROFS))
-+		bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
-+	goto out;
-+}
-+
-+/*
-+ * bio completion for a btree node write.
-+ *
-+ * Accounts write latency, records the device in the original write's failed
-+ * list on an I/O error (or an injected "btree" meta write fault), and drops
-+ * the device ioref if one was held. For a split bio this then releases the
-+ * split and ends the parent bio. For the original bio it clears
-+ * BTREE_NODE_write_in_flight_inner, wakes waiters on that bit and queues
-+ * btree_node_write_work() on c->btree_io_complete_wq.
-+ */
-+static void btree_node_write_endio(struct bio *bio)
-+{
-+ struct bch_write_bio *wbio = to_wbio(bio);
-+ struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
-+ struct bch_write_bio *orig = parent ?: wbio;
-+ struct btree_write_bio *wb = container_of(orig, struct btree_write_bio, wbio);
-+ struct bch_fs *c = wbio->c;
-+ struct btree *b = wbio->bio.bi_private;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
-+ unsigned long flags;
-+
-+ if (wbio->have_ioref)
-+ bch2_latency_acct(ca, wbio->submit_time, WRITE);
-+
-+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
-+ "btree write error: %s",
-+ bch2_blk_status_to_str(bio->bi_status)) ||
-+ bch2_meta_write_fault("btree")) {
-+ /* The failed list is updated under btree_write_error_lock with IRQs disabled */
-+ spin_lock_irqsave(&c->btree_write_error_lock, flags);
-+ bch2_dev_list_add_dev(&orig->failed, wbio->dev);
-+ spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
-+ }
-+
-+ if (wbio->have_ioref)
-+ percpu_ref_put(&ca->io_ref);
-+
-+ /* Split bio: hand completion over to the parent and stop here */
-+ if (parent) {
-+ bio_put(bio);
-+ bio_endio(&parent->bio);
-+ return;
-+ }
-+
-+ clear_btree_node_write_in_flight_inner(b);
-+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner);
-+ INIT_WORK(&wb->work, btree_node_write_work);
-+ queue_work(c->btree_io_complete_wq, &wb->work);
-+}
-+
-+/*
-+ * Sanity-check what is about to be written for node @b: first the node's own
-+ * key, then the keys of bset @i and the bset itself (@sectors long).
-+ *
-+ * Returns 0 if everything validates. On failure the filesystem is flagged
-+ * inconsistent and the nonzero error is returned; a bset failure also dumps
-+ * the stack.
-+ */
-+static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
-+				   struct bset *i, unsigned sectors)
-+{
-+	struct printbuf err_msg = PRINTBUF;
-+	bool saw_error;
-+	int ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key),
-+				    BKEY_TYPE_btree, WRITE, &err_msg);
-+
-+	if (ret)
-+		bch2_fs_inconsistent(c, "invalid btree node key before write: %s", err_msg.buf);
-+	printbuf_exit(&err_msg);
-+	if (ret)
-+		return ret;
-+
-+	ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error);
-+	if (!ret)
-+		ret = validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error);
-+	if (!ret)
-+		return 0;
-+
-+	bch2_inconsistent_error(c);
-+	dump_stack();
-+	return ret;
-+}
-+
-+/*
-+ * Work item that submits a prepared btree node write to all replicas.
-+ *
-+ * Works on an on-stack copy of the write's key in which every pointer's
-+ * offset is advanced by wbio->sector_offset, so each replica is written at
-+ * that sector offset from the start of the node. The key stored in @wbio is
-+ * not modified.
-+ */
-+static void btree_write_submit(struct work_struct *work)
-+{
-+ struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work);
-+ struct bch_extent_ptr *ptr;
-+ BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
-+
-+ bkey_copy(&tmp.k, &wbio->key);
-+
-+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&tmp.k)), ptr)
-+ ptr->offset += wbio->sector_offset;
-+
-+ bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree,
-+ &tmp.k, false);
-+}
-+
-+/*
-+ * Write out the not-yet-written bsets of btree node @b.
-+ *
-+ * Unless BTREE_WRITE_ALREADY_STARTED is passed, the write is first claimed by
-+ * atomically clearing the dirty/need_write flags and setting the
-+ * write_in_flight flags. The function returns without doing anything if the
-+ * node is not dirty, may not be written right now, or already has a write in
-+ * flight.
-+ *
-+ * The keys of all unwritten bsets plus the unwritten whiteouts are sorted
-+ * into a bounce buffer as a single bset, which is validated, encrypted and
-+ * checksummed, then handed to btree_write_submit() on c->io_complete_wq.
-+ * b->written is advanced by the size of the new bset, including on the error
-+ * path. If there is nothing to write, or on error, the write is finished
-+ * immediately through __btree_node_write_done().
-+ *
-+ * @flags: a btree write type (BTREE_WRITE_TYPE_MASK bits), optionally ORed
-+ * with BTREE_WRITE_ONLY_IF_NEED and/or BTREE_WRITE_ALREADY_STARTED.
-+ */
-+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
-+{
-+ struct btree_write_bio *wbio;
-+ struct bset_tree *t;
-+ struct bset *i;
-+ struct btree_node *bn = NULL;
-+ struct btree_node_entry *bne = NULL;
-+ struct sort_iter_stack sort_iter;
-+ struct nonce nonce;
-+ unsigned bytes_to_write, sectors_to_write, bytes, u64s;
-+ u64 seq = 0;
-+ bool used_mempool;
-+ unsigned long old, new;
-+ bool validate_before_checksum = false;
-+ enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
-+ void *data;
-+ int ret;
-+
-+ /* The caller (__btree_node_write_done) has already done the flag transition */
-+ if (flags & BTREE_WRITE_ALREADY_STARTED)
-+ goto do_write;
-+
-+ /*
-+ * We may only have a read lock on the btree node - the dirty bit is our
-+ * "lock" against racing with other threads that may be trying to start
-+ * a write, we do a write iff we clear the dirty bit. Since setting the
-+ * dirty bit requires a write lock, we can't race with other threads
-+ * redirtying it:
-+ */
-+ do {
-+ old = new = READ_ONCE(b->flags);
-+
-+ if (!(old & (1 << BTREE_NODE_dirty)))
-+ return;
-+
-+ if ((flags & BTREE_WRITE_ONLY_IF_NEED) &&
-+ !(old & (1 << BTREE_NODE_need_write)))
-+ return;
-+
-+ if (old &
-+ ((1 << BTREE_NODE_never_write)|
-+ (1 << BTREE_NODE_write_blocked)))
-+ return;
-+
-+ if (b->written &&
-+ (old & (1 << BTREE_NODE_will_make_reachable)))
-+ return;
-+
-+ if (old & (1 << BTREE_NODE_write_in_flight))
-+ return;
-+
-+ /* With ONLY_IF_NEED the write type comes from the node's flags, not @flags */
-+ if (flags & BTREE_WRITE_ONLY_IF_NEED)
-+ type = new & BTREE_WRITE_TYPE_MASK;
-+ new &= ~BTREE_WRITE_TYPE_MASK;
-+
-+ new &= ~(1 << BTREE_NODE_dirty);
-+ new &= ~(1 << BTREE_NODE_need_write);
-+ new |= (1 << BTREE_NODE_write_in_flight);
-+ new |= (1 << BTREE_NODE_write_in_flight_inner);
-+ new |= (1 << BTREE_NODE_just_written);
-+ new ^= (1 << BTREE_NODE_write_idx);
-+ } while (cmpxchg_acquire(&b->flags, old, new) != old);
-+
-+ /*
-+ * NOTE(review): need_write was cleared in new just above, so this test
-+ * looks like it can never be true - confirm before relying on it.
-+ */
-+ if (new & (1U << BTREE_NODE_need_write))
-+ return;
-+do_write:
-+ BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0));
-+
-+ atomic_dec(&c->btree_cache.dirty);
-+
-+ BUG_ON(btree_node_fake(b));
-+ BUG_ON((b->will_make_reachable != 0) != !b->written);
-+
-+ BUG_ON(b->written >= btree_sectors(c));
-+ BUG_ON(b->written & (block_sectors(c) - 1));
-+ BUG_ON(bset_written(b, btree_bset_last(b)));
-+ BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
-+ BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
-+
-+ bch2_sort_whiteouts(c, b);
-+
-+ sort_iter_stack_init(&sort_iter, b);
-+
-+ /* Size the bounce buffer: header + whiteouts + every unwritten bset */
-+ bytes = !b->written
-+ ? sizeof(struct btree_node)
-+ : sizeof(struct btree_node_entry);
-+
-+ bytes += b->whiteout_u64s * sizeof(u64);
-+
-+ for_each_bset(b, t) {
-+ i = bset(b, t);
-+
-+ if (bset_written(b, i))
-+ continue;
-+
-+ bytes += le16_to_cpu(i->u64s) * sizeof(u64);
-+ sort_iter_add(&sort_iter.iter,
-+ btree_bkey_first(b, t),
-+ btree_bkey_last(b, t));
-+ seq = max(seq, le64_to_cpu(i->journal_seq));
-+ }
-+
-+ BUG_ON(b->written && !seq);
-+
-+ /* bch2_varint_decode may read up to 7 bytes past the end of the buffer: */
-+ bytes += 8;
-+
-+ /* buffer must be a multiple of the block size */
-+ bytes = round_up(bytes, block_bytes(c));
-+
-+ data = btree_bounce_alloc(c, bytes, &used_mempool);
-+
-+ /* First write carries the full node header; later writes a btree_node_entry */
-+ if (!b->written) {
-+ bn = data;
-+ *bn = *b->data;
-+ i = &bn->keys;
-+ } else {
-+ bne = data;
-+ bne->keys = b->data->keys;
-+ i = &bne->keys;
-+ }
-+
-+ i->journal_seq = cpu_to_le64(seq);
-+ i->u64s = 0;
-+
-+ sort_iter_add(&sort_iter.iter,
-+ unwritten_whiteouts_start(c, b),
-+ unwritten_whiteouts_end(c, b));
-+ SET_BSET_SEPARATE_WHITEOUTS(i, false);
-+
-+ b->whiteout_u64s = 0;
-+
-+ u64s = bch2_sort_keys(i->start, &sort_iter.iter, false);
-+ le16_add_cpu(&i->u64s, u64s);
-+
-+ BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
-+
-+ set_needs_whiteout(i, false);
-+
-+ /* do we have data to write? */
-+ if (b->written && !i->u64s)
-+ goto nowrite;
-+
-+ bytes_to_write = vstruct_end(i) - data;
-+ sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
-+
-+ if (!b->written &&
-+ b->key.k.type == KEY_TYPE_btree_ptr_v2)
-+ BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write);
-+
-+ /* Zero the padding between the end of the bset and the end of its last sector */
-+ memset(data + bytes_to_write, 0,
-+ (sectors_to_write << 9) - bytes_to_write);
-+
-+ BUG_ON(b->written + sectors_to_write > btree_sectors(c));
-+ BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
-+ BUG_ON(i->seq != b->data->keys.seq);
-+
-+ i->version = cpu_to_le16(c->sb.version);
-+ SET_BSET_OFFSET(i, b->written);
-+ SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c));
-+
-+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)))
-+ validate_before_checksum = true;
-+
-+ /* validate_bset will be modifying: */
-+ if (le16_to_cpu(i->version) < bcachefs_metadata_version_current)
-+ validate_before_checksum = true;
-+
-+ /* if we're going to be encrypting, check metadata validity first: */
-+ if (validate_before_checksum &&
-+ validate_bset_for_write(c, b, i, sectors_to_write))
-+ goto err;
-+
-+ ret = bset_encrypt(c, i, b->written << 9);
-+ if (bch2_fs_fatal_err_on(ret, c,
-+ "error encrypting btree node: %i\n", ret))
-+ goto err;
-+
-+ nonce = btree_nonce(i, b->written << 9);
-+
-+ if (bn)
-+ bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn);
-+ else
-+ bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
-+
-+ /* if we're not encrypting, check metadata after checksumming: */
-+ if (!validate_before_checksum &&
-+ validate_bset_for_write(c, b, i, sectors_to_write))
-+ goto err;
-+
-+ /*
-+ * We handle btree write errors by immediately halting the journal -
-+ * after we've done that, we can't issue any subsequent btree writes
-+ * because they might have pointers to new nodes that failed to write.
-+ *
-+ * Furthermore, there's no point in doing any more btree writes because
-+ * with the journal stopped, we're never going to update the journal to
-+ * reflect that those writes were done and the data flushed from the
-+ * journal:
-+ *
-+ * Also on journal error, the pending write may have updates that were
-+ * never journalled (interior nodes, see btree_update_nodes_written()) -
-+ * it's critical that we don't do the write in that case otherwise we
-+ * will have updates visible that weren't in the journal:
-+ *
-+ * Make sure to update b->written so bch2_btree_init_next() doesn't
-+ * break:
-+ */
-+ if (bch2_journal_error(&c->journal) ||
-+ c->opts.nochanges)
-+ goto err;
-+
-+ trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write);
-+
-+ wbio = container_of(bio_alloc_bioset(NULL,
-+ buf_pages(data, sectors_to_write << 9),
-+ REQ_OP_WRITE|REQ_META,
-+ GFP_NOFS,
-+ &c->btree_bio),
-+ struct btree_write_bio, wbio.bio);
-+ wbio_init(&wbio->wbio.bio);
-+ wbio->data = data;
-+ wbio->data_bytes = bytes;
-+ wbio->sector_offset = b->written;
-+ wbio->wbio.c = c;
-+ wbio->wbio.used_mempool = used_mempool;
-+ wbio->wbio.first_btree_write = !b->written;
-+ wbio->wbio.bio.bi_end_io = btree_node_write_endio;
-+ wbio->wbio.bio.bi_private = b;
-+
-+ bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9);
-+
-+ bkey_copy(&wbio->key, &b->key);
-+
-+ b->written += sectors_to_write;
-+
-+ /* The write's copy of the key records the new sectors_written; b->key is untouched here */
-+ if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
-+ bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
-+ cpu_to_le16(b->written);
-+
-+ atomic64_inc(&c->btree_write_stats[type].nr);
-+ atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
-+
-+ INIT_WORK(&wbio->work, btree_write_submit);
-+ queue_work(c->io_complete_wq, &wbio->work);
-+ return;
-+err:
-+ set_btree_node_noevict(b);
-+ b->written += sectors_to_write;
-+nowrite:
-+ btree_bounce_free(c, bytes, used_mempool, data);
-+ __btree_node_write_done(c, b);
-+}
-+
-+/*
-+ * Work that must be done with write lock held:
-+ *
-+ * If the node has BTREE_NODE_just_written set, clear it and tidy the
-+ * in-memory node: sort multiple bsets down to one (or drop whiteouts when
-+ * there is only one), mark every bset as needing whiteouts again, start a new
-+ * bset if want_new_bset() asks for one, and rebuild the aux trees.
-+ *
-+ * Returns true if iterators into the node may have been invalidated, false
-+ * otherwise (including when the node was not just written).
-+ */
-+bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
-+{
-+ bool invalidated_iter = false;
-+ struct btree_node_entry *bne;
-+ struct bset_tree *t;
-+
-+ if (!btree_node_just_written(b))
-+ return false;
-+
-+ BUG_ON(b->whiteout_u64s);
-+
-+ clear_btree_node_just_written(b);
-+
-+ /*
-+ * Note: immediately after write, bset_written() doesn't work - the
-+ * amount of data we had to write after compaction might have been
-+ * smaller than the offset of the last bset.
-+ *
-+ * However, we know that all bsets have been written here, as long as
-+ * we're still holding the write lock:
-+ */
-+
-+ /*
-+ * XXX: decide if we really want to unconditionally sort down to a
-+ * single bset:
-+ */
-+ if (b->nsets > 1) {
-+ btree_node_sort(c, b, 0, b->nsets, true);
-+ invalidated_iter = true;
-+ } else {
-+ invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
-+ }
-+
-+ for_each_bset(b, t)
-+ set_needs_whiteout(bset(b, t), true);
-+
-+ bch2_btree_verify(c, b);
-+
-+ /*
-+ * If later we don't unconditionally sort down to a single bset, we have
-+ * to ensure this is still true:
-+ */
-+ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b));
-+
-+ bne = want_new_bset(c, b);
-+ if (bne)
-+ bch2_bset_init_next(c, b, bne);
-+
-+ bch2_btree_build_aux_trees(b);
-+
-+ return invalidated_iter;
-+}
-+
-+/*
-+ * Use this one if the node is intent locked:
-+ *
-+ * Write node @b and, where the locks allow it, run the post-write cleanup.
-+ *
-+ * @lock_type_held is the lock the caller holds on @b. With an intent lock, or
-+ * a read lock that can be upgraded, the cleanup is run only if the write lock
-+ * can be taken without blocking; an upgraded read lock is downgraded again
-+ * before returning. If the caller holds the write lock the cleanup is run
-+ * directly. In the remaining case (a read lock that could not be upgraded)
-+ * only the write is issued.
-+ */
-+void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
-+ enum six_lock_type lock_type_held,
-+ unsigned flags)
-+{
-+ if (lock_type_held == SIX_LOCK_intent ||
-+ (lock_type_held == SIX_LOCK_read &&
-+ six_lock_tryupgrade(&b->c.lock))) {
-+ __bch2_btree_node_write(c, b, flags);
-+
-+ /* don't cycle lock unnecessarily: */
-+ if (btree_node_just_written(b) &&
-+ six_trylock_write(&b->c.lock)) {
-+ bch2_btree_post_write_cleanup(c, b);
-+ six_unlock_write(&b->c.lock);
-+ }
-+
-+ /* Undo the tryupgrade above so the caller gets back the lock type it had */
-+ if (lock_type_held == SIX_LOCK_read)
-+ six_lock_downgrade(&b->c.lock);
-+ } else {
-+ __bch2_btree_node_write(c, b, flags);
-+ if (lock_type_held == SIX_LOCK_write &&
-+ btree_node_just_written(b))
-+ bch2_btree_post_write_cleanup(c, b);
-+ }
-+}
-+
-+/*
-+ * Wait until no cached btree node has bit @flag set in b->flags.
-+ *
-+ * The cache is walked under rcu_read_lock(). Whenever a node with the bit set
-+ * is found, the RCU read lock is dropped, the bit is waited on, and the walk
-+ * restarts from the beginning.
-+ *
-+ * Returns true if it had to wait at least once, false if no node had the bit
-+ * set.
-+ */
-+static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
-+{
-+ struct bucket_table *tbl;
-+ struct rhash_head *pos;
-+ struct btree *b;
-+ unsigned i;
-+ bool ret = false;
-+restart:
-+ rcu_read_lock();
-+ for_each_cached_btree(b, c, tbl, i, pos)
-+ if (test_bit(flag, &b->flags)) {
-+ /* Leave the RCU read section before the wait, which may sleep */
-+ rcu_read_unlock();
-+ wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
-+ ret = true;
-+ goto restart;
-+ }
-+ rcu_read_unlock();
-+
-+ return ret;
-+}
-+
-+/*
-+ * Wait for every cached btree node to have BTREE_NODE_read_in_flight clear.
-+ * Returns true if any waiting was needed.
-+ */
-+bool bch2_btree_flush_all_reads(struct bch_fs *c)
-+{
-+ return __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
-+}
-+
-+/*
-+ * Wait for every cached btree node to have BTREE_NODE_write_in_flight clear.
-+ * Returns true if any waiting was needed.
-+ */
-+bool bch2_btree_flush_all_writes(struct bch_fs *c)
-+{
-+ return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
-+}
-+
-+/*
-+ * Names of the btree write types, indexed by write type and NULL-terminated.
-+ * Generated from the BCH_BTREE_WRITE_TYPES() x-macro list.
-+ */
-+static const char * const bch2_btree_write_types[] = {
-+#define x(t, n) [n] = #t,
-+	BCH_BTREE_WRITE_TYPES()
-+#undef x
-+	NULL
-+};
-+
-+/*
-+ * Print a table of btree write statistics to @out: one row per write type
-+ * showing the number of writes and the average write size (total bytes
-+ * divided by the count, or 0 when there were no writes).
-+ */
-+void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	unsigned type;
-+
-+	printbuf_tabstop_push(out, 20);
-+	printbuf_tabstop_push(out, 10);
-+
-+	/* Header row; the first column (type name) is left blank */
-+	prt_tab(out);
-+	prt_str(out, "nr");
-+	prt_tab(out);
-+	prt_str(out, "size");
-+	prt_newline(out);
-+
-+	for (type = 0; type < BTREE_WRITE_TYPE_NR; type++) {
-+		u64 count = atomic64_read(&c->btree_write_stats[type].nr);
-+		u64 total = atomic64_read(&c->btree_write_stats[type].bytes);
-+		u64 average = count ? div64_u64(total, count) : 0;
-+
-+		prt_printf(out, "%s:", bch2_btree_write_types[type]);
-+		prt_tab(out);
-+		prt_u64(out, count);
-+		prt_tab(out);
-+		prt_human_readable_u64(out, average);
-+		prt_newline(out);
-+	}
-+}
-diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
-new file mode 100644
-index 000000000000..7e03dd76fb38
---- /dev/null
-+++ b/fs/bcachefs/btree_io.h
-@@ -0,0 +1,228 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_IO_H
-+#define _BCACHEFS_BTREE_IO_H
-+
-+#include "bkey_methods.h"
-+#include "bset.h"
-+#include "btree_locking.h"
-+#include "checksum.h"
-+#include "extents.h"
-+#include "io_write_types.h"
-+
-+struct bch_fs;
-+struct btree_write;
-+struct btree;
-+struct btree_iter;
-+struct btree_node_read_all;
-+
-+/*
-+ * Mark @b dirty; the count of dirty nodes in the btree cache is bumped only
-+ * when the flag was not already set.
-+ */
-+static inline void set_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
-+{
-+	bool was_dirty = test_and_set_bit(BTREE_NODE_dirty, &b->flags);
-+
-+	if (was_dirty)
-+		return;
-+
-+	atomic_inc(&c->btree_cache.dirty);
-+}
-+
-+/*
-+ * Clear @b's dirty flag; the count of dirty nodes in the btree cache is
-+ * dropped only when the flag was actually set.
-+ */
-+static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
-+{
-+	bool was_dirty = test_and_clear_bit(BTREE_NODE_dirty, &b->flags);
-+
-+	if (!was_dirty)
-+		return;
-+
-+	atomic_dec(&c->btree_cache.dirty);
-+}
-+
-+/*
-+ * Return the sectors_written field of a btree_ptr_v2 key (converted to CPU
-+ * byte order), or 0 for any other key type.
-+ */
-+static inline unsigned btree_ptr_sectors_written(struct bkey_i *k)
-+{
-+	if (k->k.type != KEY_TYPE_btree_ptr_v2)
-+		return 0;
-+
-+	return le16_to_cpu(bkey_i_to_btree_ptr_v2(k)->v.sectors_written);
-+}
-+
-+/*
-+ * Per-read state for a btree node read. The bio is embedded, so the enclosing
-+ * btree_read_bio is recovered from a struct bio pointer with container_of().
-+ */
-+struct btree_read_bio {
-+ struct bch_fs *c;
-+ struct btree *b; /* node being read */
-+ struct btree_node_read_all *ra; /* shared state when reading all replicas, else NULL */
-+ u64 start_time; /* local_clock() at submission, used for latency accounting */
-+ unsigned have_ioref:1; /* set when an ioref on the device was obtained */
-+ unsigned idx:7; /* this read's slot in the arrays of *ra */
-+ struct extent_ptr_decoded pick; /* the pointer (device and offset) being read */
-+ struct work_struct work;
-+ struct bio bio;
-+};
-+
-+/*
-+ * Per-write state for a btree node write; wraps the generic bch_write_bio.
-+ */
-+struct btree_write_bio {
-+ struct work_struct work; /* used first for submission, later for completion work */
-+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); /* copy of the node's key taken when the write was set up */
-+ void *data; /* bounce buffer holding the bset being written */
-+ unsigned data_bytes; /* allocated size of data, needed to free it */
-+ unsigned sector_offset; /* b->written when the write was set up; added to each pointer's offset */
-+ struct bch_write_bio wbio;
-+};
-+
-+void bch2_btree_node_io_unlock(struct btree *);
-+void bch2_btree_node_io_lock(struct btree *);
-+void __bch2_btree_node_wait_on_read(struct btree *);
-+void __bch2_btree_node_wait_on_write(struct btree *);
-+void bch2_btree_node_wait_on_read(struct btree *);
-+void bch2_btree_node_wait_on_write(struct btree *);
-+
-+/* Mode argument for the whiteout compaction helpers. */
-+enum compact_mode {
-+ COMPACT_LAZY, /* passed by bch2_maybe_compact_whiteouts() */
-+ COMPACT_ALL,
-+};
-+
-+bool bch2_compact_whiteouts(struct bch_fs *, struct btree *,
-+ enum compact_mode);
-+
-+/*
-+ * Heuristic for lazy compaction of one bset: true when it holds more than 64
-+ * dead u64s and those make up more than a third of the bset.
-+ */
-+static inline bool should_compact_bset_lazy(struct btree *b,
-+					    struct bset_tree *t)
-+{
-+	unsigned live_and_dead = bset_u64s(t);
-+	unsigned dead = bset_dead_u64s(b, t);
-+
-+	if (dead <= 64)
-+		return false;
-+
-+	return dead * 3 > live_and_dead;
-+}
-+
-+/*
-+ * Run a lazy whiteout compaction on @b if any of its bsets passes
-+ * should_compact_bset_lazy(). Returns the result of
-+ * bch2_compact_whiteouts() in that case, false if no bset qualified.
-+ */
-+static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b)
-+{
-+	struct bset_tree *t;
-+
-+	for_each_bset(b, t) {
-+		if (!should_compact_bset_lazy(b, t))
-+			continue;
-+
-+		/* One qualifying bset is enough to compact the whole node */
-+		return bch2_compact_whiteouts(c, b, COMPACT_LAZY);
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Build the nonce used to encrypt and checksum a bset at byte @offset within
-+ * its btree node: word 0 is the offset, words 1-2 are the two 32-bit halves of
-+ * the bset's seq, and word 3 is the first 32-bit word of its journal_seq XORed
-+ * with BCH_NONCE_BTREE.
-+ */
-+static inline struct nonce btree_nonce(struct bset *i, unsigned offset)
-+{
-+ return (struct nonce) {{
-+ [0] = cpu_to_le32(offset),
-+ [1] = ((__le32 *) &i->seq)[0],
-+ [2] = ((__le32 *) &i->seq)[1],
-+ [3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
-+ }};
-+}
-+
-+/*
-+ * Run bch2_encrypt() over bset @i, located at byte @offset within its btree
-+ * node, using the bset's checksum type.
-+ *
-+ * For the first bset (@offset == 0), which is embedded in the btree_node
-+ * header, the header fields from ->flags up to ->keys are processed first and
-+ * the nonce is advanced by that length rounded up to CHACHA_BLOCK_SIZE. The
-+ * bset payload (from i->_data to the end of the bset) follows.
-+ *
-+ * Returns 0 on success or the error from bch2_encrypt().
-+ */
-+static inline int bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
-+{
-+ struct nonce nonce = btree_nonce(i, offset);
-+ int ret;
-+
-+ if (!offset) {
-+ struct btree_node *bn = container_of(i, struct btree_node, keys);
-+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
-+
-+ ret = bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
-+ &bn->flags, bytes);
-+ if (ret)
-+ return ret;
-+
-+ nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
-+ }
-+
-+ return bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
-+ vstruct_end(i) - (void *) i->_data);
-+}
-+
-+void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *);
-+
-+void bch2_btree_node_drop_keys_outside_node(struct btree *);
-+
-+void bch2_btree_build_aux_trees(struct btree *);
-+void bch2_btree_init_next(struct btree_trans *, struct btree *);
-+
-+int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
-+ struct btree *, bool, bool *);
-+void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
-+int bch2_btree_root_read(struct bch_fs *, enum btree_id,
-+ const struct bkey_i *, unsigned);
-+
-+void bch2_btree_complete_write(struct bch_fs *, struct btree *,
-+ struct btree_write *);
-+
-+bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
-+
-+/*
-+ * Flag bits for the @flags argument of the btree node write functions. They
-+ * are numbered from BTREE_WRITE_TYPE_BITS upwards, so the write type can be
-+ * carried in the bits below them.
-+ */
-+enum btree_write_flags {
-+ __BTREE_WRITE_ONLY_IF_NEED = BTREE_WRITE_TYPE_BITS,
-+ __BTREE_WRITE_ALREADY_STARTED,
-+};
-+/* Write only if the node's need_write flag is set */
-+#define BTREE_WRITE_ONLY_IF_NEED BIT(__BTREE_WRITE_ONLY_IF_NEED)
-+/* The caller has already claimed the write in b->flags */
-+#define BTREE_WRITE_ALREADY_STARTED BIT(__BTREE_WRITE_ALREADY_STARTED)
-+
-+void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
-+void bch2_btree_node_write(struct bch_fs *, struct btree *,
-+ enum six_lock_type, unsigned);
-+
-+/*
-+ * Call bch2_btree_node_write() on @b with BTREE_WRITE_ONLY_IF_NEED.
-+ * @lock_held is the lock type the caller holds on the node.
-+ */
-+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
-+ enum six_lock_type lock_held)
-+{
-+ bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
-+}
-+
-+bool bch2_btree_flush_all_reads(struct bch_fs *);
-+bool bch2_btree_flush_all_writes(struct bch_fs *);
-+
-+/*
-+ * Adjust a bkey_format for compatibility with older on-disk versions.
-+ *
-+ * Before the inode_btree_change version, the inodes btree had the INODE and
-+ * OFFSET fields the other way round, so their format entries are swapped.
-+ * Before the snapshot version, for interior nodes and btrees with snapshots,
-+ * the SNAPSHOT field offset is set to 0 when writing, or to U32_MAX minus the
-+ * largest value the packed field can hold when reading.
-+ */
-+static inline void compat_bformat(unsigned level, enum btree_id btree_id,
-+				  unsigned version, unsigned big_endian,
-+				  int write, struct bkey_format *f)
-+{
-+	u64 max_packed;
-+
-+	if (version < bcachefs_metadata_version_inode_btree_change &&
-+	    btree_id == BTREE_ID_inodes) {
-+		swap(f->bits_per_field[BKEY_FIELD_INODE],
-+		     f->bits_per_field[BKEY_FIELD_OFFSET]);
-+		swap(f->field_offset[BKEY_FIELD_INODE],
-+		     f->field_offset[BKEY_FIELD_OFFSET]);
-+	}
-+
-+	if (version >= bcachefs_metadata_version_snapshot)
-+		return;
-+
-+	if (!level && !btree_type_has_snapshots(btree_id))
-+		return;
-+
-+	max_packed = ~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
-+
-+	if (write)
-+		f->field_offset[BKEY_FIELD_SNAPSHOT] = 0;
-+	else
-+		f->field_offset[BKEY_FIELD_SNAPSHOT] =
-+			cpu_to_le64(U32_MAX - max_packed);
-+}
-+
-+/*
-+ * Adjust a bpos for compatibility: byte-swap it when the data's endianness
-+ * differs from the CPU's, then swap the inode and offset fields for the inodes
-+ * btree in versions before inode_btree_change.
-+ */
-+static inline void compat_bpos(unsigned level, enum btree_id btree_id,
-+			       unsigned version, unsigned big_endian,
-+			       int write, struct bpos *p)
-+{
-+	if (big_endian != CPU_BIG_ENDIAN)
-+		bch2_bpos_swab(p);
-+
-+	if (version >= bcachefs_metadata_version_inode_btree_change)
-+		return;
-+
-+	if (btree_id != BTREE_ID_inodes)
-+		return;
-+
-+	swap(p->inode, p->offset);
-+}
-+
-+/*
-+ * Adjust a btree node header's min_key and max_key for compatibility with
-+ * older on-disk versions.
-+ *
-+ * The adjustments that apply when writing are made before compat_bpos() is
-+ * run on the two keys, and the ones that apply when reading are made after
-+ * it.
-+ */
-+static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
-+ unsigned version, unsigned big_endian,
-+ int write,
-+ struct btree_node *bn)
-+{
-+ /* Write side: step min_key back to its predecessor for old-format extents btrees */
-+ if (version < bcachefs_metadata_version_inode_btree_change &&
-+ btree_id_is_extents(btree_id) &&
-+ !bpos_eq(bn->min_key, POS_MIN) &&
-+ write)
-+ bn->min_key = bpos_nosnap_predecessor(bn->min_key);
-+
-+ if (version < bcachefs_metadata_version_snapshot &&
-+ write)
-+ bn->max_key.snapshot = 0;
-+
-+ compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
-+ compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
-+
-+ if (version < bcachefs_metadata_version_snapshot &&
-+ !write)
-+ bn->max_key.snapshot = U32_MAX;
-+
-+ /* Read side: the mirror of the first adjustment, applied after compat_bpos() */
-+ if (version < bcachefs_metadata_version_inode_btree_change &&
-+ btree_id_is_extents(btree_id) &&
-+ !bpos_eq(bn->min_key, POS_MIN) &&
-+ !write)
-+ bn->min_key = bpos_nosnap_successor(bn->min_key);
-+}
-+
-+void bch2_btree_write_stats_to_text(struct printbuf *, struct bch_fs *);
-+
-+#endif /* _BCACHEFS_BTREE_IO_H */
-diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
-new file mode 100644
-index 000000000000..c2adf3fbb0b3
---- /dev/null
-+++ b/fs/bcachefs/btree_iter.c
-@@ -0,0 +1,3242 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_methods.h"
-+#include "bkey_buf.h"
-+#include "btree_cache.h"
-+#include "btree_iter.h"
-+#include "btree_journal_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_locking.h"
-+#include "btree_update.h"
-+#include "debug.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "journal.h"
-+#include "replicas.h"
-+#include "snapshot.h"
-+#include "trace.h"
-+
-+#include <linux/random.h>
-+#include <linux/prefetch.h>
-+
-+static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
-+static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
-+ struct btree_path *);
-+
-+/*
-+ * Return iter->ip_allocated when TRACK_PATH_ALLOCATED is defined, and 0
-+ * otherwise.
-+ */
-+static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter)
-+{
-+	unsigned long ip = 0;
-+
-+#ifdef TRACK_PATH_ALLOCATED
-+	ip = iter->ip_allocated;
-+#endif
-+	return ip;
-+}
-+
-+static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);
-+
-+/*
-+ * Three-way comparison of path @l against the tuple
-+ * (@r_btree_id, @r_cached, @r_pos, @r_level).
-+ *
-+ * Fields are compared in that order and the first non-zero result wins; the
-+ * final level comparison is negated.
-+ */
-+static inline int __btree_path_cmp(const struct btree_path *l,
-+				   enum btree_id r_btree_id,
-+				   bool r_cached,
-+				   struct bpos r_pos,
-+				   unsigned r_level)
-+{
-+	int ret;
-+
-+	/*
-+	 * Must match lock ordering as defined by __bch2_btree_node_lock:
-+	 */
-+	ret = cmp_int(l->btree_id, r_btree_id);
-+	if (ret)
-+		return ret;
-+
-+	ret = cmp_int((int) l->cached, (int) r_cached);
-+	if (ret)
-+		return ret;
-+
-+	ret = bpos_cmp(l->pos, r_pos);
-+	if (ret)
-+		return ret;
-+
-+	return -cmp_int(l->level, r_level);
-+}
-+
-+/* Compare two paths using the ordering implemented by __btree_path_cmp(). */
-+static inline int btree_path_cmp(const struct btree_path *l,
-+				 const struct btree_path *r)
-+{
-+	return __btree_path_cmp(l,
-+				r->btree_id,
-+				r->cached,
-+				r->pos,
-+				r->level);
-+}
-+
-+/*
-+ * Return the position following @p from the point of view of @iter.
-+ *
-+ * With BTREE_ITER_ALL_SNAPSHOTS set this is bpos_successor(@p); otherwise it
-+ * is bpos_nosnap_successor(@p) with the snapshot member replaced by
-+ * iter->snapshot.
-+ */
-+static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
-+{
-+	/* Iterating over keys in all snapshots? */
-+	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
-+		return bpos_successor(p);
-+
-+	p = bpos_nosnap_successor(p);
-+	p.snapshot = iter->snapshot;
-+	return p;
-+}
-+
-+/*
-+ * Return the position preceding @p from the point of view of @iter.
-+ *
-+ * With BTREE_ITER_ALL_SNAPSHOTS set this is bpos_predecessor(@p); otherwise it
-+ * is bpos_nosnap_predecessor(@p) with the snapshot member replaced by
-+ * iter->snapshot.
-+ */
-+static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
-+{
-+	/* Iterating over keys in all snapshots? */
-+	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
-+		return bpos_predecessor(p);
-+
-+	p = bpos_nosnap_predecessor(p);
-+	p.snapshot = iter->snapshot;
-+	return p;
-+}
-+
-+/*
-+ * Position to search the btree for on behalf of @iter: iter->pos, except that
-+ * for BTREE_ITER_IS_EXTENTS iterators not already at POS_MAX it is the
-+ * bkey_successor() of iter->pos.
-+ */
-+static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
-+{
-+	if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
-+		return iter->pos;
-+
-+	if (bkey_eq(iter->pos, POS_MAX))
-+		return iter->pos;
-+
-+	return bkey_successor(iter, iter->pos);
-+}
-+
-+/* True when path->pos sorts before the node's min_key. */
-+static inline bool btree_path_pos_before_node(struct btree_path *path,
-+					      struct btree *b)
-+{
-+	const bool before = bpos_lt(path->pos, b->data->min_key);
-+
-+	return before;
-+}
-+
-+/* True when path->pos sorts after the position of the node's key (b->key.k.p). */
-+static inline bool btree_path_pos_after_node(struct btree_path *path,
-+					     struct btree *b)
-+{
-+	const bool after = bpos_gt(path->pos, b->key.k.p);
-+
-+	return after;
-+}
-+
-+/*
-+ * True when @b belongs to the path's btree and path->pos is neither before
-+ * nor after the range covered by @b.
-+ */
-+static inline bool btree_path_pos_in_node(struct btree_path *path,
-+					  struct btree *b)
-+{
-+	if (path->btree_id != b->c.btree_id)
-+		return false;
-+
-+	if (btree_path_pos_before_node(path, b))
-+		return false;
-+
-+	return !btree_path_pos_after_node(path, b);
-+}
-+
-+/* Btree iterator: */
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+
-+/*
-+ * Debug check for a key-cache path: the object at level 0 must carry the
-+ * path's btree id and position, otherwise BUG.
-+ *
-+ * Does nothing if level 0 cannot be relocked. If level 0 was not locked on
-+ * entry it is unlocked again before returning.
-+ */
-+static void bch2_btree_path_verify_cached(struct btree_trans *trans,
-+					  struct btree_path *path)
-+{
-+	/* sample the lock state before relocking so it can be restored */
-+	const bool was_locked = btree_node_locked(path, 0);
-+	struct bkey_cached *cached;
-+
-+	if (!bch2_btree_node_relock(trans, path, 0))
-+		return;
-+
-+	cached = (void *) path->l[0].b;
-+	BUG_ON(cached->key.btree_id != path->btree_id ||
-+	       !bkey_eq(cached->key.pos, path->pos));
-+
-+	if (was_locked)
-+		return;
-+
-+	btree_node_unlock(trans, path, 0);
-+}
-+
-+/*
-+ * Debug check that the node iterator of @path at @level agrees with
-+ * path->pos: the key just before the iterator must compare < path->pos and
-+ * the key at the iterator must compare >= path->pos. On a mismatch the path
-+ * position and both keys are formatted and the kernel panics.
-+ *
-+ * No-op unless bch2_debug_check_iterators is set. Cached paths are only
-+ * checked at level 0, via bch2_btree_path_verify_cached(). Returns quietly
-+ * when the level has no node or cannot be relocked. If the level was not
-+ * locked on entry, it is unlocked again on the success path.
-+ */
-+static void bch2_btree_path_verify_level(struct btree_trans *trans,
-+ struct btree_path *path, unsigned level)
-+{
-+ struct btree_path_level *l;
-+ struct btree_node_iter tmp;
-+ bool locked;
-+ struct bkey_packed *p, *k;
-+ struct printbuf buf1 = PRINTBUF;
-+ struct printbuf buf2 = PRINTBUF;
-+ struct printbuf buf3 = PRINTBUF;
-+ const char *msg;
-+
-+ if (!bch2_debug_check_iterators)
-+ return;
-+
-+ l = &path->l[level];
-+ /* work on a copy: stepping backwards below must not move l->iter */
-+ tmp = l->iter;
-+ /* remember the lock state so it can be restored after the relock */
-+ locked = btree_node_locked(path, level);
-+
-+ if (path->cached) {
-+ if (!level)
-+ bch2_btree_path_verify_cached(trans, path);
-+ return;
-+ }
-+
-+ if (!btree_path_node(path, level))
-+ return;
-+
-+ if (!bch2_btree_node_relock_notrace(trans, path, level))
-+ return;
-+
-+ BUG_ON(!btree_path_pos_in_node(path, l->b));
-+
-+ bch2_btree_node_iter_verify(&l->iter, l->b);
-+
-+ /*
-+ * For interior nodes, the iterator will have skipped past deleted keys:
-+ */
-+ p = level
-+ ? bch2_btree_node_iter_prev(&tmp, l->b)
-+ : bch2_btree_node_iter_prev_all(&tmp, l->b);
-+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
-+
-+ /* previous key must sort strictly before the path position */
-+ if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
-+ msg = "before";
-+ goto err;
-+ }
-+
-+ /* current key must not sort before the path position */
-+ if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
-+ msg = "after";
-+ goto err;
-+ }
-+
-+ if (!locked)
-+ btree_node_unlock(trans, path, level);
-+ return;
-+err:
-+ /* buf1: path position, buf2: previous key, buf3: current key */
-+ bch2_bpos_to_text(&buf1, path->pos);
-+
-+ if (p) {
-+ struct bkey uk = bkey_unpack_key(l->b, p);
-+
-+ bch2_bkey_to_text(&buf2, &uk);
-+ } else {
-+ prt_printf(&buf2, "(none)");
-+ }
-+
-+ if (k) {
-+ struct bkey uk = bkey_unpack_key(l->b, k);
-+
-+ bch2_bkey_to_text(&buf3, &uk);
-+ } else {
-+ prt_printf(&buf3, "(none)");
-+ }
-+
-+ panic("path should be %s key at level %u:\n"
-+ "path pos %s\n"
-+ "prev key %s\n"
-+ "cur key %s\n",
-+ msg, level, buf1.buf, buf2.buf, buf3.buf);
-+}
-+
-+/*
-+ * Debug check of a whole path: verify each populated level with
-+ * bch2_btree_path_verify_level(), then the lock state with
-+ * bch2_btree_path_verify_locks().
-+ *
-+ * Cached paths are only examined at level 0. The walk stops at the first
-+ * level without a node; for a non-cached path it is a BUG if the btree root
-+ * sits at a higher level than that.
-+ */
-+static void bch2_btree_path_verify(struct btree_trans *trans,
-+				   struct btree_path *path)
-+{
-+	struct bch_fs *c = trans->c;
-+	unsigned level;
-+
-+	EBUG_ON(path->btree_id >= BTREE_ID_NR);
-+
-+	for (level = 0;
-+	     level < (path->cached ? 1 : BTREE_MAX_DEPTH);
-+	     level++) {
-+		if (path->l[level].b) {
-+			bch2_btree_path_verify_level(trans, path, level);
-+			continue;
-+		}
-+
-+		BUG_ON(!path->cached &&
-+		       bch2_btree_id_root(c, path->btree_id)->b->c.level > level);
-+		break;
-+	}
-+
-+	bch2_btree_path_verify_locks(path);
-+}
-+
-+/* Run bch2_btree_path_verify() on every path of the transaction. */
-+void bch2_trans_verify_paths(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path) {
-+		bch2_btree_path_verify(trans, path);
-+	}
-+}
-+
-+/*
-+ * Debug check of an iterator's invariants, followed by verification of its
-+ * paths (update_path first when present, then path). BUGs when:
-+ *
-+ * - the btree id is out of range;
-+ * - BTREE_ITER_CACHED disagrees with iter->path->cached;
-+ * - BTREE_ITER_IS_EXTENTS and BTREE_ITER_ALL_SNAPSHOTS are both set;
-+ * - BTREE_ITER_ALL_SNAPSHOTS is set without __BTREE_ITER_ALL_SNAPSHOTS on a
-+ *   btree for which btree_type_has_snapshot_field() is false.
-+ */
-+static void bch2_btree_iter_verify(struct btree_iter *iter)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	const bool want_cached		= iter->flags & BTREE_ITER_CACHED;
-+	const bool is_extents		= iter->flags & BTREE_ITER_IS_EXTENTS;
-+	const bool all_snapshots	= iter->flags & BTREE_ITER_ALL_SNAPSHOTS;
-+	const bool all_snapshots_raw	= iter->flags & __BTREE_ITER_ALL_SNAPSHOTS;
-+
-+	BUG_ON(iter->btree_id >= BTREE_ID_NR);
-+
-+	BUG_ON(want_cached != iter->path->cached);
-+
-+	BUG_ON(is_extents && all_snapshots);
-+
-+	BUG_ON(!all_snapshots_raw &&
-+	       all_snapshots &&
-+	       !btree_type_has_snapshot_field(iter->btree_id));
-+
-+	if (iter->update_path)
-+		bch2_btree_path_verify(trans, iter->update_path);
-+	bch2_btree_path_verify(trans, iter->path);
-+}
-+
-+/*
-+ * Debug check of iterator position invariants. BUGs when:
-+ *
-+ * - BTREE_ITER_FILTER_SNAPSHOTS is set but iter->pos.snapshot is 0;
-+ * - BTREE_ITER_ALL_SNAPSHOTS is clear and iter->pos.snapshot differs from
-+ *   iter->snapshot;
-+ * - iter->pos lies outside [bkey_start_pos(&iter->k), iter->k.p].
-+ */
-+static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
-+{
-+	const bool filter_snapshots = iter->flags & BTREE_ITER_FILTER_SNAPSHOTS;
-+	const bool all_snapshots    = iter->flags & BTREE_ITER_ALL_SNAPSHOTS;
-+
-+	BUG_ON(filter_snapshots && !iter->pos.snapshot);
-+
-+	BUG_ON(!all_snapshots &&
-+	       iter->pos.snapshot != iter->snapshot);
-+
-+	BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) ||
-+	       bkey_gt(iter->pos, iter->k.p));
-+}
-+
-+/*
-+ * Debug check of a key @k about to be returned by a snapshot-filtering
-+ * iterator.
-+ *
-+ * Skipped (returns 0) unless bch2_debug_check_iterators and
-+ * BTREE_ITER_FILTER_SNAPSHOTS are set and @k is a real key. BUGs if
-+ * bch2_snapshot_is_ancestor() is false for (iter->snapshot, k's snapshot).
-+ * Then looks up the previous key with a temporary all-snapshots iterator and
-+ * panics if that key has the same position as @k (per bkey_eq()) and
-+ * bch2_snapshot_is_ancestor() is positive for its snapshot as well.
-+ *
-+ * Returns 0, or the error produced while looking up the previous key.
-+ */
-+static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct btree_iter copy;
-+	struct bkey_s_c prev;
-+	int ret = 0;
-+
-+	if (!bch2_debug_check_iterators ||
-+	    !(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) ||
-+	    bkey_err(k) ||
-+	    !k.k)
-+		return 0;
-+
-+	BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
-+					  iter->snapshot,
-+					  k.k->p.snapshot));
-+
-+	bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
-+			     BTREE_ITER_NOPRESERVE|
-+			     BTREE_ITER_ALL_SNAPSHOTS);
-+	prev = bch2_btree_iter_prev(&copy);
-+	if (!prev.k)
-+		goto out;
-+
-+	ret = bkey_err(prev);
-+	if (ret)
-+		goto out;
-+
-+	if (bkey_eq(prev.k->p, k.k->p) &&
-+	    bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
-+				      prev.k->p.snapshot) > 0) {
-+		struct printbuf cur_text = PRINTBUF;
-+		struct printbuf prev_text = PRINTBUF;
-+
-+		bch2_bkey_to_text(&cur_text, k.k);
-+		bch2_bkey_to_text(&prev_text, prev.k);
-+
-+		panic("iter snap %u\n"
-+		      "k %s\n"
-+		      "prev %s\n",
-+		      iter->snapshot,
-+		      cur_text.buf, prev_text.buf);
-+	}
-+out:
-+	bch2_trans_iter_exit(trans, &copy);
-+	return ret;
-+}
-+
-+/*
-+ * Debug assertion that @pos in btree @id is covered by a locked path.
-+ *
-+ * Walks the transaction's paths in sorted order, considering only paths with
-+ * matching btree id and cached-ness that are locked at level 0 and have
-+ * should_be_locked set. A non-cached path covers @pos when @pos lies within
-+ * its level-0 node (min_key .. key.k.p); a cached path covers it when its
-+ * position equals @pos. If no path covers @pos, the transaction's paths and
-+ * updates are dumped and the kernel panics.
-+ */
-+void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
-+			    struct bpos pos, bool key_cache)
-+{
-+	struct printbuf buf = PRINTBUF;
-+	struct btree_path *path;
-+	unsigned idx;
-+
-+	btree_trans_sort_paths(trans);
-+
-+	trans_for_each_path_inorder(trans, path, idx) {
-+		int cmp = cmp_int(path->btree_id, id);
-+
-+		if (!cmp)
-+			cmp = cmp_int(path->cached, key_cache);
-+
-+		/* paths are sorted: past the wanted group means no match */
-+		if (cmp > 0)
-+			break;
-+		if (cmp < 0)
-+			continue;
-+
-+		if (!btree_node_locked(path, 0) ||
-+		    !path->should_be_locked)
-+			continue;
-+
-+		if (key_cache
-+		    ? bkey_eq(pos, path->pos)
-+		    : (bkey_ge(pos, path->l[0].b->data->min_key) &&
-+		       bkey_le(pos, path->l[0].b->key.k.p)))
-+			return;
-+	}
-+
-+	bch2_dump_trans_paths_updates(trans);
-+	bch2_bpos_to_text(&buf, pos);
-+
-+	panic("not locked: %s %s%s\n",
-+	      bch2_btree_id_str(id), buf.buf,
-+	      key_cache ? " cached" : "");
-+}
-+
-+#else
-+
-+/* Without CONFIG_BCACHEFS_DEBUG the verification hooks are empty stubs. */
-+static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
-+						struct btree_path *path, unsigned l)
-+{
-+}
-+
-+static inline void bch2_btree_path_verify(struct btree_trans *trans,
-+					  struct btree_path *path)
-+{
-+}
-+
-+static inline void bch2_btree_iter_verify(struct btree_iter *iter)
-+{
-+}
-+
-+static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
-+{
-+}
-+
-+static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
-+{
-+	return 0;
-+}
-+
-+#endif
-+
-+/* Btree path: fixups after btree updates */
-+
-+/*
-+ * Point the node iterator's entry for bset @t at key @k.
-+ *
-+ * If the iterator already has an entry whose end matches t->end_offset, that
-+ * entry's position is overwritten and the iterator is re-sorted; otherwise a
-+ * new entry running from @k to the end of the bset is pushed.
-+ */
-+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
-+					struct btree *b,
-+					struct bset_tree *t,
-+					struct bkey_packed *k)
-+{
-+	struct btree_node_iter_set *entry;
-+
-+	btree_node_iter_for_each(iter, entry) {
-+		if (entry->end == t->end_offset) {
-+			entry->k = __btree_node_key_to_offset(b, k);
-+			bch2_btree_node_iter_sort(iter, b);
-+			return;
-+		}
-+	}
-+
-+	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
-+}
-+
-+/*
-+ * After the key at @where was modified: if the path's node iterator at @b's
-+ * level currently points at @where and that key now compares before
-+ * path->pos, advance the iterator past it.
-+ */
-+static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
-+					       struct btree *b,
-+					       struct bkey_packed *where)
-+{
-+	struct btree_path_level *lvl = &path->l[b->c.level];
-+
-+	if (where == bch2_btree_node_iter_peek_all(&lvl->iter, lvl->b) &&
-+	    bkey_iter_pos_cmp(lvl->b, where, &path->pos) < 0)
-+		bch2_btree_node_iter_advance(&lvl->iter, lvl->b);
-+}
-+
-+/*
-+ * Apply __bch2_btree_path_fix_key_modified() to every path of the
-+ * transaction that has node @b, verifying each path's level afterwards.
-+ */
-+void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
-+				      struct btree *b,
-+				      struct bkey_packed *where)
-+{
-+	struct btree_path *linked;
-+
-+	trans_for_each_path_with_node(trans, b, linked) {
-+		__bch2_btree_path_fix_key_modified(linked, b, where);
-+		bch2_btree_path_verify_level(trans, linked, b->c.level);
-+	}
-+}
-+
-+/*
-+ * Fix up @node_iter after the keys at @where in bset @t of node @b were
-+ * changed: @clobber_u64s units starting at @where were replaced by
-+ * @new_u64s units (NOTE(review): units are presumably u64 words, judging by
-+ * the parameter names - confirm).
-+ *
-+ * @path only supplies the position (path->pos) used to decide whether the
-+ * iterator should point at the new key.
-+ */
-+static void __bch2_btree_node_iter_fix(struct btree_path *path,
-+ struct btree *b,
-+ struct btree_node_iter *node_iter,
-+ struct bset_tree *t,
-+ struct bkey_packed *where,
-+ unsigned clobber_u64s,
-+ unsigned new_u64s)
-+{
-+ const struct bkey_packed *end = btree_bkey_last(b, t);
-+ struct btree_node_iter_set *set;
-+ unsigned offset = __btree_node_key_to_offset(b, where);
-+ /* how far everything after the modified region moved */
-+ int shift = new_u64s - clobber_u64s;
-+ /* end offset the iterator entry for this bset would still carry */
-+ unsigned old_end = t->end_offset - shift;
-+ unsigned orig_iter_pos = node_iter->data[0].k;
-+ /* did the iterator's first entry point into the overwritten region? */
-+ bool iter_current_key_modified =
-+ orig_iter_pos >= offset &&
-+ orig_iter_pos <= offset + clobber_u64s;
-+
-+ /* locate the iterator entry belonging to bset @t via its old end */
-+ btree_node_iter_for_each(node_iter, set)
-+ if (set->end == old_end)
-+ goto found;
-+
-+ /* didn't find the bset in the iterator - might have to re-add it: */
-+ if (new_u64s &&
-+ bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
-+ bch2_btree_node_iter_push(node_iter, b, where, end);
-+ goto fixup_done;
-+ } else {
-+ /* Iterator is after key that changed */
-+ return;
-+ }
-+found:
-+ set->end = t->end_offset;
-+
-+ /* Iterator hasn't gotten to the key that changed yet: */
-+ if (set->k < offset)
-+ return;
-+
-+ if (new_u64s &&
-+ bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
-+ /* the new key is at or after path->pos: point at it */
-+ set->k = offset;
-+ } else if (set->k < offset + clobber_u64s) {
-+ /* entry pointed into the overwritten region: skip past the new key */
-+ set->k = offset + new_u64s;
-+ if (set->k == set->end)
-+ bch2_btree_node_iter_set_drop(node_iter, set);
-+ } else {
-+ /* Iterator is after key that changed */
-+ set->k = (int) set->k + shift;
-+ return;
-+ }
-+
-+ bch2_btree_node_iter_sort(node_iter, b);
-+fixup_done:
-+ if (node_iter->data[0].k != orig_iter_pos)
-+ iter_current_key_modified = true;
-+
-+ /*
-+ * When a new key is added, and the node iterator now points to that
-+ * key, the iterator might have skipped past deleted keys that should
-+ * come after the key the iterator now points to. We have to rewind to
-+ * before those deleted keys - otherwise
-+ * bch2_btree_node_iter_prev_all() breaks:
-+ */
-+ if (!bch2_btree_node_iter_end(node_iter) &&
-+ iter_current_key_modified &&
-+ b->c.level) {
-+ struct bkey_packed *k, *k2, *p;
-+
-+ k = bch2_btree_node_iter_peek_all(node_iter, b);
-+
-+ /* note: @t is reused as the loop cursor from here on */
-+ for_each_bset(b, t) {
-+ bool set_pos = false;
-+
-+ /* skip the bset the iterator's first entry belongs to */
-+ if (node_iter->data[0].end == t->end_offset)
-+ continue;
-+
-+ k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);
-+
-+ /* step back over keys in this bset that compare after @k */
-+ while ((p = bch2_bkey_prev_all(b, t, k2)) &&
-+ bkey_iter_cmp(b, k, p) < 0) {
-+ k2 = p;
-+ set_pos = true;
-+ }
-+
-+ if (set_pos)
-+ btree_node_iter_set_set_pos(node_iter,
-+ b, t, k2);
-+ }
-+ }
-+}
-+
-+/*
-+ * Fix up node iterators after the keys at @where in node @b changed
-+ * (@clobber_u64s replaced by @new_u64s).
-+ *
-+ * @node_iter is fixed first when it is not @path's own iterator for @b's
-+ * level (and verified if bch2_debug_check_iterators is set); then the
-+ * iterator of every path in the transaction that has node @b is fixed and
-+ * that path level verified.
-+ */
-+void bch2_btree_node_iter_fix(struct btree_trans *trans,
-+			      struct btree_path *path,
-+			      struct btree *b,
-+			      struct btree_node_iter *node_iter,
-+			      struct bkey_packed *where,
-+			      unsigned clobber_u64s,
-+			      unsigned new_u64s)
-+{
-+	struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
-+	const bool external_iter = node_iter != &path->l[b->c.level].iter;
-+	struct btree_path *linked;
-+
-+	if (external_iter) {
-+		__bch2_btree_node_iter_fix(path, b, node_iter, t,
-+					   where, clobber_u64s, new_u64s);
-+
-+		if (bch2_debug_check_iterators)
-+			bch2_btree_node_iter_verify(node_iter, b);
-+	}
-+
-+	trans_for_each_path_with_node(trans, b, linked) {
-+		struct btree_node_iter *linked_iter = &linked->l[b->c.level].iter;
-+
-+		__bch2_btree_node_iter_fix(linked, b, linked_iter, t,
-+					   where, clobber_u64s, new_u64s);
-+		bch2_btree_path_verify_level(trans, linked, b->c.level);
-+	}
-+}
-+
-+/* Btree path level: pointer to a particular btree node and node iter */
-+
-+/*
-+ * Unpack packed key @k of level @l into @u and return it as a bkey_s_c.
-+ *
-+ * For a NULL @k, u->type is set to KEY_TYPE_deleted and bkey_s_c_null is
-+ * returned. @c is not used.
-+ */
-+static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
-+						  struct btree_path_level *l,
-+						  struct bkey *u,
-+						  struct bkey_packed *k)
-+{
-+	if (likely(k))
-+		return bkey_disassemble(l->b, k, u);
-+
-+	/*
-+	 * signal to bch2_btree_iter_peek_slot() that we're currently at
-+	 * a hole
-+	 */
-+	u->type = KEY_TYPE_deleted;
-+	return bkey_s_c_null;
-+}
-+
-+/*
-+ * Unpack the key the level's node iterator currently points at, as reported
-+ * by bch2_btree_node_iter_peek_all().
-+ */
-+static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
-+							 struct btree_path_level *l,
-+							 struct bkey *u)
-+{
-+	struct bkey_packed *packed =
-+		bch2_btree_node_iter_peek_all(&l->iter, l->b);
-+
-+	return __btree_iter_unpack(c, l, u, packed);
-+}
-+
-+/*
-+ * Peek the key at the level's node iterator (bch2_btree_node_iter_peek())
-+ * and move path->pos to it.
-+ *
-+ * With no key, path->pos becomes the position of the node's own key
-+ * (l->b->key.k.p). The transaction's paths are marked unsorted.
-+ */
-+static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
-+						     struct btree_path *path,
-+						     struct btree_path_level *l,
-+						     struct bkey *u)
-+{
-+	struct bkey_packed *packed = bch2_btree_node_iter_peek(&l->iter, l->b);
-+	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u, packed);
-+
-+	if (k.k)
-+		path->pos = k.k->p;
-+	else
-+		path->pos = l->b->key.k.p;
-+
-+	trans->paths_sorted = false;
-+	bch2_btree_path_verify_level(trans, path, l - path->l);
-+	return k;
-+}
-+
-+/*
-+ * Step the level's node iterator back one key (bch2_btree_node_iter_prev())
-+ * and move path->pos to that key.
-+ *
-+ * With no previous key, path->pos becomes the node's min_key. The
-+ * transaction's paths are marked unsorted.
-+ */
-+static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
-+						     struct btree_path *path,
-+						     struct btree_path_level *l,
-+						     struct bkey *u)
-+{
-+	struct bkey_packed *packed = bch2_btree_node_iter_prev(&l->iter, l->b);
-+	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u, packed);
-+
-+	if (k.k)
-+		path->pos = k.k->p;
-+	else
-+		path->pos = l->b->data->min_key;
-+
-+	trans->paths_sorted = false;
-+	bch2_btree_path_verify_level(trans, path, l - path->l);
-+	return k;
-+}
-+
-+/*
-+ * Advance the level's node iterator until it no longer points at a key that
-+ * compares before path->pos.
-+ *
-+ * A positive @max_advance bounds the number of steps; returns false when
-+ * that bound is reached while more advancing is still needed, true
-+ * otherwise. A non-positive @max_advance means no bound.
-+ */
-+static inline bool btree_path_advance_to_pos(struct btree_path *path,
-+					     struct btree_path_level *l,
-+					     int max_advance)
-+{
-+	int nr_advanced = 0;
-+
-+	for (;;) {
-+		struct bkey_packed *k =
-+			bch2_btree_node_iter_peek_all(&l->iter, l->b);
-+
-+		if (!k || bkey_iter_pos_cmp(l->b, k, &path->pos) >= 0)
-+			return true;
-+
-+		if (max_advance > 0 && nr_advanced >= max_advance)
-+			return false;
-+
-+		bch2_btree_node_iter_advance(&l->iter, l->b);
-+		nr_advanced++;
-+	}
-+}
-+
-+/*
-+ * (Re)initialise the node iterator of @path at @level for path->pos, using
-+ * the node already stored at that level.
-+ */
-+static inline void __btree_path_level_init(struct btree_path *path,
-+					   unsigned level)
-+{
-+	struct btree_path_level *lvl = &path->l[level];
-+
-+	bch2_btree_node_iter_init(&lvl->iter, lvl->b, &path->pos);
-+
-+	if (!level)
-+		return;
-+
-+	/*
-+	 * Iterators to interior nodes should always be pointed at the first non
-+	 * whiteout:
-+	 */
-+	bch2_btree_node_iter_peek(&lvl->iter, lvl->b);
-+}
-+
-+/*
-+ * Install node @b at its level in @path: record the node pointer and the
-+ * current sequence number of its lock, then initialise the level's node
-+ * iterator.
-+ *
-+ * BUGs on cached paths; with EBUG_ON enabled, also asserts that path->pos
-+ * lies within @b.
-+ */
-+void bch2_btree_path_level_init(struct btree_trans *trans,
-+				struct btree_path *path,
-+				struct btree *b)
-+{
-+	BUG_ON(path->cached);
-+
-+	EBUG_ON(!btree_path_pos_in_node(path, b));
-+
-+	path->l[b->c.level].b		= b;
-+	path->l[b->c.level].lock_seq	= six_lock_seq(&b->c.lock);
-+
-+	__btree_path_level_init(path, b->c.level);
-+}
-+
-+/* Btree path: fixups after btree node updates: */
-+
-+/*
-+ * Refresh the cached "old" key/value of every pending update that targets
-+ * node @b: non-cached updates at @b's level and btree whose key position
-+ * lies within [min_key, max_key] of @b.
-+ *
-+ * old_v is re-read through the update's path; while journal replay is not
-+ * finished, a key found by bch2_journal_keys_peek_slot() at the same
-+ * position replaces both old_k and old_v.
-+ */
-+static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, struct btree *b)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_insert_entry *i;
-+
-+	trans_for_each_update(trans, i) {
-+		const bool in_node =
-+			!i->cached &&
-+			i->level == b->c.level &&
-+			i->btree_id == b->c.btree_id &&
-+			bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
-+			bpos_cmp(i->k->k.p, b->data->max_key) <= 0;
-+
-+		if (in_node) {
-+			i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
-+
-+			if (unlikely(trans->journal_replay_not_finished)) {
-+				struct bkey_i *j_k =
-+					bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
-+								    i->k->k.p);
-+
-+				if (j_k) {
-+					i->old_k = j_k->k;
-+					i->old_v = &j_k->v;
-+				}
-+			}
-+		}
-+	}
-+}
-+
-+/*
-+ * A btree node is being replaced - update the iterator to point to the new
-+ * node:
-+ *
-+ * Every path that is up to date, not cached, and whose position falls inside
-+ * @b is switched over to @b at b's level. Pending updates that target @b are
-+ * revalidated afterwards.
-+ */
-+void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
-+{
-+ struct btree_path *path;
-+
-+ trans_for_each_path(trans, path)
-+ if (path->uptodate == BTREE_ITER_UPTODATE &&
-+ !path->cached &&
-+ btree_path_pos_in_node(path, b)) {
-+ /* lock type this path wants at b's level */
-+ enum btree_node_locked_type t =
-+ btree_lock_want(path, b->c.level);
-+
-+ if (t != BTREE_NODE_UNLOCKED) {
-+ /*
-+ * Drop whatever the path holds at this level, then
-+ * account a lock of type t on the new node for this path.
-+ * NOTE(review): six_lock_increment() presumably bumps the
-+ * count on a lock that is already held - confirm.
-+ */
-+ btree_node_unlock(trans, path, b->c.level);
-+ six_lock_increment(&b->c.lock, (enum six_lock_type) t);
-+ mark_btree_node_locked(trans, path, b->c.level, t);
-+ }
-+
-+ bch2_btree_path_level_init(trans, path, b);
-+ }
-+
-+ bch2_trans_revalidate_updates_in_node(trans, b);
-+}
-+
-+/*
-+ * A btree node has been modified in such a way as to invalidate iterators - fix
-+ * them:
-+ *
-+ * The node iterator of every path that has @b is reinitialised at @b's
-+ * level, then pending updates that target @b are revalidated.
-+ */
-+void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
-+{
-+	struct btree_path *linked;
-+
-+	trans_for_each_path_with_node(trans, b, linked) {
-+		__btree_path_level_init(linked, b->c.level);
-+	}
-+
-+	bch2_trans_revalidate_updates_in_node(trans, b);
-+}
-+
-+/* Btree path: traverse, set_pos: */
-+
-+/*
-+ * Lock the root node of the path's btree and install it in @path at the
-+ * root's level.
-+ *
-+ * Returns 0 on success, 1 if the root sits below @depth_want (path->level is
-+ * then set to @depth_want and all levels from there upwards are cleared to
-+ * NULL), or the transaction-restart error from btree_node_lock(). Any other
-+ * lock failure is a BUG.
-+ *
-+ * The root pointer is sampled without a lock, so after locking it is
-+ * re-checked; if the root (or its level) changed in the meantime the lock is
-+ * dropped and the whole attempt is repeated.
-+ */
-+static inline int btree_path_lock_root(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned depth_want,
-+ unsigned long trace_ip)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b;
-+ enum six_lock_type lock_type;
-+ unsigned i;
-+ int ret;
-+
-+ EBUG_ON(path->nodes_locked);
-+
-+ while (1) {
-+ /* unlocked snapshot of the current root and its level */
-+ b = READ_ONCE(*rootp);
-+ path->level = READ_ONCE(b->c.level);
-+
-+ if (unlikely(path->level < depth_want)) {
-+ /*
-+ * the root is at a lower depth than the depth we want:
-+ * got to the end of the btree, or we're walking nodes
-+ * greater than some depth and there are no nodes >=
-+ * that depth
-+ */
-+ path->level = depth_want;
-+ for (i = path->level; i < BTREE_MAX_DEPTH; i++)
-+ path->l[i].b = NULL;
-+ return 1;
-+ }
-+
-+ lock_type = __btree_lock_want(path, path->level);
-+ ret = btree_node_lock(trans, path, &b->c,
-+ path->level, lock_type, trace_ip);
-+ if (unlikely(ret)) {
-+ /* root changed while locking: sample it again */
-+ if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
-+ continue;
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ return ret;
-+ BUG();
-+ }
-+
-+ /* still the root, at the level sampled before taking the lock? */
-+ if (likely(b == READ_ONCE(*rootp) &&
-+ b->c.level == path->level &&
-+ !race_fault())) {
-+ /* levels below the root: error marker; levels above: NULL */
-+ for (i = 0; i < path->level; i++)
-+ path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
-+ path->l[path->level].b = b;
-+ for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
-+ path->l[i].b = NULL;
-+
-+ mark_btree_node_locked(trans, path, path->level,
-+ (enum btree_node_locked_type) lock_type);
-+ bch2_btree_path_level_init(trans, path, b);
-+ return 0;
-+ }
-+
-+ /* lost the race: release the lock and retry */
-+ six_unlock_type(&b->c.lock, lock_type);
-+ }
-+}
-+
-+/*
-+ * Issue prefetches for the child nodes referenced by the keys following the
-+ * current one at path->level.
-+ *
-+ * The number of keys considered depends on whether BCH_FS_STARTED is set and
-+ * on whether path->level is above 1 (0/2 when started, 1/16 otherwise). A
-+ * copy of the node iterator is walked, so the path's own iterator does not
-+ * move. Stops early when the node cannot be relocked, the keys run out, or a
-+ * prefetch returns an error, which is then returned. If the level was not
-+ * locked on entry it is unlocked again before returning.
-+ */
-+noinline
-+static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_path_level *l = path_l(path);
-+	struct btree_node_iter scan = l->iter;
-+	const bool was_locked = btree_node_locked(path, path->level);
-+	struct bkey_buf child_key;
-+	unsigned remaining;
-+	int ret = 0;
-+
-+	if (test_bit(BCH_FS_STARTED, &c->flags))
-+		remaining = path->level > 1 ? 0 : 2;
-+	else
-+		remaining = path->level > 1 ? 1 : 16;
-+
-+	bch2_bkey_buf_init(&child_key);
-+
-+	for (; remaining && !ret; remaining--) {
-+		struct bkey_packed *k;
-+
-+		if (!bch2_btree_node_relock(trans, path, path->level))
-+			break;
-+
-+		bch2_btree_node_iter_advance(&scan, l->b);
-+		k = bch2_btree_node_iter_peek(&scan, l->b);
-+		if (!k)
-+			break;
-+
-+		bch2_bkey_buf_unpack(&child_key, c, l->b, k);
-+		ret = bch2_btree_node_prefetch(trans, path, child_key.k, path->btree_id,
-+					       path->level - 1);
-+	}
-+
-+	if (!was_locked)
-+		btree_node_unlock(trans, path, path->level);
-+
-+	bch2_bkey_buf_exit(&child_key, c);
-+	return ret;
-+}
-+
-+/*
-+ * Variant of btree_path_prefetch() that takes the following keys from the
-+ * combined btree-and-journal iterator @jiter, which is advanced in the
-+ * process.
-+ *
-+ * The key budget, the early-exit conditions and the lock restoration are the
-+ * same as in btree_path_prefetch().
-+ */
-+static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
-+				 struct btree_and_journal_iter *jiter)
-+{
-+	struct bch_fs *c = trans->c;
-+	const bool was_locked = btree_node_locked(path, path->level);
-+	struct bkey_buf child_key;
-+	unsigned remaining;
-+	int ret = 0;
-+
-+	if (test_bit(BCH_FS_STARTED, &c->flags))
-+		remaining = path->level > 1 ? 0 : 2;
-+	else
-+		remaining = path->level > 1 ? 1 : 16;
-+
-+	bch2_bkey_buf_init(&child_key);
-+
-+	for (; remaining && !ret; remaining--) {
-+		struct bkey_s_c k;
-+
-+		if (!bch2_btree_node_relock(trans, path, path->level))
-+			break;
-+
-+		bch2_btree_and_journal_iter_advance(jiter);
-+		k = bch2_btree_and_journal_iter_peek(jiter);
-+		if (!k.k)
-+			break;
-+
-+		bch2_bkey_buf_reassemble(&child_key, c, k);
-+		ret = bch2_btree_node_prefetch(trans, path, child_key.k, path->btree_id,
-+					       path->level - 1);
-+	}
-+
-+	if (!was_locked)
-+		btree_node_unlock(trans, path, path->level);
-+
-+	bch2_bkey_buf_exit(&child_key, c);
-+	return ret;
-+}
-+
-+/*
-+ * Store the address of in-memory node @b in the mem_ptr field of the
-+ * btree_ptr_v2 key that the path's iterator points at on level @plevel.
-+ *
-+ * Does nothing if that level cannot be relocked; BUGs if the key is not of
-+ * type KEY_TYPE_btree_ptr_v2. If the level was not locked on entry it is
-+ * unlocked again before returning.
-+ */
-+static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
-+					    struct btree_path *path,
-+					    unsigned plevel, struct btree *b)
-+{
-+	struct btree_path_level *parent = &path->l[plevel];
-+	const bool was_locked = btree_node_locked(path, plevel);
-+	struct bch_btree_ptr_v2 *ptr;
-+	struct bkey_packed *packed;
-+
-+	if (!bch2_btree_node_relock(trans, path, plevel))
-+		return;
-+
-+	packed = bch2_btree_node_iter_peek_all(&parent->iter, parent->b);
-+	BUG_ON(packed->type != KEY_TYPE_btree_ptr_v2);
-+
-+	ptr = (void *) bkeyp_val(&parent->b->format, packed);
-+	ptr->mem_ptr = (unsigned long)b;
-+
-+	if (was_locked)
-+		return;
-+
-+	btree_node_unlock(trans, path, plevel);
-+}
-+
-+/*
-+ * Look up the key at the path's current level through a combined
-+ * btree-and-journal iterator and copy it into @out.
-+ *
-+ * With BTREE_ITER_PREFETCH in @flags, the following keys are also prefetched
-+ * via btree_path_prefetch_j(), whose result is returned; otherwise returns 0.
-+ */
-+static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
-+						      struct btree_path *path,
-+						      unsigned flags,
-+						      struct bkey_buf *out)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_path_level *l = path_l(path);
-+	struct btree_and_journal_iter jiter;
-+	struct bkey_s_c found;
-+	int ret = 0;
-+
-+	__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
-+
-+	found = bch2_btree_and_journal_iter_peek(&jiter);
-+	bch2_bkey_buf_reassemble(out, c, found);
-+
-+	if (flags & BTREE_ITER_PREFETCH)
-+		ret = btree_path_prefetch_j(trans, path, &jiter);
-+
-+	bch2_btree_and_journal_iter_exit(&jiter);
-+	return ret;
-+}
-+
-+/*
-+ * Descend @path by one level: fetch and lock the child node referenced by
-+ * the key at the current level, then make that child the path's current
-+ * level.
-+ *
-+ * The current level must be locked on entry. Returns 0 on success, or the
-+ * error from the key lookup / prefetch / bch2_btree_node_get(); on error the
-+ * path's level is left unchanged.
-+ */
-+static __always_inline int btree_path_down(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned flags,
-+ unsigned long trace_ip)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_path_level *l = path_l(path);
-+ struct btree *b;
-+ unsigned level = path->level - 1;
-+ enum six_lock_type lock_type = __btree_lock_want(path, level);
-+ struct bkey_buf tmp;
-+ int ret;
-+
-+ EBUG_ON(!btree_node_locked(path, path->level));
-+
-+ bch2_bkey_buf_init(&tmp);
-+
-+ /* copy the key pointing at the child into tmp (and optionally prefetch) */
-+ if (unlikely(trans->journal_replay_not_finished)) {
-+ ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
-+ if (ret)
-+ goto err;
-+ } else {
-+ bch2_bkey_buf_unpack(&tmp, c, l->b,
-+ bch2_btree_node_iter_peek(&l->iter, l->b));
-+
-+ if (flags & BTREE_ITER_PREFETCH) {
-+ ret = btree_path_prefetch(trans, path);
-+ if (ret)
-+ goto err;
-+ }
-+ }
-+
-+ b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
-+ ret = PTR_ERR_OR_ZERO(b);
-+ if (unlikely(ret))
-+ goto err;
-+
-+ /* refresh the pointer stored in the parent's key if it doesn't match b */
-+ if (likely(!trans->journal_replay_not_finished &&
-+ tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
-+ unlikely(b != btree_node_mem_ptr(tmp.k)))
-+ btree_node_mem_ptr_set(trans, path, level + 1, b);
-+
-+ /* a read lock on the parent is released once the child has been obtained */
-+ if (btree_node_read_locked(path, level + 1))
-+ btree_node_unlock(trans, path, level + 1);
-+
-+ mark_btree_node_locked(trans, path, level,
-+ (enum btree_node_locked_type) lock_type);
-+ path->level = level;
-+ bch2_btree_path_level_init(trans, path, b);
-+
-+ bch2_btree_path_verify_locks(path);
-+err:
-+ /* shared exit: also reached on success, with ret == 0 */
-+ bch2_bkey_buf_exit(&tmp, c);
-+ return ret;
-+}
-+
-+
-+/*
-+ * Drop all locks held by the transaction and re-traverse its paths in sorted
-+ * order.
-+ *
-+ * Returns -BCH_ERR_transaction_restart_in_traverse_all when called while a
-+ * traverse_all is already in progress. A transaction restart or ENOMEM from
-+ * traversing a path makes the whole procedure start over; any other error is
-+ * returned. Returns 0 once every sorted path has been traversed.
-+ */
-+static int bch2_btree_path_traverse_all(struct btree_trans *trans)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_path *path;
-+ unsigned long trace_ip = _RET_IP_;
-+ int i, ret = 0;
-+
-+ if (trans->in_traverse_all)
-+ return -BCH_ERR_transaction_restart_in_traverse_all;
-+
-+ trans->in_traverse_all = true;
-+retry_all:
-+ /* clear the restart state so the traversals below are allowed to run */
-+ trans->restarted = 0;
-+ trans->last_restarted_ip = 0;
-+
-+ trans_for_each_path(trans, path)
-+ path->should_be_locked = false;
-+
-+ btree_trans_sort_paths(trans);
-+
-+ bch2_trans_unlock(trans);
-+ cond_resched();
-+
-+ if (unlikely(trans->memory_allocation_failure)) {
-+ struct closure cl;
-+
-+ closure_init_stack(&cl);
-+
-+ /* keep trying until the cannibalize lock has been obtained */
-+ do {
-+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
-+ closure_sync(&cl);
-+ } while (ret);
-+ }
-+
-+ /* Now, redo traversals in correct order: */
-+ i = 0;
-+ while (i < trans->nr_sorted) {
-+ path = trans->paths + trans->sorted[i];
-+
-+ /*
-+ * Traversing a path can cause another path to be added at about
-+ * the same position:
-+ */
-+ if (path->uptodate) {
-+ /* hold a reference on the path across the traversal */
-+ __btree_path_get(path, false);
-+ ret = bch2_btree_path_traverse_one(trans, path, 0, _THIS_IP_);
-+ __btree_path_put(path, false);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-+ bch2_err_matches(ret, ENOMEM))
-+ goto retry_all;
-+ if (ret)
-+ goto err;
-+ } else {
-+ /* only move on once the path at this slot needs no more work */
-+ i++;
-+ }
-+ }
-+
-+ /*
-+ * We used to assert that all paths had been traversed here
-+ * (path->uptodate < BTREE_ITER_NEED_TRAVERSE); however, since
-+ * path->should_be_locked is not set yet, we might have unlocked and
-+ * then failed to relock a path - that's fine.
-+ */
-+err:
-+ /* shared exit: also reached on success */
-+ bch2_btree_cache_cannibalize_unlock(c);
-+
-+ trans->in_traverse_all = false;
-+
-+ trace_and_count(c, trans_traverse_all, trans, trace_ip);
-+ return ret;
-+}
-+
-+/*
-+ * Check path->pos against the node at level @l in the direction selected by
-+ * the sign of @check_pos: negative fails when the position is before the
-+ * node, positive fails when it is after the node, zero always passes.
-+ */
-+static inline bool btree_path_check_pos_in_node(struct btree_path *path,
-+						 unsigned l, int check_pos)
-+{
-+	if (check_pos < 0)
-+		return !btree_path_pos_before_node(path, path->l[l].b);
-+
-+	if (check_pos > 0)
-+		return !btree_path_pos_after_node(path, path->l[l].b);
-+
-+	return true;
-+}
-+
-+/*
-+ * True when level @l of @path holds a btree node (is_btree_node()) that can
-+ * be relocked and that passes btree_path_check_pos_in_node() for
-+ * @check_pos. The checks run in that order and stop at the first failure.
-+ */
-+static inline bool btree_path_good_node(struct btree_trans *trans,
-+					struct btree_path *path,
-+					unsigned l, int check_pos)
-+{
-+	if (!is_btree_node(path, l))
-+		return false;
-+
-+	if (!bch2_btree_node_relock(trans, path, l))
-+		return false;
-+
-+	return btree_path_check_pos_in_node(path, l, check_pos);
-+}
-+
-+/*
-+ * Set path->level to @new_level, unlock every higher level on which the
-+ * path no longer wants a lock, and mark the path as needing a traverse.
-+ */
-+static void btree_path_set_level_down(struct btree_trans *trans,
-+				      struct btree_path *path,
-+				      unsigned new_level)
-+{
-+	unsigned level;
-+
-+	path->level = new_level;
-+
-+	for (level = path->level + 1; level < BTREE_MAX_DEPTH; level++) {
-+		if (btree_lock_want(path, level) != BTREE_NODE_UNLOCKED)
-+			continue;
-+
-+		btree_node_unlock(trans, path, level);
-+	}
-+
-+	btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+	bch2_btree_path_verify(trans, path);
-+}
-+
-+/*
-+ * Slow path of btree_path_up_until_good_node(): starting at path->level,
-+ * walk upwards until a level is reached that either has no node or whose
-+ * node btree_path_good_node() accepts for @check_pos. Each rejected level is
-+ * passed to __btree_path_set_level_up().
-+ *
-+ * Returns the level reached; path->level itself is not assigned here.
-+ */
-+static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
-+ struct btree_path *path,
-+ int check_pos)
-+{
-+ unsigned i, l = path->level;
-+again:
-+ while (btree_path_node(path, l) &&
-+ !btree_path_good_node(trans, path, l, check_pos))
-+ __btree_path_set_level_up(trans, path, l++);
-+
-+ /* If we need intent locks, take them too: */
-+ for (i = l + 1;
-+ i < path->locks_want && btree_path_node(path, i);
-+ i++)
-+ if (!bch2_btree_node_relock(trans, path, i)) {
-+ /*
-+ * A level above could not be relocked: give up every
-+ * level up to and including it, then search again from
-+ * the level after it.
-+ */
-+ while (l <= i)
-+ __btree_path_set_level_up(trans, path, l++);
-+ goto again;
-+ }
-+
-+ return l;
-+}
-+
-+/*
-+ * Return the lowest level at or above path->level from which a traversal
-+ * can continue.
-+ *
-+ * Fast path: the current level is locked and passes the position check, so
-+ * path->level is returned unchanged. Otherwise defers to
-+ * __btree_path_up_until_good_node().
-+ */
-+static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
-+						      struct btree_path *path,
-+						      int check_pos)
-+{
-+	if (likely(btree_node_locked(path, path->level) &&
-+		   btree_path_check_pos_in_node(path, path->level, check_pos)))
-+		return path->level;
-+
-+	return __btree_path_up_until_good_node(trans, path, check_pos);
-+}
-+
-+/*
-+ * This is the main state machine for walking down the btree - walks down to a
-+ * specified depth (path->level on entry).
-+ *
-+ * Returns 0 on success (including when the btree has no nodes at the wanted
-+ * depth), or a negative error code: -trans->restarted when the transaction
-+ * is already being restarted, otherwise whatever the relock, cached-traverse
-+ * or descend helpers returned.
-+ *
-+ * When descending fails, the path is unlocked, path->level is reset to the
-+ * wanted depth, and the error is stashed as an ERR_PTR in
-+ * path->l[path->level].b.
-+ *
-+ * Panics if the result disagrees with trans->restarted about whether a
-+ * transaction restart is in progress.
-+ */
-+int bch2_btree_path_traverse_one(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned flags,
-+ unsigned long trace_ip)
-+{
-+ unsigned depth_want = path->level;
-+ int ret = -((int) trans->restarted);
-+
-+ /* transaction is already restarting: report that and do nothing else */
-+ if (unlikely(ret))
-+ goto out;
-+
-+ if (unlikely(!trans->srcu_held))
-+ bch2_trans_srcu_lock(trans);
-+
-+ /*
-+ * Ensure we obey path->should_be_locked: if it's set, we can't unlock
-+ * and re-traverse the path without a transaction restart:
-+ */
-+ if (path->should_be_locked) {
-+ ret = bch2_btree_path_relock(trans, path, trace_ip);
-+ goto out;
-+ }
-+
-+ if (path->cached) {
-+ ret = bch2_btree_path_traverse_cached(trans, path, flags);
-+ goto out;
-+ }
-+
-+ /* level is beyond the maximum depth: nothing to traverse */
-+ if (unlikely(path->level >= BTREE_MAX_DEPTH))
-+ goto out;
-+
-+ /* go up to the first level that is still usable, then walk back down */
-+ path->level = btree_path_up_until_good_node(trans, path, 0);
-+
-+ EBUG_ON(btree_path_node(path, path->level) &&
-+ !btree_node_locked(path, path->level));
-+
-+ /*
-+ * Note: the node at path->level (path->l[path->level].b) may be
-+ * temporarily NULL here - that would indicate to other code that we
-+ * got to the end of the btree, here it indicates that relocking the
-+ * root failed - it's critical that btree_path_lock_root() comes next
-+ * and that it can't fail
-+ */
-+ while (path->level > depth_want) {
-+ ret = btree_path_node(path, path->level)
-+ ? btree_path_down(trans, path, flags, trace_ip)
-+ : btree_path_lock_root(trans, path, depth_want, trace_ip);
-+ if (unlikely(ret)) {
-+ if (ret == 1) {
-+ /*
-+ * No nodes at this level - got to the end of
-+ * the btree:
-+ */
-+ ret = 0;
-+ goto out;
-+ }
-+
-+ /* real failure: drop all locks and record the error in the path */
-+ __bch2_btree_path_unlock(trans, path);
-+ path->level = depth_want;
-+ path->l[path->level].b = ERR_PTR(ret);
-+ goto out;
-+ }
-+ }
-+
-+ path->uptodate = BTREE_ITER_UPTODATE;
-+out:
-+ /* ret must be a transaction restart exactly when trans->restarted is set */
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
-+ panic("ret %s (%i) trans->restarted %s (%i)\n",
-+ bch2_err_str(ret), ret,
-+ bch2_err_str(trans->restarted), trans->restarted);
-+ bch2_btree_path_verify(trans, path);
-+ return ret;
-+}
-+
-+/*
-+ * Copy the state of @src into @dst, covering every member from 'pos' to the
-+ * end of struct btree_path; members laid out before 'pos' are left alone.
-+ *
-+ * For every level the copy reports as locked, six_lock_increment() is
-+ * called on that node's lock with the held lock type.
-+ */
-+static inline void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
-+				   struct btree_path *src)
-+{
-+	const unsigned offset = offsetof(struct btree_path, pos);
-+	unsigned level;
-+
-+	memcpy((void *) dst + offset,
-+	       (void *) src + offset,
-+	       sizeof(*dst) - offset);
-+
-+	for (level = 0; level < BTREE_MAX_DEPTH; level++) {
-+		unsigned held = btree_node_locked_type(dst, level);
-+
-+		if (held == BTREE_NODE_UNLOCKED)
-+			continue;
-+
-+		six_lock_increment(&dst->l[level].b->c.lock, held);
-+	}
-+}
-+
-+/*
-+ * Allocate a new path via btree_path_alloc(trans, src), copy @src into it,
-+ * and take a reference on the new path (intent or not, per @intent).
-+ */
-+static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
-+					   bool intent)
-+{
-+	struct btree_path *copy;
-+
-+	copy = btree_path_alloc(trans, src);
-+	btree_path_copy(trans, copy, src);
-+	__btree_path_get(copy, intent);
-+
-+	return copy;
-+}
-+
-+__flatten
-+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
-+ struct btree_path *path, bool intent,
-+ unsigned long ip)
-+{
-+ __btree_path_put(path, intent);
-+ path = btree_path_clone(trans, path, intent);
-+ path->preserve = false;
-+ return path;
-+}
-+
-+struct btree_path * __must_check
-+__bch2_btree_path_set_pos(struct btree_trans *trans,
-+ struct btree_path *path, struct bpos new_pos,
-+ bool intent, unsigned long ip, int cmp)
-+{
-+ unsigned level = path->level;
-+
-+ bch2_trans_verify_not_in_restart(trans);
-+ EBUG_ON(!path->ref);
-+
-+ path = bch2_btree_path_make_mut(trans, path, intent, ip);
-+
-+ path->pos = new_pos;
-+ trans->paths_sorted = false;
-+
-+ if (unlikely(path->cached)) {
-+ btree_node_unlock(trans, path, 0);
-+ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
-+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+ goto out;
-+ }
-+
-+ level = btree_path_up_until_good_node(trans, path, cmp);
-+
-+ if (btree_path_node(path, level)) {
-+ struct btree_path_level *l = &path->l[level];
-+
-+ BUG_ON(!btree_node_locked(path, level));
-+ /*
-+ * We might have to skip over many keys, or just a few: try
-+ * advancing the node iterator, and if we have to skip over too
-+ * many keys just reinit it (or if we're rewinding, since that
-+ * is expensive).
-+ */
-+ if (cmp < 0 ||
-+ !btree_path_advance_to_pos(path, l, 8))
-+ bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);
-+
-+ /*
-+ * Iterators to interior nodes should always be pointed at the first non
-+ * whiteout:
-+ */
-+ if (unlikely(level))
-+ bch2_btree_node_iter_peek(&l->iter, l->b);
-+ }
-+
-+ if (unlikely(level != path->level)) {
-+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+ __bch2_btree_path_unlock(trans, path);
-+ }
-+out:
-+ bch2_btree_path_verify(trans, path);
-+ return path;
-+}
-+
-+/* Btree path: main interface: */
-+
-+static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
-+{
-+ struct btree_path *sib;
-+
-+ sib = prev_btree_path(trans, path);
-+ if (sib && !btree_path_cmp(sib, path))
-+ return sib;
-+
-+ sib = next_btree_path(trans, path);
-+ if (sib && !btree_path_cmp(sib, path))
-+ return sib;
-+
-+ return NULL;
-+}
-+
-+static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
-+{
-+ struct btree_path *sib;
-+
-+ sib = prev_btree_path(trans, path);
-+ if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
-+ return sib;
-+
-+ sib = next_btree_path(trans, path);
-+ if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
-+ return sib;
-+
-+ return NULL;
-+}
-+
-+static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
-+{
-+ __bch2_btree_path_unlock(trans, path);
-+ btree_path_list_remove(trans, path);
-+ trans->paths_allocated &= ~(1ULL << path->idx);
-+}
-+
-+void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
-+{
-+ struct btree_path *dup;
-+
-+ EBUG_ON(trans->paths + path->idx != path);
-+ EBUG_ON(!path->ref);
-+
-+ if (!__btree_path_put(path, intent))
-+ return;
-+
-+ dup = path->preserve
-+ ? have_path_at_pos(trans, path)
-+ : have_node_at_pos(trans, path);
-+
-+ if (!dup && !(!path->preserve && !is_btree_node(path, path->level)))
-+ return;
-+
-+ if (path->should_be_locked &&
-+ !trans->restarted &&
-+ (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
-+ return;
-+
-+ if (dup) {
-+ dup->preserve |= path->preserve;
-+ dup->should_be_locked |= path->should_be_locked;
-+ }
-+
-+ __bch2_path_free(trans, path);
-+}
-+
-+static void bch2_path_put_nokeep(struct btree_trans *trans, struct btree_path *path,
-+ bool intent)
-+{
-+ EBUG_ON(trans->paths + path->idx != path);
-+ EBUG_ON(!path->ref);
-+
-+ if (!__btree_path_put(path, intent))
-+ return;
-+
-+ __bch2_path_free(trans, path);
-+}
-+
-+void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
-+{
-+ panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
-+ trans->restart_count, restart_count,
-+ (void *) trans->last_begin_ip);
-+}
-+
-+void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
-+{
-+ panic("in transaction restart: %s, last restarted by %pS\n",
-+ bch2_err_str(trans->restarted),
-+ (void *) trans->last_restarted_ip);
-+}
-+
-+noinline __cold
-+void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
-+{
-+ struct btree_insert_entry *i;
-+ struct btree_write_buffered_key *wb;
-+
-+ prt_printf(buf, "transaction updates for %s journal seq %llu",
-+ trans->fn, trans->journal_res.seq);
-+ prt_newline(buf);
-+ printbuf_indent_add(buf, 2);
-+
-+ trans_for_each_update(trans, i) {
-+ struct bkey_s_c old = { &i->old_k, i->old_v };
-+
-+ prt_printf(buf, "update: btree=%s cached=%u %pS",
-+ bch2_btree_id_str(i->btree_id),
-+ i->cached,
-+ (void *) i->ip_allocated);
-+ prt_newline(buf);
-+
-+ prt_printf(buf, " old ");
-+ bch2_bkey_val_to_text(buf, trans->c, old);
-+ prt_newline(buf);
-+
-+ prt_printf(buf, " new ");
-+ bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
-+ prt_newline(buf);
-+ }
-+
-+ trans_for_each_wb_update(trans, wb) {
-+ prt_printf(buf, "update: btree=%s wb=1 %pS",
-+ bch2_btree_id_str(wb->btree),
-+ (void *) i->ip_allocated);
-+ prt_newline(buf);
-+
-+ prt_printf(buf, " new ");
-+ bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(&wb->k));
-+ prt_newline(buf);
-+ }
-+
-+ printbuf_indent_sub(buf, 2);
-+}
-+
-+noinline __cold
-+void bch2_dump_trans_updates(struct btree_trans *trans)
-+{
-+ struct printbuf buf = PRINTBUF;
-+
-+ bch2_trans_updates_to_text(&buf, trans);
-+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
-+ printbuf_exit(&buf);
-+}
-+
-+noinline __cold
-+void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
-+{
-+ prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ",
-+ path->idx, path->ref, path->intent_ref,
-+ path->preserve ? 'P' : ' ',
-+ path->should_be_locked ? 'S' : ' ',
-+ bch2_btree_id_str(path->btree_id),
-+ path->level);
-+ bch2_bpos_to_text(out, path->pos);
-+
-+ prt_printf(out, " locks %u", path->nodes_locked);
-+#ifdef TRACK_PATH_ALLOCATED
-+ prt_printf(out, " %pS", (void *) path->ip_allocated);
-+#endif
-+ prt_newline(out);
-+}
-+
-+static noinline __cold
-+void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans,
-+ bool nosort)
-+{
-+ struct btree_path *path;
-+ unsigned idx;
-+
-+ if (!nosort)
-+ btree_trans_sort_paths(trans);
-+
-+ trans_for_each_path_inorder(trans, path, idx)
-+ bch2_btree_path_to_text(out, path);
-+}
-+
-+noinline __cold
-+void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
-+{
-+ __bch2_trans_paths_to_text(out, trans, false);
-+}
-+
-+static noinline __cold
-+void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
-+{
-+ struct printbuf buf = PRINTBUF;
-+
-+ __bch2_trans_paths_to_text(&buf, trans, nosort);
-+ bch2_trans_updates_to_text(&buf, trans);
-+
-+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
-+ printbuf_exit(&buf);
-+}
-+
-+noinline __cold
-+void bch2_dump_trans_paths_updates(struct btree_trans *trans)
-+{
-+ __bch2_dump_trans_paths_updates(trans, false);
-+}
-+
-+noinline __cold
-+static void bch2_trans_update_max_paths(struct btree_trans *trans)
-+{
-+ struct btree_transaction_stats *s = btree_trans_stats(trans);
-+ struct printbuf buf = PRINTBUF;
-+
-+ if (!s)
-+ return;
-+
-+ bch2_trans_paths_to_text(&buf, trans);
-+
-+ if (!buf.allocation_failure) {
-+ mutex_lock(&s->lock);
-+ if (s->nr_max_paths < hweight64(trans->paths_allocated)) {
-+ s->nr_max_paths = trans->nr_max_paths =
-+ hweight64(trans->paths_allocated);
-+ swap(s->max_paths_text, buf.buf);
-+ }
-+ mutex_unlock(&s->lock);
-+ }
-+
-+ printbuf_exit(&buf);
-+
-+ trans->nr_max_paths = hweight64(trans->paths_allocated);
-+}
-+
-+static noinline void btree_path_overflow(struct btree_trans *trans)
-+{
-+ bch2_dump_trans_paths_updates(trans);
-+ panic("trans path overflow\n");
-+}
-+
-+static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
-+ struct btree_path *pos)
-+{
-+ struct btree_path *path;
-+ unsigned idx;
-+
-+ if (unlikely(trans->paths_allocated ==
-+ ~((~0ULL << 1) << (BTREE_ITER_MAX - 1))))
-+ btree_path_overflow(trans);
-+
-+ idx = __ffs64(~trans->paths_allocated);
-+
-+ /*
-+ * Do this before marking the new path as allocated, since it won't be
-+ * initialized yet:
-+ */
-+ if (unlikely(idx > trans->nr_max_paths))
-+ bch2_trans_update_max_paths(trans);
-+
-+ trans->paths_allocated |= 1ULL << idx;
-+
-+ path = &trans->paths[idx];
-+ path->idx = idx;
-+ path->ref = 0;
-+ path->intent_ref = 0;
-+ path->nodes_locked = 0;
-+ path->alloc_seq++;
-+
-+ btree_path_list_add(trans, pos, path);
-+ trans->paths_sorted = false;
-+ return path;
-+}
-+
-+struct btree_path *bch2_path_get(struct btree_trans *trans,
-+ enum btree_id btree_id, struct bpos pos,
-+ unsigned locks_want, unsigned level,
-+ unsigned flags, unsigned long ip)
-+{
-+ struct btree_path *path, *path_pos = NULL;
-+ bool cached = flags & BTREE_ITER_CACHED;
-+ bool intent = flags & BTREE_ITER_INTENT;
-+ int i;
-+
-+ bch2_trans_verify_not_in_restart(trans);
-+ bch2_trans_verify_locks(trans);
-+
-+ btree_trans_sort_paths(trans);
-+
-+ trans_for_each_path_inorder(trans, path, i) {
-+ if (__btree_path_cmp(path,
-+ btree_id,
-+ cached,
-+ pos,
-+ level) > 0)
-+ break;
-+
-+ path_pos = path;
-+ }
-+
-+ if (path_pos &&
-+ path_pos->cached == cached &&
-+ path_pos->btree_id == btree_id &&
-+ path_pos->level == level) {
-+ __btree_path_get(path_pos, intent);
-+ path = bch2_btree_path_set_pos(trans, path_pos, pos, intent, ip);
-+ } else {
-+ path = btree_path_alloc(trans, path_pos);
-+ path_pos = NULL;
-+
-+ __btree_path_get(path, intent);
-+ path->pos = pos;
-+ path->btree_id = btree_id;
-+ path->cached = cached;
-+ path->uptodate = BTREE_ITER_NEED_TRAVERSE;
-+ path->should_be_locked = false;
-+ path->level = level;
-+ path->locks_want = locks_want;
-+ path->nodes_locked = 0;
-+ for (i = 0; i < ARRAY_SIZE(path->l); i++)
-+ path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
-+#ifdef TRACK_PATH_ALLOCATED
-+ path->ip_allocated = ip;
-+#endif
-+ trans->paths_sorted = false;
-+ }
-+
-+ if (!(flags & BTREE_ITER_NOPRESERVE))
-+ path->preserve = true;
-+
-+ if (path->intent_ref)
-+ locks_want = max(locks_want, level + 1);
-+
-+ /*
-+ * If the path has locks_want greater than requested, we don't downgrade
-+ * it here - on transaction restart because btree node split needs to
-+ * upgrade locks, we might be putting/getting the iterator again.
-+ * Downgrading iterators only happens via bch2_trans_downgrade(), after
-+ * a successful transaction commit.
-+ */
-+
-+ locks_want = min(locks_want, BTREE_MAX_DEPTH);
-+ if (locks_want > path->locks_want)
-+ bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
-+
-+ return path;
-+}
-+
-+struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
-+{
-+
-+ struct btree_path_level *l = path_l(path);
-+ struct bkey_packed *_k;
-+ struct bkey_s_c k;
-+
-+ if (unlikely(!l->b))
-+ return bkey_s_c_null;
-+
-+ EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
-+ EBUG_ON(!btree_node_locked(path, path->level));
-+
-+ if (!path->cached) {
-+ _k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
-+ k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
-+
-+ EBUG_ON(k.k && bkey_deleted(k.k) && bpos_eq(k.k->p, path->pos));
-+
-+ if (!k.k || !bpos_eq(path->pos, k.k->p))
-+ goto hole;
-+ } else {
-+ struct bkey_cached *ck = (void *) path->l[0].b;
-+
-+ EBUG_ON(ck &&
-+ (path->btree_id != ck->key.btree_id ||
-+ !bkey_eq(path->pos, ck->key.pos)));
-+ if (!ck || !ck->valid)
-+ return bkey_s_c_null;
-+
-+ *u = ck->k->k;
-+ k = bkey_i_to_s_c(ck->k);
-+ }
-+
-+ return k;
-+hole:
-+ bkey_init(u);
-+ u->p = path->pos;
-+ return (struct bkey_s_c) { u, NULL };
-+}
-+
-+/* Btree iterators: */
-+
-+int __must_check
-+__bch2_btree_iter_traverse(struct btree_iter *iter)
-+{
-+ return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
-+}
-+
-+int __must_check
-+bch2_btree_iter_traverse(struct btree_iter *iter)
-+{
-+ int ret;
-+
-+ iter->path = bch2_btree_path_set_pos(iter->trans, iter->path,
-+ btree_iter_search_key(iter),
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+
-+ ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
-+ if (ret)
-+ return ret;
-+
-+ btree_path_set_should_be_locked(iter->path);
-+ return 0;
-+}
-+
-+/* Iterate across nodes (leaf and interior nodes) */
-+
-+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
-+{
-+ struct btree_trans *trans = iter->trans;
-+ struct btree *b = NULL;
-+ int ret;
-+
-+ EBUG_ON(iter->path->cached);
-+ bch2_btree_iter_verify(iter);
-+
-+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+ if (ret)
-+ goto err;
-+
-+ b = btree_path_node(iter->path, iter->path->level);
-+ if (!b)
-+ goto out;
-+
-+ BUG_ON(bpos_lt(b->key.k.p, iter->pos));
-+
-+ bkey_init(&iter->k);
-+ iter->k.p = iter->pos = b->key.k.p;
-+
-+ iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+ btree_path_set_should_be_locked(iter->path);
-+out:
-+ bch2_btree_iter_verify_entry_exit(iter);
-+ bch2_btree_iter_verify(iter);
-+
-+ return b;
-+err:
-+ b = ERR_PTR(ret);
-+ goto out;
-+}
-+
-+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
-+{
-+ struct btree *b;
-+
-+ while (b = bch2_btree_iter_peek_node(iter),
-+ bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
-+ bch2_trans_begin(iter->trans);
-+
-+ return b;
-+}
-+
-+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
-+{
-+ struct btree_trans *trans = iter->trans;
-+ struct btree_path *path = iter->path;
-+ struct btree *b = NULL;
-+ int ret;
-+
-+ bch2_trans_verify_not_in_restart(trans);
-+ EBUG_ON(iter->path->cached);
-+ bch2_btree_iter_verify(iter);
-+
-+ /* already at end? */
-+ if (!btree_path_node(path, path->level))
-+ return NULL;
-+
-+ /* got to end? */
-+ if (!btree_path_node(path, path->level + 1)) {
-+ btree_path_set_level_up(trans, path);
-+ return NULL;
-+ }
-+
-+ if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
-+ __bch2_btree_path_unlock(trans, path);
-+ path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
-+ path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
-+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+ trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path);
-+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
-+ goto err;
-+ }
-+
-+ b = btree_path_node(path, path->level + 1);
-+
-+ if (bpos_eq(iter->pos, b->key.k.p)) {
-+ __btree_path_set_level_up(trans, path, path->level++);
-+ } else {
-+ /*
-+ * Haven't gotten to the end of the parent node: go back down to
-+ * the next child node
-+ */
-+ path = iter->path =
-+ bch2_btree_path_set_pos(trans, path, bpos_successor(iter->pos),
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+
-+ btree_path_set_level_down(trans, path, iter->min_depth);
-+
-+ ret = bch2_btree_path_traverse(trans, path, iter->flags);
-+ if (ret)
-+ goto err;
-+
-+ b = path->l[path->level].b;
-+ }
-+
-+ bkey_init(&iter->k);
-+ iter->k.p = iter->pos = b->key.k.p;
-+
-+ iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+ btree_path_set_should_be_locked(iter->path);
-+ BUG_ON(iter->path->uptodate);
-+out:
-+ bch2_btree_iter_verify_entry_exit(iter);
-+ bch2_btree_iter_verify(iter);
-+
-+ return b;
-+err:
-+ b = ERR_PTR(ret);
-+ goto out;
-+}
-+
-+/* Iterate across keys (in leaf nodes only) */
-+
-+inline bool bch2_btree_iter_advance(struct btree_iter *iter)
-+{
-+ if (likely(!(iter->flags & BTREE_ITER_ALL_LEVELS))) {
-+ struct bpos pos = iter->k.p;
-+ bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
-+ ? bpos_eq(pos, SPOS_MAX)
-+ : bkey_eq(pos, SPOS_MAX));
-+
-+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-+ pos = bkey_successor(iter, pos);
-+ bch2_btree_iter_set_pos(iter, pos);
-+ return ret;
-+ } else {
-+ if (!btree_path_node(iter->path, iter->path->level))
-+ return true;
-+
-+ iter->advanced = true;
-+ return false;
-+ }
-+}
-+
-+inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
-+{
-+ struct bpos pos = bkey_start_pos(&iter->k);
-+ bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
-+ ? bpos_eq(pos, POS_MIN)
-+ : bkey_eq(pos, POS_MIN));
-+
-+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-+ pos = bkey_predecessor(iter, pos);
-+ bch2_btree_iter_set_pos(iter, pos);
-+ return ret;
-+}
-+
-+static noinline
-+struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
-+{
-+ struct btree_insert_entry *i;
-+ struct bkey_i *ret = NULL;
-+
-+ trans_for_each_update(iter->trans, i) {
-+ if (i->btree_id < iter->btree_id)
-+ continue;
-+ if (i->btree_id > iter->btree_id)
-+ break;
-+ if (bpos_lt(i->k->k.p, iter->path->pos))
-+ continue;
-+ if (i->key_cache_already_flushed)
-+ continue;
-+ if (!ret || bpos_lt(i->k->k.p, ret->k.p))
-+ ret = i->k;
-+ }
-+
-+ return ret;
-+}
-+
-+static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
-+{
-+ return iter->flags & BTREE_ITER_WITH_UPDATES
-+ ? __bch2_btree_trans_peek_updates(iter)
-+ : NULL;
-+}
-+
-+static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bpos end_pos)
-+{
-+ struct bkey_i *k;
-+
-+ if (bpos_lt(iter->path->pos, iter->journal_pos))
-+ iter->journal_idx = 0;
-+
-+ k = bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
-+ iter->path->level,
-+ iter->path->pos,
-+ end_pos,
-+ &iter->journal_idx);
-+
-+ iter->journal_pos = k ? k->k.p : end_pos;
-+ return k;
-+}
-+
-+static noinline
-+struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
-+ struct btree_iter *iter)
-+{
-+ struct bkey_i *k = bch2_btree_journal_peek(trans, iter, iter->path->pos);
-+
-+ if (k) {
-+ iter->k = k->k;
-+ return bkey_i_to_s_c(k);
-+ } else {
-+ return bkey_s_c_null;
-+ }
-+}
-+
-+static noinline
-+struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_i *next_journal =
-+ bch2_btree_journal_peek(trans, iter,
-+ k.k ? k.k->p : path_l(iter->path)->b->key.k.p);
-+
-+ if (next_journal) {
-+ iter->k = next_journal->k;
-+ k = bkey_i_to_s_c(next_journal);
-+ }
-+
-+ return k;
-+}
-+
-+/*
-+ * Checks btree key cache for key at iter->pos and returns it if present, or
-+ * bkey_s_c_null:
-+ */
-+static noinline
-+struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
-+{
-+ struct btree_trans *trans = iter->trans;
-+ struct bch_fs *c = trans->c;
-+ struct bkey u;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ if ((iter->flags & BTREE_ITER_KEY_CACHE_FILL) &&
-+ bpos_eq(iter->pos, pos))
-+ return bkey_s_c_null;
-+
-+ if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
-+ return bkey_s_c_null;
-+
-+ if (!iter->key_cache_path)
-+ iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
-+ iter->flags & BTREE_ITER_INTENT, 0,
-+ iter->flags|BTREE_ITER_CACHED|
-+ BTREE_ITER_CACHED_NOFILL,
-+ _THIS_IP_);
-+
-+ iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+
-+ ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
-+ iter->flags|BTREE_ITER_CACHED) ?:
-+ bch2_btree_path_relock(trans, iter->path, _THIS_IP_);
-+ if (unlikely(ret))
-+ return bkey_s_c_err(ret);
-+
-+ btree_path_set_should_be_locked(iter->key_cache_path);
-+
-+ k = bch2_btree_path_peek_slot(iter->key_cache_path, &u);
-+ if (k.k && !bkey_err(k)) {
-+ iter->k = u;
-+ k.k = &iter->k;
-+ }
-+ return k;
-+}
-+
-+static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
-+{
-+ struct btree_trans *trans = iter->trans;
-+ struct bkey_i *next_update;
-+ struct bkey_s_c k, k2;
-+ int ret;
-+
-+ EBUG_ON(iter->path->cached);
-+ bch2_btree_iter_verify(iter);
-+
-+ while (1) {
-+ struct btree_path_level *l;
-+
-+ iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+
-+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+ if (unlikely(ret)) {
-+ /* ensure that iter->k is consistent with iter->pos: */
-+ bch2_btree_iter_set_pos(iter, iter->pos);
-+ k = bkey_s_c_err(ret);
-+ goto out;
-+ }
-+
-+ l = path_l(iter->path);
-+
-+ if (unlikely(!l->b)) {
-+ /* No btree nodes at requested level: */
-+ bch2_btree_iter_set_pos(iter, SPOS_MAX);
-+ k = bkey_s_c_null;
-+ goto out;
-+ }
-+
-+ btree_path_set_should_be_locked(iter->path);
-+
-+ k = btree_path_level_peek_all(trans->c, l, &iter->k);
-+
-+ if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
-+ k.k &&
-+ (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
-+ k = k2;
-+ ret = bkey_err(k);
-+ if (ret) {
-+ bch2_btree_iter_set_pos(iter, iter->pos);
-+ goto out;
-+ }
-+ }
-+
-+ if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
-+ k = btree_trans_peek_journal(trans, iter, k);
-+
-+ next_update = btree_trans_peek_updates(iter);
-+
-+ if (next_update &&
-+ bpos_le(next_update->k.p,
-+ k.k ? k.k->p : l->b->key.k.p)) {
-+ iter->k = next_update->k;
-+ k = bkey_i_to_s_c(next_update);
-+ }
-+
-+ if (k.k && bkey_deleted(k.k)) {
-+ /*
-+ * If we've got a whiteout, and it's after the search
-+ * key, advance the search key to the whiteout instead
-+ * of just after the whiteout - it might be a btree
-+ * whiteout, with a real key at the same position, since
-+ * in the btree deleted keys sort before non deleted.
-+ */
-+ search_key = !bpos_eq(search_key, k.k->p)
-+ ? k.k->p
-+ : bpos_successor(k.k->p);
-+ continue;
-+ }
-+
-+ if (likely(k.k)) {
-+ break;
-+ } else if (likely(!bpos_eq(l->b->key.k.p, SPOS_MAX))) {
-+ /* Advance to next leaf node: */
-+ search_key = bpos_successor(l->b->key.k.p);
-+ } else {
-+ /* End of btree: */
-+ bch2_btree_iter_set_pos(iter, SPOS_MAX);
-+ k = bkey_s_c_null;
-+ goto out;
-+ }
-+ }
-+out:
-+ bch2_btree_iter_verify(iter);
-+
-+ return k;
-+}
-+
-+/**
-+ * bch2_btree_iter_peek_upto() - returns first key greater than or equal to
-+ * iterator's current position
-+ * @iter: iterator to peek from
-+ * @end: search limit: returns keys less than or equal to @end
-+ *
-+ * Returns: key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
-+{
-+ struct btree_trans *trans = iter->trans;
-+ struct bpos search_key = btree_iter_search_key(iter);
-+ struct bkey_s_c k;
-+ struct bpos iter_pos;
-+ int ret;
-+
-+ EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
-+ EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX));
-+
-+ if (iter->update_path) {
-+ bch2_path_put_nokeep(trans, iter->update_path,
-+ iter->flags & BTREE_ITER_INTENT);
-+ iter->update_path = NULL;
-+ }
-+
-+ bch2_btree_iter_verify_entry_exit(iter);
-+
-+ while (1) {
-+ k = __bch2_btree_iter_peek(iter, search_key);
-+ if (unlikely(!k.k))
-+ goto end;
-+ if (unlikely(bkey_err(k)))
-+ goto out_no_locked;
-+
-+ /*
-+ * iter->pos should be mononotically increasing, and always be
-+ * equal to the key we just returned - except extents can
-+ * straddle iter->pos:
-+ */
-+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
-+ iter_pos = k.k->p;
-+ else
-+ iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
-+
-+ if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
-+ ? bkey_gt(iter_pos, end)
-+ : bkey_ge(iter_pos, end)))
-+ goto end;
-+
-+ if (iter->update_path &&
-+ !bkey_eq(iter->update_path->pos, k.k->p)) {
-+ bch2_path_put_nokeep(trans, iter->update_path,
-+ iter->flags & BTREE_ITER_INTENT);
-+ iter->update_path = NULL;
-+ }
-+
-+ if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
-+ (iter->flags & BTREE_ITER_INTENT) &&
-+ !(iter->flags & BTREE_ITER_IS_EXTENTS) &&
-+ !iter->update_path) {
-+ struct bpos pos = k.k->p;
-+
-+ if (pos.snapshot < iter->snapshot) {
-+ search_key = bpos_successor(k.k->p);
-+ continue;
-+ }
-+
-+ pos.snapshot = iter->snapshot;
-+
-+ /*
-+ * advance, same as on exit for iter->path, but only up
-+ * to snapshot
-+ */
-+ __btree_path_get(iter->path, iter->flags & BTREE_ITER_INTENT);
-+ iter->update_path = iter->path;
-+
-+ iter->update_path = bch2_btree_path_set_pos(trans,
-+ iter->update_path, pos,
-+ iter->flags & BTREE_ITER_INTENT,
-+ _THIS_IP_);
-+ ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags);
-+ if (unlikely(ret)) {
-+ k = bkey_s_c_err(ret);
-+ goto out_no_locked;
-+ }
-+ }
-+
-+ /*
-+ * We can never have a key in a leaf node at POS_MAX, so
-+ * we don't have to check these successor() calls:
-+ */
-+ if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
-+ !bch2_snapshot_is_ancestor(trans->c,
-+ iter->snapshot,
-+ k.k->p.snapshot)) {
-+ search_key = bpos_successor(k.k->p);
-+ continue;
-+ }
-+
-+ if (bkey_whiteout(k.k) &&
-+ !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
-+ search_key = bkey_successor(iter, k.k->p);
-+ continue;
-+ }
-+
-+ break;
-+ }
-+
-+ iter->pos = iter_pos;
-+
-+ iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+
-+ btree_path_set_should_be_locked(iter->path);
-+out_no_locked:
-+ if (iter->update_path) {
-+ ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_);
-+ if (unlikely(ret))
-+ k = bkey_s_c_err(ret);
-+ else
-+ btree_path_set_should_be_locked(iter->update_path);
-+ }
-+
-+ if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
-+ iter->pos.snapshot = iter->snapshot;
-+
-+ ret = bch2_btree_iter_verify_ret(iter, k);
-+ if (unlikely(ret)) {
-+ bch2_btree_iter_set_pos(iter, iter->pos);
-+ k = bkey_s_c_err(ret);
-+ }
-+
-+ bch2_btree_iter_verify_entry_exit(iter);
-+
-+ return k;
-+end:
-+ bch2_btree_iter_set_pos(iter, end);
-+ k = bkey_s_c_null;
-+ goto out_no_locked;
-+}
-+
-+/**
-+ * bch2_btree_iter_peek_all_levels() - returns the first key greater than or
-+ * equal to iterator's current position, returning keys from every level of the
-+ * btree. For keys at different levels of the btree that compare equal, the key
-+ * from the lower level (leaf) is returned first.
-+ * @iter: iterator to peek from
-+ *
-+ * Returns: key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
-+{
-+ struct btree_trans *trans = iter->trans;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ EBUG_ON(iter->path->cached);
-+ bch2_btree_iter_verify(iter);
-+ BUG_ON(iter->path->level < iter->min_depth);
-+ BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
-+ EBUG_ON(!(iter->flags & BTREE_ITER_ALL_LEVELS));
-+
-+ while (1) {
-+ iter->path = bch2_btree_path_set_pos(trans, iter->path, iter->pos,
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+
-+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+ if (unlikely(ret)) {
-+ /* ensure that iter->k is consistent with iter->pos: */
-+ bch2_btree_iter_set_pos(iter, iter->pos);
-+ k = bkey_s_c_err(ret);
-+ goto out_no_locked;
-+ }
-+
-+ /* Already at end? */
-+ if (!btree_path_node(iter->path, iter->path->level)) {
-+ k = bkey_s_c_null;
-+ goto out_no_locked;
-+ }
-+
-+ k = btree_path_level_peek_all(trans->c,
-+ &iter->path->l[iter->path->level], &iter->k);
-+
-+ /* Check if we should go up to the parent node: */
-+ if (!k.k ||
-+ (iter->advanced &&
-+ bpos_eq(path_l(iter->path)->b->key.k.p, iter->pos))) {
-+ iter->pos = path_l(iter->path)->b->key.k.p;
-+ btree_path_set_level_up(trans, iter->path);
-+ iter->advanced = false;
-+ continue;
-+ }
-+
-+ /*
-+ * Check if we should go back down to a leaf:
-+ * If we're not in a leaf node, we only return the current key
-+ * if it exactly matches iter->pos - otherwise we first have to
-+ * go back to the leaf:
-+ */
-+ if (iter->path->level != iter->min_depth &&
-+ (iter->advanced ||
-+ !k.k ||
-+ !bpos_eq(iter->pos, k.k->p))) {
-+ btree_path_set_level_down(trans, iter->path, iter->min_depth);
-+ iter->pos = bpos_successor(iter->pos);
-+ iter->advanced = false;
-+ continue;
-+ }
-+
-+ /* Check if we should go to the next key: */
-+ if (iter->path->level == iter->min_depth &&
-+ iter->advanced &&
-+ k.k &&
-+ bpos_eq(iter->pos, k.k->p)) {
-+ iter->pos = bpos_successor(iter->pos);
-+ iter->advanced = false;
-+ continue;
-+ }
-+
-+ if (iter->advanced &&
-+ iter->path->level == iter->min_depth &&
-+ !bpos_eq(k.k->p, iter->pos))
-+ iter->advanced = false;
-+
-+ BUG_ON(iter->advanced);
-+ BUG_ON(!k.k);
-+ break;
-+ }
-+
-+ iter->pos = k.k->p;
-+ btree_path_set_should_be_locked(iter->path);
-+out_no_locked:
-+ bch2_btree_iter_verify(iter);
-+
-+ return k;
-+}
-+
-+/**
-+ * bch2_btree_iter_next() - returns first key greater than iterator's current
-+ * position
-+ * @iter: iterator to peek from
-+ *
-+ * Returns: key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
-+{
-+ if (!bch2_btree_iter_advance(iter))
-+ return bkey_s_c_null;
-+
-+ return bch2_btree_iter_peek(iter);
-+}
-+
-+/**
-+ * bch2_btree_iter_peek_prev() - returns first key less than or equal to
-+ * iterator's current position
-+ * @iter: iterator to peek from
-+ *
-+ * Returns: key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
-+{
-+ struct btree_trans *trans = iter->trans;
-+ struct bpos search_key = iter->pos;
-+ struct btree_path *saved_path = NULL;
-+ struct bkey_s_c k;
-+ struct bkey saved_k;
-+ const struct bch_val *saved_v;
-+ int ret;
-+
-+ EBUG_ON(iter->path->cached || iter->path->level);
-+ EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
-+
-+ if (iter->flags & BTREE_ITER_WITH_JOURNAL)
-+ return bkey_s_c_err(-EIO);
-+
-+ bch2_btree_iter_verify(iter);
-+ bch2_btree_iter_verify_entry_exit(iter);
-+
-+ if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
-+ search_key.snapshot = U32_MAX;
-+
-+ while (1) {
-+ iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+
-+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+ if (unlikely(ret)) {
-+ /* ensure that iter->k is consistent with iter->pos: */
-+ bch2_btree_iter_set_pos(iter, iter->pos);
-+ k = bkey_s_c_err(ret);
-+ goto out_no_locked;
-+ }
-+
-+ k = btree_path_level_peek(trans, iter->path,
-+ &iter->path->l[0], &iter->k);
-+ if (!k.k ||
-+ ((iter->flags & BTREE_ITER_IS_EXTENTS)
-+ ? bpos_ge(bkey_start_pos(k.k), search_key)
-+ : bpos_gt(k.k->p, search_key)))
-+ k = btree_path_level_prev(trans, iter->path,
-+ &iter->path->l[0], &iter->k);
-+
-+ if (likely(k.k)) {
-+ if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
-+ if (k.k->p.snapshot == iter->snapshot)
-+ goto got_key;
-+
-+ /*
-+ * If we have a saved candidate, and we're no
-+ * longer at the same _key_ (not pos), return
-+ * that candidate
-+ */
-+ if (saved_path && !bkey_eq(k.k->p, saved_k.p)) {
-+ bch2_path_put_nokeep(trans, iter->path,
-+ iter->flags & BTREE_ITER_INTENT);
-+ iter->path = saved_path;
-+ saved_path = NULL;
-+ iter->k = saved_k;
-+ k.v = saved_v;
-+ goto got_key;
-+ }
-+
-+ if (bch2_snapshot_is_ancestor(iter->trans->c,
-+ iter->snapshot,
-+ k.k->p.snapshot)) {
-+ if (saved_path)
-+ bch2_path_put_nokeep(trans, saved_path,
-+ iter->flags & BTREE_ITER_INTENT);
-+ saved_path = btree_path_clone(trans, iter->path,
-+ iter->flags & BTREE_ITER_INTENT);
-+ saved_k = *k.k;
-+ saved_v = k.v;
-+ }
-+
-+ search_key = bpos_predecessor(k.k->p);
-+ continue;
-+ }
-+got_key:
-+ if (bkey_whiteout(k.k) &&
-+ !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
-+ search_key = bkey_predecessor(iter, k.k->p);
-+ if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
-+ search_key.snapshot = U32_MAX;
-+ continue;
-+ }
-+
-+ break;
-+ } else if (likely(!bpos_eq(iter->path->l[0].b->data->min_key, POS_MIN))) {
-+ /* Advance to previous leaf node: */
-+ search_key = bpos_predecessor(iter->path->l[0].b->data->min_key);
-+ } else {
-+ /* Start of btree: */
-+ bch2_btree_iter_set_pos(iter, POS_MIN);
-+ k = bkey_s_c_null;
-+ goto out_no_locked;
-+ }
-+ }
-+
-+ EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos));
-+
-+ /* Extents can straddle iter->pos: */
-+ if (bkey_lt(k.k->p, iter->pos))
-+ iter->pos = k.k->p;
-+
-+ if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
-+ iter->pos.snapshot = iter->snapshot;
-+
-+ btree_path_set_should_be_locked(iter->path);
-+out_no_locked:
-+ if (saved_path)
-+ bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
-+
-+ bch2_btree_iter_verify_entry_exit(iter);
-+ bch2_btree_iter_verify(iter);
-+
-+ return k;
-+}
-+
-+/**
-+ * bch2_btree_iter_prev() - step the iterator back, then peek backwards
-+ * @iter:	iterator to rewind and peek from
-+ *
-+ * Returns: bkey_s_c_null when bch2_btree_iter_rewind() returns false,
-+ * otherwise the result of bch2_btree_iter_peek_prev() - a key, or an error
-+ * extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
-+{
-+	return bch2_btree_iter_rewind(iter)
-+		? bch2_btree_iter_peek_prev(iter)
-+		: bkey_s_c_null;
-+}
-+
-+/*
-+ * bch2_btree_iter_peek_slot() - return the key at the iterator's current
-+ * position, synthesizing an empty key when nothing is stored there.
-+ *
-+ * For iterators that are cached, or that are neither extents nor snapshot
-+ * filtered, the lookup consults (in order) pending transaction updates, the
-+ * journal (BTREE_ITER_WITH_JOURNAL), the key cache (BTREE_ITER_WITH_KEY_CACHE)
-+ * and finally the btree path itself.
-+ *
-+ * Otherwise it peeks forward up to an end position and, if the key found
-+ * starts after iter->pos (or no key was found), builds a valueless key at
-+ * iter->pos in iter->k; for extents that key is sized to cover the gap.
-+ *
-+ * Returns: the key, bkey_s_c_null, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
-+{
-+ struct btree_trans *trans = iter->trans;
-+ struct bpos search_key;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ bch2_btree_iter_verify(iter);
-+ bch2_btree_iter_verify_entry_exit(iter);
-+ EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
-+ EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE));
-+
-+ /* extents can't span inode numbers: */
-+ if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
-+ unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
-+ if (iter->pos.inode == KEY_INODE_MAX)
-+ return bkey_s_c_null;
-+
-+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
-+ }
-+
-+ search_key = btree_iter_search_key(iter);
-+ iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
-+ iter->flags & BTREE_ITER_INTENT,
-+ btree_iter_ip_allocated(iter));
-+
-+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+ if (unlikely(ret)) {
-+ k = bkey_s_c_err(ret);
-+ goto out_no_locked;
-+ }
-+
-+ if ((iter->flags & BTREE_ITER_CACHED) ||
-+ !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
-+ struct bkey_i *next_update;
-+
-+ /* A pending update at exactly iter->pos takes precedence: */
-+ if ((next_update = btree_trans_peek_updates(iter)) &&
-+ bpos_eq(next_update->k.p, iter->pos)) {
-+ iter->k = next_update->k;
-+ k = bkey_i_to_s_c(next_update);
-+ goto out;
-+ }
-+
-+ if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
-+ (k = btree_trans_peek_slot_journal(trans, iter)).k)
-+ goto out;
-+
-+ if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
-+ (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
-+ if (!bkey_err(k))
-+ iter->k = *k.k;
-+ /* We're not returning a key from iter->path: */
-+ goto out_no_locked;
-+ }
-+
-+ k = bch2_btree_path_peek_slot(iter->path, &iter->k);
-+ if (unlikely(!k.k))
-+ goto out_no_locked;
-+ } else {
-+ struct bpos next;
-+ struct bpos end = iter->pos;
-+
-+ if (iter->flags & BTREE_ITER_IS_EXTENTS)
-+ end.offset = U64_MAX;
-+
-+ EBUG_ON(iter->path->level);
-+
-+ if (iter->flags & BTREE_ITER_INTENT) {
-+ struct btree_iter iter2;
-+
-+ /* Peek with a copy; only the resulting key is copied back into @iter: */
-+ bch2_trans_copy_iter(&iter2, iter);
-+ k = bch2_btree_iter_peek_upto(&iter2, end);
-+
-+ if (k.k && !bkey_err(k)) {
-+ iter->k = iter2.k;
-+ k.k = &iter->k;
-+ }
-+ bch2_trans_iter_exit(trans, &iter2);
-+ } else {
-+ struct bpos pos = iter->pos;
-+
-+ /* Peek with @iter itself, then put its position back: */
-+ k = bch2_btree_iter_peek_upto(iter, end);
-+ if (unlikely(bkey_err(k)))
-+ bch2_btree_iter_set_pos(iter, pos);
-+ else
-+ iter->pos = pos;
-+ }
-+
-+ if (unlikely(bkey_err(k)))
-+ goto out_no_locked;
-+
-+ next = k.k ? bkey_start_pos(k.k) : POS_MAX;
-+
-+ /* Nothing covers iter->pos: synthesize an empty key there */
-+ if (bkey_lt(iter->pos, next)) {
-+ bkey_init(&iter->k);
-+ iter->k.p = iter->pos;
-+
-+ if (iter->flags & BTREE_ITER_IS_EXTENTS) {
-+ /*
-+ * Size the hole to reach the next key in this
-+ * inode (or KEY_OFFSET_MAX), capped at
-+ * KEY_SIZE_MAX:
-+ */
-+ bch2_key_resize(&iter->k,
-+ min_t(u64, KEY_SIZE_MAX,
-+ (next.inode == iter->pos.inode
-+ ? next.offset
-+ : KEY_OFFSET_MAX) -
-+ iter->pos.offset));
-+ EBUG_ON(!iter->k.size);
-+ }
-+
-+ k = (struct bkey_s_c) { &iter->k, NULL };
-+ }
-+ }
-+out:
-+ btree_path_set_should_be_locked(iter->path);
-+out_no_locked:
-+ bch2_btree_iter_verify_entry_exit(iter);
-+ bch2_btree_iter_verify(iter);
-+ ret = bch2_btree_iter_verify_ret(iter, k);
-+ if (unlikely(ret))
-+ return bkey_s_c_err(ret);
-+
-+ return k;
-+}
-+
-+/*
-+ * Advance the iterator, then peek the slot at its new position. Returns
-+ * bkey_s_c_null when bch2_btree_iter_advance() returns false.
-+ */
-+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
-+{
-+	return bch2_btree_iter_advance(iter)
-+		? bch2_btree_iter_peek_slot(iter)
-+		: bkey_s_c_null;
-+}
-+
-+/*
-+ * Rewind the iterator, then peek the slot at its new position. Returns
-+ * bkey_s_c_null when bch2_btree_iter_rewind() returns false.
-+ */
-+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
-+{
-+	return bch2_btree_iter_rewind(iter)
-+		? bch2_btree_iter_peek_slot(iter)
-+		: bkey_s_c_null;
-+}
-+
-+/*
-+ * Peek, restarting the transaction for as long as it has too many iterators
-+ * or the peek fails with a BCH_ERR_transaction_restart error. Any other result
-+ * (a key, null, or a different error) is returned to the caller.
-+ */
-+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
-+{
-+ struct bkey_s_c k;
-+
-+ /* @k is only assigned when the too-many-iters check is false */
-+ while (btree_trans_too_many_iters(iter->trans) ||
-+ (k = bch2_btree_iter_peek_type(iter, iter->flags),
-+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
-+ bch2_trans_begin(iter->trans);
-+
-+ return k;
-+}
-+
-+/* new transactional stuff: */
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+/*
-+ * Check that trans->sorted[] and each path's ->sorted_idx refer to each other
-+ * consistently, and that the number of sorted entries equals the number of
-+ * bits set in trans->paths_allocated.
-+ */
-+static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
-+{
-+ struct btree_path *path;
-+ unsigned i;
-+
-+ BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated));
-+
-+ trans_for_each_path(trans, path) {
-+ BUG_ON(path->sorted_idx >= trans->nr_sorted);
-+ BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
-+ }
-+
-+ for (i = 0; i < trans->nr_sorted; i++) {
-+ unsigned idx = trans->sorted[i];
-+
-+ EBUG_ON(!(trans->paths_allocated & (1ULL << idx)));
-+ BUG_ON(trans->paths[idx].sorted_idx != i);
-+ }
-+}
-+
-+/*
-+ * When bch2_debug_check_iterators is set, walk the paths in sorted order and
-+ * panic (after dumping the transaction's paths) if any adjacent pair compares
-+ * out of order according to btree_path_cmp().
-+ */
-+static void btree_trans_verify_sorted(struct btree_trans *trans)
-+{
-+ struct btree_path *path, *prev = NULL;
-+ unsigned i;
-+
-+ if (!bch2_debug_check_iterators)
-+ return;
-+
-+ trans_for_each_path_inorder(trans, path, i) {
-+ if (prev && btree_path_cmp(prev, path) > 0) {
-+ __bch2_dump_trans_paths_updates(trans, true);
-+ panic("trans paths out of order!\n");
-+ }
-+ prev = path;
-+ }
-+}
-+#else
-+/* Without CONFIG_BCACHEFS_DEBUG both checks compile to nothing: */
-+static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
-+static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
-+#endif
-+
-+/*
-+ * Sort trans->sorted[] by btree_path_cmp(), keeping each path's ->sorted_idx
-+ * in step with its slot, then mark the transaction's paths as sorted. If
-+ * trans->paths_sorted is already set only the verification hooks run.
-+ */
-+void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
-+{
-+ int i, l = 0, r = trans->nr_sorted, inc = 1;
-+ bool swapped;
-+
-+ btree_trans_verify_sorted_refs(trans);
-+
-+ if (trans->paths_sorted)
-+ goto out;
-+
-+ /*
-+ * Cocktail shaker sort: this is efficient because iterators will be
-+ * mostly sorted.
-+ */
-+ do {
-+ swapped = false;
-+
-+ /* One pass over [l, r): left to right when inc > 0, else right to left */
-+ for (i = inc > 0 ? l : r - 2;
-+ i + 1 < r && i >= l;
-+ i += inc) {
-+ if (btree_path_cmp(trans->paths + trans->sorted[i],
-+ trans->paths + trans->sorted[i + 1]) > 0) {
-+ swap(trans->sorted[i], trans->sorted[i + 1]);
-+ trans->paths[trans->sorted[i]].sorted_idx = i;
-+ trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
-+ swapped = true;
-+ }
-+ }
-+
-+ /* Shrink the window from the side this pass ended on, then reverse */
-+ if (inc > 0)
-+ --r;
-+ else
-+ l++;
-+ inc = -inc;
-+ } while (swapped);
-+
-+ trans->paths_sorted = true;
-+out:
-+ btree_trans_verify_sorted(trans);
-+}
-+
-+/*
-+ * Remove @path's entry from trans->sorted[], renumber ->sorted_idx for the
-+ * entries that moved down, and set @path->sorted_idx to U8_MAX.
-+ */
-+static inline void btree_path_list_remove(struct btree_trans *trans,
-+ struct btree_path *path)
-+{
-+ unsigned i;
-+
-+ EBUG_ON(path->sorted_idx >= trans->nr_sorted);
-+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-+ trans->nr_sorted--;
-+ /* Shift the tail down, moving in units of 8 entries (rounded up) */
-+ memmove_u64s_down_small(trans->sorted + path->sorted_idx,
-+ trans->sorted + path->sorted_idx + 1,
-+ DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
-+#else
-+ array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
-+#endif
-+ for (i = path->sorted_idx; i < trans->nr_sorted; i++)
-+ trans->paths[trans->sorted[i]].sorted_idx = i;
-+
-+ path->sorted_idx = U8_MAX;
-+}
-+
-+/*
-+ * Insert @path into trans->sorted[] immediately after @pos, or at the end of
-+ * the list when @pos is NULL, and renumber ->sorted_idx from the insertion
-+ * point onwards.
-+ */
-+static inline void btree_path_list_add(struct btree_trans *trans,
-+ struct btree_path *pos,
-+ struct btree_path *path)
-+{
-+ unsigned i;
-+
-+ path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted;
-+
-+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-+ /* Shift the tail up, moving in units of 8 entries (rounded up) */
-+ memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
-+ trans->sorted + path->sorted_idx,
-+ DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
-+ trans->nr_sorted++;
-+ trans->sorted[path->sorted_idx] = path->idx;
-+#else
-+ array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
-+#endif
-+
-+ for (i = path->sorted_idx; i < trans->nr_sorted; i++)
-+ trans->paths[trans->sorted[i]].sorted_idx = i;
-+
-+ btree_trans_verify_sorted_refs(trans);
-+}
-+
-+/*
-+ * Release every path reference held by @iter (update path, main path and key
-+ * cache path, where present) and clear the iterator's path pointers.
-+ */
-+void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
-+{
-+ if (iter->update_path)
-+ bch2_path_put_nokeep(trans, iter->update_path,
-+ iter->flags & BTREE_ITER_INTENT);
-+ if (iter->path)
-+ bch2_path_put(trans, iter->path,
-+ iter->flags & BTREE_ITER_INTENT);
-+ if (iter->key_cache_path)
-+ bch2_path_put(trans, iter->key_cache_path,
-+ iter->flags & BTREE_ITER_INTENT);
-+ iter->path = NULL;
-+ iter->update_path = NULL;
-+ iter->key_cache_path = NULL;
-+}
-+
-+/*
-+ * Out-of-line iterator initialization: computes the iterator flags with
-+ * bch2_btree_iter_flags() and initializes @iter at @pos with locks_want and
-+ * depth both 0, recording this function's return address (_RET_IP_).
-+ */
-+void bch2_trans_iter_init_outlined(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ enum btree_id btree_id, struct bpos pos,
-+ unsigned flags)
-+{
-+ bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
-+ bch2_btree_iter_flags(trans, btree_id, flags),
-+ _RET_IP_);
-+}
-+
-+/*
-+ * Initialize @iter for walking btree nodes at @depth. The iterator is always
-+ * set up as not-extents and all-snapshots, and its min_depth is set to @depth.
-+ * The resulting path is asserted to be at @depth and to want at least
-+ * min(@locks_want, BTREE_MAX_DEPTH) locks.
-+ */
-+void bch2_trans_node_iter_init(struct btree_trans *trans,
-+			       struct btree_iter *iter,
-+			       enum btree_id btree_id,
-+			       struct bpos pos,
-+			       unsigned locks_want,
-+			       unsigned depth,
-+			       unsigned flags)
-+{
-+	flags |= BTREE_ITER_NOT_EXTENTS|
-+		 __BTREE_ITER_ALL_SNAPSHOTS|
-+		 BTREE_ITER_ALL_SNAPSHOTS;
-+
-+	bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth,
-+				    __bch2_btree_iter_flags(trans, btree_id, flags),
-+				    _RET_IP_);
-+
-+	iter->min_depth = depth;
-+
-+	BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH));
-+	BUG_ON(iter->path->level != depth);
-+	BUG_ON(iter->min_depth != depth);
-+}
-+
-+/*
-+ * Make @dst a copy of @src. The copy takes its own reference on the main path
-+ * and the update path (when set); the key cache path is not shared and is
-+ * cleared in @dst.
-+ */
-+void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
-+{
-+	bool intent = src->flags & BTREE_ITER_INTENT;
-+
-+	*dst = *src;
-+	dst->key_cache_path = NULL;
-+
-+	if (src->path)
-+		__btree_path_get(src->path, intent);
-+	if (src->update_path)
-+		__btree_path_get(src->update_path, intent);
-+}
-+
-+/*
-+ * Grow the transaction's bump-allocator buffer so that @size more bytes fit
-+ * after trans->mem_top, then hand out that region zeroed.
-+ *
-+ * The buffer is resized to the next power of two. A non-blocking krealloc() is
-+ * tried first; on failure the transaction's locks are dropped, a blocking
-+ * allocation (falling back to the btree_trans_mem_pool mempool) is made, and
-+ * the transaction is relocked.
-+ *
-+ * Returns: pointer to @size zeroed bytes, or an ERR_PTR(): allocation
-+ * failure, relock failure, or - whenever a buffer already existed before the
-+ * call (old_bytes != 0) - a transaction restart.
-+ */
-+void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
-+{
-+ unsigned new_top = trans->mem_top + size;
-+ size_t old_bytes = trans->mem_bytes;
-+ size_t new_bytes = roundup_pow_of_two(new_top);
-+ int ret;
-+ void *new_mem;
-+ void *p;
-+
-+ trans->mem_max = max(trans->mem_max, new_top);
-+
-+ WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
-+
-+ new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
-+ if (unlikely(!new_mem)) {
-+ bch2_trans_unlock(trans);
-+
-+ new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
-+ if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
-+ /* Old contents are not copied; the old buffer is simply freed */
-+ new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
-+ new_bytes = BTREE_TRANS_MEM_MAX;
-+ kfree(trans->mem);
-+ }
-+
-+ if (!new_mem)
-+ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
-+
-+ /* Publish the new buffer before relocking, which may fail */
-+ trans->mem = new_mem;
-+ trans->mem_bytes = new_bytes;
-+
-+ ret = bch2_trans_relock(trans);
-+ if (ret)
-+ return ERR_PTR(ret);
-+ }
-+
-+ trans->mem = new_mem;
-+ trans->mem_bytes = new_bytes;
-+
-+ /* A buffer existed before this call: restart instead of allocating */
-+ if (old_bytes) {
-+ trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
-+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
-+ }
-+
-+ p = trans->mem + trans->mem_top;
-+ trans->mem_top += size;
-+ memset(p, 0, size);
-+ return p;
-+}
-+
-+/*
-+ * Warn if the transaction currently holds its SRCU read lock and more than
-+ * ten seconds (HZ * 10 jiffies) have passed since trans->srcu_lock_time.
-+ */
-+static inline void check_srcu_held_too_long(struct btree_trans *trans)
-+{
-+ WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
-+ "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
-+ (jiffies - trans->srcu_lock_time) / HZ);
-+}
-+
-+/*
-+ * Drop the transaction's SRCU read lock, if held. Before unlocking, every
-+ * cached path whose level 0 node is not locked has its node pointer replaced
-+ * with the no_btree_node_srcu_reset error pointer.
-+ */
-+void bch2_trans_srcu_unlock(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	if (!trans->srcu_held)
-+		return;
-+
-+	trans_for_each_path(trans, path)
-+		if (path->cached && !btree_node_locked(path, 0))
-+			path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
-+
-+	check_srcu_held_too_long(trans);
-+	srcu_read_unlock(&trans->c->btree_trans_barrier, trans->srcu_idx);
-+	trans->srcu_held = false;
-+}
-+
-+/*
-+ * Take the transaction's SRCU read lock unless it is already held, recording
-+ * the lock index and the time (in jiffies) at which it was taken.
-+ */
-+void bch2_trans_srcu_lock(struct btree_trans *trans)
-+{
-+	if (trans->srcu_held)
-+		return;
-+
-+	trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
-+	trans->srcu_lock_time = jiffies;
-+	trans->srcu_held = true;
-+}
-+
-+/**
-+ * bch2_trans_begin() - reset a transaction after an interrupted attempt
-+ * @trans: transaction to reset
-+ *
-+ * Returns: current restart counter, to be used with trans_was_restarted()
-+ *
-+ * While iterating over nodes or updating nodes an attempt to lock a btree node
-+ * may return BCH_ERR_transaction_restart when the trylock fails. When this
-+ * occurs bch2_trans_begin() should be called and the transaction retried.
-+ */
-+u32 bch2_trans_begin(struct btree_trans *trans)
-+{
-+ struct btree_path *path;
-+ u64 now;
-+
-+ bch2_trans_reset_updates(trans);
-+
-+ trans->restart_count++;
-+ trans->mem_top = 0;
-+
-+ trans_for_each_path(trans, path) {
-+ path->should_be_locked = false;
-+
-+ /*
-+ * If the transaction wasn't restarted, we're presuming to be
-+ * doing something new: don't keep iterators except the ones that
-+ * are in use - except for the subvolumes btree:
-+ */
-+ if (!trans->restarted && path->btree_id != BTREE_ID_subvolumes)
-+ path->preserve = false;
-+
-+ /*
-+ * XXX: we probably shouldn't be doing this if the transaction
-+ * was restarted, but currently we still overflow transaction
-+ * iterators if we do that
-+ */
-+ if (!path->ref && !path->preserve)
-+ __bch2_path_free(trans, path);
-+ else
-+ path->preserve = false;
-+ }
-+
-+ /*
-+ * On a fresh (non-restart) begin, drop locks and reschedule if a
-+ * reschedule is pending or locks have been held longer than
-+ * BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS since the previous begin:
-+ */
-+ now = local_clock();
-+ if (!trans->restarted &&
-+ (need_resched() ||
-+ now - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
-+ drop_locks_do(trans, (cond_resched(), 0));
-+ now = local_clock();
-+ }
-+ trans->last_begin_time = now;
-+
-+ /* Release the SRCU read lock once it has been held for over 10ms: */
-+ if (unlikely(trans->srcu_held &&
-+ time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
-+ bch2_trans_srcu_unlock(trans);
-+
-+ trans->last_begin_ip = _RET_IP_;
-+ if (trans->restarted) {
-+ bch2_btree_path_traverse_all(trans);
-+ trans->notrace_relock_fail = false;
-+ }
-+
-+ return trans->restart_count;
-+}
-+
-+/*
-+ * Obtain memory for a btree_trans: in kernel builds first try to take the
-+ * buffer parked in this CPU's c->btree_trans_bufs slot, otherwise allocate
-+ * from c->btree_trans_pool and zero its paths array.
-+ */
-+static struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
-+{
-+ struct btree_trans *trans;
-+
-+ if (IS_ENABLED(__KERNEL__)) {
-+ /* Atomically claim the per-CPU buffer, leaving NULL behind */
-+ trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
-+ if (trans)
-+ return trans;
-+ }
-+
-+ trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
-+ /*
-+ * paths need to be zeroed, bch2_check_for_deadlock looks at
-+ * paths in other threads
-+ */
-+ memset(&trans->paths, 0, sizeof(trans->paths));
-+ return trans;
-+}
-+
-+/* Table of transaction function names, indexed by the value returned below */
-+const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
-+
-+/*
-+ * Return the table index for @fn, claiming the first empty slot if @fn is not
-+ * yet present. Entries are matched by pointer, not by string contents.
-+ *
-+ * If the table is full a warning is printed once and the array size
-+ * (an out-of-range index) is returned.
-+ */
-+unsigned bch2_trans_get_fn_idx(const char *fn)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
-+ if (!bch2_btree_transaction_fns[i] ||
-+ bch2_btree_transaction_fns[i] == fn) {
-+ bch2_btree_transaction_fns[i] = fn;
-+ return i;
-+ }
-+
-+ pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
-+ return i;
-+}
-+
-+/*
-+ * __bch2_trans_get() - allocate and initialize a btree transaction
-+ * @c:		filesystem the transaction belongs to
-+ * @fn_idx:	index into bch2_btree_transaction_fns[] naming the caller; an
-+ *		out-of-range index leaves trans->fn NULL
-+ *
-+ * The transaction is zeroed, bound to the current task and, when per-function
-+ * statistics are available, given a bump-allocator buffer pre-sized from the
-+ * largest amount of memory previously used (falling back to the
-+ * btree_trans_mem_pool mempool if that allocation fails). The SRCU read lock
-+ * on c->btree_trans_barrier is taken before returning.
-+ *
-+ * With CONFIG_BCACHEFS_DEBUG_TRANSACTIONS the transaction is also inserted
-+ * into c->btree_trans_list, which is kept ordered by task pid.
-+ *
-+ * Returns: the new transaction.
-+ */
-+struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
-+	__acquires(&c->btree_trans_barrier)
-+{
-+	struct btree_trans *trans;
-+	struct btree_transaction_stats *s;
-+
-+	trans = bch2_trans_alloc(c);
-+
-+	memset(trans, 0, sizeof(*trans));
-+	trans->c		= c;
-+	trans->fn		= fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
-+		? bch2_btree_transaction_fns[fn_idx] : NULL;
-+	trans->last_begin_time	= local_clock();
-+	trans->fn_idx		= fn_idx;
-+	trans->locking_wait.task = current;
-+	trans->journal_replay_not_finished =
-+		!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
-+	closure_init_stack(&trans->ref);
-+
-+	s = btree_trans_stats(trans);
-+	if (s && s->max_mem) {
-+		unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);
-+
-+		trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL);
-+
-+		/* The hint belongs on the failure test, not on the pointer */
-+		if (unlikely(!trans->mem)) {
-+			trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
-+			trans->mem_bytes = BTREE_TRANS_MEM_MAX;
-+		} else {
-+			trans->mem_bytes = expected_mem_bytes;
-+		}
-+	}
-+
-+	if (s) {
-+		trans->nr_max_paths = s->nr_max_paths;
-+		trans->wb_updates_size = s->wb_updates_size;
-+	}
-+
-+	trans->srcu_idx		= srcu_read_lock(&c->btree_trans_barrier);
-+	trans->srcu_lock_time	= jiffies;
-+	trans->srcu_held	= true;
-+
-+	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
-+		struct btree_trans *pos;
-+
-+		seqmutex_lock(&c->btree_trans_lock);
-+		list_for_each_entry(pos, &c->btree_trans_list, list) {
-+			/*
-+			 * We'd much prefer to be stricter here and completely
-+			 * disallow multiple btree_trans in the same thread -
-+			 * but the data move path calls bch2_write when we
-+			 * already have a btree_trans initialized.
-+			 */
-+			BUG_ON(trans->locking_wait.task->pid == pos->locking_wait.task->pid &&
-+			       bch2_trans_locked(pos));
-+
-+			/* Insert before the first entry with a larger pid */
-+			if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) {
-+				list_add_tail(&trans->list, &pos->list);
-+				goto list_add_done;
-+			}
-+		}
-+		list_add_tail(&trans->list, &c->btree_trans_list);
-+list_add_done:
-+		seqmutex_unlock(&c->btree_trans_lock);
-+	}
-+
-+	return trans;
-+}
-+
-+/*
-+ * Debug builds only: if any path in @trans still has a nonzero reference
-+ * count, log the transaction's function name plus the btree and allocation
-+ * site of each such path, then raise a fatal filesystem error.
-+ */
-+static void check_btree_paths_leaked(struct btree_trans *trans)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ struct bch_fs *c = trans->c;
-+ struct btree_path *path;
-+
-+ trans_for_each_path(trans, path)
-+ if (path->ref)
-+ goto leaked;
-+ return;
-+leaked:
-+ bch_err(c, "btree paths leaked from %s!", trans->fn);
-+ trans_for_each_path(trans, path)
-+ if (path->ref)
-+ printk(KERN_ERR " btree %s %pS\n",
-+ bch2_btree_id_str(path->btree_id),
-+ (void *) path->ip_allocated);
-+ /* Be noisy about this: */
-+ bch2_fatal_error(c);
-+#endif
-+}
-+
-+/*
-+ * bch2_trans_put() - tear down a transaction obtained from __bch2_trans_get()
-+ *
-+ * Unlocks the transaction, removes it from the debug list, waits on its
-+ * closure, records its peak memory use in the per-function stats, drops the
-+ * path references held by pending updates, releases the SRCU read lock and
-+ * journal pre-reservation, and frees the auxiliary buffers. The btree_trans
-+ * itself is parked in the per-CPU slot (kernel builds); whatever was in that
-+ * slot before, if anything, goes back to the mempool.
-+ */
-+void bch2_trans_put(struct btree_trans *trans)
-+ __releases(&c->btree_trans_barrier)
-+{
-+ struct btree_insert_entry *i;
-+ struct bch_fs *c = trans->c;
-+ struct btree_transaction_stats *s = btree_trans_stats(trans);
-+
-+ bch2_trans_unlock(trans);
-+
-+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
-+ seqmutex_lock(&c->btree_trans_lock);
-+ list_del(&trans->list);
-+ seqmutex_unlock(&c->btree_trans_lock);
-+ }
-+
-+ closure_sync(&trans->ref);
-+
-+ if (s)
-+ s->max_mem = max(s->max_mem, trans->mem_max);
-+
-+ trans_for_each_update(trans, i)
-+ __btree_path_put(i->path, true);
-+ trans->nr_updates = 0;
-+
-+ check_btree_paths_leaked(trans);
-+
-+ if (trans->srcu_held) {
-+ check_srcu_held_too_long(trans);
-+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
-+ }
-+
-+ bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-+
-+ kfree(trans->extra_journal_entries.data);
-+
-+ if (trans->fs_usage_deltas) {
-+ /*
-+ * NOTE(review): sizeof(trans->fs_usage_deltas) is the size of
-+ * the pointer, not of the structure it points to - confirm this
-+ * matches how ->size is set for a mempool-backed list.
-+ */
-+ if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
-+ REPLICAS_DELTA_LIST_MAX)
-+ mempool_free(trans->fs_usage_deltas,
-+ &c->replicas_delta_pool);
-+ else
-+ kfree(trans->fs_usage_deltas);
-+ }
-+
-+ /* A buffer of exactly BTREE_TRANS_MEM_MAX bytes is treated as mempool-backed */
-+ if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
-+ mempool_free(trans->mem, &c->btree_trans_mem_pool);
-+ else
-+ kfree(trans->mem);
-+
-+ /* Userspace doesn't have a real percpu implementation: */
-+ if (IS_ENABLED(__KERNEL__))
-+ trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
-+ if (trans)
-+ mempool_free(trans, &c->btree_trans_pool);
-+}
-+
-+/*
-+ * Print one line describing @b: its address, whether it is a cached key ('c')
-+ * or a btree node ('b'), level, btree name and position, followed by the
-+ * three six-lock counts and the pid of the lock's owner (0 if there is none).
-+ */
-+static void __maybe_unused
-+bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
-+ struct btree_bkey_cached_common *b)
-+{
-+ struct six_lock_count c = six_lock_counts(&b->lock);
-+ struct task_struct *owner;
-+ pid_t pid;
-+
-+ /* The owner task is only dereferenced inside the RCU read section */
-+ rcu_read_lock();
-+ owner = READ_ONCE(b->lock.owner);
-+ pid = owner ? owner->pid : 0;
-+ rcu_read_unlock();
-+
-+ prt_tab(out);
-+ prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b',
-+ b->level, bch2_btree_id_str(b->btree_id));
-+ bch2_bpos_to_text(out, btree_node_pos(b));
-+
-+ prt_tab(out);
-+ prt_printf(out, " locks %u:%u:%u held by pid %u",
-+ c.n[0], c.n[1], c.n[2], pid);
-+}
-+
-+/*
-+ * Describe @trans in @out: the owning task's pid and the transaction's
-+ * function name, then every path that holds locks together with each locked
-+ * node, and finally the lock the transaction is currently blocked on, if any.
-+ *
-+ * Paths are walked with trans_for_each_path_safe(), the variant intended for
-+ * a btree_trans owned by another thread.
-+ */
-+void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
-+{
-+ struct btree_path *path;
-+ struct btree_bkey_cached_common *b;
-+ static char lock_types[] = { 'r', 'i', 'w' };
-+ unsigned l, idx;
-+
-+ if (!out->nr_tabstops) {
-+ printbuf_tabstop_push(out, 16);
-+ printbuf_tabstop_push(out, 32);
-+ }
-+
-+ prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn);
-+
-+ trans_for_each_path_safe(trans, path, idx) {
-+ if (!path->nodes_locked)
-+ continue;
-+
-+ prt_printf(out, " path %u %c l=%u %s:",
-+ path->idx,
-+ path->cached ? 'c' : 'b',
-+ path->level,
-+ bch2_btree_id_str(path->btree_id));
-+ bch2_bpos_to_text(out, path->pos);
-+ prt_newline(out);
-+
-+ for (l = 0; l < BTREE_MAX_DEPTH; l++) {
-+ /* Skip levels whose node pointer is NULL or an error pointer */
-+ if (btree_node_locked(path, l) &&
-+ !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) {
-+ prt_printf(out, " %c l=%u ",
-+ lock_types[btree_node_locked_type(path, l)], l);
-+ bch2_btree_bkey_cached_common_to_text(out, b);
-+ prt_newline(out);
-+ }
-+ }
-+ }
-+
-+ b = READ_ONCE(trans->locking);
-+ if (b) {
-+ prt_printf(out, " blocked for %lluus on",
-+ div_u64(local_clock() - trans->locking_wait.start_time,
-+ 1000));
-+ prt_newline(out);
-+ prt_printf(out, " %c", lock_types[trans->locking_wait.lock_want]);
-+ bch2_btree_bkey_cached_common_to_text(out, b);
-+ prt_newline(out);
-+ }
-+}
-+
-+/*
-+ * Filesystem teardown for the btree iterator/transaction code: panics if any
-+ * transaction is still on c->btree_trans_list, then frees the per-CPU
-+ * transaction buffers, per-function statistics, the SRCU barrier (if it was
-+ * initialized) and both mempools.
-+ */
-+void bch2_fs_btree_iter_exit(struct bch_fs *c)
-+{
-+ struct btree_transaction_stats *s;
-+ struct btree_trans *trans;
-+ int cpu;
-+
-+ trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list);
-+ if (trans)
-+ panic("%s leaked btree_trans\n", trans->fn);
-+
-+ if (c->btree_trans_bufs)
-+ for_each_possible_cpu(cpu)
-+ kfree(per_cpu_ptr(c->btree_trans_bufs, cpu)->trans);
-+ free_percpu(c->btree_trans_bufs);
-+
-+ for (s = c->btree_transaction_stats;
-+ s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
-+ s++) {
-+ kfree(s->max_paths_text);
-+ bch2_time_stats_exit(&s->lock_hold_times);
-+ }
-+
-+ if (c->btree_trans_barrier_initialized)
-+ cleanup_srcu_struct(&c->btree_trans_barrier);
-+ mempool_exit(&c->btree_trans_mem_pool);
-+ mempool_exit(&c->btree_trans_pool);
-+}
-+
-+/*
-+ * Filesystem setup for the btree iterator/transaction code: initializes the
-+ * per-function statistics, the transaction list and its lock, the per-CPU
-+ * transaction buffers, both mempools and the SRCU barrier.
-+ *
-+ * Returns: 0 on success, -ENOMEM if the per-CPU allocation fails, or the
-+ * first nonzero result from the mempool/SRCU initializers.
-+ */
-+int bch2_fs_btree_iter_init(struct bch_fs *c)
-+{
-+ struct btree_transaction_stats *s;
-+ int ret;
-+
-+ for (s = c->btree_transaction_stats;
-+ s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
-+ s++) {
-+ bch2_time_stats_init(&s->lock_hold_times);
-+ mutex_init(&s->lock);
-+ }
-+
-+ INIT_LIST_HEAD(&c->btree_trans_list);
-+ seqmutex_init(&c->btree_trans_lock);
-+
-+ c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
-+ if (!c->btree_trans_bufs)
-+ return -ENOMEM;
-+
-+ /* The ?: chain stops at the first initializer that returns nonzero */
-+ ret = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1,
-+ sizeof(struct btree_trans)) ?:
-+ mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
-+ BTREE_TRANS_MEM_MAX) ?:
-+ init_srcu_struct(&c->btree_trans_barrier);
-+ if (!ret)
-+ c->btree_trans_barrier_initialized = true;
-+ return ret;
-+}
-diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
-new file mode 100644
-index 000000000000..85e7cb52f6b6
---- /dev/null
-+++ b/fs/bcachefs/btree_iter.h
-@@ -0,0 +1,943 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_ITER_H
-+#define _BCACHEFS_BTREE_ITER_H
-+
-+#include "bset.h"
-+#include "btree_types.h"
-+#include "trace.h"
-+
-+/*
-+ * Extract the error encoded in a key pointer: the error code if @k is an
-+ * error pointer, otherwise 0.
-+ */
-+static inline int __bkey_err(const struct bkey *k)
-+{
-+ return PTR_ERR_OR_ZERO(k);
-+}
-+
-+/* Same check applied to the .k member of a key wrapper such as bkey_s_c */
-+#define bkey_err(_k) __bkey_err((_k).k)
-+
-+/*
-+ * Take a reference on @path; when @intent is true the intent reference count
-+ * is incremented as well.
-+ */
-+static inline void __btree_path_get(struct btree_path *path, bool intent)
-+{
-+ path->ref++;
-+ path->intent_ref += intent;
-+}
-+
-+/*
-+ * Drop a reference on @path (and an intent reference when @intent is true).
-+ *
-+ * Returns: true if that was the last reference.
-+ */
-+static inline bool __btree_path_put(struct btree_path *path, bool intent)
-+{
-+	EBUG_ON(!path->ref);
-+	EBUG_ON(!path->intent_ref && intent);
-+
-+	path->intent_ref -= intent;
-+	path->ref--;
-+
-+	return !path->ref;
-+}
-+
-+/*
-+ * Raise @path->uptodate to @u if @u is numerically larger; the state is never
-+ * lowered here.
-+ */
-+static inline void btree_path_set_dirty(struct btree_path *path,
-+ enum btree_path_uptodate u)
-+{
-+ path->uptodate = max_t(unsigned, path->uptodate, u);
-+}
-+
-+/*
-+ * Return the node @path has at @level, or NULL when @level is not below
-+ * BTREE_MAX_DEPTH.
-+ */
-+static inline struct btree *btree_path_node(struct btree_path *path,
-+					    unsigned level)
-+{
-+	if (level >= BTREE_MAX_DEPTH)
-+		return NULL;
-+
-+	return path->l[level].b;
-+}
-+
-+/*
-+ * True if the lock sequence number @path recorded for @level equals the
-+ * current sequence number of @b's lock.
-+ */
-+static inline bool btree_node_lock_seq_matches(const struct btree_path *path,
-+ const struct btree *b, unsigned level)
-+{
-+ return path->l[level].lock_seq == six_lock_seq(&b->c.lock);
-+}
-+
-+/*
-+ * Return the node @path holds one level above @b, or NULL if that level is
-+ * outside the path's range (see btree_path_node()).
-+ */
-+static inline struct btree *btree_node_parent(struct btree_path *path,
-+ struct btree *b)
-+{
-+ return btree_path_node(path, b->c.level + 1);
-+}
-+
-+/* Iterate over paths within a transaction: */
-+
-+void __bch2_btree_trans_sort_paths(struct btree_trans *);
-+
-+/*
-+ * Sort the transaction's paths if needed. With CONFIG_BCACHEFS_DEBUG the
-+ * out-of-line function is always called, even when the paths are already
-+ * marked sorted.
-+ */
-+static inline void btree_trans_sort_paths(struct btree_trans *trans)
-+{
-+	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
-+	    !trans->paths_sorted)
-+		__bch2_btree_trans_sort_paths(trans);
-+}
-+
-+/*
-+ * Return the first allocated path whose index is >= @idx, according to the
-+ * trans->paths_allocated bitmap, or NULL if there is none.
-+ */
-+static inline struct btree_path *
-+__trans_next_path(struct btree_trans *trans, unsigned idx)
-+{
-+ u64 l;
-+
-+ if (idx == BTREE_ITER_MAX)
-+ return NULL;
-+
-+ /* Discard bits below @idx; the lowest remaining set bit is the next path */
-+ l = trans->paths_allocated >> idx;
-+ if (!l)
-+ return NULL;
-+
-+ idx += __ffs64(l);
-+ EBUG_ON(idx >= BTREE_ITER_MAX);
-+ EBUG_ON(trans->paths[idx].idx != idx);
-+ return &trans->paths[idx];
-+}
-+
-+/*
-+ * Iterate over the allocated paths of @_trans in index order, starting at
-+ * index @_start. The next path is found from (_path)->idx, so the loop body
-+ * must leave that field intact.
-+ */
-+#define trans_for_each_path_from(_trans, _path, _start) \
-+ for (_path = __trans_next_path((_trans), _start); \
-+ (_path); \
-+ _path = __trans_next_path((_trans), (_path)->idx + 1))
-+
-+/* Iterate over every allocated path of @_trans in index order */
-+#define trans_for_each_path(_trans, _path) \
-+ trans_for_each_path_from(_trans, _path, 0)
-+
-+/*
-+ * Like __trans_next_path(), but the position is kept in *@idx rather than
-+ * read back from the path, and the path's own ->idx field is not checked.
-+ * On success *@idx is updated to the index of the returned path.
-+ */
-+static inline struct btree_path *
-+__trans_next_path_safe(struct btree_trans *trans, unsigned *idx)
-+{
-+ u64 l;
-+
-+ if (*idx == BTREE_ITER_MAX)
-+ return NULL;
-+
-+ l = trans->paths_allocated >> *idx;
-+ if (!l)
-+ return NULL;
-+
-+ *idx += __ffs64(l);
-+ EBUG_ON(*idx >= BTREE_ITER_MAX);
-+ return &trans->paths[*idx];
-+}
-+
-+/*
-+ * This version is intended to be safe for use on a btree_trans that is owned by
-+ * another thread, for bch2_btree_trans_to_text();
-+ */
-+#define trans_for_each_path_safe_from(_trans, _path, _idx, _start) \
-+ for (_idx = _start; \
-+ (_path = __trans_next_path_safe((_trans), &_idx)); \
-+ _idx++)
-+
-+#define trans_for_each_path_safe(_trans, _path, _idx) \
-+ trans_for_each_path_safe_from(_trans, _path, _idx, 0)
-+
-+/*
-+ * Return the path that follows @path in sorted order, or the first sorted
-+ * path when @path is NULL. Returns NULL past the end of the sorted list.
-+ */
-+static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path)
-+{
-+	unsigned next = 0;
-+
-+	if (path)
-+		next = path->sorted_idx + 1;
-+
-+	EBUG_ON(next > trans->nr_sorted);
-+
-+	if (next >= trans->nr_sorted)
-+		return NULL;
-+
-+	return trans->paths + trans->sorted[next];
-+}
-+
-+/*
-+ * Return the path that precedes @path in sorted order, or the last sorted
-+ * path when @path is NULL. Returns NULL before the start of the sorted list.
-+ */
-+static inline struct btree_path *prev_btree_path(struct btree_trans *trans, struct btree_path *path)
-+{
-+	unsigned cur = path ? path->sorted_idx : trans->nr_sorted;
-+
-+	if (!cur)
-+		return NULL;
-+
-+	return trans->paths + trans->sorted[cur - 1];
-+}
-+
-+/*
-+ * Iterate over the paths of @_trans in sorted order; @_i is the loop counter
-+ * and indexes (_trans)->sorted[]. Only the macro's own @_trans argument is
-+ * referenced, so the caller's transaction variable may have any name.
-+ */
-+#define trans_for_each_path_inorder(_trans, _path, _i)			\
-+	for (_i = 0;							\
-+	     ((_path) = (_trans)->paths + (_trans)->sorted[_i]), (_i) < (_trans)->nr_sorted;\
-+	     _i++)
-+
-+/*
-+ * Iterate over the paths of @_trans in reverse sorted order. @_i must be a
-+ * signed integer lvalue: the loop ends when it drops below zero. The bound is
-+ * tested before (_trans)->sorted[_i] is read, so the array is never indexed
-+ * with -1, and only the macro's own @_trans argument is referenced.
-+ */
-+#define trans_for_each_path_inorder_reverse(_trans, _path, _i)		\
-+	for (_i = (_trans)->nr_sorted - 1;				\
-+	     (_i) >= 0 &&						\
-+	     (((_path) = (_trans)->paths + (_trans)->sorted[_i]), true);\
-+	     --_i)
-+
-+/*
-+ * True if @path points at node @b at @b's level and the lock sequence number
-+ * it recorded for that level still matches @b's lock.
-+ */
-+static inline bool __path_has_node(const struct btree_path *path,
-+				   const struct btree *b)
-+{
-+	unsigned level = b->c.level;
-+
-+	if (path->l[level].b != b)
-+		return false;
-+
-+	return btree_node_lock_seq_matches(path, b, level);
-+}
-+
-+/*
-+ * Return the first allocated path with index >= @idx for which
-+ * __path_has_node(path, @b) holds, or NULL if there is none.
-+ */
-+static inline struct btree_path *
-+__trans_next_path_with_node(struct btree_trans *trans, struct btree *b,
-+ unsigned idx)
-+{
-+ struct btree_path *path = __trans_next_path(trans, idx);
-+
-+ while (path && !__path_has_node(path, b))
-+ path = __trans_next_path(trans, path->idx + 1);
-+
-+ return path;
-+}
-+
-+/* Iterate, in index order, over the paths of @_trans that have node @_b */
-+#define trans_for_each_path_with_node(_trans, _b, _path) \
-+ for (_path = __trans_next_path_with_node((_trans), (_b), 0); \
-+ (_path); \
-+ _path = __trans_next_path_with_node((_trans), (_b), \
-+ (_path)->idx + 1))
-+
-+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
-+ bool, unsigned long);
-+
-+/*
-+ * Get a path the caller may modify. If @path has more than one reference or
-+ * is marked preserve, the out-of-line __bch2_btree_path_make_mut() is called
-+ * and its result used instead. The returned path has should_be_locked
-+ * cleared; callers must use the returned pointer.
-+ */
-+static inline struct btree_path * __must_check
-+bch2_btree_path_make_mut(struct btree_trans *trans,
-+ struct btree_path *path, bool intent,
-+ unsigned long ip)
-+{
-+ if (path->ref > 1 || path->preserve)
-+ path = __bch2_btree_path_make_mut(trans, path, intent, ip);
-+ path->should_be_locked = false;
-+ return path;
-+}
-+
-+struct btree_path * __must_check
-+__bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *,
-+ struct bpos, bool, unsigned long, int);
-+
-+/*
-+ * Move @path to @new_pos. When the position is unchanged @path is returned
-+ * as-is; otherwise the out-of-line helper does the work and its result is
-+ * returned. Callers must use the returned pointer.
-+ */
-+static inline struct btree_path * __must_check
-+bch2_btree_path_set_pos(struct btree_trans *trans,
-+			struct btree_path *path, struct bpos new_pos,
-+			bool intent, unsigned long ip)
-+{
-+	int cmp = bpos_cmp(new_pos, path->pos);
-+
-+	if (!cmp)
-+		return path;
-+
-+	return __bch2_btree_path_set_pos(trans, path, new_pos, intent, ip, cmp);
-+}
-+
-+int __must_check bch2_btree_path_traverse_one(struct btree_trans *, struct btree_path *,
-+ unsigned, unsigned long);
-+
-+/*
-+ * Traverse @path if its uptodate state is BTREE_ITER_NEED_RELOCK or higher;
-+ * otherwise there is nothing to do and 0 is returned.
-+ */
-+static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
-+					  struct btree_path *path, unsigned flags)
-+{
-+	if (path->uptodate >= BTREE_ITER_NEED_RELOCK)
-+		return bch2_btree_path_traverse_one(trans, path, flags, _RET_IP_);
-+
-+	return 0;
-+}
-+
-+int __must_check bch2_btree_path_traverse(struct btree_trans *,
-+ struct btree_path *, unsigned);
-+struct btree_path *bch2_path_get(struct btree_trans *, enum btree_id, struct bpos,
-+ unsigned, unsigned, unsigned, unsigned long);
-+struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *);
-+
-+/*
-+ * bch2_btree_path_peek_slot() for a cached iterator might return a key in a
-+ * different snapshot:
-+ *
-+ * This wrapper returns the key only if its position equals @path->pos
-+ * exactly; otherwise @u is initialized as an empty key at @path->pos and
-+ * returned with a NULL value.
-+ */
-+static inline struct bkey_s_c bch2_btree_path_peek_slot_exact(struct btree_path *path, struct bkey *u)
-+{
-+ struct bkey_s_c k = bch2_btree_path_peek_slot(path, u);
-+
-+ if (k.k && bpos_eq(path->pos, k.k->p))
-+ return k;
-+
-+ bkey_init(u);
-+ u->p = path->pos;
-+ return (struct bkey_s_c) { u, NULL };
-+}
-+
-+struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,
-+ struct btree_iter *, struct bpos);
-+
-+void bch2_btree_path_level_init(struct btree_trans *, struct btree_path *, struct btree *);
-+
-+int __bch2_trans_mutex_lock(struct btree_trans *, struct mutex *);
-+
-+/*
-+ * Take @lock from within a transaction: returns 0 if the trylock succeeds,
-+ * otherwise the result of the out-of-line slow path.
-+ */
-+static inline int bch2_trans_mutex_lock(struct btree_trans *trans, struct mutex *lock)
-+{
-+	if (mutex_trylock(lock))
-+		return 0;
-+
-+	return __bch2_trans_mutex_lock(trans, lock);
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+void bch2_trans_verify_paths(struct btree_trans *);
-+void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,
-+ struct bpos, bool);
-+#else
-+static inline void bch2_trans_verify_paths(struct btree_trans *trans) {}
-+static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
-+ struct bpos pos, bool key_cache) {}
-+#endif
-+
-+void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
-+ struct btree *, struct bkey_packed *);
-+void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *,
-+ struct btree *, struct btree_node_iter *,
-+ struct bkey_packed *, unsigned, unsigned);
-+
-+int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);
-+
-+void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
-+
-+int bch2_trans_relock(struct btree_trans *);
-+int bch2_trans_relock_notrace(struct btree_trans *);
-+void bch2_trans_unlock(struct btree_trans *);
-+void bch2_trans_unlock_long(struct btree_trans *);
-+bool bch2_trans_locked(struct btree_trans *);
-+
-+/*
-+ * Returns 0 if @trans->restart_count still equals @restart_count, otherwise
-+ * -BCH_ERR_transaction_restart_nested.
-+ */
-+static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
-+{
-+	if (restart_count == trans->restart_count)
-+		return 0;
-+
-+	return -BCH_ERR_transaction_restart_nested;
-+}
-+
-+void __noreturn bch2_trans_restart_error(struct btree_trans *, u32);
-+
-+/*
-+ * Calls the __noreturn bch2_trans_restart_error() if @trans has been
-+ * restarted since @restart_count was sampled.
-+ */
-+static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
-+						   u32 restart_count)
-+{
-+	int restarted = trans_was_restarted(trans, restart_count);
-+
-+	if (restarted)
-+		bch2_trans_restart_error(trans, restart_count);
-+}
-+
-+void __noreturn bch2_trans_in_restart_error(struct btree_trans *);
-+
-+/*
-+ * Calls the __noreturn bch2_trans_in_restart_error() if @trans->restarted is
-+ * currently set.
-+ */
-+static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans)
-+{
-+	if (!trans->restarted)
-+		return;
-+
-+	bch2_trans_in_restart_error(trans);
-+}
-+
-+/*
-+ * Flag @trans as restarted with @err and record the current instruction
-+ * pointer in last_restarted_ip.
-+ *
-+ * @err must be positive and -@err must match BCH_ERR_transaction_restart;
-+ * both are enforced with BUG_ON(). Returns -@err.
-+ */
-+__always_inline
-+static int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
-+{
-+ BUG_ON(err <= 0);
-+ BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart));
-+
-+ trans->restarted = err;
-+ trans->last_restarted_ip = _THIS_IP_;
-+ return -err;
-+}
-+
-+/*
-+ * Flag @trans as restarted with @err; returns -@err (the value
-+ * btree_trans_restart_nounlock() itself returns).
-+ */
-+__always_inline
-+static int btree_trans_restart(struct btree_trans *trans, int err)
-+{
-+	return btree_trans_restart_nounlock(trans, err);
-+}
-+
-+bool bch2_btree_node_upgrade(struct btree_trans *,
-+ struct btree_path *, unsigned);
-+
-+void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned);
-+
-+/*
-+ * Lower path->locks_want to path->level, plus one if the path holds an intent
-+ * reference. Does nothing when locks_want is already at or below that.
-+ */
-+static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
-+					     struct btree_path *path)
-+{
-+	unsigned wanted = path->level;
-+
-+	if (path->intent_ref)
-+		wanted++;
-+
-+	if (wanted < path->locks_want)
-+		__bch2_btree_path_downgrade(trans, path, wanted);
-+}
-+
-+void bch2_trans_downgrade(struct btree_trans *);
-+
-+void bch2_trans_node_add(struct btree_trans *trans, struct btree *);
-+void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);
-+
-+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
-+int __must_check bch2_btree_iter_traverse(struct btree_iter *);
-+
-+struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
-+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
-+struct btree *bch2_btree_iter_next_node(struct btree_iter *);
-+
-+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
-+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
-+
-+struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *);
-+
-+/* Peek without an upper bound: bch2_btree_iter_peek_upto() with end = SPOS_MAX. */
-+static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
-+{
-+ return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
-+}
-+
-+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
-+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
-+
-+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
-+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
-+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);
-+
-+bool bch2_btree_iter_advance(struct btree_iter *);
-+bool bch2_btree_iter_rewind(struct btree_iter *);
-+
-+/*
-+ * Point @iter at @new_pos: both iter->pos and the cached key iter->k get the
-+ * new position (copied field by field), and the cached key is reset to a
-+ * zero-size KEY_TYPE_deleted key.
-+ */
-+static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
-+{
-+	iter->k.type		= KEY_TYPE_deleted;
-+	iter->k.size		= 0;
-+
-+	iter->pos.inode		= new_pos.inode;
-+	iter->pos.offset	= new_pos.offset;
-+	iter->pos.snapshot	= new_pos.snapshot;
-+
-+	iter->k.p.inode		= iter->pos.inode;
-+	iter->k.p.offset	= iter->pos.offset;
-+	iter->k.p.snapshot	= iter->pos.snapshot;
-+}
-+
-+/*
-+ * Reposition @iter at @new_pos. Any update_path held by the iterator is
-+ * released first, and unless BTREE_ITER_ALL_SNAPSHOTS is set the snapshot
-+ * field of @new_pos is overridden with iter->snapshot.
-+ */
-+static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
-+{
-+	if (unlikely(iter->update_path)) {
-+		bch2_path_put(iter->trans, iter->update_path,
-+			      iter->flags & BTREE_ITER_INTENT);
-+		iter->update_path = NULL;
-+	}
-+
-+	if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
-+		new_pos.snapshot = iter->snapshot;
-+
-+	__bch2_btree_iter_set_pos(iter, new_pos);
-+}
-+
-+/*
-+ * Move iter->pos to bkey_start_pos() of the key cached in iter->k.
-+ * BUGs if the iterator does not have BTREE_ITER_IS_EXTENTS set.
-+ */
-+static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter)
-+{
-+ BUG_ON(!(iter->flags & BTREE_ITER_IS_EXTENTS));
-+ iter->pos = bkey_start_pos(&iter->k);
-+}
-+
-+/*
-+ * Switch @iter to @snapshot, keeping the rest of its current position.
-+ */
-+static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
-+{
-+	struct bpos target = iter->pos;
-+
-+	target.snapshot = snapshot;
-+	iter->snapshot = snapshot;
-+
-+	bch2_btree_iter_set_pos(iter, target);
-+}
-+
-+void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
-+
-+/*
-+ * Derive the effective iterator flags for @btree_id from the caller-supplied
-+ * @flags. The steps are order dependent: later checks look at bits that
-+ * earlier steps may have set or cleared.
-+ */
-+static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
-+ unsigned btree_id,
-+ unsigned flags)
-+{
-+ /* ALL_LEVELS implies ALL_SNAPSHOTS, in its "forced" (__) form too */
-+ if (flags & BTREE_ITER_ALL_LEVELS)
-+ flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS;
-+
-+ /* Extent btrees get IS_EXTENTS unless ALL_SNAPSHOTS or NOT_EXTENTS was requested */
-+ if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
-+ btree_id_is_extents(btree_id))
-+ flags |= BTREE_ITER_IS_EXTENTS;
-+
-+ /* Drop ALL_SNAPSHOTS on btrees without a snapshot field, unless it was forced */
-+ if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
-+ !btree_type_has_snapshot_field(btree_id))
-+ flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
-+
-+ /* Snapshotted btrees are filtered by snapshot unless iterating all snapshots */
-+ if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) &&
-+ btree_type_has_snapshots(btree_id))
-+ flags |= BTREE_ITER_FILTER_SNAPSHOTS;
-+
-+ /* While journal replay has not finished, iterators also get WITH_JOURNAL */
-+ if (trans->journal_replay_not_finished)
-+ flags |= BTREE_ITER_WITH_JOURNAL;
-+
-+ return flags;
-+}
-+
-+/*
-+ * Adjust the key cache related bits of @flags for @btree_id, then apply the
-+ * common derivation in __bch2_btree_iter_flags():
-+ *  - btree not cached: CACHED and WITH_KEY_CACHE are both cleared
-+ *  - btree cached and CACHED not requested: WITH_KEY_CACHE is added
-+ */
-+static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
-+					     unsigned btree_id,
-+					     unsigned flags)
-+{
-+	if (!btree_id_cached(trans->c, btree_id))
-+		flags &= ~(BTREE_ITER_CACHED|BTREE_ITER_WITH_KEY_CACHE);
-+	else if (!(flags & BTREE_ITER_CACHED))
-+		flags |= BTREE_ITER_WITH_KEY_CACHE;
-+
-+	return __bch2_btree_iter_flags(trans, btree_id, flags);
-+}
-+
-+/*
-+ * Zero @iter, fill in its identity (transaction, btree, flags, position and
-+ * snapshot taken from @pos) and acquire a path for it with bch2_path_get().
-+ * @ip is stored in ip_allocated on CONFIG_BCACHEFS_DEBUG builds and is always
-+ * passed on to bch2_path_get().
-+ */
-+static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
-+					       struct btree_iter *iter,
-+					       unsigned btree_id, struct bpos pos,
-+					       unsigned locks_want,
-+					       unsigned depth,
-+					       unsigned flags,
-+					       unsigned long ip)
-+{
-+	memset(iter, 0, sizeof(*iter));
-+
-+	iter->trans	= trans;
-+	iter->btree_id	= btree_id;
-+	iter->flags	= flags;
-+	iter->snapshot	= pos.snapshot;
-+	iter->k.p	= pos;
-+	iter->pos	= pos;
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+	iter->ip_allocated = ip;
-+#endif
-+
-+	iter->path = bch2_path_get(trans, btree_id, pos,
-+				   locks_want, depth, flags, ip);
-+}
-+
-+void bch2_trans_iter_init_outlined(struct btree_trans *, struct btree_iter *,
-+ enum btree_id, struct bpos, unsigned);
-+
-+/*
-+ * Initialize @iter on @btree_id at @pos.
-+ *
-+ * When both @btree_id and @flags are compile-time constants the common init
-+ * is expanded inline (with locks_want and depth of 0); otherwise the
-+ * out-of-line bch2_trans_iter_init_outlined() is called.
-+ */
-+static inline void bch2_trans_iter_init(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ unsigned btree_id, struct bpos pos,
-+ unsigned flags)
-+{
-+ if (__builtin_constant_p(btree_id) &&
-+ __builtin_constant_p(flags))
-+ bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
-+ bch2_btree_iter_flags(trans, btree_id, flags),
-+ _THIS_IP_);
-+ else
-+ bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags);
-+}
-+
-+void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
-+ enum btree_id, struct bpos,
-+ unsigned, unsigned, unsigned);
-+void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
-+
-+/*
-+ * Clear the preserve flag on @iter's path. Skipped while the transaction is
-+ * flagged as restarted.
-+ */
-+static inline void set_btree_iter_dontneed(struct btree_iter *iter)
-+{
-+	if (iter->trans->restarted)
-+		return;
-+
-+	iter->path->preserve = false;
-+}
-+
-+void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
-+
-+/*
-+ * Allocate @size bytes (rounded up to a multiple of 8) of zeroed memory from
-+ * the transaction's bump buffer. Falls back to __bch2_trans_kmalloc() when
-+ * the request does not fit in what is left of the buffer.
-+ */
-+static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
-+{
-+	void *p;
-+
-+	size = roundup(size, 8);
-+
-+	if (unlikely(trans->mem_top + size > trans->mem_bytes))
-+		return __bch2_trans_kmalloc(trans, size);
-+
-+	p = trans->mem + trans->mem_top;
-+	trans->mem_top += size;
-+	memset(p, 0, size);
-+	return p;
-+}
-+
-+/*
-+ * Same as bch2_trans_kmalloc(), except that memory handed out by the inline
-+ * fast path is not zeroed.
-+ */
-+static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
-+{
-+	void *p;
-+
-+	size = roundup(size, 8);
-+
-+	if (unlikely(trans->mem_top + size > trans->mem_bytes))
-+		return __bch2_trans_kmalloc(trans, size);
-+
-+	p = trans->mem + trans->mem_top;
-+	trans->mem_top += size;
-+	return p;
-+}
-+
-+/*
-+ * Initialize @iter on @btree_id at @pos and peek the slot there.
-+ *
-+ * If @type is nonzero and the key found has a different type, the result is
-+ * replaced by the error -BCH_ERR_ENOENT_bkey_type_mismatch. On any error the
-+ * iterator is exited here, so the caller only has to call
-+ * bch2_trans_iter_exit() when the returned key is not an error.
-+ */
-+static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ unsigned btree_id, struct bpos pos,
-+ unsigned flags, unsigned type)
-+{
-+ struct bkey_s_c k;
-+
-+ bch2_trans_iter_init(trans, iter, btree_id, pos, flags);
-+ k = bch2_btree_iter_peek_slot(iter);
-+
-+ if (!bkey_err(k) && type && k.k->type != type)
-+ k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch);
-+ if (unlikely(bkey_err(k)))
-+ bch2_trans_iter_exit(trans, iter);
-+ return k;
-+}
-+
-+/* __bch2_bkey_get_iter() without a key type check (type 0). */
-+static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ unsigned btree_id, struct bpos pos,
-+ unsigned flags)
-+{
-+ return __bch2_bkey_get_iter(trans, iter, btree_id, pos, flags, 0);
-+}
-+
-+/*
-+ * Typed variant of bch2_bkey_get_iter(): requires the key to be of type
-+ * KEY_TYPE_<_type> and converts the result with bkey_s_c_to_<_type>().
-+ */
-+#define bch2_bkey_get_iter_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\
-+ bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \
-+ _btree_id, _pos, _flags, KEY_TYPE_##_type))
-+
-+/*
-+ * Look up the key of type @type at @pos in @btree_id and copy its value into
-+ * the caller's buffer.
-+ *
-+ * @val_size:	size in bytes of the buffer @val points to
-+ * @val:	destination buffer
-+ *
-+ * At most @val_size bytes are copied. If the stored value is shorter than the
-+ * buffer, the remainder of the buffer is zero-filled so the caller never sees
-+ * stale bytes. Returns 0 on success or the error from the lookup; the
-+ * temporary iterator is always released before returning.
-+ */
-+static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
-+					    unsigned btree_id, struct bpos pos,
-+					    unsigned flags, unsigned type,
-+					    unsigned val_size, void *val)
-+{
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	/* On error __bch2_bkey_get_iter() has already exited the iterator */
-+	k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type);
-+	ret = bkey_err(k);
-+	if (!ret) {
-+		unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size);
-+
-+		memcpy(val, k.v, b);
-+		/*
-+		 * @val is a void pointer, so sizeof(*val) is 1 (GNU C) and
-+		 * says nothing about the buffer; the real size is @val_size.
-+		 */
-+		if (unlikely(b < val_size))
-+			memset((void *) val + b, 0, val_size - b);
-+		bch2_trans_iter_exit(trans, &iter);
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * Copy the value of the KEY_TYPE_<_type> key at _pos into *_val; the buffer
-+ * size passed down is sizeof(*_val).
-+ */
-+#define bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, _type, _val)\
-+ __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \
-+ KEY_TYPE_##_type, sizeof(*_val), _val)
-+
-+void bch2_trans_srcu_unlock(struct btree_trans *);
-+void bch2_trans_srcu_lock(struct btree_trans *);
-+
-+u32 bch2_trans_begin(struct btree_trans *);
-+
-+/*
-+ * Iterate over btree nodes of _btree_id starting at _start. _b is the current
-+ * node; the loop ends when _b is NULL or an error pointer, in which case _ret
-+ * holds PTR_ERR_OR_ZERO(_b).
-+ *
-+ * XXX
-+ * this does not handle transaction restarts from bch2_btree_iter_next_node()
-+ * correctly
-+ */
-+#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \
-+ _locks_want, _depth, _flags, _b, _ret) \
-+ for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \
-+ _start, _locks_want, _depth, _flags); \
-+ (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \
-+ !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \
-+ (_b) = bch2_btree_iter_next_node(&(_iter)))
-+
-+/* __for_each_btree_node() with _locks_want and _depth both 0. */
-+#define for_each_btree_node(_trans, _iter, _btree_id, _start, \
-+ _flags, _b, _ret) \
-+ __for_each_btree_node(_trans, _iter, _btree_id, _start, \
-+ 0, 0, _flags, _b, _ret)
-+
-+/*
-+ * Backwards peek selected by @flags: a slot peek when BTREE_ITER_SLOTS is
-+ * set, bch2_btree_iter_peek_prev() otherwise. BTREE_ITER_ALL_LEVELS is not
-+ * supported here and triggers a BUG.
-+ */
-+static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
-+							      unsigned flags)
-+{
-+	BUG_ON(flags & BTREE_ITER_ALL_LEVELS);
-+
-+	if (flags & BTREE_ITER_SLOTS)
-+		return bch2_btree_iter_peek_slot(iter);
-+
-+	return bch2_btree_iter_peek_prev(iter);
-+}
-+
-+/*
-+ * Forward peek selected by @flags, in order of precedence:
-+ * BTREE_ITER_ALL_LEVELS, then BTREE_ITER_SLOTS, then a plain peek.
-+ */
-+static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
-+							 unsigned flags)
-+{
-+	if (flags & BTREE_ITER_ALL_LEVELS)
-+		return bch2_btree_iter_peek_all_levels(iter);
-+
-+	if (flags & BTREE_ITER_SLOTS)
-+		return bch2_btree_iter_peek_slot(iter);
-+
-+	return bch2_btree_iter_peek(iter);
-+}
-+
-+/*
-+ * Forward peek bounded by @end. With BTREE_ITER_SLOTS the slot at the current
-+ * position is returned, or bkey_s_c_null once the iterator has moved past
-+ * @end; without it this is bch2_btree_iter_peek_upto().
-+ */
-+static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
-+							      struct bpos end,
-+							      unsigned flags)
-+{
-+	if (flags & BTREE_ITER_SLOTS) {
-+		if (bkey_gt(iter->pos, end))
-+			return bkey_s_c_null;
-+
-+		return bch2_btree_iter_peek_slot(iter);
-+	}
-+
-+	return bch2_btree_iter_peek_upto(iter, end);
-+}
-+
-+/*
-+ * Restart the transaction when more than BTREE_ITER_MAX - 8 paths are
-+ * allocated. Returns 0 when below the limit, otherwise the (negative) restart
-+ * error from btree_trans_restart().
-+ */
-+static inline int btree_trans_too_many_iters(struct btree_trans *trans)
-+{
-+	if (hweight64(trans->paths_allocated) <= BTREE_ITER_MAX - 8)
-+		return 0;
-+
-+	trace_and_count(trans->c, trans_restart_too_many_iters, trans, _THIS_IP_);
-+	return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
-+}
-+
-+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
-+
-+/*
-+ * Peek (per @flags) and transparently retry on transaction restart: as long
-+ * as the path limit check or the peek itself reports a restart,
-+ * bch2_trans_begin() is called and the peek is repeated. The returned key is
-+ * therefore either valid, null, or a non-restart error.
-+ */
-+static inline struct bkey_s_c
-+__bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
-+ struct btree_iter *iter, unsigned flags)
-+{
-+ struct bkey_s_c k;
-+
-+ /* k is always assigned before the loop exits: exiting requires the peek to have run */
-+ while (btree_trans_too_many_iters(trans) ||
-+ (k = bch2_btree_iter_peek_type(iter, flags),
-+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
-+ bch2_trans_begin(trans);
-+
-+ return k;
-+}
-+
-+/*
-+ * Like __bch2_btree_iter_peek_and_restart(), but the peek is bounded by @end
-+ * (bch2_btree_iter_peek_upto_type()).
-+ */
-+static inline struct bkey_s_c
-+__bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bpos end,
-+ unsigned flags)
-+{
-+ struct bkey_s_c k;
-+
-+ /* Retry (after bch2_trans_begin()) for as long as a transaction restart is reported */
-+ while (btree_trans_too_many_iters(trans) ||
-+ (k = bch2_btree_iter_peek_upto_type(iter, end, flags),
-+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
-+ bch2_trans_begin(trans);
-+
-+ return k;
-+}
-+
-+/*
-+ * lockrestart_do(): evaluate _do inside a fresh transaction context, calling
-+ * bch2_trans_begin() before every attempt and repeating for as long as _do
-+ * returns a transaction restart error. Evaluates to the final return value of
-+ * _do; on success it also verifies that the transaction was not restarted
-+ * behind our back.
-+ */
-+#define lockrestart_do(_trans, _do) \
-+({ \
-+ u32 _restart_count; \
-+ int _ret2; \
-+ \
-+ do { \
-+ _restart_count = bch2_trans_begin(_trans); \
-+ _ret2 = (_do); \
-+ } while (bch2_err_matches(_ret2, BCH_ERR_transaction_restart)); \
-+ \
-+ if (!_ret2) \
-+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
-+ \
-+ _ret2; \
-+})
-+
-+/*
-+ * nested_lockrestart_do(), nested_commit_do():
-+ *
-+ * These are like lockrestart_do() and commit_do(), with two differences:
-+ *
-+ *  - We don't call bch2_trans_begin() unless we had a transaction restart
-+ *  - We return -BCH_ERR_transaction_restart_nested if we succeeded after a
-+ *    transaction restart
-+ *
-+ * _restart_count tracks the most recent bch2_trans_begin() and is what the
-+ * "not restarted behind our back" check uses; _orig_restart_count is the
-+ * count on entry and is what tells the caller that a restart happened at all.
-+ */
-+#define nested_lockrestart_do(_trans, _do)				\
-+({									\
-+	u32 _restart_count, _orig_restart_count;			\
-+	int _ret2;							\
-+									\
-+	_restart_count = _orig_restart_count = (_trans)->restart_count;	\
-+									\
-+	while (bch2_err_matches(_ret2 = (_do), BCH_ERR_transaction_restart))\
-+		_restart_count = bch2_trans_begin(_trans);		\
-+									\
-+	if (!_ret2)							\
-+		bch2_trans_verify_not_restarted(_trans, _restart_count);\
-+									\
-+	_ret2 ?: trans_was_restarted(_trans, _orig_restart_count);	\
-+})
-+
-+/*
-+ * for_each_btree_key2(): run _do for every key of _btree_id from _start on.
-+ *
-+ * Each iteration begins with bch2_trans_begin(); when the peek or _do returns
-+ * a transaction restart error the same position is retried. Any other nonzero
-+ * result ends the loop. The iterator is initialized and exited by the macro,
-+ * which evaluates to 0 or the error that ended the loop.
-+ */
-+#define for_each_btree_key2(_trans, _iter, _btree_id, \
-+ _start, _flags, _k, _do) \
-+({ \
-+ int _ret3 = 0; \
-+ \
-+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
-+ (_start), (_flags)); \
-+ \
-+ while (1) { \
-+ u32 _restart_count = bch2_trans_begin(_trans); \
-+ \
-+ _ret3 = 0; \
-+ (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \
-+ if (!(_k).k) \
-+ break; \
-+ \
-+ _ret3 = bkey_err(_k) ?: (_do); \
-+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
-+ continue; \
-+ if (_ret3) \
-+ break; \
-+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
-+ if (!bch2_btree_iter_advance(&(_iter))) \
-+ break; \
-+ } \
-+ \
-+ bch2_trans_iter_exit((_trans), &(_iter)); \
-+ _ret3; \
-+})
-+
-+/*
-+ * for_each_btree_key2_upto(): same loop as for_each_btree_key2(), but the
-+ * peek is bounded by _end (bch2_btree_iter_peek_upto_type()).
-+ */
-+#define for_each_btree_key2_upto(_trans, _iter, _btree_id, \
-+ _start, _end, _flags, _k, _do) \
-+({ \
-+ int _ret3 = 0; \
-+ \
-+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
-+ (_start), (_flags)); \
-+ \
-+ while (1) { \
-+ u32 _restart_count = bch2_trans_begin(_trans); \
-+ \
-+ _ret3 = 0; \
-+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, (_flags));\
-+ if (!(_k).k) \
-+ break; \
-+ \
-+ _ret3 = bkey_err(_k) ?: (_do); \
-+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
-+ continue; \
-+ if (_ret3) \
-+ break; \
-+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
-+ if (!bch2_btree_iter_advance(&(_iter))) \
-+ break; \
-+ } \
-+ \
-+ bch2_trans_iter_exit((_trans), &(_iter)); \
-+ _ret3; \
-+})
-+
-+/*
-+ * for_each_btree_key_reverse(): like for_each_btree_key2(), but walking
-+ * backwards: keys are fetched with bch2_btree_iter_peek_prev_type() and the
-+ * iterator is stepped with bch2_btree_iter_rewind().
-+ */
-+#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \
-+ _start, _flags, _k, _do) \
-+({ \
-+ int _ret3 = 0; \
-+ \
-+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
-+ (_start), (_flags)); \
-+ \
-+ while (1) { \
-+ u32 _restart_count = bch2_trans_begin(_trans); \
-+ (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
-+ if (!(_k).k) { \
-+ _ret3 = 0; \
-+ break; \
-+ } \
-+ \
-+ _ret3 = bkey_err(_k) ?: (_do); \
-+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\
-+ continue; \
-+ if (_ret3) \
-+ break; \
-+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
-+ if (!bch2_btree_iter_rewind(&(_iter))) \
-+ break; \
-+ } \
-+ \
-+ bch2_trans_iter_exit((_trans), &(_iter)); \
-+ _ret3; \
-+})
-+
-+/*
-+ * for_each_btree_key2() where every successful _do is followed by
-+ * bch2_trans_commit() with the given disk reservation, journal seq and flags.
-+ */
-+#define for_each_btree_key_commit(_trans, _iter, _btree_id, \
-+ _start, _iter_flags, _k, \
-+ _disk_res, _journal_seq, _commit_flags,\
-+ _do) \
-+ for_each_btree_key2(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
-+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
-+ (_journal_seq), (_commit_flags)))
-+
-+/*
-+ * for_each_btree_key_reverse() where every successful _do is followed by
-+ * bch2_trans_commit().
-+ */
-+#define for_each_btree_key_reverse_commit(_trans, _iter, _btree_id, \
-+ _start, _iter_flags, _k, \
-+ _disk_res, _journal_seq, _commit_flags,\
-+ _do) \
-+ for_each_btree_key_reverse(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
-+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
-+ (_journal_seq), (_commit_flags)))
-+
-+/*
-+ * for_each_btree_key2_upto() where every successful _do is followed by
-+ * bch2_trans_commit().
-+ */
-+#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
-+ _start, _end, _iter_flags, _k, \
-+ _disk_res, _journal_seq, _commit_flags,\
-+ _do) \
-+ for_each_btree_key2_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\
-+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
-+ (_journal_seq), (_commit_flags)))
-+
-+/*
-+ * Statement-style loop over the keys of _btree_id from _start. Transaction
-+ * restarts hit while peeking are retried internally; the loop ends on the
-+ * first null key or error, with _ret set to bkey_err() of the last peek.
-+ * The macro initializes _iter but does not call bch2_trans_iter_exit().
-+ */
-+#define for_each_btree_key(_trans, _iter, _btree_id, \
-+ _start, _flags, _k, _ret) \
-+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
-+ (_start), (_flags)); \
-+ (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
-+ !((_ret) = bkey_err(_k)) && (_k).k; \
-+ bch2_btree_iter_advance(&(_iter)))
-+
-+/*
-+ * for_each_btree_key() with the peek bounded by _end. The macro initializes
-+ * _iter but does not call bch2_trans_iter_exit().
-+ */
-+#define for_each_btree_key_upto(_trans, _iter, _btree_id, \
-+ _start, _end, _flags, _k, _ret) \
-+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
-+ (_start), (_flags)); \
-+ (_k) = __bch2_btree_iter_peek_upto_and_restart((_trans), \
-+ &(_iter), _end, _flags),\
-+ !((_ret) = bkey_err(_k)) && (_k).k; \
-+ bch2_btree_iter_advance(&(_iter)))
-+
-+/*
-+ * for_each_btree_key() without the internal restart handling: a transaction
-+ * restart reported by the peek ends the loop and is left in _ret like any
-+ * other error.
-+ */
-+#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \
-+ _start, _flags, _k, _ret) \
-+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
-+ (_start), (_flags)); \
-+ (_k) = bch2_btree_iter_peek_type(&(_iter), _flags), \
-+ !((_ret) = bkey_err(_k)) && (_k).k; \
-+ bch2_btree_iter_advance(&(_iter)))
-+
-+/*
-+ * for_each_btree_key_norestart() with the peek bounded by _end.
-+ */
-+#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \
-+ _start, _end, _flags, _k, _ret) \
-+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
-+ (_start), (_flags)); \
-+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
-+ !((_ret) = bkey_err(_k)) && (_k).k; \
-+ bch2_btree_iter_advance(&(_iter)))
-+
-+/*
-+ * Continue iterating with an already initialized _iter from its current
-+ * position; restarts during the peek are retried internally.
-+ */
-+#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret) \
-+ for (; \
-+ (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
-+ !((_ret) = bkey_err(_k)) && (_k).k; \
-+ bch2_btree_iter_advance(&(_iter)))
-+
-+/*
-+ * Continue iterating with an already initialized _iter; transaction restarts
-+ * are not retried and end the loop with the error in _ret.
-+ */
-+#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
-+ for (; \
-+ (_k) = bch2_btree_iter_peek_type(&(_iter), _flags), \
-+ !((_ret) = bkey_err(_k)) && (_k).k; \
-+ bch2_btree_iter_advance(&(_iter)))
-+
-+/*
-+ * for_each_btree_key_continue_norestart() with the peek bounded by _end.
-+ */
-+#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\
-+ for (; \
-+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \
-+ !((_ret) = bkey_err(_k)) && (_k).k; \
-+ bch2_btree_iter_advance(&(_iter)))
-+
-+/*
-+ * drop_locks_do(): unlock the transaction, evaluate _do, and if _do returned
-+ * 0 relock the transaction. Evaluates to the nonzero result of _do, or
-+ * otherwise to the result of bch2_trans_relock().
-+ *
-+ * _do is parenthesized so that an argument containing a low-precedence
-+ * operator (e.g. an assignment) is evaluated as a unit before the ?: test.
-+ */
-+#define drop_locks_do(_trans, _do)					\
-+({									\
-+	bch2_trans_unlock(_trans);					\
-+	(_do) ?: bch2_trans_relock(_trans);				\
-+})
-+
-+/*
-+ * allocate_dropping_locks_errcode(): evaluate the allocation expression _do,
-+ * which returns an error code and is expected to use the local _gfp as its
-+ * gfp mask. The first attempt uses GFP_NOWAIT|__GFP_NOWARN with btree locks
-+ * held; if that fails with ENOMEM, the locks of _trans are dropped and _do is
-+ * retried with GFP_KERNEL, followed by a relock.
-+ *
-+ * The retry operates on the macro's _trans argument, so it does not depend on
-+ * the caller having a variable that happens to be called "trans".
-+ */
-+#define allocate_dropping_locks_errcode(_trans, _do)			\
-+({									\
-+	gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN;				\
-+	int _ret = _do;							\
-+									\
-+	if (bch2_err_matches(_ret, ENOMEM)) {				\
-+		_gfp = GFP_KERNEL;					\
-+		_ret = drop_locks_do(_trans, _do);			\
-+	}								\
-+	_ret;								\
-+})
-+
-+/*
-+ * allocate_dropping_locks(): evaluate the allocation expression _do, which
-+ * returns a pointer and is expected to use the local _gfp as its gfp mask.
-+ * The first attempt uses GFP_NOWAIT|__GFP_NOWARN with btree locks held; if it
-+ * returns NULL, the locks of _trans are dropped and _do is retried with
-+ * GFP_KERNEL. _ret is set to 0, or to the result of relocking after a retry.
-+ * Evaluates to the pointer returned by the last attempt.
-+ *
-+ * The retry operates on the macro's _trans argument, so it does not depend on
-+ * the caller having a variable that happens to be called "trans".
-+ */
-+#define allocate_dropping_locks(_trans, _ret, _do)			\
-+({									\
-+	gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN;				\
-+	typeof(_do) _p = _do;						\
-+									\
-+	_ret = 0;							\
-+	if (unlikely(!_p)) {						\
-+		_gfp = GFP_KERNEL;					\
-+		_ret = drop_locks_do(_trans, ((_p = _do), 0));		\
-+	}								\
-+	_p;								\
-+})
-+
-+/* new multiple iterator interface: */
-+
-+void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
-+void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
-+void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
-+void bch2_dump_trans_updates(struct btree_trans *);
-+void bch2_dump_trans_paths_updates(struct btree_trans *);
-+
-+struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
-+void bch2_trans_put(struct btree_trans *);
-+
-+extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
-+unsigned bch2_trans_get_fn_idx(const char *);
-+
-+/*
-+ * Get a btree_trans for filesystem _c. Each expansion site keeps a static
-+ * index for the calling function's name: bch2_trans_get_fn_idx(__func__) is
-+ * called while that cached index is still 0, and the index is then passed to
-+ * __bch2_trans_get().
-+ */
-+#define bch2_trans_get(_c) \
-+({ \
-+ static unsigned trans_fn_idx; \
-+ \
-+ if (unlikely(!trans_fn_idx)) \
-+ trans_fn_idx = bch2_trans_get_fn_idx(__func__); \
-+ __bch2_trans_get(_c, trans_fn_idx); \
-+})
-+
-+void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
-+
-+void bch2_fs_btree_iter_exit(struct bch_fs *);
-+int bch2_fs_btree_iter_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_BTREE_ITER_H */
-diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
-new file mode 100644
-index 000000000000..58a981bcf3aa
---- /dev/null
-+++ b/fs/bcachefs/btree_journal_iter.c
-@@ -0,0 +1,531 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bset.h"
-+#include "btree_journal_iter.h"
-+#include "journal_io.h"
-+
-+#include <linux/sort.h>
-+
-+/*
-+ * For managing keys we read from the journal: until journal replay works normal
-+ * btree lookups need to be able to find and return keys from the journal where
-+ * they overwrite what's in the btree, so we have a special iterator and
-+ * operations for the regular btree iter code to use:
-+ */
-+
-+/*
-+ * Three-way compare of the tuple (btree id, level, position) against journal
-+ * key @r, most significant field first.
-+ */
-+static int __journal_key_cmp(enum btree_id	l_btree_id,
-+			     unsigned		l_level,
-+			     struct bpos	l_pos,
-+			     const struct journal_key *r)
-+{
-+	int c;
-+
-+	c = cmp_int(l_btree_id, r->btree_id);
-+	if (c)
-+		return c;
-+
-+	c = cmp_int(l_level, r->level);
-+	if (c)
-+		return c;
-+
-+	return bpos_cmp(l_pos, r->k->k.p);
-+}
-+
-+/* Three-way compare of two journal keys by (btree id, level, position). */
-+static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
-+{
-+ return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
-+}
-+
-+/*
-+ * Translate a logical key index (0..nr, gap not counted) into a position in
-+ * the backing array keys->d, which has a gap of size - nr unused slots
-+ * starting at keys->gap.
-+ */
-+static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx)
-+{
-+	size_t gap_size = keys->size - keys->nr;
-+
-+	return idx >= keys->gap
-+		? idx + gap_size
-+		: idx;
-+}
-+
-+/* Journal key at logical index @idx (the gap in keys->d is skipped). */
-+static inline struct journal_key *idx_to_key(struct journal_keys *keys, size_t idx)
-+{
-+	size_t pos = idx_to_pos(keys, idx);
-+
-+	return keys->d + pos;
-+}
-+
-+/*
-+ * Binary search for (id, level, pos): returns the logical index of the first
-+ * journal key that does not compare less than the search tuple, or keys->nr
-+ * if there is none. The result is sanity checked against both neighbours.
-+ */
-+static size_t __bch2_journal_key_search(struct journal_keys *keys,
-+					enum btree_id id, unsigned level,
-+					struct bpos pos)
-+{
-+	size_t lo = 0, hi = keys->nr;
-+
-+	while (lo < hi) {
-+		size_t mid = lo + ((hi - lo) >> 1);
-+
-+		if (__journal_key_cmp(id, level, pos, idx_to_key(keys, mid)) > 0)
-+			lo = mid + 1;
-+		else
-+			hi = mid;
-+	}
-+
-+	/* The key we landed on must not sort before the search tuple ... */
-+	BUG_ON(lo < keys->nr &&
-+	       __journal_key_cmp(id, level, pos, idx_to_key(keys, lo)) > 0);
-+
-+	/* ... and its predecessor must sort strictly before it */
-+	BUG_ON(lo &&
-+	       __journal_key_cmp(id, level, pos, idx_to_key(keys, lo - 1)) <= 0);
-+
-+	return lo;
-+}
-+
-+/*
-+ * Like __bch2_journal_key_search(), but the result is a position in the
-+ * backing array keys->d (gap included) rather than a logical index.
-+ */
-+static size_t bch2_journal_key_search(struct journal_keys *keys,
-+				      enum btree_id id, unsigned level,
-+				      struct bpos pos)
-+{
-+	size_t idx = __bch2_journal_key_search(keys, id, level, pos);
-+
-+	return idx_to_pos(keys, idx);
-+}
-+
-+/*
-+ * Return the first journal key for (@btree_id, @level) in the range
-+ * [@pos, @end_pos] that has not been marked overwritten, or NULL.
-+ *
-+ * @idx is an in/out cursor holding a logical index into the journal keys: a
-+ * value of 0 means "not positioned yet" and triggers a binary search;
-+ * otherwise the scan resumes linearly from *@idx.
-+ */
-+struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id,
-+ unsigned level, struct bpos pos,
-+ struct bpos end_pos, size_t *idx)
-+{
-+ struct journal_keys *keys = &c->journal_keys;
-+ unsigned iters = 0;
-+ struct journal_key *k;
-+search:
-+ if (!*idx)
-+ *idx = __bch2_journal_key_search(keys, btree_id, level, pos);
-+
-+ while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
-+ /* Past the end of the requested range: nothing to return */
-+ if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
-+ return NULL;
-+
-+ if (__journal_key_cmp(btree_id, level, pos, k) <= 0 &&
-+ !k->overwritten)
-+ return k->k;
-+
-+ (*idx)++;
-+ iters++;
-+ /*
-+ * After 10 linear steps give up on the cursor and redo the binary
-+ * search. iters is not reset, so this happens at most once per call.
-+ */
-+ if (iters == 10) {
-+ *idx = 0;
-+ goto search;
-+ }
-+ }
-+
-+ return NULL;
-+}
-+
-+/*
-+ * Look up the journal key at exactly @pos: a peek over the one-position range
-+ * [@pos, @pos] with a fresh cursor. Returns NULL if there is no such key or
-+ * it has been marked overwritten.
-+ */
-+struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
-+ unsigned level, struct bpos pos)
-+{
-+ size_t idx = 0;
-+
-+ return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
-+}
-+
-+/*
-+ * Called after inserting a key: fix up registered iterators so they do not
-+ * skip the new key.
-+ */
-+static void journal_iters_fix(struct bch_fs *c)
-+{
-+	struct journal_keys *keys = &c->journal_keys;
-+	size_t gap_size = keys->size - keys->nr;
-+	/* The key we just inserted is immediately before the gap: */
-+	size_t gap_end = keys->gap + gap_size;
-+	struct btree_and_journal_iter *it;
-+
-+	/*
-+	 * If an iterator points one after the key we just inserted, decrement
-+	 * the iterator so it points at the key we just inserted - if the
-+	 * decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
-+	 * handle that:
-+	 */
-+	list_for_each_entry(it, &c->journal_iters, journal.list) {
-+		if (it->journal.idx != gap_end)
-+			continue;
-+
-+		it->journal.idx = keys->gap - 1;
-+	}
-+}
-+
-+/*
-+ * Adjust the array positions held by registered journal iterators for the
-+ * gap moving from @old_gap to @new_gap: positions beyond the old gap are
-+ * first shifted down by the gap size, then positions at or beyond the new gap
-+ * are shifted up by it.
-+ */
-+static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
-+{
-+	struct journal_keys *keys = &c->journal_keys;
-+	size_t shift = keys->size - keys->nr;
-+	struct journal_iter *it;
-+
-+	list_for_each_entry(it, &c->journal_iters, list) {
-+		size_t idx = it->idx;
-+
-+		if (idx > old_gap)
-+			idx -= shift;
-+		if (idx >= new_gap)
-+			idx += shift;
-+
-+		it->idx = idx;
-+	}
-+}
-+
-+/*
-+ * Insert @k into the sorted journal keys for (@id, @level), taking ownership
-+ * of @k on success: the entry is marked allocated, so the key is kfree()d
-+ * when it is later replaced or the journal keys are freed.
-+ *
-+ * An existing key at the same position is replaced in place. Otherwise the
-+ * backing array is grown if it is full, the gap is moved to the insert
-+ * position and the new key is placed at the start of the gap.
-+ *
-+ * Must not be called once BCH_FS_RW is set (BUG otherwise). Returns 0 or
-+ * -BCH_ERR_ENOMEM_journal_key_insert; on failure @k still belongs to the
-+ * caller.
-+ */
-+int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
-+ unsigned level, struct bkey_i *k)
-+{
-+ struct journal_key n = {
-+ .btree_id = id,
-+ .level = level,
-+ .k = k,
-+ .allocated = true,
-+ /*
-+ * Ensure these keys are done last by journal replay, to unblock
-+ * journal reclaim:
-+ */
-+ .journal_seq = U32_MAX,
-+ };
-+ struct journal_keys *keys = &c->journal_keys;
-+ /* idx is a position in keys->d here, i.e. it includes the gap */
-+ size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
-+
-+ BUG_ON(test_bit(BCH_FS_RW, &c->flags));
-+
-+ /* Same btree, level and position already present: overwrite that entry */
-+ if (idx < keys->size &&
-+ journal_key_cmp(&n, &keys->d[idx]) == 0) {
-+ if (keys->d[idx].allocated)
-+ kfree(keys->d[idx].k);
-+ keys->d[idx] = n;
-+ return 0;
-+ }
-+
-+ /* Convert the array position back into a logical (gap-free) index */
-+ if (idx > keys->gap)
-+ idx -= keys->size - keys->nr;
-+
-+ /* Array full: reallocate at twice the size (at least 16 entries) */
-+ if (keys->nr == keys->size) {
-+ struct journal_keys new_keys = {
-+ .nr = keys->nr,
-+ .size = max_t(size_t, keys->size, 8) * 2,
-+ };
-+
-+ new_keys.d = kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL);
-+ if (!new_keys.d) {
-+ bch_err(c, "%s: error allocating new key array (size %zu)",
-+ __func__, new_keys.size);
-+ return -BCH_ERR_ENOMEM_journal_key_insert;
-+ }
-+
-+ /* Since @keys was full, there was no gap: */
-+ memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
-+ kvfree(keys->d);
-+ *keys = new_keys;
-+
-+ /* And now the gap is at the end: */
-+ keys->gap = keys->nr;
-+ }
-+
-+ /* Iterators hold array positions, so they must be adjusted before the gap moves */
-+ journal_iters_move_gap(c, keys->gap, idx);
-+
-+ move_gap(keys->d, keys->nr, keys->size, keys->gap, idx);
-+ keys->gap = idx;
-+
-+ /* The new key takes the first slot of the gap; the gap shrinks by one */
-+ keys->nr++;
-+ keys->d[keys->gap++] = n;
-+
-+ journal_iters_fix(c);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Can only be used from the recovery thread while we're still RO - can't be
-+ * used once we've got RW, as journal_keys is at that point used by multiple
-+ * threads:
-+ */
-+int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
-+			    unsigned level, struct bkey_i *k)
-+{
-+	struct bkey_i *copy = kmalloc(bkey_bytes(&k->k), GFP_KERNEL);
-+	int ret;
-+
-+	if (!copy)
-+		return -BCH_ERR_ENOMEM_journal_key_insert;
-+
-+	/* Insert a private copy: @k itself stays owned by the caller */
-+	bkey_copy(copy, k);
-+
-+	ret = bch2_journal_key_insert_take(c, id, level, copy);
-+	if (ret)
-+		kfree(copy);	/* ownership was not taken on failure */
-+
-+	return ret;
-+}
-+
-+/*
-+ * Record a deletion at @pos for (@id, @level) by inserting a freshly
-+ * bkey_init()ed key positioned at @pos. bch2_journal_key_insert() copies the
-+ * key, so the on-stack key here does not outlive the call.
-+ */
-+int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
-+ unsigned level, struct bpos pos)
-+{
-+ struct bkey_i whiteout;
-+
-+ bkey_init(&whiteout.k);
-+ whiteout.k.p = pos;
-+
-+ return bch2_journal_key_insert(c, id, level, &whiteout);
-+}
-+
-+/*
-+ * Mark the journal key at exactly (@btree, @level, @pos) as overwritten, if
-+ * there is one.
-+ */
-+void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
-+				  unsigned level, struct bpos pos)
-+{
-+	struct journal_keys *keys = &c->journal_keys;
-+	size_t idx = bch2_journal_key_search(keys, btree, level, pos);
-+	struct journal_key *found;
-+
-+	if (idx >= keys->size)
-+		return;
-+
-+	found = &keys->d[idx];
-+
-+	if (found->btree_id == btree &&
-+	    found->level == level &&
-+	    bpos_eq(found->k->k.p, pos))
-+		found->overwritten = true;
-+}
-+
-+/*
-+ * Step @iter to the next array position, jumping over the gap when the step
-+ * lands on its first slot. Does nothing once the end of the array is reached.
-+ */
-+static void bch2_journal_iter_advance(struct journal_iter *iter)
-+{
-+	struct journal_keys *keys = iter->keys;
-+
-+	if (iter->idx >= keys->size)
-+		return;
-+
-+	iter->idx++;
-+
-+	if (iter->idx == keys->gap)
-+		iter->idx += keys->size - keys->nr;
-+}
-+
-+/*
-+ * Return the key at the iterator's current position, advancing past entries
-+ * marked overwritten. Returns bkey_s_c_null once the end of the array or a
-+ * key of a different btree or level is reached.
-+ */
-+static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
-+{
-+	struct journal_keys *keys = iter->keys;
-+	struct journal_key *cur;
-+
-+	for (cur = keys->d + iter->idx;
-+	     cur < keys->d + keys->size &&
-+	     cur->btree_id == iter->btree_id &&
-+	     cur->level == iter->level;
-+	     cur = keys->d + iter->idx) {
-+		if (!cur->overwritten)
-+			return bkey_i_to_s_c(cur->k);
-+
-+		bch2_journal_iter_advance(iter);
-+	}
-+
-+	return bkey_s_c_null;
-+}
-+
-+/* Unlink @iter from whatever list its list head is currently on. */
-+static void bch2_journal_iter_exit(struct journal_iter *iter)
-+{
-+ list_del(&iter->list);
-+}
-+
-+/*
-+ * Set up @iter over the journal keys of (@id, @level), positioned at the
-+ * first key at or after @pos. iter->idx is a position in the backing array
-+ * (gap included). The list head is not touched here.
-+ */
-+static void bch2_journal_iter_init(struct bch_fs *c,
-+ struct journal_iter *iter,
-+ enum btree_id id, unsigned level,
-+ struct bpos pos)
-+{
-+ iter->btree_id = id;
-+ iter->level = level;
-+ iter->keys = &c->journal_keys;
-+ iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
-+}
-+
-+/* Peek the btree node side of @iter, unpacking the key into iter->unpacked. */
-+static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
-+{
-+ return bch2_btree_node_iter_peek_unpack(&iter->node_iter,
-+ iter->b, &iter->unpacked);
-+}
-+
-+/* Advance the btree node side of @iter by one key. */
-+static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter)
-+{
-+ bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
-+}
-+
-+/*
-+ * Move the merged iterator's position to the successor of the current one.
-+ * At SPOS_MAX there is no successor, so the iterator is flagged at_end
-+ * instead.
-+ */
-+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
-+{
-+	if (!bpos_eq(iter->pos, SPOS_MAX)) {
-+		iter->pos = bpos_successor(iter->pos);
-+		return;
-+	}
-+
-+	iter->at_end = true;
-+}
-+
-+/*
-+ * Return the next key at or after iter->pos from the merged view of the btree
-+ * node and the journal keys, or bkey_s_c_null at the end.
-+ *
-+ * When both sources have a key, the journal key is chosen if its position is
-+ * at or before the btree key's position. Deleted keys are skipped.
-+ */
-+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
-+{
-+ struct bkey_s_c btree_k, journal_k, ret;
-+again:
-+ if (iter->at_end)
-+ return bkey_s_c_null;
-+
-+ /* Bring both sources up to iter->pos */
-+ while ((btree_k = bch2_journal_iter_peek_btree(iter)).k &&
-+ bpos_lt(btree_k.k->p, iter->pos))
-+ bch2_journal_iter_advance_btree(iter);
-+
-+ while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
-+ bpos_lt(journal_k.k->p, iter->pos))
-+ bch2_journal_iter_advance(&iter->journal);
-+
-+ ret = journal_k.k &&
-+ (!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p))
-+ ? journal_k
-+ : btree_k;
-+
-+ /* Keys past the node's max_key are treated as the end */
-+ if (ret.k && iter->b && bpos_gt(ret.k->p, iter->b->data->max_key))
-+ ret = bkey_s_c_null;
-+
-+ if (ret.k) {
-+ iter->pos = ret.k->p;
-+ /* Deleted key: step past its position and look again */
-+ if (bkey_deleted(ret.k)) {
-+ bch2_btree_and_journal_iter_advance(iter);
-+ goto again;
-+ }
-+ } else {
-+ iter->pos = SPOS_MAX;
-+ iter->at_end = true;
-+ }
-+
-+ return ret;
-+}
-+
-+/* Tear down @iter: unlinks its journal iterator from the list it is on. */
-+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
-+{
-+ bch2_journal_iter_exit(&iter->journal);
-+}
-+
-+/*
-+ * Initialize @iter over btree node @b merged with the journal keys for that
-+ * node's btree and level.
-+ *
-+ * @node_iter is copied into the iterator. @pos only positions the journal
-+ * side; iter->pos itself starts at the node's min_key. The journal iterator's
-+ * list head is initialized empty and is not added to any list here.
-+ */
-+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-+ struct bch_fs *c,
-+ struct btree *b,
-+ struct btree_node_iter node_iter,
-+ struct bpos pos)
-+{
-+ memset(iter, 0, sizeof(*iter));
-+
-+ iter->b = b;
-+ iter->node_iter = node_iter;
-+ bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
-+ INIT_LIST_HEAD(&iter->journal.list);
-+ iter->pos = b->data->min_key;
-+ iter->at_end = false;
-+}
-+
-+/*
-+ * this version is used by btree_gc before filesystem has gone RW and
-+ * multithreaded, so uses the journal_iters list:
-+ */
-+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-+ struct bch_fs *c,
-+ struct btree *b)
-+{
-+ struct btree_node_iter node_iter;
-+
-+ /* Start both the node iterator and the journal side at the node's first key */
-+ bch2_btree_node_iter_init_from_start(&node_iter, b);
-+ __bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
-+ /* Register on c->journal_iters so key insertions can fix up iter->journal.idx */
-+ list_add(&iter->journal.list, &c->journal_iters);
-+}
-+
-+/* sort and dedup all keys in the journal: */
-+
-+/*
-+ * Free every journal_replay entry held in c->journal_entries, then the radix
-+ * tree itself.
-+ */
-+void bch2_journal_entries_free(struct bch_fs *c)
-+{
-+	struct genradix_iter iter;
-+	struct journal_replay **slot;
-+
-+	genradix_for_each(&c->journal_entries, iter, slot) {
-+		struct journal_replay *entry = *slot;
-+
-+		if (!entry)
-+			continue;
-+
-+		/* Allocation size: header up to ->j plus the variable-size jset */
-+		kvpfree(entry, offsetof(struct journal_replay, j) +
-+			vstruct_bytes(&entry->j));
-+	}
-+
-+	genradix_free(&c->journal_entries);
-+}
-+
-+/*
-+ * When keys compare equal, oldest compares first:
-+ */
-+static int journal_sort_key_cmp(const void *_l, const void *_r)
-+{
-+	const struct journal_key *l = _l;
-+	const struct journal_key *r = _r;
-+	int c;
-+
-+	/* Primary order: btree id, level, position */
-+	c = journal_key_cmp(l, r);
-+	if (c)
-+		return c;
-+
-+	/* Ties: lower journal sequence number first, then lower offset */
-+	c = cmp_int(l->journal_seq, r->journal_seq);
-+	if (c)
-+		return c;
-+
-+	return cmp_int(l->journal_offset, r->journal_offset);
-+}
-+
-+/*
-+ * Release all journal keys: keys flagged as allocated are kfree()d, then the
-+ * backing array is freed and @keys is reset to the empty state.
-+ */
-+void bch2_journal_keys_free(struct journal_keys *keys)
-+{
-+	size_t n;
-+
-+	/* Push the gap to the end so the live keys are contiguous in [0, nr) */
-+	move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
-+	keys->gap = keys->nr;
-+
-+	for (n = 0; n < keys->nr; n++)
-+		if (keys->d[n].allocated)
-+			kfree(keys->d[n].k);
-+
-+	kvfree(keys->d);
-+	keys->d = NULL;
-+	keys->nr = keys->gap = keys->size = 0;
-+}
-+/*
-+ * Sort keys->d with journal_sort_key_cmp() and compact it in place.
-+ *
-+ * For every run of adjacent entries with the same btree_id, level and key
-+ * position, only the last entry of the run is kept; since equal keys sort
-+ * oldest first, that is the newest one. keys->nr is updated to the number of
-+ * entries kept. keys->gap is not touched here.
-+ */
-+static void __journal_keys_sort(struct journal_keys *keys)
-+{
-+ struct journal_key *src, *dst;
-+
-+ sort(keys->d, keys->nr, sizeof(keys->d[0]), journal_sort_key_cmp, NULL);
-+
-+ src = dst = keys->d;
-+ while (src < keys->d + keys->nr) {
-+ while (src + 1 < keys->d + keys->nr &&
-+ src[0].btree_id == src[1].btree_id &&
-+ src[0].level == src[1].level &&
-+ bpos_eq(src[0].k->k.p, src[1].k->k.p))
-+ src++; /* skip older duplicates of the same key */
-+
-+ *dst++ = *src++;
-+ }
-+
-+ keys->nr = dst - keys->d;
-+}
-+/*
-+ * Build the sorted, deduplicated c->journal_keys array from c->journal_entries.
-+ *
-+ * Two passes are made over every entry that is present and not marked ignore:
-+ * the first only counts keys (returning 0 early if there are none), the second
-+ * appends one struct journal_key per key. Each journal_key stores the pointer
-+ * yielded by for_each_jset_key() (no copy is made here) plus the entry's
-+ * sequence number and the key's offset within the entry.
-+ *
-+ * The buffer is sized to nr_keys rounded up to a power of two. If that cannot
-+ * be allocated, successively halved sizes are tried while the size is still
-+ * above nr_keys / 8; with such a smaller buffer the array is sorted and
-+ * deduplicated in place each time it fills up, and the function gives up if
-+ * that leaves it more than 7/8 full.
-+ *
-+ * Returns 0 on success or -BCH_ERR_ENOMEM_journal_keys_sort. On the "too many
-+ * journal keys" error return keys->d is not freed by this function.
-+ */
-+int bch2_journal_keys_sort(struct bch_fs *c)
-+{
-+ struct genradix_iter iter;
-+ struct journal_replay *i, **_i;
-+ struct jset_entry *entry;
-+ struct bkey_i *k;
-+ struct journal_keys *keys = &c->journal_keys;
-+ size_t nr_keys = 0, nr_read = 0;
-+ /* Pass 1: count the keys so the array can be sized up front. */
-+ genradix_for_each(&c->journal_entries, iter, _i) {
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ for_each_jset_key(k, entry, &i->j)
-+ nr_keys++;
-+ }
-+
-+ if (!nr_keys)
-+ return 0;
-+
-+ keys->size = roundup_pow_of_two(nr_keys);
-+
-+ keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
-+ if (!keys->d) {
-+ bch_err(c, "Failed to allocate buffer for sorted journal keys (%zu keys); trying slowpath",
-+ nr_keys);
-+ /* Slowpath: retry with a smaller buffer and rely on compaction in pass 2. */
-+ do {
-+ keys->size >>= 1;
-+ keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
-+ } while (!keys->d && keys->size > nr_keys / 8);
-+
-+ if (!keys->d) {
-+ bch_err(c, "Failed to allocate %zu size buffer for sorted journal keys; exiting",
-+ keys->size);
-+ return -BCH_ERR_ENOMEM_journal_keys_sort;
-+ }
-+ }
-+ /* Pass 2: fill the array, compacting whenever a (smaller) buffer is full. */
-+ genradix_for_each(&c->journal_entries, iter, _i) {
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ cond_resched();
-+
-+ for_each_jset_key(k, entry, &i->j) {
-+ if (keys->nr == keys->size) {
-+ __journal_keys_sort(keys);
-+ /* Compaction did not free at least 1/8 of the buffer: give up. */
-+ if (keys->nr > keys->size * 7 / 8) {
-+ bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu/%zu",
-+ keys->nr, keys->size, nr_read, nr_keys);
-+ return -BCH_ERR_ENOMEM_journal_keys_sort;
-+ }
-+ }
-+
-+ keys->d[keys->nr++] = (struct journal_key) {
-+ .btree_id = entry->btree_id,
-+ .level = entry->level,
-+ .k = k,
-+ .journal_seq = le64_to_cpu(i->j.seq),
-+ .journal_offset = k->_data - i->j._data,
-+ };
-+
-+ nr_read++;
-+ }
-+ }
-+ /* Final sort + dedup; the gap starts out at the end of the array. */
-+ __journal_keys_sort(keys);
-+ keys->gap = keys->nr;
-+
-+ bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_keys, keys->nr);
-+ return 0;
-+}
-diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h
-new file mode 100644
-index 000000000000..5d64e7e22f26
---- /dev/null
-+++ b/fs/bcachefs/btree_journal_iter.h
-@@ -0,0 +1,57 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_H
-+#define _BCACHEFS_BTREE_JOURNAL_ITER_H
-+/*
-+ * Iterator over the journal keys. NOTE(review): field meanings below are
-+ * inferred from the initialisation code in btree_journal_iter.c; confirm there.
-+ */
-+struct journal_iter {
-+ struct list_head list; /* linked onto bch_fs::journal_iters by bch2_btree_and_journal_iter_init_node_iter() */
-+ enum btree_id btree_id; /* initialised from the node's b->c.btree_id */
-+ unsigned level; /* initialised from the node's b->c.level */
-+ size_t idx; /* presumably the current index into keys->d - confirm */
-+ struct journal_keys *keys; /* presumably the array being iterated - confirm */
-+};
-+
-+/*
-+ * Iterate over keys in the btree, with keys from the journal overlaid on top:
-+ */
-+
-+struct btree_and_journal_iter {
-+ struct btree *b; /* node being iterated */
-+ struct btree_node_iter node_iter; /* position within @b, supplied by the caller at init */
-+ struct bkey unpacked; /* presumably scratch space for unpacking keys from @b - confirm */
-+
-+ struct journal_iter journal; /* journal-side iterator for @b's btree id and level */
-+ struct bpos pos; /* set to b->data->min_key at init */
-+ bool at_end; /* false at init */
-+};
-+/*
-+ * Journal key lookups. NOTE(review): the definitions are not part of this
-+ * header; confirm argument semantics (in particular the trailing size_t *)
-+ * against the implementations.
-+ */
-+struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id,
-+ unsigned, struct bpos, struct bpos, size_t *);
-+struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id,
-+ unsigned, struct bpos);
-+/* Modification of individual journal keys, addressed by btree id, level and key/position: */
-+int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
-+ unsigned, struct bkey_i *);
-+int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
-+ unsigned, struct bkey_i *);
-+int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
-+ unsigned, struct bpos);
-+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
-+ unsigned, struct bpos);
-+/* Stepping a combined btree node + journal iterator: */
-+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
-+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
-+/* Iterator setup/teardown; the non-underscore init variant also links the iterator onto c->journal_iters: */
-+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
-+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
-+ struct bch_fs *, struct btree *,
-+ struct btree_node_iter, struct bpos);
-+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
-+ struct bch_fs *,
-+ struct btree *);
-+/* Release the journal keys array / the journal entries radix tree: */
-+void bch2_journal_keys_free(struct journal_keys *);
-+void bch2_journal_entries_free(struct bch_fs *);
-+/* Build the sorted, deduplicated c->journal_keys from c->journal_entries: */
-+int bch2_journal_keys_sort(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
-diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
-new file mode 100644
-index 000000000000..3304bff7d464
---- /dev/null
-+++ b/fs/bcachefs/btree_key_cache.c
-@@ -0,0 +1,1072 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_cache.h"
-+#include "btree_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_locking.h"
-+#include "btree_update.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "journal.h"
-+#include "journal_reclaim.h"
-+#include "trace.h"
-+
-+#include <linux/sched/mm.h>
-+/*
-+ * Whether key cache entries for btree @id get a six lock with per-cpu readers
-+ * (SIX_LOCK_INIT_PCPU in bkey_cached_alloc()); only the subvolumes btree does.
-+ */
-+static inline bool btree_uses_pcpu_readers(enum btree_id id)
-+{
-+ return id == BTREE_ID_subvolumes;
-+}
-+/* Slab cache backing struct bkey_cached; created in bch2_btree_key_cache_init(). */
-+static struct kmem_cache *bch2_key_cache;
-+
-+/*
-+ * rhashtable compare callback for the key cache.
-+ *
-+ * Returns 0 when the cached object's (btree_id, pos) equals the lookup key,
-+ * and 1 otherwise.
-+ */
-+static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
-+				       const void *obj)
-+{
-+	const struct bkey_cached_key *want = arg->key;
-+	const struct bkey_cached *cached = obj;
-+
-+	if (cached->key.btree_id != want->btree_id)
-+		return 1;
-+
-+	return !bpos_eq(cached->key.pos, want->pos);
-+}
-+/*
-+ * rhashtable layout for the key cache: objects are struct bkey_cached, hashed
-+ * on their embedded struct bkey_cached_key and compared with
-+ * bch2_btree_key_cache_cmp_fn().
-+ */
-+static const struct rhashtable_params bch2_btree_key_cache_params = {
-+ .head_offset = offsetof(struct bkey_cached, hash),
-+ .key_offset = offsetof(struct bkey_cached, key),
-+ .key_len = sizeof(struct bkey_cached_key),
-+ .obj_cmpfn = bch2_btree_key_cache_cmp_fn,
-+};
-+/*
-+ * Look up the key cache entry for (@btree_id, @pos) in c->btree_key_cache.
-+ * Returns whatever rhashtable_lookup_fast() returns; no lock is taken on the
-+ * entry here, so callers in this file lock it and then re-check its key.
-+ */
-+__flatten
-+inline struct bkey_cached *
-+bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
-+{
-+ struct bkey_cached_key key = {
-+ .btree_id = btree_id,
-+ .pos = pos,
-+ };
-+
-+ return rhashtable_lookup_fast(&c->btree_key_cache.table, &key,
-+ bch2_btree_key_cache_params);
-+}
-+
-+/*
-+ * Try to take the locks needed to evict @ck, without blocking.
-+ *
-+ * Succeeds only if the intent lock can be taken, the entry is not dirty, and
-+ * the write lock can then be taken as well. On success both locks are held;
-+ * on failure no lock is held.
-+ */
-+static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
-+{
-+	if (!six_trylock_intent(&ck->c.lock))
-+		return false;
-+
-+	/* The write trylock is only attempted when the entry is clean. */
-+	if (test_bit(BKEY_CACHED_DIRTY, &ck->flags) ||
-+	    !six_trylock_write(&ck->c.lock)) {
-+		six_unlock_intent(&ck->c.lock);
-+		return false;
-+	}
-+
-+	return true;
-+}
-+/*
-+ * Remove @ck from the key cache hash table (BUG if it was not in the table),
-+ * overwrite its key with all-ones bytes and decrement nr_keys. Lookers that
-+ * found @ck before the removal re-check ck->key after locking it and retry on
-+ * mismatch, which the overwritten key guarantees.
-+ */
-+static void bkey_cached_evict(struct btree_key_cache *c,
-+ struct bkey_cached *ck)
-+{
-+ BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
-+ bch2_btree_key_cache_params));
-+ memset(&ck->key, ~0, sizeof(ck->key));
-+
-+ atomic_long_dec(&c->nr_keys);
-+}
-+/*
-+ * Move an evicted, clean entry onto the tail of the matching freed list
-+ * (freed_pcpu if its lock has per-cpu readers, freed_nonpcpu otherwise),
-+ * free its key buffer and drop the write and intent locks the caller holds.
-+ *
-+ * The entry itself is not returned to the slab here: the SRCU sequence
-+ * recorded in btree_trans_barrier_seq is what the shrinker later polls before
-+ * actually freeing it. bc->lock is not taken here; the caller visible in this
-+ * file (the shrinker scan) already holds it.
-+ */
-+static void bkey_cached_free(struct btree_key_cache *bc,
-+ struct bkey_cached *ck)
-+{
-+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
-+
-+ BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
-+
-+ ck->btree_trans_barrier_seq =
-+ start_poll_synchronize_srcu(&c->btree_trans_barrier);
-+
-+ if (ck->c.lock.readers)
-+ list_move_tail(&ck->list, &bc->freed_pcpu);
-+ else
-+ list_move_tail(&ck->list, &bc->freed_nonpcpu);
-+ atomic_long_inc(&bc->nr_freed);
-+
-+ kfree(ck->k);
-+ ck->k = NULL;
-+ ck->u64s = 0;
-+
-+ six_unlock_write(&ck->c.lock);
-+ six_unlock_intent(&ck->c.lock);
-+}
-+/*
-+ * Insert @ck into bc->freed_nonpcpu, keeping the list ordered by
-+ * btree_trans_barrier_seq: walk backwards from the tail and place @ck directly
-+ * after the first entry whose sequence is not newer than @ck's (ULONG_CMP_GE
-+ * is a wraparound-tolerant comparison); if there is none, @ck goes to the head.
-+ * The callers in this file hold bc->lock.
-+ */
-+#ifdef __KERNEL__
-+static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
-+ struct bkey_cached *ck)
-+{
-+ struct bkey_cached *pos;
-+
-+ list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
-+ if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
-+ pos->btree_trans_barrier_seq)) {
-+ list_move(&ck->list, &pos->list);
-+ return;
-+ }
-+ }
-+
-+ list_move(&ck->list, &bc->freed_nonpcpu);
-+}
-+#endif
-+/*
-+ * Put a freed, clean entry @ck onto a freelist (BUG if it is still dirty).
-+ *
-+ * Entries whose lock has per-cpu readers go to the tail of bc->freed_pcpu
-+ * under bc->lock. Other entries, in kernel builds, are first offered to this
-+ * CPU's small array (bc->pcpu_freed) with preemption disabled; if that array
-+ * is full, bc->lock is taken, the array is drained down to half its capacity
-+ * into bc->freed_nonpcpu (kept ordered by barrier sequence) and @ck is
-+ * inserted there too. Non-kernel builds simply append to bc->freed_nonpcpu
-+ * under bc->lock.
-+ */
-+static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
-+ struct bkey_cached *ck)
-+{
-+ BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
-+
-+ if (!ck->c.lock.readers) {
-+#ifdef __KERNEL__
-+ struct btree_key_cache_freelist *f;
-+ bool freed = false;
-+
-+ preempt_disable();
-+ f = this_cpu_ptr(bc->pcpu_freed);
-+
-+ if (f->nr < ARRAY_SIZE(f->objs)) {
-+ f->objs[f->nr++] = ck;
-+ freed = true;
-+ }
-+ preempt_enable();
-+
-+ if (!freed) {
-+ mutex_lock(&bc->lock);
-+ preempt_disable();
-+ f = this_cpu_ptr(bc->pcpu_freed); /* re-read: we may be on a different CPU by now */
-+
-+ while (f->nr > ARRAY_SIZE(f->objs) / 2) {
-+ struct bkey_cached *ck2 = f->objs[--f->nr];
-+
-+ __bkey_cached_move_to_freelist_ordered(bc, ck2);
-+ }
-+ preempt_enable();
-+
-+ __bkey_cached_move_to_freelist_ordered(bc, ck);
-+ mutex_unlock(&bc->lock);
-+ }
-+#else
-+ mutex_lock(&bc->lock);
-+ list_move_tail(&ck->list, &bc->freed_nonpcpu);
-+ mutex_unlock(&bc->lock);
-+#endif
-+ } else {
-+ mutex_lock(&bc->lock);
-+ list_move_tail(&ck->list, &bc->freed_pcpu);
-+ mutex_unlock(&bc->lock);
-+ }
-+}
-+/*
-+ * Free an entry that is not (or no longer) in the hash table, using the
-+ * per-cpu freelists where possible: record the SRCU sequence to wait for,
-+ * unlink @ck from any list, bump nr_freed, free the key buffer and hand @ck to
-+ * bkey_cached_move_to_freelist(), which takes bc->lock itself when it needs
-+ * to. Finally drops the write and intent locks, which the caller must hold.
-+ */
-+static void bkey_cached_free_fast(struct btree_key_cache *bc,
-+ struct bkey_cached *ck)
-+{
-+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
-+
-+ ck->btree_trans_barrier_seq =
-+ start_poll_synchronize_srcu(&c->btree_trans_barrier);
-+
-+ list_del_init(&ck->list);
-+ atomic_long_inc(&bc->nr_freed);
-+
-+ kfree(ck->k);
-+ ck->k = NULL;
-+ ck->u64s = 0;
-+
-+ bkey_cached_move_to_freelist(bc, ck);
-+
-+ six_unlock_write(&ck->c.lock);
-+ six_unlock_intent(&ck->c.lock);
-+}
-+/*
-+ * Get a struct bkey_cached for @path, preferring a previously freed one.
-+ *
-+ * Recycled entry: taken from this CPU's freelist or bc->freed_nonpcpu (or from
-+ * bc->freed_pcpu for btrees that use per-cpu reader locks), then intent- and
-+ * write-locked through the transaction; path->l[0] is pointed at it and marked
-+ * intent locked. If either lock attempt fails the entry is put back on a
-+ * freelist and ERR_PTR(ret) is returned.
-+ *
-+ * Fresh entry: allocated zeroed from bch2_key_cache via
-+ * allocate_dropping_locks(); its lock is initialised and both trylocks are
-+ * taken (BUG if they fail), and *was_new is set to true. The path is not
-+ * touched in this case.
-+ *
-+ * Returns the entry with intent and write locks held, NULL if the slab
-+ * allocation returned NULL without an error, or an ERR_PTR(). *was_new is only
-+ * ever written with true, so the caller must initialise it to false.
-+ */
-+static struct bkey_cached *
-+bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
-+ bool *was_new)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_key_cache *bc = &c->btree_key_cache;
-+ struct bkey_cached *ck = NULL;
-+ bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
-+ int ret;
-+
-+ if (!pcpu_readers) {
-+#ifdef __KERNEL__
-+ struct btree_key_cache_freelist *f;
-+
-+ preempt_disable();
-+ f = this_cpu_ptr(bc->pcpu_freed);
-+ if (f->nr)
-+ ck = f->objs[--f->nr];
-+ preempt_enable();
-+
-+ if (!ck) {
-+ mutex_lock(&bc->lock);
-+ preempt_disable();
-+ f = this_cpu_ptr(bc->pcpu_freed);
-+ /* Refill this CPU's array up to half its capacity from the tail of freed_nonpcpu. */
-+ while (!list_empty(&bc->freed_nonpcpu) &&
-+ f->nr < ARRAY_SIZE(f->objs) / 2) {
-+ ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
-+ list_del_init(&ck->list);
-+ f->objs[f->nr++] = ck;
-+ }
-+
-+ ck = f->nr ? f->objs[--f->nr] : NULL;
-+ preempt_enable();
-+ mutex_unlock(&bc->lock);
-+ }
-+#else
-+ mutex_lock(&bc->lock);
-+ if (!list_empty(&bc->freed_nonpcpu)) {
-+ ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
-+ list_del_init(&ck->list);
-+ }
-+ mutex_unlock(&bc->lock);
-+#endif
-+ } else {
-+ mutex_lock(&bc->lock);
-+ if (!list_empty(&bc->freed_pcpu)) {
-+ ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
-+ list_del_init(&ck->list);
-+ }
-+ mutex_unlock(&bc->lock);
-+ }
-+ /* Recycled entry: lock it through the transaction and attach it to the path. */
-+ if (ck) {
-+ ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_);
-+ if (unlikely(ret)) {
-+ bkey_cached_move_to_freelist(bc, ck);
-+ return ERR_PTR(ret);
-+ }
-+
-+ path->l[0].b = (void *) ck;
-+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
-+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
-+
-+ ret = bch2_btree_node_lock_write(trans, path, &ck->c);
-+ if (unlikely(ret)) {
-+ btree_node_unlock(trans, path, 0);
-+ bkey_cached_move_to_freelist(bc, ck);
-+ return ERR_PTR(ret);
-+ }
-+
-+ return ck;
-+ }
-+ /* Nothing to recycle: allocate a fresh entry from the slab. */
-+ ck = allocate_dropping_locks(trans, ret,
-+ kmem_cache_zalloc(bch2_key_cache, _gfp));
-+ if (ret) {
-+ kmem_cache_free(bch2_key_cache, ck); /* NOTE(review): ck may be NULL here - confirm kmem_cache_free() tolerates that */
-+ return ERR_PTR(ret);
-+ }
-+
-+ if (!ck)
-+ return NULL;
-+
-+ INIT_LIST_HEAD(&ck->list);
-+ bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
-+
-+ ck->c.cached = true;
-+ BUG_ON(!six_trylock_intent(&ck->c.lock)); /* the trylock is the side effect; it cannot fail on a fresh lock */
-+ BUG_ON(!six_trylock_write(&ck->c.lock));
-+ *was_new = true;
-+ return ck;
-+}
-+/*
-+ * Last-resort allocation: steal an existing clean entry from the hash table.
-+ *
-+ * Walks every bucket under bc->lock and RCU, and evicts the first entry that
-+ * is not dirty and can be locked with bkey_cached_lock_for_evict(). Returns
-+ * that entry with intent and write locks held and already removed from the
-+ * table, or NULL if no entry qualified.
-+ */
-+static struct bkey_cached *
-+bkey_cached_reuse(struct btree_key_cache *c)
-+{
-+ struct bucket_table *tbl;
-+ struct rhash_head *pos;
-+ struct bkey_cached *ck;
-+ unsigned i;
-+
-+ mutex_lock(&c->lock);
-+ rcu_read_lock();
-+ tbl = rht_dereference_rcu(c->table.tbl, &c->table);
-+ for (i = 0; i < tbl->size; i++)
-+ rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
-+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
-+ bkey_cached_lock_for_evict(ck)) {
-+ bkey_cached_evict(c, ck);
-+ goto out;
-+ }
-+ }
-+ ck = NULL; /* fell off the end of the table: nothing could be reused */
-+out:
-+ rcu_read_unlock();
-+ mutex_unlock(&c->lock);
-+ return ck;
-+}
-+
-+/*
-+ * Allocate (or recycle) a key cache entry for @path's btree and position and
-+ * insert it into the key cache hash table.
-+ *
-+ * Returns:
-+ *  - the new entry with the intent lock held (the write lock taken during
-+ *    setup is dropped before returning); ->valid is false, so the caller still
-+ *    has to fill it;
-+ *  - NULL if another thread inserted an entry for the same key first; the
-+ *    entry we prepared is disposed of and the caller is expected to redo the
-+ *    lookup;
-+ *  - an ERR_PTR() if no entry could be allocated or locked.
-+ */
-+static struct bkey_cached *
-+btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_key_cache *bc = &c->btree_key_cache;
-+	struct bkey_cached *ck;
-+	bool was_new = false;
-+
-+	ck = bkey_cached_alloc(trans, path, &was_new);
-+	if (IS_ERR(ck))
-+		return ck;
-+
-+	if (unlikely(!ck)) {
-+		/* Slab allocation failed: try to steal a clean entry instead. */
-+		ck = bkey_cached_reuse(bc);
-+		if (unlikely(!ck)) {
-+			bch_err(c, "error allocating memory for key cache item, btree %s",
-+				bch2_btree_id_str(path->btree_id));
-+			return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
-+		}
-+
-+		mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
-+	}
-+
-+	ck->c.level		= 0;
-+	ck->c.btree_id		= path->btree_id;
-+	ck->key.btree_id	= path->btree_id;
-+	ck->key.pos		= path->pos;
-+	ck->valid		= false;
-+	ck->flags		= 1U << BKEY_CACHED_ACCESSED;
-+
-+	if (unlikely(rhashtable_lookup_insert_fast(&bc->table,
-+					  &ck->hash,
-+					  bch2_btree_key_cache_params))) {
-+		/* We raced with another fill: */
-+
-+		if (likely(was_new)) {
-+			six_unlock_write(&ck->c.lock);
-+			six_unlock_intent(&ck->c.lock);
-+			/*
-+			 * The entry was never visible to anyone else. Release
-+			 * it the same way the other free sites in this file
-+			 * do: tear down the lock (which may own per-cpu reader
-+			 * state) and return the object to the slab cache it
-+			 * was allocated from, rather than kfree()ing it.
-+			 */
-+			six_lock_exit(&ck->c.lock);
-+			kmem_cache_free(bch2_key_cache, ck);
-+		} else {
-+			bkey_cached_free_fast(bc, ck);
-+		}
-+
-+		mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
-+		return NULL;
-+	}
-+
-+	atomic_long_inc(&bc->nr_keys);
-+
-+	six_unlock_write(&ck->c.lock);
-+
-+	return ck;
-+}
-+/*
-+ * Populate the (intent locked) cache entry @ck, reached via @ck_path, from the
-+ * btree.
-+ *
-+ * The key at ck->key.pos is looked up with a btree iterator, @ck_path is
-+ * relocked, and the buffer size is chosen: the key's u64s plus one, times 1.5,
-+ * capped at 256. If ck->k is too small a new buffer (rounded up to a power of
-+ * two) is allocated - first without blocking, and if that fails with all
-+ * transaction locks dropped and GFP_KERNEL, after which the locks are retaken.
-+ * The write lock is then taken, the buffer swapped in, the key copied and
-+ * ck->valid set.
-+ *
-+ * Returns 0, a transaction restart error, or
-+ * -BCH_ERR_ENOMEM_btree_key_cache_fill. The iterator is always exited.
-+ */
-+static int btree_key_cache_fill(struct btree_trans *trans,
-+ struct btree_path *ck_path,
-+ struct bkey_cached *ck)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ unsigned new_u64s = 0;
-+ struct bkey_i *new_k = NULL;
-+ int ret;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, ck->key.btree_id, ck->key.pos,
-+ BTREE_ITER_KEY_CACHE_FILL|
-+ BTREE_ITER_CACHED_NOFILL);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (!bch2_btree_node_relock(trans, ck_path, 0)) {
-+ trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
-+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
-+ goto err;
-+ }
-+
-+ /*
-+ * bch2_varint_decode can read past the end of the buffer by at
-+ * most 7 bytes (it won't be used):
-+ */
-+ new_u64s = k.k->u64s + 1;
-+
-+ /*
-+ * Allocate some extra space so that the transaction commit path is less
-+ * likely to have to reallocate, since that requires a transaction
-+ * restart:
-+ */
-+ new_u64s = min(256U, (new_u64s * 3) / 2);
-+
-+ if (new_u64s > ck->u64s) {
-+ new_u64s = roundup_pow_of_two(new_u64s);
-+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
-+ if (!new_k) {
-+ bch2_trans_unlock(trans); /* drop all locks so the blocking allocation below is allowed */
-+
-+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
-+ if (!new_k) {
-+ bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
-+ bch2_btree_id_str(ck->key.btree_id), new_u64s);
-+ ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
-+ goto err;
-+ }
-+
-+ if (!bch2_btree_node_relock(trans, ck_path, 0)) {
-+ kfree(new_k);
-+ trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
-+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
-+ goto err;
-+ }
-+
-+ ret = bch2_trans_relock(trans);
-+ if (ret) {
-+ kfree(new_k);
-+ goto err;
-+ }
-+ }
-+ }
-+
-+ ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c);
-+ if (ret) {
-+ kfree(new_k);
-+ goto err;
-+ }
-+ /* Only replace the buffer if a bigger one had to be allocated. */
-+ if (new_k) {
-+ kfree(ck->k);
-+ ck->u64s = new_u64s;
-+ ck->k = new_k;
-+ }
-+
-+ bkey_reassemble(ck->k, k);
-+ ck->valid = true;
-+ bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
-+
-+ /* We're not likely to need this iterator again: */
-+ set_btree_iter_dontneed(&iter);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+/*
-+ * Slow path of bch2_btree_path_traverse_cached(): unlike the fast path this
-+ * may create the cache entry and fill it from the btree.
-+ *
-+ * If the path's existing lock can be retaken, the entry already attached to
-+ * the path is used. Otherwise the entry is looked up (and created if absent,
-+ * retrying when creation lost a race), locked with the lock type the path
-+ * wants, and re-checked against the path's btree id and position because it
-+ * may have been evicted and reused between lookup and lock. An invalid entry
-+ * is filled unless BTREE_ITER_CACHED_NOFILL is set in @flags.
-+ *
-+ * Returns 0 or an error. On error the path is marked as needing traverse, and
-+ * for errors other than transaction restarts it is unlocked and path->l[0].b
-+ * is set to ERR_PTR(ret).
-+ */
-+static noinline int
-+bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_cached *ck;
-+ int ret = 0;
-+
-+ BUG_ON(path->level);
-+
-+ path->l[1].b = NULL;
-+
-+ if (bch2_btree_node_relock_notrace(trans, path, 0)) {
-+ ck = (void *) path->l[0].b;
-+ goto fill;
-+ }
-+retry:
-+ ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
-+ if (!ck) {
-+ ck = btree_key_cache_create(trans, path);
-+ ret = PTR_ERR_OR_ZERO(ck);
-+ if (ret)
-+ goto err;
-+ if (!ck)
-+ goto retry;
-+
-+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
-+ path->locks_want = 1;
-+ } else {
-+ enum six_lock_type lock_want = __btree_lock_want(path, 0);
-+
-+ ret = btree_node_lock(trans, path, (void *) ck, 0,
-+ lock_want, _THIS_IP_);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto err;
-+
-+ BUG_ON(ret);
-+ /* The entry may have been evicted and reused for another key while we waited for the lock. */
-+ if (ck->key.btree_id != path->btree_id ||
-+ !bpos_eq(ck->key.pos, path->pos)) {
-+ six_unlock_type(&ck->c.lock, lock_want);
-+ goto retry;
-+ }
-+
-+ mark_btree_node_locked(trans, path, 0,
-+ (enum btree_node_locked_type) lock_want);
-+ }
-+
-+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
-+ path->l[0].b = (void *) ck;
-+fill:
-+ path->uptodate = BTREE_ITER_UPTODATE;
-+
-+ if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) {
-+ /*
-+ * Using the underscore version because we haven't set
-+ * path->uptodate yet:
-+ *
-+ * NOTE(review): path->uptodate is in fact assigned at the fill:
-+ * label just above, so this remark may be stale - confirm.
-+ */
-+ if (!path->locks_want &&
-+ !__bch2_btree_path_upgrade(trans, path, 1, NULL)) {
-+ trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_);
-+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
-+ goto err;
-+ }
-+
-+ ret = btree_key_cache_fill(trans, path, ck);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_btree_path_relock(trans, path, _THIS_IP_);
-+ if (ret)
-+ goto err;
-+
-+ path->uptodate = BTREE_ITER_UPTODATE;
-+ }
-+
-+ if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
-+ set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
-+
-+ BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
-+ BUG_ON(path->uptodate); /* presumably BTREE_ITER_UPTODATE is 0 - confirm */
-+
-+ return ret;
-+err:
-+ path->uptodate = BTREE_ITER_NEED_TRAVERSE;
-+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
-+ btree_node_unlock(trans, path, 0);
-+ path->l[0].b = ERR_PTR(ret);
-+ }
-+ return ret;
-+}
-+/*
-+ * Traverse a key cache path: point @path at the cache entry for its btree id
-+ * and position and lock it with the lock type the path wants.
-+ *
-+ * This is the fast path: it only handles an entry that already exists and is
-+ * valid. If no entry is found, or the entry found is not valid, the work is
-+ * handed to bch2_btree_path_traverse_cached_slowpath() with the same @flags.
-+ *
-+ * Returns 0 on success or the error from taking the lock (a transaction
-+ * restart, per the EBUG_ON below).
-+ */
-+int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_cached *ck;
-+ int ret = 0;
-+
-+ EBUG_ON(path->level);
-+
-+ path->l[1].b = NULL;
-+
-+ if (bch2_btree_node_relock_notrace(trans, path, 0)) {
-+ ck = (void *) path->l[0].b;
-+ goto fill;
-+ }
-+retry:
-+ ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
-+ if (!ck) {
-+ return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
-+ } else {
-+ enum six_lock_type lock_want = __btree_lock_want(path, 0);
-+
-+ ret = btree_node_lock(trans, path, (void *) ck, 0,
-+ lock_want, _THIS_IP_);
-+ EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart));
-+
-+ if (ret)
-+ return ret;
-+ /* The entry may have been evicted and reused for another key while we waited for the lock. */
-+ if (ck->key.btree_id != path->btree_id ||
-+ !bpos_eq(ck->key.pos, path->pos)) {
-+ six_unlock_type(&ck->c.lock, lock_want);
-+ goto retry;
-+ }
-+
-+ mark_btree_node_locked(trans, path, 0,
-+ (enum btree_node_locked_type) lock_want);
-+ }
-+
-+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
-+ path->l[0].b = (void *) ck;
-+fill:
-+ if (!ck->valid)
-+ return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
-+
-+ if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
-+ set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
-+
-+ path->uptodate = BTREE_ITER_UPTODATE;
-+ EBUG_ON(!ck->valid);
-+ EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
-+
-+ return ret;
-+}
-+/*
-+ * Write the cached key at @key back to the btree and, if @evict, remove the
-+ * entry from the key cache afterwards.
-+ *
-+ * A clean entry is left alone unless @evict is set, in which case it is
-+ * evicted without a btree update. If @journal_seq is non-zero the flush only
-+ * happens when it equals the entry's journal pin sequence. The btree update
-+ * is committed with @commit_flags plus NOCHECK_RW/NOFAIL, using the reclaim
-+ * watermark when the entry pins the oldest journal sequence.
-+ *
-+ * Returns 0 (also when there was nothing to do) or the error from traversing
-+ * or committing; unexpected commit errors are reported as fatal.
-+ */
-+static int btree_key_cache_flush_pos(struct btree_trans *trans,
-+ struct bkey_cached_key key,
-+ u64 journal_seq,
-+ unsigned commit_flags,
-+ bool evict)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct journal *j = &c->journal;
-+ struct btree_iter c_iter, b_iter;
-+ struct bkey_cached *ck = NULL;
-+ int ret;
-+ /* b_iter walks the btree proper, c_iter the key cache entry for the same position. */
-+ bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos,
-+ BTREE_ITER_SLOTS|
-+ BTREE_ITER_INTENT|
-+ BTREE_ITER_ALL_SNAPSHOTS);
-+ bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
-+ BTREE_ITER_CACHED|
-+ BTREE_ITER_INTENT);
-+ b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;
-+
-+ ret = bch2_btree_iter_traverse(&c_iter);
-+ if (ret)
-+ goto out;
-+
-+ ck = (void *) c_iter.path->l[0].b;
-+ if (!ck)
-+ goto out;
-+
-+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-+ if (evict)
-+ goto evict;
-+ goto out;
-+ }
-+
-+ BUG_ON(!ck->valid);
-+
-+ if (journal_seq && ck->journal.seq != journal_seq)
-+ goto out;
-+
-+ /*
-+ * Since journal reclaim depends on us making progress here, and the
-+ * allocator/copygc depend on journal reclaim making progress, we need
-+ * to be using alloc reserves:
-+ */
-+ ret = bch2_btree_iter_traverse(&b_iter) ?:
-+ bch2_trans_update(trans, &b_iter, ck->k,
-+ BTREE_UPDATE_KEY_CACHE_RECLAIM|
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
-+ BTREE_TRIGGER_NORUN) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOCHECK_RW|
-+ BTREE_INSERT_NOFAIL|
-+ (ck->journal.seq == journal_last_seq(j)
-+ ? BCH_WATERMARK_reclaim
-+ : 0)|
-+ commit_flags);
-+
-+ bch2_fs_fatal_err_on(ret &&
-+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
-+ !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
-+ !bch2_journal_error(j), c,
-+ "error flushing key cache: %s", bch2_err_str(ret));
-+ if (ret)
-+ goto out;
-+ /* The key is in the btree now: release the journal pin and pre-reservation held for it. */
-+ bch2_journal_pin_drop(j, &ck->journal);
-+ bch2_journal_preres_put(j, &ck->res);
-+
-+ BUG_ON(!btree_node_locked(c_iter.path, 0));
-+
-+ if (!evict) {
-+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
-+ atomic_long_dec(&c->btree_key_cache.nr_dirty);
-+ }
-+ } else {
-+ struct btree_path *path2;
-+evict:
-+ trans_for_each_path(trans, path2)
-+ if (path2 != c_iter.path)
-+ __bch2_btree_path_unlock(trans, path2);
-+
-+ bch2_btree_node_lock_write_nofail(trans, c_iter.path, &ck->c);
-+
-+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
-+ atomic_long_dec(&c->btree_key_cache.nr_dirty);
-+ }
-+ /* bkey_cached_free_fast() drops the locks itself, so only the path's bookkeeping is cleared here. */
-+ mark_btree_node_locked_noreset(c_iter.path, 0, BTREE_NODE_UNLOCKED);
-+ bkey_cached_evict(&c->btree_key_cache, ck);
-+ bkey_cached_free_fast(&c->btree_key_cache, ck);
-+ }
-+out:
-+ bch2_trans_iter_exit(trans, &b_iter);
-+ bch2_trans_iter_exit(trans, &c_iter);
-+ return ret;
-+}
-+/*
-+ * Journal pin flush callback for key cache entries (it is the callback passed
-+ * to bch2_journal_pin_add()/bch2_journal_pin_update() in this file).
-+ *
-+ * @pin is embedded in a struct bkey_cached. The SRCU read lock on
-+ * btree_trans_barrier is held throughout: freed entries are only returned to
-+ * the slab once an SRCU grace period has elapsed (see the shrinker), so the
-+ * entry's memory stays valid even if it is evicted concurrently.
-+ *
-+ * Under a read lock on the entry: nothing is done if the pin has moved to a
-+ * different sequence than @seq or the entry is no longer dirty; if ck->seq
-+ * differs from @seq the pin is moved to ck->seq (the deferred pin update, see
-+ * bch2_btree_insert_key_cached()) and nothing is flushed. Otherwise the key is
-+ * flushed, without eviction, via btree_key_cache_flush_pos().
-+ *
-+ * Returns 0 or the error from the flush.
-+ */
-+int bch2_btree_key_cache_journal_flush(struct journal *j,
-+ struct journal_entry_pin *pin, u64 seq)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct bkey_cached *ck =
-+ container_of(pin, struct bkey_cached, journal);
-+ struct bkey_cached_key key;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
-+ int ret = 0;
-+
-+ btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read);
-+ key = ck->key; /* copied under the lock; the flush below looks the entry up again by key */
-+
-+ if (ck->journal.seq != seq ||
-+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-+ six_unlock_read(&ck->c.lock);
-+ goto unlock;
-+ }
-+
-+ if (ck->seq != seq) {
-+ bch2_journal_pin_update(&c->journal, ck->seq, &ck->journal,
-+ bch2_btree_key_cache_journal_flush);
-+ six_unlock_read(&ck->c.lock);
-+ goto unlock;
-+ }
-+ six_unlock_read(&ck->c.lock);
-+
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ btree_key_cache_flush_pos(trans, key, seq,
-+ BTREE_INSERT_JOURNAL_RECLAIM, false));
-+unlock:
-+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+/*
-+ * Flush the key cache entry for (@id, @pos) back to the btree and evict it.
-+ *
-+ * Returns 0 straight away when no entry exists for that key (the expected
-+ * common case), otherwise the result of flushing and evicting it.
-+ */
-+int bch2_btree_key_cache_flush(struct btree_trans *trans,
-+			       enum btree_id id, struct bpos pos)
-+{
-+	struct bkey_cached_key key = { id, pos };
-+
-+	/* Fastpath - assume it won't be found: */
-+	if (bch2_btree_key_cache_find(trans->c, id, pos))
-+		return btree_key_cache_flush_pos(trans, key, 0, 0, true);
-+
-+	return 0;
-+}
-+/*
-+ * Apply an update to a key cache entry during transaction commit.
-+ *
-+ * The new key from @insert_entry is copied into the entry's buffer (BUG if it
-+ * does not fit) and the entry is marked valid and dirty. Outside of journal
-+ * replay, the entry's journal pre-reservation is grown from the transaction's
-+ * when the new key needs more space. A journal pin is added at the
-+ * transaction's journal sequence so that journal reclaim will flush the entry,
-+ * and reclaim is kicked if this update pushed the number of dirty keys over
-+ * the flush threshold.
-+ *
-+ * Always returns true.
-+ */
-+bool bch2_btree_insert_key_cached(struct btree_trans *trans,
-+ unsigned flags,
-+ struct btree_insert_entry *insert_entry)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_cached *ck = (void *) insert_entry->path->l[0].b;
-+ struct bkey_i *insert = insert_entry->k;
-+ bool kick_reclaim = false;
-+
-+ BUG_ON(insert->k.u64s > ck->u64s);
-+
-+ if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-+ int difference;
-+
-+ BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
-+ /* Move reservation from the transaction to the entry, only ever growing the entry's share. */
-+ difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
-+ if (difference > 0) {
-+ trans->journal_preres.u64s -= difference;
-+ ck->res.u64s += difference;
-+ }
-+ }
-+
-+ bkey_copy(ck->k, insert);
-+ ck->valid = true;
-+
-+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-+ EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
-+ set_bit(BKEY_CACHED_DIRTY, &ck->flags);
-+ atomic_long_inc(&c->btree_key_cache.nr_dirty);
-+
-+ if (bch2_nr_btree_keys_need_flush(c))
-+ kick_reclaim = true;
-+ }
-+
-+ /*
-+ * To minimize lock contention, we only add the journal pin here and
-+ * defer pin updates to the flush callback via ->seq. Be careful not to
-+ * update ->seq on nojournal commits because we don't want to update the
-+ * pin to a seq that doesn't include journal updates on disk. Otherwise
-+ * we risk losing the update after a crash.
-+ *
-+ * The only exception is if the pin is not active in the first place. We
-+ * have to add the pin because journal reclaim drives key cache
-+ * flushing. The flush callback will not proceed unless ->seq matches
-+ * the latest pin, so make sure it starts with a consistent value.
-+ */
-+ if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) ||
-+ !journal_pin_active(&ck->journal)) {
-+ ck->seq = trans->journal_res.seq;
-+ }
-+ bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
-+ &ck->journal, bch2_btree_key_cache_journal_flush);
-+
-+ if (kick_reclaim)
-+ journal_reclaim_kick(&c->journal);
-+ return true;
-+}
-+/*
-+ * Invalidate the key cache entry attached to @path (BUG if it is not valid).
-+ * If the entry was dirty, the dirty bit and count are cleared and its journal
-+ * pin is dropped, so the cached value is discarded rather than flushed. The
-+ * entry stays in the hash table with ->valid cleared.
-+ */
-+void bch2_btree_key_cache_drop(struct btree_trans *trans,
-+ struct btree_path *path)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_cached *ck = (void *) path->l[0].b;
-+
-+ BUG_ON(!ck->valid);
-+
-+ /*
-+ * We just did an update to the btree, bypassing the key cache: the key
-+ * cache key is now stale and must be dropped, even if dirty:
-+ */
-+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
-+ atomic_long_dec(&c->btree_key_cache.nr_dirty);
-+ bch2_journal_pin_drop(&c->journal, &ck->journal);
-+ }
-+
-+ ck->valid = false;
-+}
-+/*
-+ * Shrinker scan callback for the key cache.
-+ *
-+ * Step 1 returns to the slab every entry on freed_nonpcpu and then freed_pcpu
-+ * whose SRCU grace period has elapsed, stopping at the first entry on each
-+ * list that is still too new. Step 2, reached only while fewer than
-+ * sc->nr_to_scan objects have been handled, sweeps the hash table starting at
-+ * the bucket remembered in bc->shrink_iter: dirty entries are skipped without
-+ * being counted, entries with the accessed bit set get it cleared (a second
-+ * chance), and the rest are evicted onto the freed lists if they can be
-+ * locked without blocking.
-+ *
-+ * Returns the number of objects actually returned to the slab; entries only
-+ * moved onto the freed lists in step 2 are not included.
-+ */
-+static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
-+ struct shrink_control *sc)
-+{
-+ struct bch_fs *c = container_of(shrink, struct bch_fs,
-+ btree_key_cache.shrink);
-+ struct btree_key_cache *bc = &c->btree_key_cache;
-+ struct bucket_table *tbl;
-+ struct bkey_cached *ck, *t;
-+ size_t scanned = 0, freed = 0, nr = sc->nr_to_scan;
-+ unsigned start, flags;
-+ int srcu_idx;
-+
-+ mutex_lock(&bc->lock);
-+ srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
-+ flags = memalloc_nofs_save();
-+
-+ /*
-+ * Newest freed entries are at the end of the list - once we hit one
-+ * that's too new to be freed, we can bail out:
-+ */
-+ list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
-+ if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
-+ ck->btree_trans_barrier_seq))
-+ break;
-+
-+ list_del(&ck->list);
-+ six_lock_exit(&ck->c.lock);
-+ kmem_cache_free(bch2_key_cache, ck);
-+ atomic_long_dec(&bc->nr_freed);
-+ scanned++;
-+ freed++;
-+ }
-+
-+ if (scanned >= nr)
-+ goto out;
-+
-+ list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
-+ if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
-+ ck->btree_trans_barrier_seq))
-+ break;
-+
-+ list_del(&ck->list);
-+ six_lock_exit(&ck->c.lock);
-+ kmem_cache_free(bch2_key_cache, ck);
-+ atomic_long_dec(&bc->nr_freed);
-+ scanned++;
-+ freed++;
-+ }
-+
-+ if (scanned >= nr)
-+ goto out;
-+ /* Step 2: sweep the hash table, resuming at the bucket where the previous scan stopped. */
-+ rcu_read_lock();
-+ tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
-+ if (bc->shrink_iter >= tbl->size)
-+ bc->shrink_iter = 0;
-+ start = bc->shrink_iter;
-+
-+ do {
-+ struct rhash_head *pos, *next;
-+
-+ pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));
-+
-+ while (!rht_is_a_nulls(pos)) {
-+ next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); /* fetched first: the entry may be unhashed below */
-+ ck = container_of(pos, struct bkey_cached, hash);
-+
-+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
-+ goto next;
-+
-+ if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
-+ clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
-+ else if (bkey_cached_lock_for_evict(ck)) {
-+ bkey_cached_evict(bc, ck);
-+ bkey_cached_free(bc, ck);
-+ }
-+
-+ scanned++;
-+ if (scanned >= nr)
-+ break;
-+next:
-+ pos = next;
-+ }
-+
-+ bc->shrink_iter++;
-+ if (bc->shrink_iter >= tbl->size)
-+ bc->shrink_iter = 0;
-+ } while (scanned < nr && bc->shrink_iter != start);
-+
-+ rcu_read_unlock();
-+out:
-+ memalloc_nofs_restore(flags);
-+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
-+ mutex_unlock(&bc->lock);
-+
-+ return freed;
-+}
-+
-+/*
-+ * Shrinker count callback: the number of reclaimable objects is the number of
-+ * cached keys that are not dirty, clamped at zero because the two counters
-+ * are read independently.
-+ */
-+static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
-+						struct shrink_control *sc)
-+{
-+	struct bch_fs *c = container_of(shrink, struct bch_fs,
-+					btree_key_cache.shrink);
-+	struct btree_key_cache *bc = &c->btree_key_cache;
-+	long nr_keys = atomic_long_read(&bc->nr_keys);
-+	long nr_dirty = atomic_long_read(&bc->nr_dirty);
-+	long nr_clean = nr_keys - nr_dirty;
-+
-+	if (nr_clean < 0)
-+		nr_clean = 0;
-+
-+	return nr_clean;
-+}
-+
-+/*
-+ * Tear down the key cache at filesystem shutdown.
-+ *
-+ * Every entry still in the hash table, on a per-cpu freelist or on one of the
-+ * freed lists is collected onto a private list under bc->lock and then freed:
-+ * its journal pin and pre-reservation are released, its key buffer is freed,
-+ * its lock is torn down and the object is returned to the slab. Panics if
-+ * dirty keys remain on a filesystem that was RW and has no journal error, or
-+ * if nr_keys did not drop to zero.
-+ *
-+ * This may run after bch2_fs_btree_key_cache_init() failed part-way, so both
-+ * the hash table and the per-cpu freelists are only touched if they were
-+ * actually set up.
-+ */
-+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
-+{
-+	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
-+	struct bucket_table *tbl;
-+	struct bkey_cached *ck, *n;
-+	struct rhash_head *pos;
-+	LIST_HEAD(items);
-+	unsigned i;
-+#ifdef __KERNEL__
-+	int cpu;
-+#endif
-+
-+	unregister_shrinker(&bc->shrink);
-+
-+	mutex_lock(&bc->lock);
-+
-+	/*
-+	 * The loop is needed to guard against racing with rehash:
-+	 */
-+	while (atomic_long_read(&bc->nr_keys)) {
-+		rcu_read_lock();
-+		tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
-+		if (tbl)
-+			for (i = 0; i < tbl->size; i++)
-+				rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
-+					bkey_cached_evict(bc, ck);
-+					list_add(&ck->list, &items);
-+				}
-+		rcu_read_unlock();
-+	}
-+
-+#ifdef __KERNEL__
-+	/*
-+	 * pcpu_freed is NULL if init never got as far as allocating it (or
-+	 * the allocation failed); per_cpu_ptr() on it must not be
-+	 * dereferenced in that case.
-+	 */
-+	if (bc->pcpu_freed) {
-+		for_each_possible_cpu(cpu) {
-+			struct btree_key_cache_freelist *f =
-+				per_cpu_ptr(bc->pcpu_freed, cpu);
-+
-+			for (i = 0; i < f->nr; i++) {
-+				ck = f->objs[i];
-+				list_add(&ck->list, &items);
-+			}
-+		}
-+	}
-+#endif
-+
-+	list_splice(&bc->freed_pcpu, &items);
-+	list_splice(&bc->freed_nonpcpu, &items);
-+
-+	mutex_unlock(&bc->lock);
-+
-+	list_for_each_entry_safe(ck, n, &items, list) {
-+		cond_resched();
-+
-+		bch2_journal_pin_drop(&c->journal, &ck->journal);
-+		bch2_journal_preres_put(&c->journal, &ck->res);
-+
-+		list_del(&ck->list);
-+		kfree(ck->k);
-+		six_lock_exit(&ck->c.lock);
-+		kmem_cache_free(bch2_key_cache, ck);
-+	}
-+
-+	if (atomic_long_read(&bc->nr_dirty) &&
-+	    !bch2_journal_error(&c->journal) &&
-+	    test_bit(BCH_FS_WAS_RW, &c->flags))
-+		panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n",
-+		      atomic_long_read(&bc->nr_dirty));
-+
-+	if (atomic_long_read(&bc->nr_keys))
-+		panic("btree key cache shutdown error: nr_keys nonzero (%li)\n",
-+		      atomic_long_read(&bc->nr_keys));
-+
-+	if (bc->table_init_done)
-+		rhashtable_destroy(&bc->table);
-+
-+	/* free_percpu() accepts NULL, so no guard is needed here. */
-+	free_percpu(bc->pcpu_freed);
-+}
-+/*
-+ * First-stage initialisation that cannot fail: sets up the mutex and the two
-+ * freed lists. Nothing is allocated here.
-+ */
-+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
-+{
-+ mutex_init(&c->lock);
-+ INIT_LIST_HEAD(&c->freed_pcpu);
-+ INIT_LIST_HEAD(&c->freed_nonpcpu);
-+}
-+/*
-+ * Second-stage initialisation: allocates the per-cpu freelists (kernel builds
-+ * only), initialises the hash table and registers the shrinker.
-+ *
-+ * Returns 0 on success, or -BCH_ERR_ENOMEM_fs_btree_cache_init if any of the
-+ * three steps fails. Nothing is unwound here on failure; what was set up so
-+ * far is left for bch2_fs_btree_key_cache_exit() to release.
-+ */
-+int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
-+{
-+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
-+
-+#ifdef __KERNEL__
-+ bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
-+ if (!bc->pcpu_freed)
-+ return -BCH_ERR_ENOMEM_fs_btree_cache_init;
-+#endif
-+
-+ if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params))
-+ return -BCH_ERR_ENOMEM_fs_btree_cache_init;
-+
-+ bc->table_init_done = true; /* tells the exit path that rhashtable_destroy() is needed */
-+
-+ bc->shrink.seeks = 0;
-+ bc->shrink.count_objects = bch2_btree_key_cache_count;
-+ bc->shrink.scan_objects = bch2_btree_key_cache_scan;
-+ if (register_shrinker(&bc->shrink, "%s-btree_key_cache", c->name))
-+ return -BCH_ERR_ENOMEM_fs_btree_cache_init;
-+ return 0;
-+}
-+
-+/*
-+ * Prints the nr_freed, nr_keys and nr_dirty counters of the key cache to
-+ * @out, one "name:<tab>value" pair per line.
-+ */
-+void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
-+{
-+	long freed = atomic_long_read(&c->nr_freed);
-+	long keys  = atomic_long_read(&c->nr_keys);
-+	long dirty = atomic_long_read(&c->nr_dirty);
-+
-+	prt_printf(out, "nr_freed:\t%lu", freed);
-+	prt_newline(out);
-+
-+	prt_printf(out, "nr_keys:\t%lu", keys);
-+	prt_newline(out);
-+
-+	prt_printf(out, "nr_dirty:\t%lu", dirty);
-+	prt_newline(out);
-+}
-+
-+/* Destroys the bch2_key_cache slab cache created by bch2_btree_key_cache_init(). */
-+void bch2_btree_key_cache_exit(void)
-+{
-+ kmem_cache_destroy(bch2_key_cache);
-+}
-+
-+/*
-+ * Creates the slab cache that struct bkey_cached objects are allocated from.
-+ *
-+ * Returns 0 on success, -ENOMEM if the cache could not be created.
-+ */
-+int __init bch2_btree_key_cache_init(void)
-+{
-+	bch2_key_cache = KMEM_CACHE(bkey_cached, SLAB_RECLAIM_ACCOUNT);
-+
-+	return bch2_key_cache ? 0 : -ENOMEM;
-+}
-diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
-new file mode 100644
-index 000000000000..be3acde2caa0
---- /dev/null
-+++ b/fs/bcachefs/btree_key_cache.h
-@@ -0,0 +1,48 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
-+#define _BCACHEFS_BTREE_KEY_CACHE_H
-+
-+/*
-+ * Number of dirty cached keys in excess of the soft limit
-+ * (1024 + half of all cached keys); 0 if we are under the limit.
-+ */
-+static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
-+{
-+	size_t dirty	= atomic_long_read(&c->btree_key_cache.nr_dirty);
-+	size_t total	= atomic_long_read(&c->btree_key_cache.nr_keys);
-+	size_t limit	= 1024 + total / 2;
-+
-+	/* the subtraction may wrap; evaluated as ssize_t that clamps to 0 */
-+	return max_t(ssize_t, 0, dirty - limit);
-+}
-+
-+/*
-+ * True once the number of dirty cached keys exceeds the hard limit
-+ * (4096 + three quarters of all cached keys).
-+ */
-+static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
-+{
-+	size_t dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
-+	size_t total = atomic_long_read(&c->btree_key_cache.nr_keys);
-+
-+	return dirty > 4096 + (total * 3) / 4;
-+}
-+
-+int bch2_btree_key_cache_journal_flush(struct journal *,
-+ struct journal_entry_pin *, u64);
-+
-+struct bkey_cached *
-+bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
-+
-+int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
-+ unsigned);
-+
-+bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
-+ struct btree_insert_entry *);
-+int bch2_btree_key_cache_flush(struct btree_trans *,
-+ enum btree_id, struct bpos);
-+void bch2_btree_key_cache_drop(struct btree_trans *,
-+ struct btree_path *);
-+
-+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
-+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);
-+int bch2_fs_btree_key_cache_init(struct btree_key_cache *);
-+
-+void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *);
-+
-+void bch2_btree_key_cache_exit(void);
-+int __init bch2_btree_key_cache_init(void);
-+
-+#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */
-diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
-new file mode 100644
-index 000000000000..3d48834d091f
---- /dev/null
-+++ b/fs/bcachefs/btree_locking.c
-@@ -0,0 +1,817 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_locking.h"
-+#include "btree_types.h"
-+
-+static struct lock_class_key bch2_btree_node_lock_key;
-+
-+/*
-+ * Initializes the six lock embedded in @b with the given init @flags. All
-+ * locks initialized here share the bch2_btree_node_lock_key lockdep class and
-+ * are then marked novalidate for lockdep.
-+ */
-+void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
-+ enum six_lock_init_flags flags)
-+{
-+ __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
-+ lockdep_set_novalidate_class(&b->lock);
-+}
-+
-+#ifdef CONFIG_LOCKDEP
-+/*
-+ * Intended to assert that no btree node locks are held. Currently a no-op:
-+ * the check is compiled out with #if 0 (see the note inside).
-+ */
-+void bch2_assert_btree_nodes_not_locked(void)
-+{
-+#if 0
-+ //Re-enable when lock_class_is_held() is merged:
-+ BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
-+#endif
-+}
-+#endif
-+
-+/* Btree node locking: */
-+
-+/*
-+ * Counts, per lock type, how many paths of @trans (other than @skip, which
-+ * may be NULL) hold a lock on node @b at @level.
-+ *
-+ * Returns all-zero counts if @b is NULL or an error pointer.
-+ */
-+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
-+						  struct btree_path *skip,
-+						  struct btree_bkey_cached_common *b,
-+						  unsigned level)
-+{
-+	struct six_lock_count counts;
-+	struct btree_path *iter;
-+
-+	memset(&counts, 0, sizeof(counts));
-+
-+	if (IS_ERR_OR_NULL(b))
-+		return counts;
-+
-+	trans_for_each_path(trans, iter) {
-+		int held;
-+
-+		if (iter == skip || &iter->l[level].b->c != b)
-+			continue;
-+
-+		held = btree_node_locked_type(iter, level);
-+		if (held != BTREE_NODE_UNLOCKED)
-+			counts.n[held]++;
-+	}
-+
-+	return counts;
-+}
-+
-+/* unlock */
-+
-+/* Out-of-line wrapper around bch2_btree_node_unlock_write_inlined(). */
-+void bch2_btree_node_unlock_write(struct btree_trans *trans,
-+ struct btree_path *path, struct btree *b)
-+{
-+ bch2_btree_node_unlock_write_inlined(trans, path, b);
-+}
-+
-+/* lock */
-+
-+/*
-+ * @trans wants to lock @b with type @type
-+ *
-+ * One entry of the lock graph walked by bch2_check_for_deadlock():
-+ * @trans is the transaction, @node_want and @lock_want are snapshots of
-+ * trans->locking and trans->locking_wait.lock_want taken when the entry was
-+ * pushed (see __lock_graph_down()).
-+ *
-+ * @path_idx, @level and @lock_start_time form the resume cursor into the
-+ * locks held by @trans: which path, which level of that path, and the start
-+ * time of the last waiter already examined on that node's wait list.
-+ */
-+struct trans_waiting_for_lock {
-+ struct btree_trans *trans;
-+ struct btree_bkey_cached_common *node_want;
-+ enum six_lock_type lock_want;
-+
-+ /* for iterating over held locks :*/
-+ u8 path_idx;
-+ u8 level;
-+ u64 lock_start_time;
-+};
-+
-+/*
-+ * Explicit stack for the deadlock-detection walk: g[0] is the transaction the
-+ * check started from, g[nr - 1] is the one currently being examined. The
-+ * fixed size of g[] bounds the depth (see lock_graph_descend()).
-+ */
-+struct lock_graph {
-+ struct trans_waiting_for_lock g[8];
-+ unsigned nr;
-+};
-+
-+/* Prints a header line followed by every transaction in the graph @g to @out. */
-+static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
-+{
-+	unsigned idx;
-+
-+	prt_printf(out, "Found lock cycle (%u entries):", g->nr);
-+	prt_newline(out);
-+
-+	for (idx = 0; idx < g->nr; idx++)
-+		bch2_btree_trans_to_text(out, g->g[idx].trans);
-+}
-+
-+/*
-+ * Prints the pids of the waiting tasks in the graph @g on a single line,
-+ * separated by "<- ".
-+ */
-+static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
-+{
-+	unsigned idx;
-+
-+	for (idx = 0; idx < g->nr; idx++) {
-+		struct btree_trans *waiter = g->g[idx].trans;
-+
-+		if (idx)
-+			prt_str(out, "<- ");
-+		prt_printf(out, "%u ", waiter->locking_wait.task->pid);
-+	}
-+	prt_newline(out);
-+}
-+
-+/* Pops the top entry off the graph and drops the ref held on its transaction. */
-+static void lock_graph_up(struct lock_graph *g)
-+{
-+	struct trans_waiting_for_lock *top = &g->g[--g->nr];
-+
-+	closure_put(&top->trans->ref);
-+}
-+
-+/* Empties the graph, dropping the transaction ref held by every entry. */
-+static noinline void lock_graph_pop_all(struct lock_graph *g)
-+{
-+	while (g->nr) {
-+		struct trans_waiting_for_lock *top = &g->g[--g->nr];
-+
-+		closure_put(&top->trans->ref);
-+	}
-+}
-+
-+/*
-+ * Pushes @trans onto the graph, recording which node and lock type it is
-+ * currently waiting for; the iteration cursor fields start out zeroed.
-+ *
-+ * Does not take a ref on @trans and does not check for overflow of g->g[];
-+ * both are up to the caller.
-+ */
-+static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
-+{
-+	struct trans_waiting_for_lock *slot = &g->g[g->nr++];
-+
-+	*slot = (struct trans_waiting_for_lock) {
-+		.trans		= trans,
-+		.node_want	= trans->locking,
-+		.lock_want	= trans->locking_wait.lock_want,
-+	};
-+}
-+
-+/* Takes a ref on @trans, then pushes it onto the graph. */
-+static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
-+{
-+ closure_get(&trans->ref);
-+ __lock_graph_down(g, trans);
-+}
-+
-+/*
-+ * Scans the graph from the second entry on for a transaction that is no
-+ * longer waiting on the node recorded for it, or whose wait start time no
-+ * longer matches what the previous entry recorded. The first such entry and
-+ * everything above it is popped.
-+ *
-+ * Returns true if anything was popped.
-+ */
-+static bool lock_graph_remove_non_waiters(struct lock_graph *g)
-+{
-+	struct trans_waiting_for_lock *i;
-+
-+	for (i = g->g + 1; i < g->g + g->nr; i++) {
-+		bool still_waiting =
-+			i->trans->locking == i->node_want &&
-+			i->trans->locking_wait.start_time == i[-1].lock_start_time;
-+
-+		if (still_waiting)
-+			continue;
-+
-+		while (g->g + g->nr > i)
-+			lock_graph_up(g);
-+		return true;
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Aborts the lock attempt of graph entry @i.
-+ *
-+ * For any entry but the first, the transaction is flagged lock_must_abort and
-+ * its waiting task is woken; returns 0. For the first entry (the transaction
-+ * running the check) the transaction is restarted directly and the restart
-+ * error is returned.
-+ */
-+static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
-+{
-+	if (i != g->g) {
-+		i->trans->lock_must_abort = true;
-+		wake_up_process(i->trans->locking_wait.task);
-+		return 0;
-+	}
-+
-+	trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
-+	return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
-+}
-+
-+/*
-+ * Ranks how willing we are to abort @trans to break a cycle; break_cycle()
-+ * picks the highest value:
-+ *
-+ *   0 - lock may not fail
-+ *   1 - waiting for a write lock
-+ *   2 - not inside traverse_all
-+ *   3 - everything else
-+ */
-+static int btree_trans_abort_preference(struct btree_trans *trans)
-+{
-+	return trans->lock_may_not_fail				? 0
-+		: trans->locking_wait.lock_want == SIX_LOCK_write	? 1
-+		: !trans->in_traverse_all				? 2
-+		: 3;
-+}
-+
-+/*
-+ * Called when the graph @g closes a cycle.
-+ *
-+ * First re-checks that every entry is still waiting; if stale entries were
-+ * dropped, returns 0 so the caller rescans. With @cycle set, only prints the
-+ * cycle into it and returns -1. Otherwise the entry with the highest
-+ * btree_trans_abort_preference() is aborted via abort_lock() (the first such
-+ * entry on ties); if every entry has preference 0, the state of all
-+ * transactions is logged and we BUG().
-+ *
-+ * On a nonzero return all graph entries have been popped.
-+ */
-+static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
-+{
-+ struct trans_waiting_for_lock *i, *abort = NULL;
-+ unsigned best = 0, pref;
-+ int ret;
-+
-+ if (lock_graph_remove_non_waiters(g))
-+ return 0;
-+
-+ /* Only checking, for debugfs: */
-+ if (cycle) {
-+ print_cycle(cycle, g);
-+ ret = -1;
-+ goto out;
-+ }
-+
-+ for (i = g->g; i < g->g + g->nr; i++) {
-+ pref = btree_trans_abort_preference(i->trans);
-+ if (pref > best) {
-+ abort = i;
-+ best = pref;
-+ }
-+ }
-+
-+ if (unlikely(!best)) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
-+
-+ for (i = g->g; i < g->g + g->nr; i++) {
-+ struct btree_trans *trans = i->trans;
-+
-+ bch2_btree_trans_to_text(&buf, trans);
-+
-+ prt_printf(&buf, "backtrace:");
-+ prt_newline(&buf);
-+ printbuf_indent_add(&buf, 2);
-+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task);
-+ printbuf_indent_sub(&buf, 2);
-+ prt_newline(&buf);
-+ }
-+
-+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
-+ printbuf_exit(&buf);
-+ BUG();
-+ }
-+
-+ ret = abort_lock(g, abort);
-+out:
-+ if (ret)
-+ while (g->nr)
-+ lock_graph_up(g);
-+ return ret;
-+}
-+
-+/*
-+ * Push @trans onto the lock graph, or handle the two reasons we cannot:
-+ *
-+ * - @trans is already in the graph: a cycle was found, hand off to
-+ *   break_cycle().
-+ * - the graph is full: if the originating transaction may not fail, @trans is
-+ *   simply not followed; otherwise the graph is unwound and, unless we are
-+ *   only printing (@cycle != NULL), the originating transaction is restarted.
-+ *
-+ * The caller has taken a ref on @trans. On the push path that ref is kept by
-+ * the new graph entry; on every other path it is dropped here.
-+ *
-+ * Returns 0 to continue the walk, nonzero to abort it.
-+ */
-+static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
-+			      struct printbuf *cycle)
-+{
-+	struct btree_trans *orig_trans = g->g->trans;
-+	struct trans_waiting_for_lock *i;
-+
-+	for (i = g->g; i < g->g + g->nr; i++)
-+		if (i->trans == trans) {
-+			closure_put(&trans->ref);
-+			return break_cycle(g, cycle);
-+		}
-+
-+	if (g->nr == ARRAY_SIZE(g->g)) {
-+		closure_put(&trans->ref);
-+
-+		if (orig_trans->lock_may_not_fail)
-+			return 0;
-+
-+		while (g->nr)
-+			lock_graph_up(g);
-+
-+		if (cycle)
-+			return 0;
-+
-+		/*
-+		 * Our ref on @trans was dropped above, so it must not be
-+		 * dereferenced any more - and it is orig_trans that is being
-+		 * restarted, so that is the transaction to trace:
-+		 */
-+		trace_and_count(orig_trans->c, trans_restart_would_deadlock_recursion_limit, orig_trans, _RET_IP_);
-+		return btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
-+	}
-+
-+	__lock_graph_down(g, trans);
-+	return 0;
-+}
-+
-+/*
-+ * True if holding a lock of type @t1 blocks a request of type @t2.
-+ *
-+ * Relies on the numeric values of the lock types: with SIX_LOCK_read == 0 and
-+ * SIX_LOCK_intent == 1, only read/read and read/intent sum to <= 1.
-+ * NOTE(review): assumes SIX_LOCK_write == 2 - confirm against six.h.
-+ */
-+static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
-+{
-+ return t1 + t2 > 1;
-+}
-+
-+/*
-+ * Walks the graph of "transaction A holds a lock that transaction B is
-+ * waiting for", starting from @trans, looking for a cycle back to a
-+ * transaction already on the stack.
-+ *
-+ * The search is an iterative depth-first walk: g.g[] is the explicit stack,
-+ * and each entry's path_idx/level/lock_start_time record where to resume
-+ * scanning that transaction's held locks after returning from a deeper level.
-+ *
-+ * @cycle: if non-NULL, no transaction is restarted or told to abort; cycles
-+ * and chains are printed into it instead.
-+ *
-+ * Returns 0 once the stack has been fully unwound, otherwise the nonzero
-+ * value produced by lock_graph_descend() - or, if trans->lock_must_abort was
-+ * already set, -1 (with @cycle) or a transaction restart error.
-+ */
-+int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
-+{
-+ struct lock_graph g;
-+ struct trans_waiting_for_lock *top;
-+ struct btree_bkey_cached_common *b;
-+ struct btree_path *path;
-+ unsigned path_idx;
-+ int ret;
-+
-+ if (trans->lock_must_abort) {
-+ if (cycle)
-+ return -1;
-+
-+ trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_);
-+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
-+ }
-+
-+ g.nr = 0;
-+ lock_graph_down(&g, trans);
-+next:
-+ if (!g.nr)
-+ return 0;
-+
-+ top = &g.g[g.nr - 1];
-+
-+ /* resume scanning the locks held by the transaction on top of the stack */
-+ trans_for_each_path_safe_from(top->trans, path, path_idx, top->path_idx) {
-+ if (!path->nodes_locked)
-+ continue;
-+
-+ if (path_idx != top->path_idx) {
-+ top->path_idx = path_idx;
-+ top->level = 0;
-+ top->lock_start_time = 0;
-+ }
-+
-+ for (;
-+ top->level < BTREE_MAX_DEPTH;
-+ top->level++, top->lock_start_time = 0) {
-+ int lock_held = btree_node_locked_type(path, top->level);
-+
-+ if (lock_held == BTREE_NODE_UNLOCKED)
-+ continue;
-+
-+ b = &READ_ONCE(path->l[top->level].b)->c;
-+
-+ if (IS_ERR_OR_NULL(b)) {
-+ /*
-+ * If we get here, it means we raced with the
-+ * other thread updating its btree_path
-+ * structures - which means it can't be blocked
-+ * waiting on a lock:
-+ */
-+ if (!lock_graph_remove_non_waiters(&g)) {
-+ /*
-+ * If lock_graph_remove_non_waiters()
-+ * didn't do anything, it must be
-+ * because we're being called by debugfs
-+ * checking for lock cycles, which
-+ * invokes us on btree_transactions that
-+ * aren't actually waiting on anything.
-+ * Just bail out:
-+ */
-+ lock_graph_pop_all(&g);
-+ }
-+
-+ goto next;
-+ }
-+
-+ if (list_empty_careful(&b->lock.wait_list))
-+ continue;
-+
-+ raw_spin_lock(&b->lock.wait_lock);
-+ list_for_each_entry(trans, &b->lock.wait_list, locking_wait.list) {
-+ BUG_ON(b != trans->locking);
-+
-+ /* skip waiters at or before the one we stopped at last time */
-+ if (top->lock_start_time &&
-+ time_after_eq64(top->lock_start_time, trans->locking_wait.start_time))
-+ continue;
-+
-+ top->lock_start_time = trans->locking_wait.start_time;
-+
-+ /* Don't check for self deadlock: */
-+ if (trans == top->trans ||
-+ !lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
-+ continue;
-+
-+ /* ref is taken before wait_lock is released; lock_graph_descend() owns it from here */
-+ closure_get(&trans->ref);
-+ raw_spin_unlock(&b->lock.wait_lock);
-+
-+ ret = lock_graph_descend(&g, trans, cycle);
-+ if (ret)
-+ return ret;
-+ goto next;
-+
-+ }
-+ raw_spin_unlock(&b->lock.wait_lock);
-+ }
-+ }
-+
-+ /* every lock held by the top transaction has been examined: pop it */
-+ if (g.nr > 1 && cycle)
-+ print_chain(cycle, &g);
-+ lock_graph_up(&g);
-+ goto next;
-+}
-+
-+/*
-+ * Adapter with the (lock, void *) callback signature passed to
-+ * six_lock_ip_waiter(): @p is the waiting btree_trans, @lock is unused.
-+ */
-+int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
-+{
-+	return bch2_check_for_deadlock(p, NULL);
-+}
-+
-+/*
-+ * Slowpath for taking a write lock on @b (reached from
-+ * __btree_node_lock_write() when the trylock fails).
-+ *
-+ * Returns 0 on success. On failure the nonzero error is returned and @path's
-+ * lock state for this level is reset to intent locked.
-+ */
-+int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path,
-+ struct btree_bkey_cached_common *b,
-+ bool lock_may_not_fail)
-+{
-+ int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
-+ int ret;
-+
-+ /*
-+ * Must drop our read locks before calling six_lock_write() -
-+ * six_unlock() won't do wakeups until the reader count
-+ * goes to 0, and it's safe because we have the node intent
-+ * locked:
-+ */
-+ six_lock_readers_add(&b->lock, -readers);
-+ ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write,
-+ lock_may_not_fail, _RET_IP_);
-+ six_lock_readers_add(&b->lock, readers);
-+
-+ if (ret)
-+ mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_INTENT_LOCKED);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Takes a write lock on @b that is not allowed to fail; BUG()s if it does.
-+ *
-+ * As a side effect, every read lock held by any path of @trans is dropped and
-+ * the affected paths are marked as needing a relock.
-+ */
-+void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
-+				       struct btree_path *path,
-+				       struct btree_bkey_cached_common *b)
-+{
-+	struct btree_path *linked;
-+	int ret;
-+
-+	/*
-+	 * XXX BIG FAT NOTICE
-+	 *
-+	 * Drop all read locks before taking a write lock:
-+	 *
-+	 * This is a hack, because bch2_btree_node_lock_write_nofail() is a
-+	 * hack - but by dropping read locks first, this should never fail, and
-+	 * we only use this in code paths where whatever read locks we've
-+	 * already taken are no longer needed:
-+	 */
-+	trans_for_each_path(trans, linked) {
-+		unsigned level;
-+
-+		if (!linked->nodes_locked)
-+			continue;
-+
-+		for (level = 0; level < BTREE_MAX_DEPTH; level++) {
-+			if (!btree_node_read_locked(linked, level))
-+				continue;
-+
-+			btree_node_unlock(trans, linked, level);
-+			btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
-+		}
-+	}
-+
-+	ret = __btree_node_lock_write(trans, path, b, true);
-+	BUG_ON(ret);
-+}
-+
-+/* relock */
-+
-+/*
-+ * Try to (re)take the locks @path wants, from path->level upwards, stopping
-+ * at path->locks_want or at the first level that has no node. At least one
-+ * level is always attempted (do/while), and the loop keeps going after a
-+ * failure.
-+ *
-+ * @upgrade: use bch2_btree_node_upgrade() instead of bch2_btree_node_relock()
-+ * @f: optional; receives the level and node of the last failure seen
-+ *
-+ * Returns true if @path ends up with uptodate < BTREE_ITER_NEED_RELOCK.
-+ */
-+static inline bool btree_path_get_locks(struct btree_trans *trans,
-+ struct btree_path *path,
-+ bool upgrade,
-+ struct get_locks_fail *f)
-+{
-+ unsigned l = path->level;
-+ int fail_idx = -1;
-+
-+ do {
-+ if (!btree_path_node(path, l))
-+ break;
-+
-+ if (!(upgrade
-+ ? bch2_btree_node_upgrade(trans, path, l)
-+ : bch2_btree_node_relock(trans, path, l))) {
-+ fail_idx = l;
-+
-+ if (f) {
-+ f->l = l;
-+ f->b = path->l[l].b;
-+ }
-+ }
-+
-+ l++;
-+ } while (l < path->locks_want);
-+
-+ /*
-+ * When we fail to get a lock, we have to ensure that any child nodes
-+ * can't be relocked so bch2_btree_path_traverse has to walk back up to
-+ * the node that we failed to relock:
-+ */
-+ if (fail_idx >= 0) {
-+ __bch2_btree_path_unlock(trans, path);
-+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+
-+ /* poison every level from the failed one down to 0 with an error pointer */
-+ do {
-+ path->l[fail_idx].b = upgrade
-+ ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
-+ : ERR_PTR(-BCH_ERR_no_btree_node_relock);
-+ --fail_idx;
-+ } while (fail_idx >= 0);
-+ }
-+
-+ if (path->uptodate == BTREE_ITER_NEED_RELOCK)
-+ path->uptodate = BTREE_ITER_UPTODATE;
-+
-+ bch2_trans_verify_locks(trans);
-+
-+ return path->uptodate < BTREE_ITER_NEED_RELOCK;
-+}
-+
-+/*
-+ * Relock @level of @path with the lock type __btree_lock_want() asks for.
-+ *
-+ * Succeeds either by relocking against the saved lock sequence number, or -
-+ * if the sequence number still matches - by bumping the count of a lock
-+ * another path in @trans already holds on the same node.
-+ *
-+ * @trace: emit the relock_fail trace event on failure (unless
-+ * trans->notrace_relock_fail is set)
-+ *
-+ * Returns true if the node is now locked.
-+ */
-+bool __bch2_btree_node_relock(struct btree_trans *trans,
-+ struct btree_path *path, unsigned level,
-+ bool trace)
-+{
-+ struct btree *b = btree_path_node(path, level);
-+ int want = __btree_lock_want(path, level);
-+
-+ if (race_fault())
-+ goto fail;
-+
-+ if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
-+ (btree_node_lock_seq_matches(path, b, level) &&
-+ btree_node_lock_increment(trans, &b->c, level, want))) {
-+ mark_btree_node_locked(trans, path, level, want);
-+ return true;
-+ }
-+fail:
-+ if (trace && !trans->notrace_relock_fail)
-+ trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
-+ return false;
-+}
-+
-+/* upgrade */
-+
-+/*
-+ * Bring the lock @path holds at @level in line with btree_lock_want():
-+ *
-+ * - nothing wanted: nothing to do
-+ * - read wanted: plain relock
-+ * - intent wanted: try, in order, upgrading a held lock in place, relocking
-+ *   as intent against the saved sequence number, and finally sharing an
-+ *   intent lock that another path of @trans already holds.
-+ *
-+ * Returns true on success, false if @level has no node or none of the
-+ * attempts succeeded.
-+ */
-+bool bch2_btree_node_upgrade(struct btree_trans *trans,
-+ struct btree_path *path, unsigned level)
-+{
-+ struct btree *b = path->l[level].b;
-+ struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level);
-+
-+ if (!is_btree_node(path, level))
-+ return false;
-+
-+ switch (btree_lock_want(path, level)) {
-+ case BTREE_NODE_UNLOCKED:
-+ BUG_ON(btree_node_locked(path, level));
-+ return true;
-+ case BTREE_NODE_READ_LOCKED:
-+ BUG_ON(btree_node_intent_locked(path, level));
-+ return bch2_btree_node_relock(trans, path, level);
-+ case BTREE_NODE_INTENT_LOCKED:
-+ break;
-+ case BTREE_NODE_WRITE_LOCKED:
-+ BUG();
-+ }
-+
-+ if (btree_node_intent_locked(path, level))
-+ return true;
-+
-+ if (race_fault())
-+ return false;
-+
-+ if (btree_node_locked(path, level)) {
-+ bool ret;
-+
-+ /* temporarily hide read locks held by our other paths from the upgrade attempt */
-+ six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]);
-+ ret = six_lock_tryupgrade(&b->c.lock);
-+ six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]);
-+
-+ if (ret)
-+ goto success;
-+ } else {
-+ if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
-+ goto success;
-+ }
-+
-+ /*
-+ * Do we already have an intent lock via another path? If so, just bump
-+ * lock count:
-+ */
-+ if (btree_node_lock_seq_matches(path, b, level) &&
-+ btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
-+ btree_node_unlock(trans, path, level);
-+ goto success;
-+ }
-+
-+ trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level);
-+ return false;
-+success:
-+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
-+ return true;
-+}
-+
-+/* Btree path locking: */
-+
-+/*
-+ * Only for btree_cache.c - only relocks intent locks
-+ *
-+ * Relocks every level of @path from path->level up to locks_want that has a
-+ * node. On the first failure the whole path is unlocked and marked as needing
-+ * a traverse, and the transaction is restarted.
-+ *
-+ * Returns 0 on success, or the transaction restart error.
-+ */
-+int bch2_btree_path_relock_intent(struct btree_trans *trans,
-+				  struct btree_path *path)
-+{
-+	unsigned l = path->level;
-+
-+	while (l < path->locks_want && btree_path_node(path, l)) {
-+		if (bch2_btree_node_relock(trans, path, l)) {
-+			l++;
-+			continue;
-+		}
-+
-+		__bch2_btree_path_unlock(trans, path);
-+		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+		trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path);
-+		return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Relocks @path without restarting the transaction on failure.
-+ *
-+ * @trace_ip is accepted for symmetry with __bch2_btree_path_relock() but is
-+ * not used here. Failure details are not needed, so no get_locks_fail is
-+ * passed down.
-+ *
-+ * Returns true if the path is locked again.
-+ */
-+__flatten
-+bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
-+			struct btree_path *path, unsigned long trace_ip)
-+{
-+	return btree_path_get_locks(trans, path, false, NULL);
-+}
-+
-+/*
-+ * Relocks @path; if that fails, traces the failure with @trace_ip and
-+ * restarts the transaction.
-+ *
-+ * Returns 0 on success, or the transaction restart error.
-+ */
-+int __bch2_btree_path_relock(struct btree_trans *trans,
-+			struct btree_path *path, unsigned long trace_ip)
-+{
-+	if (bch2_btree_path_relock_norestart(trans, path, trace_ip))
-+		return 0;
-+
-+	trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
-+	return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
-+}
-+
-+/*
-+ * Raises path->locks_want to @new_locks_want and tries to take the
-+ * additional locks on @path only; other paths of the transaction are not
-+ * touched. @new_locks_want is expected to be larger than the current value
-+ * (checked in debug builds).
-+ *
-+ * @f: optional, see btree_path_get_locks()
-+ *
-+ * Returns true if all wanted locks were obtained.
-+ */
-+bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned new_locks_want,
-+ struct get_locks_fail *f)
-+{
-+ EBUG_ON(path->locks_want >= new_locks_want);
-+
-+ path->locks_want = new_locks_want;
-+
-+ return btree_path_get_locks(trans, path, true, f);
-+}
-+
-+/*
-+ * Upgrades @path to @new_locks_want. If that fails, the sibling paths on the
-+ * same btree are upgraded too (best effort) so that the locks are taken on
-+ * the next traverse.
-+ *
-+ * Returns true only if @path itself was upgraded.
-+ */
-+bool __bch2_btree_path_upgrade(struct btree_trans *trans,
-+			       struct btree_path *path,
-+			       unsigned new_locks_want,
-+			       struct get_locks_fail *f)
-+{
-+	struct btree_path *linked;
-+
-+	if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
-+		return true;
-+
-+	/*
-+	 * XXX: this is ugly - we'd prefer to not be mucking with other
-+	 * iterators in the btree_trans here.
-+	 *
-+	 * On failure to upgrade the iterator, setting iter->locks_want and
-+	 * calling get_locks() is sufficient to make bch2_btree_path_traverse()
-+	 * get the locks we want on transaction restart.
-+	 *
-+	 * But if this iterator was a clone, on transaction restart what we did
-+	 * to this iterator isn't going to be preserved.
-+	 *
-+	 * Possibly we could add an iterator field for the parent iterator when
-+	 * an iterator is a copy - for now, we'll just upgrade any other
-+	 * iterators with the same btree id.
-+	 *
-+	 * The code below used to be needed to ensure ancestor nodes get locked
-+	 * before interior nodes - now that's handled by
-+	 * bch2_btree_path_traverse_all().
-+	 */
-+	if (path->cached || trans->in_traverse_all)
-+		return false;
-+
-+	trans_for_each_path(trans, linked) {
-+		if (linked == path ||
-+		    linked->cached != path->cached ||
-+		    linked->btree_id != path->btree_id ||
-+		    linked->locks_want >= new_locks_want)
-+			continue;
-+
-+		linked->locks_want = new_locks_want;
-+		btree_path_get_locks(trans, linked, true, NULL);
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Lowers path->locks_want to @new_locks_want and releases what is no longer
-+ * wanted: locks on levels above path->level are dropped, and an intent lock
-+ * at (or below) path->level is downgraded to a read lock.
-+ *
-+ * Does nothing if the transaction has been restarted.
-+ */
-+void __bch2_btree_path_downgrade(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned new_locks_want)
-+{
-+ unsigned l;
-+
-+ if (trans->restarted)
-+ return;
-+
-+ EBUG_ON(path->locks_want < new_locks_want);
-+
-+ path->locks_want = new_locks_want;
-+
-+ /* work downwards from the highest locked level while it is >= locks_want */
-+ while (path->nodes_locked &&
-+ (l = btree_path_highest_level_locked(path)) >= path->locks_want) {
-+ if (l > path->level) {
-+ btree_node_unlock(trans, path, l);
-+ } else {
-+ if (btree_node_intent_locked(path, l)) {
-+ six_lock_downgrade(&path->l[l].b->c.lock);
-+ mark_btree_node_locked_noreset(path, l, BTREE_NODE_READ_LOCKED);
-+ }
-+ break;
-+ }
-+ }
-+
-+ bch2_btree_path_verify_locks(path);
-+
-+ path->downgrade_seq++;
-+ trace_path_downgrade(trans, _RET_IP_, path);
-+}
-+
-+/* Btree transaction locking: */
-+
-+/*
-+ * Calls bch2_btree_path_downgrade() on every path of @trans; does nothing if
-+ * the transaction has been restarted.
-+ */
-+void bch2_trans_downgrade(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	if (!trans->restarted) {
-+		trans_for_each_path(trans, path)
-+			bch2_btree_path_downgrade(trans, path);
-+	}
-+}
-+
-+/*
-+ * Relocks every path of @trans that has should_be_locked set.
-+ *
-+ * Returns 0 on success. If the transaction was already restarted, returns the
-+ * negated restart code without touching any path; if a path fails to relock,
-+ * traces the failure and restarts the transaction.
-+ */
-+int bch2_trans_relock(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	if (unlikely(trans->restarted))
-+		return -((int) trans->restarted);
-+
-+	trans_for_each_path(trans, path) {
-+		if (!path->should_be_locked)
-+			continue;
-+		if (bch2_btree_path_relock_norestart(trans, path, _RET_IP_))
-+			continue;
-+
-+		trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path);
-+		return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Same as bch2_trans_relock(), but a relock failure is not traced or
-+ * counted.
-+ */
-+int bch2_trans_relock_notrace(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	if (unlikely(trans->restarted))
-+		return -((int) trans->restarted);
-+
-+	trans_for_each_path(trans, path) {
-+		if (!path->should_be_locked)
-+			continue;
-+		if (bch2_btree_path_relock_norestart(trans, path, _RET_IP_))
-+			continue;
-+
-+		return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
-+	}
-+
-+	return 0;
-+}
-+
-+/* Drops every btree node lock held by any path of @trans. */
-+void bch2_trans_unlock_noassert(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path) {
-+		__bch2_btree_path_unlock(trans, path);
-+	}
-+}
-+
-+/*
-+ * Drops every btree node lock held by any path of @trans; each path is left
-+ * marked as needing a relock by __bch2_btree_path_unlock().
-+ */
-+void bch2_trans_unlock(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path) {
-+		__bch2_btree_path_unlock(trans, path);
-+	}
-+}
-+
-+/* Drops all btree node locks of @trans and then releases its SRCU read lock as well. */
-+void bch2_trans_unlock_long(struct btree_trans *trans)
-+{
-+ bch2_trans_unlock(trans);
-+ bch2_trans_srcu_unlock(trans);
-+}
-+
-+/* True if at least one path of @trans currently holds a btree node lock. */
-+bool bch2_trans_locked(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path) {
-+		if (!path->nodes_locked)
-+			continue;
-+
-+		return true;
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Takes @lock via drop_locks_do(), i.e. with the transaction's btree locks
-+ * dropped around the (possibly sleeping) mutex_lock().
-+ *
-+ * Returns 0 with @lock held, or the error from drop_locks_do() with @lock
-+ * released again.
-+ */
-+int __bch2_trans_mutex_lock(struct btree_trans *trans,
-+			    struct mutex *lock)
-+{
-+	int err = drop_locks_do(trans, (mutex_lock(lock), 0));
-+
-+	if (!err)
-+		return 0;
-+
-+	mutex_unlock(lock);
-+	return err;
-+}
-+
-+/* Debug */
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+
-+/*
-+ * Debug check that the locks @path holds are consistent with its state:
-+ *
-+ * - a path holding no locks must not be UPTODATE while having a node at
-+ *   path->level;
-+ * - a level without a node must be unlocked;
-+ * - a level with a node must hold exactly the wanted lock type, except that
-+ *   a write lock is accepted wherever some lock is wanted.
-+ */
-+void bch2_btree_path_verify_locks(struct btree_path *path)
-+{
-+	unsigned l;
-+
-+	if (!path->nodes_locked) {
-+		BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
-+		       btree_path_node(path, path->level));
-+		return;
-+	}
-+
-+	for (l = 0; l < BTREE_MAX_DEPTH; l++) {
-+		int want = btree_lock_want(path, l);
-+		int have = btree_node_locked_type(path, l);
-+
-+		if (is_btree_node(path, l))
-+			BUG_ON((want == BTREE_NODE_UNLOCKED ||
-+				have != BTREE_NODE_WRITE_LOCKED) &&
-+			       want != have);
-+		else
-+			BUG_ON(have != BTREE_NODE_UNLOCKED);
-+	}
-+}
-+
-+/* Runs bch2_btree_path_verify_locks() on every path of @trans. */
-+void bch2_trans_verify_locks(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path) {
-+		bch2_btree_path_verify_locks(path);
-+	}
-+}
-+
-+#endif
-diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
-new file mode 100644
-index 000000000000..11b0a2c8cd69
---- /dev/null
-+++ b/fs/bcachefs/btree_locking.h
-@@ -0,0 +1,433 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_LOCKING_H
-+#define _BCACHEFS_BTREE_LOCKING_H
-+
-+/*
-+ * Only for internal btree use:
-+ *
-+ * The btree iterator tracks what locks it wants to take, and what locks it
-+ * currently has - here we have wrappers for locking/unlocking btree nodes and
-+ * updating the iterator state
-+ */
-+
-+#include "btree_iter.h"
-+#include "six.h"
-+
-+void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
-+
-+#ifdef CONFIG_LOCKDEP
-+void bch2_assert_btree_nodes_not_locked(void);
-+#else
-+static inline void bch2_assert_btree_nodes_not_locked(void) {}
-+#endif
-+
-+void bch2_trans_unlock_noassert(struct btree_trans *);
-+
-+/*
-+ * True if level @l of @path is in range and points at a real node, i.e. is
-+ * neither NULL nor an error pointer.
-+ */
-+static inline bool is_btree_node(struct btree_path *path, unsigned l)
-+{
-+	if (l >= BTREE_MAX_DEPTH)
-+		return false;
-+
-+	return !IS_ERR_OR_NULL(path->l[l].b);
-+}
-+
-+/*
-+ * Returns the per-function statistics slot for @trans, or NULL if
-+ * trans->fn_idx is outside the filesystem's btree_transaction_stats array.
-+ */
-+static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
-+{
-+	if (trans->fn_idx >= ARRAY_SIZE(trans->c->btree_transaction_stats))
-+		return NULL;
-+
-+	return &trans->c->btree_transaction_stats[trans->fn_idx];
-+}
-+
-+/*
-+ * matches six lock types
-+ *
-+ * The lock state a path records for one level: the three six lock types plus
-+ * UNLOCKED == -1. path->nodes_locked stores (value + 1) in two bits per
-+ * level, see mark_btree_node_locked_noreset().
-+ */
-+enum btree_node_locked_type {
-+ BTREE_NODE_UNLOCKED = -1,
-+ BTREE_NODE_READ_LOCKED = SIX_LOCK_read,
-+ BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent,
-+ BTREE_NODE_WRITE_LOCKED = SIX_LOCK_write,
-+};
-+
-+/*
-+ * Decodes the two-bit lock state of @level from path->nodes_locked and
-+ * returns it as a btree_node_locked_type value (-1 for unlocked).
-+ */
-+static inline int btree_node_locked_type(struct btree_path *path,
-+					 unsigned level)
-+{
-+	int encoded = (path->nodes_locked >> (level << 1)) & 3;
-+
-+	return BTREE_NODE_UNLOCKED + encoded;
-+}
-+
-+/*
-+ * Predicates on the lock state @path records for one level: the first three
-+ * test for one specific lock type, btree_node_locked() for any lock at all.
-+ */
-+static inline bool btree_node_write_locked(struct btree_path *path, unsigned l)
-+{
-+ return btree_node_locked_type(path, l) == BTREE_NODE_WRITE_LOCKED;
-+}
-+
-+static inline bool btree_node_intent_locked(struct btree_path *path, unsigned l)
-+{
-+ return btree_node_locked_type(path, l) == BTREE_NODE_INTENT_LOCKED;
-+}
-+
-+static inline bool btree_node_read_locked(struct btree_path *path, unsigned l)
-+{
-+ return btree_node_locked_type(path, l) == BTREE_NODE_READ_LOCKED;
-+}
-+
-+static inline bool btree_node_locked(struct btree_path *path, unsigned level)
-+{
-+ return btree_node_locked_type(path, level) != BTREE_NODE_UNLOCKED;
-+}
-+
-+/*
-+ * Records @type as the lock state of @level in path->nodes_locked: the two
-+ * bits for that level are cleared and replaced with (@type + 1), so that
-+ * BTREE_NODE_UNLOCKED (-1) is stored as 0. Only bookkeeping - no lock is
-+ * taken or released here.
-+ */
-+static inline void mark_btree_node_locked_noreset(struct btree_path *path,
-+ unsigned level,
-+ enum btree_node_locked_type type)
-+{
-+ /* relying on this to avoid a branch */
-+ BUILD_BUG_ON(SIX_LOCK_read != 0);
-+ BUILD_BUG_ON(SIX_LOCK_intent != 1);
-+
-+ path->nodes_locked &= ~(3U << (level << 1));
-+ path->nodes_locked |= (type + 1) << (level << 1);
-+}
-+
-+/*
-+ * Records @level of @path as unlocked. Must not be used on a write locked
-+ * level (checked in debug builds).
-+ */
-+static inline void mark_btree_node_unlocked(struct btree_path *path,
-+ unsigned level)
-+{
-+ EBUG_ON(btree_node_write_locked(path, level));
-+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
-+}
-+
-+/*
-+ * Records @type as the lock state of @level and, when lock time statistics
-+ * are enabled, timestamps when the lock was taken. @trans is currently
-+ * unused.
-+ */
-+static inline void mark_btree_node_locked(struct btree_trans *trans,
-+					  struct btree_path *path,
-+					  unsigned level,
-+					  enum btree_node_locked_type type)
-+{
-+	mark_btree_node_locked_noreset(path, level, type);
-+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-+	path->l[level].lock_taken_time = local_clock();
-+#endif
-+}
-+
-+/*
-+ * Lock type to use when locking @level of @path: intent below locks_want,
-+ * read otherwise. Unlike btree_lock_want() this never answers "unlocked".
-+ */
-+static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
-+{
-+	if (level < path->locks_want)
-+		return SIX_LOCK_intent;
-+
-+	return SIX_LOCK_read;
-+}
-+
-+/*
-+ * Lock state @path should have at @level:
-+ *
-+ * - below path->level: unlocked
-+ * - from path->level up to (not including) locks_want: intent
-+ * - exactly path->level, if not covered by the above: read
-+ * - anything higher: unlocked
-+ */
-+static inline enum btree_node_locked_type
-+btree_lock_want(struct btree_path *path, int level)
-+{
-+	if (level >= path->level) {
-+		if (level < path->locks_want)
-+			return BTREE_NODE_INTENT_LOCKED;
-+		if (level == path->level)
-+			return BTREE_NODE_READ_LOCKED;
-+	}
-+
-+	return BTREE_NODE_UNLOCKED;
-+}
-+
-+/*
-+ * With lock time statistics enabled, accounts the time since the lock at
-+ * @level was taken to the transaction's lock_hold_times; otherwise a no-op.
-+ */
-+static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
-+					      struct btree_path *path, unsigned level)
-+{
-+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-+	struct btree_transaction_stats *stats = btree_trans_stats(trans);
-+
-+	if (!stats)
-+		return;
-+
-+	__bch2_time_stats_update(&stats->lock_hold_times,
-+				 path->l[level].lock_taken_time,
-+				 local_clock());
-+#endif
-+}
-+
-+/* unlock: */
-+
-+/*
-+ * Releases whatever lock @path holds at @level (if any), updates the hold
-+ * time statistics, and records the level as unlocked.
-+ */
-+static inline void btree_node_unlock(struct btree_trans *trans,
-+ struct btree_path *path, unsigned level)
-+{
-+ int lock_type = btree_node_locked_type(path, level);
-+
-+ EBUG_ON(level >= BTREE_MAX_DEPTH);
-+
-+ if (lock_type != BTREE_NODE_UNLOCKED) {
-+ six_unlock_type(&path->l[level].b->c.lock, lock_type);
-+ btree_trans_lock_hold_time_update(trans, path, level);
-+ }
-+ mark_btree_node_unlocked(path, level);
-+}
-+
-+/*
-+ * Lowest / highest level at which @path holds a lock, derived from the
-+ * first / last set bit of nodes_locked (two bits per level).
-+ * NOTE(review): the visible callers only call these with nodes_locked != 0 -
-+ * presumably required by __ffs()/__fls(); confirm.
-+ */
-+static inline int btree_path_lowest_level_locked(struct btree_path *path)
-+{
-+ return __ffs(path->nodes_locked) >> 1;
-+}
-+
-+static inline int btree_path_highest_level_locked(struct btree_path *path)
-+{
-+ return __fls(path->nodes_locked) >> 1;
-+}
-+
-+/*
-+ * Marks @path as needing a relock, then releases every lock it holds,
-+ * lowest level first.
-+ */
-+static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
-+					    struct btree_path *path)
-+{
-+	btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK);
-+
-+	while (path->nodes_locked) {
-+		unsigned lowest = btree_path_lowest_level_locked(path);
-+
-+		btree_node_unlock(trans, path, lowest);
-+	}
-+}
-+
-+/*
-+ * Updates the saved lock sequence number, so that bch2_btree_node_relock() will
-+ * succeed:
-+ *
-+ * Drops the write lock @path holds on @b, leaving it intent locked. Before
-+ * the actual unlock, the saved lock_seq of every path in @trans that points
-+ * at @b is incremented.
-+ */
-+static inline void
-+bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path,
-+ struct btree *b)
-+{
-+ struct btree_path *linked;
-+
-+ EBUG_ON(path->l[b->c.level].b != b);
-+ EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock));
-+ EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write);
-+
-+ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
-+
-+ trans_for_each_path_with_node(trans, b, linked)
-+ linked->l[b->c.level].lock_seq++;
-+
-+ six_unlock_write(&b->c.lock);
-+}
-+
-+void bch2_btree_node_unlock_write(struct btree_trans *,
-+ struct btree_path *, struct btree *);
-+
-+int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
-+
-+/* lock: */
-+
-+/*
-+ * Takes a lock of @type on @b without updating any btree_path.
-+ *
-+ * While the lock call is in progress, trans->locking points at @b and
-+ * bch2_six_check_for_deadlock() is installed as the callback with @trans as
-+ * its argument; both trans->locking and the wait start time are cleared
-+ * again before returning.
-+ *
-+ * @lock_may_not_fail: stored in @trans, where the cycle detector reads it
-+ * @ip: caller address passed through to the six lock code
-+ *
-+ * Returns the result of six_lock_ip_waiter().
-+ */
-+static inline int __btree_node_lock_nopath(struct btree_trans *trans,
-+ struct btree_bkey_cached_common *b,
-+ enum six_lock_type type,
-+ bool lock_may_not_fail,
-+ unsigned long ip)
-+{
-+ int ret;
-+
-+ trans->lock_may_not_fail = lock_may_not_fail;
-+ trans->lock_must_abort = false;
-+ trans->locking = b;
-+
-+ ret = six_lock_ip_waiter(&b->lock, type, &trans->locking_wait,
-+ bch2_six_check_for_deadlock, trans, ip);
-+ WRITE_ONCE(trans->locking, NULL);
-+ WRITE_ONCE(trans->locking_wait.start_time, 0);
-+ return ret;
-+}
-+
-+/* __btree_node_lock_nopath() for a lock that is allowed to fail; the result must be checked. */
-+static inline int __must_check
-+btree_node_lock_nopath(struct btree_trans *trans,
-+ struct btree_bkey_cached_common *b,
-+ enum six_lock_type type,
-+ unsigned long ip)
-+{
-+ return __btree_node_lock_nopath(trans, b, type, false, ip);
-+}
-+
-+/* __btree_node_lock_nopath() for a lock that may not fail; BUG()s if it does anyway. */
-+static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
-+ struct btree_bkey_cached_common *b,
-+ enum six_lock_type type)
-+{
-+ int ret = __btree_node_lock_nopath(trans, b, type, true, _THIS_IP_);
-+
-+ BUG_ON(ret);
-+}
-+
-+/*
-+ * Lock a btree node if we already have it locked on one of our linked
-+ * iterators:
-+ *
-+ * Looks for a path of @trans that points at @b on @level and holds a lock of
-+ * type @want or numerically greater; if there is one, the lock's count for
-+ * @want is incremented.
-+ *
-+ * Returns true if the count was bumped, false if no such path exists.
-+ */
-+static inline bool btree_node_lock_increment(struct btree_trans *trans,
-+					     struct btree_bkey_cached_common *b,
-+					     unsigned level,
-+					     enum btree_node_locked_type want)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path) {
-+		if (&path->l[level].b->c != b)
-+			continue;
-+		if (btree_node_locked_type(path, level) < want)
-+			continue;
-+
-+		six_lock_increment(&b->lock, (enum six_lock_type) want);
-+		return true;
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Takes a lock of @type on @b on behalf of @path, trying in order: a
-+ * trylock, sharing a lock another path of @trans already holds, and finally
-+ * the lock slowpath.
-+ *
-+ * Does not update path->nodes_locked; only the lock-taken timestamp is
-+ * recorded (when lock time statistics are enabled).
-+ *
-+ * Returns 0 on success, or the error from btree_node_lock_nopath().
-+ */
-+static inline int btree_node_lock(struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree_bkey_cached_common *b,
-+ unsigned level,
-+ enum six_lock_type type,
-+ unsigned long ip)
-+{
-+ int ret = 0;
-+
-+ EBUG_ON(level >= BTREE_MAX_DEPTH);
-+ EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
-+
-+ /* ret stays 0 unless the third alternative runs and fails */
-+ if (likely(six_trylock_type(&b->lock, type)) ||
-+ btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) ||
-+ !(ret = btree_node_lock_nopath(trans, b, type, btree_path_ip_allocated(path)))) {
-+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-+ path->l[b->level].lock_taken_time = local_clock();
-+#endif
-+ }
-+
-+ return ret;
-+}
-+
-+int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_path *,
-+ struct btree_bkey_cached_common *b, bool);
-+
-+static inline int __btree_node_lock_write(struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree_bkey_cached_common *b,
-+ bool lock_may_not_fail)
-+{
-+ EBUG_ON(&path->l[b->level].b->c != b);
-+ EBUG_ON(path->l[b->level].lock_seq != six_lock_seq(&b->lock));
-+ EBUG_ON(!btree_node_intent_locked(path, b->level));
-+
-+ /*
-+ * six locks are unfair, and read locks block while a thread wants a
-+ * write lock: thus, we need to tell the cycle detector we have a write
-+ * lock _before_ taking the lock:
-+ */
-+ mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_WRITE_LOCKED);
-+
-+ return likely(six_trylock_write(&b->lock))
-+ ? 0
-+ : __bch2_btree_node_lock_write(trans, path, b, lock_may_not_fail);
-+}
-+
-+static inline int __must_check
-+bch2_btree_node_lock_write(struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree_bkey_cached_common *b)
-+{
-+ return __btree_node_lock_write(trans, path, b, false);
-+}
-+
-+void bch2_btree_node_lock_write_nofail(struct btree_trans *,
-+ struct btree_path *,
-+ struct btree_bkey_cached_common *);
-+
-+/* relock: */
-+
-+bool bch2_btree_path_relock_norestart(struct btree_trans *,
-+ struct btree_path *, unsigned long);
-+int __bch2_btree_path_relock(struct btree_trans *,
-+ struct btree_path *, unsigned long);
-+
-+static inline int bch2_btree_path_relock(struct btree_trans *trans,
-+ struct btree_path *path, unsigned long trace_ip)
-+{
-+ return btree_node_locked(path, path->level)
-+ ? 0
-+ : __bch2_btree_path_relock(trans, path, trace_ip);
-+}
-+
-+bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned, bool trace);
-+
-+static inline bool bch2_btree_node_relock(struct btree_trans *trans,
-+ struct btree_path *path, unsigned level)
-+{
-+ EBUG_ON(btree_node_locked(path, level) &&
-+ !btree_node_write_locked(path, level) &&
-+ btree_node_locked_type(path, level) != __btree_lock_want(path, level));
-+
-+ return likely(btree_node_locked(path, level)) ||
-+ (!IS_ERR_OR_NULL(path->l[level].b) &&
-+ __bch2_btree_node_relock(trans, path, level, true));
-+}
-+
-+static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
-+ struct btree_path *path, unsigned level)
-+{
-+ EBUG_ON(btree_node_locked(path, level) &&
-+ !btree_node_write_locked(path, level) &&
-+ btree_node_locked_type(path, level) != __btree_lock_want(path, level));
-+
-+ return likely(btree_node_locked(path, level)) ||
-+ (!IS_ERR_OR_NULL(path->l[level].b) &&
-+ __bch2_btree_node_relock(trans, path, level, false));
-+}
-+
-+/* upgrade */
-+
-+
-+struct get_locks_fail {
-+ unsigned l;
-+ struct btree *b;
-+};
-+
-+bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
-+ struct btree_path *, unsigned,
-+ struct get_locks_fail *);
-+
-+bool __bch2_btree_path_upgrade(struct btree_trans *,
-+ struct btree_path *, unsigned,
-+ struct get_locks_fail *);
-+
-+static inline int bch2_btree_path_upgrade(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned new_locks_want)
-+{
-+ struct get_locks_fail f;
-+ unsigned old_locks_want = path->locks_want;
-+
-+ new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
-+
-+ if (path->locks_want < new_locks_want
-+ ? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f)
-+ : path->uptodate == BTREE_ITER_UPTODATE)
-+ return 0;
-+
-+ trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path,
-+ old_locks_want, new_locks_want, &f);
-+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
-+}
-+
-+/* misc: */
-+
-+static inline void btree_path_set_should_be_locked(struct btree_path *path)
-+{
-+ EBUG_ON(!btree_node_locked(path, path->level));
-+ EBUG_ON(path->uptodate);
-+
-+ path->should_be_locked = true;
-+}
-+
-+static inline void __btree_path_set_level_up(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned l)
-+{
-+ btree_node_unlock(trans, path, l);
-+ path->l[l].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
-+}
-+
-+static inline void btree_path_set_level_up(struct btree_trans *trans,
-+ struct btree_path *path)
-+{
-+ __btree_path_set_level_up(trans, path, path->level++);
-+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+}
-+
-+/* debug */
-+
-+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
-+ struct btree_path *,
-+ struct btree_bkey_cached_common *b,
-+ unsigned);
-+
-+int bch2_check_for_deadlock(struct btree_trans *, struct printbuf *);
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+void bch2_btree_path_verify_locks(struct btree_path *);
-+void bch2_trans_verify_locks(struct btree_trans *);
-+#else
-+static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
-+static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
-+#endif
-+
-+#endif /* _BCACHEFS_BTREE_LOCKING_H */
-diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
-new file mode 100644
-index 000000000000..decad7b66c59
---- /dev/null
-+++ b/fs/bcachefs/btree_trans_commit.c
-@@ -0,0 +1,1145 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_gc.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "btree_journal_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_update_interior.h"
-+#include "btree_write_buffer.h"
-+#include "buckets.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "journal.h"
-+#include "journal_reclaim.h"
-+#include "replicas.h"
-+#include "snapshot.h"
-+
-+#include <linux/prefetch.h>
-+
-+static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ struct bch_fs *c = trans->c;
-+ struct bkey u;
-+ struct bkey_s_c k = bch2_btree_path_peek_slot_exact(i->path, &u);
-+
-+ if (unlikely(trans->journal_replay_not_finished)) {
-+ struct bkey_i *j_k =
-+ bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p);
-+
-+ if (j_k)
-+ k = bkey_i_to_s_c(j_k);
-+ }
-+
-+ u = *k.k;
-+ u.needs_whiteout = i->old_k.needs_whiteout;
-+
-+ BUG_ON(memcmp(&i->old_k, &u, sizeof(struct bkey)));
-+ BUG_ON(i->old_v != k.v);
-+#endif
-+}
-+
-+static inline struct btree_path_level *insert_l(struct btree_insert_entry *i)
-+{
-+ return i->path->l + i->level;
-+}
-+
-+static inline bool same_leaf_as_prev(struct btree_trans *trans,
-+ struct btree_insert_entry *i)
-+{
-+ return i != trans->updates &&
-+ insert_l(&i[0])->b == insert_l(&i[-1])->b;
-+}
-+
-+static inline bool same_leaf_as_next(struct btree_trans *trans,
-+ struct btree_insert_entry *i)
-+{
-+ return i + 1 < trans->updates + trans->nr_updates &&
-+ insert_l(&i[0])->b == insert_l(&i[1])->b;
-+}
-+
-+inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree *b)
-+{
-+ struct bch_fs *c = trans->c;
-+
-+ if (unlikely(btree_node_just_written(b)) &&
-+ bch2_btree_post_write_cleanup(c, b))
-+ bch2_trans_node_reinit_iter(trans, b);
-+
-+ /*
-+ * If the last bset has been written, or if it's gotten too big - start
-+ * a new bset to insert into:
-+ */
-+ if (want_new_bset(c, b))
-+ bch2_btree_init_next(trans, b);
-+}
-+
-+/* Inserting into a given leaf node (last stage of insert): */
-+
-+/* Handle overwrites and do insert, for non extents: */
-+bool bch2_btree_bset_insert_key(struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree *b,
-+ struct btree_node_iter *node_iter,
-+ struct bkey_i *insert)
-+{
-+ struct bkey_packed *k;
-+ unsigned clobber_u64s = 0, new_u64s = 0;
-+
-+ EBUG_ON(btree_node_just_written(b));
-+ EBUG_ON(bset_written(b, btree_bset_last(b)));
-+ EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
-+ EBUG_ON(bpos_lt(insert->k.p, b->data->min_key));
-+ EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
-+ EBUG_ON(insert->k.u64s >
-+ bch_btree_keys_u64s_remaining(trans->c, b));
-+ EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
-+
-+ k = bch2_btree_node_iter_peek_all(node_iter, b);
-+ if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
-+ k = NULL;
-+
-+ /* @k is the key being overwritten/deleted, if any: */
-+ EBUG_ON(k && bkey_deleted(k));
-+
-+ /* Deleting, but not found? nothing to do: */
-+ if (bkey_deleted(&insert->k) && !k)
-+ return false;
-+
-+ if (bkey_deleted(&insert->k)) {
-+ /* Deleting: */
-+ btree_account_key_drop(b, k);
-+ k->type = KEY_TYPE_deleted;
-+
-+ if (k->needs_whiteout)
-+ push_whiteout(trans->c, b, insert->k.p);
-+ k->needs_whiteout = false;
-+
-+ if (k >= btree_bset_last(b)->start) {
-+ clobber_u64s = k->u64s;
-+ bch2_bset_delete(b, k, clobber_u64s);
-+ goto fix_iter;
-+ } else {
-+ bch2_btree_path_fix_key_modified(trans, b, k);
-+ }
-+
-+ return true;
-+ }
-+
-+ if (k) {
-+ /* Overwriting: */
-+ btree_account_key_drop(b, k);
-+ k->type = KEY_TYPE_deleted;
-+
-+ insert->k.needs_whiteout = k->needs_whiteout;
-+ k->needs_whiteout = false;
-+
-+ if (k >= btree_bset_last(b)->start) {
-+ clobber_u64s = k->u64s;
-+ goto overwrite;
-+ } else {
-+ bch2_btree_path_fix_key_modified(trans, b, k);
-+ }
-+ }
-+
-+ k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
-+overwrite:
-+ bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
-+ new_u64s = k->u64s;
-+fix_iter:
-+ if (clobber_u64s != new_u64s)
-+ bch2_btree_node_iter_fix(trans, path, b, node_iter, k,
-+ clobber_u64s, new_u64s);
-+ return true;
-+}
-+
-+static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
-+ unsigned i, u64 seq)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct btree_write *w = container_of(pin, struct btree_write, journal);
-+ struct btree *b = container_of(w, struct btree, writes[i]);
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ unsigned long old, new, v;
-+ unsigned idx = w - b->writes;
-+
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-+ v = READ_ONCE(b->flags);
-+
-+ do {
-+ old = new = v;
-+
-+ if (!(old & (1 << BTREE_NODE_dirty)) ||
-+ !!(old & (1 << BTREE_NODE_write_idx)) != idx ||
-+ w->journal.seq != seq)
-+ break;
-+
-+ new &= ~BTREE_WRITE_TYPE_MASK;
-+ new |= BTREE_WRITE_journal_reclaim;
-+ new |= 1 << BTREE_NODE_need_write;
-+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
-+
-+ btree_node_write_if_need(c, b, SIX_LOCK_read);
-+ six_unlock_read(&b->c.lock);
-+
-+ bch2_trans_put(trans);
-+ return 0;
-+}
-+
-+int bch2_btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
-+{
-+ return __btree_node_flush(j, pin, 0, seq);
-+}
-+
-+int bch2_btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
-+{
-+ return __btree_node_flush(j, pin, 1, seq);
-+}
-+
-+inline void bch2_btree_add_journal_pin(struct bch_fs *c,
-+ struct btree *b, u64 seq)
-+{
-+ struct btree_write *w = btree_current_write(b);
-+
-+ bch2_journal_pin_add(&c->journal, seq, &w->journal,
-+ btree_node_write_idx(b) == 0
-+ ? bch2_btree_node_flush0
-+ : bch2_btree_node_flush1);
-+}
-+
-+/**
-+ * bch2_btree_insert_key_leaf() - insert a key one key into a leaf node
-+ * @trans: btree transaction object
-+ * @path: path pointing to @insert's pos
-+ * @insert: key to insert
-+ * @journal_seq: sequence number of journal reservation
-+ */
-+inline void bch2_btree_insert_key_leaf(struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct bkey_i *insert,
-+ u64 journal_seq)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree *b = path_l(path)->b;
-+ struct bset_tree *t = bset_tree_last(b);
-+ struct bset *i = bset(b, t);
-+ int old_u64s = bset_u64s(t);
-+ int old_live_u64s = b->nr.live_u64s;
-+ int live_u64s_added, u64s_added;
-+
-+ if (unlikely(!bch2_btree_bset_insert_key(trans, path, b,
-+ &path_l(path)->iter, insert)))
-+ return;
-+
-+ i->journal_seq = cpu_to_le64(max(journal_seq, le64_to_cpu(i->journal_seq)));
-+
-+ bch2_btree_add_journal_pin(c, b, journal_seq);
-+
-+ if (unlikely(!btree_node_dirty(b))) {
-+ EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
-+ set_btree_node_dirty_acct(c, b);
-+ }
-+
-+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
-+ u64s_added = (int) bset_u64s(t) - old_u64s;
-+
-+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
-+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
-+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
-+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
-+
-+ if (u64s_added > live_u64s_added &&
-+ bch2_maybe_compact_whiteouts(c, b))
-+ bch2_trans_node_reinit_iter(trans, b);
-+}
-+
-+/* Cached btree updates: */
-+
-+/* Normal update interface: */
-+
-+static inline void btree_insert_entry_checks(struct btree_trans *trans,
-+ struct btree_insert_entry *i)
-+{
-+ BUG_ON(!bpos_eq(i->k->k.p, i->path->pos));
-+ BUG_ON(i->cached != i->path->cached);
-+ BUG_ON(i->level != i->path->level);
-+ BUG_ON(i->btree_id != i->path->btree_id);
-+ EBUG_ON(!i->level &&
-+ btree_type_has_snapshots(i->btree_id) &&
-+ !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
-+ test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
-+ i->k->k.p.snapshot &&
-+ bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
-+}
-+
-+static noinline int
-+bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
-+ unsigned long trace_ip)
-+{
-+ return drop_locks_do(trans,
-+ bch2_journal_preres_get(&trans->c->journal,
-+ &trans->journal_preres,
-+ trans->journal_preres_u64s,
-+ (flags & BCH_WATERMARK_MASK)));
-+}
-+
-+static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
-+ unsigned flags)
-+{
-+ return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
-+ trans->journal_u64s, flags);
-+}
-+
-+#define JSET_ENTRY_LOG_U64s 4
-+
-+static noinline void journal_transaction_name(struct btree_trans *trans)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct journal *j = &c->journal;
-+ struct jset_entry *entry =
-+ bch2_journal_add_entry(j, &trans->journal_res,
-+ BCH_JSET_ENTRY_log, 0, 0,
-+ JSET_ENTRY_LOG_U64s);
-+ struct jset_entry_log *l =
-+ container_of(entry, struct jset_entry_log, entry);
-+
-+ strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
-+}
-+
-+static inline int btree_key_can_insert(struct btree_trans *trans,
-+ struct btree *b, unsigned u64s)
-+{
-+ struct bch_fs *c = trans->c;
-+
-+ if (!bch2_btree_node_insert_fits(c, b, u64s))
-+ return -BCH_ERR_btree_insert_btree_node_full;
-+
-+ return 0;
-+}
-+
-+static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
-+ struct btree_path *path, unsigned u64s)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_cached *ck = (void *) path->l[0].b;
-+ struct btree_insert_entry *i;
-+ unsigned new_u64s;
-+ struct bkey_i *new_k;
-+
-+ EBUG_ON(path->level);
-+
-+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
-+ bch2_btree_key_cache_must_wait(c) &&
-+ !(flags & BTREE_INSERT_JOURNAL_RECLAIM))
-+ return -BCH_ERR_btree_insert_need_journal_reclaim;
-+
-+ /*
-+ * bch2_varint_decode can read past the end of the buffer by at most 7
-+ * bytes (it won't be used):
-+ */
-+ u64s += 1;
-+
-+ if (u64s <= ck->u64s)
-+ return 0;
-+
-+ new_u64s = roundup_pow_of_two(u64s);
-+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
-+ if (!new_k) {
-+ bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
-+ bch2_btree_id_str(path->btree_id), new_u64s);
-+ return -BCH_ERR_ENOMEM_btree_key_cache_insert;
-+ }
-+
-+ trans_for_each_update(trans, i)
-+ if (i->old_v == &ck->k->v)
-+ i->old_v = &new_k->v;
-+
-+ ck->u64s = new_u64s;
-+ ck->k = new_k;
-+ return 0;
-+}
-+
-+/* Triggers: */
-+
-+static int run_one_mem_trigger(struct btree_trans *trans,
-+ struct btree_insert_entry *i,
-+ unsigned flags)
-+{
-+ struct bkey_s_c old = { &i->old_k, i->old_v };
-+ struct bkey_i *new = i->k;
-+ const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type);
-+ const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type);
-+ int ret;
-+
-+ verify_update_old_key(trans, i);
-+
-+ if (unlikely(flags & BTREE_TRIGGER_NORUN))
-+ return 0;
-+
-+ if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)))
-+ return 0;
-+
-+ if (old_ops->atomic_trigger == new_ops->atomic_trigger) {
-+ ret = bch2_mark_key(trans, i->btree_id, i->level,
-+ old, bkey_i_to_s_c(new),
-+ BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
-+ } else {
-+ struct bkey _deleted = KEY(0, 0, 0);
-+ struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL };
-+
-+ _deleted.p = i->path->pos;
-+
-+ ret = bch2_mark_key(trans, i->btree_id, i->level,
-+ deleted, bkey_i_to_s_c(new),
-+ BTREE_TRIGGER_INSERT|flags) ?:
-+ bch2_mark_key(trans, i->btree_id, i->level,
-+ old, deleted,
-+ BTREE_TRIGGER_OVERWRITE|flags);
-+ }
-+
-+ return ret;
-+}
-+
-+static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i,
-+ bool overwrite)
-+{
-+ /*
-+ * Transactional triggers create new btree_insert_entries, so we can't
-+ * pass them a pointer to a btree_insert_entry, that memory is going to
-+ * move:
-+ */
-+ struct bkey old_k = i->old_k;
-+ struct bkey_s_c old = { &old_k, i->old_v };
-+ const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type);
-+ const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type);
-+
-+ verify_update_old_key(trans, i);
-+
-+ if ((i->flags & BTREE_TRIGGER_NORUN) ||
-+ !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
-+ return 0;
-+
-+ if (!i->insert_trigger_run &&
-+ !i->overwrite_trigger_run &&
-+ old_ops->trans_trigger == new_ops->trans_trigger) {
-+ i->overwrite_trigger_run = true;
-+ i->insert_trigger_run = true;
-+ return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k,
-+ BTREE_TRIGGER_INSERT|
-+ BTREE_TRIGGER_OVERWRITE|
-+ i->flags) ?: 1;
-+ } else if (overwrite && !i->overwrite_trigger_run) {
-+ i->overwrite_trigger_run = true;
-+ return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1;
-+ } else if (!overwrite && !i->insert_trigger_run) {
-+ i->insert_trigger_run = true;
-+ return bch2_trans_mark_new(trans, i->btree_id, i->level, i->k, i->flags) ?: 1;
-+ } else {
-+ return 0;
-+ }
-+}
-+
-+static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
-+ struct btree_insert_entry *btree_id_start)
-+{
-+ struct btree_insert_entry *i;
-+ bool trans_trigger_run;
-+ int ret, overwrite;
-+
-+ for (overwrite = 1; overwrite >= 0; --overwrite) {
-+
-+ /*
-+ * Running triggers will append more updates to the list of updates as
-+ * we're walking it:
-+ */
-+ do {
-+ trans_trigger_run = false;
-+
-+ for (i = btree_id_start;
-+ i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
-+ i++) {
-+ if (i->btree_id != btree_id)
-+ continue;
-+
-+ ret = run_one_trans_trigger(trans, i, overwrite);
-+ if (ret < 0)
-+ return ret;
-+ if (ret)
-+ trans_trigger_run = true;
-+ }
-+ } while (trans_trigger_run);
-+ }
-+
-+ return 0;
-+}
-+
-+static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
-+{
-+ struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates;
-+ unsigned btree_id = 0;
-+ int ret = 0;
-+
-+ /*
-+ *
-+ * For a given btree, this algorithm runs insert triggers before
-+ * overwrite triggers: this is so that when extents are being moved
-+ * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before
-+ * they are re-added.
-+ */
-+ for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
-+ if (btree_id == BTREE_ID_alloc)
-+ continue;
-+
-+ while (btree_id_start < trans->updates + trans->nr_updates &&
-+ btree_id_start->btree_id < btree_id)
-+ btree_id_start++;
-+
-+ ret = run_btree_triggers(trans, btree_id, btree_id_start);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ trans_for_each_update(trans, i) {
-+ if (i->btree_id > BTREE_ID_alloc)
-+ break;
-+ if (i->btree_id == BTREE_ID_alloc) {
-+ ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
-+ if (ret)
-+ return ret;
-+ break;
-+ }
-+ }
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ trans_for_each_update(trans, i)
-+ BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) &&
-+ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
-+ (!i->insert_trigger_run || !i->overwrite_trigger_run));
-+#endif
-+ return 0;
-+}
-+
-+static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_insert_entry *i;
-+ int ret = 0;
-+
-+ trans_for_each_update(trans, i) {
-+ /*
-+ * XXX: synchronization of cached update triggers with gc
-+ * XXX: synchronization of interior node updates with gc
-+ */
-+ BUG_ON(i->cached || i->level);
-+
-+ if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) {
-+ ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC);
-+ if (ret)
-+ break;
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+static inline int
-+bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
-+ struct btree_insert_entry **stopped_at,
-+ unsigned long trace_ip)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_insert_entry *i;
-+ struct btree_write_buffered_key *wb;
-+ struct btree_trans_commit_hook *h;
-+ unsigned u64s = 0;
-+ int ret;
-+
-+ if (race_fault()) {
-+ trace_and_count(c, trans_restart_fault_inject, trans, trace_ip);
-+ return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject);
-+ }
-+
-+ /*
-+ * Check if the insert will fit in the leaf node with the write lock
-+ * held, otherwise another thread could write the node changing the
-+ * amount of space available:
-+ */
-+
-+ prefetch(&trans->c->journal.flags);
-+
-+ trans_for_each_update(trans, i) {
-+ /* Multiple inserts might go to same leaf: */
-+ if (!same_leaf_as_prev(trans, i))
-+ u64s = 0;
-+
-+ u64s += i->k->k.u64s;
-+ ret = !i->cached
-+ ? btree_key_can_insert(trans, insert_l(i)->b, u64s)
-+ : btree_key_can_insert_cached(trans, flags, i->path, u64s);
-+ if (ret) {
-+ *stopped_at = i;
-+ return ret;
-+ }
-+ }
-+
-+ if (trans->nr_wb_updates &&
-+ trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
-+ return -BCH_ERR_btree_insert_need_flush_buffer;
-+
-+ /*
-+ * Don't get journal reservation until after we know insert will
-+ * succeed:
-+ */
-+ if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-+ ret = bch2_trans_journal_res_get(trans,
-+ (flags & BCH_WATERMARK_MASK)|
-+ JOURNAL_RES_GET_NONBLOCK);
-+ if (ret)
-+ return ret;
-+
-+ if (unlikely(trans->journal_transaction_names))
-+ journal_transaction_name(trans);
-+ } else {
-+ trans->journal_res.seq = c->journal.replay_journal_seq;
-+ }
-+
-+ /*
-+ * Not allowed to fail after we've gotten our journal reservation - we
-+ * have to use it:
-+ */
-+
-+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
-+ !(flags & BTREE_INSERT_JOURNAL_REPLAY)) {
-+ if (bch2_journal_seq_verify)
-+ trans_for_each_update(trans, i)
-+ i->k->k.version.lo = trans->journal_res.seq;
-+ else if (bch2_inject_invalid_keys)
-+ trans_for_each_update(trans, i)
-+ i->k->k.version = MAX_VERSION;
-+ }
-+
-+ if (trans->fs_usage_deltas &&
-+ bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
-+ return -BCH_ERR_btree_insert_need_mark_replicas;
-+
-+ if (trans->nr_wb_updates) {
-+ EBUG_ON(flags & BTREE_INSERT_JOURNAL_REPLAY);
-+
-+ ret = bch2_btree_insert_keys_write_buffer(trans);
-+ if (ret)
-+ goto revert_fs_usage;
-+ }
-+
-+ h = trans->hooks;
-+ while (h) {
-+ ret = h->fn(trans, h);
-+ if (ret)
-+ goto revert_fs_usage;
-+ h = h->next;
-+ }
-+
-+ trans_for_each_update(trans, i)
-+ if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
-+ ret = run_one_mem_trigger(trans, i, i->flags);
-+ if (ret)
-+ goto fatal_err;
-+ }
-+
-+ if (unlikely(c->gc_pos.phase)) {
-+ ret = bch2_trans_commit_run_gc_triggers(trans);
-+ if (ret)
-+ goto fatal_err;
-+ }
-+
-+ if (unlikely(trans->extra_journal_entries.nr)) {
-+ memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
-+ trans->extra_journal_entries.data,
-+ trans->extra_journal_entries.nr);
-+
-+ trans->journal_res.offset += trans->extra_journal_entries.nr;
-+ trans->journal_res.u64s -= trans->extra_journal_entries.nr;
-+ }
-+
-+ if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-+ struct journal *j = &c->journal;
-+ struct jset_entry *entry;
-+
-+ trans_for_each_update(trans, i) {
-+ if (i->key_cache_already_flushed)
-+ continue;
-+
-+ if (i->flags & BTREE_UPDATE_NOJOURNAL)
-+ continue;
-+
-+ verify_update_old_key(trans, i);
-+
-+ if (trans->journal_transaction_names) {
-+ entry = bch2_journal_add_entry(j, &trans->journal_res,
-+ BCH_JSET_ENTRY_overwrite,
-+ i->btree_id, i->level,
-+ i->old_k.u64s);
-+ bkey_reassemble((struct bkey_i *) entry->start,
-+ (struct bkey_s_c) { &i->old_k, i->old_v });
-+ }
-+
-+ entry = bch2_journal_add_entry(j, &trans->journal_res,
-+ BCH_JSET_ENTRY_btree_keys,
-+ i->btree_id, i->level,
-+ i->k->k.u64s);
-+ bkey_copy((struct bkey_i *) entry->start, i->k);
-+ }
-+
-+ trans_for_each_wb_update(trans, wb) {
-+ entry = bch2_journal_add_entry(j, &trans->journal_res,
-+ BCH_JSET_ENTRY_btree_keys,
-+ wb->btree, 0,
-+ wb->k.k.u64s);
-+ bkey_copy((struct bkey_i *) entry->start, &wb->k);
-+ }
-+
-+ if (trans->journal_seq)
-+ *trans->journal_seq = trans->journal_res.seq;
-+ }
-+
-+ trans_for_each_update(trans, i) {
-+ i->k->k.needs_whiteout = false;
-+
-+ if (!i->cached) {
-+ u64 seq = trans->journal_res.seq;
-+
-+ if (i->flags & BTREE_UPDATE_PREJOURNAL)
-+ seq = i->seq;
-+
-+ bch2_btree_insert_key_leaf(trans, i->path, i->k, seq);
-+ } else if (!i->key_cache_already_flushed)
-+ bch2_btree_insert_key_cached(trans, flags, i);
-+ else {
-+ bch2_btree_key_cache_drop(trans, i->path);
-+ btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE);
-+ }
-+ }
-+
-+ return 0;
-+fatal_err:
-+ bch2_fatal_error(c);
-+revert_fs_usage:
-+ if (trans->fs_usage_deltas)
-+ bch2_trans_fs_usage_revert(trans, trans->fs_usage_deltas);
-+ return ret;
-+}
-+
-+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
-+{
-+ while (--i >= trans->updates) {
-+ if (same_leaf_as_prev(trans, i))
-+ continue;
-+
-+ bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
-+ }
-+
-+ trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
-+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
-+}
-+
-+static inline int trans_lock_write(struct btree_trans *trans)
-+{
-+ struct btree_insert_entry *i;
-+
-+ trans_for_each_update(trans, i) {
-+ if (same_leaf_as_prev(trans, i))
-+ continue;
-+
-+ if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
-+ return trans_lock_write_fail(trans, i);
-+
-+ if (!i->cached)
-+ bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
-+ }
-+
-+ return 0;
-+}
-+
-+static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
-+{
-+ struct btree_insert_entry *i;
-+ struct btree_write_buffered_key *wb;
-+
-+ trans_for_each_update(trans, i)
-+ bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p);
-+
-+ trans_for_each_wb_update(trans, wb)
-+ bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p);
-+}
-+
-+static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans,
-+ enum bkey_invalid_flags flags,
-+ struct btree_insert_entry *i,
-+ struct printbuf *err)
-+{
-+ struct bch_fs *c = trans->c;
-+
-+ printbuf_reset(err);
-+ prt_printf(err, "invalid bkey on insert from %s -> %ps",
-+ trans->fn, (void *) i->ip_allocated);
-+ prt_newline(err);
-+ printbuf_indent_add(err, 2);
-+
-+ bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k));
-+ prt_newline(err);
-+
-+ bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err);
-+ bch2_print_string_as_lines(KERN_ERR, err->buf);
-+
-+ bch2_inconsistent_error(c);
-+ bch2_dump_trans_updates(trans);
-+
-+ return -EINVAL;
-+}
-+
-+/*
-+ * Get journal reservation, take write locks, and attempt to do btree update(s):
-+ */
-+static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags,
-+ struct btree_insert_entry **stopped_at,
-+ unsigned long trace_ip)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_insert_entry *i;
-+ int ret = 0, u64s_delta = 0;
-+
-+ trans_for_each_update(trans, i) {
-+ if (i->cached)
-+ continue;
-+
-+ u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0;
-+ u64s_delta -= i->old_btree_u64s;
-+
-+ if (!same_leaf_as_next(trans, i)) {
-+ if (u64s_delta <= 0) {
-+ ret = bch2_foreground_maybe_merge(trans, i->path,
-+ i->level, flags);
-+ if (unlikely(ret))
-+ return ret;
-+ }
-+
-+ u64s_delta = 0;
-+ }
-+ }
-+
-+ ret = bch2_journal_preres_get(&c->journal,
-+ &trans->journal_preres, trans->journal_preres_u64s,
-+ (flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
-+ if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
-+ ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
-+ if (unlikely(ret))
-+ return ret;
-+
-+ ret = trans_lock_write(trans);
-+ if (unlikely(ret))
-+ return ret;
-+
-+ ret = bch2_trans_commit_write_locked(trans, flags, stopped_at, trace_ip);
-+
-+ if (!ret && unlikely(trans->journal_replay_not_finished))
-+ bch2_drop_overwrites_from_journal(trans);
-+
-+ trans_for_each_update(trans, i)
-+ if (!same_leaf_as_prev(trans, i))
-+ bch2_btree_node_unlock_write_inlined(trans, i->path,
-+ insert_l(i)->b);
-+
-+ if (!ret && trans->journal_pin)
-+ bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
-+ trans->journal_pin, NULL);
-+
-+ /*
-+ * Drop journal reservation after dropping write locks, since dropping
-+ * the journal reservation may kick off a journal write:
-+ */
-+ bch2_journal_res_put(&c->journal, &trans->journal_res);
-+
-+ return ret;
-+}
-+
-+static int journal_reclaim_wait_done(struct bch_fs *c)
-+{
-+ int ret = bch2_journal_error(&c->journal) ?:
-+ !bch2_btree_key_cache_must_wait(c);
-+
-+ if (!ret)
-+ journal_reclaim_kick(&c->journal);
-+ return ret;
-+}
-+
-+static noinline
-+int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
-+ struct btree_insert_entry *i,
-+ int ret, unsigned long trace_ip)
-+{
-+ struct bch_fs *c = trans->c;
-+
-+ switch (ret) {
-+ case -BCH_ERR_btree_insert_btree_node_full:
-+ ret = bch2_btree_split_leaf(trans, i->path, flags);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path);
-+ break;
-+ case -BCH_ERR_btree_insert_need_mark_replicas:
-+ ret = drop_locks_do(trans,
-+ bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas));
-+ break;
-+ case -BCH_ERR_journal_res_get_blocked:
-+ /*
-+ * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
-+ * flag
-+ */
-+ if ((flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
-+ (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
-+ ret = -BCH_ERR_journal_reclaim_would_deadlock;
-+ break;
-+ }
-+
-+ ret = drop_locks_do(trans,
-+ bch2_trans_journal_res_get(trans,
-+ (flags & BCH_WATERMARK_MASK)|
-+ JOURNAL_RES_GET_CHECK));
-+ break;
-+ case -BCH_ERR_btree_insert_need_journal_reclaim:
-+ bch2_trans_unlock(trans);
-+
-+ trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip);
-+
-+ wait_event_freezable(c->journal.reclaim_wait,
-+ (ret = journal_reclaim_wait_done(c)));
-+ if (ret < 0)
-+ break;
-+
-+ ret = bch2_trans_relock(trans);
-+ break;
-+ case -BCH_ERR_btree_insert_need_flush_buffer: {
-+ struct btree_write_buffer *wb = &c->btree_write_buffer;
-+
-+ ret = 0;
-+
-+ if (wb->state.nr > wb->size * 3 / 4) {
-+ bch2_trans_unlock(trans);
-+ mutex_lock(&wb->flush_lock);
-+
-+ if (wb->state.nr > wb->size * 3 / 4) {
-+ bch2_trans_begin(trans);
-+ ret = __bch2_btree_write_buffer_flush(trans,
-+ flags|BTREE_INSERT_NOCHECK_RW, true);
-+ if (!ret) {
-+ trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
-+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
-+ }
-+ } else {
-+ mutex_unlock(&wb->flush_lock);
-+ ret = bch2_trans_relock(trans);
-+ }
-+ }
-+ break;
-+ }
-+ default:
-+ BUG_ON(ret >= 0);
-+ break;
-+ }
-+
-+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
-+
-+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
-+ !(flags & BTREE_INSERT_NOWAIT) &&
-+ (flags & BTREE_INSERT_NOFAIL), c,
-+ "%s: incorrectly got %s\n", __func__, bch2_err_str(ret));
-+
-+ return ret;
-+}
-+
-+static noinline int
-+bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ int ret;
-+
-+ if (likely(!(flags & BTREE_INSERT_LAZY_RW)) ||
-+ test_bit(BCH_FS_STARTED, &c->flags))
-+ return -BCH_ERR_erofs_trans_commit;
-+
-+ ret = drop_locks_do(trans, bch2_fs_read_write_early(c));
-+ if (ret)
-+ return ret;
-+
-+ bch2_write_ref_get(c, BCH_WRITE_REF_trans);
-+ return 0;
-+}
-+
-+/*
-+ * This is for updates done in the early part of fsck - btree_gc - before we've
-+ * gone RW. we only add the new key to the list of keys for journal replay to
-+ * do.
-+ */
-+static noinline int
-+do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_insert_entry *i;
-+ int ret = 0;
-+
-+ trans_for_each_update(trans, i) {
-+ ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k);
-+ if (ret)
-+ break;
-+ }
-+
-+ return ret;
-+}
-+
-+int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_insert_entry *i = NULL;
-+ struct btree_write_buffered_key *wb;
-+ unsigned u64s;
-+ int ret = 0;
-+
-+ if (!trans->nr_updates &&
-+ !trans->nr_wb_updates &&
-+ !trans->extra_journal_entries.nr)
-+ goto out_reset;
-+
-+ if (flags & BTREE_INSERT_GC_LOCK_HELD)
-+ lockdep_assert_held(&c->gc_lock);
-+
-+ ret = bch2_trans_commit_run_triggers(trans);
-+ if (ret)
-+ goto out_reset;
-+
-+ trans_for_each_update(trans, i) {
-+ struct printbuf buf = PRINTBUF;
-+ enum bkey_invalid_flags invalid_flags = 0;
-+
-+ if (!(flags & BTREE_INSERT_JOURNAL_REPLAY))
-+ invalid_flags |= BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT;
-+
-+ if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
-+ i->bkey_type, invalid_flags, &buf)))
-+ ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf);
-+ btree_insert_entry_checks(trans, i);
-+ printbuf_exit(&buf);
-+
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
-+ ret = do_bch2_trans_commit_to_journal_replay(trans);
-+ goto out_reset;
-+ }
-+
-+ if (!(flags & BTREE_INSERT_NOCHECK_RW) &&
-+ unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
-+ ret = bch2_trans_commit_get_rw_cold(trans, flags);
-+ if (ret)
-+ goto out_reset;
-+ }
-+
-+ if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
-+ mutex_trylock(&c->btree_write_buffer.flush_lock)) {
-+ bch2_trans_begin(trans);
-+ bch2_trans_unlock(trans);
-+
-+ ret = __bch2_btree_write_buffer_flush(trans,
-+ flags|BTREE_INSERT_NOCHECK_RW, true);
-+ if (!ret) {
-+ trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
-+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
-+ }
-+ goto out;
-+ }
-+
-+ EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
-+
-+ memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-+
-+ trans->journal_u64s = trans->extra_journal_entries.nr;
-+ trans->journal_preres_u64s = 0;
-+
-+ trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
-+
-+ if (trans->journal_transaction_names)
-+ trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
-+
-+ trans_for_each_update(trans, i) {
-+ EBUG_ON(!i->path->should_be_locked);
-+
-+ ret = bch2_btree_path_upgrade(trans, i->path, i->level + 1);
-+ if (unlikely(ret))
-+ goto out;
-+
-+ EBUG_ON(!btree_node_intent_locked(i->path, i->level));
-+
-+ if (i->key_cache_already_flushed)
-+ continue;
-+
-+ /* we're going to journal the key being updated: */
-+ u64s = jset_u64s(i->k->k.u64s);
-+ if (i->cached &&
-+ likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
-+ trans->journal_preres_u64s += u64s;
-+
-+ if (i->flags & BTREE_UPDATE_NOJOURNAL)
-+ continue;
-+
-+ trans->journal_u64s += u64s;
-+
-+ /* and we're also going to log the overwrite: */
-+ if (trans->journal_transaction_names)
-+ trans->journal_u64s += jset_u64s(i->old_k.u64s);
-+ }
-+
-+ trans_for_each_wb_update(trans, wb)
-+ trans->journal_u64s += jset_u64s(wb->k.k.u64s);
-+
-+ if (trans->extra_journal_res) {
-+ ret = bch2_disk_reservation_add(c, trans->disk_res,
-+ trans->extra_journal_res,
-+ (flags & BTREE_INSERT_NOFAIL)
-+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
-+ if (ret)
-+ goto err;
-+ }
-+retry:
-+ bch2_trans_verify_not_in_restart(trans);
-+ memset(&trans->journal_res, 0, sizeof(trans->journal_res));
-+
-+ ret = do_bch2_trans_commit(trans, flags, &i, _RET_IP_);
-+
-+ /* make sure we didn't drop or screw up locks: */
-+ bch2_trans_verify_locks(trans);
-+
-+ if (ret)
-+ goto err;
-+
-+ trace_and_count(c, transaction_commit, trans, _RET_IP_);
-+out:
-+ bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-+
-+ if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
-+ bch2_write_ref_put(c, BCH_WRITE_REF_trans);
-+out_reset:
-+ if (!ret)
-+ bch2_trans_downgrade(trans);
-+ bch2_trans_reset_updates(trans);
-+
-+ return ret;
-+err:
-+ ret = bch2_trans_commit_error(trans, flags, i, ret, _RET_IP_);
-+ if (ret)
-+ goto out;
-+
-+ goto retry;
-+}
-diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
-new file mode 100644
-index 000000000000..3ab773005484
---- /dev/null
-+++ b/fs/bcachefs/btree_types.h
-@@ -0,0 +1,756 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_TYPES_H
-+#define _BCACHEFS_BTREE_TYPES_H
-+
-+#include <linux/list.h>
-+#include <linux/rhashtable.h>
-+
-+//#include "bkey_methods.h"
-+#include "buckets_types.h"
-+#include "darray.h"
-+#include "errcode.h"
-+#include "journal_types.h"
-+#include "replicas_types.h"
-+#include "six.h"
-+
-+struct open_bucket;
-+struct btree_update;
-+struct btree_trans;
-+
-+#define MAX_BSETS 3U
-+
-+struct btree_nr_keys {
-+
-+ /*
-+ * Amount of live metadata (i.e. size of node after a compaction) in
-+ * units of u64s
-+ */
-+ u16 live_u64s;
-+ u16 bset_u64s[MAX_BSETS];
-+
-+ /* live keys only: */
-+ u16 packed_keys;
-+ u16 unpacked_keys;
-+};
-+
-+struct bset_tree {
-+ /*
-+ * We construct a binary tree in an array as if the array
-+ * started at 1, so that things line up on the same cachelines
-+ * better: see comments in bset.c at cacheline_to_bkey() for
-+ * details
-+ */
-+
-+ /* size of the binary tree and prev array */
-+ u16 size;
-+
-+ /* function of size - precalculated for to_inorder() */
-+ u16 extra;
-+
-+ u16 data_offset;
-+ u16 aux_data_offset;
-+ u16 end_offset;
-+};
-+
-+struct btree_write {
-+ struct journal_entry_pin journal;
-+};
-+
-+struct btree_alloc {
-+ struct open_buckets ob;
-+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
-+};
-+
-+struct btree_bkey_cached_common {
-+ struct six_lock lock;
-+ u8 level;
-+ u8 btree_id;
-+ bool cached;
-+};
-+
-+struct btree {
-+ struct btree_bkey_cached_common c;
-+
-+ struct rhash_head hash;
-+ u64 hash_val;
-+
-+ unsigned long flags;
-+ u16 written;
-+ u8 nsets;
-+ u8 nr_key_bits;
-+ u16 version_ondisk;
-+
-+ struct bkey_format format;
-+
-+ struct btree_node *data;
-+ void *aux_data;
-+
-+ /*
-+ * Sets of sorted keys - the real btree node - plus a binary search tree
-+ *
-+ * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
-+ * to the memory we have allocated for this btree node. Additionally,
-+ * set[0]->data points to the entire btree node as it exists on disk.
-+ */
-+ struct bset_tree set[MAX_BSETS];
-+
-+ struct btree_nr_keys nr;
-+ u16 sib_u64s[2];
-+ u16 whiteout_u64s;
-+ u8 byte_order;
-+ u8 unpack_fn_len;
-+
-+ struct btree_write writes[2];
-+
-+ /* Key/pointer for this btree node */
-+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
-+
-+ /*
-+ * XXX: add a delete sequence number, so when bch2_btree_node_relock()
-+ * fails because the lock sequence number has changed - i.e. the
-+ * contents were modified - we can still relock the node if it's still
-+ * the one we want, without redoing the traversal
-+ */
-+
-+ /*
-+ * For asynchronous splits/interior node updates:
-+ * When we do a split, we allocate new child nodes and update the parent
-+ * node to point to them: we update the parent in memory immediately,
-+ * but then we must wait until the children have been written out before
-+ * the update to the parent can be written - this is a list of the
-+ * btree_updates that are blocking this node from being
-+ * written:
-+ */
-+ struct list_head write_blocked;
-+
-+ /*
-+ * Also for asynchronous splits/interior node updates:
-+ * If a btree node isn't reachable yet, we don't want to kick off
-+ * another write - because that write also won't yet be reachable and
-+ * marking it as completed before it's reachable would be incorrect:
-+ */
-+ unsigned long will_make_reachable;
-+
-+ struct open_buckets ob;
-+
-+ /* lru list */
-+ struct list_head list;
-+};
-+
-+struct btree_cache {
-+ struct rhashtable table;
-+ bool table_init_done;
-+ /*
-+ * We never free a struct btree, except on shutdown - we just put it on
-+ * the btree_cache_freed list and reuse it later. This simplifies the
-+ * code, and it doesn't cost us much memory as the memory usage is
-+ * dominated by buffers that hold the actual btree node data and those
-+ * can be freed - and the number of struct btrees allocated is
-+ * effectively bounded.
-+ *
-+ * btree_cache_freeable effectively is a small cache - we use it because
-+ * high order page allocations can be rather expensive, and it's quite
-+ * common to delete and allocate btree nodes in quick succession. It
-+ * should never grow past ~2-3 nodes in practice.
-+ */
-+ struct mutex lock;
-+ struct list_head live;
-+ struct list_head freeable;
-+ struct list_head freed_pcpu;
-+ struct list_head freed_nonpcpu;
-+
-+ /* Number of elements in live + freeable lists */
-+ unsigned used;
-+ unsigned reserve;
-+ atomic_t dirty;
-+ struct shrinker shrink;
-+
-+ /*
-+ * If we need to allocate memory for a new btree node and that
-+ * allocation fails, we can cannibalize another node in the btree cache
-+ * to satisfy the allocation - lock to guarantee only one thread does
-+ * this at a time:
-+ */
-+ struct task_struct *alloc_lock;
-+ struct closure_waitlist alloc_wait;
-+};
-+
-+struct btree_node_iter {
-+ struct btree_node_iter_set {
-+ u16 k, end;
-+ } data[MAX_BSETS];
-+};
-+
-+/*
-+ * Iterate over all possible positions, synthesizing deleted keys for holes:
-+ */
-+static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0;
-+static const __maybe_unused u16 BTREE_ITER_ALL_LEVELS = 1 << 1;
-+/*
-+ * Indicates that intent locks should be taken on leaf nodes, because we expect
-+ * to be doing updates:
-+ */
-+static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 2;
-+/*
-+ * Causes the btree iterator code to prefetch additional btree nodes from disk:
-+ */
-+static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 3;
-+/*
-+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
-+ * @pos or the first key strictly greater than @pos
-+ */
-+static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 4;
-+static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 5;
-+static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 6;
-+static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7;
-+static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 8;
-+static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 9;
-+static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
-+static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11;
-+static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12;
-+static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 13;
-+static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 14;
-+static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15;
-+#define __BTREE_ITER_FLAGS_END 16
-+
-+enum btree_path_uptodate {
-+ BTREE_ITER_UPTODATE = 0,
-+ BTREE_ITER_NEED_RELOCK = 1,
-+ BTREE_ITER_NEED_TRAVERSE = 2,
-+};
-+
-+#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
-+#define TRACK_PATH_ALLOCATED
-+#endif
-+
-+struct btree_path {
-+ u8 idx;
-+ u8 sorted_idx;
-+ u8 ref;
-+ u8 intent_ref;
-+ u32 alloc_seq;
-+ u32 downgrade_seq;
-+
-+ /* btree_iter_copy starts here: */
-+ struct bpos pos;
-+
-+ enum btree_id btree_id:5;
-+ bool cached:1;
-+ bool preserve:1;
-+ enum btree_path_uptodate uptodate:2;
-+ /*
-+ * When true, failing to relock this path will cause the transaction to
-+ * restart:
-+ */
-+ bool should_be_locked:1;
-+ unsigned level:3,
-+ locks_want:3;
-+ u8 nodes_locked;
-+
-+ struct btree_path_level {
-+ struct btree *b;
-+ struct btree_node_iter iter;
-+ u32 lock_seq;
-+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
-+ u64 lock_taken_time;
-+#endif
-+ } l[BTREE_MAX_DEPTH];
-+#ifdef TRACK_PATH_ALLOCATED
-+ unsigned long ip_allocated;
-+#endif
-+};
-+
-+static inline struct btree_path_level *path_l(struct btree_path *path)
-+{
-+ return path->l + path->level;
-+}
-+
-+static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
-+{
-+#ifdef TRACK_PATH_ALLOCATED
-+ return path->ip_allocated;
-+#else
-+ return _THIS_IP_;
-+#endif
-+}
-+
-+/*
-+ * @pos - iterator's current position
-+ * @level - current btree depth
-+ * @locks_want - btree level below which we start taking intent locks
-+ * @nodes_locked - bitmask indicating which nodes in @nodes are locked
-+ * @nodes_intent_locked - bitmask indicating which locks are intent locks
-+ */
-+struct btree_iter {
-+ struct btree_trans *trans;
-+ struct btree_path *path;
-+ struct btree_path *update_path;
-+ struct btree_path *key_cache_path;
-+
-+ enum btree_id btree_id:8;
-+ unsigned min_depth:3;
-+ unsigned advanced:1;
-+
-+ /* btree_iter_copy starts here: */
-+ u16 flags;
-+
-+ /* When we're filtering by snapshot, the snapshot ID we're looking for: */
-+ unsigned snapshot;
-+
-+ struct bpos pos;
-+ /*
-+ * Current unpacked key - so that bch2_btree_iter_next()/
-+ * bch2_btree_iter_next_slot() can correctly advance pos.
-+ */
-+ struct bkey k;
-+
-+ /* BTREE_ITER_WITH_JOURNAL: */
-+ size_t journal_idx;
-+ struct bpos journal_pos;
-+#ifdef TRACK_PATH_ALLOCATED
-+ unsigned long ip_allocated;
-+#endif
-+};
-+
-+struct btree_key_cache_freelist {
-+ struct bkey_cached *objs[16];
-+ unsigned nr;
-+};
-+
-+struct btree_key_cache {
-+ struct mutex lock;
-+ struct rhashtable table;
-+ bool table_init_done;
-+ struct list_head freed_pcpu;
-+ struct list_head freed_nonpcpu;
-+ struct shrinker shrink;
-+ unsigned shrink_iter;
-+ struct btree_key_cache_freelist __percpu *pcpu_freed;
-+
-+ atomic_long_t nr_freed;
-+ atomic_long_t nr_keys;
-+ atomic_long_t nr_dirty;
-+};
-+
-+struct bkey_cached_key {
-+ u32 btree_id;
-+ struct bpos pos;
-+} __packed __aligned(4);
-+
-+#define BKEY_CACHED_ACCESSED 0
-+#define BKEY_CACHED_DIRTY 1
-+
-+struct bkey_cached {
-+ struct btree_bkey_cached_common c;
-+
-+ unsigned long flags;
-+ u16 u64s;
-+ bool valid;
-+ u32 btree_trans_barrier_seq;
-+ struct bkey_cached_key key;
-+
-+ struct rhash_head hash;
-+ struct list_head list;
-+
-+ struct journal_preres res;
-+ struct journal_entry_pin journal;
-+ u64 seq;
-+
-+ struct bkey_i *k;
-+};
-+
-+static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
-+{
-+ return !b->cached
-+ ? container_of(b, struct btree, c)->key.k.p
-+ : container_of(b, struct bkey_cached, c)->key.pos;
-+}
-+
-+struct btree_insert_entry {
-+ unsigned flags;
-+ u8 bkey_type;
-+ enum btree_id btree_id:8;
-+ u8 level:4;
-+ bool cached:1;
-+ bool insert_trigger_run:1;
-+ bool overwrite_trigger_run:1;
-+ bool key_cache_already_flushed:1;
-+ /*
-+ * @old_k may be a key from the journal; @old_btree_u64s always refers
-+ * to the size of the key being overwritten in the btree:
-+ */
-+ u8 old_btree_u64s;
-+ struct bkey_i *k;
-+ struct btree_path *path;
-+ u64 seq;
-+ /* key being overwritten: */
-+ struct bkey old_k;
-+ const struct bch_val *old_v;
-+ unsigned long ip_allocated;
-+};
-+
-+#ifndef CONFIG_LOCKDEP
-+#define BTREE_ITER_MAX 64
-+#else
-+#define BTREE_ITER_MAX 32
-+#endif
-+
-+struct btree_trans_commit_hook;
-+typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
-+
-+struct btree_trans_commit_hook {
-+ btree_trans_commit_hook_fn *fn;
-+ struct btree_trans_commit_hook *next;
-+};
-+
-+#define BTREE_TRANS_MEM_MAX (1U << 16)
-+
-+#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000
-+
-+struct btree_trans {
-+ struct bch_fs *c;
-+ const char *fn;
-+ struct closure ref;
-+ struct list_head list;
-+ u64 last_begin_time;
-+
-+ u8 lock_may_not_fail;
-+ u8 lock_must_abort;
-+ struct btree_bkey_cached_common *locking;
-+ struct six_lock_waiter locking_wait;
-+
-+ int srcu_idx;
-+
-+ u8 fn_idx;
-+ u8 nr_sorted;
-+ u8 nr_updates;
-+ u8 nr_wb_updates;
-+ u8 wb_updates_size;
-+ bool srcu_held:1;
-+ bool used_mempool:1;
-+ bool in_traverse_all:1;
-+ bool paths_sorted:1;
-+ bool memory_allocation_failure:1;
-+ bool journal_transaction_names:1;
-+ bool journal_replay_not_finished:1;
-+ bool notrace_relock_fail:1;
-+ enum bch_errcode restarted:16;
-+ u32 restart_count;
-+ unsigned long last_begin_ip;
-+ unsigned long last_restarted_ip;
-+ unsigned long srcu_lock_time;
-+
-+ /*
-+ * For when bch2_trans_update notices we'll be splitting a compressed
-+ * extent:
-+ */
-+ unsigned extra_journal_res;
-+ unsigned nr_max_paths;
-+
-+ u64 paths_allocated;
-+
-+ unsigned mem_top;
-+ unsigned mem_max;
-+ unsigned mem_bytes;
-+ void *mem;
-+
-+ u8 sorted[BTREE_ITER_MAX + 8];
-+ struct btree_path paths[BTREE_ITER_MAX];
-+ struct btree_insert_entry updates[BTREE_ITER_MAX];
-+ struct btree_write_buffered_key *wb_updates;
-+
-+ /* update path: */
-+ struct btree_trans_commit_hook *hooks;
-+ darray_u64 extra_journal_entries;
-+ struct journal_entry_pin *journal_pin;
-+
-+ struct journal_res journal_res;
-+ struct journal_preres journal_preres;
-+ u64 *journal_seq;
-+ struct disk_reservation *disk_res;
-+ unsigned journal_u64s;
-+ unsigned journal_preres_u64s;
-+ struct replicas_delta_list *fs_usage_deltas;
-+};
-+
-+#define BCH_BTREE_WRITE_TYPES() \
-+ x(initial, 0) \
-+ x(init_next_bset, 1) \
-+ x(cache_reclaim, 2) \
-+ x(journal_reclaim, 3) \
-+ x(interior, 4)
-+
-+enum btree_write_type {
-+#define x(t, n) BTREE_WRITE_##t,
-+ BCH_BTREE_WRITE_TYPES()
-+#undef x
-+ BTREE_WRITE_TYPE_NR,
-+};
-+
-+#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
-+#define BTREE_WRITE_TYPE_BITS ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))
-+
-+#define BTREE_FLAGS() \
-+ x(read_in_flight) \
-+ x(read_error) \
-+ x(dirty) \
-+ x(need_write) \
-+ x(write_blocked) \
-+ x(will_make_reachable) \
-+ x(noevict) \
-+ x(write_idx) \
-+ x(accessed) \
-+ x(write_in_flight) \
-+ x(write_in_flight_inner) \
-+ x(just_written) \
-+ x(dying) \
-+ x(fake) \
-+ x(need_rewrite) \
-+ x(never_write)
-+
-+enum btree_flags {
-+ /* First bits for btree node write type */
-+ BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1,
-+#define x(flag) BTREE_NODE_##flag,
-+ BTREE_FLAGS()
-+#undef x
-+};
-+
-+#define x(flag) \
-+static inline bool btree_node_ ## flag(struct btree *b) \
-+{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
-+ \
-+static inline void set_btree_node_ ## flag(struct btree *b) \
-+{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \
-+ \
-+static inline void clear_btree_node_ ## flag(struct btree *b) \
-+{ clear_bit(BTREE_NODE_ ## flag, &b->flags); }
-+
-+BTREE_FLAGS()
-+#undef x
-+
-+static inline struct btree_write *btree_current_write(struct btree *b)
-+{
-+ return b->writes + btree_node_write_idx(b);
-+}
-+
-+static inline struct btree_write *btree_prev_write(struct btree *b)
-+{
-+ return b->writes + (btree_node_write_idx(b) ^ 1);
-+}
-+
-+static inline struct bset_tree *bset_tree_last(struct btree *b)
-+{
-+ EBUG_ON(!b->nsets);
-+ return b->set + b->nsets - 1;
-+}
-+
-+static inline void *
-+__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
-+{
-+ return (void *) ((u64 *) b->data + 1 + offset);
-+}
-+
-+static inline u16
-+__btree_node_ptr_to_offset(const struct btree *b, const void *p)
-+{
-+ u16 ret = (u64 *) p - 1 - (u64 *) b->data;
-+
-+ EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
-+ return ret;
-+}
-+
-+static inline struct bset *bset(const struct btree *b,
-+ const struct bset_tree *t)
-+{
-+ return __btree_node_offset_to_ptr(b, t->data_offset);
-+}
-+
-+static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
-+{
-+ t->end_offset =
-+ __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t)));
-+}
-+
-+static inline void set_btree_bset(struct btree *b, struct bset_tree *t,
-+ const struct bset *i)
-+{
-+ t->data_offset = __btree_node_ptr_to_offset(b, i);
-+ set_btree_bset_end(b, t);
-+}
-+
-+static inline struct bset *btree_bset_first(struct btree *b)
-+{
-+ return bset(b, b->set);
-+}
-+
-+static inline struct bset *btree_bset_last(struct btree *b)
-+{
-+ return bset(b, bset_tree_last(b));
-+}
-+
-+static inline u16
-+__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k)
-+{
-+ return __btree_node_ptr_to_offset(b, k);
-+}
-+
-+static inline struct bkey_packed *
-+__btree_node_offset_to_key(const struct btree *b, u16 k)
-+{
-+ return __btree_node_offset_to_ptr(b, k);
-+}
-+
-+static inline unsigned btree_bkey_first_offset(const struct bset_tree *t)
-+{
-+ return t->data_offset + offsetof(struct bset, _data) / sizeof(u64);
-+}
-+
-+#define btree_bkey_first(_b, _t) \
-+({ \
-+ EBUG_ON(bset(_b, _t)->start != \
-+ __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\
-+ \
-+ bset(_b, _t)->start; \
-+})
-+
-+#define btree_bkey_last(_b, _t) \
-+({ \
-+ EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \
-+ vstruct_last(bset(_b, _t))); \
-+ \
-+ __btree_node_offset_to_key(_b, (_t)->end_offset); \
-+})
-+
-+static inline unsigned bset_u64s(struct bset_tree *t)
-+{
-+ return t->end_offset - t->data_offset -
-+ sizeof(struct bset) / sizeof(u64);
-+}
-+
-+static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t)
-+{
-+ return bset_u64s(t) - b->nr.bset_u64s[t - b->set];
-+}
-+
-+static inline unsigned bset_byte_offset(struct btree *b, void *i)
-+{
-+ return i - (void *) b->data;
-+}
-+
-+enum btree_node_type {
-+ BKEY_TYPE_btree,
-+#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1,
-+ BCH_BTREE_IDS()
-+#undef x
-+ BKEY_TYPE_NR
-+};
-+
-+/* Type of a key in btree @id at level @level: */
-+static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
-+{
-+ return level ? BKEY_TYPE_btree : (unsigned) id + 1;
-+}
-+
-+/* Type of keys @b contains: */
-+static inline enum btree_node_type btree_node_type(struct btree *b)
-+{
-+ return __btree_node_type(b->c.level, b->c.btree_id);
-+}
-+
-+const char *bch2_btree_node_type_str(enum btree_node_type);
-+
-+#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
-+ (BIT_ULL(BKEY_TYPE_extents)| \
-+ BIT_ULL(BKEY_TYPE_alloc)| \
-+ BIT_ULL(BKEY_TYPE_inodes)| \
-+ BIT_ULL(BKEY_TYPE_stripes)| \
-+ BIT_ULL(BKEY_TYPE_reflink)| \
-+ BIT_ULL(BKEY_TYPE_btree))
-+
-+#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
-+ (BIT_ULL(BKEY_TYPE_alloc)| \
-+ BIT_ULL(BKEY_TYPE_inodes)| \
-+ BIT_ULL(BKEY_TYPE_stripes)| \
-+ BIT_ULL(BKEY_TYPE_snapshots))
-+
-+#define BTREE_NODE_TYPE_HAS_TRIGGERS \
-+ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
-+ BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
-+
-+static inline bool btree_node_type_needs_gc(enum btree_node_type type)
-+{
-+ return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type);
-+}
-+
-+static inline bool btree_node_type_is_extents(enum btree_node_type type)
-+{
-+ const unsigned mask = 0
-+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
-+ BCH_BTREE_IDS()
-+#undef x
-+ ;
-+
-+ return (1U << type) & mask;
-+}
-+
-+static inline bool btree_id_is_extents(enum btree_id btree)
-+{
-+ return btree_node_type_is_extents(__btree_node_type(0, btree));
-+}
-+
-+static inline bool btree_type_has_snapshots(enum btree_id id)
-+{
-+ const unsigned mask = 0
-+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
-+ BCH_BTREE_IDS()
-+#undef x
-+ ;
-+
-+ return (1U << id) & mask;
-+}
-+
-+static inline bool btree_type_has_snapshot_field(enum btree_id id)
-+{
-+ const unsigned mask = 0
-+#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
-+ BCH_BTREE_IDS()
-+#undef x
-+ ;
-+
-+ return (1U << id) & mask;
-+}
-+
-+static inline bool btree_type_has_ptrs(enum btree_id id)
-+{
-+ const unsigned mask = 0
-+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
-+ BCH_BTREE_IDS()
-+#undef x
-+ ;
-+
-+ return (1U << id) & mask;
-+}
-+
-+struct btree_root {
-+ struct btree *b;
-+
-+ /* On disk root - see async splits: */
-+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
-+ u8 level;
-+ u8 alive;
-+ s8 error;
-+};
-+
-+enum btree_gc_coalesce_fail_reason {
-+ BTREE_GC_COALESCE_FAIL_RESERVE_GET,
-+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
-+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS,
-+};
-+
-+enum btree_node_sibling {
-+ btree_prev_sib,
-+ btree_next_sib,
-+};
-+
-+#endif /* _BCACHEFS_BTREE_TYPES_H */
-diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
-new file mode 100644
-index 000000000000..324767c0ddcc
---- /dev/null
-+++ b/fs/bcachefs/btree_update.c
-@@ -0,0 +1,933 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_update.h"
-+#include "btree_iter.h"
-+#include "btree_journal_iter.h"
-+#include "btree_locking.h"
-+#include "buckets.h"
-+#include "debug.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "keylist.h"
-+#include "snapshot.h"
-+#include "trace.h"
-+
-+static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
-+ const struct btree_insert_entry *r)
-+{
-+ return cmp_int(l->btree_id, r->btree_id) ?:
-+ cmp_int(l->cached, r->cached) ?:
-+ -cmp_int(l->level, r->level) ?:
-+ bpos_cmp(l->k->k.p, r->k->k.p);
-+}
-+
-+static int __must_check
-+bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
-+ struct bkey_i *, enum btree_update_flags,
-+ unsigned long ip);
-+
-+static noinline int extent_front_merge(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ struct bkey_i **insert,
-+ enum btree_update_flags flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_i *update;
-+ int ret;
-+
-+ update = bch2_bkey_make_mut_noupdate(trans, k);
-+ ret = PTR_ERR_OR_ZERO(update);
-+ if (ret)
-+ return ret;
-+
-+ if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
-+ return 0;
-+
-+ ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?:
-+ bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p);
-+ if (ret < 0)
-+ return ret;
-+ if (ret)
-+ return 0;
-+
-+ ret = bch2_btree_delete_at(trans, iter, flags);
-+ if (ret)
-+ return ret;
-+
-+ *insert = update;
-+ return 0;
-+}
-+
-+static noinline int extent_back_merge(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_i *insert,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ int ret;
-+
-+ ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
-+ bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
-+ if (ret < 0)
-+ return ret;
-+ if (ret)
-+ return 0;
-+
-+ bch2_bkey_merge(c, bkey_i_to_s(insert), k);
-+ return 0;
-+}
-+
-+/*
-+ * When deleting, check if we need to emit a whiteout (because we're overwriting
-+ * something in an ancestor snapshot)
-+ */
-+static int need_whiteout_for_snapshot(struct btree_trans *trans,
-+ enum btree_id btree_id, struct bpos pos)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ u32 snapshot = pos.snapshot;
-+ int ret;
-+
-+ if (!bch2_snapshot_parent(trans->c, pos.snapshot))
-+ return 0;
-+
-+ pos.snapshot++;
-+
-+ for_each_btree_key_norestart(trans, iter, btree_id, pos,
-+ BTREE_ITER_ALL_SNAPSHOTS|
-+ BTREE_ITER_NOPRESERVE, k, ret) {
-+ if (!bkey_eq(k.k->p, pos))
-+ break;
-+
-+ if (bch2_snapshot_is_ancestor(trans->c, snapshot,
-+ k.k->p.snapshot)) {
-+ ret = !bkey_whiteout(k.k);
-+ break;
-+ }
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ return ret;
-+}
-+
-+int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
-+ enum btree_id id,
-+ struct bpos old_pos,
-+ struct bpos new_pos)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter old_iter, new_iter = { NULL };
-+ struct bkey_s_c old_k, new_k;
-+ snapshot_id_list s;
-+ struct bkey_i *update;
-+ int ret = 0;
-+
-+ if (!bch2_snapshot_has_children(c, old_pos.snapshot))
-+ return 0;
-+
-+ darray_init(&s);
-+
-+ bch2_trans_iter_init(trans, &old_iter, id, old_pos,
-+ BTREE_ITER_NOT_EXTENTS|
-+ BTREE_ITER_ALL_SNAPSHOTS);
-+ while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
-+ !(ret = bkey_err(old_k)) &&
-+ bkey_eq(old_pos, old_k.k->p)) {
-+ struct bpos whiteout_pos =
-+ SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);;
-+
-+ if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
-+ snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
-+ continue;
-+
-+ new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
-+ BTREE_ITER_NOT_EXTENTS|
-+ BTREE_ITER_INTENT);
-+ ret = bkey_err(new_k);
-+ if (ret)
-+ break;
-+
-+ if (new_k.k->type == KEY_TYPE_deleted) {
-+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
-+ ret = PTR_ERR_OR_ZERO(update);
-+ if (ret)
-+ break;
-+
-+ bkey_init(&update->k);
-+ update->k.p = whiteout_pos;
-+ update->k.type = KEY_TYPE_whiteout;
-+
-+ ret = bch2_trans_update(trans, &new_iter, update,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+ }
-+ bch2_trans_iter_exit(trans, &new_iter);
-+
-+ ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
-+ if (ret)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &new_iter);
-+ bch2_trans_iter_exit(trans, &old_iter);
-+ darray_exit(&s);
-+
-+ return ret;
-+}
-+
-+int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ enum btree_update_flags flags,
-+ struct bkey_s_c old,
-+ struct bkey_s_c new)
-+{
-+ enum btree_id btree_id = iter->btree_id;
-+ struct bkey_i *update;
-+ struct bpos new_start = bkey_start_pos(new.k);
-+ bool front_split = bkey_lt(bkey_start_pos(old.k), new_start);
-+ bool back_split = bkey_gt(old.k->p, new.k->p);
-+ int ret = 0, compressed_sectors;
-+
-+ /*
-+ * If we're going to be splitting a compressed extent, note it
-+ * so that __bch2_trans_commit() can increase our disk
-+ * reservation:
-+ */
-+ if (((front_split && back_split) ||
-+ ((front_split || back_split) && old.k->p.snapshot != new.k->p.snapshot)) &&
-+ (compressed_sectors = bch2_bkey_sectors_compressed(old)))
-+ trans->extra_journal_res += compressed_sectors;
-+
-+ if (front_split) {
-+ update = bch2_bkey_make_mut_noupdate(trans, old);
-+ if ((ret = PTR_ERR_OR_ZERO(update)))
-+ return ret;
-+
-+ bch2_cut_back(new_start, update);
-+
-+ ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
-+ old.k->p, update->k.p) ?:
-+ bch2_btree_insert_nonextent(trans, btree_id, update,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ /* If we're overwriting in a different snapshot - middle split: */
-+ if (old.k->p.snapshot != new.k->p.snapshot &&
-+ (front_split || back_split)) {
-+ update = bch2_bkey_make_mut_noupdate(trans, old);
-+ if ((ret = PTR_ERR_OR_ZERO(update)))
-+ return ret;
-+
-+ bch2_cut_front(new_start, update);
-+ bch2_cut_back(new.k->p, update);
-+
-+ ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
-+ old.k->p, update->k.p) ?:
-+ bch2_btree_insert_nonextent(trans, btree_id, update,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (bkey_le(old.k->p, new.k->p)) {
-+ update = bch2_trans_kmalloc(trans, sizeof(*update));
-+ if ((ret = PTR_ERR_OR_ZERO(update)))
-+ return ret;
-+
-+ bkey_init(&update->k);
-+ update->k.p = old.k->p;
-+ update->k.p.snapshot = new.k->p.snapshot;
-+
-+ if (new.k->p.snapshot != old.k->p.snapshot) {
-+ update->k.type = KEY_TYPE_whiteout;
-+ } else if (btree_type_has_snapshots(btree_id)) {
-+ ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
-+ if (ret < 0)
-+ return ret;
-+ if (ret)
-+ update->k.type = KEY_TYPE_whiteout;
-+ }
-+
-+ ret = bch2_btree_insert_nonextent(trans, btree_id, update,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (back_split) {
-+ update = bch2_bkey_make_mut_noupdate(trans, old);
-+ if ((ret = PTR_ERR_OR_ZERO(update)))
-+ return ret;
-+
-+ bch2_cut_front(new.k->p, update);
-+
-+ ret = bch2_trans_update_by_path(trans, iter->path, update,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
-+ flags, _RET_IP_);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+static int bch2_trans_update_extent(struct btree_trans *trans,
-+ struct btree_iter *orig_iter,
-+ struct bkey_i *insert,
-+ enum btree_update_flags flags)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ enum btree_id btree_id = orig_iter->btree_id;
-+ int ret = 0;
-+
-+ bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k),
-+ BTREE_ITER_INTENT|
-+ BTREE_ITER_WITH_UPDATES|
-+ BTREE_ITER_NOT_EXTENTS);
-+ k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
-+ if ((ret = bkey_err(k)))
-+ goto err;
-+ if (!k.k)
-+ goto out;
-+
-+ if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) {
-+ if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
-+ ret = extent_front_merge(trans, &iter, k, &insert, flags);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ goto next;
-+ }
-+
-+ while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) {
-+ bool done = bkey_lt(insert->k.p, k.k->p);
-+
-+ ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert));
-+ if (ret)
-+ goto err;
-+
-+ if (done)
-+ goto out;
-+next:
-+ bch2_btree_iter_advance(&iter);
-+ k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
-+ if ((ret = bkey_err(k)))
-+ goto err;
-+ if (!k.k)
-+ goto out;
-+ }
-+
-+ if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
-+ ret = extent_back_merge(trans, &iter, insert, k);
-+ if (ret)
-+ goto err;
-+ }
-+out:
-+ if (!bkey_deleted(&insert->k))
-+ ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ return ret;
-+}
-+
-+static noinline int flush_new_cached_update(struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree_insert_entry *i,
-+ enum btree_update_flags flags,
-+ unsigned long ip)
-+{
-+ struct btree_path *btree_path;
-+ struct bkey k;
-+ int ret;
-+
-+ btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
-+ BTREE_ITER_INTENT, _THIS_IP_);
-+ ret = bch2_btree_path_traverse(trans, btree_path, 0);
-+ if (ret)
-+ goto out;
-+
-+ /*
-+ * The old key in the insert entry might actually refer to an existing
-+ * key in the btree that has been deleted from cache and not yet
-+ * flushed. Check for this and skip the flush so we don't run triggers
-+ * against a stale key.
-+ */
-+ bch2_btree_path_peek_slot_exact(btree_path, &k);
-+ if (!bkey_deleted(&k))
-+ goto out;
-+
-+ i->key_cache_already_flushed = true;
-+ i->flags |= BTREE_TRIGGER_NORUN;
-+
-+ btree_path_set_should_be_locked(btree_path);
-+ ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip);
-+out:
-+ bch2_path_put(trans, btree_path, true);
-+ return ret;
-+}
-+
-+static int __must_check
-+bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
-+ struct bkey_i *k, enum btree_update_flags flags,
-+ unsigned long ip)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_insert_entry *i, n;
-+ u64 seq = 0;
-+ int cmp;
-+
-+ EBUG_ON(!path->should_be_locked);
-+ EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
-+ EBUG_ON(!bpos_eq(k->k.p, path->pos));
-+
-+ /*
-+ * The transaction journal res hasn't been allocated at this point.
-+ * That occurs at commit time. Reuse the seq field to pass in the seq
-+ * of a prejournaled key.
-+ */
-+ if (flags & BTREE_UPDATE_PREJOURNAL)
-+ seq = trans->journal_res.seq;
-+
-+ n = (struct btree_insert_entry) {
-+ .flags = flags,
-+ .bkey_type = __btree_node_type(path->level, path->btree_id),
-+ .btree_id = path->btree_id,
-+ .level = path->level,
-+ .cached = path->cached,
-+ .path = path,
-+ .k = k,
-+ .seq = seq,
-+ .ip_allocated = ip,
-+ };
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ trans_for_each_update(trans, i)
-+ BUG_ON(i != trans->updates &&
-+ btree_insert_entry_cmp(i - 1, i) >= 0);
-+#endif
-+
-+ /*
-+ * Pending updates are kept sorted: first, find position of new update,
-+ * then delete/trim any updates the new update overwrites:
-+ */
-+ trans_for_each_update(trans, i) {
-+ cmp = btree_insert_entry_cmp(&n, i);
-+ if (cmp <= 0)
-+ break;
-+ }
-+
-+ if (!cmp && i < trans->updates + trans->nr_updates) {
-+ EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);
-+
-+ bch2_path_put(trans, i->path, true);
-+ i->flags = n.flags;
-+ i->cached = n.cached;
-+ i->k = n.k;
-+ i->path = n.path;
-+ i->seq = n.seq;
-+ i->ip_allocated = n.ip_allocated;
-+ } else {
-+ array_insert_item(trans->updates, trans->nr_updates,
-+ i - trans->updates, n);
-+
-+ i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v;
-+ i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;
-+
-+ if (unlikely(trans->journal_replay_not_finished)) {
-+ struct bkey_i *j_k =
-+ bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p);
-+
-+ if (j_k) {
-+ i->old_k = j_k->k;
-+ i->old_v = &j_k->v;
-+ }
-+ }
-+ }
-+
-+ __btree_path_get(i->path, true);
-+
-+ /*
-+ * If a key is present in the key cache, it must also exist in the
-+ * btree - this is necessary for cache coherency. When iterating over
-+ * a btree that's cached in the key cache, the btree iter code checks
-+ * the key cache - but the key has to exist in the btree for that to
-+ * work:
-+ */
-+ if (path->cached && bkey_deleted(&i->old_k))
-+ return flush_new_cached_update(trans, path, i, flags, ip);
-+
-+ return 0;
-+}
-+
-+static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct btree_path *path)
-+{
-+ if (!iter->key_cache_path ||
-+ !iter->key_cache_path->should_be_locked ||
-+ !bpos_eq(iter->key_cache_path->pos, iter->pos)) {
-+ struct bkey_cached *ck;
-+ int ret;
-+
-+ if (!iter->key_cache_path)
-+ iter->key_cache_path =
-+ bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
-+ BTREE_ITER_INTENT|
-+ BTREE_ITER_CACHED, _THIS_IP_);
-+
-+ iter->key_cache_path =
-+ bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
-+ iter->flags & BTREE_ITER_INTENT,
-+ _THIS_IP_);
-+
-+ ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
-+ BTREE_ITER_CACHED);
-+ if (unlikely(ret))
-+ return ret;
-+
-+ ck = (void *) iter->key_cache_path->l[0].b;
-+
-+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-+ trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
-+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
-+ }
-+
-+ btree_path_set_should_be_locked(iter->key_cache_path);
-+ }
-+
-+ return 0;
-+}
-+
-+int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_i *k, enum btree_update_flags flags)
-+{
-+ struct btree_path *path = iter->update_path ?: iter->path;
-+ int ret;
-+
-+ if (iter->flags & BTREE_ITER_IS_EXTENTS)
-+ return bch2_trans_update_extent(trans, iter, k, flags);
-+
-+ if (bkey_deleted(&k->k) &&
-+ !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
-+ (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
-+ ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
-+ if (unlikely(ret < 0))
-+ return ret;
-+
-+ if (ret)
-+ k->k.type = KEY_TYPE_whiteout;
-+ }
-+
-+ /*
-+ * Ensure that updates to cached btrees go to the key cache:
-+ */
-+ if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
-+ !path->cached &&
-+ !path->level &&
-+ btree_id_cached(trans->c, path->btree_id)) {
-+ ret = bch2_trans_update_get_key_cache(trans, iter, path);
-+ if (ret)
-+ return ret;
-+
-+ path = iter->key_cache_path;
-+ }
-+
-+ return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_);
-+}
-+
-+/*
-+ * Add a transaction update for a key that has already been journaled.
-+ */
-+int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq,
-+ struct btree_iter *iter, struct bkey_i *k,
-+ enum btree_update_flags flags)
-+{
-+ trans->journal_res.seq = seq;
-+ return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL|
-+ BTREE_UPDATE_PREJOURNAL);
-+}
-+
-+int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
-+ enum btree_id btree,
-+ struct bkey_i *k)
-+{
-+ struct btree_write_buffered_key *i;
-+ int ret;
-+
-+ EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
-+ EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
-+
-+ trans_for_each_wb_update(trans, i) {
-+ if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
-+ bkey_copy(&i->k, k);
-+ return 0;
-+ }
-+ }
-+
-+ if (!trans->wb_updates ||
-+ trans->nr_wb_updates == trans->wb_updates_size) {
-+ struct btree_write_buffered_key *u;
-+
-+ if (trans->nr_wb_updates == trans->wb_updates_size) {
-+ struct btree_transaction_stats *s = btree_trans_stats(trans);
-+
-+ BUG_ON(trans->wb_updates_size > U8_MAX / 2);
-+ trans->wb_updates_size = max(1, trans->wb_updates_size * 2);
-+ if (s)
-+ s->wb_updates_size = trans->wb_updates_size;
-+ }
-+
-+ u = bch2_trans_kmalloc_nomemzero(trans,
-+ trans->wb_updates_size *
-+ sizeof(struct btree_write_buffered_key));
-+ ret = PTR_ERR_OR_ZERO(u);
-+ if (ret)
-+ return ret;
-+
-+ if (trans->nr_wb_updates)
-+ memcpy(u, trans->wb_updates, trans->nr_wb_updates *
-+ sizeof(struct btree_write_buffered_key));
-+ trans->wb_updates = u;
-+ }
-+
-+ trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) {
-+ .btree = btree,
-+ };
-+
-+ bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k);
-+ trans->nr_wb_updates++;
-+
-+ return 0;
-+}
-+
-+int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
-+ enum btree_id btree, struct bpos end)
-+{
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_INTENT);
-+ k = bch2_btree_iter_prev(iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ bch2_btree_iter_advance(iter);
-+ k = bch2_btree_iter_peek_slot(iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(k.k->type != KEY_TYPE_deleted);
-+
-+ if (bkey_gt(k.k->p, end)) {
-+ ret = -BCH_ERR_ENOSPC_btree_slot;
-+ goto err;
-+ }
-+
-+ return 0;
-+err:
-+ bch2_trans_iter_exit(trans, iter);
-+ return ret;
-+}
-+
-+void bch2_trans_commit_hook(struct btree_trans *trans,
-+ struct btree_trans_commit_hook *h)
-+{
-+ h->next = trans->hooks;
-+ trans->hooks = h;
-+}
-+
-+int bch2_btree_insert_nonextent(struct btree_trans *trans,
-+ enum btree_id btree, struct bkey_i *k,
-+ enum btree_update_flags flags)
-+{
-+ struct btree_iter iter;
-+ int ret;
-+
-+ bch2_trans_iter_init(trans, &iter, btree, k->k.p,
-+ BTREE_ITER_CACHED|
-+ BTREE_ITER_NOT_EXTENTS|
-+ BTREE_ITER_INTENT);
-+ ret = bch2_btree_iter_traverse(&iter) ?:
-+ bch2_trans_update(trans, &iter, k, flags);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
-+ struct bkey_i *k, enum btree_update_flags flags)
-+{
-+ struct btree_iter iter;
-+ int ret;
-+
-+ bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
-+ BTREE_ITER_CACHED|
-+ BTREE_ITER_INTENT);
-+ ret = bch2_btree_iter_traverse(&iter) ?:
-+ bch2_trans_update(trans, &iter, k, flags);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/**
-+ * bch2_btree_insert - insert a key into the given btree
-+ * @c: pointer to struct bch_fs
-+ * @id: btree to insert into
-+ * @k: key to insert
-+ * @disk_res: must be non-NULL whenever inserting or potentially
-+ * splitting data extents
-+ * @flags: transaction commit flags
-+ *
-+ * Returns: 0 on success, error code on failure
-+ */
-+int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
-+ struct disk_reservation *disk_res, int flags)
-+{
-+ return bch2_trans_do(c, disk_res, NULL, flags,
-+ bch2_btree_insert_trans(trans, id, k, 0));
-+}
-+
-+int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
-+ unsigned len, unsigned update_flags)
-+{
-+ struct bkey_i *k;
-+
-+ k = bch2_trans_kmalloc(trans, sizeof(*k));
-+ if (IS_ERR(k))
-+ return PTR_ERR(k);
-+
-+ bkey_init(&k->k);
-+ k->k.p = iter->pos;
-+ bch2_key_resize(&k->k, len);
-+ return bch2_trans_update(trans, iter, k, update_flags);
-+}
-+
-+int bch2_btree_delete_at(struct btree_trans *trans,
-+ struct btree_iter *iter, unsigned update_flags)
-+{
-+ return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
-+}
-+
-+int bch2_btree_delete_at_buffered(struct btree_trans *trans,
-+ enum btree_id btree, struct bpos pos)
-+{
-+ struct bkey_i *k;
-+
-+ k = bch2_trans_kmalloc(trans, sizeof(*k));
-+ if (IS_ERR(k))
-+ return PTR_ERR(k);
-+
-+ bkey_init(&k->k);
-+ k->k.p = pos;
-+ return bch2_trans_update_buffered(trans, btree, k);
-+}
-+
-+int bch2_btree_delete(struct btree_trans *trans,
-+ enum btree_id btree, struct bpos pos,
-+ unsigned update_flags)
-+{
-+ struct btree_iter iter;
-+ int ret;
-+
-+ bch2_trans_iter_init(trans, &iter, btree, pos,
-+ BTREE_ITER_CACHED|
-+ BTREE_ITER_INTENT);
-+ ret = bch2_btree_iter_traverse(&iter) ?:
-+ bch2_btree_delete_at(trans, &iter, update_flags);
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ return ret;
-+}
-+
-+int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
-+ struct bpos start, struct bpos end,
-+ unsigned update_flags,
-+ u64 *journal_seq)
-+{
-+ u32 restart_count = trans->restart_count;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
-+ while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) {
-+ struct disk_reservation disk_res =
-+ bch2_disk_reservation_init(trans->c, 0);
-+ struct bkey_i delete;
-+
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ bkey_init(&delete.k);
-+
-+ /*
-+ * This could probably be more efficient for extents:
-+ */
-+
-+ /*
-+ * For extents, iter.pos won't necessarily be the same as
-+ * bkey_start_pos(k.k) (for non extents they always will be the
-+ * same). It's important that we delete starting from iter.pos
-+ * because the range we want to delete could start in the middle
-+ * of k.
-+ *
-+ * (bch2_btree_iter_peek() does guarantee that iter.pos >=
-+ * bkey_start_pos(k.k)).
-+ */
-+ delete.k.p = iter.pos;
-+
-+ if (iter.flags & BTREE_ITER_IS_EXTENTS)
-+ bch2_key_resize(&delete.k,
-+ bpos_min(end, k.k->p).offset -
-+ iter.pos.offset);
-+
-+ ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
-+ bch2_trans_commit(trans, &disk_res, journal_seq,
-+ BTREE_INSERT_NOFAIL);
-+ bch2_disk_reservation_put(trans->c, &disk_res);
-+err:
-+ /*
-+ * the bch2_trans_begin() call is in a weird place because we
-+ * need to call it after every transaction commit, to avoid path
-+ * overflow, but don't want to call it if the delete operation
-+ * is a no-op and we have no work to do:
-+ */
-+ bch2_trans_begin(trans);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ ret = 0;
-+ if (ret)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ return ret ?: trans_was_restarted(trans, restart_count);
-+}
-+
-+/*
-+ * bch2_btree_delete_range - delete everything within a given range
-+ *
-+ * Range is a half open interval - [start, end)
-+ */
-+int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
-+ struct bpos start, struct bpos end,
-+ unsigned update_flags,
-+ u64 *journal_seq)
-+{
-+ int ret = bch2_trans_run(c,
-+ bch2_btree_delete_range_trans(trans, id, start, end,
-+ update_flags, journal_seq));
-+ if (ret == -BCH_ERR_transaction_restart_nested)
-+ ret = 0;
-+ return ret;
-+}
-+
-+int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
-+ struct bpos pos, bool set)
-+{
-+ struct bkey_i *k;
-+ int ret = 0;
-+
-+ k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
-+ ret = PTR_ERR_OR_ZERO(k);
-+ if (unlikely(ret))
-+ return ret;
-+
-+ bkey_init(&k->k);
-+ k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
-+ k->k.p = pos;
-+
-+ return bch2_trans_update_buffered(trans, btree, k);
-+}
-+
-+__printf(2, 0)
-+static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
-+{
-+ struct printbuf buf = PRINTBUF;
-+ struct jset_entry_log *l;
-+ unsigned u64s;
-+ int ret;
-+
-+ prt_vprintf(&buf, fmt, args);
-+ ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
-+ if (ret)
-+ goto err;
-+
-+ u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
-+
-+ ret = darray_make_room(entries, jset_u64s(u64s));
-+ if (ret)
-+ goto err;
-+
-+ l = (void *) &darray_top(*entries);
-+ l->entry.u64s = cpu_to_le16(u64s);
-+ l->entry.btree_id = 0;
-+ l->entry.level = 1;
-+ l->entry.type = BCH_JSET_ENTRY_log;
-+ l->entry.pad[0] = 0;
-+ l->entry.pad[1] = 0;
-+ l->entry.pad[2] = 0;
-+ memcpy(l->d, buf.buf, buf.pos);
-+ while (buf.pos & 7)
-+ l->d[buf.pos++] = '\0';
-+
-+ entries->nr += jset_u64s(u64s);
-+err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+__printf(3, 0)
-+static int
-+__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
-+ va_list args)
-+{
-+ int ret;
-+
-+ if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
-+ ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
-+ } else {
-+ ret = bch2_trans_do(c, NULL, NULL,
-+ BTREE_INSERT_LAZY_RW|commit_flags,
-+ __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args));
-+ }
-+
-+ return ret;
-+}
-+
-+__printf(2, 3)
-+int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
-+{
-+ va_list args;
-+ int ret;
-+
-+ va_start(args, fmt);
-+ ret = __bch2_fs_log_msg(c, 0, fmt, args);
-+ va_end(args);
-+ return ret;
-+}
-+
-+/*
-+ * Use for logging messages during recovery to enable reserved space and avoid
-+ * blocking.
-+ */
-+__printf(2, 3)
-+int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
-+{
-+ va_list args;
-+ int ret;
-+
-+ va_start(args, fmt);
-+ ret = __bch2_fs_log_msg(c, BCH_WATERMARK_reclaim, fmt, args);
-+ va_end(args);
-+ return ret;
-+}
-diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
-new file mode 100644
-index 000000000000..9816d2286540
---- /dev/null
-+++ b/fs/bcachefs/btree_update.h
-@@ -0,0 +1,340 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_UPDATE_H
-+#define _BCACHEFS_BTREE_UPDATE_H
-+
-+#include "btree_iter.h"
-+#include "journal.h"
-+
-+struct bch_fs;
-+struct btree;
-+
-+void bch2_btree_node_prep_for_write(struct btree_trans *,
-+ struct btree_path *, struct btree *);
-+bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
-+ struct btree *, struct btree_node_iter *,
-+ struct bkey_i *);
-+
-+int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64);
-+int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64);
-+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
-+
-+void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
-+ struct bkey_i *, u64);
-+
-+enum btree_insert_flags {
-+ /* First bits for bch_watermark: */
-+ __BTREE_INSERT_NOFAIL = BCH_WATERMARK_BITS,
-+ __BTREE_INSERT_NOCHECK_RW,
-+ __BTREE_INSERT_LAZY_RW,
-+ __BTREE_INSERT_JOURNAL_REPLAY,
-+ __BTREE_INSERT_JOURNAL_RECLAIM,
-+ __BTREE_INSERT_NOWAIT,
-+ __BTREE_INSERT_GC_LOCK_HELD,
-+ __BCH_HASH_SET_MUST_CREATE,
-+ __BCH_HASH_SET_MUST_REPLACE,
-+};
-+
-+/* Don't check for -ENOSPC: */
-+#define BTREE_INSERT_NOFAIL BIT(__BTREE_INSERT_NOFAIL)
-+
-+#define BTREE_INSERT_NOCHECK_RW BIT(__BTREE_INSERT_NOCHECK_RW)
-+#define BTREE_INSERT_LAZY_RW BIT(__BTREE_INSERT_LAZY_RW)
-+
-+/* Insert is for journal replay - don't get journal reservations: */
-+#define BTREE_INSERT_JOURNAL_REPLAY BIT(__BTREE_INSERT_JOURNAL_REPLAY)
-+
-+/* Insert is being called from journal reclaim path: */
-+#define BTREE_INSERT_JOURNAL_RECLAIM BIT(__BTREE_INSERT_JOURNAL_RECLAIM)
-+
-+/* Don't block on allocation failure (for new btree nodes): */
-+#define BTREE_INSERT_NOWAIT BIT(__BTREE_INSERT_NOWAIT)
-+#define BTREE_INSERT_GC_LOCK_HELD BIT(__BTREE_INSERT_GC_LOCK_HELD)
-+
-+#define BCH_HASH_SET_MUST_CREATE BIT(__BCH_HASH_SET_MUST_CREATE)
-+#define BCH_HASH_SET_MUST_REPLACE BIT(__BCH_HASH_SET_MUST_REPLACE)
-+
-+int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
-+ unsigned, unsigned);
-+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
-+int bch2_btree_delete_at_buffered(struct btree_trans *, enum btree_id, struct bpos);
-+int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
-+
-+int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
-+ struct bkey_i *, enum btree_update_flags);
-+
-+int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *,
-+ enum btree_update_flags);
-+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
-+ struct disk_reservation *, int flags);
-+
-+int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
-+ struct bpos, struct bpos, unsigned, u64 *);
-+int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
-+ struct bpos, struct bpos, unsigned, u64 *);
-+
-+int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
-+
-+int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
-+ struct bpos, struct bpos);
-+
-+/*
-+ * For use when splitting extents in existing snapshots:
-+ *
-+ * If @old_pos is an interior snapshot node, iterate over descendent snapshot
-+ * nodes: for every descendent snapshot in which @old_pos is overwritten and
-+ * not visible, emit a whiteout at @new_pos.
-+ */
-+static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
-+ enum btree_id btree,
-+ struct bpos old_pos,
-+ struct bpos new_pos)
-+{
-+ if (!btree_type_has_snapshots(btree) ||
-+ bkey_eq(old_pos, new_pos))
-+ return 0;
-+
-+ return __bch2_insert_snapshot_whiteouts(trans, btree, old_pos, new_pos);
-+}
-+
-+int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *,
-+ enum btree_update_flags,
-+ struct bkey_s_c, struct bkey_s_c);
-+
-+int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
-+ enum btree_id, struct bpos);
-+
-+int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
-+ struct bkey_i *, enum btree_update_flags);
-+int __must_check bch2_trans_update_seq(struct btree_trans *, u64, struct btree_iter *,
-+ struct bkey_i *, enum btree_update_flags);
-+int __must_check bch2_trans_update_buffered(struct btree_trans *,
-+ enum btree_id, struct bkey_i *);
-+
-+void bch2_trans_commit_hook(struct btree_trans *,
-+ struct btree_trans_commit_hook *);
-+int __bch2_trans_commit(struct btree_trans *, unsigned);
-+
-+__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
-+__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
-+
-+/**
-+ * bch2_trans_commit - insert keys at given iterator positions
-+ *
-+ * This is the main entry point for btree updates.
-+ *
-+ * Return values:
-+ * -EROFS: filesystem read only
-+ * -EIO: journal or btree node IO error
-+ */
-+static inline int bch2_trans_commit(struct btree_trans *trans,
-+ struct disk_reservation *disk_res,
-+ u64 *journal_seq,
-+ unsigned flags)
-+{
-+ trans->disk_res = disk_res;
-+ trans->journal_seq = journal_seq;
-+
-+ return __bch2_trans_commit(trans, flags);
-+}
-+
-+#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
-+ lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
-+ (_journal_seq), (_flags)))
-+
-+#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
-+ nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
-+ (_journal_seq), (_flags)))
-+
-+#define bch2_trans_run(_c, _do) \
-+({ \
-+ struct btree_trans *trans = bch2_trans_get(_c); \
-+ int _ret = (_do); \
-+ bch2_trans_put(trans); \
-+ _ret; \
-+})
-+
-+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
-+ bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))
-+
-+#define trans_for_each_update(_trans, _i) \
-+ for ((_i) = (_trans)->updates; \
-+ (_i) < (_trans)->updates + (_trans)->nr_updates; \
-+ (_i)++)
-+
-+#define trans_for_each_wb_update(_trans, _i) \
-+ for ((_i) = (_trans)->wb_updates; \
-+ (_i) < (_trans)->wb_updates + (_trans)->nr_wb_updates; \
-+ (_i)++)
-+
-+static inline void bch2_trans_reset_updates(struct btree_trans *trans)
-+{
-+ struct btree_insert_entry *i;
-+
-+ trans_for_each_update(trans, i)
-+ bch2_path_put(trans, i->path, true);
-+
-+ trans->extra_journal_res = 0;
-+ trans->nr_updates = 0;
-+ trans->nr_wb_updates = 0;
-+ trans->wb_updates = NULL;
-+ trans->hooks = NULL;
-+ trans->extra_journal_entries.nr = 0;
-+
-+ if (trans->fs_usage_deltas) {
-+ trans->fs_usage_deltas->used = 0;
-+ memset((void *) trans->fs_usage_deltas +
-+ offsetof(struct replicas_delta_list, memset_start), 0,
-+ (void *) &trans->fs_usage_deltas->memset_end -
-+ (void *) &trans->fs_usage_deltas->memset_start);
-+ }
-+}
-+
-+static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
-+ unsigned type, unsigned min_bytes)
-+{
-+ unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
-+ struct bkey_i *mut;
-+
-+ if (type && k.k->type != type)
-+ return ERR_PTR(-ENOENT);
-+
-+ mut = bch2_trans_kmalloc_nomemzero(trans, bytes);
-+ if (!IS_ERR(mut)) {
-+ bkey_reassemble(mut, k);
-+
-+ if (unlikely(bytes > bkey_bytes(k.k))) {
-+ memset((void *) mut + bkey_bytes(k.k), 0,
-+ bytes - bkey_bytes(k.k));
-+ mut->k.u64s = DIV_ROUND_UP(bytes, sizeof(u64));
-+ }
-+ }
-+ return mut;
-+}
-+
-+static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
-+{
-+ return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
-+}
-+
-+#define bch2_bkey_make_mut_noupdate_typed(_trans, _k, _type) \
-+ bkey_i_to_##_type(__bch2_bkey_make_mut_noupdate(_trans, _k, \
-+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
-+
-+static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c *k, unsigned flags,
-+ unsigned type, unsigned min_bytes)
-+{
-+ struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes);
-+ int ret;
-+
-+ if (IS_ERR(mut))
-+ return mut;
-+
-+ ret = bch2_trans_update(trans, iter, mut, flags);
-+ if (ret)
-+ return ERR_PTR(ret);
-+
-+ *k = bkey_i_to_s_c(mut);
-+ return mut;
-+}
-+
-+static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c *k, unsigned flags)
-+{
-+ return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0);
-+}
-+
-+#define bch2_bkey_make_mut_typed(_trans, _iter, _k, _flags, _type) \
-+ bkey_i_to_##_type(__bch2_bkey_make_mut(_trans, _iter, _k, _flags,\
-+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
-+
-+static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ unsigned btree_id, struct bpos pos,
-+ unsigned flags, unsigned type, unsigned min_bytes)
-+{
-+ struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter,
-+ btree_id, pos, flags|BTREE_ITER_INTENT, type);
-+ struct bkey_i *ret = IS_ERR(k.k)
-+ ? ERR_CAST(k.k)
-+ : __bch2_bkey_make_mut_noupdate(trans, k, 0, min_bytes);
-+ if (IS_ERR(ret))
-+ bch2_trans_iter_exit(trans, iter);
-+ return ret;
-+}
-+
-+static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ unsigned btree_id, struct bpos pos,
-+ unsigned flags)
-+{
-+ return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0);
-+}
-+
-+static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ unsigned btree_id, struct bpos pos,
-+ unsigned flags, unsigned type, unsigned min_bytes)
-+{
-+ struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter,
-+ btree_id, pos, flags|BTREE_ITER_INTENT, type, min_bytes);
-+ int ret;
-+
-+ if (IS_ERR(mut))
-+ return mut;
-+
-+ ret = bch2_trans_update(trans, iter, mut, flags);
-+ if (ret) {
-+ bch2_trans_iter_exit(trans, iter);
-+ return ERR_PTR(ret);
-+ }
-+
-+ return mut;
-+}
-+
-+static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ unsigned btree_id, struct bpos pos,
-+ unsigned flags, unsigned min_bytes)
-+{
-+ return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes);
-+}
-+
-+static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ unsigned btree_id, struct bpos pos,
-+ unsigned flags)
-+{
-+ return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0);
-+}
-+
-+#define bch2_bkey_get_mut_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\
-+ bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _iter, \
-+ _btree_id, _pos, _flags, \
-+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
-+
-+static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter,
-+ unsigned flags, unsigned type, unsigned val_size)
-+{
-+ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size);
-+ int ret;
-+
-+ if (IS_ERR(k))
-+ return k;
-+
-+ bkey_init(&k->k);
-+ k->k.p = iter->pos;
-+ k->k.type = type;
-+ set_bkey_val_bytes(&k->k, val_size);
-+
-+ ret = bch2_trans_update(trans, iter, k, flags);
-+ if (unlikely(ret))
-+ return ERR_PTR(ret);
-+ return k;
-+}
-+
-+#define bch2_bkey_alloc(_trans, _iter, _flags, _type) \
-+ bkey_i_to_##_type(__bch2_bkey_alloc(_trans, _iter, _flags, \
-+ KEY_TYPE_##_type, sizeof(struct bch_##_type)))
-+
-+#endif /* _BCACHEFS_BTREE_UPDATE_H */
-diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
-new file mode 100644
-index 000000000000..39c2db68123b
---- /dev/null
-+++ b/fs/bcachefs/btree_update_interior.c
-@@ -0,0 +1,2474 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_methods.h"
-+#include "btree_cache.h"
-+#include "btree_gc.h"
-+#include "btree_journal_iter.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "btree_locking.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "journal.h"
-+#include "journal_reclaim.h"
-+#include "keylist.h"
-+#include "replicas.h"
-+#include "super-io.h"
-+#include "trace.h"
-+
-+#include <linux/random.h>
-+
-+static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
-+ struct btree_path *, struct btree *,
-+ struct keylist *, unsigned);
-+static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
-+
-+static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
-+ enum btree_id btree_id,
-+ unsigned level,
-+ struct bpos pos)
-+{
-+ struct btree_path *path;
-+
-+ path = bch2_path_get(trans, btree_id, pos, level + 1, level,
-+ BTREE_ITER_NOPRESERVE|
-+ BTREE_ITER_INTENT, _RET_IP_);
-+ path = bch2_btree_path_make_mut(trans, path, true, _RET_IP_);
-+ bch2_btree_path_downgrade(trans, path);
-+ __bch2_btree_path_unlock(trans, path);
-+ return path;
-+}
-+
-+/* Debug code: */
-+
-+/*
-+ * Verify that child nodes correctly span parent node's range:
-+ *
-+ * Walks the btree_ptr_v2 keys of interior node @b and panics if a child's
-+ * min_key is not the expected one (starting from @b's own min_key, then the
-+ * successor of the previous child's end position), or if the last child does
-+ * not end at @b's key position.
-+ *
-+ * Compiled out unless CONFIG_BCACHEFS_DEBUG is set, and skipped until
-+ * JOURNAL_REPLAY_DONE is set in the journal flags.
-+ */
-+static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ struct bpos next_node = b->data->min_key;
-+ struct btree_node_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_s_c_btree_ptr_v2 bp;
-+ struct bkey unpacked;
-+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-+
-+ /* only interior nodes have child pointers to check */
-+ BUG_ON(!b->c.level);
-+
-+ if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
-+ return;
-+
-+ bch2_btree_node_iter_init_from_start(&iter, b);
-+
-+ while (1) {
-+ k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
-+ if (k.k->type != KEY_TYPE_btree_ptr_v2)
-+ break;
-+ bp = bkey_s_c_to_btree_ptr_v2(k);
-+
-+ /* each child must start exactly where the previous one ended */
-+ if (!bpos_eq(next_node, bp.v->min_key)) {
-+ bch2_dump_btree_node(c, b);
-+ bch2_bpos_to_text(&buf1, next_node);
-+ bch2_bpos_to_text(&buf2, bp.v->min_key);
-+ panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf);
-+ }
-+
-+ bch2_btree_node_iter_advance(&iter, b);
-+
-+ /* the last child must end at the parent's own end position */
-+ if (bch2_btree_node_iter_end(&iter)) {
-+ if (!bpos_eq(k.k->p, b->key.k.p)) {
-+ bch2_dump_btree_node(c, b);
-+ bch2_bpos_to_text(&buf1, b->key.k.p);
-+ bch2_bpos_to_text(&buf2, k.k->p);
-+ panic("expected end %s got %s\n", buf1.buf, buf2.buf);
-+ }
-+ break;
-+ }
-+
-+ next_node = bpos_successor(k.k->p);
-+ }
-+#endif
-+}
-+
-+/* Calculate ideal packed bkey format for new btree nodes: */
-+
-+/*
-+ * Add every non-deleted key in every bset of @b, in unpacked form, to the
-+ * format state @s.
-+ */
-+void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
-+{
-+ struct bkey_packed *k;
-+ struct bset_tree *t;
-+ struct bkey uk;
-+
-+ for_each_bset(b, t)
-+ bset_tree_for_each_key(b, t, k)
-+ if (!bkey_deleted(k)) {
-+ uk = bkey_unpack_key(b, k);
-+ bch2_bkey_format_add_key(s, &uk);
-+ }
-+}
-+
-+/*
-+ * Compute a bkey format for @b from its min_key, its max_key and all of its
-+ * non-deleted keys.
-+ */
-+static struct bkey_format bch2_btree_calc_format(struct btree *b)
-+{
-+ struct bkey_format_state s;
-+
-+ bch2_bkey_format_init(&s);
-+ bch2_bkey_format_add_pos(&s, b->data->min_key);
-+ bch2_bkey_format_add_pos(&s, b->data->max_key);
-+ __bch2_btree_calc_format(&s, b);
-+
-+ return bch2_bkey_format_done(&s);
-+}
-+
-+/*
-+ * Estimate how many u64s @b's live keys would take up if rewritten with
-+ * @new_f: packed keys change size by the difference in key_u64s between the
-+ * new and the current format, unpacked keys by the difference between
-+ * @new_f's key_u64s and BKEY_U64s.
-+ *
-+ * BUGs if the result would be negative.
-+ */
-+static size_t btree_node_u64s_with_format(struct btree *b,
-+ struct bkey_format *new_f)
-+{
-+ struct bkey_format *old_f = &b->format;
-+
-+ /* stupid integer promotion rules */
-+ /* the (int) casts keep the per-key differences signed, so a shrinking format yields a negative delta */
-+ ssize_t delta =
-+ (((int) new_f->key_u64s - old_f->key_u64s) *
-+ (int) b->nr.packed_keys) +
-+ (((int) new_f->key_u64s - BKEY_U64s) *
-+ (int) b->nr.unpacked_keys);
-+
-+ BUG_ON(delta + b->nr.live_u64s < 0);
-+
-+ return b->nr.live_u64s + delta;
-+}
-+
-+/**
-+ * bch2_btree_node_format_fits - would @b still fit after repacking with @new_f?
-+ *
-+ * @c:		filesystem handle
-+ * @b:		btree node that would be rewritten
-+ * @new_f:	bkey format the keys would be translated to
-+ *
-+ * Returns: true if the node, sized for the re-packed keys, is strictly smaller
-+ * than a full btree node.
-+ *
-+ * Assumes all keys will successfully pack with the new format.
-+ */
-+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
-+				 struct bkey_format *new_f)
-+{
-+	size_t repacked_u64s = btree_node_u64s_with_format(b, new_f);
-+
-+	return btree_bytes(c) > __vstruct_bytes(struct btree_node, repacked_u64s);
-+}
-+
-+/* Btree node freeing/allocation: */
-+
-+/*
-+ * Put @b on the btree cache's freeable list.
-+ *
-+ * The node must not be write blocked, dirty, in need of a write, the current
-+ * root, holding open buckets, blocking any btree_update, or still waiting to
-+ * be made reachable - each of these is asserted.
-+ */
-+static void __btree_node_free(struct bch_fs *c, struct btree *b)
-+{
-+ trace_and_count(c, btree_node_free, c, b);
-+
-+ BUG_ON(btree_node_write_blocked(b));
-+ BUG_ON(btree_node_dirty(b));
-+ BUG_ON(btree_node_need_write(b));
-+ BUG_ON(b == btree_node_root(c, b));
-+ BUG_ON(b->ob.nr);
-+ BUG_ON(!list_empty(&b->write_blocked));
-+ BUG_ON(b->will_make_reachable);
-+
-+ clear_btree_node_noevict(b);
-+
-+ mutex_lock(&c->btree_cache.lock);
-+ list_move(&b->list, &c->btree_cache.freeable);
-+ mutex_unlock(&c->btree_cache.lock);
-+}
-+
-+/*
-+ * Free the in-memory node @b: with its write lock held, remove it from the
-+ * btree node hash table and move it to the freeable list, then unlock it in
-+ * every path of @trans that still points at it and replace those pointers
-+ * with an error pointer.
-+ */
-+static void bch2_btree_node_free_inmem(struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree *b)
-+{
-+ struct bch_fs *c = trans->c;
-+ unsigned level = b->c.level;
-+
-+ bch2_btree_node_lock_write_nofail(trans, path, &b->c);
-+ bch2_btree_node_hash_remove(&c->btree_cache, b);
-+ __btree_node_free(c, b);
-+ six_unlock_write(&b->c.lock);
-+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
-+
-+ /* note: from here on @path is reused as the loop cursor */
-+ trans_for_each_path(trans, path)
-+ if (path->l[level].b == b) {
-+ btree_node_unlock(trans, path, level);
-+ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
-+ }
-+}
-+
-+/*
-+ * Give back a node that was allocated for update @as but ended up unused:
-+ * drop the ref on @as it held via will_make_reachable, clear its state flags,
-+ * take it off its list and out of the hash table, and return it to one of
-+ * @as's prealloc_nodes lists. Finally drop the intent lock and detach the
-+ * node from every path in @trans that points at it.
-+ */
-+static void bch2_btree_node_free_never_used(struct btree_update *as,
-+ struct btree_trans *trans,
-+ struct btree *b)
-+{
-+ struct bch_fs *c = as->c;
-+ /*
-+  * NOTE(review): index is chosen by whether the node's lock has a readers
-+  * pointer; presumably that distinguishes interior from leaf nodes
-+  * (elsewhere in this file the index is !!level) - confirm.
-+  */
-+ struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
-+ struct btree_path *path;
-+ unsigned level = b->c.level;
-+
-+ BUG_ON(!list_empty(&b->write_blocked));
-+ BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
-+
-+ b->will_make_reachable = 0;
-+ closure_put(&as->cl);
-+
-+ clear_btree_node_will_make_reachable(b);
-+ clear_btree_node_accessed(b);
-+ clear_btree_node_dirty_acct(c, b);
-+ clear_btree_node_need_write(b);
-+
-+ mutex_lock(&c->btree_cache.lock);
-+ list_del_init(&b->list);
-+ bch2_btree_node_hash_remove(&c->btree_cache, b);
-+ mutex_unlock(&c->btree_cache.lock);
-+
-+ BUG_ON(p->nr >= ARRAY_SIZE(p->b));
-+ p->b[p->nr++] = b;
-+
-+ six_unlock_intent(&b->c.lock);
-+
-+ trans_for_each_path(trans, path)
-+ if (path->l[level].b == b) {
-+ btree_node_unlock(trans, path, level);
-+ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
-+ }
-+}
-+
-+/*
-+ * Allocate backing storage and an in-memory struct btree for a new node.
-+ *
-+ * @trans:		transaction the allocation runs under
-+ * @res:		disk reservation; its nr_replicas is the replica count requested
-+ * @cl:			closure handed to the sector allocator (may be NULL)
-+ * @interior_node:	passed through to bch2_btree_node_mem_alloc()
-+ * @flags:		the watermark is taken from flags & BCH_WATERMARK_MASK
-+ *
-+ * A cached allocation from c->btree_reserve_cache is used when more than
-+ * nr_reserve entries are available; otherwise sectors are allocated from the
-+ * btree write point, retrying until the write point has room for a whole node.
-+ *
-+ * Returns the node with its key and open buckets filled in and with the write
-+ * and intent locks dropped, or an ERR_PTR if sector allocation failed.
-+ */
-+static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
-+					     struct disk_reservation *res,
-+					     struct closure *cl,
-+					     bool interior_node,
-+					     unsigned flags)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct write_point *wp;
-+	struct btree *b;
-+	BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
-+	struct open_buckets obs = { .nr = 0 };
-+	struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
-+	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-+	/*
-+	 * NOTE(review): whether any watermark compares greater than
-+	 * BCH_WATERMARK_reclaim depends on the enum order, which is not
-+	 * visible here - confirm this reserve is ever actually held back.
-+	 */
-+	unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
-+		? BTREE_NODE_RESERVE
-+		: 0;
-+	int ret;
-+
-+	/* Fast path: reuse an allocation parked in the reserve cache */
-+	mutex_lock(&c->btree_reserve_cache_lock);
-+	if (c->btree_reserve_cache_nr > nr_reserve) {
-+		struct btree_alloc *a =
-+			&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
-+
-+		obs = a->ob;
-+		bkey_copy(&tmp.k, &a->k);
-+		mutex_unlock(&c->btree_reserve_cache_lock);
-+		goto mem_alloc;
-+	}
-+	mutex_unlock(&c->btree_reserve_cache_lock);
-+
-+retry:
-+	ret = bch2_alloc_sectors_start_trans(trans,
-+				      c->opts.metadata_target ?:
-+				      c->opts.foreground_target,
-+				      0,
-+				      writepoint_ptr(&c->btree_write_point),
-+				      &devs_have,
-+				      res->nr_replicas,
-+				      c->opts.metadata_replicas_required,
-+				      watermark, 0, cl, &wp);
-+	if (unlikely(ret))
-+		return ERR_PTR(ret);
-+
-+	if (wp->sectors_free < btree_sectors(c)) {
-+		struct open_bucket *ob;
-+		unsigned i;
-+
-+		/*
-+		 * Not enough room for a whole node: zero out the buckets that
-+		 * are too small and try again
-+		 */
-+		open_bucket_for_each(c, &wp->ptrs, ob, i)
-+			if (ob->sectors_free < btree_sectors(c))
-+				ob->sectors_free = 0;
-+
-+		bch2_alloc_sectors_done(c, wp);
-+		goto retry;
-+	}
-+
-+	bkey_btree_ptr_v2_init(&tmp.k);
-+	bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false);
-+
-+	bch2_open_bucket_get(c, wp, &obs);
-+	bch2_alloc_sectors_done(c, wp);
-+mem_alloc:
-+	b = bch2_btree_node_mem_alloc(trans, interior_node);
-+
-+	/*
-+	 * we hold cannibalize_lock, so this is not expected to fail - but the
-+	 * check has to come before b->c.lock is touched, not after:
-+	 */
-+	BUG_ON(IS_ERR(b));
-+
-+	six_unlock_write(&b->c.lock);
-+	six_unlock_intent(&b->c.lock);
-+
-+	BUG_ON(b->ob.nr);
-+
-+	bkey_copy(&b->key, &tmp.k);
-+	b->ob = obs;
-+
-+	return b;
-+}
-+
-+/*
-+ * Take a preallocated node from @as's reserve (the leaf list for level 0, the
-+ * interior list otherwise) and initialize it as an empty node at @level in
-+ * @as's btree: lock it for intent and write, mark it accessed/dirty/need
-+ * write, initialize its first bset and on-disk header fields, and insert it
-+ * into the btree node hash table.
-+ *
-+ * BUGs if the reserve is empty or the hash insert fails. Returns the node
-+ * with the intent and write locks taken here still held.
-+ */
-+static struct btree *bch2_btree_node_alloc(struct btree_update *as,
-+ struct btree_trans *trans,
-+ unsigned level)
-+{
-+ struct bch_fs *c = as->c;
-+ struct btree *b;
-+ struct prealloc_nodes *p = &as->prealloc_nodes[!!level];
-+ int ret;
-+
-+ BUG_ON(level >= BTREE_MAX_DEPTH);
-+ BUG_ON(!p->nr);
-+
-+ b = p->b[--p->nr];
-+
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
-+
-+ set_btree_node_accessed(b);
-+ set_btree_node_dirty_acct(c, b);
-+ set_btree_node_need_write(b);
-+
-+ bch2_bset_init_first(b, &b->data->keys);
-+ b->c.level = level;
-+ b->c.btree_id = as->btree_id;
-+ b->version_ondisk = c->sb.version;
-+
-+ memset(&b->nr, 0, sizeof(b->nr));
-+ b->data->magic = cpu_to_le64(bset_magic(c));
-+ memset(&b->data->_ptr, 0, sizeof(b->data->_ptr));
-+ b->data->flags = 0;
-+ SET_BTREE_NODE_ID(b->data, as->btree_id);
-+ SET_BTREE_NODE_LEVEL(b->data, level);
-+
-+ /* v2 pointers carry per-node state in the key itself: reset it */
-+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
-+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
-+
-+ bp->v.mem_ptr = 0;
-+ bp->v.seq = b->data->keys.seq;
-+ bp->v.sectors_written = 0;
-+ }
-+
-+ SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
-+
-+ bch2_btree_build_aux_trees(b);
-+
-+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
-+ BUG_ON(ret);
-+
-+ trace_and_count(c, btree_node_alloc, c, b);
-+ bch2_increment_clock(c, btree_sectors(c), WRITE);
-+ return b;
-+}
-+
-+/*
-+ * Record @pos as the start of @b's range: always in the node header, and also
-+ * in the node's key when that key is a btree_ptr_v2.
-+ */
-+static void btree_set_min(struct btree *b, struct bpos pos)
-+{
-+	b->data->min_key = pos;
-+
-+	if (b->key.k.type != KEY_TYPE_btree_ptr_v2)
-+		return;
-+
-+	bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos;
-+}
-+
-+/*
-+ * Record @pos as the end of @b's range, both in the node header and as the
-+ * position of the node's key.
-+ */
-+static void btree_set_max(struct btree *b, struct bpos pos)
-+{
-+	b->data->max_key = pos;
-+	b->key.k.p = pos;
-+}
-+
-+/*
-+ * Allocate a new node at @b's level covering the same key range as @b, with
-+ * a node sequence number one higher, and sort @b's keys into it using a
-+ * freshly computed format (or @b's current format if the keys would not fit
-+ * with the new one).
-+ */
-+static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
-+ struct btree_trans *trans,
-+ struct btree *b)
-+{
-+ struct btree *n = bch2_btree_node_alloc(as, trans, b->c.level);
-+ struct bkey_format format = bch2_btree_calc_format(b);
-+
-+ /*
-+ * The keys might expand with the new format - if they wouldn't fit in
-+ * the btree node anymore, use the old format for now:
-+ */
-+ if (!bch2_btree_node_format_fits(as->c, b, &format))
-+ format = b->format;
-+
-+ SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
-+
-+ btree_set_min(n, b->data->min_key);
-+ btree_set_max(n, b->data->max_key);
-+
-+ n->data->format = format;
-+ btree_node_set_format(n, format);
-+
-+ bch2_btree_sort_into(as->c, n, b);
-+
-+ btree_node_reset_sib_u64s(n);
-+ return n;
-+}
-+
-+/*
-+ * Allocate a node at @level spanning POS_MIN..SPOS_MAX, with a format
-+ * computed from the (newly initialized) node itself.
-+ */
-+static struct btree *__btree_root_alloc(struct btree_update *as,
-+ struct btree_trans *trans, unsigned level)
-+{
-+ struct btree *b = bch2_btree_node_alloc(as, trans, level);
-+
-+ btree_set_min(b, POS_MIN);
-+ btree_set_max(b, SPOS_MAX);
-+ b->data->format = bch2_btree_calc_format(b);
-+
-+ btree_node_set_format(b, b->data->format);
-+ bch2_btree_build_aux_trees(b);
-+
-+ return b;
-+}
-+
-+/*
-+ * Release every node still sitting in @as's preallocated lists.
-+ *
-+ * For each node, its disk allocation (open buckets + key) is parked in
-+ * c->btree_reserve_cache if there is a free slot, otherwise the open buckets
-+ * are put; the in-memory node is then freed under its intent and write locks.
-+ */
-+static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans)
-+{
-+ struct bch_fs *c = as->c;
-+ struct prealloc_nodes *p;
-+
-+ for (p = as->prealloc_nodes;
-+ p < as->prealloc_nodes + ARRAY_SIZE(as->prealloc_nodes);
-+ p++) {
-+ while (p->nr) {
-+ struct btree *b = p->b[--p->nr];
-+
-+ mutex_lock(&c->btree_reserve_cache_lock);
-+
-+ if (c->btree_reserve_cache_nr <
-+ ARRAY_SIZE(c->btree_reserve_cache)) {
-+ struct btree_alloc *a =
-+ &c->btree_reserve_cache[c->btree_reserve_cache_nr++];
-+
-+ /* ownership of the open buckets moves to the cache entry */
-+ a->ob = b->ob;
-+ b->ob.nr = 0;
-+ bkey_copy(&a->k, &b->key);
-+ } else {
-+ bch2_open_buckets_put(c, &b->ob);
-+ }
-+
-+ mutex_unlock(&c->btree_reserve_cache_lock);
-+
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
-+ __btree_node_free(c, b);
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+ }
-+ }
-+}
-+
-+/*
-+ * Fill @as's preallocated node lists up to @nr_nodes[0] leaf nodes and
-+ * @nr_nodes[1] interior nodes, under the btree cache cannibalize lock.
-+ *
-+ * @cl is used for taking the cannibalize lock, and is passed to the node
-+ * allocator unless BTREE_INSERT_NOWAIT is set in @flags.
-+ *
-+ * Returns 0 on success or a negative error. On error, nodes allocated so far
-+ * are left on @as's lists; nothing is released here.
-+ */
-+static int bch2_btree_reserve_get(struct btree_trans *trans,
-+ struct btree_update *as,
-+ unsigned nr_nodes[2],
-+ unsigned flags,
-+ struct closure *cl)
-+{
-+ struct bch_fs *c = as->c;
-+ struct btree *b;
-+ unsigned interior;
-+ int ret = 0;
-+
-+ BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
-+
-+ /*
-+ * Protects reaping from the btree node cache and using the btree node
-+ * open bucket reserve:
-+ *
-+ * BTREE_INSERT_NOWAIT only applies to btree node allocation, not
-+ * blocking on this lock:
-+ */
-+ ret = bch2_btree_cache_cannibalize_lock(c, cl);
-+ if (ret)
-+ return ret;
-+
-+ for (interior = 0; interior < 2; interior++) {
-+ struct prealloc_nodes *p = as->prealloc_nodes + interior;
-+
-+ while (p->nr < nr_nodes[interior]) {
-+ b = __bch2_btree_node_alloc(trans, &as->disk_res,
-+ flags & BTREE_INSERT_NOWAIT ? NULL : cl,
-+ interior, flags);
-+ if (IS_ERR(b)) {
-+ ret = PTR_ERR(b);
-+ goto err;
-+ }
-+
-+ p->b[p->nr++] = b;
-+ }
-+ }
-+err:
-+ /* reached on success as well: the cannibalize lock is always dropped */
-+ bch2_btree_cache_cannibalize_unlock(c);
-+ return ret;
-+}
-+
-+/* Asynchronous interior node update machinery */
-+
-+/*
-+ * Tear down @as: drop gc_lock if this update took it, release its journal
-+ * pre-reservation, journal pin, disk reservation and any unused preallocated
-+ * nodes, record the total update time, unlink it from the update lists and
-+ * return it to the mempool.
-+ */
-+static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
-+{
-+ struct bch_fs *c = as->c;
-+
-+ if (as->took_gc_lock)
-+ up_read(&c->gc_lock);
-+ as->took_gc_lock = false;
-+
-+ bch2_journal_preres_put(&c->journal, &as->journal_preres);
-+
-+ bch2_journal_pin_drop(&c->journal, &as->journal);
-+ bch2_journal_pin_flush(&c->journal, &as->journal);
-+ bch2_disk_reservation_put(c, &as->disk_res);
-+ bch2_btree_reserve_put(as, trans);
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
-+ as->start_time);
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ list_del(&as->unwritten_list);
-+ list_del(&as->list);
-+
-+ closure_debug_destroy(&as->cl);
-+ mempool_free(as, &c->btree_interior_update_pool);
-+
-+ /*
-+ * Have to do the wakeup with btree_interior_update_lock still held,
-+ * since being on btree_interior_update_list is our ref on @c:
-+ */
-+ closure_wake_up(&c->btree_interior_update_wait);
-+
-+ mutex_unlock(&c->btree_interior_update_lock);
-+}
-+
-+/*
-+ * Append a copy of @b's key to @keys (one of @as's keylists).
-+ *
-+ * The copy's mem_ptr field is overwritten with b->c.level + 1;
-+ * btree_update_nodes_written_trans() later reads that field back as the
-+ * level it passes to the mark functions.
-+ */
-+static void btree_update_add_key(struct btree_update *as,
-+ struct keylist *keys, struct btree *b)
-+{
-+ struct bkey_i *k = &b->key;
-+
-+ /*
-+  * NOTE(review): the bound is always taken from _old_keys, even when
-+  * @keys is the new_keys list - presumably both backing arrays are the
-+  * same size; confirm.
-+  */
-+ BUG_ON(bch2_keylist_u64s(keys) + k->k.u64s >
-+ ARRAY_SIZE(as->_old_keys));
-+
-+ bkey_copy(keys->top, k);
-+ bkey_i_to_btree_ptr_v2(keys->top)->v.mem_ptr = b->c.level + 1;
-+
-+ bch2_keylist_push(keys);
-+}
-+
-+/*
-+ * The transactional part of an interior btree node update, where we journal the
-+ * update we did to the interior node and update alloc info:
-+ *
-+ * Copies @as's accumulated journal entries into the transaction's extra
-+ * journal entries, points the transaction's journal pin at @as's pin, then
-+ * marks every key in old_keys as old and every key in new_keys as new.
-+ *
-+ * Returns 0 on success, or the first error encountered.
-+ */
-+static int btree_update_nodes_written_trans(struct btree_trans *trans,
-+ struct btree_update *as)
-+{
-+ struct bkey_i *k;
-+ int ret;
-+
-+ ret = darray_make_room(&trans->extra_journal_entries, as->journal_u64s);
-+ if (ret)
-+ return ret;
-+
-+ memcpy(&darray_top(trans->extra_journal_entries),
-+ as->journal_entries,
-+ as->journal_u64s * sizeof(u64));
-+ trans->extra_journal_entries.nr += as->journal_u64s;
-+
-+ trans->journal_pin = &as->journal;
-+
-+ for_each_keylist_key(&as->old_keys, k) {
-+ /* mem_ptr was overwritten with a level by btree_update_add_key() */
-+ unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
-+
-+ ret = bch2_trans_mark_old(trans, as->btree_id, level, bkey_i_to_s_c(k), 0);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ for_each_keylist_key(&as->new_keys, k) {
-+ unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
-+
-+ ret = bch2_trans_mark_new(trans, as->btree_id, level, k, 0);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Complete update @as (called from btree_interior_update_work() for updates
-+ * that have nodes_written set).
-+ *
-+ * In order: wait for in-flight writes to the old nodes, commit the
-+ * transactional part (btree_update_nodes_written_trans()), unblock writes to
-+ * the node the update was inserted into (as->b, if any), drop the journal pin
-+ * and pre-reservation, clear will_make_reachable on the new nodes and write
-+ * them if needed, put the update's open buckets, and free @as.
-+ *
-+ * If the journal is already in an error state the wait and the commit are
-+ * skipped, but the cleanup below the err label still runs.
-+ */
-+static void btree_update_nodes_written(struct btree_update *as)
-+{
-+ struct bch_fs *c = as->c;
-+ struct btree *b;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ u64 journal_seq = 0;
-+ unsigned i;
-+ int ret;
-+
-+ /*
-+ * If we're already in an error state, it might be because a btree node
-+ * was never written, and we might be trying to free that same btree
-+ * node here, but it won't have been marked as allocated and we'll see
-+ * spurious disk usage inconsistencies in the transactional part below
-+ * if we don't skip it:
-+ */
-+ ret = bch2_journal_error(&c->journal);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * Wait for any in flight writes to finish before we free the old nodes
-+ * on disk:
-+ */
-+ for (i = 0; i < as->nr_old_nodes; i++) {
-+ __le64 seq;
-+
-+ b = as->old_nodes[i];
-+
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-+ seq = b->data ? b->data->keys.seq : 0;
-+ six_unlock_read(&b->c.lock);
-+
-+ /* only wait if this is still the node we recorded, judging by its seq */
-+ if (seq == as->old_nodes_seq[i])
-+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
-+ TASK_UNINTERRUPTIBLE);
-+ }
-+
-+ /*
-+ * We did an update to a parent node where the pointers we added pointed
-+ * to child nodes that weren't written yet: now, the child nodes have
-+ * been written so we can write out the update to the interior node.
-+ */
-+
-+ /*
-+ * We can't call into journal reclaim here: we'd block on the journal
-+ * reclaim lock, but we may need to release the open buckets we have
-+ * pinned in order for other btree updates to make forward progress, and
-+ * journal reclaim does btree updates when flushing bkey_cached entries,
-+ * which may require allocations as well.
-+ */
-+ ret = commit_do(trans, &as->disk_res, &journal_seq,
-+ BCH_WATERMARK_reclaim|
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_NOCHECK_RW|
-+ BTREE_INSERT_JOURNAL_RECLAIM,
-+ btree_update_nodes_written_trans(trans, as));
-+ bch2_trans_unlock(trans);
-+
-+ bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
-+ "%s(): error %s", __func__, bch2_err_str(ret));
-+err:
-+ if (as->b) {
-+ struct btree_path *path;
-+
-+ b = as->b;
-+ path = get_unlocked_mut_path(trans, as->btree_id, b->c.level, b->key.k.p);
-+ /*
-+ * @b is the node we did the final insert into:
-+ *
-+ * On failure to get a journal reservation, we still have to
-+ * unblock the write and allow most of the write path to happen
-+ * so that shutdown works, but the i->journal_seq mechanism
-+ * won't work to prevent the btree write from being visible (we
-+ * didn't get a journal sequence number) - instead
-+ * __bch2_btree_node_write() doesn't do the actual write if
-+ * we're in journal error state:
-+ */
-+
-+ /*
-+ * Ensure transaction is unlocked before using
-+ * btree_node_lock_nopath() (the use of which is always suspect,
-+ * we need to work on removing this in the future)
-+ *
-+ * It should be, but get_unlocked_mut_path() -> bch2_path_get()
-+ * calls bch2_path_upgrade(), before we call path_make_mut(), so
-+ * we may rarely end up with a locked path besides the one we
-+ * have here:
-+ */
-+ bch2_trans_unlock(trans);
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
-+ mark_btree_node_locked(trans, path, b->c.level, BTREE_NODE_INTENT_LOCKED);
-+ path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
-+ path->l[b->c.level].b = b;
-+
-+ bch2_btree_node_lock_write_nofail(trans, path, &b->c);
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+
-+ list_del(&as->write_blocked_list);
-+ if (list_empty(&b->write_blocked))
-+ clear_btree_node_write_blocked(b);
-+
-+ /*
-+ * Node might have been freed, recheck under
-+ * btree_interior_update_lock:
-+ */
-+ if (as->b == b) {
-+ BUG_ON(!b->c.level);
-+ BUG_ON(!btree_node_dirty(b));
-+
-+ if (!ret) {
-+ struct bset *last = btree_bset_last(b);
-+
-+ last->journal_seq = cpu_to_le64(
-+ max(journal_seq,
-+ le64_to_cpu(last->journal_seq)));
-+
-+ bch2_btree_add_journal_pin(c, b, journal_seq);
-+ } else {
-+ /*
-+ * If we didn't get a journal sequence number we
-+ * can't write this btree node, because recovery
-+ * won't know to ignore this write:
-+ */
-+ set_btree_node_never_write(b);
-+ }
-+ }
-+
-+ mutex_unlock(&c->btree_interior_update_lock);
-+
-+ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
-+ six_unlock_write(&b->c.lock);
-+
-+ btree_node_write_if_need(c, b, SIX_LOCK_intent);
-+ btree_node_unlock(trans, path, b->c.level);
-+ bch2_path_put(trans, path, true);
-+ }
-+
-+ bch2_journal_pin_drop(&c->journal, &as->journal);
-+
-+ bch2_journal_preres_put(&c->journal, &as->journal_preres);
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ for (i = 0; i < as->nr_new_nodes; i++) {
-+ b = as->new_nodes[i];
-+
-+ /* low bit must already be clear here: the value is exactly @as */
-+ BUG_ON(b->will_make_reachable != (unsigned long) as);
-+ b->will_make_reachable = 0;
-+ clear_btree_node_will_make_reachable(b);
-+ }
-+ mutex_unlock(&c->btree_interior_update_lock);
-+
-+ for (i = 0; i < as->nr_new_nodes; i++) {
-+ b = as->new_nodes[i];
-+
-+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-+ btree_node_write_if_need(c, b, SIX_LOCK_read);
-+ six_unlock_read(&b->c.lock);
-+ }
-+
-+ for (i = 0; i < as->nr_open_buckets; i++)
-+ bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
-+
-+ bch2_btree_update_free(as, trans);
-+ bch2_trans_put(trans);
-+}
-+
-+/*
-+ * Work item: complete interior updates in list order, as long as the update
-+ * at the head of the unwritten list has had its nodes written. Stops at the
-+ * first update that is not ready yet, or when the list is empty.
-+ */
-+static void btree_interior_update_work(struct work_struct *work)
-+{
-+	struct bch_fs *c =
-+		container_of(work, struct bch_fs, btree_interior_update_work);
-+
-+	for (;;) {
-+		struct btree_update *head;
-+		bool ready;
-+
-+		/* Peek at the oldest unwritten update under the lock */
-+		mutex_lock(&c->btree_interior_update_lock);
-+		head = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
-+						struct btree_update, unwritten_list);
-+		ready = head && head->nodes_written;
-+		mutex_unlock(&c->btree_interior_update_lock);
-+
-+		if (!ready)
-+			return;
-+
-+		btree_update_nodes_written(head);
-+	}
-+}
-+
-+/*
-+ * Closure callback for @as->cl (installed by bch2_btree_update_done()): flag
-+ * the update as having its nodes written and kick the interior update worker.
-+ */
-+static void btree_update_set_nodes_written(struct closure *cl)
-+{
-+ struct btree_update *as = container_of(cl, struct btree_update, cl);
-+ struct bch_fs *c = as->c;
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ as->nodes_written = true;
-+ mutex_unlock(&c->btree_interior_update_lock);
-+
-+ queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
-+}
-+
-+/*
-+ * We're updating @b with pointers to nodes that haven't finished writing yet:
-+ * block @b from being written until @as completes
-+ *
-+ * Puts @as on the filesystem's list of unwritten updates, switches it to
-+ * BTREE_INTERIOR_UPDATING_NODE mode with as->b = @b, and adds it to @b's
-+ * write_blocked list. @b must be a dirty interior node and @as must not have
-+ * been given a mode yet.
-+ */
-+static void btree_update_updated_node(struct btree_update *as, struct btree *b)
-+{
-+ struct bch_fs *c = as->c;
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
-+
-+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
-+ BUG_ON(!btree_node_dirty(b));
-+ BUG_ON(!b->c.level);
-+
-+ as->mode = BTREE_INTERIOR_UPDATING_NODE;
-+ as->b = b;
-+
-+ set_btree_node_write_blocked(b);
-+ list_add(&as->write_blocked_list, &b->write_blocked);
-+
-+ mutex_unlock(&c->btree_interior_update_lock);
-+}
-+
-+/*
-+ * Detach @child from the node it was blocking (child->b is cleared) and put
-+ * it in BTREE_INTERIOR_UPDATING_AS mode, copying its journal pin into @as.
-+ *
-+ * Caller must hold btree_interior_update_lock.
-+ */
-+static void btree_update_reparent(struct btree_update *as,
-+ struct btree_update *child)
-+{
-+ struct bch_fs *c = as->c;
-+
-+ lockdep_assert_held(&c->btree_interior_update_lock);
-+
-+ child->b = NULL;
-+ child->mode = BTREE_INTERIOR_UPDATING_AS;
-+
-+ bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL);
-+}
-+
-+/*
-+ * Record that @as is installing @b as a new btree root: append a
-+ * BCH_JSET_ENTRY_btree_root journal entry carrying @b's key to @as's journal
-+ * entries, add @as to the list of unwritten updates and switch it to
-+ * BTREE_INTERIOR_UPDATING_ROOT mode.
-+ */
-+static void btree_update_updated_root(struct btree_update *as, struct btree *b)
-+{
-+ struct bkey_i *insert = &b->key;
-+ struct bch_fs *c = as->c;
-+
-+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
-+
-+ /* the new entry must fit in the fixed-size journal_entries buffer */
-+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
-+ ARRAY_SIZE(as->journal_entries));
-+
-+ as->journal_u64s +=
-+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
-+ BCH_JSET_ENTRY_btree_root,
-+ b->c.btree_id, b->c.level,
-+ insert, insert->k.u64s);
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
-+
-+ as->mode = BTREE_INTERIOR_UPDATING_ROOT;
-+ mutex_unlock(&c->btree_interior_update_lock);
-+}
-+
-+/*
-+ * bch2_btree_update_add_new_node:
-+ *
-+ * This causes @as to wait on @b to be written, before it gets to
-+ * bch2_btree_update_nodes_written
-+ *
-+ * Additionally, it sets b->will_make_reachable to prevent any additional writes
-+ * to @b from happening besides the first until @b is reachable on disk
-+ *
-+ * And it adds @b to the list of @as's new nodes, so that we can update sector
-+ * counts in bch2_btree_update_nodes_written:
-+ */
-+static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b)
-+{
-+ struct bch_fs *c = as->c;
-+
-+ /* ref on @as held on behalf of @b; the low bit set below records it */
-+ closure_get(&as->cl);
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes));
-+ BUG_ON(b->will_make_reachable);
-+
-+ as->new_nodes[as->nr_new_nodes++] = b;
-+ b->will_make_reachable = 1UL|(unsigned long) as;
-+ set_btree_node_will_make_reachable(b);
-+
-+ mutex_unlock(&c->btree_interior_update_lock);
-+
-+ btree_update_add_key(as, &as->new_keys, b);
-+
-+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
-+ /* bytes from the start of the node to the end of its first bset, rounded up to whole blocks */
-+ unsigned bytes = vstruct_end(&b->data->keys) - (void *) b->data;
-+ unsigned sectors = round_up(bytes, block_bytes(c)) >> 9;
-+
-+ bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-+ cpu_to_le16(sectors);
-+ }
-+}
-+
-+/*
-+ * If @b is still recorded as a new, not yet reachable node of some
-+ * btree_update, remove it from that update's new_nodes array, and drop the
-+ * closure ref it held on the update if the low bit of will_make_reachable was
-+ * still set. Does nothing if @b has no owning update.
-+ */
-+static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
-+{
-+ struct btree_update *as;
-+ unsigned long v;
-+ unsigned i;
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ /*
-+ * When b->will_make_reachable != 0, it owns a ref on as->cl that's
-+ * dropped when it gets written by bch2_btree_complete_write - the
-+ * xchg() is for synchronization with bch2_btree_complete_write:
-+ */
-+ v = xchg(&b->will_make_reachable, 0);
-+ clear_btree_node_will_make_reachable(b);
-+ /* low bit is the "ref still held" flag, the rest is the update pointer */
-+ as = (struct btree_update *) (v & ~1UL);
-+
-+ if (!as) {
-+ mutex_unlock(&c->btree_interior_update_lock);
-+ return;
-+ }
-+
-+ for (i = 0; i < as->nr_new_nodes; i++)
-+ if (as->new_nodes[i] == b)
-+ goto found;
-+
-+ /* @b claimed to belong to @as but is not in its new_nodes array */
-+ BUG();
-+found:
-+ array_remove_item(as->new_nodes, as->nr_new_nodes, i);
-+ mutex_unlock(&c->btree_interior_update_lock);
-+
-+ if (v & 1)
-+ closure_put(&as->cl);
-+}
-+
-+/*
-+ * Move all of @b's open buckets over to @as, taking them from the end of
-+ * @b's list first; @b is left with none.
-+ */
-+static void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b)
-+{
-+	while (b->ob.nr) {
-+		unsigned last = --b->ob.nr;
-+
-+		as->open_buckets[as->nr_open_buckets++] = b->ob.v[last];
-+	}
-+}
-+
-+/*
-+ * @b is being split/rewritten: it may have pointers to not-yet-written btree
-+ * nodes and thus outstanding btree_updates - redirect @b's
-+ * btree_updates to point to this btree_update:
-+ *
-+ * Also marks @b as dying, cancels its pending write state, moves its journal
-+ * pins to @as, and records @b (key, pointer and current bset seq) among
-+ * @as's old nodes. Nodes flagged as fake are only marked dying.
-+ */
-+static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
-+ struct btree *b)
-+{
-+ struct bch_fs *c = as->c;
-+ struct btree_update *p, *n;
-+ struct btree_write *w;
-+
-+ set_btree_node_dying(b);
-+
-+ if (btree_node_fake(b))
-+ return;
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+
-+ /*
-+ * Does this node have any btree_update operations preventing
-+ * it from being written?
-+ *
-+ * If so, redirect them to point to this btree_update: we can
-+ * write out our new nodes, but we won't make them visible until those
-+ * operations complete
-+ */
-+ list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
-+ list_del_init(&p->write_blocked_list);
-+ btree_update_reparent(as, p);
-+
-+ /*
-+ * for flush_held_btree_writes() waiting on updates to flush or
-+ * nodes to be writeable:
-+ */
-+ closure_wake_up(&c->btree_interior_update_wait);
-+ }
-+
-+ clear_btree_node_dirty_acct(c, b);
-+ clear_btree_node_need_write(b);
-+ clear_btree_node_write_blocked(b);
-+
-+ /*
-+ * Does this node have unwritten data that has a pin on the journal?
-+ *
-+ * If so, transfer that pin to the btree_update operation -
-+ * note that if we're freeing multiple nodes, we only need to keep the
-+ * oldest pin of any of the nodes we're freeing. We'll release the pin
-+ * when the new nodes are persistent and reachable on disk:
-+ */
-+ w = btree_current_write(b);
-+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL);
-+ bch2_journal_pin_drop(&c->journal, &w->journal);
-+
-+ w = btree_prev_write(b);
-+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL);
-+ bch2_journal_pin_drop(&c->journal, &w->journal);
-+
-+ mutex_unlock(&c->btree_interior_update_lock);
-+
-+ /*
-+ * Is this a node that isn't reachable on disk yet?
-+ *
-+ * Nodes that aren't reachable yet have writes blocked until they're
-+ * reachable - now that we've cancelled any pending writes and moved
-+ * things waiting on that write to wait on this update, we can drop this
-+ * node from the list of nodes that the other update is making
-+ * reachable, prior to freeing it:
-+ */
-+ btree_update_drop_new_node(c, b);
-+
-+ btree_update_add_key(as, &as->old_keys, b);
-+
-+ /* the seq is saved so btree_update_nodes_written() can tell if @b was reused */
-+ as->old_nodes[as->nr_old_nodes] = b;
-+ as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq;
-+ as->nr_old_nodes++;
-+}
-+
-+/*
-+ * End the foreground part of update @as: drop gc_lock if this update took
-+ * it, return unused preallocated nodes, and arrange for
-+ * btree_update_set_nodes_written() to run on the interior update worker via
-+ * @as's closure. @as must have been given a mode by now.
-+ */
-+static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans)
-+{
-+ struct bch_fs *c = as->c;
-+ /* copied up front, so the stats update below does not read @as after continue_at() */
-+ u64 start_time = as->start_time;
-+
-+ BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
-+
-+ if (as->took_gc_lock)
-+ up_read(&as->c->gc_lock);
-+ as->took_gc_lock = false;
-+
-+ bch2_btree_reserve_put(as, trans);
-+
-+ continue_at(&as->cl, btree_update_set_nodes_written,
-+ as->c->btree_interior_update_worker);
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground],
-+ start_time);
-+}
-+
-+/*
-+ * Begin an interior btree update on @path at @level.
-+ *
-+ * Walks up from @level counting how many leaf and interior nodes must be
-+ * reserved (upgrading @path's locks as it goes) until it reaches a node that
-+ * can take the new pointers or runs out of nodes (new root). It then takes
-+ * gc_lock for read unless BTREE_INSERT_GC_LOCK_HELD is set, allocates and
-+ * initializes the btree_update, and acquires the journal pre-reservation,
-+ * the disk reservation and the node reserve.
-+ *
-+ * The watermark in @flags is raised to at least BCH_WATERMARK_btree, with
-+ * BCH_WATERMARK_copygc mapped to BCH_WATERMARK_btree_copygc.
-+ *
-+ * Returns the new btree_update or an ERR_PTR. Failures after the update has
-+ * been allocated release it through bch2_btree_update_free().
-+ */
-+static struct btree_update *
-+bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
-+ unsigned level, bool split, unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_update *as;
-+ u64 start_time = local_clock();
-+ int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
-+ ? BCH_DISK_RESERVATION_NOFAIL : 0;
-+ unsigned nr_nodes[2] = { 0, 0 };
-+ unsigned update_level = level;
-+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-+ unsigned journal_flags = 0;
-+ int ret = 0;
-+ u32 restart_count = trans->restart_count;
-+
-+ BUG_ON(!path->should_be_locked);
-+
-+ if (watermark == BCH_WATERMARK_copygc)
-+ watermark = BCH_WATERMARK_btree_copygc;
-+ if (watermark < BCH_WATERMARK_btree)
-+ watermark = BCH_WATERMARK_btree;
-+
-+ flags &= ~BCH_WATERMARK_MASK;
-+ flags |= watermark;
-+
-+ if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
-+ journal_flags |= JOURNAL_RES_GET_NONBLOCK;
-+ journal_flags |= watermark;
-+
-+ /* nr_nodes[0] counts leaf nodes, nr_nodes[1] interior nodes */
-+ while (1) {
-+ nr_nodes[!!update_level] += 1 + split;
-+ update_level++;
-+
-+ ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
-+ if (ret)
-+ return ERR_PTR(ret);
-+
-+ if (!btree_path_node(path, update_level)) {
-+ /* Allocating new root? */
-+ nr_nodes[1] += split;
-+ update_level = BTREE_MAX_DEPTH;
-+ break;
-+ }
-+
-+ if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
-+ BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
-+ break;
-+
-+ split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
-+ }
-+
-+ if (flags & BTREE_INSERT_GC_LOCK_HELD)
-+ lockdep_assert_held(&c->gc_lock);
-+ else if (!down_read_trylock(&c->gc_lock)) {
-+ /* trylock failed: drop btree locks, block on gc_lock, then relock */
-+ ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0));
-+ if (ret) {
-+ up_read(&c->gc_lock);
-+ return ERR_PTR(ret);
-+ }
-+ }
-+
-+ as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
-+ memset(as, 0, sizeof(*as));
-+ closure_init(&as->cl, NULL);
-+ as->c = c;
-+ as->start_time = start_time;
-+ as->mode = BTREE_INTERIOR_NO_UPDATE;
-+ as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
-+ as->btree_id = path->btree_id;
-+ as->update_level = update_level;
-+ INIT_LIST_HEAD(&as->list);
-+ INIT_LIST_HEAD(&as->unwritten_list);
-+ INIT_LIST_HEAD(&as->write_blocked_list);
-+ bch2_keylist_init(&as->old_keys, as->_old_keys);
-+ bch2_keylist_init(&as->new_keys, as->_new_keys);
-+ bch2_keylist_init(&as->parent_keys, as->inline_keys);
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ list_add_tail(&as->list, &c->btree_interior_update_list);
-+ mutex_unlock(&c->btree_interior_update_lock);
-+
-+ /*
-+ * We don't want to allocate if we're in an error state, that can cause
-+ * deadlock on emergency shutdown due to open buckets getting stuck in
-+ * the btree_reserve_cache after allocator shutdown has cleared it out.
-+ * This check needs to come after adding us to the btree_interior_update
-+ * list but before calling bch2_btree_reserve_get, to synchronize with
-+ * __bch2_fs_read_only().
-+ */
-+ ret = bch2_journal_error(&c->journal);
-+ if (ret)
-+ goto err;
-+
-+ /* first attempt is always non-blocking; only retry blocking with locks dropped */
-+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
-+ BTREE_UPDATE_JOURNAL_RES,
-+ journal_flags|JOURNAL_RES_GET_NONBLOCK);
-+ if (ret) {
-+ if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
-+ ret = -BCH_ERR_journal_reclaim_would_deadlock;
-+ goto err;
-+ }
-+
-+ ret = drop_locks_do(trans,
-+ bch2_journal_preres_get(&c->journal, &as->journal_preres,
-+ BTREE_UPDATE_JOURNAL_RES,
-+ journal_flags));
-+ if (ret == -BCH_ERR_journal_preres_get_blocked) {
-+ trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
-+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
-+ }
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = bch2_disk_reservation_get(c, &as->disk_res,
-+ (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
-+ c->opts.metadata_replicas,
-+ disk_res_flags);
-+ if (ret)
-+ goto err;
-+
-+ /* try without a closure first; fall back to waiting only on ENOSPC/ENOMEM */
-+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
-+ if (bch2_err_matches(ret, ENOSPC) ||
-+ bch2_err_matches(ret, ENOMEM)) {
-+ struct closure cl;
-+
-+ /*
-+ * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
-+ * flag
-+ */
-+ if (bch2_err_matches(ret, ENOSPC) &&
-+ (flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
-+ watermark != BCH_WATERMARK_reclaim) {
-+ ret = -BCH_ERR_journal_reclaim_would_deadlock;
-+ goto err;
-+ }
-+
-+ closure_init_stack(&cl);
-+
-+ do {
-+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
-+
-+ bch2_trans_unlock(trans);
-+ closure_sync(&cl);
-+ } while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
-+ }
-+
-+ if (ret) {
-+ trace_and_count(c, btree_reserve_get_fail, trans->fn,
-+ _RET_IP_, nr_nodes[0] + nr_nodes[1], ret);
-+ goto err;
-+ }
-+
-+ ret = bch2_trans_relock(trans);
-+ if (ret)
-+ goto err;
-+
-+ bch2_trans_verify_not_restarted(trans, restart_count);
-+ return as;
-+err:
-+ bch2_btree_update_free(as, trans);
-+ return ERR_PTR(ret);
-+}
-+
-+/* Btree root updates: */
-+
-+/*
-+ * Make @b the in-memory root of btree b->c.btree_id: take it off its btree
-+ * cache list, switch the root pointer under btree_root_lock, then recalculate
-+ * the btree reserve.
-+ */
-+static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
-+{
-+ /* Root nodes cannot be reaped */
-+ mutex_lock(&c->btree_cache.lock);
-+ list_del_init(&b->list);
-+ mutex_unlock(&c->btree_cache.lock);
-+
-+ mutex_lock(&c->btree_root_lock);
-+ /*
-+ * If a root is already set, it must be marked dying and must not be at
-+ * a higher level than @b:
-+ */
-+ BUG_ON(btree_node_root(c, b) &&
-+ (b->c.level < btree_node_root(c, b)->c.level ||
-+ !btree_node_dying(btree_node_root(c, b))));
-+
-+ bch2_btree_id_root(c, b->c.btree_id)->b = b;
-+ mutex_unlock(&c->btree_root_lock);
-+
-+ bch2_recalc_btree_reserve(c);
-+}
-+
-+/*
-+ * Switch the root of @b's btree to @b as part of interior update @as.
-+ *
-+ * The current root is write locked through @path for the duration of the
-+ * switch; the in-memory root pointer is updated, @as is told about the new
-+ * root, and only then is the old root unlocked.
-+ */
-+static void bch2_btree_set_root(struct btree_update *as,
-+ struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree *b)
-+{
-+ struct bch_fs *c = as->c;
-+ struct btree *old;
-+
-+ trace_and_count(c, btree_node_set_root, c, b);
-+
-+ old = btree_node_root(c, b);
-+
-+ /*
-+ * Ensure no one is using the old root while we switch to the
-+ * new root:
-+ */
-+ bch2_btree_node_lock_write_nofail(trans, path, &old->c);
-+
-+ bch2_btree_set_root_inmem(c, b);
-+
-+ btree_update_updated_root(as, b);
-+
-+ /*
-+ * Unlock old root after new root is visible:
-+ *
-+ * The new root isn't persistent, but that's ok: we still have
-+ * an intent lock on the new root, and any updates that would
-+ * depend on the new root would have to update the new root.
-+ */
-+ bch2_btree_node_unlock_write(trans, path, old);
-+}
-+
-+/* Interior node updates: */
-+
-+/*
-+ * Insert one btree pointer key @insert into interior node @b.
-+ *
-+ * The key is validated, appended to the journal entries accumulated in @as,
-+ * and inserted at the position found by advancing @node_iter; @b is then
-+ * marked dirty and flagged as needing an interior-type write.
-+ */
-+static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
-+ struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree *b,
-+ struct btree_node_iter *node_iter,
-+ struct bkey_i *insert)
-+{
-+ struct bch_fs *c = as->c;
-+ struct bkey_packed *k;
-+ struct printbuf buf = PRINTBUF;
-+ unsigned long old, new, v;
-+
-+ BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
-+ !btree_ptr_sectors_written(insert));
-+
-+ if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
-+ bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
-+
-+ /*
-+ * On a validation failure the checks are re-run to build a full
-+ * message in @buf; the filesystem is flagged inconsistent but the
-+ * insert below still proceeds:
-+ */
-+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
-+ btree_node_type(b), WRITE, &buf) ?:
-+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) {
-+ printbuf_reset(&buf);
-+ prt_printf(&buf, "inserting invalid bkey\n ");
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
-+ prt_printf(&buf, "\n ");
-+ bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
-+ btree_node_type(b), WRITE, &buf);
-+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf);
-+
-+ bch2_fs_inconsistent(c, "%s", buf.buf);
-+ dump_stack();
-+ }
-+
-+ /* The new journal entry must fit in the update's fixed-size buffer: */
-+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
-+ ARRAY_SIZE(as->journal_entries));
-+
-+ as->journal_u64s +=
-+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
-+ BCH_JSET_ENTRY_btree_keys,
-+ b->c.btree_id, b->c.level,
-+ insert, insert->k.u64s);
-+
-+ /* Skip forward past every key that sorts before the insert position: */
-+ while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
-+ bkey_iter_pos_cmp(b, k, &insert->k.p) < 0)
-+ bch2_btree_node_iter_advance(node_iter, b);
-+
-+ bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
-+ set_btree_node_dirty_acct(c, b);
-+
-+ /* Atomically set the write type to interior and set need_write: */
-+ v = READ_ONCE(b->flags);
-+ do {
-+ old = new = v;
-+
-+ new &= ~BTREE_WRITE_TYPE_MASK;
-+ new |= BTREE_WRITE_interior;
-+ new |= 1 << BTREE_NODE_need_write;
-+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
-+
-+ printbuf_exit(&buf);
-+}
-+
-+/*
-+ * Insert keys from the front of @keys into interior node @b, popping each key
-+ * from the list once inserted. Stops at the first key whose position is past
-+ * b->key.k.p, leaving it (and anything after it) on @keys.
-+ *
-+ * @node_iter is taken by value, so the caller's iterator is left untouched.
-+ */
-+static void
-+__bch2_btree_insert_keys_interior(struct btree_update *as,
-+ struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree *b,
-+ struct btree_node_iter node_iter,
-+ struct keylist *keys)
-+{
-+ struct bkey_i *insert = bch2_keylist_front(keys);
-+ struct bkey_packed *k;
-+
-+ BUG_ON(btree_node_type(b) != BKEY_TYPE_btree);
-+
-+ /* Rewind the iterator past any key at or after the first insert position: */
-+ while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
-+ (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0))
-+ ;
-+
-+ while (!bch2_keylist_empty(keys)) {
-+ insert = bch2_keylist_front(keys);
-+
-+ if (bpos_gt(insert->k.p, b->key.k.p))
-+ break;
-+
-+ bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert);
-+ bch2_keylist_pop_front(keys);
-+ }
-+}
-+
-+/*
-+ * Move keys from n1 (original replacement node, now lower node) to n2 (higher
-+ * node)
-+ *
-+ * The live keys of @b are distributed over n[0] and n[1] in two passes: the
-+ * first pass picks the pivot position and computes a key format for each new
-+ * node, the second pass copies the keys across in the new formats.
-+ */
-+static void __btree_split_node(struct btree_update *as,
-+ struct btree_trans *trans,
-+ struct btree *b,
-+ struct btree *n[2])
-+{
-+ struct bkey_packed *k;
-+ struct bpos n1_pos = POS_MIN;
-+ struct btree_node_iter iter;
-+ struct bset *bsets[2];
-+ struct bkey_format_state format[2];
-+ struct bkey_packed *out[2];
-+ struct bkey uk;
-+ /* keys go to n[0] until 3/5 of @b's live u64s have been consumed */
-+ unsigned u64s, n1_u64s = (b->nr.live_u64s * 3) / 5;
-+ int i;
-+
-+ for (i = 0; i < 2; i++) {
-+ BUG_ON(n[i]->nsets != 1);
-+
-+ bsets[i] = btree_bset_first(n[i]);
-+ out[i] = bsets[i]->start;
-+
-+ SET_BTREE_NODE_SEQ(n[i]->data, BTREE_NODE_SEQ(b->data) + 1);
-+ bch2_bkey_format_init(&format[i]);
-+ }
-+
-+ /* Pass 1: find the last position going to n[0], gather format stats */
-+ u64s = 0;
-+ for_each_btree_node_key(b, k, &iter) {
-+ if (bkey_deleted(k))
-+ continue;
-+
-+ i = u64s >= n1_u64s;
-+ u64s += k->u64s;
-+ uk = bkey_unpack_key(b, k);
-+ if (!i)
-+ n1_pos = uk.p;
-+ bch2_bkey_format_add_key(&format[i], &uk);
-+ }
-+
-+ /* n[0] covers [b min, n1_pos], n[1] covers (n1_pos, b max] */
-+ btree_set_min(n[0], b->data->min_key);
-+ btree_set_max(n[0], n1_pos);
-+ btree_set_min(n[1], bpos_successor(n1_pos));
-+ btree_set_max(n[1], b->data->max_key);
-+
-+ for (i = 0; i < 2; i++) {
-+ bch2_bkey_format_add_pos(&format[i], n[i]->data->min_key);
-+ bch2_bkey_format_add_pos(&format[i], n[i]->data->max_key);
-+
-+ n[i]->data->format = bch2_bkey_format_done(&format[i]);
-+ btree_node_set_format(n[i], n[i]->data->format);
-+ }
-+
-+ /* Pass 2: copy keys, using the same split point as pass 1 */
-+ u64s = 0;
-+ for_each_btree_node_key(b, k, &iter) {
-+ if (bkey_deleted(k))
-+ continue;
-+
-+ i = u64s >= n1_u64s;
-+ u64s += k->u64s;
-+
-+ /* Repack into the new node's format; fall back to unpacked */
-+ if (bch2_bkey_transform(&n[i]->format, out[i], bkey_packed(k)
-+ ? &b->format: &bch2_bkey_format_current, k))
-+ out[i]->format = KEY_FORMAT_LOCAL_BTREE;
-+ else
-+ bch2_bkey_unpack(b, (void *) out[i], k);
-+
-+ out[i]->needs_whiteout = false;
-+
-+ btree_keys_account_key_add(&n[i]->nr, 0, out[i]);
-+ out[i] = bkey_p_next(out[i]);
-+ }
-+
-+ for (i = 0; i < 2; i++) {
-+ bsets[i]->u64s = cpu_to_le16((u64 *) out[i] - bsets[i]->_data);
-+
-+ /* Both halves of the split must have received keys: */
-+ BUG_ON(!bsets[i]->u64s);
-+
-+ set_btree_bset_end(n[i], n[i]->set);
-+
-+ btree_node_reset_sib_u64s(n[i]);
-+
-+ bch2_verify_btree_nr_keys(n[i]);
-+
-+ if (b->c.level)
-+ btree_node_interior_verify(as->c, n[i]);
-+ }
-+}
-+
-+/*
-+ * For updates to interior nodes, we've got to do the insert before we split
-+ * because the stuff we're inserting has to be inserted atomically. Post split,
-+ * the keys might have to go in different nodes and the split would no longer be
-+ * atomic.
-+ *
-+ * Worse, if the insert is from btree node coalescing, if we do the insert after
-+ * we do the split (and pick the pivot) - the pivot we pick might be between
-+ * nodes that were coalesced, and thus in the middle of a child node post
-+ * coalescing:
-+ *
-+ * This helper is a no-op when @keys is empty or when the first key on @keys
-+ * lies past @b's max_key.
-+ */
-+static void btree_split_insert_keys(struct btree_update *as,
-+ struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree *b,
-+ struct keylist *keys)
-+{
-+ if (!bch2_keylist_empty(keys) &&
-+ bpos_le(bch2_keylist_front(keys)->k.p, b->data->max_key)) {
-+ struct btree_node_iter node_iter;
-+
-+ /* Start a fresh iterator on @b at the first key to insert: */
-+ bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
-+
-+ __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
-+
-+ btree_node_interior_verify(as->c, b);
-+ }
-+}
-+
-+/*
-+ * Replace node @b with one or two freshly allocated nodes.
-+ *
-+ * If @b holds more than BTREE_SPLIT_THRESHOLD live u64s it is split into n1
-+ * and n2 (plus a new root n3 when @b had no parent); otherwise it is compacted
-+ * into a single replacement node n1. @keys, if non-NULL, are inserted into the
-+ * new node(s) and must be fully consumed. The new nodes are then made visible
-+ * through the parent (or as the new root), written out, and @b is freed in
-+ * memory.
-+ *
-+ * Returns 0 on success, or the error from bch2_btree_insert_node() on the
-+ * parent, in which case the newly allocated nodes are released unused.
-+ */
-+static int btree_split(struct btree_update *as, struct btree_trans *trans,
-+ struct btree_path *path, struct btree *b,
-+ struct keylist *keys, unsigned flags)
-+{
-+ struct bch_fs *c = as->c;
-+ struct btree *parent = btree_node_parent(path, b);
-+ struct btree *n1, *n2 = NULL, *n3 = NULL;
-+ struct btree_path *path1 = NULL, *path2 = NULL;
-+ u64 start_time = local_clock();
-+ int ret = 0;
-+
-+ /* A node without a parent must be the root; a parent must be intent locked */
-+ BUG_ON(!parent && (b != btree_node_root(c, b)));
-+ BUG_ON(parent && !btree_node_intent_locked(path, b->c.level + 1));
-+
-+ bch2_btree_interior_update_will_free_node(as, b);
-+
-+ if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
-+ struct btree *n[2];
-+
-+ trace_and_count(c, btree_node_split, c, b);
-+
-+ n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
-+ n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);
-+
-+ __btree_split_node(as, trans, b, n);
-+
-+ if (keys) {
-+ btree_split_insert_keys(as, trans, path, n1, keys);
-+ btree_split_insert_keys(as, trans, path, n2, keys);
-+ BUG_ON(!bch2_keylist_empty(keys));
-+ }
-+
-+ bch2_btree_build_aux_trees(n2);
-+ bch2_btree_build_aux_trees(n1);
-+
-+ bch2_btree_update_add_new_node(as, n1);
-+ bch2_btree_update_add_new_node(as, n2);
-+ six_unlock_write(&n2->c.lock);
-+ six_unlock_write(&n1->c.lock);
-+
-+ /* Give each new node its own path holding an intent lock on it: */
-+ path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
-+ six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
-+ mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
-+ bch2_btree_path_level_init(trans, path1, n1);
-+
-+ path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p);
-+ six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
-+ mark_btree_node_locked(trans, path2, n2->c.level, BTREE_NODE_INTENT_LOCKED);
-+ bch2_btree_path_level_init(trans, path2, n2);
-+
-+ /*
-+ * Note that on recursive parent_keys == keys, so we
-+ * can't start adding new keys to parent_keys before emptying it
-+ * out (which we did with btree_split_insert_keys() above)
-+ */
-+ bch2_keylist_add(&as->parent_keys, &n1->key);
-+ bch2_keylist_add(&as->parent_keys, &n2->key);
-+
-+ if (!parent) {
-+ /* Depth increases, make a new root */
-+ n3 = __btree_root_alloc(as, trans, b->c.level + 1);
-+
-+ bch2_btree_update_add_new_node(as, n3);
-+ six_unlock_write(&n3->c.lock);
-+
-+ /* The new root is tracked on path2, one level above n2: */
-+ path2->locks_want++;
-+ BUG_ON(btree_node_locked(path2, n3->c.level));
-+ six_lock_increment(&n3->c.lock, SIX_LOCK_intent);
-+ mark_btree_node_locked(trans, path2, n3->c.level, BTREE_NODE_INTENT_LOCKED);
-+ bch2_btree_path_level_init(trans, path2, n3);
-+
-+ n3->sib_u64s[0] = U16_MAX;
-+ n3->sib_u64s[1] = U16_MAX;
-+
-+ btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
-+ }
-+ } else {
-+ trace_and_count(c, btree_node_compact, c, b);
-+
-+ n1 = bch2_btree_node_alloc_replacement(as, trans, b);
-+
-+ if (keys) {
-+ btree_split_insert_keys(as, trans, path, n1, keys);
-+ BUG_ON(!bch2_keylist_empty(keys));
-+ }
-+
-+ bch2_btree_build_aux_trees(n1);
-+ bch2_btree_update_add_new_node(as, n1);
-+ six_unlock_write(&n1->c.lock);
-+
-+ path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
-+ six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
-+ mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
-+ bch2_btree_path_level_init(trans, path1, n1);
-+
-+ if (parent)
-+ bch2_keylist_add(&as->parent_keys, &n1->key);
-+ }
-+
-+ /* New nodes all written, now make them visible: */
-+
-+ if (parent) {
-+ /* Split a non root node */
-+ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
-+ if (ret)
-+ goto err;
-+ } else if (n3) {
-+ bch2_btree_set_root(as, trans, path, n3);
-+ } else {
-+ /* Root filled up but didn't need to be split */
-+ bch2_btree_set_root(as, trans, path, n1);
-+ }
-+
-+ if (n3) {
-+ bch2_btree_update_get_open_buckets(as, n3);
-+ bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
-+ }
-+ if (n2) {
-+ bch2_btree_update_get_open_buckets(as, n2);
-+ bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
-+ }
-+ bch2_btree_update_get_open_buckets(as, n1);
-+ bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-+
-+ /*
-+ * The old node must be freed (in memory) _before_ unlocking the new
-+ * nodes - else another thread could re-acquire a read lock on the old
-+ * node after another thread has locked and updated the new node, thus
-+ * seeing stale data:
-+ */
-+ bch2_btree_node_free_inmem(trans, path, b);
-+
-+ if (n3)
-+ bch2_trans_node_add(trans, n3);
-+ if (n2)
-+ bch2_trans_node_add(trans, n2);
-+ bch2_trans_node_add(trans, n1);
-+
-+ if (n3)
-+ six_unlock_intent(&n3->c.lock);
-+ if (n2)
-+ six_unlock_intent(&n2->c.lock);
-+ six_unlock_intent(&n1->c.lock);
-+out:
-+ /* Shared exit: drop the temporary paths taken for the new nodes */
-+ if (path2) {
-+ __bch2_btree_path_unlock(trans, path2);
-+ bch2_path_put(trans, path2, true);
-+ }
-+ if (path1) {
-+ __bch2_btree_path_unlock(trans, path1);
-+ bch2_path_put(trans, path1, true);
-+ }
-+
-+ bch2_trans_verify_locks(trans);
-+
-+ /* n2 is only set when an actual split (not a compaction) happened */
-+ bch2_time_stats_update(&c->times[n2
-+ ? BCH_TIME_btree_node_split
-+ : BCH_TIME_btree_node_compact],
-+ start_time);
-+ return ret;
-+err:
-+ /* Inserting into the parent failed: release the never-used new nodes */
-+ if (n3)
-+ bch2_btree_node_free_never_used(as, trans, n3);
-+ if (n2)
-+ bch2_btree_node_free_never_used(as, trans, n2);
-+ bch2_btree_node_free_never_used(as, trans, n1);
-+ goto out;
-+}
-+
-+/*
-+ * Insert @keys into interior node @b using @path's iterator for @b's level,
-+ * record the node as updated in @as, then re-peek the node iterator of every
-+ * path in @trans that points at @b.
-+ */
-+static void
-+bch2_btree_insert_keys_interior(struct btree_update *as,
-+ struct btree_trans *trans,
-+ struct btree_path *path,
-+ struct btree *b,
-+ struct keylist *keys)
-+{
-+ struct btree_path *linked;
-+
-+ __bch2_btree_insert_keys_interior(as, trans, path, b,
-+ path->l[b->c.level].iter, keys);
-+
-+ btree_update_updated_node(as, b);
-+
-+ trans_for_each_path_with_node(trans, b, linked)
-+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
-+
-+ bch2_trans_verify_paths(trans);
-+}
-+
-+/**
-+ * bch2_btree_insert_node - insert bkeys into a given btree node
-+ *
-+ * @as: btree_update object
-+ * @trans: btree_trans object
-+ * @path: path that points to current node
-+ * @b: node to insert keys into
-+ * @keys: list of keys to insert
-+ * @flags: transaction commit flags
-+ *
-+ * Returns: 0 on success, typically transaction restart error on failure
-+ *
-+ * Inserts as many keys as it can into a given btree node, splitting it if full.
-+ * If a split occurred, this function will return early. This can only happen
-+ * for leaf nodes -- inserts into interior nodes have to be atomic.
-+ *
-+ * NOTE(review): the paragraph above looks stale. The BUG_ON(!b->c.level)
-+ * below restricts this function to interior nodes. When @keys do not fit,
-+ * the node is handed to btree_split(), unless @b's level is at or above
-+ * as->update_level, in which case the transaction is restarted.
-+ */
-+static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
-+ struct btree_path *path, struct btree *b,
-+ struct keylist *keys, unsigned flags)
-+{
-+ struct bch_fs *c = as->c;
-+ int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
-+ int old_live_u64s = b->nr.live_u64s;
-+ int live_u64s_added, u64s_added;
-+ int ret;
-+
-+ lockdep_assert_held(&c->gc_lock);
-+ BUG_ON(!btree_node_intent_locked(path, b->c.level));
-+ BUG_ON(!b->c.level);
-+ BUG_ON(!as || as->b);
-+ bch2_verify_keylist_sorted(keys);
-+
-+ ret = bch2_btree_node_lock_write(trans, path, &b->c);
-+ if (ret)
-+ return ret;
-+
-+ bch2_btree_node_prep_for_write(trans, path, b);
-+
-+ /* Not enough room for all of @keys: drop the write lock and split */
-+ if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
-+ bch2_btree_node_unlock_write(trans, path, b);
-+ goto split;
-+ }
-+
-+ btree_node_interior_verify(c, b);
-+
-+ bch2_btree_insert_keys_interior(as, trans, path, b, keys);
-+
-+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
-+ u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
-+
-+ /* If the node shrank, lower the cached sibling size estimates to match: */
-+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
-+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
-+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
-+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
-+
-+ if (u64s_added > live_u64s_added &&
-+ bch2_maybe_compact_whiteouts(c, b))
-+ bch2_trans_node_reinit_iter(trans, b);
-+
-+ bch2_btree_node_unlock_write(trans, path, b);
-+
-+ btree_node_interior_verify(c, b);
-+ return 0;
-+split:
-+ /*
-+ * We could attempt to avoid the transaction restart, by calling
-+ * bch2_btree_path_upgrade() and allocating more nodes:
-+ */
-+ if (b->c.level >= as->update_level) {
-+ trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
-+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
-+ }
-+
-+ return btree_split(as, trans, path, b, keys, flags);
-+}
-+
-+/*
-+ * Split (or compact) the node @path points to at path->level, inside a newly
-+ * started interior update. On success, a foreground merge is then attempted
-+ * on each intent-locked level above it.
-+ *
-+ * Returns 0 on success, or the error from starting the update, from
-+ * btree_split(), or from bch2_foreground_maybe_merge().
-+ */
-+int bch2_btree_split_leaf(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned flags)
-+{
-+ struct btree *b = path_l(path)->b;
-+ struct btree_update *as;
-+ unsigned l;
-+ int ret = 0;
-+
-+ as = bch2_btree_update_start(trans, path, path->level,
-+ true, flags);
-+ if (IS_ERR(as))
-+ return PTR_ERR(as);
-+
-+ ret = btree_split(as, trans, path, b, NULL, flags);
-+ if (ret) {
-+ bch2_btree_update_free(as, trans);
-+ return ret;
-+ }
-+
-+ bch2_btree_update_done(as, trans);
-+
-+ /* Walk upwards while levels are intent locked and no error occurred: */
-+ for (l = path->level + 1; btree_node_intent_locked(path, l) && !ret; l++)
-+ ret = bch2_foreground_maybe_merge(trans, path, l, flags);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Try to merge the node at @level on @path with its previous or next sibling,
-+ * as selected by @sib.
-+ *
-+ * An estimate of the merged size is stored in b->sib_u64s[sib]; the merge is
-+ * only carried out when that estimate does not exceed
-+ * c->btree_foreground_merge_threshold. When there is no sibling in that
-+ * direction, or the sibling has a different parent, the estimate is set to
-+ * U16_MAX and nothing else is done.
-+ *
-+ * Returns 0 when the nodes were merged or no merge was needed, -EIO on a
-+ * btree topology error, or an error from path traversal, starting the
-+ * interior update, or inserting into the parent.
-+ */
-+int __bch2_foreground_maybe_merge(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned level,
-+ unsigned flags,
-+ enum btree_node_sibling sib)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_path *sib_path = NULL, *new_path = NULL;
-+ struct btree_update *as;
-+ struct bkey_format_state new_s;
-+ struct bkey_format new_f;
-+ struct bkey_i delete;
-+ struct btree *b, *m, *n, *prev, *next, *parent;
-+ struct bpos sib_pos;
-+ size_t sib_u64s;
-+ u64 start_time = local_clock();
-+ int ret = 0;
-+
-+ BUG_ON(!path->should_be_locked);
-+ BUG_ON(!btree_node_locked(path, level));
-+
-+ b = path->l[level].b;
-+
-+ /* No sibling exists on that side of the first / last node: */
-+ if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
-+ (sib == btree_next_sib && bpos_eq(b->data->max_key, SPOS_MAX))) {
-+ b->sib_u64s[sib] = U16_MAX;
-+ return 0;
-+ }
-+
-+ /* Position just outside @b's range, on the side of the wanted sibling: */
-+ sib_pos = sib == btree_prev_sib
-+ ? bpos_predecessor(b->data->min_key)
-+ : bpos_successor(b->data->max_key);
-+
-+ sib_path = bch2_path_get(trans, path->btree_id, sib_pos,
-+ U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_);
-+ ret = bch2_btree_path_traverse(trans, sib_path, false);
-+ if (ret)
-+ goto err;
-+
-+ btree_path_set_should_be_locked(sib_path);
-+
-+ m = sib_path->l[level].b;
-+
-+ /* Only nodes sharing the same parent are merged: */
-+ if (btree_node_parent(path, b) !=
-+ btree_node_parent(sib_path, m)) {
-+ b->sib_u64s[sib] = U16_MAX;
-+ goto out;
-+ }
-+
-+ if (sib == btree_prev_sib) {
-+ prev = m;
-+ next = b;
-+ } else {
-+ prev = b;
-+ next = m;
-+ }
-+
-+ /* The two nodes must be exactly adjacent in key space: */
-+ if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
-+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-+
-+ bch2_bpos_to_text(&buf1, prev->data->max_key);
-+ bch2_bpos_to_text(&buf2, next->data->min_key);
-+ bch_err(c,
-+ "%s(): btree topology error:\n"
-+ " prev ends at %s\n"
-+ " next starts at %s",
-+ __func__, buf1.buf, buf2.buf);
-+ printbuf_exit(&buf1);
-+ printbuf_exit(&buf2);
-+ bch2_topology_error(c);
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ /* Compute a key format covering both nodes and the merged range: */
-+ bch2_bkey_format_init(&new_s);
-+ bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
-+ __bch2_btree_calc_format(&new_s, prev);
-+ __bch2_btree_calc_format(&new_s, next);
-+ bch2_bkey_format_add_pos(&new_s, next->data->max_key);
-+ new_f = bch2_bkey_format_done(&new_s);
-+
-+ sib_u64s = btree_node_u64s_with_format(b, &new_f) +
-+ btree_node_u64s_with_format(m, &new_f);
-+
-+ /* Above the hysteresis point, only half of the excess is counted: */
-+ if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) {
-+ sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
-+ sib_u64s /= 2;
-+ sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
-+ }
-+
-+ /* Clamp so the stored estimate never equals the U16_MAX sentinel: */
-+ sib_u64s = min(sib_u64s, btree_max_u64s(c));
-+ sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
-+ b->sib_u64s[sib] = sib_u64s;
-+
-+ if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
-+ goto out;
-+
-+ parent = btree_node_parent(path, b);
-+ as = bch2_btree_update_start(trans, path, level, false,
-+ BTREE_INSERT_NOFAIL|flags);
-+ ret = PTR_ERR_OR_ZERO(as);
-+ if (ret)
-+ goto err;
-+
-+ trace_and_count(c, btree_node_merge, c, b);
-+
-+ bch2_btree_interior_update_will_free_node(as, b);
-+ bch2_btree_interior_update_will_free_node(as, m);
-+
-+ n = bch2_btree_node_alloc(as, trans, b->c.level);
-+
-+ SET_BTREE_NODE_SEQ(n->data,
-+ max(BTREE_NODE_SEQ(b->data),
-+ BTREE_NODE_SEQ(m->data)) + 1);
-+
-+ btree_set_min(n, prev->data->min_key);
-+ btree_set_max(n, next->data->max_key);
-+
-+ n->data->format = new_f;
-+ btree_node_set_format(n, new_f);
-+
-+ bch2_btree_sort_into(c, n, prev);
-+ bch2_btree_sort_into(c, n, next);
-+
-+ bch2_btree_build_aux_trees(n);
-+ bch2_btree_update_add_new_node(as, n);
-+ six_unlock_write(&n->c.lock);
-+
-+ new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
-+ six_lock_increment(&n->c.lock, SIX_LOCK_intent);
-+ mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
-+ bch2_btree_path_level_init(trans, new_path, n);
-+
-+ /*
-+ * Parent update: a deleted key at prev's position, followed by the key
-+ * for the merged node:
-+ */
-+ bkey_init(&delete.k);
-+ delete.k.p = prev->key.k.p;
-+ bch2_keylist_add(&as->parent_keys, &delete);
-+ bch2_keylist_add(&as->parent_keys, &n->key);
-+
-+ bch2_trans_verify_paths(trans);
-+
-+ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
-+ if (ret)
-+ goto err_free_update;
-+
-+ bch2_trans_verify_paths(trans);
-+
-+ bch2_btree_update_get_open_buckets(as, n);
-+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-+
-+ bch2_btree_node_free_inmem(trans, path, b);
-+ bch2_btree_node_free_inmem(trans, sib_path, m);
-+
-+ bch2_trans_node_add(trans, n);
-+
-+ bch2_trans_verify_paths(trans);
-+
-+ six_unlock_intent(&n->c.lock);
-+
-+ bch2_btree_update_done(as, trans);
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
-+out:
-+err:
-+ /* Common exit for success and failure: release the extra paths */
-+ if (new_path)
-+ bch2_path_put(trans, new_path, true);
-+ bch2_path_put(trans, sib_path, true);
-+ bch2_trans_verify_locks(trans);
-+ return ret;
-+err_free_update:
-+ /* Parent insert failed: release the unused merged node and the update */
-+ bch2_btree_node_free_never_used(as, trans, n);
-+ bch2_btree_update_free(as, trans);
-+ goto out;
-+}
-+
-+/*
-+ * Rewrite node @b: allocate a replacement node for it, make the replacement
-+ * visible through the parent (or as the new root when @b has no parent),
-+ * write it out and free @b in memory. BTREE_INSERT_NOFAIL is always added to
-+ * @flags.
-+ *
-+ * Returns 0 on success, or an error from starting the interior update or from
-+ * inserting the new key into the parent. The transaction is downgraded on
-+ * every exit path.
-+ */
-+int bch2_btree_node_rewrite(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct btree *b,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_path *new_path = NULL;
-+ struct btree *n, *parent;
-+ struct btree_update *as;
-+ int ret;
-+
-+ flags |= BTREE_INSERT_NOFAIL;
-+
-+ parent = btree_node_parent(iter->path, b);
-+ as = bch2_btree_update_start(trans, iter->path, b->c.level,
-+ false, flags);
-+ ret = PTR_ERR_OR_ZERO(as);
-+ if (ret)
-+ goto out;
-+
-+ bch2_btree_interior_update_will_free_node(as, b);
-+
-+ n = bch2_btree_node_alloc_replacement(as, trans, b);
-+
-+ bch2_btree_build_aux_trees(n);
-+ bch2_btree_update_add_new_node(as, n);
-+ six_unlock_write(&n->c.lock);
-+
-+ /* Track the new node on its own path, holding an intent lock: */
-+ new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
-+ six_lock_increment(&n->c.lock, SIX_LOCK_intent);
-+ mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
-+ bch2_btree_path_level_init(trans, new_path, n);
-+
-+ trace_and_count(c, btree_node_rewrite, c, b);
-+
-+ if (parent) {
-+ bch2_keylist_add(&as->parent_keys, &n->key);
-+ ret = bch2_btree_insert_node(as, trans, iter->path, parent,
-+ &as->parent_keys, flags);
-+ if (ret)
-+ goto err;
-+ } else {
-+ bch2_btree_set_root(as, trans, iter->path, n);
-+ }
-+
-+ bch2_btree_update_get_open_buckets(as, n);
-+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-+
-+ bch2_btree_node_free_inmem(trans, iter->path, b);
-+
-+ bch2_trans_node_add(trans, n);
-+ six_unlock_intent(&n->c.lock);
-+
-+ bch2_btree_update_done(as, trans);
-+out:
-+ if (new_path)
-+ bch2_path_put(trans, new_path, true);
-+ bch2_trans_downgrade(trans);
-+ return ret;
-+err:
-+ /* Parent insert failed: release the unused replacement and the update */
-+ bch2_btree_node_free_never_used(as, trans, n);
-+ bch2_btree_update_free(as, trans);
-+ goto out;
-+}
-+
-+/*
-+ * One queued request to rewrite a btree node from a workqueue. The node is
-+ * identified by btree id, level and position rather than by pointer, and @seq
-+ * is compared against the node found at rewrite time.
-+ */
-+struct async_btree_rewrite {
-+ struct bch_fs *c;
-+ struct work_struct work; /* runs async_btree_node_rewrite_work() */
-+ struct list_head list; /* entry on c->pending_node_rewrites */
-+ enum btree_id btree_id;
-+ unsigned level;
-+ struct bpos pos; /* copied from b->key.k.p */
-+ __le64 seq; /* copied from b->data->keys.seq */
-+};
-+
-+/*
-+ * Transaction body for an asynchronous node rewrite: look the node up again
-+ * by btree id, position and level, and rewrite it only if it is still the
-+ * node the request was queued for (same sequence number).
-+ *
-+ * Returns 0 if the node was rewritten, or if it could not be found any more
-+ * (which is only logged); otherwise the error from the lookup or from
-+ * bch2_btree_node_rewrite().
-+ */
-+static int async_btree_node_rewrite_trans(struct btree_trans *trans,
-+					  struct async_btree_rewrite *a)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter iter;
-+	struct btree *b;
-+	int ret;
-+
-+	bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos,
-+				  BTREE_MAX_DEPTH, a->level, 0);
-+	b = bch2_btree_iter_peek_node(&iter);
-+	ret = PTR_ERR_OR_ZERO(b);
-+	if (ret)
-+		goto out;
-+
-+	if (!b || b->data->keys.seq != a->seq) {
-+		struct printbuf buf = PRINTBUF;
-+
-+		if (b)
-+			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-+		else
-+			prt_str(&buf, "(null)");
-+		/* a->seq is little-endian on disk: convert before printing */
-+		bch_info(c, "%s: node to rewrite not found:, searching for seq %llu, got\n%s",
-+			 __func__, le64_to_cpu(a->seq), buf.buf);
-+		printbuf_exit(&buf);
-+		goto out;
-+	}
-+
-+	ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
-+out:
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Workqueue handler for a queued node rewrite: runs the rewrite in a
-+ * transaction, logs any error, then drops the BCH_WRITE_REF_node_rewrite
-+ * write ref and frees the request.
-+ */
-+static void async_btree_node_rewrite_work(struct work_struct *work)
-+{
-+ struct async_btree_rewrite *a =
-+ container_of(work, struct async_btree_rewrite, work);
-+ struct bch_fs *c = a->c;
-+ int ret;
-+
-+ ret = bch2_trans_do(c, NULL, NULL, 0,
-+ async_btree_node_rewrite_trans(trans, a));
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
-+ kfree(a);
-+}
-+
-+/*
-+ * Queue an asynchronous rewrite of node @b.
-+ *
-+ * If BCH_FS_MAY_GO_RW is not yet set, the request is parked on
-+ * c->pending_node_rewrites instead of being queued. Otherwise a
-+ * BCH_WRITE_REF_node_rewrite ref is taken (going read-write early if the
-+ * filesystem has not been started yet) and the work item is queued on
-+ * c->btree_interior_update_worker. Failures are logged and the request is
-+ * dropped; nothing is reported to the caller.
-+ */
-+void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
-+{
-+ struct async_btree_rewrite *a;
-+ int ret;
-+
-+ a = kmalloc(sizeof(*a), GFP_NOFS);
-+ if (!a) {
-+ bch_err(c, "%s: error allocating memory", __func__);
-+ return;
-+ }
-+
-+ a->c = c;
-+ a->btree_id = b->c.btree_id;
-+ a->level = b->c.level;
-+ a->pos = b->key.k.p;
-+ a->seq = b->data->keys.seq;
-+ INIT_WORK(&a->work, async_btree_node_rewrite_work);
-+
-+ /* Too early to go read-write: defer until pending rewrites are run */
-+ if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
-+ mutex_lock(&c->pending_node_rewrites_lock);
-+ list_add(&a->list, &c->pending_node_rewrites);
-+ mutex_unlock(&c->pending_node_rewrites_lock);
-+ return;
-+ }
-+
-+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
-+ /* Once the filesystem is started, failing to get the ref is final: */
-+ if (test_bit(BCH_FS_STARTED, &c->flags)) {
-+ bch_err(c, "%s: error getting c->writes ref", __func__);
-+ kfree(a);
-+ return;
-+ }
-+
-+ ret = bch2_fs_read_write_early(c);
-+ if (ret) {
-+ bch_err_msg(c, ret, "going read-write");
-+ kfree(a);
-+ return;
-+ }
-+
-+ bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
-+ }
-+
-+ /* The write ref taken above is dropped by the work function */
-+ queue_work(c->btree_interior_update_worker, &a->work);
-+}
-+
-+/*
-+ * Drain c->pending_node_rewrites: each parked request is unlinked, a
-+ * BCH_WRITE_REF_node_rewrite ref is taken on its behalf, and its work item is
-+ * queued on c->btree_interior_update_worker.
-+ */
-+void bch2_do_pending_node_rewrites(struct bch_fs *c)
-+{
-+	struct list_head *pending = &c->pending_node_rewrites;
-+
-+	mutex_lock(&c->pending_node_rewrites_lock);
-+	while (!list_empty(pending)) {
-+		struct async_btree_rewrite *rewrite =
-+			list_first_entry(pending, struct async_btree_rewrite, list);
-+
-+		list_del(&rewrite->list);
-+
-+		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
-+		queue_work(c->btree_interior_update_worker, &rewrite->work);
-+	}
-+	mutex_unlock(&c->pending_node_rewrites_lock);
-+}
-+
-+/*
-+ * Throw away every request still parked on c->pending_node_rewrites without
-+ * running it: each entry is unlinked and freed.
-+ */
-+void bch2_free_pending_node_rewrites(struct bch_fs *c)
-+{
-+	struct list_head *pending = &c->pending_node_rewrites;
-+
-+	mutex_lock(&c->pending_node_rewrites_lock);
-+	while (!list_empty(pending)) {
-+		struct async_btree_rewrite *rewrite =
-+			list_first_entry(pending, struct async_btree_rewrite, list);
-+
-+		list_del(&rewrite->list);
-+
-+		kfree(rewrite);
-+	}
-+	mutex_unlock(&c->pending_node_rewrites_lock);
-+}
-+
-+/*
-+ * Replace the key of node @b with @new_key.
-+ *
-+ * Unless @skip_triggers is set, triggers are run for the old and new key. If
-+ * @new_hash is given, it is inserted into the btree cache hash table under
-+ * @new_key before the commit. The new key is then either queued as an update
-+ * to the parent node or, for a root node, added as an extra btree_root
-+ * journal entry, and the transaction is committed. After a successful commit
-+ * @b's key is overwritten under a write lock and, when @new_hash was used,
-+ * @b is rehashed under the new key.
-+ *
-+ * Returns 0 on success or the first error encountered.
-+ */
-+static int __bch2_btree_node_update_key(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct btree *b, struct btree *new_hash,
-+ struct bkey_i *new_key,
-+ unsigned commit_flags,
-+ bool skip_triggers)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter2 = { NULL };
-+ struct btree *parent;
-+ int ret;
-+
-+ if (!skip_triggers) {
-+ ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1,
-+ bkey_i_to_s_c(&b->key), 0);
-+ if (ret)
-+ return ret;
-+
-+ ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1,
-+ new_key, 0);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (new_hash) {
-+ bkey_copy(&new_hash->key, new_key);
-+ ret = bch2_btree_node_hash_insert(&c->btree_cache,
-+ new_hash, b->c.level, b->c.btree_id);
-+ BUG_ON(ret);
-+ }
-+
-+ parent = btree_node_parent(iter->path, b);
-+ if (parent) {
-+ /* Work on a private copy of @iter, moved up to the parent level: */
-+ bch2_trans_copy_iter(&iter2, iter);
-+
-+ iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
-+ iter2.flags & BTREE_ITER_INTENT,
-+ _THIS_IP_);
-+
-+ BUG_ON(iter2.path->level != b->c.level);
-+ BUG_ON(!bpos_eq(iter2.path->pos, new_key->k.p));
-+
-+ btree_path_set_level_up(trans, iter2.path);
-+
-+ trans->paths_sorted = false;
-+
-+ ret = bch2_btree_iter_traverse(&iter2) ?:
-+ bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN);
-+ if (ret)
-+ goto err;
-+ } else {
-+ BUG_ON(btree_node_root(c, b) != b);
-+
-+ /*
-+ * NOTE(review): this returns directly even when @new_hash was
-+ * inserted into the hash table above - confirm whether it
-+ * should go through the err label instead.
-+ */
-+ ret = darray_make_room(&trans->extra_journal_entries,
-+ jset_u64s(new_key->k.u64s));
-+ if (ret)
-+ return ret;
-+
-+ journal_entry_set((void *) &darray_top(trans->extra_journal_entries),
-+ BCH_JSET_ENTRY_btree_root,
-+ b->c.btree_id, b->c.level,
-+ new_key, new_key->k.u64s);
-+ trans->extra_journal_entries.nr += jset_u64s(new_key->k.u64s);
-+ }
-+
-+ ret = bch2_trans_commit(trans, NULL, NULL, commit_flags);
-+ if (ret)
-+ goto err;
-+
-+ bch2_btree_node_lock_write_nofail(trans, iter->path, &b->c);
-+
-+ if (new_hash) {
-+ /* Swap the placeholder for @b, now hashed under the new key: */
-+ mutex_lock(&c->btree_cache.lock);
-+ bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
-+ bch2_btree_node_hash_remove(&c->btree_cache, b);
-+
-+ bkey_copy(&b->key, new_key);
-+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
-+ BUG_ON(ret);
-+ mutex_unlock(&c->btree_cache.lock);
-+ } else {
-+ bkey_copy(&b->key, new_key);
-+ }
-+
-+ bch2_btree_node_unlock_write(trans, iter->path, b);
-+out:
-+ bch2_trans_iter_exit(trans, &iter2);
-+ return ret;
-+err:
-+ /*
-+ * NOTE(review): this removes @b from the hash table, although it was
-+ * @new_hash that was inserted above - confirm which node is meant.
-+ */
-+ if (new_hash) {
-+ mutex_lock(&c->btree_cache.lock);
-+ bch2_btree_node_hash_remove(&c->btree_cache, b);
-+ mutex_unlock(&c->btree_cache.lock);
-+ }
-+ goto out;
-+}
-+
-+/*
-+ * Update the key of node @b to @new_key.
-+ *
-+ * The path is first upgraded to cover @b's parent level. If the new key hashes
-+ * differently from @b's current hash value, a spare in-memory node (new_hash)
-+ * is allocated for __bch2_btree_node_update_key() to use as a hash table
-+ * placeholder; it is moved to the freeable list and unlocked afterwards.
-+ *
-+ * Returns 0 on success, or an error from the path upgrade, from waiting with
-+ * locks dropped, or from __bch2_btree_node_update_key().
-+ */
-+int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *iter,
-+ struct btree *b, struct bkey_i *new_key,
-+ unsigned commit_flags, bool skip_triggers)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree *new_hash = NULL;
-+ struct btree_path *path = iter->path;
-+ struct closure cl;
-+ int ret = 0;
-+
-+ ret = bch2_btree_path_upgrade(trans, path, b->c.level + 1);
-+ if (ret)
-+ return ret;
-+
-+ closure_init_stack(&cl);
-+
-+ /*
-+ * check btree_ptr_hash_val() after @b is locked by
-+ * btree_iter_traverse():
-+ */
-+ if (btree_ptr_hash_val(new_key) != b->hash_val) {
-+ /*
-+ * NOTE(review): if taking the cannibalize lock fails we wait on
-+ * the closure with btree locks dropped, but the lock attempt is
-+ * not repeated afterwards - confirm this is intended.
-+ */
-+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
-+ if (ret) {
-+ ret = drop_locks_do(trans, (closure_sync(&cl), 0));
-+ if (ret)
-+ return ret;
-+ }
-+
-+ /*
-+ * NOTE(review): the result is used without an error check -
-+ * confirm bch2_btree_node_mem_alloc() cannot fail here.
-+ */
-+ new_hash = bch2_btree_node_mem_alloc(trans, false);
-+ }
-+
-+ /* Hold an extra intent ref on the path across the update: */
-+ path->intent_ref++;
-+ ret = __bch2_btree_node_update_key(trans, iter, b, new_hash, new_key,
-+ commit_flags, skip_triggers);
-+ --path->intent_ref;
-+
-+ if (new_hash) {
-+ mutex_lock(&c->btree_cache.lock);
-+ list_move(&new_hash->list, &c->btree_cache.freeable);
-+ mutex_unlock(&c->btree_cache.lock);
-+
-+ six_unlock_write(&new_hash->c.lock);
-+ six_unlock_intent(&new_hash->c.lock);
-+ }
-+ closure_sync(&cl);
-+ bch2_btree_cache_cannibalize_unlock(c);
-+ return ret;
-+}
-+
-+/*
-+ * Like bch2_btree_node_update_key(), but sets up its own node iterator for @b
-+ * from @b's btree id, key position and level. If the traversal no longer
-+ * finds @b at that position, nothing is updated and 0 is returned.
-+ */
-+int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
-+ struct btree *b, struct bkey_i *new_key,
-+ unsigned commit_flags, bool skip_triggers)
-+{
-+ struct btree_iter iter;
-+ int ret;
-+
-+ bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p,
-+ BTREE_MAX_DEPTH, b->c.level,
-+ BTREE_ITER_INTENT);
-+ ret = bch2_btree_iter_traverse(&iter);
-+ if (ret)
-+ goto out;
-+
-+ /* has node been freed? */
-+ if (iter.path->l[b->c.level].b != b) {
-+ /* node has been freed: */
-+ BUG_ON(!btree_node_dying(b));
-+ goto out;
-+ }
-+
-+ BUG_ON(!btree_node_hashed(b));
-+
-+ ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
-+ commit_flags, skip_triggers);
-+out:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/* Init code: */
-+
-+/*
-+ * Only for filesystem bringup, when first reading the btree roots or allocating
-+ * btree roots when initializing a new filesystem:
-+ */
-+void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
-+{
-+ /* No root may have been set for this btree yet: */
-+ BUG_ON(btree_node_root(c, b));
-+
-+ bch2_btree_set_root_inmem(c, b);
-+}
-+
-+static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct closure cl;
-+ struct btree *b;
-+ int ret;
-+
-+ closure_init_stack(&cl);
-+
-+ do {
-+ ret = bch2_btree_cache_cannibalize_lock(c, &cl);
-+ closure_sync(&cl);
-+ } while (ret);
-+
-+ b = bch2_btree_node_mem_alloc(trans, false);
-+ bch2_btree_cache_cannibalize_unlock(c);
-+
-+ set_btree_node_fake(b);
-+ set_btree_node_need_rewrite(b);
-+ b->c.level = 0;
-+ b->c.btree_id = id;
-+
-+ bkey_btree_ptr_init(&b->key);
-+ b->key.k.p = SPOS_MAX;
-+ *((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id;
-+
-+ bch2_bset_init_first(b, &b->data->keys);
-+ bch2_btree_build_aux_trees(b);
-+
-+ b->data->flags = 0;
-+ btree_set_min(b, POS_MIN);
-+ btree_set_max(b, SPOS_MAX);
-+ b->data->format = bch2_btree_calc_format(b);
-+ btree_node_set_format(b, b->data->format);
-+
-+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b,
-+ b->c.level, b->c.btree_id);
-+ BUG_ON(ret);
-+
-+ bch2_btree_set_root_inmem(c, b);
-+
-+ six_unlock_write(&b->c.lock);
-+ six_unlock_intent(&b->c.lock);
-+ return 0;
-+}
-+
-+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
-+{
-+ bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
-+}
-+
-+void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+ struct btree_update *as;
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ list_for_each_entry(as, &c->btree_interior_update_list, list)
-+ prt_printf(out, "%p m %u w %u r %u j %llu\n",
-+ as,
-+ as->mode,
-+ as->nodes_written,
-+ closure_nr_remaining(&as->cl),
-+ as->journal.seq);
-+ mutex_unlock(&c->btree_interior_update_lock);
-+}
-+
-+static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
-+{
-+ bool ret;
-+
-+ mutex_lock(&c->btree_interior_update_lock);
-+ ret = !list_empty(&c->btree_interior_update_list);
-+ mutex_unlock(&c->btree_interior_update_lock);
-+
-+ return ret;
-+}
-+
-+bool bch2_btree_interior_updates_flush(struct bch_fs *c)
-+{
-+ bool ret = bch2_btree_interior_updates_pending(c);
-+
-+ if (ret)
-+ closure_wait_event(&c->btree_interior_update_wait,
-+ !bch2_btree_interior_updates_pending(c));
-+ return ret;
-+}
-+
-+void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry)
-+{
-+ struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);
-+
-+ mutex_lock(&c->btree_root_lock);
-+
-+ r->level = entry->level;
-+ r->alive = true;
-+ bkey_copy(&r->key, (struct bkey_i *) entry->start);
-+
-+ mutex_unlock(&c->btree_root_lock);
-+}
-+
-+struct jset_entry *
-+bch2_btree_roots_to_journal_entries(struct bch_fs *c,
-+ struct jset_entry *end,
-+ unsigned long skip)
-+{
-+ unsigned i;
-+
-+ mutex_lock(&c->btree_root_lock);
-+
-+ for (i = 0; i < btree_id_nr_alive(c); i++) {
-+ struct btree_root *r = bch2_btree_id_root(c, i);
-+
-+ if (r->alive && !test_bit(i, &skip)) {
-+ journal_entry_set(end, BCH_JSET_ENTRY_btree_root,
-+ i, r->level, &r->key, r->key.k.u64s);
-+ end = vstruct_next(end);
-+ }
-+ }
-+
-+ mutex_unlock(&c->btree_root_lock);
-+
-+ return end;
-+}
-+
-+void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
-+{
-+ if (c->btree_interior_update_worker)
-+ destroy_workqueue(c->btree_interior_update_worker);
-+ mempool_exit(&c->btree_interior_update_pool);
-+}
-+
-+void bch2_fs_btree_interior_update_init_early(struct bch_fs *c)
-+{
-+ mutex_init(&c->btree_reserve_cache_lock);
-+ INIT_LIST_HEAD(&c->btree_interior_update_list);
-+ INIT_LIST_HEAD(&c->btree_interior_updates_unwritten);
-+ mutex_init(&c->btree_interior_update_lock);
-+ INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);
-+
-+ INIT_LIST_HEAD(&c->pending_node_rewrites);
-+ mutex_init(&c->pending_node_rewrites_lock);
-+}
-+
-+int bch2_fs_btree_interior_update_init(struct bch_fs *c)
-+{
-+ c->btree_interior_update_worker =
-+ alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
-+ if (!c->btree_interior_update_worker)
-+ return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
-+
-+ if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
-+ sizeof(struct btree_update)))
-+ return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
-+
-+ return 0;
-+}
-diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
-new file mode 100644
-index 000000000000..4df21512d640
---- /dev/null
-+++ b/fs/bcachefs/btree_update_interior.h
-@@ -0,0 +1,337 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
-+#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H
-+
-+#include "btree_cache.h"
-+#include "btree_locking.h"
-+#include "btree_update.h"
-+
-+void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
-+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
-+ struct bkey_format *);
-+
-+#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
-+
-+#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
-+
-+/*
-+ * Tracks an in progress split/rewrite of a btree node and the update to the
-+ * parent node:
-+ *
-+ * When we split/rewrite a node, we do all the updates in memory without
-+ * waiting for any writes to complete - we allocate the new node(s) and update
-+ * the parent node, possibly recursively up to the root.
-+ *
-+ * The end result is that we have one or more new nodes being written -
-+ * possibly several, if there were multiple splits - and then a write (updating
-+ * an interior node) which will make all these new nodes visible.
-+ *
-+ * Additionally, as we split/rewrite nodes we free the old nodes - but the old
-+ * nodes can't be freed (their space on disk can't be reclaimed) until the
-+ * update to the interior node that makes the new node visible completes -
-+ * until then, the old nodes are still reachable on disk.
-+ *
-+ */
-+struct btree_update {
-+ struct closure cl;
-+ struct bch_fs *c;
-+ u64 start_time;
-+
-+ struct list_head list;
-+ struct list_head unwritten_list;
-+
-+ /* What kind of update are we doing? */
-+ enum {
-+ BTREE_INTERIOR_NO_UPDATE,
-+ BTREE_INTERIOR_UPDATING_NODE,
-+ BTREE_INTERIOR_UPDATING_ROOT,
-+ BTREE_INTERIOR_UPDATING_AS,
-+ } mode;
-+
-+ unsigned nodes_written:1;
-+ unsigned took_gc_lock:1;
-+
-+ enum btree_id btree_id;
-+ unsigned update_level;
-+
-+ struct disk_reservation disk_res;
-+ struct journal_preres journal_preres;
-+
-+ /*
-+ * BTREE_INTERIOR_UPDATING_NODE:
-+ * The update that made the new nodes visible was a regular update to an
-+ * existing interior node - @b. We can't write out the update to @b
-+ * until the new nodes we created are finished writing, so we block @b
-+ * from writing by putting this btree_interior update on the
-+ * @b->write_blocked list with @write_blocked_list:
-+ */
-+ struct btree *b;
-+ struct list_head write_blocked_list;
-+
-+ /*
-+ * We may be freeing nodes that were dirty, and thus had journal entries
-+ * pinned: we need to transfer the oldest of those pins to the
-+ * btree_update operation, and release it when the new node(s)
-+ * are all persistent and reachable:
-+ */
-+ struct journal_entry_pin journal;
-+
-+ /* Preallocated nodes we reserve when we start the update: */
-+ struct prealloc_nodes {
-+ struct btree *b[BTREE_UPDATE_NODES_MAX];
-+ unsigned nr;
-+ } prealloc_nodes[2];
-+
-+ /* Nodes being freed: */
-+ struct keylist old_keys;
-+ u64 _old_keys[BTREE_UPDATE_NODES_MAX *
-+ BKEY_BTREE_PTR_U64s_MAX];
-+
-+ /* Nodes being added: */
-+ struct keylist new_keys;
-+ u64 _new_keys[BTREE_UPDATE_NODES_MAX *
-+ BKEY_BTREE_PTR_U64s_MAX];
-+
-+ /* New nodes, that will be made reachable by this update: */
-+ struct btree *new_nodes[BTREE_UPDATE_NODES_MAX];
-+ unsigned nr_new_nodes;
-+
-+ struct btree *old_nodes[BTREE_UPDATE_NODES_MAX];
-+ __le64 old_nodes_seq[BTREE_UPDATE_NODES_MAX];
-+ unsigned nr_old_nodes;
-+
-+ open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX *
-+ BCH_REPLICAS_MAX];
-+ open_bucket_idx_t nr_open_buckets;
-+
-+ unsigned journal_u64s;
-+ u64 journal_entries[BTREE_UPDATE_JOURNAL_RES];
-+
-+ /* Only here to reduce stack usage on recursive splits: */
-+ struct keylist parent_keys;
-+ /*
-+ * Enough room for btree_split's keys without realloc - btree node
-+ * pointers never have crc/compression info, so we only need to acount
-+ * for the pointers for three keys
-+ */
-+ u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
-+};
-+
-+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
-+ struct btree_trans *,
-+ struct btree *,
-+ struct bkey_format);
-+
-+int bch2_btree_split_leaf(struct btree_trans *, struct btree_path *, unsigned);
-+
-+int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_path *,
-+ unsigned, unsigned, enum btree_node_sibling);
-+
-+static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned level, unsigned flags,
-+ enum btree_node_sibling sib)
-+{
-+ struct btree *b;
-+
-+ EBUG_ON(!btree_node_locked(path, level));
-+
-+ b = path->l[level].b;
-+ if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold)
-+ return 0;
-+
-+ return __bch2_foreground_maybe_merge(trans, path, level, flags, sib);
-+}
-+
-+static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
-+ struct btree_path *path,
-+ unsigned level,
-+ unsigned flags)
-+{
-+ return bch2_foreground_maybe_merge_sibling(trans, path, level, flags,
-+ btree_prev_sib) ?:
-+ bch2_foreground_maybe_merge_sibling(trans, path, level, flags,
-+ btree_next_sib);
-+}
-+
-+int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
-+ struct btree *, unsigned);
-+void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
-+int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
-+ struct btree *, struct bkey_i *,
-+ unsigned, bool);
-+int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
-+ struct bkey_i *, unsigned, bool);
-+
-+void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
-+void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
-+
-+static inline unsigned btree_update_reserve_required(struct bch_fs *c,
-+ struct btree *b)
-+{
-+ unsigned depth = btree_node_root(c, b)->c.level + 1;
-+
-+ /*
-+ * Number of nodes we might have to allocate in a worst case btree
-+ * split operation - we split all the way up to the root, then allocate
-+ * a new root, unless we're already at max depth:
-+ */
-+ if (depth < BTREE_MAX_DEPTH)
-+ return (depth - b->c.level) * 2 + 1;
-+ else
-+ return (depth - b->c.level) * 2 - 1;
-+}
-+
-+static inline void btree_node_reset_sib_u64s(struct btree *b)
-+{
-+ b->sib_u64s[0] = b->nr.live_u64s;
-+ b->sib_u64s[1] = b->nr.live_u64s;
-+}
-+
-+static inline void *btree_data_end(struct bch_fs *c, struct btree *b)
-+{
-+ return (void *) b->data + btree_bytes(c);
-+}
-+
-+static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c,
-+ struct btree *b)
-+{
-+ return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s);
-+}
-+
-+static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c,
-+ struct btree *b)
-+{
-+ return btree_data_end(c, b);
-+}
-+
-+static inline void *write_block(struct btree *b)
-+{
-+ return (void *) b->data + (b->written << 9);
-+}
-+
-+static inline bool __btree_addr_written(struct btree *b, void *p)
-+{
-+ return p < write_block(b);
-+}
-+
-+static inline bool bset_written(struct btree *b, struct bset *i)
-+{
-+ return __btree_addr_written(b, i);
-+}
-+
-+static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
-+{
-+ return __btree_addr_written(b, k);
-+}
-+
-+static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
-+ struct btree *b,
-+ void *end)
-+{
-+ ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
-+ b->whiteout_u64s;
-+ ssize_t total = c->opts.btree_node_size >> 3;
-+
-+ /* Always leave one extra u64 for bch2_varint_decode: */
-+ used++;
-+
-+ return total - used;
-+}
-+
-+static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
-+ struct btree *b)
-+{
-+ ssize_t remaining = __bch_btree_u64s_remaining(c, b,
-+ btree_bkey_last(b, bset_tree_last(b)));
-+
-+ BUG_ON(remaining < 0);
-+
-+ if (bset_written(b, btree_bset_last(b)))
-+ return 0;
-+
-+ return remaining;
-+}
-+
-+#define BTREE_WRITE_SET_U64s_BITS 9
-+
-+static inline unsigned btree_write_set_buffer(struct btree *b)
-+{
-+ /*
-+ * Could buffer up larger amounts of keys for btrees with larger keys,
-+ * pending benchmarking:
-+ */
-+ return 8 << BTREE_WRITE_SET_U64s_BITS;
-+}
-+
-+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
-+ struct btree *b)
-+{
-+ struct bset_tree *t = bset_tree_last(b);
-+ struct btree_node_entry *bne = max(write_block(b),
-+ (void *) btree_bkey_last(b, bset_tree_last(b)));
-+ ssize_t remaining_space =
-+ __bch_btree_u64s_remaining(c, b, bne->keys.start);
-+
-+ if (unlikely(bset_written(b, bset(b, t)))) {
-+ if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
-+ return bne;
-+ } else {
-+ if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
-+ remaining_space > (ssize_t) (btree_write_set_buffer(b) >> 3))
-+ return bne;
-+ }
-+
-+ return NULL;
-+}
-+
-+static inline void push_whiteout(struct bch_fs *c, struct btree *b,
-+ struct bpos pos)
-+{
-+ struct bkey_packed k;
-+
-+ BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
-+ EBUG_ON(btree_node_just_written(b));
-+
-+ if (!bkey_pack_pos(&k, pos, b)) {
-+ struct bkey *u = (void *) &k;
-+
-+ bkey_init(u);
-+ u->p = pos;
-+ }
-+
-+ k.needs_whiteout = true;
-+
-+ b->whiteout_u64s += k.u64s;
-+ bkey_p_copy(unwritten_whiteouts_start(c, b), &k);
-+}
-+
-+/*
-+ * write lock must be held on @b (else the dirty bset that we were going to
-+ * insert into could be written out from under us)
-+ */
-+static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
-+ struct btree *b, unsigned u64s)
-+{
-+ if (unlikely(btree_node_need_rewrite(b)))
-+ return false;
-+
-+ return u64s <= bch_btree_keys_u64s_remaining(c, b);
-+}
-+
-+void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
-+
-+bool bch2_btree_interior_updates_flush(struct bch_fs *);
-+
-+void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *);
-+struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
-+ struct jset_entry *, unsigned long);
-+
-+void bch2_do_pending_node_rewrites(struct bch_fs *);
-+void bch2_free_pending_node_rewrites(struct bch_fs *);
-+
-+void bch2_fs_btree_interior_update_exit(struct bch_fs *);
-+void bch2_fs_btree_interior_update_init_early(struct bch_fs *);
-+int bch2_fs_btree_interior_update_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
-diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
-new file mode 100644
-index 000000000000..4e6241db518b
---- /dev/null
-+++ b/fs/bcachefs/btree_write_buffer.c
-@@ -0,0 +1,375 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_locking.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "btree_write_buffer.h"
-+#include "error.h"
-+#include "journal.h"
-+#include "journal_reclaim.h"
-+
-+#include <linux/sort.h>
-+
-+static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
-+{
-+ const struct btree_write_buffered_key *l = _l;
-+ const struct btree_write_buffered_key *r = _r;
-+
-+ return cmp_int(l->btree, r->btree) ?:
-+ bpos_cmp(l->k.k.p, r->k.k.p) ?:
-+ cmp_int(l->journal_seq, r->journal_seq) ?:
-+ cmp_int(l->journal_offset, r->journal_offset);
-+}
-+
-+static int btree_write_buffered_journal_cmp(const void *_l, const void *_r)
-+{
-+ const struct btree_write_buffered_key *l = _l;
-+ const struct btree_write_buffered_key *r = _r;
-+
-+ return cmp_int(l->journal_seq, r->journal_seq);
-+}
-+
-+static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct btree_write_buffered_key *wb,
-+ unsigned commit_flags,
-+ bool *write_locked,
-+ size_t *fast)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_path *path;
-+ int ret;
-+
-+ ret = bch2_btree_iter_traverse(iter);
-+ if (ret)
-+ return ret;
-+
-+ path = iter->path;
-+
-+ if (!*write_locked) {
-+ ret = bch2_btree_node_lock_write(trans, path, &path->l[0].b->c);
-+ if (ret)
-+ return ret;
-+
-+ bch2_btree_node_prep_for_write(trans, path, path->l[0].b);
-+ *write_locked = true;
-+ }
-+
-+ if (!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s)) {
-+ bch2_btree_node_unlock_write(trans, path, path->l[0].b);
-+ *write_locked = false;
-+ goto trans_commit;
-+ }
-+
-+ bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
-+ (*fast)++;
-+
-+ if (path->ref > 1) {
-+ /*
-+ * We can't clone a path that has write locks: if the path is
-+ * shared, unlock before set_pos(), traverse():
-+ */
-+ bch2_btree_node_unlock_write(trans, path, path->l[0].b);
-+ *write_locked = false;
-+ }
-+ return 0;
-+trans_commit:
-+ return bch2_trans_update_seq(trans, wb->journal_seq, iter, &wb->k,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ commit_flags|
-+ BTREE_INSERT_NOCHECK_RW|
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_JOURNAL_RECLAIM);
-+}
-+
-+static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
-+{
-+ union btree_write_buffer_state old, new;
-+ u64 v = READ_ONCE(wb->state.v);
-+
-+ do {
-+ old.v = new.v = v;
-+
-+ new.nr = 0;
-+ new.idx++;
-+ } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
-+
-+ while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
-+ cpu_relax();
-+
-+ smp_mb();
-+
-+ return old;
-+}
-+
-+/*
-+ * Update a btree with a write buffered key using the journal seq of the
-+ * original write buffer insert.
-+ *
-+ * It is not safe to rejournal the key once it has been inserted into the write
-+ * buffer because that may break recovery ordering. For example, the key may
-+ * have already been modified in the active write buffer in a seq that comes
-+ * before the current transaction. If we were to journal this key again and
-+ * crash, recovery would process updates in the wrong order.
-+ */
-+static int
-+btree_write_buffered_insert(struct btree_trans *trans,
-+ struct btree_write_buffered_key *wb)
-+{
-+ struct btree_iter iter;
-+ int ret;
-+
-+ bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k),
-+ BTREE_ITER_CACHED|BTREE_ITER_INTENT);
-+
-+ ret = bch2_btree_iter_traverse(&iter) ?:
-+ bch2_trans_update_seq(trans, wb->journal_seq, &iter, &wb->k,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
-+ bool locked)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct journal *j = &c->journal;
-+ struct btree_write_buffer *wb = &c->btree_write_buffer;
-+ struct journal_entry_pin pin;
-+ struct btree_write_buffered_key *i, *keys;
-+ struct btree_iter iter = { NULL };
-+ size_t nr = 0, skipped = 0, fast = 0, slowpath = 0;
-+ bool write_locked = false;
-+ union btree_write_buffer_state s;
-+ int ret = 0;
-+
-+ memset(&pin, 0, sizeof(pin));
-+
-+ if (!locked && !mutex_trylock(&wb->flush_lock))
-+ return 0;
-+
-+ bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL);
-+ bch2_journal_pin_drop(j, &wb->journal_pin);
-+
-+ s = btree_write_buffer_switch(wb);
-+ keys = wb->keys[s.idx];
-+ nr = s.nr;
-+
-+ if (race_fault())
-+ goto slowpath;
-+
-+ /*
-+ * We first sort so that we can detect and skip redundant updates, and
-+ * then we attempt to flush in sorted btree order, as this is most
-+ * efficient.
-+ *
-+ * However, since we're not flushing in the order they appear in the
-+ * journal we won't be able to drop our journal pin until everything is
-+ * flushed - which means this could deadlock the journal if we weren't
-+ * passing BTREE_INSERT_JOURNAL_RECLAIM. This causes the update to fail
-+ * if it would block taking a journal reservation.
-+ *
-+ * If that happens, simply skip the key so we can optimistically insert
-+ * as many keys as possible in the fast path.
-+ */
-+ sort(keys, nr, sizeof(keys[0]),
-+ btree_write_buffered_key_cmp, NULL);
-+
-+ for (i = keys; i < keys + nr; i++) {
-+ if (i + 1 < keys + nr &&
-+ i[0].btree == i[1].btree &&
-+ bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
-+ skipped++;
-+ i->journal_seq = 0;
-+ continue;
-+ }
-+
-+ if (write_locked &&
-+ (iter.path->btree_id != i->btree ||
-+ bpos_gt(i->k.k.p, iter.path->l[0].b->key.k.p))) {
-+ bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
-+ write_locked = false;
-+ }
-+
-+ if (!iter.path || iter.path->btree_id != i->btree) {
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p,
-+ BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
-+ }
-+
-+ bch2_btree_iter_set_pos(&iter, i->k.k.p);
-+ iter.path->preserve = false;
-+
-+ do {
-+ ret = bch2_btree_write_buffer_flush_one(trans, &iter, i,
-+ commit_flags, &write_locked, &fast);
-+ if (!write_locked)
-+ bch2_trans_begin(trans);
-+ } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
-+
-+ if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
-+ slowpath++;
-+ continue;
-+ }
-+ if (ret)
-+ break;
-+
-+ i->journal_seq = 0;
-+ }
-+
-+ if (write_locked)
-+ bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ trace_write_buffer_flush(trans, nr, skipped, fast, wb->size);
-+
-+ if (slowpath)
-+ goto slowpath;
-+
-+ bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
-+out:
-+ bch2_journal_pin_drop(j, &pin);
-+ mutex_unlock(&wb->flush_lock);
-+ return ret;
-+slowpath:
-+ trace_write_buffer_flush_slowpath(trans, i - keys, nr);
-+
-+ /*
-+ * Now sort the rest by journal seq and bump the journal pin as we go.
-+ * The slowpath zapped the seq of keys that were successfully flushed so
-+ * we can skip those here.
-+ */
-+ sort(keys, nr, sizeof(keys[0]),
-+ btree_write_buffered_journal_cmp,
-+ NULL);
-+
-+ commit_flags &= ~BCH_WATERMARK_MASK;
-+ commit_flags |= BCH_WATERMARK_reclaim;
-+
-+ for (i = keys; i < keys + nr; i++) {
-+ if (!i->journal_seq)
-+ continue;
-+
-+ if (i->journal_seq > pin.seq) {
-+ struct journal_entry_pin pin2;
-+
-+ memset(&pin2, 0, sizeof(pin2));
-+
-+ bch2_journal_pin_add(j, i->journal_seq, &pin2, NULL);
-+ bch2_journal_pin_drop(j, &pin);
-+ bch2_journal_pin_copy(j, &pin, &pin2, NULL);
-+ bch2_journal_pin_drop(j, &pin2);
-+ }
-+
-+ ret = commit_do(trans, NULL, NULL,
-+ commit_flags|
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_JOURNAL_RECLAIM,
-+ btree_write_buffered_insert(trans, i));
-+ if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
-+ break;
-+ }
-+
-+ goto out;
-+}
-+
-+int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
-+{
-+ bch2_trans_unlock(trans);
-+ mutex_lock(&trans->c->btree_write_buffer.flush_lock);
-+ return __bch2_btree_write_buffer_flush(trans, 0, true);
-+}
-+
-+int bch2_btree_write_buffer_flush(struct btree_trans *trans)
-+{
-+ return __bch2_btree_write_buffer_flush(trans, 0, false);
-+}
-+
-+static int bch2_btree_write_buffer_journal_flush(struct journal *j,
-+ struct journal_entry_pin *_pin, u64 seq)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct btree_write_buffer *wb = &c->btree_write_buffer;
-+
-+ mutex_lock(&wb->flush_lock);
-+
-+ return bch2_trans_run(c,
-+ __bch2_btree_write_buffer_flush(trans, BTREE_INSERT_NOCHECK_RW, true));
-+}
-+
-+static inline u64 btree_write_buffer_ref(int idx)
-+{
-+ return ((union btree_write_buffer_state) {
-+ .ref0 = idx == 0,
-+ .ref1 = idx == 1,
-+ }).v;
-+}
-+
-+int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_write_buffer *wb = &c->btree_write_buffer;
-+ struct btree_write_buffered_key *i;
-+ union btree_write_buffer_state old, new;
-+ int ret = 0;
-+ u64 v;
-+
-+ trans_for_each_wb_update(trans, i) {
-+ EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
-+
-+ i->journal_seq = trans->journal_res.seq;
-+ i->journal_offset = trans->journal_res.offset;
-+ }
-+
-+ preempt_disable();
-+ v = READ_ONCE(wb->state.v);
-+ do {
-+ old.v = new.v = v;
-+
-+ new.v += btree_write_buffer_ref(new.idx);
-+ new.nr += trans->nr_wb_updates;
-+ if (new.nr > wb->size) {
-+ ret = -BCH_ERR_btree_insert_need_flush_buffer;
-+ goto out;
-+ }
-+ } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
-+
-+ memcpy(wb->keys[new.idx] + old.nr,
-+ trans->wb_updates,
-+ sizeof(trans->wb_updates[0]) * trans->nr_wb_updates);
-+
-+ bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin,
-+ bch2_btree_write_buffer_journal_flush);
-+
-+ atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter);
-+out:
-+ preempt_enable();
-+ return ret;
-+}
-+
-+void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
-+{
-+ struct btree_write_buffer *wb = &c->btree_write_buffer;
-+
-+ BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal));
-+
-+ kvfree(wb->keys[1]);
-+ kvfree(wb->keys[0]);
-+}
-+
-+int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
-+{
-+ struct btree_write_buffer *wb = &c->btree_write_buffer;
-+
-+ mutex_init(&wb->flush_lock);
-+ wb->size = c->opts.btree_write_buffer_size;
-+
-+ wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL);
-+ wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL);
-+ if (!wb->keys[0] || !wb->keys[1])
-+ return -BCH_ERR_ENOMEM_fs_btree_write_buffer_init;
-+
-+ return 0;
-+}
-diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
-new file mode 100644
-index 000000000000..322df1c8304e
---- /dev/null
-+++ b/fs/bcachefs/btree_write_buffer.h
-@@ -0,0 +1,14 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
-+#define _BCACHEFS_BTREE_WRITE_BUFFER_H
-+
-+int __bch2_btree_write_buffer_flush(struct btree_trans *, unsigned, bool);
-+int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
-+int bch2_btree_write_buffer_flush(struct btree_trans *);
-+
-+int bch2_btree_insert_keys_write_buffer(struct btree_trans *);
-+
-+void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
-+int bch2_fs_btree_write_buffer_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */
-diff --git a/fs/bcachefs/btree_write_buffer_types.h b/fs/bcachefs/btree_write_buffer_types.h
-new file mode 100644
-index 000000000000..99993ba77aea
---- /dev/null
-+++ b/fs/bcachefs/btree_write_buffer_types.h
-@@ -0,0 +1,44 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
-+#define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
-+
-+#include "journal_types.h"
-+
-+#define BTREE_WRITE_BUFERED_VAL_U64s_MAX 4
-+#define BTREE_WRITE_BUFERED_U64s_MAX (BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX)
-+
-+struct btree_write_buffered_key {
-+ u64 journal_seq;
-+ unsigned journal_offset;
-+ enum btree_id btree;
-+ __BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
-+};
-+
-+union btree_write_buffer_state {
-+ struct {
-+ atomic64_t counter;
-+ };
-+
-+ struct {
-+ u64 v;
-+ };
-+
-+ struct {
-+ u64 nr:23;
-+ u64 idx:1;
-+ u64 ref0:20;
-+ u64 ref1:20;
-+ };
-+};
-+
-+struct btree_write_buffer {
-+ struct mutex flush_lock;
-+ struct journal_entry_pin journal_pin;
-+
-+ union btree_write_buffer_state state;
-+ size_t size;
-+
-+ struct btree_write_buffered_key *keys[2];
-+};
-+
-+#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */
-diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
-new file mode 100644
-index 000000000000..58d8c6ffd955
---- /dev/null
-+++ b/fs/bcachefs/buckets.c
-@@ -0,0 +1,2168 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Code for manipulating bucket marks for garbage collection.
-+ *
-+ * Copyright 2014 Datera, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "backpointers.h"
-+#include "bset.h"
-+#include "btree_gc.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "buckets_waiting_for_journal.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "inode.h"
-+#include "movinggc.h"
-+#include "recovery.h"
-+#include "reflink.h"
-+#include "replicas.h"
-+#include "subvolume.h"
-+#include "trace.h"
-+
-+#include <linux/preempt.h>
-+
-+static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
-+ enum bch_data_type data_type,
-+ s64 sectors)
-+{
-+ switch (data_type) {
-+ case BCH_DATA_btree:
-+ fs_usage->btree += sectors;
-+ break;
-+ case BCH_DATA_user:
-+ case BCH_DATA_parity:
-+ fs_usage->data += sectors;
-+ break;
-+ case BCH_DATA_cached:
-+ fs_usage->cached += sectors;
-+ break;
-+ default:
-+ break;
-+ }
-+}
-+
-+void bch2_fs_usage_initialize(struct bch_fs *c)
-+{
-+ struct bch_fs_usage *usage;
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ percpu_down_write(&c->mark_lock);
-+ usage = c->usage_base;
-+
-+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
-+ bch2_fs_usage_acc_to_base(c, i);
-+
-+ for (i = 0; i < BCH_REPLICAS_MAX; i++)
-+ usage->reserved += usage->persistent_reserved[i];
-+
-+ for (i = 0; i < c->replicas.nr; i++) {
-+ struct bch_replicas_entry *e =
-+ cpu_replicas_entry(&c->replicas, i);
-+
-+ fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
-+ }
-+
-+ for_each_member_device(ca, c, i) {
-+ struct bch_dev_usage dev = bch2_dev_usage_read(ca);
-+
-+ usage->hidden += (dev.d[BCH_DATA_sb].buckets +
-+ dev.d[BCH_DATA_journal].buckets) *
-+ ca->mi.bucket_size;
-+ }
-+
-+ percpu_up_write(&c->mark_lock);
-+}
-+
-+static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
-+ unsigned journal_seq,
-+ bool gc)
-+{
-+ BUG_ON(!gc && !journal_seq);
-+
-+ return this_cpu_ptr(gc
-+ ? ca->usage_gc
-+ : ca->usage[journal_seq & JOURNAL_BUF_MASK]);
-+}
-+
-+void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
-+{
-+ struct bch_fs *c = ca->fs;
-+ unsigned seq, i, u64s = dev_usage_u64s();
-+
-+ do {
-+ seq = read_seqcount_begin(&c->usage_lock);
-+ memcpy(usage, ca->usage_base, u64s * sizeof(u64));
-+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
-+ acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
-+ } while (read_seqcount_retry(&c->usage_lock, seq));
-+}
-+
-+u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
-+{
-+ ssize_t offset = v - (u64 *) c->usage_base;
-+ unsigned i, seq;
-+ u64 ret;
-+
-+ BUG_ON(offset < 0 || offset >= fs_usage_u64s(c));
-+ percpu_rwsem_assert_held(&c->mark_lock);
-+
-+ do {
-+ seq = read_seqcount_begin(&c->usage_lock);
-+ ret = *v;
-+
-+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
-+ ret += percpu_u64_get((u64 __percpu *) c->usage[i] + offset);
-+ } while (read_seqcount_retry(&c->usage_lock, seq));
-+
-+ return ret;
-+}
-+
-+struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
-+{
-+ struct bch_fs_usage_online *ret;
-+ unsigned nr_replicas = READ_ONCE(c->replicas.nr);
-+ unsigned seq, i;
-+retry:
-+ ret = kmalloc(__fs_usage_online_u64s(nr_replicas) * sizeof(u64), GFP_KERNEL);
-+ if (unlikely(!ret))
-+ return NULL;
-+
-+ percpu_down_read(&c->mark_lock);
-+
-+ if (nr_replicas != c->replicas.nr) {
-+ nr_replicas = c->replicas.nr;
-+ percpu_up_read(&c->mark_lock);
-+ kfree(ret);
-+ goto retry;
-+ }
-+
-+ ret->online_reserved = percpu_u64_get(c->online_reserved);
-+
-+ do {
-+ seq = read_seqcount_begin(&c->usage_lock);
-+ unsafe_memcpy(&ret->u, c->usage_base,
-+ __fs_usage_u64s(nr_replicas) * sizeof(u64),
-+ "embedded variable length struct");
-+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
-+ acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i],
-+ __fs_usage_u64s(nr_replicas));
-+ } while (read_seqcount_retry(&c->usage_lock, seq));
-+
-+ return ret;
-+}
-+
-+void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
-+{
-+ struct bch_dev *ca;
-+ unsigned i, u64s = fs_usage_u64s(c);
-+
-+ BUG_ON(idx >= ARRAY_SIZE(c->usage));
-+
-+ preempt_disable();
-+ write_seqcount_begin(&c->usage_lock);
-+
-+ acc_u64s_percpu((u64 *) c->usage_base,
-+ (u64 __percpu *) c->usage[idx], u64s);
-+ percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));
-+
-+ rcu_read_lock();
-+ for_each_member_device_rcu(ca, c, i, NULL) {
-+ u64s = dev_usage_u64s();
-+
-+ acc_u64s_percpu((u64 *) ca->usage_base,
-+ (u64 __percpu *) ca->usage[idx], u64s);
-+ percpu_memset(ca->usage[idx], 0, u64s * sizeof(u64));
-+ }
-+ rcu_read_unlock();
-+
-+ write_seqcount_end(&c->usage_lock);
-+ preempt_enable();
-+}
-+
-+void bch2_fs_usage_to_text(struct printbuf *out,
-+ struct bch_fs *c,
-+ struct bch_fs_usage_online *fs_usage)
-+{
-+ unsigned i;
-+
-+ prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);
-+
-+ prt_printf(out, "hidden:\t\t\t\t%llu\n",
-+ fs_usage->u.hidden);
-+ prt_printf(out, "data:\t\t\t\t%llu\n",
-+ fs_usage->u.data);
-+ prt_printf(out, "cached:\t\t\t\t%llu\n",
-+ fs_usage->u.cached);
-+ prt_printf(out, "reserved:\t\t\t%llu\n",
-+ fs_usage->u.reserved);
-+ prt_printf(out, "nr_inodes:\t\t\t%llu\n",
-+ fs_usage->u.nr_inodes);
-+ prt_printf(out, "online reserved:\t\t%llu\n",
-+ fs_usage->online_reserved);
-+
-+ for (i = 0;
-+ i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
-+ i++) {
-+ prt_printf(out, "%u replicas:\n", i + 1);
-+ prt_printf(out, "\treserved:\t\t%llu\n",
-+ fs_usage->u.persistent_reserved[i]);
-+ }
-+
-+ for (i = 0; i < c->replicas.nr; i++) {
-+ struct bch_replicas_entry *e =
-+ cpu_replicas_entry(&c->replicas, i);
-+
-+ prt_printf(out, "\t");
-+ bch2_replicas_entry_to_text(out, e);
-+ prt_printf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
-+ }
-+}
-+
-+static u64 reserve_factor(u64 r)
-+{
-+ return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
-+}
-+
-+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
-+{
-+ return min(fs_usage->u.hidden +
-+ fs_usage->u.btree +
-+ fs_usage->u.data +
-+ reserve_factor(fs_usage->u.reserved +
-+ fs_usage->online_reserved),
-+ c->capacity);
-+}
-+
-+static struct bch_fs_usage_short
-+__bch2_fs_usage_read_short(struct bch_fs *c)
-+{
-+ struct bch_fs_usage_short ret;
-+ u64 data, reserved;
-+
-+ ret.capacity = c->capacity -
-+ bch2_fs_usage_read_one(c, &c->usage_base->hidden);
-+
-+ data = bch2_fs_usage_read_one(c, &c->usage_base->data) +
-+ bch2_fs_usage_read_one(c, &c->usage_base->btree);
-+ reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
-+ percpu_u64_get(c->online_reserved);
-+
-+ ret.used = min(ret.capacity, data + reserve_factor(reserved));
-+ ret.free = ret.capacity - ret.used;
-+
-+ ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes);
-+
-+ return ret;
-+}
-+
-+struct bch_fs_usage_short
-+bch2_fs_usage_read_short(struct bch_fs *c)
-+{
-+ struct bch_fs_usage_short ret;
-+
-+ percpu_down_read(&c->mark_lock);
-+ ret = __bch2_fs_usage_read_short(c);
-+ percpu_up_read(&c->mark_lock);
-+
-+ return ret;
-+}
-+
-+void bch2_dev_usage_init(struct bch_dev *ca)
-+{
-+ ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
-+}
-+
-+static inline int bucket_sectors_fragmented(struct bch_dev *ca,
-+ struct bch_alloc_v4 a)
-+{
-+ return a.dirty_sectors
-+ ? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors)
-+ : 0;
-+}
-+
-+static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
-+ struct bch_alloc_v4 old,
-+ struct bch_alloc_v4 new,
-+ u64 journal_seq, bool gc)
-+{
-+ struct bch_fs_usage *fs_usage;
-+ struct bch_dev_usage *u;
-+
-+ preempt_disable();
-+ fs_usage = fs_usage_ptr(c, journal_seq, gc);
-+
-+ if (data_type_is_hidden(old.data_type))
-+ fs_usage->hidden -= ca->mi.bucket_size;
-+ if (data_type_is_hidden(new.data_type))
-+ fs_usage->hidden += ca->mi.bucket_size;
-+
-+ u = dev_usage_ptr(ca, journal_seq, gc);
-+
-+ u->d[old.data_type].buckets--;
-+ u->d[new.data_type].buckets++;
-+
-+ u->buckets_ec -= (int) !!old.stripe;
-+ u->buckets_ec += (int) !!new.stripe;
-+
-+ u->d[old.data_type].sectors -= old.dirty_sectors;
-+ u->d[new.data_type].sectors += new.dirty_sectors;
-+
-+ u->d[BCH_DATA_cached].sectors += new.cached_sectors;
-+ u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
-+
-+ u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
-+ u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
-+
-+ preempt_enable();
-+}
-+
-+static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
-+ struct bucket old, struct bucket new,
-+ u64 journal_seq, bool gc)
-+{
-+ struct bch_alloc_v4 old_a = {
-+ .gen = old.gen,
-+ .data_type = old.data_type,
-+ .dirty_sectors = old.dirty_sectors,
-+ .cached_sectors = old.cached_sectors,
-+ .stripe = old.stripe,
-+ };
-+ struct bch_alloc_v4 new_a = {
-+ .gen = new.gen,
-+ .data_type = new.data_type,
-+ .dirty_sectors = new.dirty_sectors,
-+ .cached_sectors = new.cached_sectors,
-+ .stripe = new.stripe,
-+ };
-+
-+ bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
-+}
-+
-+static inline int __update_replicas(struct bch_fs *c,
-+ struct bch_fs_usage *fs_usage,
-+ struct bch_replicas_entry *r,
-+ s64 sectors)
-+{
-+ int idx = bch2_replicas_entry_idx(c, r);
-+
-+ if (idx < 0)
-+ return -1;
-+
-+ fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
-+ fs_usage->replicas[idx] += sectors;
-+ return 0;
-+}
-+
-+static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
-+ struct bch_replicas_entry *r, s64 sectors,
-+ unsigned journal_seq, bool gc)
-+{
-+ struct bch_fs_usage *fs_usage;
-+ int idx, ret = 0;
-+ struct printbuf buf = PRINTBUF;
-+
-+ percpu_down_read(&c->mark_lock);
-+
-+ idx = bch2_replicas_entry_idx(c, r);
-+ if (idx < 0 &&
-+ fsck_err(c, ptr_to_missing_replicas_entry,
-+ "no replicas entry\n while marking %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ percpu_up_read(&c->mark_lock);
-+ ret = bch2_mark_replicas(c, r);
-+ percpu_down_read(&c->mark_lock);
-+
-+ if (ret)
-+ goto err;
-+ idx = bch2_replicas_entry_idx(c, r);
-+ }
-+ if (idx < 0) {
-+ ret = -1;
-+ goto err;
-+ }
-+
-+ preempt_disable();
-+ fs_usage = fs_usage_ptr(c, journal_seq, gc);
-+ fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
-+ fs_usage->replicas[idx] += sectors;
-+ preempt_enable();
-+err:
-+fsck_err:
-+ percpu_up_read(&c->mark_lock);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static inline int update_cached_sectors(struct bch_fs *c,
-+ struct bkey_s_c k,
-+ unsigned dev, s64 sectors,
-+ unsigned journal_seq, bool gc)
-+{
-+ struct bch_replicas_padded r;
-+
-+ bch2_replicas_entry_cached(&r.e, dev);
-+
-+ return update_replicas(c, k, &r.e, sectors, journal_seq, gc);
-+}
-+
-+static int __replicas_deltas_realloc(struct btree_trans *trans, unsigned more,
-+ gfp_t gfp)
-+{
-+ struct replicas_delta_list *d = trans->fs_usage_deltas;
-+ unsigned new_size = d ? (d->size + more) * 2 : 128;
-+ unsigned alloc_size = sizeof(*d) + new_size;
-+
-+ WARN_ON_ONCE(alloc_size > REPLICAS_DELTA_LIST_MAX);
-+
-+ if (!d || d->used + more > d->size) {
-+ d = krealloc(d, alloc_size, gfp|__GFP_ZERO);
-+
-+ if (unlikely(!d)) {
-+ if (alloc_size > REPLICAS_DELTA_LIST_MAX)
-+ return -ENOMEM;
-+
-+ d = mempool_alloc(&trans->c->replicas_delta_pool, gfp);
-+ if (!d)
-+ return -ENOMEM;
-+
-+ memset(d, 0, REPLICAS_DELTA_LIST_MAX);
-+
-+ if (trans->fs_usage_deltas)
-+ memcpy(d, trans->fs_usage_deltas,
-+ trans->fs_usage_deltas->size + sizeof(*d));
-+
-+ new_size = REPLICAS_DELTA_LIST_MAX - sizeof(*d);
-+ kfree(trans->fs_usage_deltas);
-+ }
-+
-+ d->size = new_size;
-+ trans->fs_usage_deltas = d;
-+ }
-+
-+ return 0;
-+}
-+
-+int bch2_replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
-+{
-+ return allocate_dropping_locks_errcode(trans,
-+ __replicas_deltas_realloc(trans, more, _gfp));
-+}
-+
-+static inline int update_replicas_list(struct btree_trans *trans,
-+ struct bch_replicas_entry *r,
-+ s64 sectors)
-+{
-+ struct replicas_delta_list *d;
-+ struct replicas_delta *n;
-+ unsigned b;
-+ int ret;
-+
-+ if (!sectors)
-+ return 0;
-+
-+ b = replicas_entry_bytes(r) + 8;
-+ ret = bch2_replicas_deltas_realloc(trans, b);
-+ if (ret)
-+ return ret;
-+
-+ d = trans->fs_usage_deltas;
-+ n = (void *) d->d + d->used;
-+ n->delta = sectors;
-+ unsafe_memcpy((void *) n + offsetof(struct replicas_delta, r),
-+ r, replicas_entry_bytes(r),
-+ "flexible array member embedded in strcuct with padding");
-+ bch2_replicas_entry_sort(&n->r);
-+ d->used += b;
-+ return 0;
-+}
-+
-+static inline int update_cached_sectors_list(struct btree_trans *trans,
-+ unsigned dev, s64 sectors)
-+{
-+ struct bch_replicas_padded r;
-+
-+ bch2_replicas_entry_cached(&r.e, dev);
-+
-+ return update_replicas_list(trans, &r.e, sectors);
-+}
-+
-+int bch2_mark_alloc(struct btree_trans *trans,
-+ enum btree_id btree, unsigned level,
-+ struct bkey_s_c old, struct bkey_s_c new,
-+ unsigned flags)
-+{
-+ bool gc = flags & BTREE_TRIGGER_GC;
-+ u64 journal_seq = trans->journal_res.seq;
-+ u64 bucket_journal_seq;
-+ struct bch_fs *c = trans->c;
-+ struct bch_alloc_v4 old_a_convert, new_a_convert;
-+ const struct bch_alloc_v4 *old_a, *new_a;
-+ struct bch_dev *ca;
-+ int ret = 0;
-+
-+ /*
-+ * alloc btree is read in by bch2_alloc_read, not gc:
-+ */
-+ if ((flags & BTREE_TRIGGER_GC) &&
-+ !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
-+ return 0;
-+
-+ if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
-+ "alloc key for invalid device or bucket"))
-+ return -EIO;
-+
-+ ca = bch_dev_bkey_exists(c, new.k->p.inode);
-+
-+ old_a = bch2_alloc_to_v4(old, &old_a_convert);
-+ new_a = bch2_alloc_to_v4(new, &new_a_convert);
-+
-+ bucket_journal_seq = new_a->journal_seq;
-+
-+ if ((flags & BTREE_TRIGGER_INSERT) &&
-+ data_type_is_empty(old_a->data_type) !=
-+ data_type_is_empty(new_a->data_type) &&
-+ new.k->type == KEY_TYPE_alloc_v4) {
-+ struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
-+
-+ EBUG_ON(!journal_seq);
-+
-+ /*
-+ * If the btree updates referring to a bucket weren't flushed
-+ * before the bucket became empty again, then the we don't have
-+ * to wait on a journal flush before we can reuse the bucket:
-+ */
-+ v->journal_seq = bucket_journal_seq =
-+ data_type_is_empty(new_a->data_type) &&
-+ (journal_seq == v->journal_seq ||
-+ bch2_journal_noflush_seq(&c->journal, v->journal_seq))
-+ ? 0 : journal_seq;
-+ }
-+
-+ if (!data_type_is_empty(old_a->data_type) &&
-+ data_type_is_empty(new_a->data_type) &&
-+ bucket_journal_seq) {
-+ ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-+ c->journal.flushed_seq_ondisk,
-+ new.k->p.inode, new.k->p.offset,
-+ bucket_journal_seq);
-+ if (ret) {
-+ bch2_fs_fatal_error(c,
-+ "error setting bucket_needs_journal_commit: %i", ret);
-+ return ret;
-+ }
-+ }
-+
-+ percpu_down_read(&c->mark_lock);
-+ if (!gc && new_a->gen != old_a->gen)
-+ *bucket_gen(ca, new.k->p.offset) = new_a->gen;
-+
-+ bch2_dev_usage_update(c, ca, *old_a, *new_a, journal_seq, gc);
-+
-+ if (gc) {
-+ struct bucket *g = gc_bucket(ca, new.k->p.offset);
-+
-+ bucket_lock(g);
-+
-+ g->gen_valid = 1;
-+ g->gen = new_a->gen;
-+ g->data_type = new_a->data_type;
-+ g->stripe = new_a->stripe;
-+ g->stripe_redundancy = new_a->stripe_redundancy;
-+ g->dirty_sectors = new_a->dirty_sectors;
-+ g->cached_sectors = new_a->cached_sectors;
-+
-+ bucket_unlock(g);
-+ }
-+ percpu_up_read(&c->mark_lock);
-+
-+ /*
-+ * need to know if we're getting called from the invalidate path or
-+ * not:
-+ */
-+
-+ if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
-+ old_a->cached_sectors) {
-+ ret = update_cached_sectors(c, new, ca->dev_idx,
-+ -((s64) old_a->cached_sectors),
-+ journal_seq, gc);
-+ if (ret) {
-+ bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
-+ __func__);
-+ return ret;
-+ }
-+ }
-+
-+ if (new_a->data_type == BCH_DATA_free &&
-+ (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
-+ closure_wake_up(&c->freelist_wait);
-+
-+ if (new_a->data_type == BCH_DATA_need_discard &&
-+ (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
-+ bch2_do_discards(c);
-+
-+ if (old_a->data_type != BCH_DATA_cached &&
-+ new_a->data_type == BCH_DATA_cached &&
-+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
-+ bch2_do_invalidates(c);
-+
-+ if (new_a->data_type == BCH_DATA_need_gc_gens)
-+ bch2_do_gc_gens(c);
-+
-+ return 0;
-+}
-+
-+int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
-+ size_t b, enum bch_data_type data_type,
-+ unsigned sectors, struct gc_pos pos,
-+ unsigned flags)
-+{
-+ struct bucket old, new, *g;
-+ int ret = 0;
-+
-+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
-+ BUG_ON(data_type != BCH_DATA_sb &&
-+ data_type != BCH_DATA_journal);
-+
-+ /*
-+ * Backup superblock might be past the end of our normal usable space:
-+ */
-+ if (b >= ca->mi.nbuckets)
-+ return 0;
-+
-+ percpu_down_read(&c->mark_lock);
-+ g = gc_bucket(ca, b);
-+
-+ bucket_lock(g);
-+ old = *g;
-+
-+ if (bch2_fs_inconsistent_on(g->data_type &&
-+ g->data_type != data_type, c,
-+ "different types of data in same bucket: %s, %s",
-+ bch2_data_types[g->data_type],
-+ bch2_data_types[data_type])) {
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
-+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size",
-+ ca->dev_idx, b, g->gen,
-+ bch2_data_types[g->data_type ?: data_type],
-+ g->dirty_sectors, sectors)) {
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+
-+ g->data_type = data_type;
-+ g->dirty_sectors += sectors;
-+ new = *g;
-+err:
-+ bucket_unlock(g);
-+ if (!ret)
-+ bch2_dev_usage_update_m(c, ca, old, new, 0, true);
-+ percpu_up_read(&c->mark_lock);
-+ return ret;
-+}
-+
-+static int check_bucket_ref(struct btree_trans *trans,
-+ struct bkey_s_c k,
-+ const struct bch_extent_ptr *ptr,
-+ s64 sectors, enum bch_data_type ptr_data_type,
-+ u8 b_gen, u8 bucket_data_type,
-+ u32 dirty_sectors, u32 cached_sectors)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+ size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
-+ u32 bucket_sectors = !ptr->cached
-+ ? dirty_sectors
-+ : cached_sectors;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ if (bucket_data_type == BCH_DATA_cached)
-+ bucket_data_type = BCH_DATA_user;
-+
-+ if ((bucket_data_type == BCH_DATA_stripe && ptr_data_type == BCH_DATA_user) ||
-+ (bucket_data_type == BCH_DATA_user && ptr_data_type == BCH_DATA_stripe))
-+ bucket_data_type = ptr_data_type = BCH_DATA_stripe;
-+
-+ if (gen_after(ptr->gen, b_gen)) {
-+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-+ BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen,
-+ "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
-+ "while marking %s",
-+ ptr->dev, bucket_nr, b_gen,
-+ bch2_data_types[bucket_data_type ?: ptr_data_type],
-+ ptr->gen,
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
-+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-+ BCH_FSCK_ERR_ptr_too_stale,
-+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
-+ "while marking %s",
-+ ptr->dev, bucket_nr, b_gen,
-+ bch2_data_types[bucket_data_type ?: ptr_data_type],
-+ ptr->gen,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (b_gen != ptr->gen && !ptr->cached) {
-+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-+ BCH_FSCK_ERR_stale_dirty_ptr,
-+ "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
-+ "while marking %s",
-+ ptr->dev, bucket_nr, b_gen,
-+ *bucket_gen(ca, bucket_nr),
-+ bch2_data_types[bucket_data_type ?: ptr_data_type],
-+ ptr->gen,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (b_gen != ptr->gen) {
-+ ret = 1;
-+ goto out;
-+ }
-+
-+ if (!data_type_is_empty(bucket_data_type) &&
-+ ptr_data_type &&
-+ bucket_data_type != ptr_data_type) {
-+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-+ BCH_FSCK_ERR_ptr_bucket_data_type_mismatch,
-+ "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
-+ "while marking %s",
-+ ptr->dev, bucket_nr, b_gen,
-+ bch2_data_types[bucket_data_type],
-+ bch2_data_types[ptr_data_type],
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if ((u64) bucket_sectors + sectors > U32_MAX) {
-+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-+ BCH_FSCK_ERR_bucket_sector_count_overflow,
-+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
-+ "while marking %s",
-+ ptr->dev, bucket_nr, b_gen,
-+ bch2_data_types[bucket_data_type ?: ptr_data_type],
-+ bucket_sectors, sectors,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ ret = -EIO;
-+ goto err;
-+ }
-+out:
-+ printbuf_exit(&buf);
-+ return ret;
-+err:
-+ bch2_dump_trans_updates(trans);
-+ goto out;
-+}
-+
-+static int mark_stripe_bucket(struct btree_trans *trans,
-+ struct bkey_s_c k,
-+ unsigned ptr_idx,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ u64 journal_seq = trans->journal_res.seq;
-+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-+ unsigned nr_data = s->nr_blocks - s->nr_redundant;
-+ bool parity = ptr_idx >= nr_data;
-+ enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
-+ s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
-+ const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+ struct bucket old, new, *g;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
-+
-+ /* * XXX doesn't handle deletion */
-+
-+ percpu_down_read(&c->mark_lock);
-+ g = PTR_GC_BUCKET(ca, ptr);
-+
-+ if (g->dirty_sectors ||
-+ (g->stripe && g->stripe != k.k->p.offset)) {
-+ bch2_fs_inconsistent(c,
-+ "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
-+ ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ bucket_lock(g);
-+ old = *g;
-+
-+ ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
-+ g->gen, g->data_type,
-+ g->dirty_sectors, g->cached_sectors);
-+ if (ret)
-+ goto err;
-+
-+ g->data_type = data_type;
-+ g->dirty_sectors += sectors;
-+
-+ g->stripe = k.k->p.offset;
-+ g->stripe_redundancy = s->nr_redundant;
-+ new = *g;
-+err:
-+ bucket_unlock(g);
-+ if (!ret)
-+ bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
-+ percpu_up_read(&c->mark_lock);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int __mark_pointer(struct btree_trans *trans,
-+ struct bkey_s_c k,
-+ const struct bch_extent_ptr *ptr,
-+ s64 sectors, enum bch_data_type ptr_data_type,
-+ u8 bucket_gen, u8 *bucket_data_type,
-+ u32 *dirty_sectors, u32 *cached_sectors)
-+{
-+ u32 *dst_sectors = !ptr->cached
-+ ? dirty_sectors
-+ : cached_sectors;
-+ int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
-+ bucket_gen, *bucket_data_type,
-+ *dirty_sectors, *cached_sectors);
-+
-+ if (ret)
-+ return ret;
-+
-+ *dst_sectors += sectors;
-+ *bucket_data_type = *dirty_sectors || *cached_sectors
-+ ? ptr_data_type : 0;
-+ return 0;
-+}
-+
-+static int bch2_mark_pointer(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k,
-+ struct extent_ptr_decoded p,
-+ s64 sectors,
-+ unsigned flags)
-+{
-+ u64 journal_seq = trans->journal_res.seq;
-+ struct bch_fs *c = trans->c;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-+ struct bucket old, new, *g;
-+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
-+ u8 bucket_data_type;
-+ int ret = 0;
-+
-+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
-+
-+ percpu_down_read(&c->mark_lock);
-+ g = PTR_GC_BUCKET(ca, &p.ptr);
-+ bucket_lock(g);
-+ old = *g;
-+
-+ bucket_data_type = g->data_type;
-+ ret = __mark_pointer(trans, k, &p.ptr, sectors,
-+ data_type, g->gen,
-+ &bucket_data_type,
-+ &g->dirty_sectors,
-+ &g->cached_sectors);
-+ if (!ret)
-+ g->data_type = bucket_data_type;
-+
-+ new = *g;
-+ bucket_unlock(g);
-+ if (!ret)
-+ bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
-+ percpu_up_read(&c->mark_lock);
-+
-+ return ret;
-+}
-+
-+static int bch2_mark_stripe_ptr(struct btree_trans *trans,
-+ struct bkey_s_c k,
-+ struct bch_extent_stripe_ptr p,
-+ enum bch_data_type data_type,
-+ s64 sectors,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_replicas_padded r;
-+ struct gc_stripe *m;
-+
-+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
-+
-+ m = genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL);
-+ if (!m) {
-+ bch_err(c, "error allocating memory for gc_stripes, idx %llu",
-+ (u64) p.idx);
-+ return -BCH_ERR_ENOMEM_mark_stripe_ptr;
-+ }
-+
-+ mutex_lock(&c->ec_stripes_heap_lock);
-+
-+ if (!m || !m->alive) {
-+ mutex_unlock(&c->ec_stripes_heap_lock);
-+ bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
-+ (u64) p.idx);
-+ bch2_inconsistent_error(c);
-+ return -EIO;
-+ }
-+
-+ m->block_sectors[p.block] += sectors;
-+
-+ r = m->r;
-+ mutex_unlock(&c->ec_stripes_heap_lock);
-+
-+ r.e.data_type = data_type;
-+ update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true);
-+
-+ return 0;
-+}
-+
-+static int __mark_extent(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, unsigned flags)
-+{
-+ u64 journal_seq = trans->journal_res.seq;
-+ struct bch_fs *c = trans->c;
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ struct bch_replicas_padded r;
-+ enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
-+ ? BCH_DATA_btree
-+ : BCH_DATA_user;
-+ s64 sectors = bkey_is_btree_ptr(k.k)
-+ ? btree_sectors(c)
-+ : k.k->size;
-+ s64 dirty_sectors = 0;
-+ bool stale;
-+ int ret;
-+
-+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
-+
-+ r.e.data_type = data_type;
-+ r.e.nr_devs = 0;
-+ r.e.nr_required = 1;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ s64 disk_sectors = ptr_disk_sectors(sectors, p);
-+
-+ if (flags & BTREE_TRIGGER_OVERWRITE)
-+ disk_sectors = -disk_sectors;
-+
-+ ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags);
-+ if (ret < 0)
-+ return ret;
-+
-+ stale = ret > 0;
-+
-+ if (p.ptr.cached) {
-+ if (!stale) {
-+ ret = update_cached_sectors(c, k, p.ptr.dev,
-+ disk_sectors, journal_seq, true);
-+ if (ret) {
-+ bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
-+ __func__);
-+ return ret;
-+ }
-+ }
-+ } else if (!p.has_ec) {
-+ dirty_sectors += disk_sectors;
-+ r.e.devs[r.e.nr_devs++] = p.ptr.dev;
-+ } else {
-+ ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type,
-+ disk_sectors, flags);
-+ if (ret)
-+ return ret;
-+
-+ /*
-+ * There may be other dirty pointers in this extent, but
-+ * if so they're not required for mounting if we have an
-+ * erasure coded pointer in this extent:
-+ */
-+ r.e.nr_required = 0;
-+ }
-+ }
-+
-+ if (r.e.nr_devs) {
-+ ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true);
-+ if (ret) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
-+ printbuf_exit(&buf);
-+ return ret;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+int bch2_mark_extent(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_s_c new,
-+ unsigned flags)
-+{
-+ return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags);
-+}
-+
-+int bch2_mark_stripe(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_s_c new,
-+ unsigned flags)
-+{
-+ bool gc = flags & BTREE_TRIGGER_GC;
-+ u64 journal_seq = trans->journal_res.seq;
-+ struct bch_fs *c = trans->c;
-+ u64 idx = new.k->p.offset;
-+ const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
-+ ? bkey_s_c_to_stripe(old).v : NULL;
-+ const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
-+ ? bkey_s_c_to_stripe(new).v : NULL;
-+ unsigned i;
-+ int ret;
-+
-+ BUG_ON(gc && old_s);
-+
-+ if (!gc) {
-+ struct stripe *m = genradix_ptr(&c->stripes, idx);
-+
-+ if (!m) {
-+ struct printbuf buf1 = PRINTBUF;
-+ struct printbuf buf2 = PRINTBUF;
-+
-+ bch2_bkey_val_to_text(&buf1, c, old);
-+ bch2_bkey_val_to_text(&buf2, c, new);
-+ bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
-+ "old %s\n"
-+ "new %s", idx, buf1.buf, buf2.buf);
-+ printbuf_exit(&buf2);
-+ printbuf_exit(&buf1);
-+ bch2_inconsistent_error(c);
-+ return -1;
-+ }
-+
-+ if (!new_s) {
-+ bch2_stripes_heap_del(c, m, idx);
-+
-+ memset(m, 0, sizeof(*m));
-+ } else {
-+ m->sectors = le16_to_cpu(new_s->sectors);
-+ m->algorithm = new_s->algorithm;
-+ m->nr_blocks = new_s->nr_blocks;
-+ m->nr_redundant = new_s->nr_redundant;
-+ m->blocks_nonempty = 0;
-+
-+ for (i = 0; i < new_s->nr_blocks; i++)
-+ m->blocks_nonempty += !!stripe_blockcount_get(new_s, i);
-+
-+ if (!old_s)
-+ bch2_stripes_heap_insert(c, m, idx);
-+ else
-+ bch2_stripes_heap_update(c, m, idx);
-+ }
-+ } else {
-+ struct gc_stripe *m =
-+ genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
-+
-+ if (!m) {
-+ bch_err(c, "error allocating memory for gc_stripes, idx %llu",
-+ idx);
-+ return -BCH_ERR_ENOMEM_mark_stripe;
-+ }
-+ /*
-+ * This will be wrong when we bring back runtime gc: we should
-+ * be unmarking the old key and then marking the new key
-+ */
-+ m->alive = true;
-+ m->sectors = le16_to_cpu(new_s->sectors);
-+ m->nr_blocks = new_s->nr_blocks;
-+ m->nr_redundant = new_s->nr_redundant;
-+
-+ for (i = 0; i < new_s->nr_blocks; i++)
-+ m->ptrs[i] = new_s->ptrs[i];
-+
-+ bch2_bkey_to_replicas(&m->r.e, new);
-+
-+ /*
-+ * gc recalculates this field from stripe ptr
-+ * references:
-+ */
-+ memset(m->block_sectors, 0, sizeof(m->block_sectors));
-+
-+ for (i = 0; i < new_s->nr_blocks; i++) {
-+ ret = mark_stripe_bucket(trans, new, i, flags);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ ret = update_replicas(c, new, &m->r.e,
-+ ((s64) m->sectors * m->nr_redundant),
-+ journal_seq, gc);
-+ if (ret) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ bch2_bkey_val_to_text(&buf, c, new);
-+ bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
-+ printbuf_exit(&buf);
-+ return ret;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+static int __mark_reservation(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_fs_usage *fs_usage;
-+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
-+ s64 sectors = (s64) k.k->size;
-+
-+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
-+
-+ if (flags & BTREE_TRIGGER_OVERWRITE)
-+ sectors = -sectors;
-+ sectors *= replicas;
-+
-+ percpu_down_read(&c->mark_lock);
-+ preempt_disable();
-+
-+ fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC);
-+ replicas = clamp_t(unsigned, replicas, 1,
-+ ARRAY_SIZE(fs_usage->persistent_reserved));
-+
-+ fs_usage->reserved += sectors;
-+ fs_usage->persistent_reserved[replicas - 1] += sectors;
-+
-+ preempt_enable();
-+ percpu_up_read(&c->mark_lock);
-+
-+ return 0;
-+}
-+
-+int bch2_mark_reservation(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_s_c new,
-+ unsigned flags)
-+{
-+ return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags);
-+}
-+
-+static s64 __bch2_mark_reflink_p(struct btree_trans *trans,
-+ struct bkey_s_c_reflink_p p,
-+ u64 start, u64 end,
-+ u64 *idx, unsigned flags, size_t r_idx)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct reflink_gc *r;
-+ int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
-+ u64 next_idx = end;
-+ s64 ret = 0;
-+ struct printbuf buf = PRINTBUF;
-+
-+ if (r_idx >= c->reflink_gc_nr)
-+ goto not_found;
-+
-+ r = genradix_ptr(&c->reflink_gc_table, r_idx);
-+ next_idx = min(next_idx, r->offset - r->size);
-+ if (*idx < next_idx)
-+ goto not_found;
-+
-+ BUG_ON((s64) r->refcount + add < 0);
-+
-+ r->refcount += add;
-+ *idx = r->offset;
-+ return 0;
-+not_found:
-+ if (fsck_err(c, reflink_p_to_missing_reflink_v,
-+ "pointer to missing indirect extent\n"
-+ " %s\n"
-+ " missing range %llu-%llu",
-+ (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
-+ *idx, next_idx)) {
-+ struct bkey_i_error *new;
-+
-+ new = bch2_trans_kmalloc(trans, sizeof(*new));
-+ ret = PTR_ERR_OR_ZERO(new);
-+ if (ret)
-+ goto err;
-+
-+ bkey_init(&new->k);
-+ new->k.type = KEY_TYPE_error;
-+ new->k.p = bkey_start_pos(p.k);
-+ new->k.p.offset += *idx - start;
-+ bch2_key_resize(&new->k, next_idx - *idx);
-+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i,
-+ BTREE_TRIGGER_NORUN);
-+ }
-+
-+ *idx = next_idx;
-+err:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int __mark_reflink_p(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-+ struct reflink_gc *ref;
-+ size_t l, r, m;
-+ u64 idx = le64_to_cpu(p.v->idx), start = idx;
-+ u64 end = le64_to_cpu(p.v->idx) + p.k->size;
-+ int ret = 0;
-+
-+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
-+
-+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) {
-+ idx -= le32_to_cpu(p.v->front_pad);
-+ end += le32_to_cpu(p.v->back_pad);
-+ }
-+
-+ l = 0;
-+ r = c->reflink_gc_nr;
-+ while (l < r) {
-+ m = l + (r - l) / 2;
-+
-+ ref = genradix_ptr(&c->reflink_gc_table, m);
-+ if (ref->offset <= idx)
-+ l = m + 1;
-+ else
-+ r = m;
-+ }
-+
-+ while (idx < end && !ret)
-+ ret = __bch2_mark_reflink_p(trans, p, start, end,
-+ &idx, flags, l++);
-+
-+ return ret;
-+}
-+
-+int bch2_mark_reflink_p(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_s_c new,
-+ unsigned flags)
-+{
-+ return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags);
-+}
-+
-+void bch2_trans_fs_usage_revert(struct btree_trans *trans,
-+ struct replicas_delta_list *deltas)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_fs_usage *dst;
-+ struct replicas_delta *d, *top = (void *) deltas->d + deltas->used;
-+ s64 added = 0;
-+ unsigned i;
-+
-+ percpu_down_read(&c->mark_lock);
-+ preempt_disable();
-+ dst = fs_usage_ptr(c, trans->journal_res.seq, false);
-+
-+ /* revert changes: */
-+ for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
-+ switch (d->r.data_type) {
-+ case BCH_DATA_btree:
-+ case BCH_DATA_user:
-+ case BCH_DATA_parity:
-+ added += d->delta;
-+ }
-+ BUG_ON(__update_replicas(c, dst, &d->r, -d->delta));
-+ }
-+
-+ dst->nr_inodes -= deltas->nr_inodes;
-+
-+ for (i = 0; i < BCH_REPLICAS_MAX; i++) {
-+ added -= deltas->persistent_reserved[i];
-+ dst->reserved -= deltas->persistent_reserved[i];
-+ dst->persistent_reserved[i] -= deltas->persistent_reserved[i];
-+ }
-+
-+ if (added > 0) {
-+ trans->disk_res->sectors += added;
-+ this_cpu_add(*c->online_reserved, added);
-+ }
-+
-+ preempt_enable();
-+ percpu_up_read(&c->mark_lock);
-+}
-+
-+int bch2_trans_fs_usage_apply(struct btree_trans *trans,
-+ struct replicas_delta_list *deltas)
-+{
-+ struct bch_fs *c = trans->c;
-+ static int warned_disk_usage = 0;
-+ bool warn = false;
-+ u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
-+ struct replicas_delta *d, *d2;
-+ struct replicas_delta *top = (void *) deltas->d + deltas->used;
-+ struct bch_fs_usage *dst;
-+ s64 added = 0, should_not_have_added;
-+ unsigned i;
-+
-+ percpu_down_read(&c->mark_lock);
-+ preempt_disable();
-+ dst = fs_usage_ptr(c, trans->journal_res.seq, false);
-+
-+ for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
-+ switch (d->r.data_type) {
-+ case BCH_DATA_btree:
-+ case BCH_DATA_user:
-+ case BCH_DATA_parity:
-+ added += d->delta;
-+ }
-+
-+ if (__update_replicas(c, dst, &d->r, d->delta))
-+ goto need_mark;
-+ }
-+
-+ dst->nr_inodes += deltas->nr_inodes;
-+
-+ for (i = 0; i < BCH_REPLICAS_MAX; i++) {
-+ added += deltas->persistent_reserved[i];
-+ dst->reserved += deltas->persistent_reserved[i];
-+ dst->persistent_reserved[i] += deltas->persistent_reserved[i];
-+ }
-+
-+ /*
-+ * Not allowed to reduce sectors_available except by getting a
-+ * reservation:
-+ */
-+ should_not_have_added = added - (s64) disk_res_sectors;
-+ if (unlikely(should_not_have_added > 0)) {
-+ u64 old, new, v = atomic64_read(&c->sectors_available);
-+
-+ do {
-+ old = v;
-+ new = max_t(s64, 0, old - should_not_have_added);
-+ } while ((v = atomic64_cmpxchg(&c->sectors_available,
-+ old, new)) != old);
-+
-+ added -= should_not_have_added;
-+ warn = true;
-+ }
-+
-+ if (added > 0) {
-+ trans->disk_res->sectors -= added;
-+ this_cpu_sub(*c->online_reserved, added);
-+ }
-+
-+ preempt_enable();
-+ percpu_up_read(&c->mark_lock);
-+
-+ if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
-+ bch2_trans_inconsistent(trans,
-+ "disk usage increased %lli more than %llu sectors reserved)",
-+ should_not_have_added, disk_res_sectors);
-+ return 0;
-+need_mark:
-+ /* revert changes: */
-+ for (d2 = deltas->d; d2 != d; d2 = replicas_delta_next(d2))
-+ BUG_ON(__update_replicas(c, dst, &d2->r, -d2->delta));
-+
-+ preempt_enable();
-+ percpu_up_read(&c->mark_lock);
-+ return -1;
-+}
-+
-+/* trans_mark: */
-+
-+static inline int bch2_trans_mark_pointer(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, struct extent_ptr_decoded p,
-+ unsigned flags)
-+{
-+ bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
-+ struct btree_iter iter;
-+ struct bkey_i_alloc_v4 *a;
-+ struct bpos bucket;
-+ struct bch_backpointer bp;
-+ s64 sectors;
-+ int ret;
-+
-+ bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
-+ sectors = bp.bucket_len;
-+ if (!insert)
-+ sectors = -sectors;
-+
-+ a = bch2_trans_start_alloc_update(trans, &iter, bucket);
-+ if (IS_ERR(a))
-+ return PTR_ERR(a);
-+
-+ ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
-+ a->v.gen, &a->v.data_type,
-+ &a->v.dirty_sectors, &a->v.cached_sectors) ?:
-+ bch2_trans_update(trans, &iter, &a->k_i, 0);
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (ret)
-+ return ret;
-+
-+ if (!p.ptr.cached) {
-+ ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
-+ struct extent_ptr_decoded p,
-+ s64 sectors, enum bch_data_type data_type)
-+{
-+ struct btree_iter iter;
-+ struct bkey_i_stripe *s;
-+ struct bch_replicas_padded r;
-+ int ret = 0;
-+
-+ s = bch2_bkey_get_mut_typed(trans, &iter,
-+ BTREE_ID_stripes, POS(0, p.ec.idx),
-+ BTREE_ITER_WITH_UPDATES, stripe);
-+ ret = PTR_ERR_OR_ZERO(s);
-+ if (unlikely(ret)) {
-+ bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
-+ "pointer to nonexistent stripe %llu",
-+ (u64) p.ec.idx);
-+ goto err;
-+ }
-+
-+ if (!bch2_ptr_matches_stripe(&s->v, p)) {
-+ bch2_trans_inconsistent(trans,
-+ "stripe pointer doesn't match stripe %llu",
-+ (u64) p.ec.idx);
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ stripe_blockcount_set(&s->v, p.ec.block,
-+ stripe_blockcount_get(&s->v, p.ec.block) +
-+ sectors);
-+
-+ bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
-+ r.e.data_type = data_type;
-+ ret = update_replicas_list(trans, &r.e, sectors);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static int __trans_mark_extent(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ struct bch_replicas_padded r;
-+ enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
-+ ? BCH_DATA_btree
-+ : BCH_DATA_user;
-+ s64 sectors = bkey_is_btree_ptr(k.k)
-+ ? btree_sectors(c)
-+ : k.k->size;
-+ s64 dirty_sectors = 0;
-+ bool stale;
-+ int ret = 0;
-+
-+ r.e.data_type = data_type;
-+ r.e.nr_devs = 0;
-+ r.e.nr_required = 1;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ s64 disk_sectors = ptr_disk_sectors(sectors, p);
-+
-+ if (flags & BTREE_TRIGGER_OVERWRITE)
-+ disk_sectors = -disk_sectors;
-+
-+ ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
-+ if (ret < 0)
-+ return ret;
-+
-+ stale = ret > 0;
-+
-+ if (p.ptr.cached) {
-+ if (!stale) {
-+ ret = update_cached_sectors_list(trans, p.ptr.dev,
-+ disk_sectors);
-+ if (ret)
-+ return ret;
-+ }
-+ } else if (!p.has_ec) {
-+ dirty_sectors += disk_sectors;
-+ r.e.devs[r.e.nr_devs++] = p.ptr.dev;
-+ } else {
-+ ret = bch2_trans_mark_stripe_ptr(trans, p,
-+ disk_sectors, data_type);
-+ if (ret)
-+ return ret;
-+
-+ r.e.nr_required = 0;
-+ }
-+ }
-+
-+ if (r.e.nr_devs)
-+ ret = update_replicas_list(trans, &r.e, dirty_sectors);
-+
-+ return ret;
-+}
-+
-+int bch2_trans_mark_extent(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_i *new,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) -
-+ (int) bch2_bkey_needs_rebalance(c, old);
-+
-+ if (mod) {
-+ int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags);
-+}
-+
-+static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
-+ struct bkey_s_c_stripe s,
-+ unsigned idx, bool deleting)
-+{
-+ struct bch_fs *c = trans->c;
-+ const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
-+ struct btree_iter iter;
-+ struct bkey_i_alloc_v4 *a;
-+ enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
-+ ? BCH_DATA_parity : 0;
-+ s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
-+ int ret = 0;
-+
-+ if (deleting)
-+ sectors = -sectors;
-+
-+ a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr));
-+ if (IS_ERR(a))
-+ return PTR_ERR(a);
-+
-+ ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
-+ a->v.gen, a->v.data_type,
-+ a->v.dirty_sectors, a->v.cached_sectors);
-+ if (ret)
-+ goto err;
-+
-+ if (!deleting) {
-+ if (bch2_trans_inconsistent_on(a->v.stripe ||
-+ a->v.stripe_redundancy, trans,
-+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
-+ iter.pos.inode, iter.pos.offset, a->v.gen,
-+ bch2_data_types[a->v.data_type],
-+ a->v.dirty_sectors,
-+ a->v.stripe, s.k->p.offset)) {
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
-+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
-+ iter.pos.inode, iter.pos.offset, a->v.gen,
-+ bch2_data_types[a->v.data_type],
-+ a->v.dirty_sectors,
-+ s.k->p.offset)) {
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ a->v.stripe = s.k->p.offset;
-+ a->v.stripe_redundancy = s.v->nr_redundant;
-+ a->v.data_type = BCH_DATA_stripe;
-+ } else {
-+ if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
-+ a->v.stripe_redundancy != s.v->nr_redundant, trans,
-+ "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
-+ iter.pos.inode, iter.pos.offset, a->v.gen,
-+ s.k->p.offset, a->v.stripe)) {
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ a->v.stripe = 0;
-+ a->v.stripe_redundancy = 0;
-+ a->v.data_type = alloc_data_type(a->v, BCH_DATA_user);
-+ }
-+
-+ a->v.dirty_sectors += sectors;
-+ if (data_type)
-+ a->v.data_type = !deleting ? data_type : 0;
-+
-+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
-+ if (ret)
-+ goto err;
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+int bch2_trans_mark_stripe(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_i *new,
-+ unsigned flags)
-+{
-+ const struct bch_stripe *old_s = NULL;
-+ struct bch_stripe *new_s = NULL;
-+ struct bch_replicas_padded r;
-+ unsigned i, nr_blocks;
-+ int ret = 0;
-+
-+ if (old.k->type == KEY_TYPE_stripe)
-+ old_s = bkey_s_c_to_stripe(old).v;
-+ if (new->k.type == KEY_TYPE_stripe)
-+ new_s = &bkey_i_to_stripe(new)->v;
-+
-+ /*
-+ * If the pointers aren't changing, we don't need to do anything:
-+ */
-+ if (new_s && old_s &&
-+ new_s->nr_blocks == old_s->nr_blocks &&
-+ new_s->nr_redundant == old_s->nr_redundant &&
-+ !memcmp(old_s->ptrs, new_s->ptrs,
-+ new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
-+ return 0;
-+
-+ BUG_ON(new_s && old_s &&
-+ (new_s->nr_blocks != old_s->nr_blocks ||
-+ new_s->nr_redundant != old_s->nr_redundant));
-+
-+ nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks;
-+
-+ if (new_s) {
-+ s64 sectors = le16_to_cpu(new_s->sectors);
-+
-+ bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new));
-+ ret = update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (old_s) {
-+ s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
-+
-+ bch2_bkey_to_replicas(&r.e, old);
-+ ret = update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ for (i = 0; i < nr_blocks; i++) {
-+ if (new_s && old_s &&
-+ !memcmp(&new_s->ptrs[i],
-+ &old_s->ptrs[i],
-+ sizeof(new_s->ptrs[i])))
-+ continue;
-+
-+ if (new_s) {
-+ ret = bch2_trans_mark_stripe_bucket(trans,
-+ bkey_i_to_s_c_stripe(new), i, false);
-+ if (ret)
-+ break;
-+ }
-+
-+ if (old_s) {
-+ ret = bch2_trans_mark_stripe_bucket(trans,
-+ bkey_s_c_to_stripe(old), i, true);
-+ if (ret)
-+ break;
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+static int __trans_mark_reservation(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, unsigned flags)
-+{
-+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
-+ s64 sectors = (s64) k.k->size;
-+ struct replicas_delta_list *d;
-+ int ret;
-+
-+ if (flags & BTREE_TRIGGER_OVERWRITE)
-+ sectors = -sectors;
-+ sectors *= replicas;
-+
-+ ret = bch2_replicas_deltas_realloc(trans, 0);
-+ if (ret)
-+ return ret;
-+
-+ d = trans->fs_usage_deltas;
-+ replicas = clamp_t(unsigned, replicas, 1,
-+ ARRAY_SIZE(d->persistent_reserved));
-+
-+ d->persistent_reserved[replicas - 1] += sectors;
-+ return 0;
-+}
-+
-+int bch2_trans_mark_reservation(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old,
-+ struct bkey_i *new,
-+ unsigned flags)
-+{
-+ return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags);
-+}
-+
-+static int trans_mark_reflink_p_segment(struct btree_trans *trans,
-+ struct bkey_s_c_reflink_p p,
-+ u64 *idx, unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_i *k;
-+ __le64 *refcount;
-+ int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ k = bch2_bkey_get_mut_noupdate(trans, &iter,
-+ BTREE_ID_reflink, POS(0, *idx),
-+ BTREE_ITER_WITH_UPDATES);
-+ ret = PTR_ERR_OR_ZERO(k);
-+ if (ret)
-+ goto err;
-+
-+ refcount = bkey_refcount(k);
-+ if (!refcount) {
-+ bch2_bkey_val_to_text(&buf, c, p.s_c);
-+ bch2_trans_inconsistent(trans,
-+ "nonexistent indirect extent at %llu while marking\n %s",
-+ *idx, buf.buf);
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
-+ bch2_bkey_val_to_text(&buf, c, p.s_c);
-+ bch2_trans_inconsistent(trans,
-+ "indirect extent refcount underflow at %llu while marking\n %s",
-+ *idx, buf.buf);
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (flags & BTREE_TRIGGER_INSERT) {
-+ struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
-+ u64 pad;
-+
-+ pad = max_t(s64, le32_to_cpu(v->front_pad),
-+ le64_to_cpu(v->idx) - bkey_start_offset(&k->k));
-+ BUG_ON(pad > U32_MAX);
-+ v->front_pad = cpu_to_le32(pad);
-+
-+ pad = max_t(s64, le32_to_cpu(v->back_pad),
-+ k->k.p.offset - p.k->size - le64_to_cpu(v->idx));
-+ BUG_ON(pad > U32_MAX);
-+ v->back_pad = cpu_to_le32(pad);
-+ }
-+
-+ le64_add_cpu(refcount, add);
-+
-+ bch2_btree_iter_set_pos_to_extent_start(&iter);
-+ ret = bch2_trans_update(trans, &iter, k, 0);
-+ if (ret)
-+ goto err;
-+
-+ *idx = k->k.p.offset;
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int __trans_mark_reflink_p(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c k, unsigned flags)
-+{
-+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-+ u64 idx, end_idx;
-+ int ret = 0;
-+
-+ idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
-+ end_idx = le64_to_cpu(p.v->idx) + p.k->size +
-+ le32_to_cpu(p.v->back_pad);
-+
-+ while (idx < end_idx && !ret)
-+ ret = trans_mark_reflink_p_segment(trans, p, &idx, flags);
-+ return ret;
-+}
-+
-+int bch2_trans_mark_reflink_p(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old,
-+ struct bkey_i *new,
-+ unsigned flags)
-+{
-+ if (flags & BTREE_TRIGGER_INSERT) {
-+ struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v;
-+
-+ v->front_pad = v->back_pad = 0;
-+ }
-+
-+ return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags);
-+}
-+
-+static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
-+ struct bch_dev *ca, size_t b,
-+ enum bch_data_type type,
-+ unsigned sectors)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_i_alloc_v4 *a;
-+ int ret = 0;
-+
-+ /*
-+ * Backup superblock might be past the end of our normal usable space:
-+ */
-+ if (b >= ca->mi.nbuckets)
-+ return 0;
-+
-+ a = bch2_trans_start_alloc_update(trans, &iter, POS(ca->dev_idx, b));
-+ if (IS_ERR(a))
-+ return PTR_ERR(a);
-+
-+ if (a->v.data_type && type && a->v.data_type != type) {
-+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-+ BCH_FSCK_ERR_bucket_metadata_type_mismatch,
-+ "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
-+ "while marking %s",
-+ iter.pos.inode, iter.pos.offset, a->v.gen,
-+ bch2_data_types[a->v.data_type],
-+ bch2_data_types[type],
-+ bch2_data_types[type]);
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (a->v.data_type != type ||
-+ a->v.dirty_sectors != sectors) {
-+ a->v.data_type = type;
-+ a->v.dirty_sectors = sectors;
-+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
-+ }
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
-+ struct bch_dev *ca, size_t b,
-+ enum bch_data_type type,
-+ unsigned sectors)
-+{
-+ return commit_do(trans, NULL, NULL, 0,
-+ __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
-+}
-+
-+static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans,
-+ struct bch_dev *ca,
-+ u64 start, u64 end,
-+ enum bch_data_type type,
-+ u64 *bucket, unsigned *bucket_sectors)
-+{
-+ do {
-+ u64 b = sector_to_bucket(ca, start);
-+ unsigned sectors =
-+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start;
-+
-+ if (b != *bucket && *bucket_sectors) {
-+ int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket,
-+ type, *bucket_sectors);
-+ if (ret)
-+ return ret;
-+
-+ *bucket_sectors = 0;
-+ }
-+
-+ *bucket = b;
-+ *bucket_sectors += sectors;
-+ start += sectors;
-+ } while (start < end);
-+
-+ return 0;
-+}
-+
-+static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
-+ struct bch_dev *ca)
-+{
-+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
-+ u64 bucket = 0;
-+ unsigned i, bucket_sectors = 0;
-+ int ret;
-+
-+ for (i = 0; i < layout->nr_superblocks; i++) {
-+ u64 offset = le64_to_cpu(layout->sb_offset[i]);
-+
-+ if (offset == BCH_SB_SECTOR) {
-+ ret = bch2_trans_mark_metadata_sectors(trans, ca,
-+ 0, BCH_SB_SECTOR,
-+ BCH_DATA_sb, &bucket, &bucket_sectors);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ ret = bch2_trans_mark_metadata_sectors(trans, ca, offset,
-+ offset + (1 << layout->sb_max_size_bits),
-+ BCH_DATA_sb, &bucket, &bucket_sectors);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (bucket_sectors) {
-+ ret = bch2_trans_mark_metadata_bucket(trans, ca,
-+ bucket, BCH_DATA_sb, bucket_sectors);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ for (i = 0; i < ca->journal.nr; i++) {
-+ ret = bch2_trans_mark_metadata_bucket(trans, ca,
-+ ca->journal.buckets[i],
-+ BCH_DATA_journal, ca->mi.bucket_size);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(trans, ca));
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+int bch2_trans_mark_dev_sbs(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ for_each_online_member(ca, c, i) {
-+ int ret = bch2_trans_mark_dev_sb(c, ca);
-+ if (ret) {
-+ percpu_ref_put(&ca->ref);
-+ return ret;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/* Disk reservations: */
-+
-+#define SECTORS_CACHE 1024
-+
-+int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
-+ u64 sectors, int flags)
-+{
-+ struct bch_fs_pcpu *pcpu;
-+ u64 old, v, get;
-+ s64 sectors_available;
-+ int ret;
-+
-+ percpu_down_read(&c->mark_lock);
-+ preempt_disable();
-+ pcpu = this_cpu_ptr(c->pcpu);
-+
-+ if (sectors <= pcpu->sectors_available)
-+ goto out;
-+
-+ v = atomic64_read(&c->sectors_available);
-+ do {
-+ old = v;
-+ get = min((u64) sectors + SECTORS_CACHE, old);
-+
-+ if (get < sectors) {
-+ preempt_enable();
-+ goto recalculate;
-+ }
-+ } while ((v = atomic64_cmpxchg(&c->sectors_available,
-+ old, old - get)) != old);
-+
-+ pcpu->sectors_available += get;
-+
-+out:
-+ pcpu->sectors_available -= sectors;
-+ this_cpu_add(*c->online_reserved, sectors);
-+ res->sectors += sectors;
-+
-+ preempt_enable();
-+ percpu_up_read(&c->mark_lock);
-+ return 0;
-+
-+recalculate:
-+ mutex_lock(&c->sectors_available_lock);
-+
-+ percpu_u64_set(&c->pcpu->sectors_available, 0);
-+ sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);
-+
-+ if (sectors <= sectors_available ||
-+ (flags & BCH_DISK_RESERVATION_NOFAIL)) {
-+ atomic64_set(&c->sectors_available,
-+ max_t(s64, 0, sectors_available - sectors));
-+ this_cpu_add(*c->online_reserved, sectors);
-+ res->sectors += sectors;
-+ ret = 0;
-+ } else {
-+ atomic64_set(&c->sectors_available, sectors_available);
-+ ret = -BCH_ERR_ENOSPC_disk_reservation;
-+ }
-+
-+ mutex_unlock(&c->sectors_available_lock);
-+ percpu_up_read(&c->mark_lock);
-+
-+ return ret;
-+}
-+
-+/* Startup/shutdown: */
-+
-+static void bucket_gens_free_rcu(struct rcu_head *rcu)
-+{
-+ struct bucket_gens *buckets =
-+ container_of(rcu, struct bucket_gens, rcu);
-+
-+ kvpfree(buckets, sizeof(*buckets) + buckets->nbuckets);
-+}
-+
-+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
-+{
-+ struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
-+ unsigned long *buckets_nouse = NULL;
-+ bool resize = ca->bucket_gens != NULL;
-+ int ret;
-+
-+ if (!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
-+ GFP_KERNEL|__GFP_ZERO))) {
-+ ret = -BCH_ERR_ENOMEM_bucket_gens;
-+ goto err;
-+ }
-+
-+ if ((c->opts.buckets_nouse &&
-+ !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
-+ sizeof(unsigned long),
-+ GFP_KERNEL|__GFP_ZERO)))) {
-+ ret = -BCH_ERR_ENOMEM_buckets_nouse;
-+ goto err;
-+ }
-+
-+ bucket_gens->first_bucket = ca->mi.first_bucket;
-+ bucket_gens->nbuckets = nbuckets;
-+
-+ bch2_copygc_stop(c);
-+
-+ if (resize) {
-+ down_write(&c->gc_lock);
-+ down_write(&ca->bucket_lock);
-+ percpu_down_write(&c->mark_lock);
-+ }
-+
-+ old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
-+
-+ if (resize) {
-+ size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets);
-+
-+ memcpy(bucket_gens->b,
-+ old_bucket_gens->b,
-+ n);
-+ if (buckets_nouse)
-+ memcpy(buckets_nouse,
-+ ca->buckets_nouse,
-+ BITS_TO_LONGS(n) * sizeof(unsigned long));
-+ }
-+
-+ rcu_assign_pointer(ca->bucket_gens, bucket_gens);
-+ bucket_gens = old_bucket_gens;
-+
-+ swap(ca->buckets_nouse, buckets_nouse);
-+
-+ nbuckets = ca->mi.nbuckets;
-+
-+ if (resize) {
-+ percpu_up_write(&c->mark_lock);
-+ up_write(&ca->bucket_lock);
-+ up_write(&c->gc_lock);
-+ }
-+
-+ ret = 0;
-+err:
-+ kvpfree(buckets_nouse,
-+ BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
-+ if (bucket_gens)
-+ call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);
-+
-+ return ret;
-+}
-+
-+void bch2_dev_buckets_free(struct bch_dev *ca)
-+{
-+ unsigned i;
-+
-+ kvpfree(ca->buckets_nouse,
-+ BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
-+ kvpfree(rcu_dereference_protected(ca->bucket_gens, 1),
-+ sizeof(struct bucket_gens) + ca->mi.nbuckets);
-+
-+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
-+ free_percpu(ca->usage[i]);
-+ kfree(ca->usage_base);
-+}
-+
-+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ unsigned i;
-+
-+ ca->usage_base = kzalloc(sizeof(struct bch_dev_usage), GFP_KERNEL);
-+ if (!ca->usage_base)
-+ return -BCH_ERR_ENOMEM_usage_init;
-+
-+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++) {
-+ ca->usage[i] = alloc_percpu(struct bch_dev_usage);
-+ if (!ca->usage[i])
-+ return -BCH_ERR_ENOMEM_usage_init;
-+ }
-+
-+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
-+}
-diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
-new file mode 100644
-index 000000000000..21f6cb356921
---- /dev/null
-+++ b/fs/bcachefs/buckets.h
-@@ -0,0 +1,458 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+/*
-+ * Code for manipulating bucket marks for garbage collection.
-+ *
-+ * Copyright 2014 Datera, Inc.
-+ */
-+
-+#ifndef _BUCKETS_H
-+#define _BUCKETS_H
-+
-+#include "buckets_types.h"
-+#include "extents.h"
-+#include "sb-members.h"
-+
-+static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s)
-+{
-+ return div_u64(s, ca->mi.bucket_size);
-+}
-+
-+static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b)
-+{
-+ return ((sector_t) b) * ca->mi.bucket_size;
-+}
-+
-+static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s)
-+{
-+ u32 remainder;
-+
-+ div_u64_rem(s, ca->mi.bucket_size, &remainder);
-+ return remainder;
-+}
-+
-+static inline size_t sector_to_bucket_and_offset(const struct bch_dev *ca, sector_t s,
-+ u32 *offset)
-+{
-+ return div_u64_rem(s, ca->mi.bucket_size, offset);
-+}
-+
-+#define for_each_bucket(_b, _buckets) \
-+ for (_b = (_buckets)->b + (_buckets)->first_bucket; \
-+ _b < (_buckets)->b + (_buckets)->nbuckets; _b++)
-+
-+/*
-+ * Ugly hack alert:
-+ *
-+ * We need to cram a spinlock in a single byte, because that's what we have left
-+ * in struct bucket, and we care about the size of these - during fsck, we need
-+ * in memory state for every single bucket on every device.
-+ *
-+ * We used to do
-+ * while (xchg(&b->lock, 1) cpu_relax();
-+ * but, it turns out not all architectures support xchg on a single byte.
-+ *
-+ * So now we use bit_spin_lock(), with fun games since we can't burn a whole
-+ * ulong for this - we just need to make sure the lock bit always ends up in the
-+ * first byte.
-+ */
-+
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+#define BUCKET_LOCK_BITNR 0
-+#else
-+#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1)
-+#endif
-+
-+union ulong_byte_assert {
-+ ulong ulong;
-+ u8 byte;
-+};
-+
-+static inline void bucket_unlock(struct bucket *b)
-+{
-+ BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);
-+
-+ clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock);
-+ wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR);
-+}
-+
-+static inline void bucket_lock(struct bucket *b)
-+{
-+ wait_on_bit_lock((void *) &b->lock, BUCKET_LOCK_BITNR,
-+ TASK_UNINTERRUPTIBLE);
-+}
-+
-+static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
-+{
-+ return rcu_dereference_check(ca->buckets_gc,
-+ !ca->fs ||
-+ percpu_rwsem_is_held(&ca->fs->mark_lock) ||
-+ lockdep_is_held(&ca->fs->gc_lock) ||
-+ lockdep_is_held(&ca->bucket_lock));
-+}
-+
-+static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
-+{
-+ struct bucket_array *buckets = gc_bucket_array(ca);
-+
-+ BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
-+ return buckets->b + b;
-+}
-+
-+static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
-+{
-+ return rcu_dereference_check(ca->bucket_gens,
-+ !ca->fs ||
-+ percpu_rwsem_is_held(&ca->fs->mark_lock) ||
-+ lockdep_is_held(&ca->fs->gc_lock) ||
-+ lockdep_is_held(&ca->bucket_lock));
-+}
-+
-+static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
-+{
-+ struct bucket_gens *gens = bucket_gens(ca);
-+
-+ BUG_ON(b < gens->first_bucket || b >= gens->nbuckets);
-+ return gens->b + b;
-+}
-+
-+static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
-+ const struct bch_extent_ptr *ptr)
-+{
-+ return sector_to_bucket(ca, ptr->offset);
-+}
-+
-+static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c,
-+ const struct bch_extent_ptr *ptr)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+
-+ return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
-+}
-+
-+static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c,
-+ const struct bch_extent_ptr *ptr,
-+ u32 *bucket_offset)
-+{
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+
-+ return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset));
-+}
-+
-+static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca,
-+ const struct bch_extent_ptr *ptr)
-+{
-+ return gc_bucket(ca, PTR_BUCKET_NR(ca, ptr));
-+}
-+
-+static inline enum bch_data_type ptr_data_type(const struct bkey *k,
-+ const struct bch_extent_ptr *ptr)
-+{
-+ if (bkey_is_btree_ptr(k))
-+ return BCH_DATA_btree;
-+
-+ return ptr->cached ? BCH_DATA_cached : BCH_DATA_user;
-+}
-+
-+static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
-+{
-+ EBUG_ON(sectors < 0);
-+
-+ return crc_is_compressed(p.crc)
-+ ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
-+ p.crc.uncompressed_size)
-+ : sectors;
-+}
-+
-+static inline int gen_cmp(u8 a, u8 b)
-+{
-+ return (s8) (a - b);
-+}
-+
-+static inline int gen_after(u8 a, u8 b)
-+{
-+ int r = gen_cmp(a, b);
-+
-+ return r > 0 ? r : 0;
-+}
-+
-+/**
-+ * ptr_stale() - check if a pointer points into a bucket that has been
-+ * invalidated.
-+ */
-+static inline u8 ptr_stale(struct bch_dev *ca,
-+ const struct bch_extent_ptr *ptr)
-+{
-+ u8 ret;
-+
-+ rcu_read_lock();
-+ ret = gen_after(*bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)), ptr->gen);
-+ rcu_read_unlock();
-+
-+ return ret;
-+}
-+
-+/* Device usage: */
-+
-+void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *);
-+static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
-+{
-+ struct bch_dev_usage ret;
-+
-+ bch2_dev_usage_read_fast(ca, &ret);
-+ return ret;
-+}
-+
-+void bch2_dev_usage_init(struct bch_dev *);
-+
-+static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
-+{
-+ s64 reserved = 0;
-+
-+ switch (watermark) {
-+ case BCH_WATERMARK_NR:
-+ BUG();
-+ case BCH_WATERMARK_stripe:
-+ reserved += ca->mi.nbuckets >> 6;
-+ fallthrough;
-+ case BCH_WATERMARK_normal:
-+ reserved += ca->mi.nbuckets >> 6;
-+ fallthrough;
-+ case BCH_WATERMARK_copygc:
-+ reserved += ca->nr_btree_reserve;
-+ fallthrough;
-+ case BCH_WATERMARK_btree:
-+ reserved += ca->nr_btree_reserve;
-+ fallthrough;
-+ case BCH_WATERMARK_btree_copygc:
-+ case BCH_WATERMARK_reclaim:
-+ break;
-+ }
-+
-+ return reserved;
-+}
-+
-+static inline u64 dev_buckets_free(struct bch_dev *ca,
-+ struct bch_dev_usage usage,
-+ enum bch_watermark watermark)
-+{
-+ return max_t(s64, 0,
-+ usage.d[BCH_DATA_free].buckets -
-+ ca->nr_open_buckets -
-+ bch2_dev_buckets_reserved(ca, watermark));
-+}
-+
-+static inline u64 __dev_buckets_available(struct bch_dev *ca,
-+ struct bch_dev_usage usage,
-+ enum bch_watermark watermark)
-+{
-+ return max_t(s64, 0,
-+ usage.d[BCH_DATA_free].buckets
-+ + usage.d[BCH_DATA_cached].buckets
-+ + usage.d[BCH_DATA_need_gc_gens].buckets
-+ + usage.d[BCH_DATA_need_discard].buckets
-+ - ca->nr_open_buckets
-+ - bch2_dev_buckets_reserved(ca, watermark));
-+}
-+
-+static inline u64 dev_buckets_available(struct bch_dev *ca,
-+ enum bch_watermark watermark)
-+{
-+ return __dev_buckets_available(ca, bch2_dev_usage_read(ca), watermark);
-+}
-+
-+/* Filesystem usage: */
-+
-+static inline unsigned __fs_usage_u64s(unsigned nr_replicas)
-+{
-+ return sizeof(struct bch_fs_usage) / sizeof(u64) + nr_replicas;
-+}
-+
-+static inline unsigned fs_usage_u64s(struct bch_fs *c)
-+{
-+ return __fs_usage_u64s(READ_ONCE(c->replicas.nr));
-+}
-+
-+static inline unsigned __fs_usage_online_u64s(unsigned nr_replicas)
-+{
-+ return sizeof(struct bch_fs_usage_online) / sizeof(u64) + nr_replicas;
-+}
-+
-+static inline unsigned fs_usage_online_u64s(struct bch_fs *c)
-+{
-+ return __fs_usage_online_u64s(READ_ONCE(c->replicas.nr));
-+}
-+
-+static inline unsigned dev_usage_u64s(void)
-+{
-+ return sizeof(struct bch_dev_usage) / sizeof(u64);
-+}
-+
-+u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *);
-+
-+struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *);
-+
-+void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned);
-+
-+void bch2_fs_usage_to_text(struct printbuf *,
-+ struct bch_fs *, struct bch_fs_usage_online *);
-+
-+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *);
-+
-+struct bch_fs_usage_short
-+bch2_fs_usage_read_short(struct bch_fs *);
-+
-+/* key/bucket marking: */
-+
-+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
-+ unsigned journal_seq,
-+ bool gc)
-+{
-+ percpu_rwsem_assert_held(&c->mark_lock);
-+ BUG_ON(!gc && !journal_seq);
-+
-+ return this_cpu_ptr(gc
-+ ? c->usage_gc
-+ : c->usage[journal_seq & JOURNAL_BUF_MASK]);
-+}
-+
-+int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned);
-+
-+void bch2_fs_usage_initialize(struct bch_fs *);
-+
-+int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
-+ size_t, enum bch_data_type, unsigned,
-+ struct gc_pos, unsigned);
-+
-+int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_s_c, unsigned);
-+int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_s_c, unsigned);
-+int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_s_c, unsigned);
-+int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_s_c, unsigned);
-+int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_s_c, unsigned);
-+
-+int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-+int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-+int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-+int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-+
-+#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
-+({ \
-+ int ret = 0; \
-+ \
-+ if (_old.k->type) \
-+ ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \
-+ if (!ret && _new.k->type) \
-+ ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \
-+ ret; \
-+})
-+
-+#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \
-+ mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags)
-+
-+void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
-+int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
-+
-+int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
-+ size_t, enum bch_data_type, unsigned);
-+int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *);
-+int bch2_trans_mark_dev_sbs(struct bch_fs *);
-+
-+/*
-+ * Returns true if bucket @b of device @ca holds superblock data: bucket 0
-+ * always counts, and so does any bucket whose sector range overlaps one of the
-+ * superblock copies listed in the device's superblock layout.
-+ */
-+static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b)
-+{
-+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
-+ u64 b_offset = bucket_to_sector(ca, b);
-+ u64 b_end = bucket_to_sector(ca, b + 1);
-+ unsigned i;
-+
-+ if (!b)
-+ return true;
-+
-+ for (i = 0; i < layout->nr_superblocks; i++) {
-+ u64 offset = le64_to_cpu(layout->sb_offset[i]);
-+ /* shift in 64 bits: the result is added to a u64 sector offset */
-+ u64 end = offset + (1ULL << layout->sb_max_size_bits);
-+
-+ /* do [offset, end) and [b_offset, b_end) intersect? */
-+ if (!(offset >= b_end || end <= b_offset))
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
-+/* disk reservations: */
-+
-+/*
-+ * Drop a reservation: subtract its sectors from the per-cpu online_reserved
-+ * counter and reset it to zero. A reservation holding no sectors is left alone.
-+ */
-+static inline void bch2_disk_reservation_put(struct bch_fs *c,
-+ struct disk_reservation *res)
-+{
-+ if (!res->sectors)
-+ return;
-+
-+ this_cpu_sub(*c->online_reserved, res->sectors);
-+ res->sectors = 0;
-+}
-+
-+#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)
-+
-+int __bch2_disk_reservation_add(struct bch_fs *,
-+ struct disk_reservation *,
-+ u64, int);
-+
-+/*
-+ * Add @sectors to reservation @res.
-+ *
-+ * In kernel builds this first tries to take the sectors from this CPU's
-+ * sectors_available counter with a cmpxchg loop; when that counter holds fewer
-+ * than @sectors the request is handed to __bch2_disk_reservation_add(). On the
-+ * fast path the sectors are also added to online_reserved. Non-kernel builds
-+ * always use __bch2_disk_reservation_add().
-+ *
-+ * Returns 0 on the fast path, otherwise the result of
-+ * __bch2_disk_reservation_add().
-+ */
-+static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
-+ u64 sectors, int flags)
-+{
-+#ifdef __KERNEL__
-+ u64 old, new;
-+
-+ do {
-+ old = this_cpu_read(c->pcpu->sectors_available);
-+ if (sectors > old)
-+ return __bch2_disk_reservation_add(c, res, sectors, flags);
-+
-+ new = old - sectors;
-+ } while (this_cpu_cmpxchg(c->pcpu->sectors_available, old, new) != old);
-+
-+ this_cpu_add(*c->online_reserved, sectors);
-+ res->sectors += sectors;
-+ return 0;
-+#else
-+ return __bch2_disk_reservation_add(c, res, sectors, flags);
-+#endif
-+}
-+
-+/*
-+ * Build an empty reservation (zero sectors) for @nr_replicas replicas. @c is
-+ * only referenced by the disabled (#if 0) .gen initializer.
-+ */
-+static inline struct disk_reservation
-+bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas)
-+{
-+ return (struct disk_reservation) {
-+ .sectors = 0,
-+#if 0
-+ /* not used yet: */
-+ .gen = c->capacity_gen,
-+#endif
-+ .nr_replicas = nr_replicas,
-+ };
-+}
-+
-+/*
-+ * Initialize @res for @nr_replicas replicas and reserve @sectors for each of
-+ * them (@sectors * @nr_replicas in total). Returns the result of
-+ * bch2_disk_reservation_add().
-+ */
-+static inline int bch2_disk_reservation_get(struct bch_fs *c,
-+ struct disk_reservation *res,
-+ u64 sectors, unsigned nr_replicas,
-+ int flags)
-+{
-+ u64 total_sectors = sectors * nr_replicas;
-+
-+ *res = bch2_disk_reservation_init(c, nr_replicas);
-+
-+ return bch2_disk_reservation_add(c, res, total_sectors, flags);
-+}
-+
-+/*
-+ * avail_factor() scales @r by 2^RESERVE_FACTOR / (2^RESERVE_FACTOR + 1), which
-+ * is 64/65 for the current RESERVE_FACTOR of 6.
-+ */
-+#define RESERVE_FACTOR 6
-+
-+static inline u64 avail_factor(u64 r)
-+{
-+ return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1);
-+}
-+
-+int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64);
-+void bch2_dev_buckets_free(struct bch_dev *);
-+int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *);
-+
-+#endif /* _BUCKETS_H */
-diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
-new file mode 100644
-index 000000000000..2a9dab9006ef
---- /dev/null
-+++ b/fs/bcachefs/buckets_types.h
-@@ -0,0 +1,92 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BUCKETS_TYPES_H
-+#define _BUCKETS_TYPES_H
-+
-+#include "bcachefs_format.h"
-+#include "util.h"
-+
-+#define BUCKET_JOURNAL_SEQ_BITS 16
-+
-+/* State kept per bucket; struct bucket_array holds a flexible array of these. */
-+struct bucket {
-+ u8 lock;
-+ u8 gen_valid:1;
-+ u8 data_type:7;
-+ u8 gen;
-+ u8 stripe_redundancy;
-+ u32 stripe;
-+ u32 dirty_sectors;
-+ u32 cached_sectors;
-+};
-+
-+/*
-+ * Header plus flexible array of struct bucket.
-+ * NOTE(review): presumably b[] has nbuckets entries and the rcu_head is used
-+ * to free the array after a grace period - confirm against the allocator.
-+ */
-+struct bucket_array {
-+ struct rcu_head rcu;
-+ u16 first_bucket;
-+ size_t nbuckets;
-+ struct bucket b[];
-+};
-+
-+/*
-+ * Same layout as struct bucket_array, but b[] stores a single u8 per entry
-+ * (presumably the bucket generation number, going by the name - confirm).
-+ */
-+struct bucket_gens {
-+ struct rcu_head rcu;
-+ u16 first_bucket;
-+ size_t nbuckets;
-+ u8 b[];
-+};
-+
-+/*
-+ * Usage counters for one device: buckets_ec plus, for each of the BCH_DATA_NR
-+ * data types, bucket, sector and fragmentation counts.
-+ */
-+struct bch_dev_usage {
-+ u64 buckets_ec;
-+
-+ struct {
-+ u64 buckets;
-+ u64 sectors; /* _compressed_ sectors: */
-+ /*
-+ * XXX
-+ * Why do we have this? Isn't it just buckets * bucket_size -
-+ * sectors?
-+ */
-+ u64 fragmented;
-+ } d[BCH_DATA_NR];
-+};
-+
-+/*
-+ * Filesystem-wide usage counters. The struct ends in the flexible array
-+ * replicas[], so it must be allocated with room for those entries and can only
-+ * be embedded as the last member of another struct.
-+ */
-+struct bch_fs_usage {
-+ /* all fields are in units of 512 byte sectors: */
-+ u64 hidden;
-+ u64 btree;
-+ u64 data;
-+ u64 cached;
-+ u64 reserved;
-+ u64 nr_inodes;
-+
-+ /* XXX: add stats for compression ratio */
-+#if 0
-+ u64 uncompressed;
-+ u64 compressed;
-+#endif
-+
-+ /* broken out: */
-+
-+ u64 persistent_reserved[BCH_REPLICAS_MAX];
-+ u64 replicas[];
-+};
-+
-+/*
-+ * struct bch_fs_usage preceded by an online_reserved total. @u must remain the
-+ * last member because it ends in a flexible array.
-+ */
-+struct bch_fs_usage_online {
-+ u64 online_reserved;
-+ struct bch_fs_usage u;
-+};
-+
-+/* Condensed usage summary: capacity, used, free and inode count. */
-+struct bch_fs_usage_short {
-+ u64 capacity;
-+ u64 used;
-+ u64 free;
-+ u64 nr_inodes;
-+};
-+
-+/*
-+ * A reservation for space on disk:
-+ *
-+ * @sectors: sectors currently held by the reservation
-+ * @gen: left at zero by bch2_disk_reservation_init(), whose .gen
-+ * initializer is disabled ("not used yet")
-+ * @nr_replicas: replica count the reservation was initialized with
-+ */
-+struct disk_reservation {
-+ u64 sectors;
-+ u32 gen;
-+ unsigned nr_replicas;
-+};
-+
-+#endif /* _BUCKETS_TYPES_H */
-diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c
-new file mode 100644
-index 000000000000..ec1b636ef78d
---- /dev/null
-+++ b/fs/bcachefs/buckets_waiting_for_journal.c
-@@ -0,0 +1,166 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "buckets_waiting_for_journal.h"
-+#include <linux/hash.h>
-+#include <linux/random.h>
-+
-+/* Return the slot of @t that @dev_bucket maps to under hash seed @hash_seed_idx. */
-+static inline struct bucket_hashed *
-+bucket_hash(struct buckets_waiting_for_journal_table *t,
-+ unsigned hash_seed_idx, u64 dev_bucket)
-+{
-+ u64 seeded = dev_bucket ^ t->hash_seeds[hash_seed_idx];
-+
-+ return &t->d[hash_64(seeded, t->bits)];
-+}
-+
-+/*
-+ * Reset @t to 2^@bits zeroed slots and draw a fresh random value for every
-+ * hash seed.
-+ */
-+static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_t bits)
-+{
-+ unsigned seed;
-+
-+ t->bits = bits;
-+
-+ for (seed = 0; seed < ARRAY_SIZE(t->hash_seeds); seed++)
-+ get_random_bytes(t->hash_seeds + seed, sizeof(t->hash_seeds[seed]));
-+
-+ memset(t->d, 0, sizeof(t->d[0]) << t->bits);
-+}
-+
-+/*
-+ * Returns true if the table has an entry for @bucket on device @dev whose
-+ * journal_seq is greater than @flushed_seq. The lookup runs under b->lock.
-+ *
-+ * The lookup key packs @dev into the top 8 bits of a u64 and @bucket into the
-+ * remaining bits.
-+ */
-+bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
-+ u64 flushed_seq,
-+ unsigned dev, u64 bucket)
-+{
-+ struct buckets_waiting_for_journal_table *t;
-+ u64 dev_bucket = (u64) dev << 56 | bucket;
-+ bool ret = false;
-+ unsigned i;
-+
-+ mutex_lock(&b->lock);
-+ t = b->t;
-+
-+ /* probe the key's candidate slot under each hash seed */
-+ for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
-+ struct bucket_hashed *h = bucket_hash(t, i, dev_bucket);
-+
-+ if (h->dev_bucket == dev_bucket) {
-+ ret = h->journal_seq > flushed_seq;
-+ break;
-+ }
-+ }
-+
-+ mutex_unlock(&b->lock);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Try to store @new in @t.
-+ *
-+ * Every key has one candidate slot per hash seed. A slot can be taken if it
-+ * already holds the same dev_bucket or if its journal_seq is <= @flushed_seq.
-+ * When no candidate can be taken, @new is swapped with one of the occupants
-+ * (the victim) and the displaced entry is inserted in turn, for at most 10
-+ * rounds.
-+ *
-+ * Returns true on success. On failure returns false, and *@new then holds
-+ * whichever entry ended up without a slot - not necessarily the one passed in.
-+ */
-+static bool bucket_table_insert(struct buckets_waiting_for_journal_table *t,
-+ struct bucket_hashed *new,
-+ u64 flushed_seq)
-+{
-+ struct bucket_hashed *last_evicted = NULL;
-+ unsigned tries, i;
-+
-+ for (tries = 0; tries < 10; tries++) {
-+ struct bucket_hashed *old, *victim = NULL;
-+
-+ for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
-+ old = bucket_hash(t, i, new->dev_bucket);
-+
-+ if (old->dev_bucket == new->dev_bucket ||
-+ old->journal_seq <= flushed_seq) {
-+ *old = *new;
-+ return true;
-+ }
-+
-+ /* don't pick the slot we evicted from in the previous round */
-+ if (last_evicted != old)
-+ victim = old;
-+ }
-+
-+ /* hashed to same slot 3 times: */
-+ if (!victim)
-+ break;
-+
-+ /* Failed to find an empty slot: */
-+ swap(*new, *victim);
-+ last_evicted = victim;
-+ }
-+
-+ return false;
-+}
-+
-+/*
-+ * Record that @bucket on device @dev is waiting on @journal_seq.
-+ *
-+ * If the entry cannot be placed in the current table, a replacement table is
-+ * allocated - one bit larger when three times the number of live entries
-+ * exceeds the current size, otherwise the same size - and every entry with
-+ * journal_seq > @flushed_seq is rehashed into it. The rehash is restarted with
-+ * new random seeds until every entry fits.
-+ *
-+ * Returns 0 on success, or -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set if
-+ * the replacement table cannot be allocated.
-+ */
-+int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
-+ u64 flushed_seq,
-+ unsigned dev, u64 bucket,
-+ u64 journal_seq)
-+{
-+ struct buckets_waiting_for_journal_table *t, *n;
-+ struct bucket_hashed tmp, new = {
-+ .dev_bucket = (u64) dev << 56 | bucket,
-+ .journal_seq = journal_seq,
-+ };
-+ size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0;
-+ int ret = 0;
-+
-+ mutex_lock(&b->lock);
-+
-+ if (likely(bucket_table_insert(b->t, &new, flushed_seq)))
-+ goto out;
-+
-+ /*
-+ * On failure "new" holds whichever entry was left without a slot, which
-+ * may be one evicted from the table rather than the caller's.
-+ */
-+ t = b->t;
-+ size = 1UL << t->bits;
-+ /* nr_elements starts at 1 to account for "new" */
-+ for (i = 0; i < size; i++)
-+ nr_elements += t->d[i].journal_seq > flushed_seq;
-+
-+ new_bits = t->bits + (nr_elements * 3 > size);
-+
-+ n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL);
-+ if (!n) {
-+ ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set;
-+ goto out;
-+ }
-+
-+retry_rehash:
-+ nr_rehashes++;
-+ /* re-zeroes the table and picks new seeds on every attempt */
-+ bucket_table_init(n, new_bits);
-+
-+ tmp = new;
-+ BUG_ON(!bucket_table_insert(n, &tmp, flushed_seq));
-+
-+ for (i = 0; i < 1UL << t->bits; i++) {
-+ if (t->d[i].journal_seq <= flushed_seq)
-+ continue;
-+
-+ tmp = t->d[i];
-+ if (!bucket_table_insert(n, &tmp, flushed_seq))
-+ goto retry_rehash;
-+ }
-+
-+ b->t = n;
-+ kvfree(t);
-+
-+ pr_debug("took %zu rehashes, table at %zu/%lu elements",
-+ nr_rehashes, nr_elements, 1UL << b->t->bits);
-+out:
-+ mutex_unlock(&b->lock);
-+
-+ return ret;
-+}
-+
-+/* Free the hash table of @c's buckets_waiting_for_journal state. */
-+void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c)
-+{
-+ kvfree(c->buckets_waiting_for_journal.t);
-+}
-+
-+/* log2 of the initial table size, i.e. the table starts with 8 slots */
-+#define INITIAL_TABLE_BITS 3
-+
-+/*
-+ * Initialize the lock and allocate the initial hash table for @c.
-+ * Returns 0, or -BCH_ERR_ENOMEM_buckets_waiting_for_journal_init if the
-+ * allocation fails.
-+ */
-+int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *c)
-+{
-+ struct buckets_waiting_for_journal *b = &c->buckets_waiting_for_journal;
-+
-+ mutex_init(&b->lock);
-+
-+ b->t = kvmalloc(sizeof(*b->t) +
-+ (sizeof(b->t->d[0]) << INITIAL_TABLE_BITS), GFP_KERNEL);
-+ if (!b->t)
-+ return -BCH_ERR_ENOMEM_buckets_waiting_for_journal_init;
-+
-+ bucket_table_init(b->t, INITIAL_TABLE_BITS);
-+ return 0;
-+}
-diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h
-new file mode 100644
-index 000000000000..d2ae19cbe18c
---- /dev/null
-+++ b/fs/bcachefs/buckets_waiting_for_journal.h
-@@ -0,0 +1,15 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BUCKETS_WAITING_FOR_JOURNAL_H
-+#define _BUCKETS_WAITING_FOR_JOURNAL_H
-+
-+#include "buckets_waiting_for_journal_types.h"
-+
-+bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
-+ u64, unsigned, u64);
-+int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
-+ u64, unsigned, u64, u64);
-+
-+void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *);
-+int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *);
-+
-+#endif /* _BUCKETS_WAITING_FOR_JOURNAL_H */
-diff --git a/fs/bcachefs/buckets_waiting_for_journal_types.h b/fs/bcachefs/buckets_waiting_for_journal_types.h
-new file mode 100644
-index 000000000000..e593db061d81
---- /dev/null
-+++ b/fs/bcachefs/buckets_waiting_for_journal_types.h
-@@ -0,0 +1,23 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H
-+#define _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H
-+
-+#include <linux/siphash.h>
-+
-+/* One hash table slot: a packed device/bucket key and its journal sequence. */
-+struct bucket_hashed {
-+ u64 dev_bucket;
-+ u64 journal_seq;
-+};
-+
-+/*
-+ * Hash table header followed by its slots.
-+ * @bits: log2 of the number of slots in d[]
-+ * @hash_seeds: one seed per hash function; each key has one candidate slot
-+ * per seed
-+ */
-+struct buckets_waiting_for_journal_table {
-+ unsigned bits;
-+ u64 hash_seeds[3];
-+ struct bucket_hashed d[];
-+};
-+
-+/* The current table and the mutex held while it is read, modified or replaced. */
-+struct buckets_waiting_for_journal {
-+ struct mutex lock;
-+ struct buckets_waiting_for_journal_table *t;
-+};
-+
-+#endif /* _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H */
-diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
-new file mode 100644
-index 000000000000..4bb88aefed12
---- /dev/null
-+++ b/fs/bcachefs/chardev.c
-@@ -0,0 +1,784 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_CHARDEV
-+
-+#include "bcachefs.h"
-+#include "bcachefs_ioctl.h"
-+#include "buckets.h"
-+#include "chardev.h"
-+#include "journal.h"
-+#include "move.h"
-+#include "replicas.h"
-+#include "super.h"
-+#include "super-io.h"
-+
-+#include <linux/anon_inodes.h>
-+#include <linux/cdev.h>
-+#include <linux/device.h>
-+#include <linux/file.h>
-+#include <linux/fs.h>
-+#include <linux/ioctl.h>
-+#include <linux/kthread.h>
-+#include <linux/major.h>
-+#include <linux/sched/task.h>
-+#include <linux/slab.h>
-+#include <linux/uaccess.h>
-+
-+/*
-+ * Resolve the @dev field of an ioctl argument to a member device of @c.
-+ *
-+ * With BCH_BY_INDEX set in @flags, @dev is an index into c->devs[]; otherwise
-+ * it is a userspace pointer to a path string, which is copied in and passed to
-+ * bch2_dev_lookup().
-+ *
-+ * Returns the device, or an ERR_PTR: -EINVAL for an out-of-range or empty
-+ * index, the strndup_user() error, or whatever bch2_dev_lookup() returns.
-+ */
-+/* returns with ref on ca->ref */
-+static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
-+ unsigned flags)
-+{
-+ struct bch_dev *ca;
-+
-+ if (flags & BCH_BY_INDEX) {
-+ if (dev >= c->sb.nr_devices)
-+ return ERR_PTR(-EINVAL);
-+
-+ rcu_read_lock();
-+ ca = rcu_dereference(c->devs[dev]);
-+ if (ca)
-+ percpu_ref_get(&ca->ref);
-+ rcu_read_unlock();
-+
-+ if (!ca)
-+ return ERR_PTR(-EINVAL);
-+ } else {
-+ char *path;
-+
-+ path = strndup_user((const char __user *)
-+ (unsigned long) dev, PATH_MAX);
-+ if (IS_ERR(path))
-+ return ERR_CAST(path);
-+
-+ ca = bch2_dev_lookup(c, path);
-+ kfree(path);
-+ }
-+
-+ return ca;
-+}
-+
-+#if 0
-+/*
-+ * BCH_IOCTL_ASSEMBLE (currently compiled out): copy an array of device path
-+ * pointers from userspace, duplicate each path and open the filesystem made up
-+ * of those devices.
-+ *
-+ * Returns 0 on success or a negative error code. Every temporary allocation
-+ * is released on all paths.
-+ */
-+static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
-+{
-+ struct bch_ioctl_assemble arg;
-+ struct bch_fs *c;
-+ u64 *user_devs = NULL;
-+ char **devs = NULL;
-+ unsigned i;
-+ int ret = -EFAULT;
-+
-+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
-+ return -EFAULT;
-+
-+ if (arg.flags || arg.pad)
-+ return -EINVAL;
-+
-+ user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL);
-+ if (!user_devs)
-+ return -ENOMEM;
-+
-+ devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
-+ if (!devs) {
-+ ret = -ENOMEM;
-+ goto err;
-+ }
-+
-+ if (copy_from_user(user_devs, user_arg->devs,
-+ sizeof(u64) * arg.nr_devs))
-+ goto err;
-+
-+ for (i = 0; i < arg.nr_devs; i++) {
-+ char *path = strndup_user((const char __user *)(unsigned long)
-+ user_devs[i],
-+ PATH_MAX);
-+
-+ ret = PTR_ERR_OR_ZERO(path);
-+ if (ret)
-+ goto err;
-+
-+ /* only store valid pointers: the cleanup below kfree()s every slot */
-+ devs[i] = path;
-+ }
-+
-+ c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
-+ ret = PTR_ERR_OR_ZERO(c);
-+ if (!ret)
-+ closure_put(&c->cl);
-+err:
-+ if (devs)
-+ for (i = 0; i < arg.nr_devs; i++)
-+ kfree(devs[i]);
-+ kfree(devs);
-+ kfree(user_devs);
-+ return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_INCREMENTAL (currently compiled out): copy a device path from
-+ * userspace and register it with bch2_fs_open_incremental().
-+ *
-+ * Returns 0 on success, -EFAULT/-EINVAL for bad arguments, the
-+ * strndup_user() error, or -EINVAL if registration reports an error string.
-+ */
-+static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg)
-+{
-+ struct bch_ioctl_incremental arg;
-+ const char *err;
-+ char *path;
-+ int ret;
-+
-+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
-+ return -EFAULT;
-+
-+ if (arg.flags || arg.pad)
-+ return -EINVAL;
-+
-+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
-+ ret = PTR_ERR_OR_ZERO(path);
-+ if (ret)
-+ return ret;
-+
-+ err = bch2_fs_open_incremental(path);
-+ kfree(path);
-+
-+ if (err) {
-+ pr_err("Could not register bcachefs devices: %s", err);
-+ return -EINVAL;
-+ }
-+
-+ return 0;
-+}
-+#endif
-+
-+/*
-+ * ioctl handler for requests not tied to a filesystem. Both global commands
-+ * are compiled out, so every request currently returns -ENOTTY.
-+ */
-+static long bch2_global_ioctl(unsigned cmd, void __user *arg)
-+{
-+ switch (cmd) {
-+#if 0
-+ case BCH_IOCTL_ASSEMBLE:
-+ return bch2_ioctl_assemble(arg);
-+ case BCH_IOCTL_INCREMENTAL:
-+ return bch2_ioctl_incremental(arg);
-+#endif
-+ default:
-+ return -ENOTTY;
-+ }
-+}
-+
-+/*
-+ * BCH_IOCTL_QUERY_UUID: copy c->sb.user_uuid into the userspace argument.
-+ * Returns 0, or -EFAULT if the copy fails.
-+ */
-+static long bch2_ioctl_query_uuid(struct bch_fs *c,
-+ struct bch_ioctl_query_uuid __user *user_arg)
-+{
-+ unsigned long uncopied = copy_to_user(&user_arg->uuid, &c->sb.user_uuid,
-+ sizeof(c->sb.user_uuid));
-+
-+ return uncopied ? -EFAULT : 0;
-+}
-+
-+#if 0
-+/*
-+ * BCH_IOCTL_START (currently compiled out): start the filesystem. Requires
-+ * CAP_SYS_ADMIN; @arg.flags and @arg.pad must be zero.
-+ */
-+static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg)
-+{
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if (arg.flags || arg.pad)
-+ return -EINVAL;
-+
-+ return bch2_fs_start(c);
-+}
-+
-+/*
-+ * BCH_IOCTL_STOP (currently compiled out): stop the filesystem. Requires
-+ * CAP_SYS_ADMIN.
-+ */
-+static long bch2_ioctl_stop(struct bch_fs *c)
-+{
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ bch2_fs_stop(c);
-+ return 0;
-+}
-+#endif
-+
-+/*
-+ * BCH_IOCTL_DISK_ADD: add the device whose path userspace points to with
-+ * @arg.dev. Requires CAP_SYS_ADMIN; @arg.flags and @arg.pad must be zero.
-+ * Returns the result of bch2_dev_add(), or an error from argument handling.
-+ */
-+static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
-+{
-+ char *path;
-+ int ret;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if (arg.flags || arg.pad)
-+ return -EINVAL;
-+
-+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
-+ ret = PTR_ERR_OR_ZERO(path);
-+ if (ret)
-+ return ret;
-+
-+ ret = bch2_dev_add(c, path);
-+ kfree(path);
-+
-+ return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_DISK_REMOVE: look up the device named by @arg and hand it to
-+ * bch2_dev_remove(). Requires CAP_SYS_ADMIN; only the BCH_FORCE_* flags and
-+ * BCH_BY_INDEX are accepted and @arg.pad must be zero.
-+ *
-+ * NOTE(review): unlike the other device ioctls in this file, ca->ref is not
-+ * dropped here after the call - presumably bch2_dev_remove() consumes the
-+ * reference; confirm.
-+ */
-+static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
-+{
-+ struct bch_dev *ca;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
-+ BCH_FORCE_IF_METADATA_LOST|
-+ BCH_FORCE_IF_DEGRADED|
-+ BCH_BY_INDEX)) ||
-+ arg.pad)
-+ return -EINVAL;
-+
-+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
-+ if (IS_ERR(ca))
-+ return PTR_ERR(ca);
-+
-+ return bch2_dev_remove(c, ca, arg.flags);
-+}
-+
-+/*
-+ * BCH_IOCTL_DISK_ONLINE: bring the device whose path userspace points to with
-+ * @arg.dev online. Requires CAP_SYS_ADMIN; @arg.flags and @arg.pad must be
-+ * zero. Returns the result of bch2_dev_online(), or an argument error.
-+ */
-+static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
-+{
-+ char *path;
-+ int ret;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if (arg.flags || arg.pad)
-+ return -EINVAL;
-+
-+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
-+ ret = PTR_ERR_OR_ZERO(path);
-+ if (ret)
-+ return ret;
-+
-+ ret = bch2_dev_online(c, path);
-+ kfree(path);
-+ return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_DISK_OFFLINE: take the device named by @arg offline. Requires
-+ * CAP_SYS_ADMIN; only the BCH_FORCE_* flags and BCH_BY_INDEX are accepted and
-+ * @arg.pad must be zero. Drops the device reference taken by the lookup and
-+ * returns the result of bch2_dev_offline().
-+ */
-+static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
-+{
-+ struct bch_dev *ca;
-+ int ret;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
-+ BCH_FORCE_IF_METADATA_LOST|
-+ BCH_FORCE_IF_DEGRADED|
-+ BCH_BY_INDEX)) ||
-+ arg.pad)
-+ return -EINVAL;
-+
-+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
-+ if (IS_ERR(ca))
-+ return PTR_ERR(ca);
-+
-+ ret = bch2_dev_offline(c, ca, arg.flags);
-+ percpu_ref_put(&ca->ref);
-+ return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_DISK_SET_STATE: change the member state of the device named by
-+ * @arg to @arg.new_state. Requires CAP_SYS_ADMIN; rejects unknown flags,
-+ * nonzero padding and states >= BCH_MEMBER_STATE_NR with -EINVAL. A failure of
-+ * bch2_dev_set_state() is logged and returned.
-+ */
-+static long bch2_ioctl_disk_set_state(struct bch_fs *c,
-+ struct bch_ioctl_disk_set_state arg)
-+{
-+ struct bch_dev *ca;
-+ int ret;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
-+ BCH_FORCE_IF_METADATA_LOST|
-+ BCH_FORCE_IF_DEGRADED|
-+ BCH_BY_INDEX)) ||
-+ arg.pad[0] || arg.pad[1] || arg.pad[2] ||
-+ arg.new_state >= BCH_MEMBER_STATE_NR)
-+ return -EINVAL;
-+
-+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
-+ if (IS_ERR(ca))
-+ return PTR_ERR(ca);
-+
-+ ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
-+ if (ret)
-+ bch_err(c, "Error setting device state: %s", bch2_err_str(ret));
-+
-+ percpu_ref_put(&ca->ref);
-+ return ret;
-+}
-+
-+/*
-+ * State of one data job started through BCH_IOCTL_DATA. It is the private
-+ * data of the job's anonymous file and is freed when that file is released.
-+ */
-+struct bch_data_ctx {
-+ struct bch_fs *c;
-+ struct bch_ioctl_data arg;
-+ struct bch_move_stats stats;
-+
-+ /* return value of bch2_data_job(), stored by the job thread */
-+ int ret;
-+
-+ struct task_struct *thread;
-+};
-+
-+/*
-+ * kthread body for a data job: runs bch2_data_job() and stores its result in
-+ * the context.
-+ */
-+static int bch2_data_thread(void *arg)
-+{
-+ struct bch_data_ctx *ctx = arg;
-+
-+ ctx->ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
-+
-+ /*
-+ * NOTE(review): presumably U8_MAX marks the job as finished for readers
-+ * of the progress events - confirm against the userspace consumer.
-+ */
-+ ctx->stats.data_type = U8_MAX;
-+ return 0;
-+}
-+
-+/*
-+ * ->release for the data job file: stop the job thread, drop the task
-+ * reference taken in bch2_ioctl_data() and free the context.
-+ */
-+static int bch2_data_job_release(struct inode *inode, struct file *file)
-+{
-+ struct bch_data_ctx *ctx = file->private_data;
-+
-+ kthread_stop(ctx->thread);
-+ put_task_struct(ctx->thread);
-+ kfree(ctx);
-+ return 0;
-+}
-+
-+/*
-+ * ->read for the data job file: report a single BCH_DATA_EVENT_PROGRESS event
-+ * built from the job's current stats.
-+ *
-+ * Returns sizeof(struct bch_ioctl_data_event) on success, -EINVAL if @len is
-+ * too small to hold one event, or -EFAULT if the copy to userspace fails.
-+ * @ppos is not used.
-+ */
-+static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
-+ size_t len, loff_t *ppos)
-+{
-+ struct bch_data_ctx *ctx = file->private_data;
-+ struct bch_fs *c = ctx->c;
-+ struct bch_ioctl_data_event e = {
-+ .type = BCH_DATA_EVENT_PROGRESS,
-+ .p.data_type = ctx->stats.data_type,
-+ .p.btree_id = ctx->stats.pos.btree,
-+ .p.pos = ctx->stats.pos.pos,
-+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
-+ .p.sectors_total = bch2_fs_usage_read_short(c).used,
-+ };
-+
-+ if (len < sizeof(e))
-+ return -EINVAL;
-+
-+ if (copy_to_user(buf, &e, sizeof(e)))
-+ return -EFAULT;
-+
-+ return sizeof(e);
-+}
-+
-+/* File operations of the anonymous file returned by BCH_IOCTL_DATA. */
-+static const struct file_operations bcachefs_data_ops = {
-+ .release = bch2_data_job_release,
-+ .read = bch2_data_job_read,
-+ .llseek = no_llseek,
-+};
-+
-+/*
-+ * BCH_IOCTL_DATA: start a data job in a kthread and return a new file
-+ * descriptor for it. Reading the fd yields progress events
-+ * (bch2_data_job_read()); closing it stops the job and frees its context
-+ * (bch2_data_job_release()).
-+ *
-+ * Requires CAP_SYS_ADMIN. Returns the new fd, or a negative error code.
-+ */
-+static long bch2_ioctl_data(struct bch_fs *c,
-+ struct bch_ioctl_data arg)
-+{
-+ struct bch_data_ctx *ctx = NULL;
-+ struct file *file = NULL;
-+ unsigned flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK;
-+ int ret, fd = -1;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if (arg.op >= BCH_DATA_OP_NR || arg.flags)
-+ return -EINVAL;
-+
-+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-+ if (!ctx)
-+ return -ENOMEM;
-+
-+ ctx->c = c;
-+ ctx->arg = arg;
-+
-+ ctx->thread = kthread_create(bch2_data_thread, ctx,
-+ "bch-data/%s", c->name);
-+ if (IS_ERR(ctx->thread)) {
-+ ret = PTR_ERR(ctx->thread);
-+ goto err;
-+ }
-+
-+ ret = get_unused_fd_flags(flags);
-+ if (ret < 0)
-+ goto err;
-+ fd = ret;
-+
-+ file = anon_inode_getfile("[bcachefs]", &bcachefs_data_ops, ctx, flags);
-+ if (IS_ERR(file)) {
-+ ret = PTR_ERR(file);
-+ goto err;
-+ }
-+
-+ /*
-+ * Take the task reference that bch2_data_job_release() drops, and start
-+ * the job, *before* publishing the fd: once fd_install() has run,
-+ * another thread sharing the fd table may close the fd, and ->release
-+ * would then free ctx while it is still being used here.
-+ */
-+ get_task_struct(ctx->thread);
-+ wake_up_process(ctx->thread);
-+
-+ fd_install(fd, file);
-+
-+ return fd;
-+err:
-+ if (fd >= 0)
-+ put_unused_fd(fd);
-+ if (!IS_ERR_OR_NULL(ctx->thread))
-+ kthread_stop(ctx->thread);
-+ kfree(ctx);
-+ return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_FS_USAGE: copy filesystem usage to userspace.
-+ *
-+ * Userspace passes in replica_entries_bytes, the space it has available for
-+ * replicas entries; on success the field is updated to the number of bytes
-+ * actually written.
-+ *
-+ * Returns 0 on success, -EINVAL if the filesystem has not been started,
-+ * -EFAULT on a failed user copy, -ENOMEM, or -ERANGE if the replicas entries
-+ * do not fit in the space provided.
-+ */
-+static long bch2_ioctl_fs_usage(struct bch_fs *c,
-+ struct bch_ioctl_fs_usage __user *user_arg)
-+{
-+ struct bch_ioctl_fs_usage *arg = NULL;
-+ struct bch_replicas_usage *dst_e, *dst_end;
-+ struct bch_fs_usage_online *src;
-+ u32 replica_entries_bytes;
-+ unsigned i;
-+ int ret = 0;
-+
-+ if (!test_bit(BCH_FS_STARTED, &c->flags))
-+ return -EINVAL;
-+
-+ if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
-+ return -EFAULT;
-+
-+ arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL);
-+ if (!arg)
-+ return -ENOMEM;
-+
-+ src = bch2_fs_usage_read(c);
-+ if (!src) {
-+ ret = -ENOMEM;
-+ goto err;
-+ }
-+
-+ arg->capacity = c->capacity;
-+ arg->used = bch2_fs_sectors_used(c, src);
-+ arg->online_reserved = src->online_reserved;
-+
-+ for (i = 0; i < BCH_REPLICAS_MAX; i++)
-+ arg->persistent_reserved[i] = src->u.persistent_reserved[i];
-+
-+ dst_e = arg->replicas;
-+ dst_end = (void *) arg->replicas + replica_entries_bytes;
-+
-+ for (i = 0; i < c->replicas.nr; i++) {
-+ struct bch_replicas_entry *src_e =
-+ cpu_replicas_entry(&c->replicas, i);
-+
-+ /* check that we have enough space for one replicas entry */
-+ if (dst_e + 1 > dst_end) {
-+ ret = -ERANGE;
-+ break;
-+ }
-+
-+ dst_e->sectors = src->u.replicas[i];
-+ dst_e->r = *src_e;
-+
-+ /* recheck after setting nr_devs: */
-+ if (replicas_usage_next(dst_e) > dst_end) {
-+ ret = -ERANGE;
-+ break;
-+ }
-+
-+ memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs);
-+
-+ dst_e = replicas_usage_next(dst_e);
-+ }
-+
-+ arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas;
-+
-+ /*
-+ * NOTE(review): mark_lock is released here but never taken in this
-+ * function - presumably bch2_fs_usage_read() returns with it held;
-+ * confirm.
-+ */
-+ percpu_up_read(&c->mark_lock);
-+ kfree(src);
-+
-+ if (ret)
-+ goto err;
-+ if (copy_to_user(user_arg, arg,
-+ sizeof(*arg) + arg->replica_entries_bytes))
-+ ret = -EFAULT;
-+err:
-+ kfree(arg);
-+ return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_DEV_USAGE: fill in state, bucket size, bucket count and per data
-+ * type usage for the device named by the userspace argument and copy the
-+ * result back.
-+ *
-+ * Returns 0 on success, -EINVAL if the filesystem has not been started or the
-+ * flags/padding are invalid, -EFAULT on a failed user copy, or the device
-+ * lookup error.
-+ */
-+static long bch2_ioctl_dev_usage(struct bch_fs *c,
-+ struct bch_ioctl_dev_usage __user *user_arg)
-+{
-+ struct bch_ioctl_dev_usage arg;
-+ struct bch_dev_usage src;
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ if (!test_bit(BCH_FS_STARTED, &c->flags))
-+ return -EINVAL;
-+
-+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
-+ return -EFAULT;
-+
-+ if ((arg.flags & ~BCH_BY_INDEX) ||
-+ arg.pad[0] ||
-+ arg.pad[1] ||
-+ arg.pad[2])
-+ return -EINVAL;
-+
-+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
-+ if (IS_ERR(ca))
-+ return PTR_ERR(ca);
-+
-+ src = bch2_dev_usage_read(ca);
-+
-+ arg.state = ca->mi.state;
-+ arg.bucket_size = ca->mi.bucket_size;
-+ arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
-+ arg.buckets_ec = src.buckets_ec;
-+
-+ for (i = 0; i < BCH_DATA_NR; i++) {
-+ arg.d[i].buckets = src.d[i].buckets;
-+ arg.d[i].sectors = src.d[i].sectors;
-+ arg.d[i].fragmented = src.d[i].fragmented;
-+ }
-+
-+ percpu_ref_put(&ca->ref);
-+
-+ if (copy_to_user(user_arg, &arg, sizeof(arg)))
-+ return -EFAULT;
-+
-+ return 0;
-+}
-+
-+/*
-+ * BCH_IOCTL_READ_SUPER: copy a superblock to the userspace buffer @arg.sb,
-+ * which is @arg.size bytes long. With BCH_READ_DEV the superblock of the
-+ * device named by @arg.dev is used, otherwise the filesystem's. The copy is
-+ * done under c->sb_lock.
-+ *
-+ * Requires CAP_SYS_ADMIN. Returns 0 on success, -EINVAL for bad
-+ * flags/padding, -ERANGE if the buffer is too small, -EFAULT on a failed user
-+ * copy, or the device lookup error.
-+ */
-+static long bch2_ioctl_read_super(struct bch_fs *c,
-+ struct bch_ioctl_read_super arg)
-+{
-+ struct bch_dev *ca = NULL;
-+ struct bch_sb *sb;
-+ int ret = 0;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) ||
-+ arg.pad)
-+ return -EINVAL;
-+
-+ mutex_lock(&c->sb_lock);
-+
-+ if (arg.flags & BCH_READ_DEV) {
-+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
-+
-+ if (IS_ERR(ca)) {
-+ ret = PTR_ERR(ca);
-+ goto err;
-+ }
-+
-+ sb = ca->disk_sb.sb;
-+ } else {
-+ sb = c->disk_sb.sb;
-+ }
-+
-+ if (vstruct_bytes(sb) > arg.size) {
-+ ret = -ERANGE;
-+ goto err;
-+ }
-+
-+ if (copy_to_user((void __user *)(unsigned long)arg.sb, sb,
-+ vstruct_bytes(sb)))
-+ ret = -EFAULT;
-+err:
-+ /* ca is NULL when no device was looked up, or an ERR_PTR on failure */
-+ if (!IS_ERR_OR_NULL(ca))
-+ percpu_ref_put(&ca->ref);
-+ mutex_unlock(&c->sb_lock);
-+ return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_DISK_GET_IDX: return the index of the online member whose dev_t
-+ * matches the encoded device number in @arg.dev.
-+ *
-+ * Requires CAP_SYS_ADMIN. Returns the index, -EINVAL if @arg.dev decodes to
-+ * zero, or -BCH_ERR_ENOENT_dev_idx_not_found if no online member matches.
-+ */
-+static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
-+ struct bch_ioctl_disk_get_idx arg)
-+{
-+ dev_t dev = huge_decode_dev(arg.dev);
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if (!dev)
-+ return -EINVAL;
-+
-+ for_each_online_member(ca, c, i)
-+ if (ca->dev == dev) {
-+ /*
-+ * NOTE(review): presumably the iterator holds io_ref on the
-+ * current member, so it is dropped when leaving early -
-+ * confirm against for_each_online_member().
-+ */
-+ percpu_ref_put(&ca->io_ref);
-+ return i;
-+ }
-+
-+ return -BCH_ERR_ENOENT_dev_idx_not_found;
-+}
-+
-+/*
-+ * BCH_IOCTL_DISK_RESIZE: resize the device named by @arg to @arg.nbuckets
-+ * buckets. Requires CAP_SYS_ADMIN; only BCH_BY_INDEX is accepted in
-+ * @arg.flags and @arg.pad must be zero. Returns the result of
-+ * bch2_dev_resize(), or an argument/lookup error.
-+ */
-+static long bch2_ioctl_disk_resize(struct bch_fs *c,
-+ struct bch_ioctl_disk_resize arg)
-+{
-+ struct bch_dev *ca;
-+ int ret;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if ((arg.flags & ~BCH_BY_INDEX) ||
-+ arg.pad)
-+ return -EINVAL;
-+
-+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
-+ if (IS_ERR(ca))
-+ return PTR_ERR(ca);
-+
-+ ret = bch2_dev_resize(c, ca, arg.nbuckets);
-+
-+ percpu_ref_put(&ca->ref);
-+ return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_DISK_RESIZE_JOURNAL: set the number of journal buckets on the
-+ * device named by @arg to @arg.nbuckets. Requires CAP_SYS_ADMIN; only
-+ * BCH_BY_INDEX is accepted in @arg.flags, @arg.pad must be zero and
-+ * @arg.nbuckets must not exceed U32_MAX. Returns the result of
-+ * bch2_set_nr_journal_buckets(), or an argument/lookup error.
-+ */
-+static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
-+ struct bch_ioctl_disk_resize_journal arg)
-+{
-+ struct bch_dev *ca;
-+ int ret;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if ((arg.flags & ~BCH_BY_INDEX) ||
-+ arg.pad)
-+ return -EINVAL;
-+
-+ if (arg.nbuckets > U32_MAX)
-+ return -EINVAL;
-+
-+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
-+ if (IS_ERR(ca))
-+ return PTR_ERR(ca);
-+
-+ ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
-+
-+ percpu_ref_put(&ca->ref);
-+ return ret;
-+}
-+
-+/*
-+ * Dispatch helper for bch2_fs_ioctl(): copy a _argtype from the user pointer
-+ * "arg", call bch2_ioctl_<_name>(c, <copy>), store the result in "ret" and
-+ * jump to the "out" label. Returns -EFAULT directly if the copy fails.
-+ *
-+ * Relies on "c", "arg", "ret" and the label "out" existing in the enclosing
-+ * function.
-+ */
-+#define BCH_IOCTL(_name, _argtype) \
-+do { \
-+ _argtype i; \
-+ \
-+ if (copy_from_user(&i, arg, sizeof(i))) \
-+ return -EFAULT; \
-+ ret = bch2_ioctl_##_name(c, i); \
-+ goto out; \
-+} while (0)
-+
-+/*
-+ * Per-filesystem ioctl dispatcher.
-+ *
-+ * Commands in the first switch are dispatched without checking
-+ * BCH_FS_STARTED here (the fs_usage and dev_usage handlers check it
-+ * themselves); commands in the second switch are rejected with -EINVAL unless
-+ * the filesystem has been started. Unknown commands return -ENOTTY.
-+ *
-+ * Negative results of handlers dispatched through BCH_IOCTL() are passed
-+ * through bch2_err_class() before being returned; handlers that are returned
-+ * from directly bypass that conversion.
-+ */
-+long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
-+{
-+ long ret;
-+
-+ switch (cmd) {
-+ case BCH_IOCTL_QUERY_UUID:
-+ return bch2_ioctl_query_uuid(c, arg);
-+ case BCH_IOCTL_FS_USAGE:
-+ return bch2_ioctl_fs_usage(c, arg);
-+ case BCH_IOCTL_DEV_USAGE:
-+ return bch2_ioctl_dev_usage(c, arg);
-+#if 0
-+ case BCH_IOCTL_START:
-+ BCH_IOCTL(start, struct bch_ioctl_start);
-+ case BCH_IOCTL_STOP:
-+ return bch2_ioctl_stop(c);
-+#endif
-+ case BCH_IOCTL_READ_SUPER:
-+ BCH_IOCTL(read_super, struct bch_ioctl_read_super);
-+ case BCH_IOCTL_DISK_GET_IDX:
-+ BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
-+ }
-+
-+ if (!test_bit(BCH_FS_STARTED, &c->flags))
-+ return -EINVAL;
-+
-+ /* each BCH_IOCTL() ends in "goto out", so the cases do not fall through */
-+ switch (cmd) {
-+ case BCH_IOCTL_DISK_ADD:
-+ BCH_IOCTL(disk_add, struct bch_ioctl_disk);
-+ case BCH_IOCTL_DISK_REMOVE:
-+ BCH_IOCTL(disk_remove, struct bch_ioctl_disk);
-+ case BCH_IOCTL_DISK_ONLINE:
-+ BCH_IOCTL(disk_online, struct bch_ioctl_disk);
-+ case BCH_IOCTL_DISK_OFFLINE:
-+ BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
-+ case BCH_IOCTL_DISK_SET_STATE:
-+ BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
-+ case BCH_IOCTL_DATA:
-+ BCH_IOCTL(data, struct bch_ioctl_data);
-+ case BCH_IOCTL_DISK_RESIZE:
-+ BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
-+ case BCH_IOCTL_DISK_RESIZE_JOURNAL:
-+ BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal);
-+
-+ default:
-+ return -ENOTTY;
-+ }
-+out:
-+ if (ret < 0)
-+ ret = bch2_err_class(ret);
-+ return ret;
-+}
-+
-+static DEFINE_IDR(bch_chardev_minor);
-+
-+/*
-+ * unlocked_ioctl entry point for the control character devices.
-+ *
-+ * Minors below U8_MAX are looked up in bch_chardev_minor and, if a filesystem
-+ * is registered under that minor, the request goes to bch2_fs_ioctl().
-+ * Everything else - including minor U8_MAX, which bch2_chardev_init() uses for
-+ * the global "bcachefs-ctl" node - goes to bch2_global_ioctl().
-+ */
-+static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v)
-+{
-+ unsigned minor = iminor(file_inode(filp));
-+ struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL;
-+ void __user *arg = (void __user *) v;
-+
-+ return c
-+ ? bch2_fs_ioctl(c, cmd, arg)
-+ : bch2_global_ioctl(cmd, arg);
-+}
-+
-+/* File operations registered for the bcachefs control character devices. */
-+static const struct file_operations bch_chardev_fops = {
-+ .owner = THIS_MODULE,
-+ .unlocked_ioctl = bch2_chardev_ioctl,
-+ .open = nonseekable_open,
-+};
-+
-+static int bch_chardev_major;
-+static struct class *bch_chardev_class;
-+static struct device *bch_chardev;
-+
-+/*
-+ * Undo bch2_fs_chardev_init(): unregister the filesystem's control device if
-+ * it was created and release its minor number if one was allocated. Safe to
-+ * call after a partially failed init.
-+ */
-+void bch2_fs_chardev_exit(struct bch_fs *c)
-+{
-+ if (!IS_ERR_OR_NULL(c->chardev))
-+ device_unregister(c->chardev);
-+ if (c->minor >= 0)
-+ idr_remove(&bch_chardev_minor, c->minor);
-+}
-+
-+/*
-+ * Allocate a minor number for @c and create its "bcachefs<minor>-ctl" control
-+ * device.
-+ *
-+ * Minors are limited to [0, U8_MAX): bch2_chardev_ioctl() only treats minors
-+ * below U8_MAX as per-filesystem devices, and minor U8_MAX itself belongs to
-+ * the global "bcachefs-ctl" node created by bch2_chardev_init().
-+ *
-+ * Returns 0 on success, the idr_alloc() error (e.g. when all minors are in
-+ * use) or the device_create() error. Cleanup after a failure is left to
-+ * bch2_fs_chardev_exit().
-+ */
-+int bch2_fs_chardev_init(struct bch_fs *c)
-+{
-+ /* the end bound of idr_alloc() is exclusive */
-+ c->minor = idr_alloc(&bch_chardev_minor, c, 0, U8_MAX, GFP_KERNEL);
-+ if (c->minor < 0)
-+ return c->minor;
-+
-+ c->chardev = device_create(bch_chardev_class, NULL,
-+ MKDEV(bch_chardev_major, c->minor), c,
-+ "bcachefs%u-ctl", c->minor);
-+ if (IS_ERR(c->chardev))
-+ return PTR_ERR(c->chardev);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Tear down what bch2_chardev_init() set up: the global control device and its
-+ * class (if the class exists), then the character device major (if one was
-+ * registered).
-+ */
-+void bch2_chardev_exit(void)
-+{
-+ if (!IS_ERR_OR_NULL(bch_chardev_class)) {
-+ device_destroy(bch_chardev_class,
-+ MKDEV(bch_chardev_major, U8_MAX));
-+ class_destroy(bch_chardev_class);
-+ }
-+
-+ if (bch_chardev_major > 0)
-+ unregister_chrdev(bch_chardev_major, "bcachefs");
-+}
-+
-+/*
-+ * Register the control character device major, create the "bcachefs" class
-+ * and the global "bcachefs-ctl" device at minor U8_MAX.
-+ *
-+ * Returns 0 on success or the error of the step that failed.
-+ *
-+ * NOTE(review): nothing is unwound here on failure - presumably the caller
-+ * invokes bch2_chardev_exit(); confirm. Also note the major is registered
-+ * under the name "bcachefs-ctl" but unregistered as "bcachefs".
-+ */
-+int __init bch2_chardev_init(void)
-+{
-+ bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops);
-+ if (bch_chardev_major < 0)
-+ return bch_chardev_major;
-+
-+ bch_chardev_class = class_create("bcachefs");
-+ if (IS_ERR(bch_chardev_class))
-+ return PTR_ERR(bch_chardev_class);
-+
-+ bch_chardev = device_create(bch_chardev_class, NULL,
-+ MKDEV(bch_chardev_major, U8_MAX),
-+ NULL, "bcachefs-ctl");
-+ if (IS_ERR(bch_chardev))
-+ return PTR_ERR(bch_chardev);
-+
-+ return 0;
-+}
-+
-+#endif /* NO_BCACHEFS_CHARDEV */
-diff --git a/fs/bcachefs/chardev.h b/fs/bcachefs/chardev.h
-new file mode 100644
-index 000000000000..0f563ca53c36
---- /dev/null
-+++ b/fs/bcachefs/chardev.h
-@@ -0,0 +1,31 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_CHARDEV_H
-+#define _BCACHEFS_CHARDEV_H
-+
-+#ifndef NO_BCACHEFS_FS
-+
-+long bch2_fs_ioctl(struct bch_fs *, unsigned, void __user *);
-+
-+void bch2_fs_chardev_exit(struct bch_fs *);
-+int bch2_fs_chardev_init(struct bch_fs *);
-+
-+void bch2_chardev_exit(void);
-+int __init bch2_chardev_init(void);
-+
-+#else
-+
-+/* Stub used when NO_BCACHEFS_FS is defined: every ioctl is rejected. */
-+static inline long bch2_fs_ioctl(struct bch_fs *c,
-+ unsigned cmd, void __user * arg)
-+{
-+ return -ENOTTY;
-+}
-+
-+/* No-op stubs used when NO_BCACHEFS_FS is defined; the init stubs report success. */
-+static inline void bch2_fs_chardev_exit(struct bch_fs *c) {}
-+static inline int bch2_fs_chardev_init(struct bch_fs *c) { return 0; }
-+
-+static inline void bch2_chardev_exit(void) {}
-+static inline int __init bch2_chardev_init(void) { return 0; }
-+
-+#endif /* NO_BCACHEFS_FS */
-+
-+#endif /* _BCACHEFS_CHARDEV_H */
-diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
-new file mode 100644
-index 000000000000..3c761ad6b1c8
---- /dev/null
-+++ b/fs/bcachefs/checksum.c
-@@ -0,0 +1,804 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "checksum.h"
-+#include "errcode.h"
-+#include "super.h"
-+#include "super-io.h"
-+
-+#include <linux/crc32c.h>
-+#include <linux/crypto.h>
-+#include <linux/xxhash.h>
-+#include <linux/key.h>
-+#include <linux/random.h>
-+#include <linux/scatterlist.h>
-+#include <crypto/algapi.h>
-+#include <crypto/chacha.h>
-+#include <crypto/hash.h>
-+#include <crypto/poly1305.h>
-+#include <crypto/skcipher.h>
-+#include <keys/user-type.h>
-+
-+/*
-+ * bch2_checksum state is an abstraction of the checksum state calculated over different pages.
-+ * it features page merging without having the checksum algorithm lose its state.
-+ * for native checksum algorithms (like crc), a default seed value will do.
-+ * for hash-like algorithms, a state needs to be stored
-+ */
-+
-+/*
-+ * @seed: running value for the crc based checksum types
-+ * @h64state: running state for BCH_CSUM_xxhash
-+ * @type: checksum type; selects which union member is in use
-+ */
-+struct bch2_checksum_state {
-+ union {
-+ u64 seed;
-+ struct xxh64_state h64state;
-+ };
-+ unsigned int type;
-+};
-+
-+/*
-+ * Set the initial value for the checksum type already stored in state->type:
-+ * zero for none/crc32c/crc64, all ones (32 or 64 bit) for the _nonzero
-+ * variants, and a reset xxh64 state with seed 0 for xxhash. Any other type is
-+ * a BUG().
-+ */
-+static void bch2_checksum_init(struct bch2_checksum_state *state)
-+{
-+ switch (state->type) {
-+ case BCH_CSUM_none:
-+ case BCH_CSUM_crc32c:
-+ case BCH_CSUM_crc64:
-+ state->seed = 0;
-+ break;
-+ case BCH_CSUM_crc32c_nonzero:
-+ state->seed = U32_MAX;
-+ break;
-+ case BCH_CSUM_crc64_nonzero:
-+ state->seed = U64_MAX;
-+ break;
-+ case BCH_CSUM_xxhash:
-+ xxh64_reset(&state->h64state, 0);
-+ break;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+/*
-+ * Return the finished checksum: the running value as is for none/crc32c/crc64,
-+ * the running value XORed with all ones (32 or 64 bit) for the _nonzero
-+ * variants, and the xxh64 digest for xxhash. Any other type is a BUG().
-+ */
-+static u64 bch2_checksum_final(const struct bch2_checksum_state *state)
-+{
-+ switch (state->type) {
-+ case BCH_CSUM_none:
-+ case BCH_CSUM_crc32c:
-+ case BCH_CSUM_crc64:
-+ return state->seed;
-+ case BCH_CSUM_crc32c_nonzero:
-+ return state->seed ^ U32_MAX;
-+ case BCH_CSUM_crc64_nonzero:
-+ return state->seed ^ U64_MAX;
-+ case BCH_CSUM_xxhash:
-+ return xxh64_digest(&state->h64state);
-+ default:
-+ BUG();
-+ }
-+}
-+
-+/*
-+ * Fold @len bytes at @data into the running checksum. BCH_CSUM_none ignores
-+ * the data; the crc32c and crc64 types (plain and _nonzero) continue from the
-+ * current seed; xxhash updates the xxh64 state. Any other type is a BUG().
-+ */
-+static void bch2_checksum_update(struct bch2_checksum_state *state, const void *data, size_t len)
-+{
-+ switch (state->type) {
-+ case BCH_CSUM_none:
-+ return;
-+ case BCH_CSUM_crc32c_nonzero:
-+ case BCH_CSUM_crc32c:
-+ state->seed = crc32c(state->seed, data, len);
-+ break;
-+ case BCH_CSUM_crc64_nonzero:
-+ case BCH_CSUM_crc64:
-+ state->seed = crc64_be(state->seed, data, len);
-+ break;
-+ case BCH_CSUM_xxhash:
-+ xxh64_update(&state->h64state, data, len);
-+ break;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm,
-+ struct nonce nonce,
-+ struct scatterlist *sg, size_t len)
-+{
-+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
-+ int ret;
-+
-+ skcipher_request_set_sync_tfm(req, tfm);
-+ skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
-+
-+ ret = crypto_skcipher_encrypt(req);
-+ if (ret)
-+ pr_err("got error %i from crypto_skcipher_encrypt()", ret);
-+
-+ return ret;
-+}
-+
-+static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
-+ struct nonce nonce,
-+ void *buf, size_t len)
-+{
-+ if (!is_vmalloc_addr(buf)) {
-+ struct scatterlist sg;
-+
-+ sg_init_table(&sg, 1);
-+ sg_set_page(&sg,
-+ is_vmalloc_addr(buf)
-+ ? vmalloc_to_page(buf)
-+ : virt_to_page(buf),
-+ len, offset_in_page(buf));
-+ return do_encrypt_sg(tfm, nonce, &sg, len);
-+ } else {
-+ unsigned pages = buf_pages(buf, len);
-+ struct scatterlist *sg;
-+ size_t orig_len = len;
-+ int ret, i;
-+
-+ sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL);
-+ if (!sg)
-+ return -BCH_ERR_ENOMEM_do_encrypt;
-+
-+ sg_init_table(sg, pages);
-+
-+ for (i = 0; i < pages; i++) {
-+ unsigned offset = offset_in_page(buf);
-+ unsigned pg_len = min_t(size_t, len, PAGE_SIZE - offset);
-+
-+ sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset);
-+ buf += pg_len;
-+ len -= pg_len;
-+ }
-+
-+ ret = do_encrypt_sg(tfm, nonce, sg, orig_len);
-+ kfree(sg);
-+ return ret;
-+ }
-+}
-+
-+int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
-+ void *buf, size_t len)
-+{
-+ struct crypto_sync_skcipher *chacha20 =
-+ crypto_alloc_sync_skcipher("chacha20", 0, 0);
-+ int ret;
-+
-+ ret = PTR_ERR_OR_ZERO(chacha20);
-+ if (ret) {
-+ pr_err("error requesting chacha20 cipher: %s", bch2_err_str(ret));
-+ return ret;
-+ }
-+
-+ ret = crypto_skcipher_setkey(&chacha20->base,
-+ (void *) key, sizeof(*key));
-+ if (ret) {
-+ pr_err("error from crypto_skcipher_setkey(): %s", bch2_err_str(ret));
-+ goto err;
-+ }
-+
-+ ret = do_encrypt(chacha20, nonce, buf, len);
-+err:
-+ crypto_free_sync_skcipher(chacha20);
-+ return ret;
-+}
-+
-+static int gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
-+ struct nonce nonce)
-+{
-+ u8 key[POLY1305_KEY_SIZE];
-+ int ret;
-+
-+ nonce.d[3] ^= BCH_NONCE_POLY;
-+
-+ memset(key, 0, sizeof(key));
-+ ret = do_encrypt(c->chacha20, nonce, key, sizeof(key));
-+ if (ret)
-+ return ret;
-+
-+ desc->tfm = c->poly1305;
-+ crypto_shash_init(desc);
-+ crypto_shash_update(desc, key, sizeof(key));
-+ return 0;
-+}
-+
-+struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
-+ struct nonce nonce, const void *data, size_t len)
-+{
-+ switch (type) {
-+ case BCH_CSUM_none:
-+ case BCH_CSUM_crc32c_nonzero:
-+ case BCH_CSUM_crc64_nonzero:
-+ case BCH_CSUM_crc32c:
-+ case BCH_CSUM_xxhash:
-+ case BCH_CSUM_crc64: {
-+ struct bch2_checksum_state state;
-+
-+ state.type = type;
-+
-+ bch2_checksum_init(&state);
-+ bch2_checksum_update(&state, data, len);
-+
-+ return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) };
-+ }
-+
-+ case BCH_CSUM_chacha20_poly1305_80:
-+ case BCH_CSUM_chacha20_poly1305_128: {
-+ SHASH_DESC_ON_STACK(desc, c->poly1305);
-+ u8 digest[POLY1305_DIGEST_SIZE];
-+ struct bch_csum ret = { 0 };
-+
-+ gen_poly_key(c, desc, nonce);
-+
-+ crypto_shash_update(desc, data, len);
-+ crypto_shash_final(desc, digest);
-+
-+ memcpy(&ret, digest, bch_crc_bytes[type]);
-+ return ret;
-+ }
-+ default:
-+ BUG();
-+ }
-+}
-+
-+int bch2_encrypt(struct bch_fs *c, unsigned type,
-+ struct nonce nonce, void *data, size_t len)
-+{
-+ if (!bch2_csum_type_is_encryption(type))
-+ return 0;
-+
-+ return do_encrypt(c->chacha20, nonce, data, len);
-+}
-+
-+static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
-+ struct nonce nonce, struct bio *bio,
-+ struct bvec_iter *iter)
-+{
-+ struct bio_vec bv;
-+
-+ switch (type) {
-+ case BCH_CSUM_none:
-+ return (struct bch_csum) { 0 };
-+ case BCH_CSUM_crc32c_nonzero:
-+ case BCH_CSUM_crc64_nonzero:
-+ case BCH_CSUM_crc32c:
-+ case BCH_CSUM_xxhash:
-+ case BCH_CSUM_crc64: {
-+ struct bch2_checksum_state state;
-+
-+ state.type = type;
-+ bch2_checksum_init(&state);
-+
-+#ifdef CONFIG_HIGHMEM
-+ __bio_for_each_segment(bv, bio, *iter, *iter) {
-+ void *p = kmap_local_page(bv.bv_page) + bv.bv_offset;
-+
-+ bch2_checksum_update(&state, p, bv.bv_len);
-+ kunmap_local(p);
-+ }
-+#else
-+ __bio_for_each_bvec(bv, bio, *iter, *iter)
-+ bch2_checksum_update(&state, page_address(bv.bv_page) + bv.bv_offset,
-+ bv.bv_len);
-+#endif
-+ return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) };
-+ }
-+
-+ case BCH_CSUM_chacha20_poly1305_80:
-+ case BCH_CSUM_chacha20_poly1305_128: {
-+ SHASH_DESC_ON_STACK(desc, c->poly1305);
-+ u8 digest[POLY1305_DIGEST_SIZE];
-+ struct bch_csum ret = { 0 };
-+
-+ gen_poly_key(c, desc, nonce);
-+
-+#ifdef CONFIG_HIGHMEM
-+ __bio_for_each_segment(bv, bio, *iter, *iter) {
-+ void *p = kmap_local_page(bv.bv_page) + bv.bv_offset;
-+
-+ crypto_shash_update(desc, p, bv.bv_len);
-+ kunmap_local(p);
-+ }
-+#else
-+ __bio_for_each_bvec(bv, bio, *iter, *iter)
-+ crypto_shash_update(desc,
-+ page_address(bv.bv_page) + bv.bv_offset,
-+ bv.bv_len);
-+#endif
-+ crypto_shash_final(desc, digest);
-+
-+ memcpy(&ret, digest, bch_crc_bytes[type]);
-+ return ret;
-+ }
-+ default:
-+ BUG();
-+ }
-+}
-+
-+struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type,
-+ struct nonce nonce, struct bio *bio)
-+{
-+ struct bvec_iter iter = bio->bi_iter;
-+
-+ return __bch2_checksum_bio(c, type, nonce, bio, &iter);
-+}
-+
-+int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
-+ struct nonce nonce, struct bio *bio)
-+{
-+ struct bio_vec bv;
-+ struct bvec_iter iter;
-+ struct scatterlist sgl[16], *sg = sgl;
-+ size_t bytes = 0;
-+ int ret = 0;
-+
-+ if (!bch2_csum_type_is_encryption(type))
-+ return 0;
-+
-+ sg_init_table(sgl, ARRAY_SIZE(sgl));
-+
-+ bio_for_each_segment(bv, bio, iter) {
-+ if (sg == sgl + ARRAY_SIZE(sgl)) {
-+ sg_mark_end(sg - 1);
-+
-+ ret = do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
-+ if (ret)
-+ return ret;
-+
-+ nonce = nonce_add(nonce, bytes);
-+ bytes = 0;
-+
-+ sg_init_table(sgl, ARRAY_SIZE(sgl));
-+ sg = sgl;
-+ }
-+
-+ sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset);
-+ bytes += bv.bv_len;
-+ }
-+
-+ sg_mark_end(sg - 1);
-+ return do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
-+}
-+
-+struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,
-+ struct bch_csum b, size_t b_len)
-+{
-+ struct bch2_checksum_state state;
-+
-+ state.type = type;
-+ bch2_checksum_init(&state);
-+ state.seed = le64_to_cpu(a.lo);
-+
-+ BUG_ON(!bch2_checksum_mergeable(type));
-+
-+ while (b_len) {
-+ unsigned page_len = min_t(unsigned, b_len, PAGE_SIZE);
-+
-+ bch2_checksum_update(&state,
-+ page_address(ZERO_PAGE(0)), page_len);
-+ b_len -= page_len;
-+ }
-+ a.lo = cpu_to_le64(bch2_checksum_final(&state));
-+ a.lo ^= b.lo;
-+ a.hi ^= b.hi;
-+ return a;
-+}
-+
-+int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
-+ struct bversion version,
-+ struct bch_extent_crc_unpacked crc_old,
-+ struct bch_extent_crc_unpacked *crc_a,
-+ struct bch_extent_crc_unpacked *crc_b,
-+ unsigned len_a, unsigned len_b,
-+ unsigned new_csum_type)
-+{
-+ struct bvec_iter iter = bio->bi_iter;
-+ struct nonce nonce = extent_nonce(version, crc_old);
-+ struct bch_csum merged = { 0 };
-+ struct crc_split {
-+ struct bch_extent_crc_unpacked *crc;
-+ unsigned len;
-+ unsigned csum_type;
-+ struct bch_csum csum;
-+ } splits[3] = {
-+ { crc_a, len_a, new_csum_type, { 0 }},
-+ { crc_b, len_b, new_csum_type, { 0 } },
-+ { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type, { 0 } },
-+ }, *i;
-+ bool mergeable = crc_old.csum_type == new_csum_type &&
-+ bch2_checksum_mergeable(new_csum_type);
-+ unsigned crc_nonce = crc_old.nonce;
-+
-+ BUG_ON(len_a + len_b > bio_sectors(bio));
-+ BUG_ON(crc_old.uncompressed_size != bio_sectors(bio));
-+ BUG_ON(crc_is_compressed(crc_old));
-+ BUG_ON(bch2_csum_type_is_encryption(crc_old.csum_type) !=
-+ bch2_csum_type_is_encryption(new_csum_type));
-+
-+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
-+ iter.bi_size = i->len << 9;
-+ if (mergeable || i->crc)
-+ i->csum = __bch2_checksum_bio(c, i->csum_type,
-+ nonce, bio, &iter);
-+ else
-+ bio_advance_iter(bio, &iter, i->len << 9);
-+ nonce = nonce_add(nonce, i->len << 9);
-+ }
-+
-+ if (mergeable)
-+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++)
-+ merged = bch2_checksum_merge(new_csum_type, merged,
-+ i->csum, i->len << 9);
-+ else
-+ merged = bch2_checksum_bio(c, crc_old.csum_type,
-+ extent_nonce(version, crc_old), bio);
-+
-+ if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
-+ bch_err(c, "checksum error in %s() (memory corruption or bug?)\n"
-+ "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)",
-+ __func__,
-+ crc_old.csum.hi,
-+ crc_old.csum.lo,
-+ merged.hi,
-+ merged.lo,
-+ bch2_csum_types[crc_old.csum_type],
-+ bch2_csum_types[new_csum_type]);
-+ return -EIO;
-+ }
-+
-+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
-+ if (i->crc)
-+ *i->crc = (struct bch_extent_crc_unpacked) {
-+ .csum_type = i->csum_type,
-+ .compression_type = crc_old.compression_type,
-+ .compressed_size = i->len,
-+ .uncompressed_size = i->len,
-+ .offset = 0,
-+ .live_size = i->len,
-+ .nonce = crc_nonce,
-+ .csum = i->csum,
-+ };
-+
-+ if (bch2_csum_type_is_encryption(new_csum_type))
-+ crc_nonce += i->len;
-+ }
-+
-+ return 0;
-+}
-+
-+/* BCH_SB_FIELD_crypt: */
-+
-+static int bch2_sb_crypt_validate(struct bch_sb *sb,
-+ struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt);
-+
-+ if (vstruct_bytes(&crypt->field) < sizeof(*crypt)) {
-+ prt_printf(err, "wrong size (got %zu should be %zu)",
-+ vstruct_bytes(&crypt->field), sizeof(*crypt));
-+ return -BCH_ERR_invalid_sb_crypt;
-+ }
-+
-+ if (BCH_CRYPT_KDF_TYPE(crypt)) {
-+ prt_printf(err, "bad kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt));
-+ return -BCH_ERR_invalid_sb_crypt;
-+ }
-+
-+ return 0;
-+}
-+
-+static void bch2_sb_crypt_to_text(struct printbuf *out, struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt);
-+
-+ prt_printf(out, "KFD: %llu", BCH_CRYPT_KDF_TYPE(crypt));
-+ prt_newline(out);
-+ prt_printf(out, "scrypt n: %llu", BCH_KDF_SCRYPT_N(crypt));
-+ prt_newline(out);
-+ prt_printf(out, "scrypt r: %llu", BCH_KDF_SCRYPT_R(crypt));
-+ prt_newline(out);
-+ prt_printf(out, "scrypt p: %llu", BCH_KDF_SCRYPT_P(crypt));
-+ prt_newline(out);
-+}
-+
-+const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
-+ .validate = bch2_sb_crypt_validate,
-+ .to_text = bch2_sb_crypt_to_text,
-+};
-+
-+#ifdef __KERNEL__
-+static int __bch2_request_key(char *key_description, struct bch_key *key)
-+{
-+ struct key *keyring_key;
-+ const struct user_key_payload *ukp;
-+ int ret;
-+
-+ keyring_key = request_key(&key_type_user, key_description, NULL);
-+ if (IS_ERR(keyring_key))
-+ return PTR_ERR(keyring_key);
-+
-+ down_read(&keyring_key->sem);
-+ ukp = dereference_key_locked(keyring_key);
-+ if (ukp->datalen == sizeof(*key)) {
-+ memcpy(key, ukp->data, ukp->datalen);
-+ ret = 0;
-+ } else {
-+ ret = -EINVAL;
-+ }
-+ up_read(&keyring_key->sem);
-+ key_put(keyring_key);
-+
-+ return ret;
-+}
-+#else
-+#include <keyutils.h>
-+
-+static int __bch2_request_key(char *key_description, struct bch_key *key)
-+{
-+ key_serial_t key_id;
-+
-+ key_id = request_key("user", key_description, NULL,
-+ KEY_SPEC_SESSION_KEYRING);
-+ if (key_id >= 0)
-+ goto got_key;
-+
-+ key_id = request_key("user", key_description, NULL,
-+ KEY_SPEC_USER_KEYRING);
-+ if (key_id >= 0)
-+ goto got_key;
-+
-+ key_id = request_key("user", key_description, NULL,
-+ KEY_SPEC_USER_SESSION_KEYRING);
-+ if (key_id >= 0)
-+ goto got_key;
-+
-+ return -errno;
-+got_key:
-+
-+ if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key))
-+ return -1;
-+
-+ return 0;
-+}
-+
-+#include "../crypto.h"
-+#endif
-+
-+int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
-+{
-+ struct printbuf key_description = PRINTBUF;
-+ int ret;
-+
-+ prt_printf(&key_description, "bcachefs:");
-+ pr_uuid(&key_description, sb->user_uuid.b);
-+
-+ ret = __bch2_request_key(key_description.buf, key);
-+ printbuf_exit(&key_description);
-+
-+#ifndef __KERNEL__
-+ if (ret) {
-+ char *passphrase = read_passphrase("Enter passphrase: ");
-+ struct bch_encrypted_key sb_key;
-+
-+ bch2_passphrase_check(sb, passphrase,
-+ key, &sb_key);
-+ ret = 0;
-+ }
-+#endif
-+
-+ /* stash with memfd, pass memfd fd to mount */
-+
-+ return ret;
-+}
-+
-+#ifndef __KERNEL__
-+int bch2_revoke_key(struct bch_sb *sb)
-+{
-+ key_serial_t key_id;
-+ struct printbuf key_description = PRINTBUF;
-+
-+ prt_printf(&key_description, "bcachefs:");
-+ pr_uuid(&key_description, sb->user_uuid.b);
-+
-+ key_id = request_key("user", key_description.buf, NULL, KEY_SPEC_USER_KEYRING);
-+ printbuf_exit(&key_description);
-+ if (key_id < 0)
-+ return errno;
-+
-+ keyctl_revoke(key_id);
-+
-+ return 0;
-+}
-+#endif
-+
-+int bch2_decrypt_sb_key(struct bch_fs *c,
-+ struct bch_sb_field_crypt *crypt,
-+ struct bch_key *key)
-+{
-+ struct bch_encrypted_key sb_key = crypt->key;
-+ struct bch_key user_key;
-+ int ret = 0;
-+
-+ /* is key encrypted? */
-+ if (!bch2_key_is_encrypted(&sb_key))
-+ goto out;
-+
-+ ret = bch2_request_key(c->disk_sb.sb, &user_key);
-+ if (ret) {
-+ bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret));
-+ goto err;
-+ }
-+
-+ /* decrypt real key: */
-+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
-+ &sb_key, sizeof(sb_key));
-+ if (ret)
-+ goto err;
-+
-+ if (bch2_key_is_encrypted(&sb_key)) {
-+ bch_err(c, "incorrect encryption key");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+out:
-+ *key = sb_key.key;
-+err:
-+ memzero_explicit(&sb_key, sizeof(sb_key));
-+ memzero_explicit(&user_key, sizeof(user_key));
-+ return ret;
-+}
-+
-+static int bch2_alloc_ciphers(struct bch_fs *c)
-+{
-+ int ret;
-+
-+ if (!c->chacha20)
-+ c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
-+ ret = PTR_ERR_OR_ZERO(c->chacha20);
-+
-+ if (ret) {
-+ bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret));
-+ return ret;
-+ }
-+
-+ if (!c->poly1305)
-+ c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
-+ ret = PTR_ERR_OR_ZERO(c->poly1305);
-+
-+ if (ret) {
-+ bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret));
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+int bch2_disable_encryption(struct bch_fs *c)
-+{
-+ struct bch_sb_field_crypt *crypt;
-+ struct bch_key key;
-+ int ret = -EINVAL;
-+
-+ mutex_lock(&c->sb_lock);
-+
-+ crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
-+ if (!crypt)
-+ goto out;
-+
-+ /* is key encrypted? */
-+ ret = 0;
-+ if (bch2_key_is_encrypted(&crypt->key))
-+ goto out;
-+
-+ ret = bch2_decrypt_sb_key(c, crypt, &key);
-+ if (ret)
-+ goto out;
-+
-+ crypt->key.magic = cpu_to_le64(BCH_KEY_MAGIC);
-+ crypt->key.key = key;
-+
-+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0);
-+ bch2_write_super(c);
-+out:
-+ mutex_unlock(&c->sb_lock);
-+
-+ return ret;
-+}
-+
-+int bch2_enable_encryption(struct bch_fs *c, bool keyed)
-+{
-+ struct bch_encrypted_key key;
-+ struct bch_key user_key;
-+ struct bch_sb_field_crypt *crypt;
-+ int ret = -EINVAL;
-+
-+ mutex_lock(&c->sb_lock);
-+
-+ /* Do we already have an encryption key? */
-+ if (bch2_sb_field_get(c->disk_sb.sb, crypt))
-+ goto err;
-+
-+ ret = bch2_alloc_ciphers(c);
-+ if (ret)
-+ goto err;
-+
-+ key.magic = cpu_to_le64(BCH_KEY_MAGIC);
-+ get_random_bytes(&key.key, sizeof(key.key));
-+
-+ if (keyed) {
-+ ret = bch2_request_key(c->disk_sb.sb, &user_key);
-+ if (ret) {
-+ bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret));
-+ goto err;
-+ }
-+
-+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
-+ &key, sizeof(key));
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = crypto_skcipher_setkey(&c->chacha20->base,
-+ (void *) &key.key, sizeof(key.key));
-+ if (ret)
-+ goto err;
-+
-+ crypt = bch2_sb_field_resize(&c->disk_sb, crypt,
-+ sizeof(*crypt) / sizeof(u64));
-+ if (!crypt) {
-+ ret = -BCH_ERR_ENOSPC_sb_crypt;
-+ goto err;
-+ }
-+
-+ crypt->key = key;
-+
-+ /* write superblock */
-+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1);
-+ bch2_write_super(c);
-+err:
-+ mutex_unlock(&c->sb_lock);
-+ memzero_explicit(&user_key, sizeof(user_key));
-+ memzero_explicit(&key, sizeof(key));
-+ return ret;
-+}
-+
-+void bch2_fs_encryption_exit(struct bch_fs *c)
-+{
-+ if (!IS_ERR_OR_NULL(c->poly1305))
-+ crypto_free_shash(c->poly1305);
-+ if (!IS_ERR_OR_NULL(c->chacha20))
-+ crypto_free_sync_skcipher(c->chacha20);
-+ if (!IS_ERR_OR_NULL(c->sha256))
-+ crypto_free_shash(c->sha256);
-+}
-+
-+int bch2_fs_encryption_init(struct bch_fs *c)
-+{
-+ struct bch_sb_field_crypt *crypt;
-+ struct bch_key key;
-+ int ret = 0;
-+
-+ c->sha256 = crypto_alloc_shash("sha256", 0, 0);
-+ ret = PTR_ERR_OR_ZERO(c->sha256);
-+ if (ret) {
-+ bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret));
-+ goto out;
-+ }
-+
-+ crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
-+ if (!crypt)
-+ goto out;
-+
-+ ret = bch2_alloc_ciphers(c);
-+ if (ret)
-+ goto out;
-+
-+ ret = bch2_decrypt_sb_key(c, crypt, &key);
-+ if (ret)
-+ goto out;
-+
-+ ret = crypto_skcipher_setkey(&c->chacha20->base,
-+ (void *) &key.key, sizeof(key.key));
-+ if (ret)
-+ goto out;
-+out:
-+ memzero_explicit(&key, sizeof(key));
-+ return ret;
-+}
-diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
-new file mode 100644
-index 000000000000..13998388c545
---- /dev/null
-+++ b/fs/bcachefs/checksum.h
-@@ -0,0 +1,213 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_CHECKSUM_H
-+#define _BCACHEFS_CHECKSUM_H
-+
-+#include "bcachefs.h"
-+#include "extents_types.h"
-+#include "super-io.h"
-+
-+#include <linux/crc64.h>
-+#include <crypto/chacha.h>
-+
-+static inline bool bch2_checksum_mergeable(unsigned type)
-+{
-+
-+ switch (type) {
-+ case BCH_CSUM_none:
-+ case BCH_CSUM_crc32c:
-+ case BCH_CSUM_crc64:
-+ return true;
-+ default:
-+ return false;
-+ }
-+}
-+
-+struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum,
-+ struct bch_csum, size_t);
-+
-+#define BCH_NONCE_EXTENT cpu_to_le32(1 << 28)
-+#define BCH_NONCE_BTREE cpu_to_le32(2 << 28)
-+#define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28)
-+#define BCH_NONCE_PRIO cpu_to_le32(4 << 28)
-+#define BCH_NONCE_POLY cpu_to_le32(1 << 31)
-+
-+struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
-+ const void *, size_t);
-+
-+/*
-+ * This is used for various on disk data structures - bch_sb, prio_set, bset,
-+ * jset: The checksum is _always_ the first field of these structs
-+ */
-+#define csum_vstruct(_c, _type, _nonce, _i) \
-+({ \
-+ const void *_start = ((const void *) (_i)) + sizeof((_i)->csum);\
-+ \
-+ bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\
-+})
-+
-+int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
-+int bch2_request_key(struct bch_sb *, struct bch_key *);
-+#ifndef __KERNEL__
-+int bch2_revoke_key(struct bch_sb *);
-+#endif
-+
-+int bch2_encrypt(struct bch_fs *, unsigned, struct nonce,
-+ void *data, size_t);
-+
-+struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned,
-+ struct nonce, struct bio *);
-+
-+int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion,
-+ struct bch_extent_crc_unpacked,
-+ struct bch_extent_crc_unpacked *,
-+ struct bch_extent_crc_unpacked *,
-+ unsigned, unsigned, unsigned);
-+
-+int __bch2_encrypt_bio(struct bch_fs *, unsigned,
-+ struct nonce, struct bio *);
-+
-+static inline int bch2_encrypt_bio(struct bch_fs *c, unsigned type,
-+ struct nonce nonce, struct bio *bio)
-+{
-+ return bch2_csum_type_is_encryption(type)
-+ ? __bch2_encrypt_bio(c, type, nonce, bio)
-+ : 0;
-+}
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_crypt;
-+
-+int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *,
-+ struct bch_key *);
-+
-+int bch2_disable_encryption(struct bch_fs *);
-+int bch2_enable_encryption(struct bch_fs *, bool);
-+
-+void bch2_fs_encryption_exit(struct bch_fs *);
-+int bch2_fs_encryption_init(struct bch_fs *);
-+
-+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type,
-+ bool data)
-+{
-+ switch (type) {
-+ case BCH_CSUM_OPT_none:
-+ return BCH_CSUM_none;
-+ case BCH_CSUM_OPT_crc32c:
-+ return data ? BCH_CSUM_crc32c : BCH_CSUM_crc32c_nonzero;
-+ case BCH_CSUM_OPT_crc64:
-+ return data ? BCH_CSUM_crc64 : BCH_CSUM_crc64_nonzero;
-+ case BCH_CSUM_OPT_xxhash:
-+ return BCH_CSUM_xxhash;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c,
-+ struct bch_io_opts opts)
-+{
-+ if (opts.nocow)
-+ return 0;
-+
-+ if (c->sb.encryption_type)
-+ return c->opts.wide_macs
-+ ? BCH_CSUM_chacha20_poly1305_128
-+ : BCH_CSUM_chacha20_poly1305_80;
-+
-+ return bch2_csum_opt_to_type(opts.data_checksum, true);
-+}
-+
-+static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
-+{
-+ if (c->sb.encryption_type)
-+ return BCH_CSUM_chacha20_poly1305_128;
-+
-+ return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
-+}
-+
-+static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
-+ unsigned type)
-+{
-+ if (type >= BCH_CSUM_NR)
-+ return false;
-+
-+ if (bch2_csum_type_is_encryption(type) && !c->chacha20)
-+ return false;
-+
-+ return true;
-+}
-+
-+/* returns true if not equal */
-+static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r)
-+{
-+ /*
-+ * XXX: need some way of preventing the compiler from optimizing this
-+ * into a form that isn't constant time..
-+ */
-+ return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0;
-+}
-+
-+/* for skipping ahead and encrypting/decrypting at an offset: */
-+static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
-+{
-+ EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1));
-+
-+ le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE);
-+ return nonce;
-+}
-+
-+static inline struct nonce null_nonce(void)
-+{
-+ struct nonce ret;
-+
-+ memset(&ret, 0, sizeof(ret));
-+ return ret;
-+}
-+
-+static inline struct nonce extent_nonce(struct bversion version,
-+ struct bch_extent_crc_unpacked crc)
-+{
-+ unsigned compression_type = crc_is_compressed(crc)
-+ ? crc.compression_type
-+ : 0;
-+ unsigned size = compression_type ? crc.uncompressed_size : 0;
-+ struct nonce nonce = (struct nonce) {{
-+ [0] = cpu_to_le32(size << 22),
-+ [1] = cpu_to_le32(version.lo),
-+ [2] = cpu_to_le32(version.lo >> 32),
-+ [3] = cpu_to_le32(version.hi|
-+ (compression_type << 24))^BCH_NONCE_EXTENT,
-+ }};
-+
-+ return nonce_add(nonce, crc.nonce << 9);
-+}
-+
-+static inline bool bch2_key_is_encrypted(struct bch_encrypted_key *key)
-+{
-+ return le64_to_cpu(key->magic) != BCH_KEY_MAGIC;
-+}
-+
-+static inline struct nonce __bch2_sb_key_nonce(struct bch_sb *sb)
-+{
-+ __le64 magic = __bch2_sb_magic(sb);
-+
-+ return (struct nonce) {{
-+ [0] = 0,
-+ [1] = 0,
-+ [2] = ((__le32 *) &magic)[0],
-+ [3] = ((__le32 *) &magic)[1],
-+ }};
-+}
-+
-+static inline struct nonce bch2_sb_key_nonce(struct bch_fs *c)
-+{
-+ __le64 magic = bch2_sb_magic(c);
-+
-+ return (struct nonce) {{
-+ [0] = 0,
-+ [1] = 0,
-+ [2] = ((__le32 *) &magic)[0],
-+ [3] = ((__le32 *) &magic)[1],
-+ }};
-+}
-+
-+#endif /* _BCACHEFS_CHECKSUM_H */
-diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
-new file mode 100644
-index 000000000000..f41889093a2c
---- /dev/null
-+++ b/fs/bcachefs/clock.c
-@@ -0,0 +1,193 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "clock.h"
-+
-+#include <linux/freezer.h>
-+#include <linux/kthread.h>
-+#include <linux/preempt.h>
-+
-+static inline long io_timer_cmp(io_timer_heap *h,
-+ struct io_timer *l,
-+ struct io_timer *r)
-+{
-+ return l->expire - r->expire;
-+}
-+
-+void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
-+{
-+ size_t i;
-+
-+ spin_lock(&clock->timer_lock);
-+
-+ if (time_after_eq((unsigned long) atomic64_read(&clock->now),
-+ timer->expire)) {
-+ spin_unlock(&clock->timer_lock);
-+ timer->fn(timer);
-+ return;
-+ }
-+
-+ for (i = 0; i < clock->timers.used; i++)
-+ if (clock->timers.data[i] == timer)
-+ goto out;
-+
-+ BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
-+out:
-+ spin_unlock(&clock->timer_lock);
-+}
-+
-+void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
-+{
-+ size_t i;
-+
-+ spin_lock(&clock->timer_lock);
-+
-+ for (i = 0; i < clock->timers.used; i++)
-+ if (clock->timers.data[i] == timer) {
-+ heap_del(&clock->timers, i, io_timer_cmp, NULL);
-+ break;
-+ }
-+
-+ spin_unlock(&clock->timer_lock);
-+}
-+
-+struct io_clock_wait {
-+ struct io_timer io_timer;
-+ struct timer_list cpu_timer;
-+ struct task_struct *task;
-+ int expired;
-+};
-+
-+static void io_clock_wait_fn(struct io_timer *timer)
-+{
-+ struct io_clock_wait *wait = container_of(timer,
-+ struct io_clock_wait, io_timer);
-+
-+ wait->expired = 1;
-+ wake_up_process(wait->task);
-+}
-+
-+static void io_clock_cpu_timeout(struct timer_list *timer)
-+{
-+ struct io_clock_wait *wait = container_of(timer,
-+ struct io_clock_wait, cpu_timer);
-+
-+ wait->expired = 1;
-+ wake_up_process(wait->task);
-+}
-+
-+void bch2_io_clock_schedule_timeout(struct io_clock *clock, unsigned long until)
-+{
-+ struct io_clock_wait wait;
-+
-+ /* XXX: calculate sleep time rigorously */
-+ wait.io_timer.expire = until;
-+ wait.io_timer.fn = io_clock_wait_fn;
-+ wait.task = current;
-+ wait.expired = 0;
-+ bch2_io_timer_add(clock, &wait.io_timer);
-+
-+ schedule();
-+
-+ bch2_io_timer_del(clock, &wait.io_timer);
-+}
-+
-+void bch2_kthread_io_clock_wait(struct io_clock *clock,
-+ unsigned long io_until,
-+ unsigned long cpu_timeout)
-+{
-+ bool kthread = (current->flags & PF_KTHREAD) != 0;
-+ struct io_clock_wait wait;
-+
-+ wait.io_timer.expire = io_until;
-+ wait.io_timer.fn = io_clock_wait_fn;
-+ wait.task = current;
-+ wait.expired = 0;
-+ bch2_io_timer_add(clock, &wait.io_timer);
-+
-+ timer_setup_on_stack(&wait.cpu_timer, io_clock_cpu_timeout, 0);
-+
-+ if (cpu_timeout != MAX_SCHEDULE_TIMEOUT)
-+ mod_timer(&wait.cpu_timer, cpu_timeout + jiffies);
-+
-+ while (1) {
-+ set_current_state(TASK_INTERRUPTIBLE);
-+ if (kthread && kthread_should_stop())
-+ break;
-+
-+ if (wait.expired)
-+ break;
-+
-+ schedule();
-+ try_to_freeze();
-+ }
-+
-+ __set_current_state(TASK_RUNNING);
-+ del_timer_sync(&wait.cpu_timer);
-+ destroy_timer_on_stack(&wait.cpu_timer);
-+ bch2_io_timer_del(clock, &wait.io_timer);
-+}
-+
-+static struct io_timer *get_expired_timer(struct io_clock *clock,
-+ unsigned long now)
-+{
-+ struct io_timer *ret = NULL;
-+
-+ spin_lock(&clock->timer_lock);
-+
-+ if (clock->timers.used &&
-+ time_after_eq(now, clock->timers.data[0]->expire))
-+ heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
-+
-+ spin_unlock(&clock->timer_lock);
-+
-+ return ret;
-+}
-+
-+void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
-+{
-+ struct io_timer *timer;
-+ unsigned long now = atomic64_add_return(sectors, &clock->now);
-+
-+ while ((timer = get_expired_timer(clock, now)))
-+ timer->fn(timer);
-+}
-+
-+void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
-+{
-+ unsigned long now;
-+ unsigned i;
-+
-+ out->atomic++;
-+ spin_lock(&clock->timer_lock);
-+ now = atomic64_read(&clock->now);
-+
-+ for (i = 0; i < clock->timers.used; i++)
-+ prt_printf(out, "%ps:\t%li\n",
-+ clock->timers.data[i]->fn,
-+ clock->timers.data[i]->expire - now);
-+ spin_unlock(&clock->timer_lock);
-+ --out->atomic;
-+}
-+
-+void bch2_io_clock_exit(struct io_clock *clock)
-+{
-+ free_heap(&clock->timers);
-+ free_percpu(clock->pcpu_buf);
-+}
-+
-+int bch2_io_clock_init(struct io_clock *clock)
-+{
-+ atomic64_set(&clock->now, 0);
-+ spin_lock_init(&clock->timer_lock);
-+
-+ clock->max_slop = IO_CLOCK_PCPU_SECTORS * num_possible_cpus();
-+
-+ clock->pcpu_buf = alloc_percpu(*clock->pcpu_buf);
-+ if (!clock->pcpu_buf)
-+ return -BCH_ERR_ENOMEM_io_clock_init;
-+
-+ if (!init_heap(&clock->timers, NR_IO_TIMERS, GFP_KERNEL))
-+ return -BCH_ERR_ENOMEM_io_clock_init;
-+
-+ return 0;
-+}
-diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h
-new file mode 100644
-index 000000000000..70a0f7436c84
---- /dev/null
-+++ b/fs/bcachefs/clock.h
-@@ -0,0 +1,38 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_CLOCK_H
-+#define _BCACHEFS_CLOCK_H
-+
-+void bch2_io_timer_add(struct io_clock *, struct io_timer *);
-+void bch2_io_timer_del(struct io_clock *, struct io_timer *);
-+void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long,
-+ unsigned long);
-+
-+void __bch2_increment_clock(struct io_clock *, unsigned);
-+
-+static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors,
-+ int rw)
-+{
-+ struct io_clock *clock = &c->io_clock[rw];
-+
-+ if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >=
-+ IO_CLOCK_PCPU_SECTORS))
-+ __bch2_increment_clock(clock, this_cpu_xchg(*clock->pcpu_buf, 0));
-+}
-+
-+void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long);
-+
-+#define bch2_kthread_wait_event_ioclock_timeout(condition, clock, timeout)\
-+({ \
-+ long __ret = timeout; \
-+ might_sleep(); \
-+ if (!___wait_cond_timeout(condition)) \
-+ __ret = __wait_event_timeout(wq, condition, timeout); \
-+ __ret; \
-+})
-+
-+void bch2_io_timers_to_text(struct printbuf *, struct io_clock *);
-+
-+void bch2_io_clock_exit(struct io_clock *);
-+int bch2_io_clock_init(struct io_clock *);
-+
-+#endif /* _BCACHEFS_CLOCK_H */
-diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h
-new file mode 100644
-index 000000000000..5fae0012d808
---- /dev/null
-+++ b/fs/bcachefs/clock_types.h
-@@ -0,0 +1,37 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_CLOCK_TYPES_H
-+#define _BCACHEFS_CLOCK_TYPES_H
-+
-+#include "util.h"
-+
-+#define NR_IO_TIMERS (BCH_SB_MEMBERS_MAX * 3)
-+
-+/*
-+ * Clocks/timers in units of sectors of IO:
-+ *
-+ * Note - they use percpu batching, so they're only approximate.
-+ */
-+
-+struct io_timer;
-+typedef void (*io_timer_fn)(struct io_timer *);
-+
-+struct io_timer {
-+ io_timer_fn fn;
-+ unsigned long expire;
-+};
-+
-+/* Amount to buffer up on a percpu counter */
-+#define IO_CLOCK_PCPU_SECTORS 128
-+
-+typedef HEAP(struct io_timer *) io_timer_heap;
-+
-+struct io_clock {
-+ atomic64_t now;
-+ u16 __percpu *pcpu_buf;
-+ unsigned max_slop;
-+
-+ spinlock_t timer_lock;
-+ io_timer_heap timers;
-+};
-+
-+#endif /* _BCACHEFS_CLOCK_TYPES_H */
-diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
-new file mode 100644
-index 000000000000..a8b148ec2a2b
---- /dev/null
-+++ b/fs/bcachefs/compress.c
-@@ -0,0 +1,728 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "checksum.h"
-+#include "compress.h"
-+#include "extents.h"
-+#include "super-io.h"
-+
-+#include <linux/lz4.h>
-+#include <linux/zlib.h>
-+#include <linux/zstd.h>
-+
-+/* Bounce buffer: */
-+struct bbuf {
-+ void *b;
-+ enum {
-+ BB_NONE,
-+ BB_VMAP,
-+ BB_KMALLOC,
-+ BB_MEMPOOL,
-+ } type;
-+ int rw;
-+};
-+
-+static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
-+{
-+ void *b;
-+
-+ BUG_ON(size > c->opts.encoded_extent_max);
-+
-+ b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
-+ if (b)
-+ return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
-+
-+ b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
-+ if (b)
-+ return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
-+
-+ BUG();
-+}
-+
-+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
-+{
-+ struct bio_vec bv;
-+ struct bvec_iter iter;
-+ void *expected_start = NULL;
-+
-+ __bio_for_each_bvec(bv, bio, iter, start) {
-+ if (expected_start &&
-+ expected_start != page_address(bv.bv_page) + bv.bv_offset)
-+ return false;
-+
-+ expected_start = page_address(bv.bv_page) +
-+ bv.bv_offset + bv.bv_len;
-+ }
-+
-+ return true;
-+}
-+
-+/*
-+ * Get a linear view of the data in @bio starting at @start, trying the
-+ * cheapest option first:
-+ *
-+ * 1. the first page is not highmem and the bvecs are already contiguous:
-+ * point straight at the data (BB_NONE);
-+ * 2. the pages can be laid out back to back: vmap() them (BB_VMAP);
-+ * 3. otherwise allocate a bounce buffer, and for @rw == READ copy the
-+ * bio's data into it.
-+ *
-+ * start.bi_size must not exceed c->opts.encoded_extent_max. The result is
-+ * released with bio_unmap_or_unbounce().
-+ */
-+static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
-+ struct bvec_iter start, int rw)
-+{
-+ struct bbuf ret;
-+ struct bio_vec bv;
-+ struct bvec_iter iter;
-+ unsigned nr_pages = 0;
-+ struct page *stack_pages[16];
-+ struct page **pages = NULL;
-+ void *data;
-+
-+ BUG_ON(start.bi_size > c->opts.encoded_extent_max);
-+
-+ if (!PageHighMem(bio_iter_page(bio, start)) &&
-+ bio_phys_contig(bio, start))
-+ return (struct bbuf) {
-+ .b = page_address(bio_iter_page(bio, start)) +
-+ bio_iter_offset(bio, start),
-+ .type = BB_NONE, .rw = rw
-+ };
-+
-+ /* check if we can map the pages contiguously: */
-+ __bio_for_each_segment(bv, bio, iter, start) {
-+ /* every segment except the first must start at offset 0 in its page */
-+ if (iter.bi_size != start.bi_size &&
-+ bv.bv_offset)
-+ goto bounce;
-+
-+ /* every segment except the last must run to the end of its page */
-+ if (bv.bv_len < iter.bi_size &&
-+ bv.bv_offset + bv.bv_len < PAGE_SIZE)
-+ goto bounce;
-+
-+ nr_pages++;
-+ }
-+
-+ BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
-+
-+ /* small page lists live on the stack, larger ones are heap allocated */
-+ pages = nr_pages > ARRAY_SIZE(stack_pages)
-+ ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
-+ : stack_pages;
-+ if (!pages)
-+ goto bounce;
-+
-+ nr_pages = 0;
-+ __bio_for_each_segment(bv, bio, iter, start)
-+ pages[nr_pages++] = bv.bv_page;
-+
-+ data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
-+ if (pages != stack_pages)
-+ kfree(pages);
-+
-+ /* a failed vmap() falls through to the bounce path below */
-+ if (data)
-+ return (struct bbuf) {
-+ .b = data + bio_iter_offset(bio, start),
-+ .type = BB_VMAP, .rw = rw
-+ };
-+bounce:
-+ ret = __bounce_alloc(c, start.bi_size, rw);
-+
-+ if (rw == READ)
-+ memcpy_from_bio(ret.b, bio, start);
-+
-+ return ret;
-+}
-+
-+/* __bio_map_or_bounce() starting from the bio's current iterator. */
-+static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
-+{
-+ return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
-+}
-+
-+/*
-+ * Release a buffer obtained from __bio_map_or_bounce() or __bounce_alloc(),
-+ * according to how it was obtained (buf.type). This does not copy any data
-+ * back to a bio.
-+ */
-+static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
-+{
-+ switch (buf.type) {
-+ case BB_NONE:
-+ break;
-+ case BB_VMAP:
-+ /*
-+ * buf.b may include the in-page offset added when the mapping
-+ * was set up; mask it off to get back a page-aligned address.
-+ */
-+ vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
-+ break;
-+ case BB_KMALLOC:
-+ kfree(buf.b);
-+ break;
-+ case BB_MEMPOOL:
-+ mempool_free(buf.b, &c->compression_bounce[buf.rw]);
-+ break;
-+ }
-+}
-+
-+/*
-+ * Point @strm at a caller-provided @workspace. The assignment is only
-+ * compiled in when __KERNEL__ is defined; otherwise this is a no-op.
-+ */
-+static inline void zlib_set_workspace(z_stream *strm, void *workspace)
-+{
-+#ifdef __KERNEL__
-+ strm->workspace = workspace;
-+#endif
-+}
-+
-+/*
-+ * Decompress the data in @src into @dst_data using the algorithm named by
-+ * crc.compression_type.
-+ *
-+ * Up to crc.uncompressed_size << 9 bytes are written to @dst_data, and the
-+ * decompressed length must equal that exactly.
-+ *
-+ * Returns 0 on success, -EIO if decompression fails or produces the wrong
-+ * amount of data. An unknown compression type hits BUG().
-+ */
-+static int __bio_uncompress(struct bch_fs *c, struct bio *src,
-+ void *dst_data, struct bch_extent_crc_unpacked crc)
-+{
-+ struct bbuf src_data = { NULL };
-+ size_t src_len = src->bi_iter.bi_size;
-+ size_t dst_len = crc.uncompressed_size << 9;
-+ void *workspace;
-+ int ret;
-+
-+ src_data = bio_map_or_bounce(c, src, READ);
-+
-+ switch (crc.compression_type) {
-+ case BCH_COMPRESSION_TYPE_lz4_old:
-+ case BCH_COMPRESSION_TYPE_lz4:
-+ ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
-+ src_len, dst_len, dst_len);
-+ if (ret != dst_len)
-+ goto err;
-+ break;
-+ case BCH_COMPRESSION_TYPE_gzip: {
-+ z_stream strm = {
-+ .next_in = src_data.b,
-+ .avail_in = src_len,
-+ .next_out = dst_data,
-+ .avail_out = dst_len,
-+ };
-+
-+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
-+
-+ zlib_set_workspace(&strm, workspace);
-+ /* negative window bits: matches the -MAX_WBITS used when compressing */
-+ zlib_inflateInit2(&strm, -MAX_WBITS);
-+ ret = zlib_inflate(&strm, Z_FINISH);
-+
-+ mempool_free(workspace, &c->decompress_workspace);
-+
-+ if (ret != Z_STREAM_END)
-+ goto err;
-+ break;
-+ }
-+ case BCH_COMPRESSION_TYPE_zstd: {
-+ ZSTD_DCtx *ctx;
-+ /*
-+ * The first 4 bytes hold the exact compressed length
-+ * (little endian); the zstd data follows.
-+ */
-+ size_t real_src_len = le32_to_cpup(src_data.b);
-+
-+ if (real_src_len > src_len - 4)
-+ goto err;
-+
-+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
-+ ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
-+
-+ ret = zstd_decompress_dctx(ctx,
-+ dst_data, dst_len,
-+ src_data.b + 4, real_src_len);
-+
-+ mempool_free(workspace, &c->decompress_workspace);
-+
-+ if (ret != dst_len)
-+ goto err;
-+ break;
-+ }
-+ default:
-+ BUG();
-+ }
-+ ret = 0;
-+out:
-+ bio_unmap_or_unbounce(c, src_data);
-+ return ret;
-+err:
-+ ret = -EIO;
-+ goto out;
-+}
-+
-+/*
-+ * Replace the compressed contents of @bio with the live part of the
-+ * decompressed data, and rewrite @crc to describe an uncompressed,
-+ * unchecksummed extent of crc->live_size sectors.
-+ *
-+ * The data is decompressed into a bounce buffer first, then live_size
-+ * sectors starting at crc->offset are copied back into @bio.
-+ *
-+ * Returns 0 on success, -EIO if the extent is larger than
-+ * c->opts.encoded_extent_max or decompression fails; @bio and @crc are not
-+ * modified in the error cases.
-+ */
-+int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
-+ struct bch_extent_crc_unpacked *crc)
-+{
-+ struct bbuf data = { NULL };
-+ size_t dst_len = crc->uncompressed_size << 9;
-+
-+ /* bio must own its pages: */
-+ BUG_ON(!bio->bi_vcnt);
-+ BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
-+
-+ if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
-+ crc->compressed_size << 9 > c->opts.encoded_extent_max) {
-+ bch_err(c, "error rewriting existing data: extent too big");
-+ return -EIO;
-+ }
-+
-+ data = __bounce_alloc(c, dst_len, WRITE);
-+
-+ if (__bio_uncompress(c, bio, data.b, *crc)) {
-+ if (!c->opts.no_data_io)
-+ bch_err(c, "error rewriting existing data: decompression error");
-+ bio_unmap_or_unbounce(c, data);
-+ return -EIO;
-+ }
-+
-+ /*
-+ * XXX: don't have a good way to assert that the bio was allocated with
-+ * enough space, we depend on bch2_move_extent doing the right thing
-+ */
-+ bio->bi_iter.bi_size = crc->live_size << 9;
-+
-+ memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
-+
-+ /* the extent is now plain data: no checksum, no compression, no offset */
-+ crc->csum_type = 0;
-+ crc->compression_type = 0;
-+ crc->compressed_size = crc->live_size;
-+ crc->uncompressed_size = crc->live_size;
-+ crc->offset = 0;
-+ crc->csum = (struct bch_csum) { 0, 0 };
-+
-+ bio_unmap_or_unbounce(c, data);
-+ return 0;
-+}
-+
-+/*
-+ * Decompress @src into @dst at @dst_iter.
-+ *
-+ * When @dst_iter covers exactly the uncompressed size, the destination is
-+ * mapped (or bounced) and decompressed into directly; otherwise the whole
-+ * extent is decompressed into a bounce buffer and the part starting at
-+ * crc.offset is copied out.
-+ *
-+ * Returns 0 on success, -EIO if the extent exceeds
-+ * c->opts.encoded_extent_max, or the error from __bio_uncompress().
-+ */
-+int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
-+ struct bio *dst, struct bvec_iter dst_iter,
-+ struct bch_extent_crc_unpacked crc)
-+{
-+ struct bbuf dst_data = { NULL };
-+ size_t dst_len = crc.uncompressed_size << 9;
-+ int ret;
-+
-+ if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max ||
-+ crc.compressed_size << 9 > c->opts.encoded_extent_max)
-+ return -EIO;
-+
-+ dst_data = dst_len == dst_iter.bi_size
-+ ? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
-+ : __bounce_alloc(c, dst_len, WRITE);
-+
-+ ret = __bio_uncompress(c, src, dst_data.b, crc);
-+ if (ret)
-+ goto err;
-+
-+ /* directly mapped buffers already are the destination; bounces need a copy */
-+ if (dst_data.type != BB_NONE &&
-+ dst_data.type != BB_VMAP)
-+ memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
-+err:
-+ bio_unmap_or_unbounce(c, dst_data);
-+ return ret;
-+}
-+
-+/*
-+ * Make one attempt at compressing @src_len bytes of @src into @dst, which
-+ * has room for @dst_len bytes, with the algorithm and level in
-+ * @compression. @workspace is scratch memory for the compressor.
-+ *
-+ * Returns:
-+ * > 0 number of bytes written to @dst
-+ * 0 the data did not fit / compression failed, no hint available
-+ * < 0 it did not fit; the negated value hints at how much source
-+ * data would fit (lz4), or is just -1 (lz4hc)
-+ *
-+ * An unsupported compression type hits BUG().
-+ */
-+static int attempt_compress(struct bch_fs *c,
-+ void *workspace,
-+ void *dst, size_t dst_len,
-+ void *src, size_t src_len,
-+ struct bch_compression_opt compression)
-+{
-+ enum bch_compression_type compression_type =
-+ __bch2_compression_opt_to_type[compression.type];
-+
-+ switch (compression_type) {
-+ case BCH_COMPRESSION_TYPE_lz4:
-+ if (compression.level < LZ4HC_MIN_CLEVEL) {
-+ int len = src_len;
-+ int ret = LZ4_compress_destSize(
-+ src, dst,
-+ &len, dst_len,
-+ workspace);
-+ /* @len was updated to the amount of input consumed */
-+ if (len < src_len)
-+ return -len;
-+
-+ return ret;
-+ } else {
-+ int ret = LZ4_compress_HC(
-+ src, dst,
-+ src_len, dst_len,
-+ compression.level,
-+ workspace);
-+
-+ return ret ?: -1;
-+ }
-+ case BCH_COMPRESSION_TYPE_gzip: {
-+ z_stream strm = {
-+ .next_in = src,
-+ .avail_in = src_len,
-+ .next_out = dst,
-+ .avail_out = dst_len,
-+ };
-+
-+ zlib_set_workspace(&strm, workspace);
-+ zlib_deflateInit2(&strm,
-+ compression.level
-+ ? clamp_t(unsigned, compression.level,
-+ Z_BEST_SPEED, Z_BEST_COMPRESSION)
-+ : Z_DEFAULT_COMPRESSION,
-+ Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
-+ Z_DEFAULT_STRATEGY);
-+
-+ if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
-+ return 0;
-+
-+ if (zlib_deflateEnd(&strm) != Z_OK)
-+ return 0;
-+
-+ return strm.total_out;
-+ }
-+ case BCH_COMPRESSION_TYPE_zstd: {
-+ /*
-+ * rescale:
-+ * zstd max compression level is 22, our max level is 15
-+ */
-+ unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
-+ ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
-+ ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
-+ zstd_cctx_workspace_bound(&params.cParams));
-+
-+ /*
-+ * ZSTD requires that when we decompress we pass in the exact
-+ * compressed size - rounding it up to the nearest sector
-+ * doesn't work, so we use the first 4 bytes of the buffer for
-+ * that.
-+ *
-+ * Additionally, the ZSTD code seems to have a bug where it will
-+ * write just past the end of the buffer - so subtract a fudge
-+ * factor (7 bytes) from the dst buffer size to account for
-+ * that.
-+ *
-+ * Compress with the same @params the context was sized for,
-+ * so that the requested level is actually honoured.
-+ */
-+ size_t len = zstd_compress_cctx(ctx,
-+ dst + 4, dst_len - 4 - 7,
-+ src, src_len,
-+ &params);
-+ if (zstd_is_error(len))
-+ return 0;
-+
-+ *((__le32 *) dst) = cpu_to_le32(len);
-+ return len + 4;
-+ }
-+ default:
-+ BUG();
-+ }
-+}
-+
-+/*
-+ * Compress as much of @src as will fit into @dst.
-+ *
-+ * On success *@src_len is the number of input bytes consumed and *@dst_len
-+ * the number of output bytes produced (zero padded); both are multiples of
-+ * the block size, and the compression type is returned.
-+ *
-+ * Returns BCH_COMPRESSION_TYPE_incompressible if the input is no more than
-+ * one block, if no attempt fits, or if the result is not smaller than the
-+ * input after rounding up to a block.
-+ */
-+static unsigned __bio_compress(struct bch_fs *c,
-+ struct bio *dst, size_t *dst_len,
-+ struct bio *src, size_t *src_len,
-+ struct bch_compression_opt compression)
-+{
-+ struct bbuf src_data = { NULL }, dst_data = { NULL };
-+ void *workspace;
-+ enum bch_compression_type compression_type =
-+ __bch2_compression_opt_to_type[compression.type];
-+ unsigned pad;
-+ int ret = 0;
-+
-+ BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
-+ BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
-+
-+ /* If it's only one block, don't bother trying to compress: */
-+ if (src->bi_iter.bi_size <= c->opts.block_size)
-+ return BCH_COMPRESSION_TYPE_incompressible;
-+
-+ dst_data = bio_map_or_bounce(c, dst, WRITE);
-+ src_data = bio_map_or_bounce(c, src, READ);
-+
-+ workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);
-+
-+ *src_len = src->bi_iter.bi_size;
-+ *dst_len = dst->bi_iter.bi_size;
-+
-+ /*
-+ * XXX: this algorithm sucks when the compression code doesn't tell us
-+ * how much would fit, like LZ4 does:
-+ */
-+ while (1) {
-+ /* shrunk down to a single block: give up */
-+ if (*src_len <= block_bytes(c)) {
-+ ret = -1;
-+ break;
-+ }
-+
-+ ret = attempt_compress(c, workspace,
-+ dst_data.b, *dst_len,
-+ src_data.b, *src_len,
-+ compression);
-+ if (ret > 0) {
-+ *dst_len = ret;
-+ ret = 0;
-+ break;
-+ }
-+
-+ /* Didn't fit: should we retry with a smaller amount? */
-+ if (*src_len <= *dst_len) {
-+ ret = -1;
-+ break;
-+ }
-+
-+ /*
-+ * If ret is negative, it's a hint as to how much data would fit
-+ */
-+ BUG_ON(-ret >= *src_len);
-+
-+ /* no hint: move src_len halfway towards dst_len */
-+ if (ret < 0)
-+ *src_len = -ret;
-+ else
-+ *src_len -= (*src_len - *dst_len) / 2;
-+ *src_len = round_down(*src_len, block_bytes(c));
-+ }
-+
-+ mempool_free(workspace, &c->compress_workspace[compression_type]);
-+
-+ if (ret)
-+ goto err;
-+
-+ /* Didn't get smaller: */
-+ if (round_up(*dst_len, block_bytes(c)) >= *src_len)
-+ goto err;
-+
-+ /* zero-fill the output up to the next block boundary */
-+ pad = round_up(*dst_len, block_bytes(c)) - *dst_len;
-+
-+ memset(dst_data.b + *dst_len, 0, pad);
-+ *dst_len += pad;
-+
-+ /* bounce buffers must be copied back; direct mappings already are @dst */
-+ if (dst_data.type != BB_NONE &&
-+ dst_data.type != BB_VMAP)
-+ memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
-+
-+ BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
-+ BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
-+ BUG_ON(*dst_len & (block_bytes(c) - 1));
-+ BUG_ON(*src_len & (block_bytes(c) - 1));
-+ ret = compression_type;
-+out:
-+ bio_unmap_or_unbounce(c, src_data);
-+ bio_unmap_or_unbounce(c, dst_data);
-+ return ret;
-+err:
-+ ret = BCH_COMPRESSION_TYPE_incompressible;
-+ goto out;
-+}
-+
-+/*
-+ * Compress @src into @dst according to the encoded option
-+ * @compression_opt.
-+ *
-+ * The bios' iterator sizes are clamped for the duration of the call and
-+ * restored before returning. See __bio_compress() for the meaning of
-+ * *@dst_len, *@src_len and the return value.
-+ */
-+unsigned bch2_bio_compress(struct bch_fs *c,
-+ struct bio *dst, size_t *dst_len,
-+ struct bio *src, size_t *src_len,
-+ unsigned compression_opt)
-+{
-+ unsigned orig_dst = dst->bi_iter.bi_size;
-+ unsigned orig_src = src->bi_iter.bi_size;
-+ unsigned compression_type;
-+
-+ /* Don't consume more than c->opts.encoded_extent_max bytes from @src: */
-+ src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
-+ c->opts.encoded_extent_max);
-+ /* Don't generate a bigger output than input: */
-+ dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
-+
-+ compression_type =
-+ __bio_compress(c, dst, dst_len, src, src_len,
-+ bch2_compression_decode(compression_opt));
-+
-+ dst->bi_iter.bi_size = orig_dst;
-+ src->bi_iter.bi_size = orig_src;
-+ return compression_type;
-+}
-+
-+static int __bch2_fs_compress_init(struct bch_fs *, u64);
-+
-+#define BCH_FEATURE_none 0
-+
-+/*
-+ * Maps each BCH_COMPRESSION_OPT_* value to the BCH_FEATURE_* constant of
-+ * the same name; the table is generated from BCH_COMPRESSION_OPTS().
-+ */
-+static const unsigned bch2_compression_opt_to_feature[] = {
-+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
-+ BCH_COMPRESSION_OPTS()
-+#undef x
-+};
-+
-+#undef BCH_FEATURE_none
-+
-+/*
-+ * Make sure the feature bits in mask @f are set in the superblock.
-+ *
-+ * If they are not, the compression state for those features is initialized
-+ * first, then the bits are set and the superblock is written.
-+ *
-+ * Returns 0 on success or the error from __bch2_fs_compress_init().
-+ */
-+static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
-+{
-+ int ret = 0;
-+
-+ /* fast path without taking the lock */
-+ if ((c->sb.features & f) == f)
-+ return 0;
-+
-+ mutex_lock(&c->sb_lock);
-+
-+ /* recheck now that we hold sb_lock */
-+ if ((c->sb.features & f) == f) {
-+ mutex_unlock(&c->sb_lock);
-+ return 0;
-+ }
-+
-+ ret = __bch2_fs_compress_init(c, c->sb.features|f);
-+ if (ret) {
-+ mutex_unlock(&c->sb_lock);
-+ return ret;
-+ }
-+
-+ c->disk_sb.sb->features[0] |= cpu_to_le64(f);
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Record in the superblock that the compression type selected by
-+ * @compression_opt is in use. A decoded type of 0 needs no feature bit
-+ * and returns 0 immediately.
-+ */
-+int bch2_check_set_has_compressed_data(struct bch_fs *c,
-+ unsigned compression_opt)
-+{
-+ unsigned compression_type = bch2_compression_decode(compression_opt).type;
-+
-+ BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
-+
-+ return compression_type
-+ ? __bch2_check_set_has_compressed_data(c,
-+ 1ULL << bch2_compression_opt_to_feature[compression_type])
-+ : 0;
-+}
-+
-+/*
-+ * Tear down every compression mempool: the decompression workspace, all
-+ * per-type compression workspaces and both bounce pools.
-+ */
-+void bch2_fs_compress_exit(struct bch_fs *c)
-+{
-+ unsigned i;
-+
-+ mempool_exit(&c->decompress_workspace);
-+ for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
-+ mempool_exit(&c->compress_workspace[i]);
-+ mempool_exit(&c->compression_bounce[WRITE]);
-+ mempool_exit(&c->compression_bounce[READ]);
-+}
-+
-+/*
-+ * Set up the mempools needed for the compression features in the bitmask
-+ * @features (bit numbers are BCH_FEATURE_*).
-+ *
-+ * Does nothing beyond caching the zstd parameters if no compression
-+ * feature is set. Pools that are already initialized are left alone, so
-+ * this may be called again as features get enabled.
-+ *
-+ * Returns 0 on success or a negative BCH_ERR_ENOMEM_* code.
-+ */
-+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
-+{
-+ size_t decompress_workspace_size = 0;
-+ ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
-+ c->opts.encoded_extent_max);
-+ struct {
-+ unsigned feature;
-+ enum bch_compression_type type;
-+ size_t compress_workspace;
-+ size_t decompress_workspace;
-+ } compression_types[] = {
-+ { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
-+ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS),
-+ 0 },
-+ { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
-+ zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
-+ zlib_inflate_workspacesize(), },
-+ { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
-+ zstd_cctx_workspace_bound(&params.cParams),
-+ zstd_dctx_workspace_bound() },
-+ }, *i;
-+ bool have_compressed = false;
-+
-+ c->zstd_params = params;
-+
-+ /* @features is 64 bits wide, so test it with a 64-bit mask */
-+ for (i = compression_types;
-+ i < compression_types + ARRAY_SIZE(compression_types);
-+ i++)
-+ have_compressed |= (features & BIT_ULL(i->feature)) != 0;
-+
-+ if (!have_compressed)
-+ return 0;
-+
-+ if (!mempool_initialized(&c->compression_bounce[READ]) &&
-+ mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
-+ 1, c->opts.encoded_extent_max))
-+ return -BCH_ERR_ENOMEM_compression_bounce_read_init;
-+
-+ if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
-+ mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
-+ 1, c->opts.encoded_extent_max))
-+ return -BCH_ERR_ENOMEM_compression_bounce_write_init;
-+
-+ for (i = compression_types;
-+ i < compression_types + ARRAY_SIZE(compression_types);
-+ i++) {
-+ /*
-+ * The single decompression workspace is sized for the largest
-+ * requirement of any type, enabled or not.
-+ */
-+ decompress_workspace_size =
-+ max(decompress_workspace_size, i->decompress_workspace);
-+
-+ if (!(features & BIT_ULL(i->feature)))
-+ continue;
-+
-+ if (mempool_initialized(&c->compress_workspace[i->type]))
-+ continue;
-+
-+ if (mempool_init_kvpmalloc_pool(
-+ &c->compress_workspace[i->type],
-+ 1, i->compress_workspace))
-+ return -BCH_ERR_ENOMEM_compression_workspace_init;
-+ }
-+
-+ if (!mempool_initialized(&c->decompress_workspace) &&
-+ mempool_init_kvpmalloc_pool(&c->decompress_workspace,
-+ 1, decompress_workspace_size))
-+ return -BCH_ERR_ENOMEM_decompression_workspace_init;
-+
-+ return 0;
-+}
-+
-+/* Feature bitmask (a single bit) for the compression type encoded in @v. */
-+static u64 compression_opt_to_feature(unsigned v)
-+{
-+ struct bch_compression_opt decoded = bch2_compression_decode(v);
-+ unsigned feature_nr = bch2_compression_opt_to_feature[decoded.type];
-+
-+ return BIT_ULL(feature_nr);
-+}
-+
-+/*
-+ * Initialize compression state for every feature already recorded in the
-+ * superblock plus those implied by the current compression and
-+ * background_compression options.
-+ */
-+int bch2_fs_compress_init(struct bch_fs *c)
-+{
-+ u64 f = c->sb.features;
-+
-+ f |= compression_opt_to_feature(c->opts.compression);
-+ f |= compression_opt_to_feature(c->opts.background_compression);
-+
-+ return __bch2_fs_compress_init(c, f);
-+}
-+
-+/*
-+ * Parse a compression option string of the form "type" or "type:level".
-+ *
-+ * @_val: string to parse (not modified; a copy is made)
-+ * @res: receives the encoded option on success
-+ * @err: optional printbuf for an error message, may be NULL
-+ *
-+ * The level must be in 0..15, and must be 0 for type index 0.
-+ *
-+ * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or a
-+ * negative error code for an unknown type or invalid level.
-+ */
-+int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
-+ struct printbuf *err)
-+{
-+ char *val = kstrdup(_val, GFP_KERNEL);
-+ char *p = val, *type_str, *level_str;
-+ struct bch_compression_opt opt = { 0 };
-+ int ret;
-+
-+ if (!val)
-+ return -ENOMEM;
-+
-+ /* strsep() leaves @p at the text after ':' or NULL if there is none */
-+ type_str = strsep(&p, ":");
-+ level_str = p;
-+
-+ ret = match_string(bch2_compression_opts, -1, type_str);
-+ if (ret < 0 && err)
-+ prt_str(err, "invalid compression type");
-+ if (ret < 0)
-+ goto err;
-+
-+ opt.type = ret;
-+ /* ret held the matched index; don't leak it out as a return value */
-+ ret = 0;
-+
-+ if (level_str) {
-+ unsigned level;
-+
-+ ret = kstrtouint(level_str, 10, &level);
-+ if (!ret && !opt.type && level)
-+ ret = -EINVAL;
-+ if (!ret && level > 15)
-+ ret = -EINVAL;
-+ if (ret < 0 && err)
-+ prt_str(err, "invalid compression level");
-+ if (ret < 0)
-+ goto err;
-+
-+ opt.level = level;
-+ }
-+
-+ *res = bch2_compression_encode(opt);
-+err:
-+ kfree(val);
-+ return ret;
-+}
-+
-+/*
-+ * Print the encoded compression option @v to @out as its type name,
-+ * followed by ":level" when the level is nonzero.
-+ */
-+void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
-+{
-+ struct bch_compression_opt decoded = bch2_compression_decode(v);
-+
-+ if (decoded.type >= BCH_COMPRESSION_OPT_NR)
-+ prt_printf(out, "(unknown compression opt %u)", decoded.type);
-+ else
-+ prt_str(out, bch2_compression_opts[decoded.type]);
-+
-+ if (decoded.level)
-+ prt_printf(out, ":%u", decoded.level);
-+}
-+
-+/*
-+ * Option-table adapter around bch2_compression_opt_to_text(); @c and @sb
-+ * are unused.
-+ */
-+void bch2_opt_compression_to_text(struct printbuf *out,
-+ struct bch_fs *c,
-+ struct bch_sb *sb,
-+ u64 v)
-+{
-+ bch2_compression_opt_to_text(out, v);
-+}
-+
-+/*
-+ * Check an encoded compression option. Returns 0 if @v is valid; otherwise
-+ * writes a message to @err and returns
-+ * -BCH_ERR_invalid_sb_opt_compression.
-+ */
-+int bch2_opt_compression_validate(u64 v, struct printbuf *err)
-+{
-+ if (bch2_compression_opt_valid(v))
-+ return 0;
-+
-+ prt_printf(err, "invalid compression opt %llu", v);
-+ return -BCH_ERR_invalid_sb_opt_compression;
-+}
-diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h
-new file mode 100644
-index 000000000000..607fd5e232c9
---- /dev/null
-+++ b/fs/bcachefs/compress.h
-@@ -0,0 +1,73 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_COMPRESS_H
-+#define _BCACHEFS_COMPRESS_H
-+
-+#include "extents_types.h"
-+
-+/*
-+ * Maps each BCH_COMPRESSION_OPT_* value to the BCH_COMPRESSION_TYPE_*
-+ * constant of the same name; generated from BCH_COMPRESSION_OPTS().
-+ */
-+static const unsigned __bch2_compression_opt_to_type[] = {
-+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
-+ BCH_COMPRESSION_OPTS()
-+#undef x
-+};
-+
-+/*
-+ * A decoded compression option: two 4-bit fields, so both @type and
-+ * @level range over 0..15.
-+ */
-+struct bch_compression_opt {
-+ u8 type:4,
-+ level:4;
-+};
-+
-+/*
-+ * Split an encoded option into its fields without validating it: the low
-+ * 4 bits are the type, the next bits the level (which is stored in a 4-bit
-+ * field, so anything above bit 7 of @v is lost).
-+ */
-+static inline struct bch_compression_opt __bch2_compression_decode(unsigned v)
-+{
-+ return (struct bch_compression_opt) {
-+ .type = v & 15,
-+ .level = v >> 4,
-+ };
-+}
-+
-+/*
-+ * An encoded option is valid when its type indexes the opt-to-type table
-+ * and a level is only given together with a nonzero type.
-+ */
-+static inline bool bch2_compression_opt_valid(unsigned v)
-+{
-+ struct bch_compression_opt decoded = __bch2_compression_decode(v);
-+
-+ if (decoded.type >= ARRAY_SIZE(__bch2_compression_opt_to_type))
-+ return false;
-+
-+ return decoded.type || !decoded.level;
-+}
-+
-+/*
-+ * Decode @v, falling back to the all-zero option (type 0, level 0) when
-+ * @v is not a valid encoding.
-+ */
-+static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
-+{
-+ if (!bch2_compression_opt_valid(v))
-+ return (struct bch_compression_opt) { 0 };
-+
-+ return __bch2_compression_decode(v);
-+}
-+
-+/* Pack @opt into one value: type in bits 0-3, level in bits 4-7. */
-+static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
-+{
-+ return opt.type|(opt.level << 4);
-+}
-+
-+/*
-+ * Compression type for the encoded option @v; an invalid @v decodes to
-+ * type index 0.
-+ */
-+static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
-+{
-+ return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
-+}
-+
-+int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
-+ struct bch_extent_crc_unpacked *);
-+int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
-+ struct bvec_iter, struct bch_extent_crc_unpacked);
-+unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *,
-+ struct bio *, size_t *, unsigned);
-+
-+int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
-+void bch2_fs_compress_exit(struct bch_fs *);
-+int bch2_fs_compress_init(struct bch_fs *);
-+
-+void bch2_compression_opt_to_text(struct printbuf *, u64);
-+
-+int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
-+void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
-+int bch2_opt_compression_validate(u64, struct printbuf *);
-+
-+/* The parse / to_text / validate callbacks for compression options, bundled. */
-+#define bch2_opt_compression (struct bch_opt_fn) { \
-+ .parse = bch2_opt_compression_parse, \
-+ .to_text = bch2_opt_compression_to_text, \
-+ .validate = bch2_opt_compression_validate, \
-+}
-+
-+#endif /* _BCACHEFS_COMPRESS_H */
-diff --git a/fs/bcachefs/counters.c b/fs/bcachefs/counters.c
-new file mode 100644
-index 000000000000..02a996e06a64
---- /dev/null
-+++ b/fs/bcachefs/counters.c
-@@ -0,0 +1,107 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "super-io.h"
-+#include "counters.h"
-+
-+/* BCH_SB_FIELD_counters */
-+
-+/*
-+ * Counter names, one string per BCH_PERSISTENT_COUNTERS() entry in
-+ * declaration order, followed by a NULL terminator.
-+ */
-+static const char * const bch2_counter_names[] = {
-+#define x(t, n, ...) (#t),
-+ BCH_PERSISTENT_COUNTERS()
-+#undef x
-+ NULL
-+};
-+
-+/*
-+ * Number of 64-bit counters stored in the superblock field @ctrs, i.e. the
-+ * distance from d[0] to the end of the field. Returns 0 for a NULL field.
-+ */
-+static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs)
-+{
-+ if (!ctrs)
-+ return 0;
-+
-+ return (__le64 *) vstruct_end(&ctrs->field) - &ctrs->d[0];
-+}
-+
-+/* Validation hook for the counters field: nothing is checked, always 0. */
-+static int bch2_sb_counters_validate(struct bch_sb *sb,
-+ struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ return 0;
-+}
-+
-+/*
-+ * Print every counter stored in field @f to @out, one per line, as
-+ * "<name> <tab> <value>". Entries at or beyond BCH_COUNTER_NR have no
-+ * known name and are labelled "(unknown)".
-+ */
-+static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_counters *ctrs = field_to_type(f, counters);
-+ unsigned int i;
-+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
-+
-+ for (i = 0; i < nr; i++) {
-+ if (i < BCH_COUNTER_NR)
-+ prt_printf(out, "%s ", bch2_counter_names[i]);
-+ else
-+ prt_printf(out, "(unknown)");
-+
-+ prt_tab(out);
-+ prt_printf(out, "%llu", le64_to_cpu(ctrs->d[i]));
-+ prt_newline(out);
-+ }
-+}
-+
-+/*
-+ * Load the persistent counters from the superblock into the per-cpu
-+ * counters and into c->counters_on_mount.
-+ *
-+ * counters_on_mount is zeroed first, so counters the superblock does not
-+ * store read as 0 there. Always returns 0.
-+ */
-+int bch2_sb_counters_to_cpu(struct bch_fs *c)
-+{
-+ struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters);
-+ unsigned int i;
-+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
-+ u64 val = 0;
-+
-+ for (i = 0; i < BCH_COUNTER_NR; i++)
-+ c->counters_on_mount[i] = 0;
-+
-+ /* nr is 0 when the field is absent, so ctrs is never dereferenced then */
-+ for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++) {
-+ val = le64_to_cpu(ctrs->d[i]);
-+ percpu_u64_set(&c->counters[i], val);
-+ c->counters_on_mount[i] = val;
-+ }
-+ return 0;
-+}
-+
-+/*
-+ * Store the current per-cpu counter totals into the superblock's counters
-+ * field, first trying to grow the field if it holds fewer than
-+ * BCH_COUNTER_NR entries.
-+ *
-+ * Always returns 0: if the resize fails, only as many counters as the
-+ * existing field holds are written (none if there is no field).
-+ */
-+int bch2_sb_counters_from_cpu(struct bch_fs *c)
-+{
-+ struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters);
-+ struct bch_sb_field_counters *ret;
-+ unsigned int i;
-+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
-+
-+ if (nr < BCH_COUNTER_NR) {
-+ /* requested size is in u64s: the field header plus one per counter */
-+ ret = bch2_sb_field_resize(&c->disk_sb, counters,
-+ sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR);
-+
-+ if (ret) {
-+ ctrs = ret;
-+ nr = bch2_sb_counter_nr_entries(ctrs);
-+ }
-+ }
-+
-+
-+ for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++)
-+ ctrs->d[i] = cpu_to_le64(percpu_u64_get(&c->counters[i]));
-+ return 0;
-+}
-+
-+/* Free the per-cpu counter storage allocated by bch2_fs_counters_init(). */
-+void bch2_fs_counters_exit(struct bch_fs *c)
-+{
-+ free_percpu(c->counters);
-+}
-+
-+/*
-+ * Allocate per-cpu storage for BCH_COUNTER_NR u64 counters and fill it
-+ * from the superblock.
-+ *
-+ * Returns -BCH_ERR_ENOMEM_fs_counters_init if the allocation fails,
-+ * otherwise the result of bch2_sb_counters_to_cpu().
-+ */
-+int bch2_fs_counters_init(struct bch_fs *c)
-+{
-+ c->counters = __alloc_percpu(sizeof(u64) * BCH_COUNTER_NR, sizeof(u64));
-+
-+ return c->counters
-+ ? bch2_sb_counters_to_cpu(c)
-+ : -BCH_ERR_ENOMEM_fs_counters_init;
-+}
-+
-+const struct bch_sb_field_ops bch_sb_field_ops_counters = {
-+ .validate = bch2_sb_counters_validate,
-+ .to_text = bch2_sb_counters_to_text,
-+};
-diff --git a/fs/bcachefs/counters.h b/fs/bcachefs/counters.h
-new file mode 100644
-index 000000000000..4778aa19bf34
---- /dev/null
-+++ b/fs/bcachefs/counters.h
-@@ -0,0 +1,17 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_COUNTERS_H
-+#define _BCACHEFS_COUNTERS_H
-+
-+#include "bcachefs.h"
-+#include "super-io.h"
-+
-+
-+int bch2_sb_counters_to_cpu(struct bch_fs *);
-+int bch2_sb_counters_from_cpu(struct bch_fs *);
-+
-+void bch2_fs_counters_exit(struct bch_fs *);
-+int bch2_fs_counters_init(struct bch_fs *);
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_counters;
-+
-+#endif // _BCACHEFS_COUNTERS_H
-diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h
-new file mode 100644
-index 000000000000..87b4b2d1ec76
---- /dev/null
-+++ b/fs/bcachefs/darray.h
-@@ -0,0 +1,93 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_DARRAY_H
-+#define _BCACHEFS_DARRAY_H
-+
-+/*
-+ * Dynamic arrays:
-+ *
-+ * Inspired by CCAN's darray
-+ */
-+
-+#include "util.h"
-+#include <linux/slab.h>
-+
-+/*
-+ * Declares an anonymous dynamic-array struct of @type:
-+ * nr - number of elements in use
-+ * size - number of elements allocated
-+ * data - the element storage
-+ */
-+#define DARRAY(type) \
-+struct { \
-+ size_t nr, size; \
-+ type *data; \
-+}
-+
-+typedef DARRAY(void) darray_void;
-+
-+/*
-+ * Make sure @d has room for @more additional elements of @t_size bytes
-+ * each, growing the allocation to the next power of two if necessary.
-+ *
-+ * Returns 0 on success, -ENOMEM if the reallocation fails (in which case
-+ * @d is left unchanged).
-+ */
-+static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
-+{
-+ if (d->nr + more > d->size) {
-+ size_t new_size = roundup_pow_of_two(d->nr + more);
-+ void *data = krealloc_array(d->data, new_size, t_size, gfp);
-+
-+ if (!data)
-+ return -ENOMEM;
-+
-+ d->data = data;
-+ d->size = new_size;
-+ }
-+
-+ return 0;
-+}
-+
-+#define darray_make_room_gfp(_d, _more, _gfp) \
-+ __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
-+
-+#define darray_make_room(_d, _more) \
-+ darray_make_room_gfp(_d, _more, GFP_KERNEL)
-+
-+/*
-+ * Note: evaluates to data[nr], i.e. the slot just past the last element
-+ * in use - not the last element itself (that is data[nr - 1]).
-+ */
-+#define darray_top(_d) ((_d).data[(_d).nr])
-+
-+/*
-+ * Append @_item to the darray pointed to by @_d, growing it with @_gfp if
-+ * needed. Evaluates to 0 on success or the error from making room, in
-+ * which case nothing is appended.
-+ */
-+#define darray_push_gfp(_d, _item, _gfp) \
-+({ \
-+ int _ret = darray_make_room_gfp((_d), 1, _gfp); \
-+ \
-+ if (!_ret) \
-+ (_d)->data[(_d)->nr++] = (_item); \
-+ _ret; \
-+})
-+
-+#define darray_push(_d, _item) darray_push_gfp(_d, _item, GFP_KERNEL)
-+
-+#define darray_pop(_d) ((_d)->data[--(_d)->nr])
-+
-+#define darray_first(_d) ((_d).data[0])
-+#define darray_last(_d) ((_d).data[(_d).nr - 1])
-+
-+/*
-+ * Insert @_item at index @pos of the darray pointed to by @_d, making
-+ * room (GFP_KERNEL) first. @pos is evaluated once. Evaluates to 0 on
-+ * success or the error from making room.
-+ */
-+#define darray_insert_item(_d, pos, _item) \
-+({ \
-+ size_t _pos = (pos); \
-+ int _ret = darray_make_room((_d), 1); \
-+ \
-+ if (!_ret) \
-+ array_insert_item((_d)->data, (_d)->nr, _pos, (_item)); \
-+ _ret; \
-+})
-+
-+#define darray_remove_item(_d, _pos) \
-+ array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data)
-+
-+#define darray_for_each(_d, _i) \
-+ for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++)
-+
-+/*
-+ * Iterate @_i over the elements of @_d from last to first.
-+ * NOTE(review): the loop ends when @_i is decremented to one before
-+ * (_d).data, and for an empty array it starts there - callers should be
-+ * aware this forms a pointer before the start of the allocation.
-+ */
-+#define darray_for_each_reverse(_d, _i) \
-+ for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i)
-+
-+#define darray_init(_d) \
-+do { \
-+ (_d)->data = NULL; \
-+ (_d)->nr = (_d)->size = 0; \
-+} while (0)
-+
-+/* Free the storage of the darray pointed to by @_d and reset it to empty. */
-+#define darray_exit(_d) \
-+do { \
-+ kfree((_d)->data); \
-+ darray_init(_d); \
-+} while (0)
-+
-+#endif /* _BCACHEFS_DARRAY_H */
-diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
-new file mode 100644
-index 000000000000..0771a6d880bf
---- /dev/null
-+++ b/fs/bcachefs/data_update.c
-@@ -0,0 +1,551 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "data_update.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "io_write.h"
-+#include "keylist.h"
-+#include "move.h"
-+#include "nocow_locking.h"
-+#include "rebalance.h"
-+#include "subvolume.h"
-+#include "trace.h"
-+
-+/*
-+ * Emit the move_extent_finish tracepoint with @k rendered as text. The
-+ * text is only built when the tracepoint is enabled.
-+ */
-+static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
-+{
-+ struct printbuf text = PRINTBUF;
-+
-+ if (!trace_move_extent_finish_enabled())
-+ return;
-+
-+ bch2_bkey_val_to_text(&text, c, k);
-+ trace_move_extent_finish(c, text.buf);
-+ printbuf_exit(&text);
-+}
-+
-+/*
-+ * Emit the move_extent_fail tracepoint with a multi-line description of
-+ * why a data update could not be applied. Does nothing unless the
-+ * tracepoint is enabled.
-+ *
-+ * @m: the data update; its original key is printed as "old"
-+ * @new: printed as "new"
-+ * @wrote: printed as "wrote"
-+ * @insert: the key being built for insertion, or NULL if none was built
-+ * @msg: prefix describing the failure
-+ *
-+ * Only the first four pointers are shown in the "rewrite ptrs" and
-+ * "rewrites found" bit lists.
-+ */
-+static void trace_move_extent_fail2(struct data_update *m,
-+ struct bkey_s_c new,
-+ struct bkey_s_c wrote,
-+ struct bkey_i *insert,
-+ const char *msg)
-+{
-+ struct bch_fs *c = m->op.c;
-+ struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
-+ const union bch_extent_entry *entry;
-+ struct bch_extent_ptr *ptr;
-+ struct extent_ptr_decoded p;
-+ struct printbuf buf = PRINTBUF;
-+ unsigned i, rewrites_found = 0;
-+
-+ if (!trace_move_extent_fail_enabled())
-+ return;
-+
-+ prt_str(&buf, msg);
-+
-+ if (insert) {
-+ /*
-+ * Work out which of the pointers we were asked to rewrite are
-+ * still present, and not cached, in @insert.
-+ */
-+ i = 0;
-+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
-+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
-+ (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
-+ !ptr->cached)
-+ rewrites_found |= 1U << i;
-+ i++;
-+ }
-+ }
-+
-+ prt_printf(&buf, "\nrewrite ptrs: %u%u%u%u",
-+ (m->data_opts.rewrite_ptrs & (1 << 0)) != 0,
-+ (m->data_opts.rewrite_ptrs & (1 << 1)) != 0,
-+ (m->data_opts.rewrite_ptrs & (1 << 2)) != 0,
-+ (m->data_opts.rewrite_ptrs & (1 << 3)) != 0);
-+
-+ prt_printf(&buf, "\nrewrites found: %u%u%u%u",
-+ (rewrites_found & (1 << 0)) != 0,
-+ (rewrites_found & (1 << 1)) != 0,
-+ (rewrites_found & (1 << 2)) != 0,
-+ (rewrites_found & (1 << 3)) != 0);
-+
-+ prt_str(&buf, "\nold: ");
-+ bch2_bkey_val_to_text(&buf, c, old);
-+
-+ prt_str(&buf, "\nnew: ");
-+ bch2_bkey_val_to_text(&buf, c, new);
-+
-+ prt_str(&buf, "\nwrote: ");
-+ bch2_bkey_val_to_text(&buf, c, wrote);
-+
-+ if (insert) {
-+ prt_str(&buf, "\ninsert: ");
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
-+ }
-+
-+ trace_move_extent_fail(c, buf.buf);
-+ printbuf_exit(&buf);
-+}
-+
-+static int __bch2_data_update_index_update(struct btree_trans *trans,
-+ struct bch_write_op *op)
-+{
-+ struct bch_fs *c = op->c;
-+ struct btree_iter iter;
-+ struct data_update *m =
-+ container_of(op, struct data_update, op);
-+ struct keylist *keys = &op->insert_keys;
-+ struct bkey_buf _new, _insert;
-+ int ret = 0;
-+
-+ bch2_bkey_buf_init(&_new);
-+ bch2_bkey_buf_init(&_insert);
-+ bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
-+
-+ bch2_trans_iter_init(trans, &iter, m->btree_id,
-+ bkey_start_pos(&bch2_keylist_front(keys)->k),
-+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-+
-+ while (1) {
-+ struct bkey_s_c k;
-+ struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
-+ struct bkey_i *insert = NULL;
-+ struct bkey_i_extent *new;
-+ const union bch_extent_entry *entry_c;
-+ union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ struct bch_extent_ptr *ptr;
-+ const struct bch_extent_ptr *ptr_c;
-+ struct bpos next_pos;
-+ bool should_check_enospc;
-+ s64 i_sectors_delta = 0, disk_sectors_delta = 0;
-+ unsigned rewrites_found = 0, durability, i;
-+
-+ bch2_trans_begin(trans);
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ new = bkey_i_to_extent(bch2_keylist_front(keys));
-+
-+ if (!bch2_extents_match(k, old)) {
-+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i),
-+ NULL, "no match:");
-+ goto nowork;
-+ }
-+
-+ bkey_reassemble(_insert.k, k);
-+ insert = _insert.k;
-+
-+ bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
-+ new = bkey_i_to_extent(_new.k);
-+ bch2_cut_front(iter.pos, &new->k_i);
-+
-+ bch2_cut_front(iter.pos, insert);
-+ bch2_cut_back(new->k.p, insert);
-+ bch2_cut_back(insert->k.p, &new->k_i);
-+
-+ /*
-+ * @old: extent that we read from
-+ * @insert: key that we're going to update, initialized from
-+ * extent currently in btree - same as @old unless we raced with
-+ * other updates
-+ * @new: extent with new pointers that we'll be adding to @insert
-+ *
-+ * First, drop rewrite_ptrs from @new:
-+ */
-+ i = 0;
-+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) {
-+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
-+ (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
-+ !ptr->cached) {
-+ bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
-+ rewrites_found |= 1U << i;
-+ }
-+ i++;
-+ }
-+
-+ if (m->data_opts.rewrite_ptrs &&
-+ !rewrites_found &&
-+ bch2_bkey_durability(c, k) >= m->op.opts.data_replicas) {
-+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
-+ goto nowork;
-+ }
-+
-+ /*
-+ * A replica that we just wrote might conflict with a replica
-+ * that we want to keep, due to racing with another move:
-+ */
-+restart_drop_conflicting_replicas:
-+ extent_for_each_ptr(extent_i_to_s(new), ptr)
-+ if ((ptr_c = bch2_bkey_has_device_c(bkey_i_to_s_c(insert), ptr->dev)) &&
-+ !ptr_c->cached) {
-+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(&new->k_i), ptr);
-+ goto restart_drop_conflicting_replicas;
-+ }
-+
-+ if (!bkey_val_u64s(&new->k)) {
-+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
-+ goto nowork;
-+ }
-+
-+ /* Now, drop pointers that conflict with what we just wrote: */
-+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
-+ if ((ptr = bch2_bkey_has_device(bkey_i_to_s(insert), p.ptr.dev)))
-+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
-+
-+ durability = bch2_bkey_durability(c, bkey_i_to_s_c(insert)) +
-+ bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i));
-+
-+ /* Now, drop excess replicas: */
-+restart_drop_extra_replicas:
-+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) {
-+ unsigned ptr_durability = bch2_extent_ptr_durability(c, &p);
-+
-+ if (!p.ptr.cached &&
-+ durability - ptr_durability >= m->op.opts.data_replicas) {
-+ durability -= ptr_durability;
-+
-+ bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
-+ goto restart_drop_extra_replicas;
-+ }
-+ }
-+
-+ /* Finally, add the pointers we just wrote: */
-+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
-+ bch2_extent_ptr_decoded_append(insert, &p);
-+
-+ bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
-+ bch2_extent_normalize(c, bkey_i_to_s(insert));
-+
-+ ret = bch2_sum_sector_overwrites(trans, &iter, insert,
-+ &should_check_enospc,
-+ &i_sectors_delta,
-+ &disk_sectors_delta);
-+ if (ret)
-+ goto err;
-+
-+ if (disk_sectors_delta > (s64) op->res.sectors) {
-+ ret = bch2_disk_reservation_add(c, &op->res,
-+ disk_sectors_delta - op->res.sectors,
-+ !should_check_enospc
-+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
-+ if (ret)
-+ goto out;
-+ }
-+
-+ next_pos = insert->k.p;
-+
-+ ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
-+ k.k->p, bkey_start_pos(&insert->k)) ?:
-+ bch2_insert_snapshot_whiteouts(trans, m->btree_id,
-+ k.k->p, insert->k.p) ?:
-+ bch2_bkey_set_needs_rebalance(c, insert,
-+ op->opts.background_target,
-+ op->opts.background_compression) ?:
-+ bch2_trans_update(trans, &iter, insert,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-+ bch2_trans_commit(trans, &op->res,
-+ NULL,
-+ BTREE_INSERT_NOCHECK_RW|
-+ BTREE_INSERT_NOFAIL|
-+ m->data_opts.btree_insert_flags);
-+ if (!ret) {
-+ bch2_btree_iter_set_pos(&iter, next_pos);
-+
-+ this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
-+ trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i));
-+ }
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ ret = 0;
-+ if (ret)
-+ break;
-+next:
-+ while (bkey_ge(iter.pos, bch2_keylist_front(keys)->k.p)) {
-+ bch2_keylist_pop_front(keys);
-+ if (bch2_keylist_empty(keys))
-+ goto out;
-+ }
-+ continue;
-+nowork:
-+ if (m->stats && m->stats) {
-+ BUG_ON(k.k->p.offset <= iter.pos.offset);
-+ atomic64_inc(&m->stats->keys_raced);
-+ atomic64_add(k.k->p.offset - iter.pos.offset,
-+ &m->stats->sectors_raced);
-+ }
-+
-+ this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]);
-+
-+ bch2_btree_iter_advance(&iter);
-+ goto next;
-+ }
-+out:
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_bkey_buf_exit(&_insert, c);
-+ bch2_bkey_buf_exit(&_new, c);
-+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
-+ return ret;
-+}
-+
-+int bch2_data_update_index_update(struct bch_write_op *op)
-+{
-+ return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
-+}
-+
-+void bch2_data_update_read_done(struct data_update *m,
-+ struct bch_extent_crc_unpacked crc)
-+{
-+ /* write bio must own pages: */
-+ BUG_ON(!m->op.wbio.bio.bi_vcnt);
-+
-+ m->op.crc = crc;
-+ m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
-+
-+ closure_call(&m->op.cl, bch2_write, NULL, NULL);
-+}
-+
-+void bch2_data_update_exit(struct data_update *update)
-+{
-+ struct bch_fs *c = update->op.c;
-+ struct bkey_ptrs_c ptrs =
-+ bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k));
-+ const struct bch_extent_ptr *ptr;
-+
-+ bkey_for_each_ptr(ptrs, ptr) {
-+ if (c->opts.nocow_enabled)
-+ bch2_bucket_nocow_unlock(&c->nocow_locks,
-+ PTR_BUCKET_POS(c, ptr), 0);
-+ percpu_ref_put(&bch_dev_bkey_exists(c, ptr->dev)->ref);
-+ }
-+
-+ bch2_bkey_buf_exit(&update->k, c);
-+ bch2_disk_reservation_put(c, &update->op.res);
-+ bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
-+}
-+
-+void bch2_update_unwritten_extent(struct btree_trans *trans,
-+ struct data_update *update)
-+{
-+ struct bch_fs *c = update->op.c;
-+ struct bio *bio = &update->op.wbio.bio;
-+ struct bkey_i_extent *e;
-+ struct write_point *wp;
-+ struct bch_extent_ptr *ptr;
-+ struct closure cl;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ closure_init_stack(&cl);
-+ bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys);
-+
-+ while (bio_sectors(bio)) {
-+ unsigned sectors = bio_sectors(bio);
-+
-+ bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos,
-+ BTREE_ITER_SLOTS);
-+ ret = lockrestart_do(trans, ({
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ bkey_err(k);
-+ }));
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (ret || !bch2_extents_match(k, bkey_i_to_s_c(update->k.k)))
-+ break;
-+
-+ e = bkey_extent_init(update->op.insert_keys.top);
-+ e->k.p = update->op.pos;
-+
-+ ret = bch2_alloc_sectors_start_trans(trans,
-+ update->op.target,
-+ false,
-+ update->op.write_point,
-+ &update->op.devs_have,
-+ update->op.nr_replicas,
-+ update->op.nr_replicas,
-+ update->op.watermark,
-+ 0, &cl, &wp);
-+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
-+ bch2_trans_unlock(trans);
-+ closure_sync(&cl);
-+ continue;
-+ }
-+
-+ if (ret)
-+ return;
-+
-+ sectors = min(sectors, wp->sectors_free);
-+
-+ bch2_key_resize(&e->k, sectors);
-+
-+ bch2_open_bucket_get(c, wp, &update->op.open_buckets);
-+ bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
-+ bch2_alloc_sectors_done(c, wp);
-+
-+ bio_advance(bio, sectors << 9);
-+ update->op.pos.offset += sectors;
-+
-+ extent_for_each_ptr(extent_i_to_s(e), ptr)
-+ ptr->unwritten = true;
-+ bch2_keylist_push(&update->op.insert_keys);
-+
-+ ret = __bch2_data_update_index_update(trans, &update->op);
-+
-+ bch2_open_buckets_put(c, &update->op.open_buckets);
-+
-+ if (ret)
-+ break;
-+ }
-+
-+ if (closure_nr_remaining(&cl) != 1) {
-+ bch2_trans_unlock(trans);
-+ closure_sync(&cl);
-+ }
-+}
-+
-+int bch2_data_update_init(struct btree_trans *trans,
-+ struct moving_context *ctxt,
-+ struct data_update *m,
-+ struct write_point_specifier wp,
-+ struct bch_io_opts io_opts,
-+ struct data_update_opts data_opts,
-+ enum btree_id btree_id,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ const struct bch_extent_ptr *ptr;
-+ unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
-+ unsigned ptrs_locked = 0;
-+ int ret;
-+
-+ bch2_bkey_buf_init(&m->k);
-+ bch2_bkey_buf_reassemble(&m->k, c, k);
-+ m->btree_id = btree_id;
-+ m->data_opts = data_opts;
-+ m->ctxt = ctxt;
-+ m->stats = ctxt ? ctxt->stats : NULL;
-+
-+ bch2_write_op_init(&m->op, c, io_opts);
-+ m->op.pos = bkey_start_pos(k.k);
-+ m->op.version = k.k->version;
-+ m->op.target = data_opts.target;
-+ m->op.write_point = wp;
-+ m->op.nr_replicas = 0;
-+ m->op.flags |= BCH_WRITE_PAGES_STABLE|
-+ BCH_WRITE_PAGES_OWNED|
-+ BCH_WRITE_DATA_ENCODED|
-+ BCH_WRITE_MOVE|
-+ m->data_opts.write_flags;
-+ m->op.compression_opt = io_opts.background_compression ?: io_opts.compression;
-+ m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
-+
-+ bkey_for_each_ptr(ptrs, ptr)
-+ percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
-+
-+ i = 0;
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ bool locked;
-+
-+ if (((1U << i) & m->data_opts.rewrite_ptrs)) {
-+ BUG_ON(p.ptr.cached);
-+
-+ if (crc_is_compressed(p.crc))
-+ reserve_sectors += k.k->size;
-+
-+ m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
-+ } else if (!p.ptr.cached) {
-+ bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
-+ }
-+
-+ /*
-+ * op->csum_type is normally initialized from the fs/file's
-+ * current options - but if an extent is encrypted, we require
-+ * that it stays encrypted:
-+ */
-+ if (bch2_csum_type_is_encryption(p.crc.csum_type)) {
-+ m->op.nonce = p.crc.nonce + p.crc.offset;
-+ m->op.csum_type = p.crc.csum_type;
-+ }
-+
-+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
-+ m->op.incompressible = true;
-+
-+ if (c->opts.nocow_enabled) {
-+ if (ctxt) {
-+ move_ctxt_wait_event(ctxt,
-+ (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
-+ PTR_BUCKET_POS(c, &p.ptr), 0)) ||
-+ !atomic_read(&ctxt->read_sectors));
-+
-+ if (!locked)
-+ bch2_bucket_nocow_lock(&c->nocow_locks,
-+ PTR_BUCKET_POS(c, &p.ptr), 0);
-+ } else {
-+ if (!bch2_bucket_nocow_trylock(&c->nocow_locks,
-+ PTR_BUCKET_POS(c, &p.ptr), 0)) {
-+ ret = -BCH_ERR_nocow_lock_blocked;
-+ goto err;
-+ }
-+ }
-+ ptrs_locked |= (1U << i);
-+ }
-+
-+ i++;
-+ }
-+
-+ if (reserve_sectors) {
-+ ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
-+ m->data_opts.extra_replicas
-+ ? 0
-+ : BCH_DISK_RESERVATION_NOFAIL);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ m->op.nr_replicas += m->data_opts.extra_replicas;
-+ m->op.nr_replicas_required = m->op.nr_replicas;
-+
-+ BUG_ON(!m->op.nr_replicas);
-+
-+ /* Special handling required: */
-+ if (bkey_extent_is_unwritten(k))
-+ return -BCH_ERR_unwritten_extent_update;
-+ return 0;
-+err:
-+ i = 0;
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ if ((1U << i) & ptrs_locked)
-+ bch2_bucket_nocow_unlock(&c->nocow_locks,
-+ PTR_BUCKET_POS(c, &p.ptr), 0);
-+ percpu_ref_put(&bch_dev_bkey_exists(c, p.ptr.dev)->ref);
-+ i++;
-+ }
-+
-+ bch2_bkey_buf_exit(&m->k, c);
-+ bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
-+ return ret;
-+}
-+
-+void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+ unsigned i = 0;
-+
-+ bkey_for_each_ptr(ptrs, ptr) {
-+ if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) {
-+ opts->kill_ptrs |= 1U << i;
-+ opts->rewrite_ptrs ^= 1U << i;
-+ }
-+
-+ i++;
-+ }
-+}
-diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
-new file mode 100644
-index 000000000000..9dc17b9d8379
---- /dev/null
-+++ b/fs/bcachefs/data_update.h
-@@ -0,0 +1,44 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+
-+#ifndef _BCACHEFS_DATA_UPDATE_H
-+#define _BCACHEFS_DATA_UPDATE_H
-+
-+#include "bkey_buf.h"
-+#include "io_write_types.h"
-+
-+struct moving_context;
-+
-+struct data_update_opts {
-+ unsigned rewrite_ptrs;
-+ unsigned kill_ptrs;
-+ u16 target;
-+ u8 extra_replicas;
-+ unsigned btree_insert_flags;
-+ unsigned write_flags;
-+};
-+
-+struct data_update {
-+ /* extent being updated: */
-+ enum btree_id btree_id;
-+ struct bkey_buf k;
-+ struct data_update_opts data_opts;
-+ struct moving_context *ctxt;
-+ struct bch_move_stats *stats;
-+ struct bch_write_op op;
-+};
-+
-+int bch2_data_update_index_update(struct bch_write_op *);
-+
-+void bch2_data_update_read_done(struct data_update *,
-+ struct bch_extent_crc_unpacked);
-+
-+void bch2_data_update_exit(struct data_update *);
-+void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
-+int bch2_data_update_init(struct btree_trans *, struct moving_context *,
-+ struct data_update *,
-+ struct write_point_specifier,
-+ struct bch_io_opts, struct data_update_opts,
-+ enum btree_id, struct bkey_s_c);
-+void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
-+
-+#endif /* _BCACHEFS_DATA_UPDATE_H */
-diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
-new file mode 100644
-index 000000000000..57c5128db173
---- /dev/null
-+++ b/fs/bcachefs/debug.c
-@@ -0,0 +1,954 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Assorted bcachefs debug code
-+ *
-+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "bkey_methods.h"
-+#include "btree_cache.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "btree_locking.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "debug.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "fsck.h"
-+#include "inode.h"
-+#include "super.h"
-+
-+#include <linux/console.h>
-+#include <linux/debugfs.h>
-+#include <linux/module.h>
-+#include <linux/random.h>
-+#include <linux/seq_file.h>
-+
-+static struct dentry *bch_debug;
-+
-+static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
-+ struct extent_ptr_decoded pick)
-+{
-+ struct btree *v = c->verify_data;
-+ struct btree_node *n_ondisk = c->verify_ondisk;
-+ struct btree_node *n_sorted = c->verify_data->data;
-+ struct bset *sorted, *inmemory = &b->data->keys;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-+ struct bio *bio;
-+ bool failed = false, saw_error = false;
-+
-+ if (!bch2_dev_get_ioref(ca, READ))
-+ return false;
-+
-+ bio = bio_alloc_bioset(ca->disk_sb.bdev,
-+ buf_pages(n_sorted, btree_bytes(c)),
-+ REQ_OP_READ|REQ_META,
-+ GFP_NOFS,
-+ &c->btree_bio);
-+ bio->bi_iter.bi_sector = pick.ptr.offset;
-+ bch2_bio_map(bio, n_sorted, btree_bytes(c));
-+
-+ submit_bio_wait(bio);
-+
-+ bio_put(bio);
-+ percpu_ref_put(&ca->io_ref);
-+
-+ memcpy(n_ondisk, n_sorted, btree_bytes(c));
-+
-+ v->written = 0;
-+ if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
-+ return false;
-+
-+ n_sorted = c->verify_data->data;
-+ sorted = &n_sorted->keys;
-+
-+ if (inmemory->u64s != sorted->u64s ||
-+ memcmp(inmemory->start,
-+ sorted->start,
-+ vstruct_end(inmemory) - (void *) inmemory->start)) {
-+ unsigned offset = 0, sectors;
-+ struct bset *i;
-+ unsigned j;
-+
-+ console_lock();
-+
-+ printk(KERN_ERR "*** in memory:\n");
-+ bch2_dump_bset(c, b, inmemory, 0);
-+
-+ printk(KERN_ERR "*** read back in:\n");
-+ bch2_dump_bset(c, v, sorted, 0);
-+
-+ while (offset < v->written) {
-+ if (!offset) {
-+ i = &n_ondisk->keys;
-+ sectors = vstruct_blocks(n_ondisk, c->block_bits) <<
-+ c->block_bits;
-+ } else {
-+ struct btree_node_entry *bne =
-+ (void *) n_ondisk + (offset << 9);
-+ i = &bne->keys;
-+
-+ sectors = vstruct_blocks(bne, c->block_bits) <<
-+ c->block_bits;
-+ }
-+
-+ printk(KERN_ERR "*** on disk block %u:\n", offset);
-+ bch2_dump_bset(c, b, i, offset);
-+
-+ offset += sectors;
-+ }
-+
-+ for (j = 0; j < le16_to_cpu(inmemory->u64s); j++)
-+ if (inmemory->_data[j] != sorted->_data[j])
-+ break;
-+
-+ console_unlock();
-+ bch_err(c, "verify failed at key %u", j);
-+
-+ failed = true;
-+ }
-+
-+ if (v->written != b->written) {
-+ bch_err(c, "written wrong: expected %u, got %u",
-+ b->written, v->written);
-+ failed = true;
-+ }
-+
-+ return failed;
-+}
-+
-+void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
-+{
-+ struct bkey_ptrs_c ptrs;
-+ struct extent_ptr_decoded p;
-+ const union bch_extent_entry *entry;
-+ struct btree *v;
-+ struct bset *inmemory = &b->data->keys;
-+ struct bkey_packed *k;
-+ bool failed = false;
-+
-+ if (c->opts.nochanges)
-+ return;
-+
-+ bch2_btree_node_io_lock(b);
-+ mutex_lock(&c->verify_lock);
-+
-+ if (!c->verify_ondisk) {
-+ c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
-+ if (!c->verify_ondisk)
-+ goto out;
-+ }
-+
-+ if (!c->verify_data) {
-+ c->verify_data = __bch2_btree_node_mem_alloc(c);
-+ if (!c->verify_data)
-+ goto out;
-+
-+ list_del_init(&c->verify_data->list);
-+ }
-+
-+ BUG_ON(b->nsets != 1);
-+
-+ for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k))
-+ if (k->type == KEY_TYPE_btree_ptr_v2)
-+ ((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0;
-+
-+ v = c->verify_data;
-+ bkey_copy(&v->key, &b->key);
-+ v->c.level = b->c.level;
-+ v->c.btree_id = b->c.btree_id;
-+ bch2_btree_keys_init(v);
-+
-+ ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
-+ bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry)
-+ failed |= bch2_btree_verify_replica(c, b, p);
-+
-+ if (failed) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-+ bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf);
-+ printbuf_exit(&buf);
-+ }
-+out:
-+ mutex_unlock(&c->verify_lock);
-+ bch2_btree_node_io_unlock(b);
-+}
-+
-+void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
-+ const struct btree *b)
-+{
-+ struct btree_node *n_ondisk = NULL;
-+ struct extent_ptr_decoded pick;
-+ struct bch_dev *ca;
-+ struct bio *bio = NULL;
-+ unsigned offset = 0;
-+ int ret;
-+
-+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) {
-+ prt_printf(out, "error getting device to read from: invalid device\n");
-+ return;
-+ }
-+
-+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-+ if (!bch2_dev_get_ioref(ca, READ)) {
-+ prt_printf(out, "error getting device to read from: not online\n");
-+ return;
-+ }
-+
-+ n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
-+ if (!n_ondisk) {
-+ prt_printf(out, "memory allocation failure\n");
-+ goto out;
-+ }
-+
-+ bio = bio_alloc_bioset(ca->disk_sb.bdev,
-+ buf_pages(n_ondisk, btree_bytes(c)),
-+ REQ_OP_READ|REQ_META,
-+ GFP_NOFS,
-+ &c->btree_bio);
-+ bio->bi_iter.bi_sector = pick.ptr.offset;
-+ bch2_bio_map(bio, n_ondisk, btree_bytes(c));
-+
-+ ret = submit_bio_wait(bio);
-+ if (ret) {
-+ prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret));
-+ goto out;
-+ }
-+
-+ while (offset < btree_sectors(c)) {
-+ struct bset *i;
-+ struct nonce nonce;
-+ struct bch_csum csum;
-+ struct bkey_packed *k;
-+ unsigned sectors;
-+
-+ if (!offset) {
-+ i = &n_ondisk->keys;
-+
-+ if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
-+ prt_printf(out, "unknown checksum type at offset %u: %llu\n",
-+ offset, BSET_CSUM_TYPE(i));
-+ goto out;
-+ }
-+
-+ nonce = btree_nonce(i, offset << 9);
-+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk);
-+
-+ if (bch2_crc_cmp(csum, n_ondisk->csum)) {
-+ prt_printf(out, "invalid checksum\n");
-+ goto out;
-+ }
-+
-+ bset_encrypt(c, i, offset << 9);
-+
-+ sectors = vstruct_sectors(n_ondisk, c->block_bits);
-+ } else {
-+ struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9);
-+
-+ i = &bne->keys;
-+
-+ if (i->seq != n_ondisk->keys.seq)
-+ break;
-+
-+ if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
-+ prt_printf(out, "unknown checksum type at offset %u: %llu\n",
-+ offset, BSET_CSUM_TYPE(i));
-+ goto out;
-+ }
-+
-+ nonce = btree_nonce(i, offset << 9);
-+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
-+
-+ if (bch2_crc_cmp(csum, bne->csum)) {
-+ prt_printf(out, "invalid checksum");
-+ goto out;
-+ }
-+
-+ bset_encrypt(c, i, offset << 9);
-+
-+ sectors = vstruct_sectors(bne, c->block_bits);
-+ }
-+
-+ prt_printf(out, " offset %u version %u, journal seq %llu\n",
-+ offset,
-+ le16_to_cpu(i->version),
-+ le64_to_cpu(i->journal_seq));
-+ offset += sectors;
-+
-+ printbuf_indent_add(out, 4);
-+
-+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
-+ struct bkey u;
-+
-+ bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
-+ prt_newline(out);
-+ }
-+
-+ printbuf_indent_sub(out, 4);
-+ }
-+out:
-+ if (bio)
-+ bio_put(bio);
-+ kvpfree(n_ondisk, btree_bytes(c));
-+ percpu_ref_put(&ca->io_ref);
-+}
-+
-+#ifdef CONFIG_DEBUG_FS
-+
-+/* XXX: bch_fs refcounting */
-+
-+struct dump_iter {
-+ struct bch_fs *c;
-+ enum btree_id id;
-+ struct bpos from;
-+ struct bpos prev_node;
-+ u64 iter;
-+
-+ struct printbuf buf;
-+
-+ char __user *ubuf; /* destination user buffer */
-+ size_t size; /* size of requested read */
-+ ssize_t ret; /* bytes read so far */
-+};
-+
-+static ssize_t flush_buf(struct dump_iter *i)
-+{
-+ if (i->buf.pos) {
-+ size_t bytes = min_t(size_t, i->buf.pos, i->size);
-+ int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes);
-+
-+ i->ret += copied;
-+ i->ubuf += copied;
-+ i->size -= copied;
-+ i->buf.pos -= copied;
-+ memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos);
-+
-+ if (copied != bytes)
-+ return -EFAULT;
-+ }
-+
-+ return i->size ? 0 : i->ret;
-+}
-+
-+static int bch2_dump_open(struct inode *inode, struct file *file)
-+{
-+ struct btree_debug *bd = inode->i_private;
-+ struct dump_iter *i;
-+
-+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
-+ if (!i)
-+ return -ENOMEM;
-+
-+ file->private_data = i;
-+ i->from = POS_MIN;
-+ i->iter = 0;
-+ i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]);
-+ i->id = bd->id;
-+ i->buf = PRINTBUF;
-+
-+ return 0;
-+}
-+
-+static int bch2_dump_release(struct inode *inode, struct file *file)
-+{
-+ struct dump_iter *i = file->private_data;
-+
-+ printbuf_exit(&i->buf);
-+ kfree(i);
-+ return 0;
-+}
-+
-+static ssize_t bch2_read_btree(struct file *file, char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct dump_iter *i = file->private_data;
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ ssize_t ret;
-+
-+ i->ubuf = buf;
-+ i->size = size;
-+ i->ret = 0;
-+
-+ ret = flush_buf(i);
-+ if (ret)
-+ return ret;
-+
-+ trans = bch2_trans_get(i->c);
-+ ret = for_each_btree_key2(trans, iter, i->id, i->from,
-+ BTREE_ITER_PREFETCH|
-+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
-+ bch2_bkey_val_to_text(&i->buf, i->c, k);
-+ prt_newline(&i->buf);
-+ drop_locks_do(trans, flush_buf(i));
-+ }));
-+ i->from = iter.pos;
-+
-+ bch2_trans_put(trans);
-+
-+ if (!ret)
-+ ret = flush_buf(i);
-+
-+ return ret ?: i->ret;
-+}
-+
-+static const struct file_operations btree_debug_ops = {
-+ .owner = THIS_MODULE,
-+ .open = bch2_dump_open,
-+ .release = bch2_dump_release,
-+ .read = bch2_read_btree,
-+};
-+
-+static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct dump_iter *i = file->private_data;
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct btree *b;
-+ ssize_t ret;
-+
-+ i->ubuf = buf;
-+ i->size = size;
-+ i->ret = 0;
-+
-+ ret = flush_buf(i);
-+ if (ret)
-+ return ret;
-+
-+ if (bpos_eq(SPOS_MAX, i->from))
-+ return i->ret;
-+
-+ trans = bch2_trans_get(i->c);
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) {
-+ bch2_btree_node_to_text(&i->buf, i->c, b);
-+ i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
-+ ? bpos_successor(b->key.k.p)
-+ : b->key.k.p;
-+
-+ ret = drop_locks_do(trans, flush_buf(i));
-+ if (ret)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_put(trans);
-+
-+ if (!ret)
-+ ret = flush_buf(i);
-+
-+ return ret ?: i->ret;
-+}
-+
-+static const struct file_operations btree_format_debug_ops = {
-+ .owner = THIS_MODULE,
-+ .open = bch2_dump_open,
-+ .release = bch2_dump_release,
-+ .read = bch2_read_btree_formats,
-+};
-+
-+static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct dump_iter *i = file->private_data;
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ ssize_t ret;
-+
-+ i->ubuf = buf;
-+ i->size = size;
-+ i->ret = 0;
-+
-+ ret = flush_buf(i);
-+ if (ret)
-+ return ret;
-+
-+ trans = bch2_trans_get(i->c);
-+
-+ ret = for_each_btree_key2(trans, iter, i->id, i->from,
-+ BTREE_ITER_PREFETCH|
-+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
-+ struct btree_path_level *l = &iter.path->l[0];
-+ struct bkey_packed *_k =
-+ bch2_btree_node_iter_peek(&l->iter, l->b);
-+
-+ if (bpos_gt(l->b->key.k.p, i->prev_node)) {
-+ bch2_btree_node_to_text(&i->buf, i->c, l->b);
-+ i->prev_node = l->b->key.k.p;
-+ }
-+
-+ bch2_bfloat_to_text(&i->buf, l->b, _k);
-+ drop_locks_do(trans, flush_buf(i));
-+ }));
-+ i->from = iter.pos;
-+
-+ bch2_trans_put(trans);
-+
-+ if (!ret)
-+ ret = flush_buf(i);
-+
-+ return ret ?: i->ret;
-+}
-+
-+static const struct file_operations bfloat_failed_debug_ops = {
-+ .owner = THIS_MODULE,
-+ .open = bch2_dump_open,
-+ .release = bch2_dump_release,
-+ .read = bch2_read_bfloat_failed,
-+};
-+
-+static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct btree *b)
-+{
-+ if (!out->nr_tabstops)
-+ printbuf_tabstop_push(out, 32);
-+
-+ prt_printf(out, "%px btree=%s l=%u ",
-+ b,
-+ bch2_btree_id_str(b->c.btree_id),
-+ b->c.level);
-+ prt_newline(out);
-+
-+ printbuf_indent_add(out, 2);
-+
-+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
-+ prt_newline(out);
-+
-+ prt_printf(out, "flags: ");
-+ prt_tab(out);
-+ prt_bitflags(out, bch2_btree_node_flags, b->flags);
-+ prt_newline(out);
-+
-+ prt_printf(out, "pcpu read locks: ");
-+ prt_tab(out);
-+ prt_printf(out, "%u", b->c.lock.readers != NULL);
-+ prt_newline(out);
-+
-+ prt_printf(out, "written:");
-+ prt_tab(out);
-+ prt_printf(out, "%u", b->written);
-+ prt_newline(out);
-+
-+ prt_printf(out, "writes blocked:");
-+ prt_tab(out);
-+ prt_printf(out, "%u", !list_empty_careful(&b->write_blocked));
-+ prt_newline(out);
-+
-+ prt_printf(out, "will make reachable:");
-+ prt_tab(out);
-+ prt_printf(out, "%lx", b->will_make_reachable);
-+ prt_newline(out);
-+
-+ prt_printf(out, "journal pin %px:", &b->writes[0].journal);
-+ prt_tab(out);
-+ prt_printf(out, "%llu", b->writes[0].journal.seq);
-+ prt_newline(out);
-+
-+ prt_printf(out, "journal pin %px:", &b->writes[1].journal);
-+ prt_tab(out);
-+ prt_printf(out, "%llu", b->writes[1].journal.seq);
-+ prt_newline(out);
-+
-+ printbuf_indent_sub(out, 2);
-+}
-+
-+static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct dump_iter *i = file->private_data;
-+ struct bch_fs *c = i->c;
-+ bool done = false;
-+ ssize_t ret = 0;
-+
-+ i->ubuf = buf;
-+ i->size = size;
-+ i->ret = 0;
-+
-+ do {
-+ struct bucket_table *tbl;
-+ struct rhash_head *pos;
-+ struct btree *b;
-+
-+ ret = flush_buf(i);
-+ if (ret)
-+ return ret;
-+
-+ rcu_read_lock();
-+ i->buf.atomic++;
-+ tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
-+ &c->btree_cache.table);
-+ if (i->iter < tbl->size) {
-+ rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
-+ bch2_cached_btree_node_to_text(&i->buf, c, b);
-+ i->iter++;
-+ } else {
-+ done = true;
-+ }
-+ --i->buf.atomic;
-+ rcu_read_unlock();
-+ } while (!done);
-+
-+ if (i->buf.allocation_failure)
-+ ret = -ENOMEM;
-+
-+ if (!ret)
-+ ret = flush_buf(i);
-+
-+ return ret ?: i->ret;
-+}
-+
-+static const struct file_operations cached_btree_nodes_ops = {
-+ .owner = THIS_MODULE,
-+ .open = bch2_dump_open,
-+ .release = bch2_dump_release,
-+ .read = bch2_cached_btree_nodes_read,
-+};
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
-+static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct dump_iter *i = file->private_data;
-+ struct bch_fs *c = i->c;
-+ struct btree_trans *trans;
-+ ssize_t ret = 0;
-+ u32 seq;
-+
-+ i->ubuf = buf;
-+ i->size = size;
-+ i->ret = 0;
-+restart:
-+ seqmutex_lock(&c->btree_trans_lock);
-+ list_for_each_entry(trans, &c->btree_trans_list, list) {
-+ if (trans->locking_wait.task->pid <= i->iter)
-+ continue;
-+
-+ closure_get(&trans->ref);
-+ seq = seqmutex_seq(&c->btree_trans_lock);
-+ seqmutex_unlock(&c->btree_trans_lock);
-+
-+ ret = flush_buf(i);
-+ if (ret) {
-+ closure_put(&trans->ref);
-+ goto unlocked;
-+ }
-+
-+ bch2_btree_trans_to_text(&i->buf, trans);
-+
-+ prt_printf(&i->buf, "backtrace:");
-+ prt_newline(&i->buf);
-+ printbuf_indent_add(&i->buf, 2);
-+ bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task);
-+ printbuf_indent_sub(&i->buf, 2);
-+ prt_newline(&i->buf);
-+
-+ i->iter = trans->locking_wait.task->pid;
-+
-+ closure_put(&trans->ref);
-+
-+ if (!seqmutex_relock(&c->btree_trans_lock, seq))
-+ goto restart;
-+ }
-+ seqmutex_unlock(&c->btree_trans_lock);
-+unlocked:
-+ if (i->buf.allocation_failure)
-+ ret = -ENOMEM;
-+
-+ if (!ret)
-+ ret = flush_buf(i);
-+
-+ return ret ?: i->ret;
-+}
-+
-+static const struct file_operations btree_transactions_ops = {
-+ .owner = THIS_MODULE,
-+ .open = bch2_dump_open,
-+ .release = bch2_dump_release,
-+ .read = bch2_btree_transactions_read,
-+};
-+#endif /* CONFIG_BCACHEFS_DEBUG_TRANSACTIONS */
-+
-+static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct dump_iter *i = file->private_data;
-+ struct bch_fs *c = i->c;
-+ bool done = false;
-+ int err;
-+
-+ i->ubuf = buf;
-+ i->size = size;
-+ i->ret = 0;
-+
-+ do {
-+ err = flush_buf(i);
-+ if (err)
-+ return err;
-+
-+ if (!i->size)
-+ break;
-+
-+ done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
-+ i->iter++;
-+ } while (!done);
-+
-+ if (i->buf.allocation_failure)
-+ return -ENOMEM;
-+
-+ return i->ret;
-+}
-+
-+static const struct file_operations journal_pins_ops = {
-+ .owner = THIS_MODULE,
-+ .open = bch2_dump_open,
-+ .release = bch2_dump_release,
-+ .read = bch2_journal_pins_read,
-+};
-+
-+static int lock_held_stats_open(struct inode *inode, struct file *file)
-+{
-+ struct bch_fs *c = inode->i_private;
-+ struct dump_iter *i;
-+
-+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
-+
-+ if (!i)
-+ return -ENOMEM;
-+
-+ i->iter = 0;
-+ i->c = c;
-+ i->buf = PRINTBUF;
-+ file->private_data = i;
-+
-+ return 0;
-+}
-+
-+static int lock_held_stats_release(struct inode *inode, struct file *file)
-+{
-+ struct dump_iter *i = file->private_data;
-+
-+ printbuf_exit(&i->buf);
-+ kfree(i);
-+
-+ return 0;
-+}
-+
-+static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct dump_iter *i = file->private_data;
-+ struct bch_fs *c = i->c;
-+ int err;
-+
-+ i->ubuf = buf;
-+ i->size = size;
-+ i->ret = 0;
-+
-+ while (1) {
-+ struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
-+
-+ err = flush_buf(i);
-+ if (err)
-+ return err;
-+
-+ if (!i->size)
-+ break;
-+
-+ if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) ||
-+ !bch2_btree_transaction_fns[i->iter])
-+ break;
-+
-+ prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]);
-+ prt_newline(&i->buf);
-+ printbuf_indent_add(&i->buf, 2);
-+
-+ mutex_lock(&s->lock);
-+
-+ prt_printf(&i->buf, "Max mem used: %u", s->max_mem);
-+ prt_newline(&i->buf);
-+
-+ if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
-+ prt_printf(&i->buf, "Lock hold times:");
-+ prt_newline(&i->buf);
-+
-+ printbuf_indent_add(&i->buf, 2);
-+ bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
-+ printbuf_indent_sub(&i->buf, 2);
-+ }
-+
-+ if (s->max_paths_text) {
-+ prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths);
-+ prt_newline(&i->buf);
-+
-+ printbuf_indent_add(&i->buf, 2);
-+ prt_str_indented(&i->buf, s->max_paths_text);
-+ printbuf_indent_sub(&i->buf, 2);
-+ }
-+
-+ mutex_unlock(&s->lock);
-+
-+ printbuf_indent_sub(&i->buf, 2);
-+ prt_newline(&i->buf);
-+ i->iter++;
-+ }
-+
-+ if (i->buf.allocation_failure)
-+ return -ENOMEM;
-+
-+ return i->ret;
-+}
-+
-+static const struct file_operations lock_held_stats_op = {
-+ .owner = THIS_MODULE,
-+ .open = lock_held_stats_open,
-+ .release = lock_held_stats_release,
-+ .read = lock_held_stats_read,
-+};
-+
-+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct dump_iter *i = file->private_data;
-+ struct bch_fs *c = i->c;
-+ struct btree_trans *trans;
-+ ssize_t ret = 0;
-+ u32 seq;
-+
-+ i->ubuf = buf;
-+ i->size = size;
-+ i->ret = 0;
-+
-+ if (i->iter)
-+ goto out;
-+restart:
-+ seqmutex_lock(&c->btree_trans_lock);
-+ list_for_each_entry(trans, &c->btree_trans_list, list) {
-+ if (trans->locking_wait.task->pid <= i->iter)
-+ continue;
-+
-+ closure_get(&trans->ref);
-+ seq = seqmutex_seq(&c->btree_trans_lock);
-+ seqmutex_unlock(&c->btree_trans_lock);
-+
-+ ret = flush_buf(i);
-+ if (ret) {
-+ closure_put(&trans->ref);
-+ goto out;
-+ }
-+
-+ bch2_check_for_deadlock(trans, &i->buf);
-+
-+ i->iter = trans->locking_wait.task->pid;
-+
-+ closure_put(&trans->ref);
-+
-+ if (!seqmutex_relock(&c->btree_trans_lock, seq))
-+ goto restart;
-+ }
-+ seqmutex_unlock(&c->btree_trans_lock);
-+out:
-+ if (i->buf.allocation_failure)
-+ ret = -ENOMEM;
-+
-+ if (!ret)
-+ ret = flush_buf(i);
-+
-+ return ret ?: i->ret;
-+}
-+
-+static const struct file_operations btree_deadlock_ops = {
-+ .owner = THIS_MODULE,
-+ .open = bch2_dump_open,
-+ .release = bch2_dump_release,
-+ .read = bch2_btree_deadlock_read,
-+};
-+
-+void bch2_fs_debug_exit(struct bch_fs *c)
-+{
-+ if (!IS_ERR_OR_NULL(c->fs_debug_dir))
-+ debugfs_remove_recursive(c->fs_debug_dir);
-+}
-+
-+/*
-+ * Create the debugfs hierarchy for one filesystem.
-+ *
-+ * A directory named after c->sb.user_uuid is created under the global
-+ * bch_debug directory, populated with the per-filesystem dump files, and a
-+ * "btrees" subdirectory receives three files per entry of c->btree_debug.
-+ * Silently does nothing if the parent (or a needed directory) is missing.
-+ */
-+void bch2_fs_debug_init(struct bch_fs *c)
-+{
-+	char name[100];
-+	unsigned idx;
-+
-+	if (IS_ERR_OR_NULL(bch_debug))
-+		return;
-+
-+	snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
-+	c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
-+	if (IS_ERR_OR_NULL(c->fs_debug_dir))
-+		return;
-+
-+	debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
-+			    c->btree_debug, &cached_btree_nodes_ops);
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
-+	debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir,
-+			    c->btree_debug, &btree_transactions_ops);
-+#endif
-+
-+	debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
-+			    c->btree_debug, &journal_pins_ops);
-+
-+	debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
-+			    c, &lock_held_stats_op);
-+
-+	debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
-+			    c->btree_debug, &btree_deadlock_ops);
-+
-+	c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
-+	if (IS_ERR_OR_NULL(c->btree_debug_dir))
-+		return;
-+
-+	/* One "<btree>", "<btree>-formats" and "<btree>-bfloat-failed" per slot */
-+	for (idx = 0; idx < ARRAY_SIZE(c->btree_debug); idx++) {
-+		struct btree_debug *bd = &c->btree_debug[idx];
-+
-+		bd->id = idx;
-+		debugfs_create_file(bch2_btree_id_str(bd->id),
-+				    0400, c->btree_debug_dir, bd,
-+				    &btree_debug_ops);
-+
-+		snprintf(name, sizeof(name), "%s-formats",
-+			 bch2_btree_id_str(bd->id));
-+		debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
-+				    &btree_format_debug_ops);
-+
-+		snprintf(name, sizeof(name), "%s-bfloat-failed",
-+			 bch2_btree_id_str(bd->id));
-+		debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
-+				    &bfloat_failed_debug_ops);
-+	}
-+}
-+
-+#endif
-+
-+/* Remove the global bcachefs debugfs directory, if it was created. */
-+void bch2_debug_exit(void)
-+{
-+	if (IS_ERR_OR_NULL(bch_debug))
-+		return;
-+
-+	debugfs_remove_recursive(bch_debug);
-+}
-+
-+/*
-+ * Create the top-level "bcachefs" debugfs directory.
-+ *
-+ * Always returns 0: the result of debugfs_create_dir() is only stored in
-+ * bch_debug, which users check with IS_ERR_OR_NULL() before relying on it.
-+ */
-+int __init bch2_debug_init(void)
-+{
-+	bch_debug = debugfs_create_dir("bcachefs", NULL);
-+
-+	return 0;
-+}
-diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h
-new file mode 100644
-index 000000000000..2c37143b5fd1
---- /dev/null
-+++ b/fs/bcachefs/debug.h
-@@ -0,0 +1,32 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_DEBUG_H
-+#define _BCACHEFS_DEBUG_H
-+
-+#include "bcachefs.h"
-+
-+struct bio;
-+struct btree;
-+struct bch_fs;
-+
-+void __bch2_btree_verify(struct bch_fs *, struct btree *);
-+void bch2_btree_node_ondisk_to_text(struct printbuf *, struct bch_fs *,
-+ const struct btree *);
-+
-+/* Run __bch2_btree_verify() on @b, but only when bch2_verify_btree_ondisk is set. */
-+static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
-+{
-+	if (!bch2_verify_btree_ondisk)
-+		return;
-+
-+	__bch2_btree_verify(c, b);
-+}
-+
-+/*
-+ * Per-filesystem debugfs setup/teardown; these compile to empty stubs when
-+ * CONFIG_DEBUG_FS is not set.
-+ */
-+#ifdef CONFIG_DEBUG_FS
-+void bch2_fs_debug_exit(struct bch_fs *);
-+void bch2_fs_debug_init(struct bch_fs *);
-+#else
-+static inline void bch2_fs_debug_exit(struct bch_fs *c) {}
-+static inline void bch2_fs_debug_init(struct bch_fs *c) {}
-+#endif
-+
-+void bch2_debug_exit(void);
-+int bch2_debug_init(void);
-+
-+#endif /* _BCACHEFS_DEBUG_H */
-diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
-new file mode 100644
-index 000000000000..1a0f2d571569
---- /dev/null
-+++ b/fs/bcachefs/dirent.c
-@@ -0,0 +1,577 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "bkey_methods.h"
-+#include "btree_update.h"
-+#include "extents.h"
-+#include "dirent.h"
-+#include "fs.h"
-+#include "keylist.h"
-+#include "str_hash.h"
-+#include "subvolume.h"
-+
-+#include <linux/dcache.h>
-+
-+/*
-+ * Length in bytes of the name stored in dirent @d.
-+ *
-+ * The value is a whole number of u64s, so the name is followed by zero
-+ * padding. The padding is measured by counting zero bytes at the end of the
-+ * last u64 of the value (which end of the word that is depends on CPU
-+ * endianness), then subtracted, together with the fixed header that precedes
-+ * d_name, from the value size.
-+ */
-+static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
-+{
-+	unsigned val_u64s = bkey_val_u64s(d.k);
-+	u64 tail = ((u64 *) d.v)[val_u64s - 1];
-+	/* An all-zero final word is entirely padding */
-+	unsigned pad = sizeof(u64);
-+
-+	if (tail) {
-+#if CPU_BIG_ENDIAN
-+		pad = __builtin_ctzll(tail) / 8;
-+#else
-+		pad = __builtin_clzll(tail) / 8;
-+#endif
-+	}
-+
-+	return val_u64s * sizeof(u64) -
-+		offsetof(struct bch_dirent, d_name) -
-+		pad;
-+}
-+
-+/* Return the name of dirent @d as a qstr pointing into the key (no copy). */
-+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
-+{
-+	struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
-+
-+	return name;
-+}
-+
-+/*
-+ * Hash @name with the directory's hash parameters @info.
-+ * Results below 2 are bumped up to 2, since [0,2) is reserved for dots.
-+ */
-+static u64 bch2_dirent_hash(const struct bch_hash_info *info,
-+			    const struct qstr *name)
-+{
-+	struct bch_str_hash_ctx ctx;
-+	u64 hash;
-+
-+	bch2_str_hash_init(&ctx, info);
-+	bch2_str_hash_update(&ctx, info, name->name, name->len);
-+	hash = bch2_str_hash_end(&ctx, info);
-+
-+	/* [0,2) reserved for dots */
-+	return hash < 2 ? 2 : hash;
-+}
-+
-+/* bch_hash_desc.hash_key hook: @key is a struct qstr holding the name. */
-+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
-+{
-+	const struct qstr *name = key;
-+
-+	return bch2_dirent_hash(info, name);
-+}
-+
-+/* bch_hash_desc.hash_bkey hook: hash the name stored in dirent key @k. */
-+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
-+{
-+	struct qstr name = bch2_dirent_get_name(bkey_s_c_to_dirent(k));
-+
-+	return bch2_dirent_hash(info, &name);
-+}
-+
-+/*
-+ * bch_hash_desc.cmp_key hook: compare the name in dirent @_l with the qstr
-+ * @_r. Returns true when they DIFFER, false when they are identical.
-+ */
-+static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
-+{
-+	const struct qstr have = bch2_dirent_get_name(bkey_s_c_to_dirent(_l));
-+	const struct qstr *want = _r;
-+
-+	if (have.len != want->len)
-+		return true;
-+
-+	return memcmp(have.name, want->name, have.len) != 0;
-+}
-+
-+/*
-+ * bch_hash_desc.cmp_bkey hook: compare the names of two dirent keys.
-+ * Returns true when they DIFFER, false when they are identical.
-+ */
-+static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
-+{
-+	const struct qstr lhs = bch2_dirent_get_name(bkey_s_c_to_dirent(_l));
-+	const struct qstr rhs = bch2_dirent_get_name(bkey_s_c_to_dirent(_r));
-+
-+	if (lhs.len != rhs.len)
-+		return true;
-+
-+	return memcmp(lhs.name, rhs.name, lhs.len) != 0;
-+}
-+
-+/*
-+ * bch_hash_desc.is_visible hook: a DT_SUBVOL dirent is visible only from the
-+ * subvolume recorded as its parent; every other dirent is always visible.
-+ */
-+static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
-+{
-+	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-+
-+	return d.v->d_type != DT_SUBVOL ||
-+		le32_to_cpu(d.v->d_parent_subvol) == inum.subvol;
-+}
-+
-+/*
-+ * Hash-table description for directory entries: dirents live in the dirents
-+ * btree and are hashed/compared by name using the helpers above.
-+ */
-+const struct bch_hash_desc bch2_dirent_hash_desc = {
-+ .btree_id = BTREE_ID_dirents,
-+ .key_type = KEY_TYPE_dirent,
-+ .hash_key = dirent_hash_key,
-+ .hash_bkey = dirent_hash_bkey,
-+ .cmp_key = dirent_cmp_key,
-+ .cmp_bkey = dirent_cmp_bkey,
-+ .is_visible = dirent_is_visible,
-+};
-+
-+/*
-+ * Validate a dirent key.
-+ *
-+ * Each check is expressed with bkey_fsck_err_on(), which is given the
-+ * condition, the filesystem, the @err printbuf, an error identifier and a
-+ * message.
-+ * NOTE(review): the macro presumably sets the local `ret` and jumps to the
-+ * `fsck_err` label when a check fires - confirm against its definition.
-+ *
-+ * Returns `ret`, which is 0 unless a check changed it.
-+ */
-+int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-+ struct qstr d_name = bch2_dirent_get_name(d);
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(!d_name.len, c, err,
-+ dirent_empty_name,
-+ "empty name");
-+
-+ /* The value may not be larger than what a name of this length needs */
-+ bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err,
-+ dirent_val_too_big,
-+ "value too big (%zu > %u)",
-+ bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
-+
-+ /*
-+ * Check new keys don't exceed the max length
-+ * (older keys may be larger.)
-+ */
-+ bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err,
-+ dirent_name_too_long,
-+ "dirent name too big (%u > %u)",
-+ d_name.len, BCH_NAME_MAX);
-+
-+ /* d_name.len only excludes trailing NULs, so this catches embedded ones */
-+ bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err,
-+ dirent_name_embedded_nul,
-+ "dirent has stray data after name's NUL");
-+
-+ bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) ||
-+ (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err,
-+ dirent_name_dot_or_dotdot,
-+ "invalid name");
-+
-+ bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err,
-+ dirent_name_has_slash,
-+ "name with /");
-+
-+ /* A non-subvolume dirent must not point at the directory it lives in */
-+ bkey_fsck_err_on(d.v->d_type != DT_SUBVOL &&
-+ le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err,
-+ dirent_to_itself,
-+ "dirent points to own directory");
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Print dirent @k as "<name> -> <target> type <type>", where the target is
-+ * the child subvolume ID for DT_SUBVOL entries and the inode number otherwise.
-+ */
-+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
-+			 struct bkey_s_c k)
-+{
-+	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-+	struct qstr d_name = bch2_dirent_get_name(d);
-+	u64 target = d.v->d_type != DT_SUBVOL
-+		? le64_to_cpu(d.v->d_inum)
-+		: le32_to_cpu(d.v->d_child_subvol);
-+
-+	prt_printf(out, "%.*s -> %llu type %s",
-+		   d_name.len,
-+		   d_name.name,
-+		   target,
-+		   bch2_d_type_str(d.v->d_type));
-+}
-+
-+/*
-+ * Allocate (from the transaction's memory) and fill in a new dirent key.
-+ *
-+ * @dir:  directory the entry belongs to (its subvol becomes the parent
-+ *        subvolume for DT_SUBVOL entries)
-+ * @type: d_type to store
-+ * @name: entry name; longer than BCH_NAME_MAX yields ERR_PTR(-ENAMETOOLONG)
-+ * @dst:  target inode number, or child subvolume ID when @type is DT_SUBVOL
-+ *
-+ * Returns the new key or an ERR_PTR. The key position is NOT set here.
-+ */
-+static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
-+				subvol_inum dir, u8 type,
-+				const struct qstr *name, u64 dst)
-+{
-+	struct bkey_i_dirent *new;
-+	unsigned u64s;
-+
-+	if (name->len > BCH_NAME_MAX)
-+		return ERR_PTR(-ENAMETOOLONG);
-+
-+	u64s = BKEY_U64s + dirent_val_u64s(name->len);
-+	BUG_ON(u64s > U8_MAX);
-+
-+	new = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
-+	if (IS_ERR(new))
-+		return new;
-+
-+	bkey_dirent_init(&new->k_i);
-+	new->k.u64s = u64s;
-+
-+	if (type == DT_SUBVOL) {
-+		new->v.d_parent_subvol = cpu_to_le32(dir.subvol);
-+		new->v.d_child_subvol = cpu_to_le32(dst);
-+	} else {
-+		new->v.d_inum = cpu_to_le64(dst);
-+	}
-+
-+	new->v.d_type = type;
-+
-+	/* Copy the name, then zero the padding up to the end of the value */
-+	memcpy(new->v.d_name, name->name, name->len);
-+	memset(new->v.d_name + name->len, 0,
-+	       bkey_val_bytes(&new->k) -
-+	       offsetof(struct bch_dirent, d_name) -
-+	       name->len);
-+
-+	EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(new)) != name->len);
-+
-+	return new;
-+}
-+
-+/*
-+ * Create a dirent named @name in directory @dir pointing at @dst_inum and
-+ * insert it via bch2_hash_set().
-+ *
-+ * *@dir_offset receives the key's offset after the bch2_hash_set() call
-+ * (it is written whether or not that call succeeded).
-+ * Returns 0 or a negative error.
-+ */
-+int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
-+		       const struct bch_hash_info *hash_info,
-+		       u8 type, const struct qstr *name, u64 dst_inum,
-+		       u64 *dir_offset, int flags)
-+{
-+	struct bkey_i_dirent *new =
-+		dirent_create_key(trans, dir, type, name, dst_inum);
-+	int ret;
-+
-+	if (IS_ERR(new))
-+		return PTR_ERR(new);
-+
-+	ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
-+			    dir, &new->k_i, flags);
-+	*dir_offset = new->k.p.offset;
-+
-+	return ret;
-+}
-+
-+/* Make @dst point at the same target as @src: copy its d_type and d_inum. */
-+static void dirent_copy_target(struct bkey_i_dirent *dst,
-+			       struct bkey_s_c_dirent src)
-+{
-+	dst->v.d_type = src.v->d_type;
-+	dst->v.d_inum = src.v->d_inum;
-+}
-+
-+/*
-+ * Resolve the (subvolume, inode) pair that dirent @d refers to.
-+ *
-+ * @dir:    directory the dirent was found in
-+ * @target: filled in on success
-+ *
-+ * Returns:
-+ *   1   - @d is a DT_SUBVOL entry whose parent subvolume is not @dir.subvol
-+ *         (i.e. it is not visible from @dir); @target is untouched
-+ *   0   - success
-+ *   <0  - error from bch2_subvolume_get()
-+ */
-+int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
-+			    struct bkey_s_c_dirent d, subvol_inum *target)
-+{
-+	struct bch_subvolume s;
-+	int ret = 0;
-+
-+	if (d.v->d_type == DT_SUBVOL &&
-+	    le32_to_cpu(d.v->d_parent_subvol) != dir.subvol)
-+		return 1;
-+
-+	if (likely(d.v->d_type != DT_SUBVOL)) {
-+		target->subvol	= dir.subvol;
-+		target->inum	= le64_to_cpu(d.v->d_inum);
-+	} else {
-+		target->subvol	= le32_to_cpu(d.v->d_child_subvol);
-+
-+		ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_CACHED, &s);
-+
-+		/*
-+		 * @s has no initializer, so only read it once the lookup has
-+		 * succeeded; on failure callers must not use @target anyway.
-+		 */
-+		if (!ret)
-+			target->inum = le64_to_cpu(s.inode);
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * Rename a directory entry within one transaction.
-+ *
-+ * @src_dir/@src_hash/@src_name: entry being moved
-+ * @dst_dir/@dst_hash/@dst_name: where it is moved to
-+ * @src_inum/@dst_inum: zeroed on entry, then set to the targets of the old
-+ *                      source entry and (for overwrite/exchange) the old
-+ *                      destination entry
-+ * @src_offset/@dst_offset: receive the resulting dirent offsets
-+ * @mode: BCH_RENAME looks for a free slot at the destination,
-+ *        BCH_RENAME_OVERWRITE replaces an existing destination entry,
-+ *        BCH_RENAME_EXCHANGE swaps the targets of the two entries
-+ *
-+ * Returns 0 on success, -EXDEV when the directories are in different
-+ * subvolumes, -EOPNOTSUPP for the unsupported subvolume cases, or another
-+ * error from the lookups/updates. Once the lookups have started, every exit
-+ * goes through the `out` label so that both iterators are released.
-+ */
-+int bch2_dirent_rename(struct btree_trans *trans,
-+		subvol_inum src_dir, struct bch_hash_info *src_hash,
-+		subvol_inum dst_dir, struct bch_hash_info *dst_hash,
-+		const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
-+		const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
-+		enum bch_rename_mode mode)
-+{
-+	struct btree_iter src_iter = { NULL };
-+	struct btree_iter dst_iter = { NULL };
-+	struct bkey_s_c old_src, old_dst = bkey_s_c_null;
-+	struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
-+	struct bpos dst_pos =
-+		POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
-+	unsigned src_type = 0, dst_type = 0, src_update_flags = 0;
-+	int ret = 0;
-+
-+	if (src_dir.subvol != dst_dir.subvol)
-+		return -EXDEV;
-+
-+	memset(src_inum, 0, sizeof(*src_inum));
-+	memset(dst_inum, 0, sizeof(*dst_inum));
-+
-+	/* Lookup src: */
-+	ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
-+			       src_hash, src_dir, src_name,
-+			       BTREE_ITER_INTENT);
-+	if (ret)
-+		goto out;
-+
-+	old_src = bch2_btree_iter_peek_slot(&src_iter);
-+	ret = bkey_err(old_src);
-+	if (ret)
-+		goto out;
-+
-+	ret = bch2_dirent_read_target(trans, src_dir,
-+			bkey_s_c_to_dirent(old_src), src_inum);
-+	if (ret)
-+		goto out;
-+
-+	src_type = bkey_s_c_to_dirent(old_src).v->d_type;
-+
-+	if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE) {
-+		/* src_iter is live here: leave via out so it gets released */
-+		ret = -EOPNOTSUPP;
-+		goto out;
-+	}
-+
-+	/* Lookup dst: */
-+	if (mode == BCH_RENAME) {
-+		/*
-+		 * Note that we're _not_ checking if the target already exists -
-+		 * we're relying on the VFS to do that check for us for
-+		 * correctness:
-+		 */
-+		ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
-+				     dst_hash, dst_dir, dst_name);
-+		if (ret)
-+			goto out;
-+	} else {
-+		ret = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
-+				       dst_hash, dst_dir, dst_name,
-+				       BTREE_ITER_INTENT);
-+		if (ret)
-+			goto out;
-+
-+		old_dst = bch2_btree_iter_peek_slot(&dst_iter);
-+		ret = bkey_err(old_dst);
-+		if (ret)
-+			goto out;
-+
-+		ret = bch2_dirent_read_target(trans, dst_dir,
-+				bkey_s_c_to_dirent(old_dst), dst_inum);
-+		if (ret)
-+			goto out;
-+
-+		dst_type = bkey_s_c_to_dirent(old_dst).v->d_type;
-+
-+		if (dst_type == DT_SUBVOL) {
-+			/* Both iterators are live here: release them via out */
-+			ret = -EOPNOTSUPP;
-+			goto out;
-+		}
-+	}
-+
-+	if (mode != BCH_RENAME_EXCHANGE)
-+		*src_offset = dst_iter.pos.offset;
-+
-+	/* Create new dst key: */
-+	new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
-+	ret = PTR_ERR_OR_ZERO(new_dst);
-+	if (ret)
-+		goto out;
-+
-+	dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
-+	new_dst->k.p = dst_iter.pos;
-+
-+	/* Create new src key: */
-+	if (mode == BCH_RENAME_EXCHANGE) {
-+		new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
-+		ret = PTR_ERR_OR_ZERO(new_src);
-+		if (ret)
-+			goto out;
-+
-+		dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst));
-+		new_src->k.p = src_iter.pos;
-+	} else {
-+		new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
-+		ret = PTR_ERR_OR_ZERO(new_src);
-+		if (ret)
-+			goto out;
-+
-+		bkey_init(&new_src->k);
-+		new_src->k.p = src_iter.pos;
-+
-+		if (bkey_le(dst_pos, src_iter.pos) &&
-+		    bkey_lt(src_iter.pos, dst_iter.pos)) {
-+			/*
-+			 * We have a hash collision for the new dst key,
-+			 * and new_src - the key we're deleting - is between
-+			 * new_dst's hashed slot and the slot we're going to be
-+			 * inserting it into - oops.  This will break the hash
-+			 * table if we don't deal with it:
-+			 */
-+			if (mode == BCH_RENAME) {
-+				/*
-+				 * If we're not overwriting, we can just insert
-+				 * new_dst at the src position:
-+				 */
-+				new_src = new_dst;
-+				new_src->k.p = src_iter.pos;
-+				goto out_set_src;
-+			} else {
-+				/* If we're overwriting, we can't insert new_dst
-+				 * at a different slot because it has to
-+				 * overwrite old_dst - just make sure to use a
-+				 * whiteout when deleting src:
-+				 */
-+				new_src->k.type = KEY_TYPE_hash_whiteout;
-+			}
-+		} else {
-+			/* Check if we need a whiteout to delete src: */
-+			ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc,
-+						       src_hash, &src_iter);
-+			if (ret < 0)
-+				goto out;
-+
-+			if (ret)
-+				new_src->k.type = KEY_TYPE_hash_whiteout;
-+		}
-+	}
-+
-+	ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
-+	if (ret)
-+		goto out;
-+out_set_src:
-+
-+	/*
-+	 * If we're deleting a subvolume, we need to really delete the dirent,
-+	 * not just emit a whiteout in the current snapshot:
-+	 */
-+	if (src_type == DT_SUBVOL) {
-+		bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
-+		ret = bch2_btree_iter_traverse(&src_iter);
-+		if (ret)
-+			goto out;
-+
-+		new_src->k.p = src_iter.pos;
-+		src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE;
-+	}
-+
-+	ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
-+	if (ret)
-+		goto out;
-+
-+	if (mode == BCH_RENAME_EXCHANGE)
-+		*src_offset = new_src->k.p.offset;
-+	*dst_offset = new_dst->k.p.offset;
-+out:
-+	bch2_trans_iter_exit(trans, &src_iter);
-+	bch2_trans_iter_exit(trans, &dst_iter);
-+	return ret;
-+}
-+
-+/*
-+ * Look up @name in directory @dir inside an existing transaction.
-+ *
-+ * On success (0) *@inum holds the target and @iter is left pointing at the
-+ * dirent; the caller must release it with bch2_trans_iter_exit(). A dirent
-+ * that bch2_dirent_read_target() reports as not visible is returned as
-+ * -ENOENT. If a failure occurs after the hash lookup, @iter is released here.
-+ */
-+int __bch2_dirent_lookup_trans(struct btree_trans *trans,
-+			       struct btree_iter *iter,
-+			       subvol_inum dir,
-+			       const struct bch_hash_info *hash_info,
-+			       const struct qstr *name, subvol_inum *inum,
-+			       unsigned flags)
-+{
-+	struct bkey_s_c k;
-+	u32 snapshot;
-+	int ret;
-+
-+	ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
-+	if (ret)
-+		return ret;
-+
-+	ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
-+			       hash_info, dir, name, flags);
-+	if (ret)
-+		return ret;
-+
-+	k = bch2_btree_iter_peek_slot(iter);
-+	ret = bkey_err(k);
-+	if (!ret) {
-+		ret = bch2_dirent_read_target(trans, dir,
-+					      bkey_s_c_to_dirent(k), inum);
-+		/* Positive means "not visible from this subvolume" */
-+		if (ret > 0)
-+			ret = -ENOENT;
-+	}
-+
-+	if (ret)
-+		bch2_trans_iter_exit(trans, iter);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Look up @name in @dir using a private transaction, retrying for as long as
-+ * the lookup reports a transaction restart.
-+ *
-+ * Returns the (int) result of __bch2_dirent_lookup_trans() converted to the
-+ * u64 return type; *@inum is filled in on success.
-+ */
-+u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
-+		       const struct bch_hash_info *hash_info,
-+		       const struct qstr *name, subvol_inum *inum)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	int ret;
-+
-+	do {
-+		bch2_trans_begin(trans);
-+
-+		ret = __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info,
-+						 name, inum, 0);
-+	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
-+
-+	/* On failure the helper has already released the iterator */
-+	if (!ret)
-+		bch2_trans_iter_exit(trans, &iter);
-+
-+	bch2_trans_put(trans);
-+	return ret;
-+}
-+
-+/*
-+ * Check whether directory @dir has any entries in its subvolume's snapshot.
-+ *
-+ * Returns 0 if no KEY_TYPE_dirent key is found, -ENOTEMPTY as soon as one
-+ * is, or an error from the snapshot lookup / btree iteration.
-+ */
-+int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
-+{
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	u32 snapshot;
-+	int ret;
-+
-+	ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
-+	if (ret)
-+		return ret;
-+
-+	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
-+			   SPOS(dir.inum, 0, snapshot),
-+			   POS(dir.inum, U64_MAX), 0, k, ret) {
-+		if (k.k->type != KEY_TYPE_dirent)
-+			continue;
-+
-+		ret = -ENOTEMPTY;
-+		break;
-+	}
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Emit the entries of directory @inum to @ctx, starting at ctx->pos.
-+ *
-+ * Iterates the dirents btree for the directory, resolves each entry's target
-+ * and hands it to dir_emit(). The whole walk is restarted from ctx->pos when
-+ * a transaction restart is reported.
-+ *
-+ * Returns the last value of `ret`: 0 when iteration finished or dir_emit()
-+ * asked to stop, otherwise an error.
-+ */
-+int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_s_c_dirent dirent;
-+ subvol_inum target;
-+ u32 snapshot;
-+ struct bkey_buf sk;
-+ struct qstr name;
-+ int ret;
-+
-+ bch2_bkey_buf_init(&sk);
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
-+ SPOS(inum.inum, ctx->pos, snapshot),
-+ POS(inum.inum, U64_MAX), 0, k, ret) {
-+ if (k.k->type != KEY_TYPE_dirent)
-+ continue;
-+
-+ dirent = bkey_s_c_to_dirent(k);
-+
-+ /* <0: error; >0: entry not visible from this subvolume, skip it */
-+ ret = bch2_dirent_read_target(trans, inum, dirent, &target);
-+ if (ret < 0)
-+ break;
-+ if (ret)
-+ continue;
-+
-+ /* dir_emit() can fault and block: */
-+ /* so work from a private copy of the key and drop btree locks first */
-+ bch2_bkey_buf_reassemble(&sk, c, k);
-+ dirent = bkey_i_to_s_c_dirent(sk.k);
-+ bch2_trans_unlock(trans);
-+
-+ name = bch2_dirent_get_name(dirent);
-+
-+ /* pos stays on this entry if dir_emit() refuses it, else moves past it */
-+ ctx->pos = dirent.k->p.offset;
-+ if (!dir_emit(ctx, name.name,
-+ name.len,
-+ target.inum,
-+ vfs_d_type(dirent.v->d_type)))
-+ break;
-+ ctx->pos = dirent.k->p.offset + 1;
-+
-+ /*
-+ * read_target looks up subvolumes, we can overflow paths if the
-+ * directory has many subvolumes in it
-+ */
-+ ret = btree_trans_too_many_iters(trans);
-+ if (ret)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_put(trans);
-+ bch2_bkey_buf_exit(&sk, c);
-+
-+ return ret;
-+}
-diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
-new file mode 100644
-index 000000000000..cd262bf4d9c5
---- /dev/null
-+++ b/fs/bcachefs/dirent.h
-@@ -0,0 +1,70 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_DIRENT_H
-+#define _BCACHEFS_DIRENT_H
-+
-+#include "str_hash.h"
-+
-+enum bkey_invalid_flags;
-+extern const struct bch_hash_desc bch2_dirent_hash_desc;
-+
-+int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+/*
-+ * bkey_ops for KEY_TYPE_dirent: validation and text formatting hooks, plus
-+ * the minimum value size (16 bytes).
-+ */
-+#define bch2_bkey_ops_dirent ((struct bkey_ops) { \
-+ .key_invalid = bch2_dirent_invalid, \
-+ .val_to_text = bch2_dirent_to_text, \
-+ .min_val_size = 16, \
-+})
-+
-+struct qstr;
-+struct file;
-+struct dir_context;
-+struct bch_fs;
-+struct bch_hash_info;
-+struct bch_inode_info;
-+
-+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);
-+
-+/*
-+ * Number of u64s needed for a dirent value holding a name of @len bytes:
-+ * the fixed part before d_name plus the name, rounded up to whole u64s.
-+ */
-+static inline unsigned dirent_val_u64s(unsigned len)
-+{
-+	size_t bytes = offsetof(struct bch_dirent, d_name) + len;
-+
-+	return DIV_ROUND_UP(bytes, sizeof(u64));
-+}
-+
-+int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
-+ struct bkey_s_c_dirent, subvol_inum *);
-+
-+int bch2_dirent_create(struct btree_trans *, subvol_inum,
-+ const struct bch_hash_info *, u8,
-+ const struct qstr *, u64, u64 *, int);
-+
-+/* Map a bcachefs d_type to the one reported to the VFS: DT_SUBVOL becomes DT_DIR. */
-+static inline unsigned vfs_d_type(unsigned type)
-+{
-+	if (type == DT_SUBVOL)
-+		return DT_DIR;
-+
-+	return type;
-+}
-+
-+/*
-+ * Rename flavours understood by bch2_dirent_rename():
-+ *  BCH_RENAME           - move to a free slot at the destination name
-+ *  BCH_RENAME_OVERWRITE - replace an existing destination entry
-+ *  BCH_RENAME_EXCHANGE  - swap the targets of source and destination
-+ */
-+enum bch_rename_mode {
-+ BCH_RENAME,
-+ BCH_RENAME_OVERWRITE,
-+ BCH_RENAME_EXCHANGE,
-+};
-+
-+int bch2_dirent_rename(struct btree_trans *,
-+ subvol_inum, struct bch_hash_info *,
-+ subvol_inum, struct bch_hash_info *,
-+ const struct qstr *, subvol_inum *, u64 *,
-+ const struct qstr *, subvol_inum *, u64 *,
-+ enum bch_rename_mode);
-+
-+int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *,
-+ subvol_inum, const struct bch_hash_info *,
-+ const struct qstr *, subvol_inum *, unsigned);
-+u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum,
-+ const struct bch_hash_info *,
-+ const struct qstr *, subvol_inum *);
-+
-+int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
-+int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);
-+
-+#endif /* _BCACHEFS_DIRENT_H */
-diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c
-new file mode 100644
-index 000000000000..d613695abf9f
---- /dev/null
-+++ b/fs/bcachefs/disk_groups.c
-@@ -0,0 +1,620 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "disk_groups.h"
-+#include "sb-members.h"
-+#include "super-io.h"
-+
-+#include <linux/sort.h>
-+
-+/*
-+ * sort() comparator for struct bch_disk_group: orders by the deleted flag,
-+ * then by parent, then by label (strncmp over the whole label field).
-+ */
-+static int group_cmp(const void *_l, const void *_r)
-+{
-+	const struct bch_disk_group *l = _l;
-+	const struct bch_disk_group *r = _r;
-+
-+	if (BCH_GROUP_DELETED(l) != BCH_GROUP_DELETED(r))
-+		return BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r) ? -1 : 1;
-+
-+	if (BCH_GROUP_PARENT(l) != BCH_GROUP_PARENT(r))
-+		return BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r) ? -1 : 1;
-+
-+	return strncmp(l->label, r->label, sizeof(l->label));
-+}
-+
-+/*
-+ * Validate the disk_groups superblock field.
-+ *
-+ * Checks that every member's group refers to an existing, non-deleted entry,
-+ * that no live entry has an empty label, and that no two live entries share
-+ * the same parent and label. A description of the problem is written to
-+ * @err.
-+ *
-+ * Returns 0, -BCH_ERR_invalid_sb_disk_groups, or
-+ * -BCH_ERR_ENOMEM_disk_groups_validate.
-+ */
-+static int bch2_sb_disk_groups_validate(struct bch_sb *sb,
-+					struct bch_sb_field *f,
-+					struct printbuf *err)
-+{
-+	struct bch_sb_field_disk_groups *groups =
-+		field_to_type(f, disk_groups);
-+	unsigned nr_groups = disk_groups_nr(groups);
-+	struct bch_disk_group *sorted, *g;
-+	unsigned idx;
-+	int ret = 0;
-+
-+	for (idx = 0; idx < sb->nr_devices; idx++) {
-+		struct bch_member m = bch2_sb_member_get(sb, idx);
-+		unsigned group_id;
-+
-+		/* 0 means "no group"; stored values are index + 1 */
-+		if (!BCH_MEMBER_GROUP(&m))
-+			continue;
-+
-+		group_id = BCH_MEMBER_GROUP(&m) - 1;
-+
-+		if (group_id >= nr_groups) {
-+			prt_printf(err, "disk %u has invalid label %u (have %u)",
-+				   idx, group_id, nr_groups);
-+			return -BCH_ERR_invalid_sb_disk_groups;
-+		}
-+
-+		if (BCH_GROUP_DELETED(&groups->entries[group_id])) {
-+			prt_printf(err, "disk %u has deleted label %u", idx, group_id);
-+			return -BCH_ERR_invalid_sb_disk_groups;
-+		}
-+	}
-+
-+	if (!nr_groups)
-+		return 0;
-+
-+	for (idx = 0; idx < nr_groups; idx++) {
-+		g = &groups->entries[idx];
-+
-+		if (!BCH_GROUP_DELETED(g) &&
-+		    !strnlen(g->label, sizeof(g->label))) {
-+			prt_printf(err, "label %u empty", idx);
-+			return -BCH_ERR_invalid_sb_disk_groups;
-+		}
-+	}
-+
-+	/* Sort a copy so that duplicates end up next to each other */
-+	sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL);
-+	if (!sorted)
-+		return -BCH_ERR_ENOMEM_disk_groups_validate;
-+
-+	memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted));
-+	sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL);
-+
-+	for (idx = 0; idx + 1 < nr_groups; idx++) {
-+		g = &sorted[idx];
-+
-+		if (BCH_GROUP_DELETED(g) || group_cmp(g, g + 1))
-+			continue;
-+
-+		prt_printf(err, "duplicate label %llu.%.*s",
-+			   BCH_GROUP_PARENT(g),
-+			   (int) sizeof(g->label), g->label);
-+		ret = -BCH_ERR_invalid_sb_disk_groups;
-+		break;
-+	}
-+
-+	kfree(sorted);
-+	return ret;
-+}
-+
-+/*
-+ * Print the in-memory disk groups of @c: one space-separated "[...]" item per
-+ * entry, either "[deleted]" or the parent index followed by member device
-+ * names. Runs under rcu_read_lock() with out->atomic raised for the duration.
-+ */
-+void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	struct bch_disk_groups_cpu *g;
-+	struct bch_dev *ca;
-+	int i;
-+	unsigned iter;
-+
-+	out->atomic++;
-+	rcu_read_lock();
-+
-+	g = rcu_dereference(c->disk_groups);
-+	if (g) {
-+		for (i = 0; i < g->nr; i++) {
-+			if (i)
-+				prt_printf(out, " ");
-+
-+			if (g->entries[i].deleted) {
-+				prt_printf(out, "[deleted]");
-+				continue;
-+			}
-+
-+			prt_printf(out, "[parent %d devs", g->entries[i].parent);
-+			for_each_member_device_rcu(ca, c, iter, &g->entries[i].devs) {
-+				prt_printf(out, " %s", ca->name);
-+			}
-+			prt_printf(out, "]");
-+		}
-+	}
-+
-+	rcu_read_unlock();
-+	out->atomic--;
-+}
-+
-+/*
-+ * Print the disk_groups superblock field: one space-separated item per entry,
-+ * either "[deleted]" or "[parent <n> name <label>]".
-+ *
-+ * The label is a fixed-size field that is not NUL-terminated when the name
-+ * fills it completely, so it is printed with an explicit maximum length
-+ * rather than plain %s.
-+ */
-+static void bch2_sb_disk_groups_to_text(struct printbuf *out,
-+					struct bch_sb *sb,
-+					struct bch_sb_field *f)
-+{
-+	struct bch_sb_field_disk_groups *groups =
-+		field_to_type(f, disk_groups);
-+	struct bch_disk_group *g;
-+	unsigned nr_groups = disk_groups_nr(groups);
-+
-+	for (g = groups->entries;
-+	     g < groups->entries + nr_groups;
-+	     g++) {
-+		if (g != groups->entries)
-+			prt_printf(out, " ");
-+
-+		if (BCH_GROUP_DELETED(g))
-+			prt_printf(out, "[deleted]");
-+		else
-+			prt_printf(out, "[parent %llu name %.*s]",
-+				   BCH_GROUP_PARENT(g),
-+				   (int) sizeof(g->label), g->label);
-+	}
-+}
-+
-+/* Superblock field operations (validate / to_text) for the disk_groups field. */
-+const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = {
-+ .validate = bch2_sb_disk_groups_validate,
-+ .to_text = bch2_sb_disk_groups_to_text
-+};
-+
-+/*
-+ * Rebuild the in-memory (RCU-protected) copy of the disk groups from the
-+ * superblock. Must be called with c->sb_lock held.
-+ *
-+ * Each existing member device is added to the device mask of its group and
-+ * of every ancestor reached by following the parent links. The new table is
-+ * published with rcu_assign_pointer() and the old one freed with kfree_rcu().
-+ *
-+ * Returns 0 (also when the superblock has no disk_groups field, in which
-+ * case nothing is changed) or -BCH_ERR_ENOMEM_disk_groups_to_cpu.
-+ */
-+int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
-+{
-+	struct bch_sb_field_disk_groups *groups;
-+	struct bch_disk_groups_cpu *new_g, *prev_g;
-+	unsigned idx, nr_groups;
-+
-+	lockdep_assert_held(&c->sb_lock);
-+
-+	groups		= bch2_sb_field_get(c->disk_sb.sb, disk_groups);
-+	nr_groups	= disk_groups_nr(groups);
-+
-+	if (!groups)
-+		return 0;
-+
-+	new_g = kzalloc(struct_size(new_g, entries, nr_groups), GFP_KERNEL);
-+	if (!new_g)
-+		return -BCH_ERR_ENOMEM_disk_groups_to_cpu;
-+
-+	new_g->nr = nr_groups;
-+
-+	for (idx = 0; idx < nr_groups; idx++) {
-+		struct bch_disk_group *src	= &groups->entries[idx];
-+		struct bch_disk_group_cpu *dst	= &new_g->entries[idx];
-+
-+		dst->deleted	= BCH_GROUP_DELETED(src);
-+		dst->parent	= BCH_GROUP_PARENT(src);
-+		memcpy(dst->label, src->label, sizeof(dst->label));
-+	}
-+
-+	for (idx = 0; idx < c->disk_sb.sb->nr_devices; idx++) {
-+		struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, idx);
-+		unsigned label;
-+
-+		if (!bch2_member_exists(&m))
-+			continue;
-+
-+		/* Group numbers are index + 1; 0 terminates the parent chain */
-+		for (label = BCH_MEMBER_GROUP(&m); label; ) {
-+			struct bch_disk_group_cpu *dst = &new_g->entries[label - 1];
-+
-+			__set_bit(idx, dst->devs.d);
-+			label = dst->parent;
-+		}
-+	}
-+
-+	prev_g = rcu_dereference_protected(c->disk_groups,
-+				lockdep_is_held(&c->sb_lock));
-+	rcu_assign_pointer(c->disk_groups, new_g);
-+	if (prev_g)
-+		kfree_rcu(prev_g, rcu);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Translate an encoded @target into the device mask it designates.
-+ *
-+ * Returns NULL for TARGET_NULL, for a device index that is out of range or
-+ * unset, and for a group that is out of range or deleted. The lookup is done
-+ * under rcu_read_lock(), which is dropped before returning.
-+ */
-+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target)
-+{
-+	struct target t = target_decode(target);
-+	struct bch_devs_mask *devs = NULL;
-+
-+	rcu_read_lock();
-+
-+	switch (t.type) {
-+	case TARGET_NULL:
-+		break;
-+	case TARGET_DEV: {
-+		struct bch_dev *ca = NULL;
-+
-+		if (t.dev < c->sb.nr_devices)
-+			ca = rcu_dereference(c->devs[t.dev]);
-+		if (ca)
-+			devs = &ca->self;
-+		break;
-+	}
-+	case TARGET_GROUP: {
-+		struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
-+
-+		if (g && t.group < g->nr && !g->entries[t.group].deleted)
-+			devs = &g->entries[t.group].devs;
-+		break;
-+	}
-+	default:
-+		BUG();
-+	}
-+
-+	rcu_read_unlock();
-+
-+	return devs;
-+}
-+
-+/*
-+ * Does device index @dev belong to the encoded @target?
-+ *
-+ * TARGET_NULL matches nothing, TARGET_DEV matches exactly its device, and
-+ * TARGET_GROUP matches devices whose bit is set in the (existing, non-deleted)
-+ * group's device mask.
-+ */
-+bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
-+{
-+	struct target t = target_decode(target);
-+
-+	switch (t.type) {
-+	case TARGET_NULL:
-+		return false;
-+	case TARGET_DEV:
-+		return dev == t.dev;
-+	case TARGET_GROUP: {
-+		struct bch_disk_groups_cpu *g;
-+		bool ret = false;
-+
-+		rcu_read_lock();
-+		g = rcu_dereference(c->disk_groups);
-+		if (g && t.group < g->nr && !g->entries[t.group].deleted)
-+			ret = test_bit(dev, g->entries[t.group].devs.d);
-+		rcu_read_unlock();
-+
-+		return ret;
-+	}
-+	default:
-+		BUG();
-+	}
-+}
-+
-+/*
-+ * Find the live group with the given @parent whose label is exactly the
-+ * @namelen bytes at @name.
-+ *
-+ * Returns the entry index, -1 if there is no such group, or -EINVAL when
-+ * @namelen is 0 or larger than BCH_SB_LABEL_SIZE.
-+ */
-+static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups,
-+				  unsigned parent,
-+				  const char *name, unsigned namelen)
-+{
-+	unsigned idx, nr_groups = disk_groups_nr(groups);
-+
-+	if (!namelen || namelen > BCH_SB_LABEL_SIZE)
-+		return -EINVAL;
-+
-+	for (idx = 0; idx < nr_groups; idx++) {
-+		struct bch_disk_group *g = &groups->entries[idx];
-+
-+		if (BCH_GROUP_DELETED(g))
-+			continue;
-+		if (BCH_GROUP_PARENT(g) != parent)
-+			continue;
-+		if (strnlen(g->label, sizeof(g->label)) != namelen)
-+			continue;
-+		if (memcmp(name, g->label, namelen))
-+			continue;
-+
-+		return idx;
-+	}
-+
-+	return -1;
-+}
-+
-+/*
-+ * Add a group labelled with the @namelen bytes at @name under @parent.
-+ *
-+ * The first deleted entry is reused; if there is none, the superblock field
-+ * is grown by one entry. The label is NUL-terminated only when it is shorter
-+ * than the label field.
-+ *
-+ * Returns the index of the entry used, -EINVAL for a bad @namelen, or
-+ * -BCH_ERR_ENOSPC_disk_label_add if the field could not be resized.
-+ */
-+static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent,
-+				 const char *name, unsigned namelen)
-+{
-+	struct bch_sb_field_disk_groups *groups =
-+		bch2_sb_field_get(sb->sb, disk_groups);
-+	unsigned slot = 0, nr_groups = disk_groups_nr(groups);
-+	struct bch_disk_group *g;
-+
-+	if (!namelen || namelen > BCH_SB_LABEL_SIZE)
-+		return -EINVAL;
-+
-+	/* Look for a deleted entry to recycle */
-+	while (slot < nr_groups &&
-+	       !BCH_GROUP_DELETED(&groups->entries[slot]))
-+		slot++;
-+
-+	if (slot == nr_groups) {
-+		unsigned u64s =
-+			(sizeof(struct bch_sb_field_disk_groups) +
-+			 sizeof(struct bch_disk_group) * (nr_groups + 1)) /
-+			sizeof(u64);
-+
-+		groups = bch2_sb_field_resize(sb, disk_groups, u64s);
-+		if (!groups)
-+			return -BCH_ERR_ENOSPC_disk_label_add;
-+
-+		nr_groups = disk_groups_nr(groups);
-+	}
-+
-+	BUG_ON(slot >= nr_groups);
-+
-+	g = &groups->entries[slot];
-+
-+	memcpy(g->label, name, namelen);
-+	if (namelen < sizeof(g->label))
-+		g->label[namelen] = '\0';
-+	SET_BCH_GROUP_DELETED(g, 0);
-+	SET_BCH_GROUP_PARENT(g, parent);
-+	SET_BCH_GROUP_DATA_ALLOWED(g, ~0);
-+
-+	return slot;
-+}
-+
-+/*
-+ * Resolve a dot-separated label path (e.g. "a.b.c") to a group index.
-+ *
-+ * Each component is looked up beneath the group found for the previous one
-+ * (the first component beneath parent 0). Returns the index of the final
-+ * component, or the negative value of the first failed lookup.
-+ */
-+int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name)
-+{
-+	struct bch_sb_field_disk_groups *groups =
-+		bch2_sb_field_get(sb->sb, disk_groups);
-+	int v = -1;
-+
-+	for (;;) {
-+		const char *sep = strchrnul(name, '.');
-+
-+		/* Parents are stored as index + 1, so -1 maps to "no parent" */
-+		v = __bch2_disk_group_find(groups, v + 1, name, sep - name);
-+
-+		/* Step over the separator, if there is one */
-+		name = *sep == '.' ? sep + 1 : sep;
-+
-+		if (!*name || v < 0)
-+			break;
-+	}
-+
-+	return v;
-+}
-+
-+/*
-+ * Like bch2_disk_path_find(), but any component of the dot-separated path
-+ * that does not exist yet is created with __bch2_disk_group_add().
-+ *
-+ * Returns the index of the final component or a negative error.
-+ */
-+int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
-+{
-+	unsigned parent = 0;
-+	int v = -1;
-+
-+	for (;;) {
-+		const char *sep = strchrnul(name, '.');
-+		unsigned len = sep - name;
-+		/* Re-fetch every round: adding a group may resize the field */
-+		struct bch_sb_field_disk_groups *groups =
-+			bch2_sb_field_get(sb->sb, disk_groups);
-+
-+		v = __bch2_disk_group_find(groups, parent, name, len);
-+		if (v < 0)
-+			v = __bch2_disk_group_add(sb, parent, name, len);
-+		if (v < 0)
-+			return v;
-+
-+		parent = v + 1;
-+		name = *sep == '.' ? sep + 1 : sep;
-+
-+		if (!*name)
-+			break;
-+	}
-+
-+	return v;
-+}
-+
-+/*
-+ * Print the dot-separated label path of group index @v, using the in-memory
-+ * group table of @c.
-+ *
-+ * The chain of parents is collected leaf-first (at most 32 levels) and then
-+ * printed root-first. If the table is missing, an index is out of range, an
-+ * entry is deleted or the chain is too long, "invalid label <n>" is printed
-+ * with the index at which the walk stopped.
-+ */
-+void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
-+{
-+	struct bch_disk_groups_cpu *groups;
-+	struct bch_disk_group_cpu *g;
-+	unsigned nr = 0;
-+	u16 path[32];
-+	bool valid = false;
-+
-+	out->atomic++;
-+	rcu_read_lock();
-+	groups = rcu_dereference(c->disk_groups);
-+
-+	while (groups && nr < ARRAY_SIZE(path) && v < groups->nr) {
-+		g = &groups->entries[v];
-+		if (g->deleted)
-+			break;
-+
-+		path[nr++] = v;
-+
-+		if (!g->parent) {
-+			/* Reached a root: the whole chain is good */
-+			valid = true;
-+			break;
-+		}
-+
-+		v = g->parent - 1;
-+	}
-+
-+	if (!valid) {
-+		prt_printf(out, "invalid label %u", v);
-+	} else {
-+		while (nr) {
-+			v = path[--nr];
-+			g = &groups->entries[v];
-+
-+			prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
-+			if (nr)
-+				prt_printf(out, ".");
-+		}
-+	}
-+
-+	rcu_read_unlock();
-+	out->atomic--;
-+}
-+
-+/*
-+ * Print the dot-separated label path of group index @v, reading the
-+ * disk_groups field of superblock @sb directly.
-+ *
-+ * Prints "invalid label <n>" if an index is out of range, an entry is
-+ * deleted, or the parent chain is longer than 32 levels.
-+ */
-+void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
-+{
-+	struct bch_sb_field_disk_groups *groups =
-+		bch2_sb_field_get(sb, disk_groups);
-+	struct bch_disk_group *g;
-+	unsigned nr = 0;
-+	u16 path[32];
-+	bool valid = false;
-+
-+	/* Collect the chain leaf-first */
-+	while (nr < ARRAY_SIZE(path) && v < disk_groups_nr(groups)) {
-+		g = &groups->entries[v];
-+		if (BCH_GROUP_DELETED(g))
-+			break;
-+
-+		path[nr++] = v;
-+
-+		if (!BCH_GROUP_PARENT(g)) {
-+			valid = true;
-+			break;
-+		}
-+
-+		v = BCH_GROUP_PARENT(g) - 1;
-+	}
-+
-+	if (!valid) {
-+		prt_printf(out, "invalid label %u", v);
-+		return;
-+	}
-+
-+	/* Print it root-first */
-+	while (nr) {
-+		v = path[--nr];
-+		g = &groups->entries[v];
-+
-+		prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
-+		if (nr)
-+			prt_printf(out, ".");
-+	}
-+}
-+
-+/*
-+ * Assign device @ca to the group named by label path @name, creating the
-+ * group(s) if needed, and refresh the in-memory group table.
-+ *
-+ * An empty name or "none" is a no-op. Does not write the superblock;
-+ * bch2_dev_group_set() is the variant that takes sb_lock and does.
-+ * Returns 0 or a negative error.
-+ */
-+int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
-+{
-+	struct bch_member *mi;
-+	int ret, v;
-+
-+	if (!strlen(name) || !strcmp(name, "none"))
-+		return 0;
-+
-+	v = bch2_disk_path_find_or_create(&c->disk_sb, name);
-+	if (v < 0)
-+		return v;
-+
-+	ret = bch2_sb_disk_groups_to_cpu(c);
-+	if (ret)
-+		return ret;
-+
-+	/* Member group numbers are stored as index + 1 */
-+	mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-+	SET_BCH_MEMBER_GROUP(mi, v + 1);
-+	return 0;
-+}
-+
-+int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
-+{
-+ int ret;
-+
-+ mutex_lock(&c->sb_lock);
-+ ret = __bch2_dev_group_set(c, ca, name) ?:
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ return ret;
-+}
-+
-+int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
-+ struct printbuf *err)
-+{
-+ struct bch_dev *ca;
-+ int g;
-+
-+ if (!val)
-+ return -EINVAL;
-+
-+ if (!c)
-+ return 0;
-+
-+ if (!strlen(val) || !strcmp(val, "none")) {
-+ *res = 0;
-+ return 0;
-+ }
-+
-+ /* Is it a device? */
-+ ca = bch2_dev_lookup(c, val);
-+ if (!IS_ERR(ca)) {
-+ *res = dev_to_target(ca->dev_idx);
-+ percpu_ref_put(&ca->ref);
-+ return 0;
-+ }
-+
-+ mutex_lock(&c->sb_lock);
-+ g = bch2_disk_path_find(&c->disk_sb, val);
-+ mutex_unlock(&c->sb_lock);
-+
-+ if (g >= 0) {
-+ *res = group_to_target(g);
-+ return 0;
-+ }
-+
-+ return -EINVAL;
-+}
-+
-+void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
-+{
-+ struct target t = target_decode(v);
-+
-+ switch (t.type) {
-+ case TARGET_NULL:
-+ prt_printf(out, "none");
-+ break;
-+ case TARGET_DEV: {
-+ struct bch_dev *ca;
-+
-+ rcu_read_lock();
-+ ca = t.dev < c->sb.nr_devices
-+ ? rcu_dereference(c->devs[t.dev])
-+ : NULL;
-+
-+ if (ca && percpu_ref_tryget(&ca->io_ref)) {
-+ prt_printf(out, "/dev/%pg", ca->disk_sb.bdev);
-+ percpu_ref_put(&ca->io_ref);
-+ } else if (ca) {
-+ prt_printf(out, "offline device %u", t.dev);
-+ } else {
-+ prt_printf(out, "invalid device %u", t.dev);
-+ }
-+
-+ rcu_read_unlock();
-+ break;
-+ }
-+ case TARGET_GROUP:
-+ bch2_disk_path_to_text(out, c, t.group);
-+ break;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
-+{
-+ struct target t = target_decode(v);
-+
-+ switch (t.type) {
-+ case TARGET_NULL:
-+ prt_printf(out, "none");
-+ break;
-+ case TARGET_DEV: {
-+ struct bch_member m = bch2_sb_member_get(sb, t.dev);
-+
-+ if (bch2_dev_exists(sb, t.dev)) {
-+ prt_printf(out, "Device ");
-+ pr_uuid(out, m.uuid.b);
-+ prt_printf(out, " (%u)", t.dev);
-+ } else {
-+ prt_printf(out, "Bad device %u", t.dev);
-+ }
-+ break;
-+ }
-+ case TARGET_GROUP:
-+ bch2_disk_path_to_text_sb(out, sb, t.group);
-+ break;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+void bch2_opt_target_to_text(struct printbuf *out,
-+ struct bch_fs *c,
-+ struct bch_sb *sb,
-+ u64 v)
-+{
-+ if (c)
-+ bch2_target_to_text(out, c, v);
-+ else
-+ bch2_target_to_text_sb(out, sb, v);
-+}
-diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h
-new file mode 100644
-index 000000000000..441826fff224
---- /dev/null
-+++ b/fs/bcachefs/disk_groups.h
-@@ -0,0 +1,111 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_DISK_GROUPS_H
-+#define _BCACHEFS_DISK_GROUPS_H
-+
-+#include "disk_groups_types.h"
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups;
-+
-+static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups)
-+{
-+ return groups
-+ ? (vstruct_end(&groups->field) -
-+ (void *) &groups->entries[0]) / sizeof(struct bch_disk_group)
-+ : 0;
-+}
-+
-+struct target {
-+ enum {
-+ TARGET_NULL,
-+ TARGET_DEV,
-+ TARGET_GROUP,
-+ } type;
-+ union {
-+ unsigned dev;
-+ unsigned group;
-+ };
-+};
-+
-+#define TARGET_DEV_START 1
-+#define TARGET_GROUP_START (256 + TARGET_DEV_START)
-+
-+static inline u16 dev_to_target(unsigned dev)
-+{
-+ return TARGET_DEV_START + dev;
-+}
-+
-+static inline u16 group_to_target(unsigned group)
-+{
-+ return TARGET_GROUP_START + group;
-+}
-+
-+static inline struct target target_decode(unsigned target)
-+{
-+ if (target >= TARGET_GROUP_START)
-+ return (struct target) {
-+ .type = TARGET_GROUP,
-+ .group = target - TARGET_GROUP_START
-+ };
-+
-+ if (target >= TARGET_DEV_START)
-+ return (struct target) {
-+ .type = TARGET_DEV,
-+ .group = target - TARGET_DEV_START
-+ };
-+
-+ return (struct target) { .type = TARGET_NULL };
-+}
-+
-+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned);
-+
-+static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c,
-+ enum bch_data_type data_type,
-+ u16 target)
-+{
-+ struct bch_devs_mask devs = c->rw_devs[data_type];
-+ const struct bch_devs_mask *t = bch2_target_to_mask(c, target);
-+
-+ if (t)
-+ bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
-+ return devs;
-+}
-+
-+static inline bool bch2_target_accepts_data(struct bch_fs *c,
-+ enum bch_data_type data_type,
-+ u16 target)
-+{
-+ struct bch_devs_mask rw_devs = target_rw_devs(c, data_type, target);
-+ return !bitmap_empty(rw_devs.d, BCH_SB_MEMBERS_MAX);
-+}
-+
-+bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned);
-+
-+int bch2_disk_path_find(struct bch_sb_handle *, const char *);
-+
-+/* Exported for userspace bcachefs-tools: */
-+int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
-+
-+void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned);
-+void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned);
-+
-+void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned);
-+
-+int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
-+void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
-+
-+#define bch2_opt_target (struct bch_opt_fn) { \
-+ .parse = bch2_opt_target_parse, \
-+ .to_text = bch2_opt_target_to_text, \
-+}
-+
-+int bch2_sb_disk_groups_to_cpu(struct bch_fs *);
-+
-+int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
-+int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
-+
-+const char *bch2_sb_validate_disk_groups(struct bch_sb *,
-+ struct bch_sb_field *);
-+
-+void bch2_disk_groups_to_text(struct printbuf *, struct bch_fs *);
-+
-+#endif /* _BCACHEFS_DISK_GROUPS_H */
-diff --git a/fs/bcachefs/disk_groups_types.h b/fs/bcachefs/disk_groups_types.h
-new file mode 100644
-index 000000000000..a54ef085b13d
---- /dev/null
-+++ b/fs/bcachefs/disk_groups_types.h
-@@ -0,0 +1,18 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_DISK_GROUPS_TYPES_H
-+#define _BCACHEFS_DISK_GROUPS_TYPES_H
-+
-+struct bch_disk_group_cpu {
-+ bool deleted;
-+ u16 parent;
-+ u8 label[BCH_SB_LABEL_SIZE];
-+ struct bch_devs_mask devs;
-+};
-+
-+struct bch_disk_groups_cpu {
-+ struct rcu_head rcu;
-+ unsigned nr;
-+ struct bch_disk_group_cpu entries[] __counted_by(nr);
-+};
-+
-+#endif /* _BCACHEFS_DISK_GROUPS_TYPES_H */
-diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
-new file mode 100644
-index 000000000000..875f7c5a6fca
---- /dev/null
-+++ b/fs/bcachefs/ec.c
-@@ -0,0 +1,1969 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+/* erasure coding */
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "backpointers.h"
-+#include "bkey_buf.h"
-+#include "bset.h"
-+#include "btree_gc.h"
-+#include "btree_update.h"
-+#include "btree_write_buffer.h"
-+#include "buckets.h"
-+#include "checksum.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "io_read.h"
-+#include "keylist.h"
-+#include "recovery.h"
-+#include "replicas.h"
-+#include "super-io.h"
-+#include "util.h"
-+
-+#include <linux/sort.h>
-+
-+#ifdef __KERNEL__
-+
-+#include <linux/raid/pq.h>
-+#include <linux/raid/xor.h>
-+
-+static void raid5_recov(unsigned disks, unsigned failed_idx,
-+ size_t size, void **data)
-+{
-+ unsigned i = 2, nr;
-+
-+ BUG_ON(failed_idx >= disks);
-+
-+ swap(data[0], data[failed_idx]);
-+ memcpy(data[0], data[1], size);
-+
-+ while (i < disks) {
-+ nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS);
-+ xor_blocks(nr, size, data[0], data + i);
-+ i += nr;
-+ }
-+
-+ swap(data[0], data[failed_idx]);
-+}
-+
-+static void raid_gen(int nd, int np, size_t size, void **v)
-+{
-+ if (np >= 1)
-+ raid5_recov(nd + np, nd, size, v);
-+ if (np >= 2)
-+ raid6_call.gen_syndrome(nd + np, size, v);
-+ BUG_ON(np > 2);
-+}
-+
-+static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
-+{
-+ switch (nr) {
-+ case 0:
-+ break;
-+ case 1:
-+ if (ir[0] < nd + 1)
-+ raid5_recov(nd + 1, ir[0], size, v);
-+ else
-+ raid6_call.gen_syndrome(nd + np, size, v);
-+ break;
-+ case 2:
-+ if (ir[1] < nd) {
-+ /* data+data failure. */
-+ raid6_2data_recov(nd + np, size, ir[0], ir[1], v);
-+ } else if (ir[0] < nd) {
-+ /* data + p/q failure */
-+
-+ if (ir[1] == nd) /* data + p failure */
-+ raid6_datap_recov(nd + np, size, ir[0], v);
-+ else { /* data + q failure */
-+ raid5_recov(nd + 1, ir[0], size, v);
-+ raid6_call.gen_syndrome(nd + np, size, v);
-+ }
-+ } else {
-+ raid_gen(nd, np, size, v);
-+ }
-+ break;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+#else
-+
-+#include <raid/raid.h>
-+
-+#endif
-+
-+struct ec_bio {
-+ struct bch_dev *ca;
-+ struct ec_stripe_buf *buf;
-+ size_t idx;
-+ struct bio bio;
-+};
-+
-+/* Stripes btree keys: */
-+
-+int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) ||
-+ bpos_gt(k.k->p, POS(0, U32_MAX)), c, err,
-+ stripe_pos_bad,
-+ "stripe at bad pos");
-+
-+ bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err,
-+ stripe_val_size_bad,
-+ "incorrect value size (%zu < %u)",
-+ bkey_val_u64s(k.k), stripe_val_u64s(s));
-+
-+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-+ unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
-+
-+ prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
-+ s->algorithm,
-+ le16_to_cpu(s->sectors),
-+ nr_data,
-+ s->nr_redundant,
-+ s->csum_type,
-+ 1U << s->csum_granularity_bits);
-+
-+ for (i = 0; i < s->nr_blocks; i++) {
-+ const struct bch_extent_ptr *ptr = s->ptrs + i;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+ u32 offset;
-+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-+
-+ prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
-+ if (i < nr_data)
-+ prt_printf(out, "#%u", stripe_blockcount_get(s, i));
-+ prt_printf(out, " gen %u", ptr->gen);
-+ if (ptr_stale(ca, ptr))
-+ prt_printf(out, " stale");
-+ }
-+}
-+
-+/* returns blocknr in stripe that we matched: */
-+static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
-+ struct bkey_s_c k, unsigned *block)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+ unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
-+
-+ bkey_for_each_ptr(ptrs, ptr)
-+ for (i = 0; i < nr_data; i++)
-+ if (__bch2_ptr_matches_stripe(&s->ptrs[i], ptr,
-+ le16_to_cpu(s->sectors))) {
-+ *block = i;
-+ return ptr;
-+ }
-+
-+ return NULL;
-+}
-+
-+static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
-+{
-+ switch (k.k->type) {
-+ case KEY_TYPE_extent: {
-+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-+ const union bch_extent_entry *entry;
-+
-+ extent_for_each_entry(e, entry)
-+ if (extent_entry_type(entry) ==
-+ BCH_EXTENT_ENTRY_stripe_ptr &&
-+ entry->stripe_ptr.idx == idx)
-+ return true;
-+
-+ break;
-+ }
-+ }
-+
-+ return false;
-+}
-+
-+/* Stripe bufs: */
-+
-+static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
-+{
-+ if (buf->key.k.type == KEY_TYPE_stripe) {
-+ struct bkey_i_stripe *s = bkey_i_to_stripe(&buf->key);
-+ unsigned i;
-+
-+ for (i = 0; i < s->v.nr_blocks; i++) {
-+ kvpfree(buf->data[i], buf->size << 9);
-+ buf->data[i] = NULL;
-+ }
-+ }
-+}
-+
-+/* XXX: this is a non-mempoolified memory allocation: */
-+static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
-+ unsigned offset, unsigned size)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
-+ unsigned csum_granularity = 1U << v->csum_granularity_bits;
-+ unsigned end = offset + size;
-+ unsigned i;
-+
-+ BUG_ON(end > le16_to_cpu(v->sectors));
-+
-+ offset = round_down(offset, csum_granularity);
-+ end = min_t(unsigned, le16_to_cpu(v->sectors),
-+ round_up(end, csum_granularity));
-+
-+ buf->offset = offset;
-+ buf->size = end - offset;
-+
-+ memset(buf->valid, 0xFF, sizeof(buf->valid));
-+
-+ for (i = 0; i < v->nr_blocks; i++) {
-+ buf->data[i] = kvpmalloc(buf->size << 9, GFP_KERNEL);
-+ if (!buf->data[i])
-+ goto err;
-+ }
-+
-+ return 0;
-+err:
-+ ec_stripe_buf_exit(buf);
-+ return -BCH_ERR_ENOMEM_stripe_buf;
-+}
-+
-+/* Checksumming: */
-+
-+static struct bch_csum ec_block_checksum(struct ec_stripe_buf *buf,
-+ unsigned block, unsigned offset)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
-+ unsigned csum_granularity = 1 << v->csum_granularity_bits;
-+ unsigned end = buf->offset + buf->size;
-+ unsigned len = min(csum_granularity, end - offset);
-+
-+ BUG_ON(offset >= end);
-+ BUG_ON(offset < buf->offset);
-+ BUG_ON(offset & (csum_granularity - 1));
-+ BUG_ON(offset + len != le16_to_cpu(v->sectors) &&
-+ (len & (csum_granularity - 1)));
-+
-+ return bch2_checksum(NULL, v->csum_type,
-+ null_nonce(),
-+ buf->data[block] + ((offset - buf->offset) << 9),
-+ len << 9);
-+}
-+
-+static void ec_generate_checksums(struct ec_stripe_buf *buf)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
-+ unsigned i, j, csums_per_device = stripe_csums_per_device(v);
-+
-+ if (!v->csum_type)
-+ return;
-+
-+ BUG_ON(buf->offset);
-+ BUG_ON(buf->size != le16_to_cpu(v->sectors));
-+
-+ for (i = 0; i < v->nr_blocks; i++)
-+ for (j = 0; j < csums_per_device; j++)
-+ stripe_csum_set(v, i, j,
-+ ec_block_checksum(buf, i, j << v->csum_granularity_bits));
-+}
-+
-+static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
-+ unsigned csum_granularity = 1 << v->csum_granularity_bits;
-+ unsigned i;
-+
-+ if (!v->csum_type)
-+ return;
-+
-+ for (i = 0; i < v->nr_blocks; i++) {
-+ unsigned offset = buf->offset;
-+ unsigned end = buf->offset + buf->size;
-+
-+ if (!test_bit(i, buf->valid))
-+ continue;
-+
-+ while (offset < end) {
-+ unsigned j = offset >> v->csum_granularity_bits;
-+ unsigned len = min(csum_granularity, end - offset);
-+ struct bch_csum want = stripe_csum_get(v, i, j);
-+ struct bch_csum got = ec_block_checksum(buf, i, offset);
-+
-+ if (bch2_crc_cmp(want, got)) {
-+ struct printbuf err = PRINTBUF;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
-+
-+ prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
-+ want.hi, want.lo,
-+ got.hi, got.lo,
-+ bch2_csum_types[v->csum_type]);
-+ prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
-+ bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
-+ bch_err_ratelimited(ca, "%s", err.buf);
-+ printbuf_exit(&err);
-+
-+ clear_bit(i, buf->valid);
-+
-+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-+ break;
-+ }
-+
-+ offset += len;
-+ }
-+ }
-+}
-+
-+/* Erasure coding: */
-+
-+static void ec_generate_ec(struct ec_stripe_buf *buf)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
-+ unsigned nr_data = v->nr_blocks - v->nr_redundant;
-+ unsigned bytes = le16_to_cpu(v->sectors) << 9;
-+
-+ raid_gen(nr_data, v->nr_redundant, bytes, buf->data);
-+}
-+
-+static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
-+
-+ return v->nr_blocks - bitmap_weight(buf->valid, v->nr_blocks);
-+}
-+
-+static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
-+ unsigned i, failed[BCH_BKEY_PTRS_MAX], nr_failed = 0;
-+ unsigned nr_data = v->nr_blocks - v->nr_redundant;
-+ unsigned bytes = buf->size << 9;
-+
-+ if (ec_nr_failed(buf) > v->nr_redundant) {
-+ bch_err_ratelimited(c,
-+ "error doing reconstruct read: unable to read enough blocks");
-+ return -1;
-+ }
-+
-+ for (i = 0; i < nr_data; i++)
-+ if (!test_bit(i, buf->valid))
-+ failed[nr_failed++] = i;
-+
-+ raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data);
-+ return 0;
-+}
-+
-+/* IO: */
-+
-+static void ec_block_endio(struct bio *bio)
-+{
-+ struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio);
-+ struct bch_stripe *v = &bkey_i_to_stripe(&ec_bio->buf->key)->v;
-+ struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx];
-+ struct bch_dev *ca = ec_bio->ca;
-+ struct closure *cl = bio->bi_private;
-+
-+ if (bch2_dev_io_err_on(bio->bi_status, ca,
-+ bio_data_dir(bio)
-+ ? BCH_MEMBER_ERROR_write
-+ : BCH_MEMBER_ERROR_read,
-+ "erasure coding %s error: %s",
-+ bio_data_dir(bio) ? "write" : "read",
-+ bch2_blk_status_to_str(bio->bi_status)))
-+ clear_bit(ec_bio->idx, ec_bio->buf->valid);
-+
-+ if (ptr_stale(ca, ptr)) {
-+ bch_err_ratelimited(ca->fs,
-+ "error %s stripe: stale pointer after io",
-+ bio_data_dir(bio) == READ ? "reading from" : "writing to");
-+ clear_bit(ec_bio->idx, ec_bio->buf->valid);
-+ }
-+
-+ bio_put(&ec_bio->bio);
-+ percpu_ref_put(&ca->io_ref);
-+ closure_put(cl);
-+}
-+
-+static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
-+ blk_opf_t opf, unsigned idx, struct closure *cl)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
-+ unsigned offset = 0, bytes = buf->size << 9;
-+ struct bch_extent_ptr *ptr = &v->ptrs[idx];
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+ enum bch_data_type data_type = idx < v->nr_blocks - v->nr_redundant
-+ ? BCH_DATA_user
-+ : BCH_DATA_parity;
-+ int rw = op_is_write(opf);
-+
-+ if (ptr_stale(ca, ptr)) {
-+ bch_err_ratelimited(c,
-+ "error %s stripe: stale pointer",
-+ rw == READ ? "reading from" : "writing to");
-+ clear_bit(idx, buf->valid);
-+ return;
-+ }
-+
-+ if (!bch2_dev_get_ioref(ca, rw)) {
-+ clear_bit(idx, buf->valid);
-+ return;
-+ }
-+
-+ this_cpu_add(ca->io_done->sectors[rw][data_type], buf->size);
-+
-+ while (offset < bytes) {
-+ unsigned nr_iovecs = min_t(size_t, BIO_MAX_VECS,
-+ DIV_ROUND_UP(bytes, PAGE_SIZE));
-+ unsigned b = min_t(size_t, bytes - offset,
-+ nr_iovecs << PAGE_SHIFT);
-+ struct ec_bio *ec_bio;
-+
-+ ec_bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev,
-+ nr_iovecs,
-+ opf,
-+ GFP_KERNEL,
-+ &c->ec_bioset),
-+ struct ec_bio, bio);
-+
-+ ec_bio->ca = ca;
-+ ec_bio->buf = buf;
-+ ec_bio->idx = idx;
-+
-+ ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9);
-+ ec_bio->bio.bi_end_io = ec_block_endio;
-+ ec_bio->bio.bi_private = cl;
-+
-+ bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
-+
-+ closure_get(cl);
-+ percpu_ref_get(&ca->io_ref);
-+
-+ submit_bio(&ec_bio->bio);
-+
-+ offset += b;
-+ }
-+
-+ percpu_ref_put(&ca->io_ref);
-+}
-+
-+static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
-+ struct ec_stripe_buf *stripe)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes,
-+ POS(0, idx), BTREE_ITER_SLOTS);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+ if (k.k->type != KEY_TYPE_stripe) {
-+ ret = -ENOENT;
-+ goto err;
-+ }
-+ bkey_reassemble(&stripe->key, k);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/* recovery read path: */
-+int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct ec_stripe_buf *buf;
-+ struct closure cl;
-+ struct bch_stripe *v;
-+ unsigned i, offset;
-+ int ret = 0;
-+
-+ closure_init_stack(&cl);
-+
-+ BUG_ON(!rbio->pick.has_ec);
-+
-+ buf = kzalloc(sizeof(*buf), GFP_NOFS);
-+ if (!buf)
-+ return -BCH_ERR_ENOMEM_ec_read_extent;
-+
-+ ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
-+ if (ret) {
-+ bch_err_ratelimited(c,
-+ "error doing reconstruct read: error %i looking up stripe", ret);
-+ kfree(buf);
-+ return -EIO;
-+ }
-+
-+ v = &bkey_i_to_stripe(&buf->key)->v;
-+
-+ if (!bch2_ptr_matches_stripe(v, rbio->pick)) {
-+ bch_err_ratelimited(c,
-+ "error doing reconstruct read: pointer doesn't match stripe");
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ offset = rbio->bio.bi_iter.bi_sector - v->ptrs[rbio->pick.ec.block].offset;
-+ if (offset + bio_sectors(&rbio->bio) > le16_to_cpu(v->sectors)) {
-+ bch_err_ratelimited(c,
-+ "error doing reconstruct read: read is bigger than stripe");
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio));
-+ if (ret)
-+ goto err;
-+
-+ for (i = 0; i < v->nr_blocks; i++)
-+ ec_block_io(c, buf, REQ_OP_READ, i, &cl);
-+
-+ closure_sync(&cl);
-+
-+ if (ec_nr_failed(buf) > v->nr_redundant) {
-+ bch_err_ratelimited(c,
-+ "error doing reconstruct read: unable to read enough blocks");
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ ec_validate_checksums(c, buf);
-+
-+ ret = ec_do_recov(c, buf);
-+ if (ret)
-+ goto err;
-+
-+ memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter,
-+ buf->data[rbio->pick.ec.block] + ((offset - buf->offset) << 9));
-+err:
-+ ec_stripe_buf_exit(buf);
-+ kfree(buf);
-+ return ret;
-+}
-+
-+/* stripe bucket accounting: */
-+
-+static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
-+{
-+ ec_stripes_heap n, *h = &c->ec_stripes_heap;
-+
-+ if (idx >= h->size) {
-+ if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
-+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
-+
-+ mutex_lock(&c->ec_stripes_heap_lock);
-+ if (n.size > h->size) {
-+ memcpy(n.data, h->data, h->used * sizeof(h->data[0]));
-+ n.used = h->used;
-+ swap(*h, n);
-+ }
-+ mutex_unlock(&c->ec_stripes_heap_lock);
-+
-+ free_heap(&n);
-+ }
-+
-+ if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
-+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
-+
-+ if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
-+ !genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
-+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
-+
-+ return 0;
-+}
-+
-+static int ec_stripe_mem_alloc(struct btree_trans *trans,
-+ struct btree_iter *iter)
-+{
-+ return allocate_dropping_locks_errcode(trans,
-+ __ec_stripe_mem_alloc(trans->c, iter->pos.offset, _gfp));
-+}
-+
-+/*
-+ * Hash table of open stripes:
-+ * Stripes that are being created or modified are kept in a hash table, so that
-+ * stripe deletion can skip them.
-+ */
-+
-+static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx)
-+{
-+ unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
-+ struct ec_stripe_new *s;
-+
-+ hlist_for_each_entry(s, &c->ec_stripes_new[hash], hash)
-+ if (s->idx == idx)
-+ return true;
-+ return false;
-+}
-+
-+static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx)
-+{
-+ bool ret = false;
-+
-+ spin_lock(&c->ec_stripes_new_lock);
-+ ret = __bch2_stripe_is_open(c, idx);
-+ spin_unlock(&c->ec_stripes_new_lock);
-+
-+ return ret;
-+}
-+
-+static bool bch2_try_open_stripe(struct bch_fs *c,
-+ struct ec_stripe_new *s,
-+ u64 idx)
-+{
-+ bool ret;
-+
-+ spin_lock(&c->ec_stripes_new_lock);
-+ ret = !__bch2_stripe_is_open(c, idx);
-+ if (ret) {
-+ unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
-+
-+ s->idx = idx;
-+ hlist_add_head(&s->hash, &c->ec_stripes_new[hash]);
-+ }
-+ spin_unlock(&c->ec_stripes_new_lock);
-+
-+ return ret;
-+}
-+
-+static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s)
-+{
-+ BUG_ON(!s->idx);
-+
-+ spin_lock(&c->ec_stripes_new_lock);
-+ hlist_del_init(&s->hash);
-+ spin_unlock(&c->ec_stripes_new_lock);
-+
-+ s->idx = 0;
-+}
-+
-+/* Heap of all existing stripes, ordered by blocks_nonempty */
-+
-+static u64 stripe_idx_to_delete(struct bch_fs *c)
-+{
-+ ec_stripes_heap *h = &c->ec_stripes_heap;
-+
-+ lockdep_assert_held(&c->ec_stripes_heap_lock);
-+
-+ if (h->used &&
-+ h->data[0].blocks_nonempty == 0 &&
-+ !bch2_stripe_is_open(c, h->data[0].idx))
-+ return h->data[0].idx;
-+
-+ return 0;
-+}
-+
-+static inline int ec_stripes_heap_cmp(ec_stripes_heap *h,
-+ struct ec_stripe_heap_entry l,
-+ struct ec_stripe_heap_entry r)
-+{
-+ return ((l.blocks_nonempty > r.blocks_nonempty) -
-+ (l.blocks_nonempty < r.blocks_nonempty));
-+}
-+
-+static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
-+ size_t i)
-+{
-+ struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
-+
-+ genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i;
-+}
-+
-+static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
-+{
-+ ec_stripes_heap *h = &c->ec_stripes_heap;
-+ struct stripe *m = genradix_ptr(&c->stripes, idx);
-+
-+ BUG_ON(m->heap_idx >= h->used);
-+ BUG_ON(h->data[m->heap_idx].idx != idx);
-+}
-+
-+void bch2_stripes_heap_del(struct bch_fs *c,
-+ struct stripe *m, size_t idx)
-+{
-+ mutex_lock(&c->ec_stripes_heap_lock);
-+ heap_verify_backpointer(c, idx);
-+
-+ heap_del(&c->ec_stripes_heap, m->heap_idx,
-+ ec_stripes_heap_cmp,
-+ ec_stripes_heap_set_backpointer);
-+ mutex_unlock(&c->ec_stripes_heap_lock);
-+}
-+
-+void bch2_stripes_heap_insert(struct bch_fs *c,
-+ struct stripe *m, size_t idx)
-+{
-+ mutex_lock(&c->ec_stripes_heap_lock);
-+ BUG_ON(heap_full(&c->ec_stripes_heap));
-+
-+ heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
-+ .idx = idx,
-+ .blocks_nonempty = m->blocks_nonempty,
-+ }),
-+ ec_stripes_heap_cmp,
-+ ec_stripes_heap_set_backpointer);
-+
-+ heap_verify_backpointer(c, idx);
-+ mutex_unlock(&c->ec_stripes_heap_lock);
-+}
-+
-+void bch2_stripes_heap_update(struct bch_fs *c,
-+ struct stripe *m, size_t idx)
-+{
-+ ec_stripes_heap *h = &c->ec_stripes_heap;
-+ bool do_deletes;
-+ size_t i;
-+
-+ mutex_lock(&c->ec_stripes_heap_lock);
-+ heap_verify_backpointer(c, idx);
-+
-+ h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
-+
-+ i = m->heap_idx;
-+ heap_sift_up(h, i, ec_stripes_heap_cmp,
-+ ec_stripes_heap_set_backpointer);
-+ heap_sift_down(h, i, ec_stripes_heap_cmp,
-+ ec_stripes_heap_set_backpointer);
-+
-+ heap_verify_backpointer(c, idx);
-+
-+ do_deletes = stripe_idx_to_delete(c) != 0;
-+ mutex_unlock(&c->ec_stripes_heap_lock);
-+
-+ if (do_deletes)
-+ bch2_do_stripe_deletes(c);
-+}
-+
-+/* stripe deletion */
-+
-+static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_s_c_stripe s;
-+ int ret;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, POS(0, idx),
-+ BTREE_ITER_INTENT);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (k.k->type != KEY_TYPE_stripe) {
-+ bch2_fs_inconsistent(c, "attempting to delete nonexistent stripe %llu", idx);
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ s = bkey_s_c_to_stripe(k);
-+ for (unsigned i = 0; i < s.v->nr_blocks; i++)
-+ if (stripe_blockcount_get(s.v, i)) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ bch2_fs_inconsistent(c, "attempting to delete nonempty stripe %s", buf.buf);
-+ printbuf_exit(&buf);
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ ret = bch2_btree_delete_at(trans, &iter, 0);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static void ec_stripe_delete_work(struct work_struct *work)
-+{
-+ struct bch_fs *c =
-+ container_of(work, struct bch_fs, ec_stripe_delete_work);
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ int ret;
-+ u64 idx;
-+
-+ while (1) {
-+ mutex_lock(&c->ec_stripes_heap_lock);
-+ idx = stripe_idx_to_delete(c);
-+ mutex_unlock(&c->ec_stripes_heap_lock);
-+
-+ if (!idx)
-+ break;
-+
-+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ ec_stripe_delete(trans, idx));
-+ if (ret) {
-+ bch_err_fn(c, ret);
-+ break;
-+ }
-+ }
-+
-+ bch2_trans_put(trans);
-+
-+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
-+}
-+
-+void bch2_do_stripe_deletes(struct bch_fs *c)
-+{
-+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
-+ !queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
-+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
-+}
-+
-+/* stripe creation: */
-+
-+static int ec_stripe_key_update(struct btree_trans *trans,
-+ struct bkey_i_stripe *new,
-+ bool create)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes,
-+ new->k.p, BTREE_ITER_INTENT);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (k.k->type != (create ? KEY_TYPE_deleted : KEY_TYPE_stripe)) {
-+ bch2_fs_inconsistent(c, "error %s stripe: got existing key type %s",
-+ create ? "creating" : "updating",
-+ bch2_bkey_types[k.k->type]);
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ if (k.k->type == KEY_TYPE_stripe) {
-+ const struct bch_stripe *old = bkey_s_c_to_stripe(k).v;
-+ unsigned i;
-+
-+ if (old->nr_blocks != new->v.nr_blocks) {
-+ bch_err(c, "error updating stripe: nr_blocks does not match");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ for (i = 0; i < new->v.nr_blocks; i++) {
-+ unsigned v = stripe_blockcount_get(old, i);
-+
-+ BUG_ON(v &&
-+ (old->ptrs[i].dev != new->v.ptrs[i].dev ||
-+ old->ptrs[i].gen != new->v.ptrs[i].gen ||
-+ old->ptrs[i].offset != new->v.ptrs[i].offset));
-+
-+ stripe_blockcount_set(&new->v, i, v);
-+ }
-+ }
-+
-+ ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static int ec_stripe_update_extent(struct btree_trans *trans,
-+ struct bpos bucket, u8 gen,
-+ struct ec_stripe_buf *s,
-+ struct bpos *bp_pos)
-+{
-+ struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
-+ struct bch_fs *c = trans->c;
-+ struct bch_backpointer bp;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ const struct bch_extent_ptr *ptr_c;
-+ struct bch_extent_ptr *ptr, *ec_ptr = NULL;
-+ struct bch_extent_stripe_ptr stripe_ptr;
-+ struct bkey_i *n;
-+ int ret, dev, block;
-+
-+ ret = bch2_get_next_backpointer(trans, bucket, gen,
-+ bp_pos, &bp, BTREE_ITER_CACHED);
-+ if (ret)
-+ return ret;
-+ if (bpos_eq(*bp_pos, SPOS_MAX))
-+ return 0;
-+
-+ if (bp.level) {
-+ struct printbuf buf = PRINTBUF;
-+ struct btree_iter node_iter;
-+ struct btree *b;
-+
-+ b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp);
-+ bch2_trans_iter_exit(trans, &node_iter);
-+
-+ if (!b)
-+ return 0;
-+
-+ prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b);
-+ bch2_backpointer_to_text(&buf, &bp);
-+
-+ bch2_fs_inconsistent(c, "%s", buf.buf);
-+ printbuf_exit(&buf);
-+ return -EIO;
-+ }
-+
-+ k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_INTENT);
-+ ret = bkey_err(k);
-+ if (ret)
-+ return ret;
-+ if (!k.k) {
-+ /*
-+ * extent no longer exists - we could flush the btree
-+ * write buffer and retry to verify, but no need:
-+ */
-+ return 0;
-+ }
-+
-+ if (extent_has_stripe_ptr(k, s->key.k.p.offset))
-+ goto out;
-+
-+ ptr_c = bkey_matches_stripe(v, k, &block);
-+ /*
-+ * It doesn't generally make sense to erasure code cached ptrs:
-+ * XXX: should we be incrementing a counter?
-+ */
-+ if (!ptr_c || ptr_c->cached)
-+ goto out;
-+
-+ dev = v->ptrs[block].dev;
-+
-+ n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(stripe_ptr));
-+ ret = PTR_ERR_OR_ZERO(n);
-+ if (ret)
-+ goto out;
-+
-+ bkey_reassemble(n, k);
-+
-+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
-+ ec_ptr = bch2_bkey_has_device(bkey_i_to_s(n), dev);
-+ BUG_ON(!ec_ptr);
-+
-+ stripe_ptr = (struct bch_extent_stripe_ptr) {
-+ .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
-+ .block = block,
-+ .redundancy = v->nr_redundant,
-+ .idx = s->key.k.p.offset,
-+ };
-+
-+ __extent_entry_insert(n,
-+ (union bch_extent_entry *) ec_ptr,
-+ (union bch_extent_entry *) &stripe_ptr);
-+
-+ ret = bch2_trans_update(trans, &iter, n, 0);
-+out:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/*
-+ * Repeatedly run ec_stripe_update_extent() against the bucket that backs
-+ * block @block of stripe @s, each call inside its own commit_do().
-+ *
-+ * A position cursor starts at POS_MIN and is stepped with
-+ * bpos_nosnap_successor() after every successful call. The loop ends on the
-+ * first error or once the cursor equals POS_MAX.
-+ *
-+ * Returns 0 on success, otherwise the first nonzero commit_do() result.
-+ */
-+static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_buf *s,
-+				   unsigned block)
-+{
-+	struct bch_extent_ptr ptr = bkey_i_to_stripe(&s->key)->v.ptrs[block];
-+	struct bpos bucket_pos = PTR_BUCKET_POS(trans->c, &ptr);
-+	struct bpos bp_pos = POS_MIN;
-+	int ret;
-+
-+	for (;;) {
-+		ret = commit_do(trans, NULL, NULL,
-+				BTREE_INSERT_NOCHECK_RW|BTREE_INSERT_NOFAIL,
-+				ec_stripe_update_extent(trans, bucket_pos, ptr.gen,
-+							s, &bp_pos));
-+		if (ret || bkey_eq(bp_pos, POS_MAX))
-+			return ret;
-+
-+		/* step the cursor past the position just handled */
-+		bp_pos = bpos_nosnap_successor(bp_pos);
-+	}
-+}
-+
-+/*
-+ * Flush the btree write buffer, then call ec_stripe_update_bucket() for each
-+ * data block of stripe @s (indices below nr_blocks - nr_redundant).
-+ *
-+ * Runs in its own btree_trans, which is released before returning.
-+ * Returns 0 on success or the first error encountered.
-+ */
-+static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
-+	unsigned nr_data = v->nr_blocks - v->nr_redundant;
-+	unsigned block;
-+	int ret = bch2_btree_write_buffer_flush(trans);
-+
-+	for (block = 0; !ret && block < nr_data; block++)
-+		ret = ec_stripe_update_bucket(trans, s, block);
-+
-+	bch2_trans_put(trans);
-+	return ret;
-+}
-+
-+/*
-+ * Zero the unused tail of a data bucket: first in the in-memory buffer for
-+ * @block of the new stripe, then on the device with blkdev_issue_zeroout().
-+ *
-+ * Errors are reported through s->err: -BCH_ERR_erofs_no_writes when a write
-+ * ioref cannot be taken on the device, otherwise the zeroout return value.
-+ */
-+static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
-+				       struct ec_stripe_new *s,
-+				       unsigned block,
-+				       struct open_bucket *ob)
-+{
-+	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-+	/* sectors of the bucket that are already in use */
-+	unsigned used = ca->mi.bucket_size - ob->sectors_free;
-+	int err;
-+
-+	if (!bch2_dev_get_ioref(ca, WRITE)) {
-+		s->err = -BCH_ERR_erofs_no_writes;
-+		return;
-+	}
-+
-+	/* sector counts are shifted left by 9 to give byte offsets/lengths */
-+	memset(s->new_stripe.data[block] + (used << 9), 0, ob->sectors_free << 9);
-+
-+	err = blkdev_issue_zeroout(ca->disk_sb.bdev,
-+				   ob->bucket * ca->mi.bucket_size + used,
-+				   ob->sectors_free,
-+				   GFP_KERNEL, 0);
-+
-+	percpu_ref_put(&ca->io_ref);
-+
-+	if (err)
-+		s->err = err;
-+}
-+
-+/*
-+ * Free @s. If it holds a stripe index (s->idx nonzero) the stripe is closed
-+ * with bch2_stripe_close() first.
-+ */
-+void bch2_ec_stripe_new_free(struct bch_fs *c, struct ec_stripe_new *s)
-+{
-+	if (s->idx != 0)
-+		bch2_stripe_close(c, s);
-+
-+	kfree(s);
-+}
-+
-+/*
-+ * data buckets of new stripe all written: create the stripe
-+ *
-+ * Steps, in order: wait for outstanding I/O on s->iodone; zero the unused
-+ * tail of every data bucket that still has free sectors; when an existing
-+ * stripe is being reused, validate its checksums, run recovery, and swap its
-+ * non-empty data blocks into the new buffer; generate parity and checksums;
-+ * write the parity blocks; update the stripe key; finally call
-+ * ec_stripe_update_extents().
-+ *
-+ * Every failure jumps to err:, which is also the normal exit path. It drops
-+ * the disk reservation and the open buckets, unlinks @s from
-+ * c->ec_stripe_new_list, wakes ec_stripe_new_wait, frees both stripe buffers
-+ * and drops the STRIPE_REF_stripe reference.
-+ */
-+static void ec_stripe_create(struct ec_stripe_new *s)
-+{
-+ struct bch_fs *c = s->c;
-+ struct open_bucket *ob;
-+ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
-+ unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
-+ int ret;
-+
-+ BUG_ON(s->h->s == s); /* ec_stripe_set_pending() clears h->s before listing the stripe */
-+
-+ closure_sync(&s->iodone);
-+
-+ if (!s->err) {
-+ for (i = 0; i < nr_data; i++)
-+ if (s->blocks[i]) {
-+ ob = c->open_buckets + s->blocks[i];
-+
-+ if (ob->sectors_free)
-+ zero_out_rest_of_ec_bucket(c, s, i, ob);
-+ }
-+ }
-+
-+ if (s->err) { /* may also have been set by zero_out_rest_of_ec_bucket() just above */
-+ if (!bch2_err_matches(s->err, EROFS))
-+ bch_err(c, "error creating stripe: error writing data buckets");
-+ goto err;
-+ }
-+
-+ if (s->have_existing_stripe) {
-+ ec_validate_checksums(c, &s->existing_stripe);
-+
-+ if (ec_do_recov(c, &s->existing_stripe)) {
-+ bch_err(c, "error creating stripe: error reading existing stripe");
-+ goto err;
-+ }
-+
-+ for (i = 0; i < nr_data; i++)
-+ if (stripe_blockcount_get(&bkey_i_to_stripe(&s->existing_stripe.key)->v, i))
-+ swap(s->new_stripe.data[i],
-+ s->existing_stripe.data[i]);
-+
-+ ec_stripe_buf_exit(&s->existing_stripe);
-+ }
-+
-+ BUG_ON(!s->allocated);
-+ BUG_ON(!s->idx);
-+
-+ ec_generate_ec(&s->new_stripe);
-+
-+ ec_generate_checksums(&s->new_stripe);
-+
-+ /* write p/q: */
-+ for (i = nr_data; i < v->nr_blocks; i++)
-+ ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone);
-+ closure_sync(&s->iodone);
-+
-+ if (ec_nr_failed(&s->new_stripe)) {
-+ bch_err(c, "error creating stripe: error writing redundancy buckets");
-+ goto err;
-+ }
-+
-+ ret = bch2_trans_do(c, &s->res, NULL,
-+ BTREE_INSERT_NOCHECK_RW|
-+ BTREE_INSERT_NOFAIL,
-+ ec_stripe_key_update(trans,
-+ bkey_i_to_stripe(&s->new_stripe.key),
-+ !s->have_existing_stripe));
-+ if (ret) {
-+ bch_err(c, "error creating stripe: error creating stripe key");
-+ goto err;
-+ }
-+
-+ ret = ec_stripe_update_extents(c, &s->new_stripe);
-+ if (ret) {
-+ bch_err_msg(c, ret, "creating stripe: error updating pointers");
-+ goto err;
-+ }
-+err: /* reached on success as well as on failure */
-+ bch2_disk_reservation_put(c, &s->res);
-+
-+ for (i = 0; i < v->nr_blocks; i++)
-+ if (s->blocks[i]) {
-+ ob = c->open_buckets + s->blocks[i];
-+
-+ if (i < nr_data) { /* data bucket: detach from this stripe before the put */
-+ ob->ec = NULL;
-+ __bch2_open_bucket_put(c, ob);
-+ } else {
-+ bch2_open_bucket_put(c, ob);
-+ }
-+ }
-+
-+ mutex_lock(&c->ec_stripe_new_lock);
-+ list_del(&s->list);
-+ mutex_unlock(&c->ec_stripe_new_lock);
-+ wake_up(&c->ec_stripe_new_wait);
-+
-+ ec_stripe_buf_exit(&s->existing_stripe);
-+ ec_stripe_buf_exit(&s->new_stripe);
-+ closure_debug_destroy(&s->iodone);
-+
-+ ec_stripe_new_put(c, s, STRIPE_REF_stripe);
-+}
-+
-+/*
-+ * Return the first stripe on c->ec_stripe_new_list whose STRIPE_REF_io count
-+ * is zero, or NULL if there is none. The list is walked under
-+ * c->ec_stripe_new_lock; no reference is taken on the result.
-+ */
-+static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
-+{
-+	struct ec_stripe_new *pos, *found = NULL;
-+
-+	mutex_lock(&c->ec_stripe_new_lock);
-+	list_for_each_entry(pos, &c->ec_stripe_new_list, list) {
-+		if (atomic_read(&pos->ref[STRIPE_REF_io]))
-+			continue;
-+
-+		found = pos;
-+		break;
-+	}
-+	mutex_unlock(&c->ec_stripe_new_lock);
-+
-+	return found;
-+}
-+
-+/*
-+ * Work function for c->ec_stripe_create_work: create every stripe that
-+ * get_pending_stripe() returns, then drop the BCH_WRITE_REF_stripe_create
-+ * reference taken when the work was queued.
-+ */
-+static void ec_stripe_create_work(struct work_struct *work)
-+{
-+	struct bch_fs *c = container_of(work, struct bch_fs, ec_stripe_create_work);
-+
-+	for (;;) {
-+		struct ec_stripe_new *s = get_pending_stripe(c);
-+
-+		if (!s)
-+			break;
-+
-+		ec_stripe_create(s);
-+	}
-+
-+	bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
-+}
-+
-+/*
-+ * Queue the stripe-creation work on system_long_wq, holding a
-+ * BCH_WRITE_REF_stripe_create reference on behalf of the work item. If
-+ * queue_work() reports that nothing was queued, the reference is dropped
-+ * again here.
-+ */
-+void bch2_ec_do_stripe_creates(struct bch_fs *c)
-+{
-+	bool queued;
-+
-+	bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create);
-+
-+	queued = queue_work(system_long_wq, &c->ec_stripe_create_work);
-+	if (!queued)
-+		bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
-+}
-+
-+/*
-+ * Detach the current stripe from head @h and hand it over for creation: mark
-+ * it pending, add it to c->ec_stripe_new_list, then drop the head's
-+ * STRIPE_REF_io reference.
-+ *
-+ * The stripe must either be fully allocated or already carry an error.
-+ */
-+static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
-+{
-+	struct ec_stripe_new *stripe = h->s;
-+
-+	BUG_ON(!stripe->allocated && !stripe->err);
-+
-+	h->s = NULL;
-+	stripe->pending = true;
-+
-+	mutex_lock(&c->ec_stripe_new_lock);
-+	list_add(&stripe->list, &c->ec_stripe_new_list);
-+	mutex_unlock(&c->ec_stripe_new_lock);
-+
-+	ec_stripe_new_put(c, stripe, STRIPE_REF_io);
-+}
-+
-+/*
-+ * Mark the new stripe that open bucket @ob belongs to (ob->ec) as failed by
-+ * setting its err field to -EIO.
-+ */
-+void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
-+{
-+	ob->ec->err = -EIO;
-+}
-+
-+/*
-+ * Return the address inside the new stripe's in-memory buffer that
-+ * corresponds to the next unwritten sector of the write point's erasure
-+ * coded open bucket, or NULL when ec_open_bucket() finds no such bucket.
-+ */
-+void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
-+{
-+	struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs);
-+	struct bch_dev *ca;
-+	unsigned used;
-+
-+	if (!ob)
-+		return NULL;
-+
-+	BUG_ON(!ob->ec->new_stripe.data[ob->ec_idx]);
-+
-+	ca = bch_dev_bkey_exists(c, ob->dev);
-+	/* sectors already consumed in the bucket */
-+	used = ca->mi.bucket_size - ob->sectors_free;
-+
-+	return ob->ec->new_stripe.data[ob->ec_idx] + (used << 9);
-+}
-+
-+/* sort() comparison callback: orders two unsigned values via cmp_int(). */
-+static int unsigned_cmp(const void *_l, const void *_r)
-+{
-+	const unsigned lhs = *(const unsigned *) _l;
-+	const unsigned rhs = *(const unsigned *) _r;
-+
-+	return cmp_int(lhs, rhs);
-+}
-+
-+/*
-+ * pick most common bucket size:
-+ *
-+ * Collects the bucket size of every device in @devs, sorts the sizes and
-+ * returns the one with the longest run of equal values. Because the scan is
-+ * in ascending order and only a strictly longer run replaces the best one,
-+ * ties go to the smaller size. Returns 0 when @devs selects no devices.
-+ */
-+static unsigned pick_blocksize(struct bch_fs *c,
-+			       struct bch_devs_mask *devs)
-+{
-+	struct bch_dev *ca;
-+	unsigned sizes[BCH_SB_MEMBERS_MAX];
-+	unsigned i, nr = 0, run_start;
-+	unsigned best_nr = 0, best_size = 0;
-+
-+	for_each_member_device_rcu(ca, c, i, devs)
-+		sizes[nr++] = ca->mi.bucket_size;
-+
-+	sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL);
-+
-+	for (run_start = 0; run_start < nr; run_start = i) {
-+		/* find the end of the run of equal sizes */
-+		for (i = run_start + 1; i < nr && sizes[i] == sizes[run_start]; i++)
-+			;
-+
-+		if (i - run_start > best_nr) {
-+			best_nr = i - run_start;
-+			best_size = sizes[run_start];
-+		}
-+	}
-+
-+	return best_size;
-+}
-+
-+/*
-+ * Policy hook consulted by get_existing_stripe(). It currently always answers
-+ * false, so reuse of an existing stripe is never short-circuited.
-+ */
-+static bool may_create_new_stripe(struct bch_fs *c)
-+{
-+	return false;
-+}
-+
-+/*
-+ * Initialise @k as a stripe key with @nr_data data blocks, @nr_parity parity
-+ * blocks and @stripe_size sectors per block, using crc32c checksums.
-+ *
-+ * The checksum granularity starts at ilog2(encoded_extent_max in sectors) and
-+ * is coarsened one bit at a time until the value fits in BKEY_VAL_U64s_MAX
-+ * u64s. It is a BUG if the granularity would reach the block size or U8_MAX
-+ * first.
-+ */
-+static void ec_stripe_key_init(struct bch_fs *c,
-+			       struct bkey_i *k,
-+			       unsigned nr_data,
-+			       unsigned nr_parity,
-+			       unsigned stripe_size)
-+{
-+	struct bkey_i_stripe *s = bkey_stripe_init(k);
-+	unsigned u64s;
-+
-+	s->v.sectors			= cpu_to_le16(stripe_size);
-+	s->v.algorithm			= 0;
-+	s->v.nr_blocks			= nr_data + nr_parity;
-+	s->v.nr_redundant		= nr_parity;
-+	s->v.csum_granularity_bits	= ilog2(c->opts.encoded_extent_max >> 9);
-+	s->v.csum_type			= BCH_CSUM_crc32c;
-+	s->v.pad			= 0;
-+
-+	for (;;) {
-+		u64s = stripe_val_u64s(&s->v);
-+		if (u64s <= BKEY_VAL_U64s_MAX)
-+			break;
-+
-+		BUG_ON(1 << s->v.csum_granularity_bits >=
-+		       le16_to_cpu(s->v.sectors) ||
-+		       s->v.csum_granularity_bits == U8_MAX);
-+		s->v.csum_granularity_bits++;
-+	}
-+
-+	set_bkey_val_u64s(&s->k, u64s);
-+}
-+
-+/*
-+ * Allocate and initialise a fresh ec_stripe_new for head @h and install it as
-+ * h->s. Both reference counts start at 1. Caller must hold h->lock.
-+ *
-+ * nr_parity is h->redundancy; nr_data is
-+ * min(h->nr_active_devs, BCH_BKEY_PTRS_MAX) - h->redundancy.
-+ * NOTE(review): nothing visible here guards against nr_active_devs being
-+ * smaller than redundancy, which would make that subtraction wrap - confirm
-+ * that callers rule it out.
-+ *
-+ * Returns 0, or -BCH_ERR_ENOMEM_ec_new_stripe_alloc if allocation fails.
-+ */
-+static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
-+{
-+	struct ec_stripe_new *stripe;
-+
-+	lockdep_assert_held(&h->lock);
-+
-+	stripe = kzalloc(sizeof(*stripe), GFP_KERNEL);
-+	if (!stripe)
-+		return -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
-+
-+	mutex_init(&stripe->lock);
-+	closure_init(&stripe->iodone, NULL);
-+	atomic_set(&stripe->ref[STRIPE_REF_stripe], 1);
-+	atomic_set(&stripe->ref[STRIPE_REF_io], 1);
-+
-+	stripe->c		= c;
-+	stripe->h		= h;
-+	stripe->nr_data		= min_t(unsigned, h->nr_active_devs,
-+					BCH_BKEY_PTRS_MAX) - h->redundancy;
-+	stripe->nr_parity	= h->redundancy;
-+
-+	ec_stripe_key_init(c, &stripe->new_stripe.key,
-+			   stripe->nr_data, stripe->nr_parity, h->blocksize);
-+
-+	h->s = stripe;
-+	return 0;
-+}
-+
-+/*
-+ * Allocate a stripe head for the given (target, algo, redundancy, watermark)
-+ * and add it to c->ec_stripe_head_list. The head is returned with h->lock
-+ * already held.
-+ *
-+ * h->devs is the set of RW devices for user data in @target with
-+ * zero-durability devices removed. h->blocksize is the most common bucket
-+ * size among them, and h->nr_active_devs counts the devices with exactly that
-+ * bucket size.
-+ *
-+ * Returns NULL if the allocation fails.
-+ */
-+static struct ec_stripe_head *
-+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
-+			 unsigned algo, unsigned redundancy,
-+			 enum bch_watermark watermark)
-+{
-+	struct ec_stripe_head *h = kzalloc(sizeof(*h), GFP_KERNEL);
-+	struct bch_dev *ca;
-+	unsigned dev;
-+
-+	if (!h)
-+		return NULL;
-+
-+	mutex_init(&h->lock);
-+	BUG_ON(!mutex_trylock(&h->lock));
-+
-+	h->target	= target;
-+	h->algo		= algo;
-+	h->redundancy	= redundancy;
-+	h->watermark	= watermark;
-+
-+	rcu_read_lock();
-+	h->devs = target_rw_devs(c, BCH_DATA_user, target);
-+
-+	for_each_member_device_rcu(ca, c, dev, &h->devs) {
-+		if (!ca->mi.durability)
-+			__clear_bit(dev, h->devs.d);
-+	}
-+
-+	h->blocksize = pick_blocksize(c, &h->devs);
-+
-+	for_each_member_device_rcu(ca, c, dev, &h->devs) {
-+		if (ca->mi.bucket_size == h->blocksize)
-+			h->nr_active_devs++;
-+	}
-+	rcu_read_unlock();
-+
-+	list_add(&h->list, &c->ec_stripe_head_list);
-+	return h;
-+}
-+
-+/*
-+ * Release a stripe head obtained from bch2_ec_stripe_head_get(). If the
-+ * head's current stripe is allocated and every data block is marked in
-+ * blocks_allocated, the stripe is first moved to the pending list. Finally
-+ * h->lock is dropped.
-+ */
-+void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
-+{
-+	struct ec_stripe_new *stripe = h->s;
-+
-+	if (stripe &&
-+	    stripe->allocated &&
-+	    bitmap_weight(stripe->blocks_allocated,
-+			  stripe->nr_data) == stripe->nr_data)
-+		ec_stripe_set_pending(c, h);
-+
-+	mutex_unlock(&h->lock);
-+}
-+
-+/*
-+ * Find the stripe head matching (target, algo, redundancy, watermark), or
-+ * allocate one, and return it with h->lock held.
-+ *
-+ * Returns NULL when @redundancy is 0 or head allocation fails. Returns an
-+ * ERR_PTR when taking either mutex through the transaction fails, or
-+ * -BCH_ERR_erofs_no_writes when the filesystem is going read-only.
-+ */
-+static struct ec_stripe_head *
-+__bch2_ec_stripe_head_get(struct btree_trans *trans,
-+			  unsigned target,
-+			  unsigned algo,
-+			  unsigned redundancy,
-+			  enum bch_watermark watermark)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct ec_stripe_head *h;
-+	int ret;
-+
-+	if (!redundancy)
-+		return NULL;
-+
-+	ret = bch2_trans_mutex_lock(trans, &c->ec_stripe_head_lock);
-+	if (ret)
-+		return ERR_PTR(ret);
-+
-+	if (test_bit(BCH_FS_GOING_RO, &c->flags)) {
-+		h = ERR_PTR(-BCH_ERR_erofs_no_writes);
-+		goto unlock;
-+	}
-+
-+	list_for_each_entry(h, &c->ec_stripe_head_list, list) {
-+		if (h->target != target ||
-+		    h->algo != algo ||
-+		    h->redundancy != redundancy ||
-+		    h->watermark != watermark)
-+			continue;
-+
-+		/* matching head: lock it, or report why we could not */
-+		ret = bch2_trans_mutex_lock(trans, &h->lock);
-+		if (ret)
-+			h = ERR_PTR(ret);
-+		goto unlock;
-+	}
-+
-+	h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
-+unlock:
-+	mutex_unlock(&c->ec_stripe_head_lock);
-+	return h;
-+}
-+/*
-+ * Allocate open buckets for whichever blocks of h->s do not have one yet.
-+ *
-+ * Devices already used by blocks marked in blocks_gotten are first removed
-+ * from a local copy of h->devs. Parity buckets are allocated before data
-+ * buckets. Each bucket obtained is recorded in h->s->blocks[], v->ptrs[] and
-+ * blocks_gotten at the first free slot of the parity range
-+ * [nr_data, nr_data + nr_parity) or of the data range [0, nr_data).
-+ *
-+ * Buckets handed back by a failing bch2_bucket_alloc_set_trans() call are
-+ * still recorded before its error is returned.
-+ *
-+ * Returns 0 on success, or the allocator's error.
-+ */
-+static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
-+ enum bch_watermark watermark, struct closure *cl)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_devs_mask devs = h->devs;
-+ struct open_bucket *ob;
-+ struct open_buckets buckets;
-+ struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
-+ unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
-+ bool have_cache = true;
-+ int ret = 0;
-+
-+ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
-+ BUG_ON(v->nr_redundant != h->s->nr_parity);
-+
-+ for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
-+ __clear_bit(v->ptrs[i].dev, devs.d); /* this device is already used by the stripe */
-+ if (i < h->s->nr_data)
-+ nr_have_data++;
-+ else
-+ nr_have_parity++;
-+ }
-+
-+ BUG_ON(nr_have_data > h->s->nr_data);
-+ BUG_ON(nr_have_parity > h->s->nr_parity);
-+
-+ buckets.nr = 0;
-+ if (nr_have_parity < h->s->nr_parity) {
-+ ret = bch2_bucket_alloc_set_trans(trans, &buckets,
-+ &h->parity_stripe,
-+ &devs,
-+ h->s->nr_parity,
-+ &nr_have_parity,
-+ &have_cache, 0,
-+ BCH_DATA_parity,
-+ watermark,
-+ cl);
-+
-+ open_bucket_for_each(c, &buckets, ob, i) {
-+ j = find_next_zero_bit(h->s->blocks_gotten,
-+ h->s->nr_data + h->s->nr_parity,
-+ h->s->nr_data);
-+ BUG_ON(j >= h->s->nr_data + h->s->nr_parity);
-+
-+ h->s->blocks[j] = buckets.v[i];
-+ v->ptrs[j] = bch2_ob_ptr(c, ob);
-+ __set_bit(j, h->s->blocks_gotten);
-+ }
-+
-+ if (ret) /* checked only after the buckets above were recorded */
-+ return ret;
-+ }
-+
-+ buckets.nr = 0;
-+ if (nr_have_data < h->s->nr_data) {
-+ ret = bch2_bucket_alloc_set_trans(trans, &buckets,
-+ &h->block_stripe,
-+ &devs,
-+ h->s->nr_data,
-+ &nr_have_data,
-+ &have_cache, 0,
-+ BCH_DATA_user,
-+ watermark,
-+ cl);
-+
-+ open_bucket_for_each(c, &buckets, ob, i) {
-+ j = find_next_zero_bit(h->s->blocks_gotten,
-+ h->s->nr_data, 0);
-+ BUG_ON(j >= h->s->nr_data);
-+
-+ h->s->blocks[j] = buckets.v[i];
-+ v->ptrs[j] = bch2_ob_ptr(c, ob);
-+ __set_bit(j, h->s->blocks_gotten);
-+ }
-+
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Scan the stripes heap for an existing stripe that head @head can reuse and
-+ * open it for head->s.
-+ *
-+ * A candidate must have at least one non-empty block (otherwise it will just
-+ * be deleted), match the head's algorithm, redundancy and block size, still
-+ * have fewer non-empty blocks than data blocks, and be successfully claimed
-+ * with bch2_try_open_stripe().
-+ *
-+ * Returns the stripe index, or -1 if none qualifies (or if
-+ * may_create_new_stripe() says a new stripe should be created instead).
-+ *
-+ * XXX: doesn't obey target:
-+ */
-+static s64 get_existing_stripe(struct bch_fs *c,
-+			       struct ec_stripe_head *head)
-+{
-+	ec_stripes_heap *h = &c->ec_stripes_heap;
-+	s64 found = -1;
-+	size_t i;
-+
-+	if (may_create_new_stripe(c))
-+		return -1;
-+
-+	mutex_lock(&c->ec_stripes_heap_lock);
-+	for (i = 0; i < h->used; i++) {
-+		struct stripe *m;
-+		u64 idx;
-+
-+		/* No blocks worth reusing, stripe will just be deleted: */
-+		if (!h->data[i].blocks_nonempty)
-+			continue;
-+
-+		idx = h->data[i].idx;
-+		m = genradix_ptr(&c->stripes, idx);
-+
-+		if (m->algorithm != head->algo ||
-+		    m->nr_redundant != head->redundancy ||
-+		    m->sectors != head->blocksize ||
-+		    m->blocks_nonempty >= m->nr_blocks - m->nr_redundant)
-+			continue;
-+
-+		if (!bch2_try_open_stripe(c, head->s, idx))
-+			continue;
-+
-+		found = idx;
-+		break;
-+	}
-+	mutex_unlock(&c->ec_stripes_heap_lock);
-+
-+	return found;
-+}
-+/*
-+ * Set up h->s to reuse an existing, partially empty stripe.
-+ *
-+ * Picks a candidate with get_existing_stripe(), reads its key into
-+ * h->s->existing_stripe, releases any buckets already gotten for the new
-+ * stripe, marks the existing stripe's non-empty blocks as gotten and
-+ * allocated, issues a read for every block, and copies the existing key over
-+ * the new stripe's key.
-+ *
-+ * Returns 0 on success, -BCH_ERR_stripe_alloc_blocked when no stripe can be
-+ * reused, or the error from reading the key or initialising the buffer (the
-+ * opened stripe is closed again in those cases).
-+ */
-+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
-+ struct bch_stripe *existing_v;
-+ unsigned i;
-+ s64 idx;
-+ int ret;
-+
-+ /*
-+ * If we can't allocate a new stripe, and there are no stripes with empty
-+ * blocks for us to reuse, that means we have to wait on copygc:
-+ */
-+ idx = get_existing_stripe(c, h);
-+ if (idx < 0)
-+ return -BCH_ERR_stripe_alloc_blocked;
-+
-+ ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
-+ if (ret) {
-+ bch2_stripe_close(c, h->s);
-+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret));
-+ return ret;
-+ }
-+
-+ existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v;
-+
-+ BUG_ON(existing_v->nr_redundant != h->s->nr_parity);
-+ h->s->nr_data = existing_v->nr_blocks -
-+ existing_v->nr_redundant; /* adopt the existing stripe's data block count */
-+
-+ ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
-+ if (ret) {
-+ bch2_stripe_close(c, h->s);
-+ return ret;
-+ }
-+
-+ BUG_ON(h->s->existing_stripe.size != h->blocksize);
-+ BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
-+
-+ /*
-+ * Free buckets we initially allocated - they might conflict with
-+ * blocks from the stripe we're reusing:
-+ */
-+ for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) {
-+ bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
-+ h->s->blocks[i] = 0;
-+ }
-+ memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
-+ memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
-+
-+ for (i = 0; i < existing_v->nr_blocks; i++) {
-+ if (stripe_blockcount_get(existing_v, i)) { /* block still holds data: keep it */
-+ __set_bit(i, h->s->blocks_gotten);
-+ __set_bit(i, h->s->blocks_allocated);
-+ }
-+
-+ ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
-+ }
-+
-+ bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key);
-+ h->s->have_existing_stripe = true;
-+
-+ return 0;
-+}
-+
-+/*
-+ * Take the disk reservation for the new stripe (unless one is already held)
-+ * and claim a free slot in the stripes btree for it.
-+ *
-+ * The scan starts at c->ec_stripe_hint, but never below offset 1. When it
-+ * runs past offset U32_MAX it wraps around to offset 1 exactly once; if the
-+ * end is reached again, or the scan already began at offset 1, it fails with
-+ * -BCH_ERR_ENOSPC_stripe_create. A slot is taken when its key is deleted and
-+ * bch2_try_open_stripe() succeeds.
-+ *
-+ * On success the chosen position is stored in h->s->new_stripe.key.k.p and
-+ * c->ec_stripe_hint is updated. On failure the disk reservation is released
-+ * (and the stripe closed again if it had been opened).
-+ *
-+ * Returns 0 or a negative error code.
-+ */
-+static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	struct bpos min_pos = POS(0, 1);
-+	struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
-+	int ret;
-+
-+	if (!h->s->res.sectors) {
-+		ret = bch2_disk_reservation_get(c, &h->s->res,
-+					h->blocksize,
-+					h->s->nr_parity,
-+					BCH_DISK_RESERVATION_NOFAIL);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
-+			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-+		if (bkey_gt(k.k->p, POS(0, U32_MAX))) {
-+			/*
-+			 * start_pos is never below min_pos (offset 1), so a
-+			 * plain "start_pos.offset != 0" test is always true:
-+			 * the scan would wrap forever and never report ENOSPC.
-+			 * Only wrap when the scan began above min_pos; after
-+			 * the wrap start_pos == min_pos, so the next time we
-+			 * get here we give up.
-+			 */
-+			if (bkey_gt(start_pos, min_pos)) {
-+				start_pos = min_pos;
-+				bch2_btree_iter_set_pos(&iter, start_pos);
-+				continue;
-+			}
-+
-+			ret = -BCH_ERR_ENOSPC_stripe_create;
-+			break;
-+		}
-+
-+		if (bkey_deleted(k.k) &&
-+		    bch2_try_open_stripe(c, h->s, k.k->p.offset))
-+			break;
-+	}
-+
-+	c->ec_stripe_hint = iter.pos.offset;
-+
-+	if (ret)
-+		goto err;
-+
-+	ret = ec_stripe_mem_alloc(trans, &iter);
-+	if (ret) {
-+		bch2_stripe_close(c, h->s);
-+		goto err;
-+	}
-+
-+	h->s->new_stripe.key.k.p = iter.pos;
-+out:
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+err:
-+	bch2_disk_reservation_put(c, &h->s->res);
-+	goto out;
-+}
-+/*
-+ * Get the stripe head matching (target, algo, redundancy, watermark), locked,
-+ * with a current stripe (h->s) that has buckets and buffers allocated.
-+ *
-+ * Allocation strategy: first try a completely new stripe at
-+ * BCH_WATERMARK_stripe. On a failure other than a transaction restart or
-+ * ENOMEM, fall back to reusing an existing stripe. If reuse is blocked and a
-+ * closure @cl was supplied, BCH_WATERMARK_copygc callers retry a new stripe
-+ * at their own watermark, while other callers wait on c->freelist_wait and
-+ * retry the reuse once.
-+ *
-+ * Returns the head (release it with bch2_ec_stripe_head_put()), NULL when
-+ * __bch2_ec_stripe_head_get() yields no head, or an ERR_PTR. On the err:
-+ * path the head has already been put.
-+ */
-+struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
-+ unsigned target,
-+ unsigned algo,
-+ unsigned redundancy,
-+ enum bch_watermark watermark,
-+ struct closure *cl)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct ec_stripe_head *h;
-+ bool waiting = false;
-+ int ret;
-+
-+ h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
-+ if (!h)
-+ bch_err(c, "no stripe head");
-+ if (IS_ERR_OR_NULL(h))
-+ return h;
-+
-+ if (!h->s) {
-+ ret = ec_new_stripe_alloc(c, h);
-+ if (ret) {
-+ bch_err(c, "failed to allocate new stripe");
-+ goto err;
-+ }
-+ }
-+
-+ if (h->s->allocated) /* an earlier call already finished the setup */
-+ goto allocated;
-+
-+ if (h->s->have_existing_stripe) /* reuse already chosen: only buckets are missing */
-+ goto alloc_existing;
-+
-+ /* First, try to allocate a full stripe: */
-+ ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?:
-+ __bch2_ec_stripe_head_reserve(trans, h);
-+ if (!ret)
-+ goto allocate_buf;
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-+ bch2_err_matches(ret, ENOMEM))
-+ goto err;
-+
-+ /*
-+ * Not enough buckets available for a full stripe: we must reuse an
-+ * existing stripe:
-+ */
-+ while (1) {
-+ ret = __bch2_ec_stripe_head_reuse(trans, h);
-+ if (!ret)
-+ break;
-+ if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
-+ goto err;
-+
-+ if (watermark == BCH_WATERMARK_copygc) {
-+ ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?:
-+ __bch2_ec_stripe_head_reserve(trans, h);
-+ if (ret)
-+ goto err;
-+ goto allocate_buf;
-+ }
-+
-+ /* XXX freelist_wait? */
-+ closure_wait(&c->freelist_wait, cl);
-+ waiting = true; /* the next blocked attempt bails out instead of waiting again */
-+ }
-+
-+ if (waiting)
-+ closure_wake_up(&c->freelist_wait);
-+alloc_existing:
-+ /*
-+ * Retry allocating buckets, with the watermark for this
-+ * particular write:
-+ */
-+ ret = new_stripe_alloc_buckets(trans, h, watermark, cl);
-+ if (ret)
-+ goto err;
-+
-+allocate_buf:
-+ ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize);
-+ if (ret)
-+ goto err;
-+
-+ h->s->allocated = true;
-+allocated:
-+ BUG_ON(!h->s->idx);
-+ BUG_ON(!h->s->new_stripe.data[0]);
-+ BUG_ON(trans->restarted);
-+ return h;
-+err:
-+ bch2_ec_stripe_head_put(c, h);
-+ return ERR_PTR(ret);
-+}
-+
-+/*
-+ * Abort in-progress stripes. With @ca == NULL every stripe head's current
-+ * stripe is aborted; otherwise only stripes that hold an open bucket on
-+ * device @ca.
-+ *
-+ * An aborted stripe gets err = -BCH_ERR_erofs_no_writes and is moved to the
-+ * pending list via ec_stripe_set_pending().
-+ */
-+static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
-+{
-+	struct ec_stripe_head *h;
-+	unsigned i;
-+
-+	mutex_lock(&c->ec_stripe_head_lock);
-+	list_for_each_entry(h, &c->ec_stripe_head_list, list) {
-+		bool stop;
-+
-+		mutex_lock(&h->lock);
-+		if (h->s) {
-+			/* no device given: stop unconditionally */
-+			stop = !ca;
-+
-+			for (i = 0;
-+			     !stop &&
-+			     i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks;
-+			     i++)
-+				stop = h->s->blocks[i] &&
-+					c->open_buckets[h->s->blocks[i]].dev == ca->dev_idx;
-+
-+			if (stop) {
-+				h->s->err = -BCH_ERR_erofs_no_writes;
-+				ec_stripe_set_pending(c, h);
-+			}
-+		}
-+		mutex_unlock(&h->lock);
-+	}
-+	mutex_unlock(&c->ec_stripe_head_lock);
-+}
-+
-+/* Abort every in-progress stripe that has an open bucket on device @ca. */
-+void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
-+{
-+	__bch2_ec_stop(c, ca);
-+}
-+
-+/* Abort every in-progress stripe on the filesystem, regardless of device. */
-+void bch2_fs_ec_stop(struct bch_fs *c)
-+{
-+	__bch2_ec_stop(c, NULL);
-+}
-+
-+/*
-+ * Report whether c->ec_stripe_new_list is empty, i.e. no stripe is waiting
-+ * for or undergoing creation. Checked under c->ec_stripe_new_lock.
-+ */
-+static bool bch2_fs_ec_flush_done(struct bch_fs *c)
-+{
-+	bool empty;
-+
-+	mutex_lock(&c->ec_stripe_new_lock);
-+	empty = list_empty(&c->ec_stripe_new_list);
-+	mutex_unlock(&c->ec_stripe_new_lock);
-+
-+	return empty;
-+}
-+
-+/*
-+ * Sleep on c->ec_stripe_new_wait until bch2_fs_ec_flush_done() reports that
-+ * the list of new stripes is empty.
-+ */
-+void bch2_fs_ec_flush(struct bch_fs *c)
-+{
-+	wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
-+}
-+/*
-+ * Populate the in-memory stripe table (c->stripes) and the stripes heap from
-+ * every KEY_TYPE_stripe key in the stripes btree.
-+ *
-+ * For each stripe, blocks_nonempty is set to the number of blocks whose block
-+ * count is nonzero. Returns 0 on success or an error, which is also logged
-+ * via bch_err_fn().
-+ */
-+int bch2_stripes_read(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ const struct bch_stripe *s;
-+ struct stripe *m;
-+ unsigned i;
-+ int ret;
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
-+ BTREE_ITER_PREFETCH, k, ret) {
-+ if (k.k->type != KEY_TYPE_stripe)
-+ continue;
-+
-+ ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
-+ if (ret)
-+ break;
-+
-+ s = bkey_s_c_to_stripe(k).v;
-+
-+ m = genradix_ptr(&c->stripes, k.k->p.offset); /* in-memory slot keyed by stripe index */
-+ m->sectors = le16_to_cpu(s->sectors);
-+ m->algorithm = s->algorithm;
-+ m->nr_blocks = s->nr_blocks;
-+ m->nr_redundant = s->nr_redundant;
-+ m->blocks_nonempty = 0;
-+
-+ for (i = 0; i < s->nr_blocks; i++)
-+ m->blocks_nonempty += !!stripe_blockcount_get(s, i);
-+
-+ bch2_stripes_heap_insert(c, m, k.k->p.offset);
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ bch2_trans_put(trans);
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Print up to the first 50 entries of the stripes heap to @out, one per
-+ * line: stripe index, non-empty block count, data+parity block counts, and
-+ * " open" when the stripe is currently open. Holds
-+ * c->ec_stripes_heap_lock while printing.
-+ */
-+void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	ec_stripes_heap *heap = &c->ec_stripes_heap;
-+	size_t pos;
-+
-+	mutex_lock(&c->ec_stripes_heap_lock);
-+	for (pos = 0; pos < min_t(size_t, heap->used, 50); pos++) {
-+		struct stripe *m = genradix_ptr(&c->stripes, heap->data[pos].idx);
-+
-+		prt_printf(out, "%zu %u/%u+%u", heap->data[pos].idx,
-+			   heap->data[pos].blocks_nonempty,
-+			   m->nr_blocks - m->nr_redundant,
-+			   m->nr_redundant);
-+
-+		if (bch2_stripe_is_open(c, heap->data[pos].idx))
-+			prt_str(out, " open");
-+
-+		prt_newline(out);
-+	}
-+	mutex_unlock(&c->ec_stripes_heap_lock);
-+}
-+
-+/*
-+ * Print the state of stripe creation to @out: first every stripe head (with
-+ * its current stripe, if any), then, after an "in flight:" line, every stripe
-+ * on c->ec_stripe_new_list with its two reference counts.
-+ */
-+void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	struct ec_stripe_head *h;
-+	struct ec_stripe_new *s;
-+
-+	mutex_lock(&c->ec_stripe_head_lock);
-+	list_for_each_entry(h, &c->ec_stripe_head_list, list) {
-+		prt_printf(out, "target %u algo %u redundancy %u %s:\n",
-+			   h->target, h->algo, h->redundancy,
-+			   bch2_watermarks[h->watermark]);
-+
-+		if (!h->s)
-+			continue;
-+
-+		prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n",
-+			   h->s->idx, h->s->nr_data, h->s->nr_parity,
-+			   bitmap_weight(h->s->blocks_allocated,
-+					 h->s->nr_data));
-+	}
-+	mutex_unlock(&c->ec_stripe_head_lock);
-+
-+	prt_printf(out, "in flight:\n");
-+
-+	mutex_lock(&c->ec_stripe_new_lock);
-+	list_for_each_entry(s, &c->ec_stripe_new_list, list)
-+		prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n",
-+			   s->idx, s->nr_data, s->nr_parity,
-+			   atomic_read(&s->ref[STRIPE_REF_io]),
-+			   atomic_read(&s->ref[STRIPE_REF_stripe]),
-+			   bch2_watermarks[s->h->watermark]);
-+	mutex_unlock(&c->ec_stripe_new_lock);
-+}
-+
-+/*
-+ * Tear down erasure coding state: free every stripe head (and its current
-+ * stripe, which must no longer own any open buckets), then release the
-+ * stripes heap, the stripes radix tree and the EC bioset.
-+ *
-+ * It is a BUG for c->ec_stripe_new_list to be non-empty at this point.
-+ */
-+void bch2_fs_ec_exit(struct bch_fs *c)
-+{
-+	for (;;) {
-+		struct ec_stripe_head *head;
-+		unsigned blk;
-+
-+		/* pop one head at a time so the lock is not held across kfree() */
-+		mutex_lock(&c->ec_stripe_head_lock);
-+		head = list_first_entry_or_null(&c->ec_stripe_head_list,
-+						struct ec_stripe_head, list);
-+		if (head)
-+			list_del(&head->list);
-+		mutex_unlock(&c->ec_stripe_head_lock);
-+
-+		if (!head)
-+			break;
-+
-+		if (head->s) {
-+			for (blk = 0;
-+			     blk < bkey_i_to_stripe(&head->s->new_stripe.key)->v.nr_blocks;
-+			     blk++)
-+				BUG_ON(head->s->blocks[blk]);
-+
-+			kfree(head->s);
-+		}
-+		kfree(head);
-+	}
-+
-+	BUG_ON(!list_empty(&c->ec_stripe_new_list));
-+
-+	free_heap(&c->ec_stripes_heap);
-+	genradix_free(&c->stripes);
-+	bioset_exit(&c->ec_bioset);
-+}
-+
-+/*
-+ * Initialise the locks, lists, wait queue and work items used by the erasure
-+ * coding code. Nothing here allocates memory or can fail.
-+ */
-+void bch2_fs_ec_init_early(struct bch_fs *c)
-+{
-+	/* locks */
-+	spin_lock_init(&c->ec_stripes_new_lock);
-+	mutex_init(&c->ec_stripes_heap_lock);
-+	mutex_init(&c->ec_stripe_head_lock);
-+	mutex_init(&c->ec_stripe_new_lock);
-+
-+	/* lists and waiters */
-+	INIT_LIST_HEAD(&c->ec_stripe_head_list);
-+	INIT_LIST_HEAD(&c->ec_stripe_new_list);
-+	init_waitqueue_head(&c->ec_stripe_new_wait);
-+
-+	/* deferred work */
-+	INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
-+	INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
-+}
-+
-+/*
-+ * Set up c->ec_bioset: a pool of 1 with bvecs, with front padding of
-+ * offsetof(struct ec_bio, bio). Returns the bioset_init() result.
-+ */
-+int bch2_fs_ec_init(struct bch_fs *c)
-+{
-+	return bioset_init(&c->ec_bioset, 1,
-+			   offsetof(struct ec_bio, bio),
-+			   BIOSET_NEED_BVECS);
-+}
-diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
-new file mode 100644
-index 000000000000..7d0237c9819f
---- /dev/null
-+++ b/fs/bcachefs/ec.h
-@@ -0,0 +1,260 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_EC_H
-+#define _BCACHEFS_EC_H
-+
-+#include "ec_types.h"
-+#include "buckets_types.h"
-+#include "extents_types.h"
-+
-+enum bkey_invalid_flags;
-+
-+int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
-+ struct bkey_s_c);
-+/*
-+ * struct bkey_ops initialiser for stripe keys: hooks up the validation,
-+ * text-formatting, byte-swap and trigger callbacks, with a minimum value
-+ * size of 8.
-+ */
-+#define bch2_bkey_ops_stripe ((struct bkey_ops) { \
-+ .key_invalid = bch2_stripe_invalid, \
-+ .val_to_text = bch2_stripe_to_text, \
-+ .swab = bch2_ptr_swab, \
-+ .trans_trigger = bch2_trans_mark_stripe, \
-+ .atomic_trigger = bch2_mark_stripe, \
-+ .min_val_size = 8, \
-+})
-+
-+/*
-+ * Number of checksums stored per block of stripe @s: the block size in
-+ * sectors divided by the checksum granularity (1 << csum_granularity_bits
-+ * sectors), rounded up.
-+ */
-+static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
-+{
-+	unsigned sectors = le16_to_cpu(s->sectors);
-+	unsigned granularity = 1 << s->csum_granularity_bits;
-+
-+	return DIV_ROUND_UP(sectors, granularity);
-+}
-+
-+/*
-+ * Byte offset, from the start of the stripe value, of checksum @csum_idx of
-+ * block @dev. The checksum area follows the fixed header and the nr_blocks
-+ * extent pointers, and is laid out block by block.
-+ */
-+static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
-+					  unsigned dev, unsigned csum_idx)
-+{
-+	unsigned csum_bytes = bch_crc_bytes[s->csum_type];
-+	unsigned csums_start = sizeof(struct bch_stripe) +
-+		sizeof(struct bch_extent_ptr) * s->nr_blocks;
-+	unsigned csum_nr = dev * stripe_csums_per_device(s) + csum_idx;
-+
-+	return csums_start + csum_nr * csum_bytes;
-+}
-+
-+/*
-+ * Byte offset of the 16-bit block counter for block @idx. The counters start
-+ * right after the last checksum, i.e. at the checksum offset of the
-+ * one-past-the-end block.
-+ */
-+static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s,
-+						 unsigned idx)
-+{
-+	unsigned counts_start = stripe_csum_offset(s, s->nr_blocks, 0);
-+
-+	return counts_start + sizeof(u16) * idx;
-+}
-+
-+/* Read the little-endian 16-bit block counter of block @idx of stripe @s. */
-+static inline unsigned stripe_blockcount_get(const struct bch_stripe *s,
-+					      unsigned idx)
-+{
-+	unsigned off = stripe_blockcount_offset(s, idx);
-+
-+	return le16_to_cpup((void *) s + off);
-+}
-+
-+/* Store @v as the little-endian 16-bit block counter of block @idx. */
-+static inline void stripe_blockcount_set(struct bch_stripe *s,
-+					 unsigned idx, unsigned v)
-+{
-+	__le16 *count = (void *) s + stripe_blockcount_offset(s, idx);
-+
-+	*count = cpu_to_le16(v);
-+}
-+
-+/*
-+ * Size of the stripe value in u64s: the offset just past the last block
-+ * counter, rounded up to a whole number of u64s.
-+ */
-+static inline unsigned stripe_val_u64s(const struct bch_stripe *s)
-+{
-+	unsigned val_bytes = stripe_blockcount_offset(s, s->nr_blocks);
-+
-+	return DIV_ROUND_UP(val_bytes, sizeof(u64));
-+}
-+
-+/*
-+ * Pointer to checksum @csum_idx of block @block inside stripe value @s.
-+ * Both indices are range-checked with EBUG_ON().
-+ */
-+static inline void *stripe_csum(struct bch_stripe *s,
-+				unsigned block, unsigned csum_idx)
-+{
-+	EBUG_ON(block >= s->nr_blocks);
-+	EBUG_ON(csum_idx >= stripe_csums_per_device(s));
-+
-+	return (void *) s + stripe_csum_offset(s, block, csum_idx);
-+}
-+
-+/*
-+ * Return checksum @csum_idx of block @block as a struct bch_csum. Only the
-+ * bytes used by the stripe's checksum type are copied; the rest stay zero.
-+ */
-+static inline struct bch_csum stripe_csum_get(struct bch_stripe *s,
-+					       unsigned block, unsigned csum_idx)
-+{
-+	struct bch_csum ret = { 0 };
-+
-+	memcpy(&ret,
-+	       stripe_csum(s, block, csum_idx),
-+	       bch_crc_bytes[s->csum_type]);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Store @csum as checksum @csum_idx of block @block, copying only the bytes
-+ * used by the stripe's checksum type.
-+ */
-+static inline void stripe_csum_set(struct bch_stripe *s,
-+				   unsigned block, unsigned csum_idx,
-+				   struct bch_csum csum)
-+{
-+	memcpy(stripe_csum(s, block, csum_idx),
-+	       &csum,
-+	       bch_crc_bytes[s->csum_type]);
-+}
-+
-+/*
-+ * True when @data_ptr points into the stripe block described by
-+ * @stripe_ptr: same device and generation, and an offset inside
-+ * [stripe_ptr->offset, stripe_ptr->offset + sectors).
-+ */
-+static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
-+					      const struct bch_extent_ptr *data_ptr,
-+					      unsigned sectors)
-+{
-+	if (data_ptr->dev != stripe_ptr->dev)
-+		return false;
-+
-+	if (data_ptr->gen != stripe_ptr->gen)
-+		return false;
-+
-+	return data_ptr->offset >= stripe_ptr->offset &&
-+	       data_ptr->offset <  stripe_ptr->offset + sectors;
-+}
-+
-+/*
-+ * True when decoded extent pointer @p (which must carry a stripe pointer)
-+ * refers to a data block of stripe @s and points inside that block. A block
-+ * index in the parity range never matches.
-+ */
-+static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s,
-+					    struct extent_ptr_decoded p)
-+{
-+	unsigned nr_data = s->nr_blocks - s->nr_redundant;
-+
-+	BUG_ON(!p.has_ec);
-+
-+	return p.ec.block < nr_data &&
-+		__bch2_ptr_matches_stripe(&s->ptrs[p.ec.block], &p.ptr,
-+					  le16_to_cpu(s->sectors));
-+}
-+
-+/*
-+ * Same check as bch2_ptr_matches_stripe(), but against the in-memory
-+ * struct gc_stripe @m, whose sectors field is already in CPU byte order.
-+ */
-+static inline bool bch2_ptr_matches_stripe_m(const struct gc_stripe *m,
-+					      struct extent_ptr_decoded p)
-+{
-+	unsigned nr_data = m->nr_blocks - m->nr_redundant;
-+
-+	BUG_ON(!p.has_ec);
-+
-+	return p.ec.block < nr_data &&
-+		__bch2_ptr_matches_stripe(&m->ptrs[p.ec.block], &p.ptr,
-+					  m->sectors);
-+}
-+
-+struct bch_read_bio;
-+
-+struct ec_stripe_buf { /* in-memory copy of a stripe: its key plus one data buffer per block */
-+ /* might not be buffering the entire stripe: */
-+ unsigned offset;
-+ unsigned size; /* compared against the stripe's block size in sectors by callers */
-+ unsigned long valid[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; /* bitmap, one bit per block; presumably marks blocks whose buffer is usable - TODO confirm */
-+
-+ void *data[BCH_BKEY_PTRS_MAX]; /* per-block buffer, indexed by block number */
-+
-+ __BKEY_PADDED(key, 255); /* the stripe key, accessed via bkey_i_to_stripe(&buf->key) */
-+};
-+
-+struct ec_stripe_head;
-+
-+enum ec_stripe_ref { /* index into ec_stripe_new.ref[] */
-+ STRIPE_REF_io, /* dropping the last one triggers bch2_ec_do_stripe_creates() */
-+ STRIPE_REF_stripe, /* dropping the last one frees the ec_stripe_new */
-+ STRIPE_REF_NR /* number of reference kinds; not a valid reference itself */
-+};
-+
-+struct ec_stripe_new { /* a stripe that is being filled and then created */
-+ struct bch_fs *c;
-+ struct ec_stripe_head *h; /* head this stripe was allocated for */
-+ struct mutex lock;
-+ struct list_head list; /* entry on c->ec_stripe_new_list once pending */
-+
-+ struct hlist_node hash;
-+ u64 idx; /* stripe index; 0 means no stripe has been opened yet */
-+
-+ struct closure iodone; /* synced on to wait for block I/O */
-+
-+ atomic_t ref[STRIPE_REF_NR]; /* see enum ec_stripe_ref */
-+
-+ int err; /* nonzero once the stripe has failed; creation is then skipped */
-+
-+ u8 nr_data;
-+ u8 nr_parity;
-+ bool allocated; /* buckets and new_stripe buffers are set up */
-+ bool pending; /* handed over for creation by ec_stripe_set_pending() */
-+ bool have_existing_stripe; /* reusing the stripe held in existing_stripe */
-+
-+ unsigned long blocks_gotten[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; /* blocks that have a bucket (or existing data) assigned */
-+ unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; /* a stripe is complete once all nr_data data bits are set */
-+ open_bucket_idx_t blocks[BCH_BKEY_PTRS_MAX]; /* open bucket index per block; 0 means none */
-+ struct disk_reservation res;
-+
-+ struct ec_stripe_buf new_stripe;
-+ struct ec_stripe_buf existing_stripe; /* only used when have_existing_stripe */
-+};
-+
-+struct ec_stripe_head { /* one per (target, algo, redundancy, watermark) combination */
-+ struct list_head list; /* entry on c->ec_stripe_head_list */
-+ struct mutex lock; /* held between bch2_ec_stripe_head_get() and _put() */
-+
-+ unsigned target;
-+ unsigned algo;
-+ unsigned redundancy; /* number of parity blocks for stripes from this head */
-+ enum bch_watermark watermark;
-+
-+ struct bch_devs_mask devs; /* devices stripes from this head may use */
-+ unsigned nr_active_devs; /* devices in devs whose bucket size equals blocksize */
-+
-+ unsigned blocksize; /* most common bucket size among devs */
-+
-+ struct dev_stripe_state block_stripe; /* allocator state passed when allocating data buckets */
-+ struct dev_stripe_state parity_stripe; /* allocator state passed when allocating parity buckets */
-+
-+ struct ec_stripe_new *s; /* stripe currently being filled, or NULL */
-+};
-+
-+int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *);
-+
-+void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);
-+
-+void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
-+
-+int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
-+
-+void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
-+struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *,
-+ unsigned, unsigned, unsigned,
-+ enum bch_watermark, struct closure *);
-+
-+void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
-+void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
-+void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
-+
-+void bch2_do_stripe_deletes(struct bch_fs *);
-+void bch2_ec_do_stripe_creates(struct bch_fs *);
-+void bch2_ec_stripe_new_free(struct bch_fs *, struct ec_stripe_new *);
-+
-+/* Take one reference of kind @ref on new stripe @s. */
-+static inline void ec_stripe_new_get(struct ec_stripe_new *s,
-+				     enum ec_stripe_ref ref)
-+{
-+	atomic_inc(&s->ref[ref]);
-+}
-+
-+/*
-+ * Drop one reference of kind @ref on new stripe @s. When the last
-+ * STRIPE_REF_stripe reference goes away the stripe is freed; when the last
-+ * STRIPE_REF_io reference goes away the stripe-creation work is kicked.
-+ *
-+ * It is a BUG to put a reference whose count is already zero or negative.
-+ */
-+static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
-+				     enum ec_stripe_ref ref)
-+{
-+	BUG_ON(atomic_read(&s->ref[ref]) <= 0);
-+
-+	if (!atomic_dec_and_test(&s->ref[ref]))
-+		return;
-+
-+	switch (ref) {
-+	case STRIPE_REF_stripe:
-+		bch2_ec_stripe_new_free(c, s);
-+		break;
-+	case STRIPE_REF_io:
-+		bch2_ec_do_stripe_creates(c);
-+		break;
-+	default:
-+		BUG();
-+	}
-+}
-+
-+void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
-+void bch2_fs_ec_stop(struct bch_fs *);
-+void bch2_fs_ec_flush(struct bch_fs *);
-+
-+int bch2_stripes_read(struct bch_fs *);
-+
-+void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *);
-+void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *);
-+
-+void bch2_fs_ec_exit(struct bch_fs *);
-+void bch2_fs_ec_init_early(struct bch_fs *);
-+int bch2_fs_ec_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_EC_H */
-diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h
-new file mode 100644
-index 000000000000..e2b02a82de32
---- /dev/null
-+++ b/fs/bcachefs/ec_types.h
-@@ -0,0 +1,41 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_EC_TYPES_H
-+#define _BCACHEFS_EC_TYPES_H
-+
-+#include "bcachefs_format.h"
-+
-+struct bch_replicas_padded { /* a bch_replicas_entry followed by BCH_BKEY_PTRS_MAX bytes of padding */
-+ struct bch_replicas_entry e;
-+ u8 pad[BCH_BKEY_PTRS_MAX]; /* presumably room for the entry's trailing device list - TODO confirm */
-+};
-+
-+struct stripe { /* in-memory summary of a stripe key, stored in c->stripes */
-+ size_t heap_idx; /* presumably this stripe's slot in the stripes heap - TODO confirm */
-+ u16 sectors; /* block size in sectors (CPU byte order) */
-+ u8 algorithm;
-+ u8 nr_blocks; /* data + parity blocks */
-+ u8 nr_redundant; /* parity blocks */
-+ u8 blocks_nonempty; /* number of blocks with a nonzero block count */
-+};
-+
-+struct gc_stripe { /* per-stripe in-memory state; name suggests it is used by gc - TODO confirm */
-+ u16 sectors; /* block size in sectors (CPU byte order) */
-+
-+ u8 nr_blocks; /* data + parity blocks */
-+ u8 nr_redundant; /* parity blocks */
-+
-+ unsigned alive:1; /* does a corresponding key exist in stripes btree? */
-+ u16 block_sectors[BCH_BKEY_PTRS_MAX];
-+ struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX]; /* one pointer per block, indexed by block number */
-+
-+ struct bch_replicas_padded r;
-+};
-+
-+/*
-+ * One element of the stripes heap: a stripe index together with a
-+ * blocks_nonempty count.
-+ * NOTE(review): blocks_nonempty is presumably the heap ordering key - confirm
-+ * against the heap comparison function in ec.c.
-+ */
-+struct ec_stripe_heap_entry {
-+ size_t idx;
-+ unsigned blocks_nonempty;
-+};
-+
-+/* Heap container type holding struct ec_stripe_heap_entry elements. */
-+typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap;
-+
-+#endif /* _BCACHEFS_EC_TYPES_H */
-diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c
-new file mode 100644
-index 000000000000..d260ff9bbfeb
---- /dev/null
-+++ b/fs/bcachefs/errcode.c
-@@ -0,0 +1,68 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "errcode.h"
-+
-+#include <linux/errname.h>
-+
-+/*
-+ * Name of every private error code, indexed by (code - BCH_ERR_START).
-+ * Built by expanding BCH_ERRCODES() with x() stringifying the error name;
-+ * slots without an entry are NULL.
-+ */
-+static const char * const bch2_errcode_strs[] = {
-+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = #err,
-+ BCH_ERRCODES()
-+#undef x
-+ NULL
-+};
-+
-+/*
-+ * Parent ("class") of every private error code, indexed by
-+ * (code - BCH_ERR_START). A parent is another BCH_ERR_* code, a standard
-+ * errno, or 0 for codes that have no parent.
-+ *
-+ * The table is only ever read in this file, so it is declared const.
-+ */
-+static const unsigned bch2_errcode_parents[] = {
-+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class,
-+	BCH_ERRCODES()
-+#undef x
-+};
-+
-+/*
-+ * Return a printable name for @err; the sign of @err is ignored.
-+ *
-+ * Private codes (>= BCH_ERR_START) are looked up in bch2_errcode_strs, other
-+ * nonzero codes go through errname(), and 0 yields "(No error)". A code with
-+ * no known name yields "(Invalid error)". Codes >= BCH_ERR_MAX are a bug.
-+ */
-+const char *bch2_err_str(int err)
-+{
-+	const char *name = "(No error)";
-+
-+	err = abs(err);
-+
-+	BUG_ON(err >= BCH_ERR_MAX);
-+
-+	if (err >= BCH_ERR_START)
-+		name = bch2_errcode_strs[err - BCH_ERR_START];
-+	else if (err)
-+		name = errname(err);
-+
-+	return name ?: "(Invalid error)";
-+}
-+
-+/*
-+ * Return true if @err is @class or has @class among its ancestors in
-+ * bch2_errcode_parents. The signs of both arguments are ignored; values
-+ * >= BCH_ERR_MAX are a bug.
-+ */
-+bool __bch2_err_matches(int err, int class)
-+{
-+	int code = abs(err);
-+	int wanted = abs(class);
-+
-+	BUG_ON(code >= BCH_ERR_MAX);
-+	BUG_ON(wanted >= BCH_ERR_MAX);
-+
-+	for (;;) {
-+		if (code == wanted)
-+			return true;
-+
-+		/* Left the private range without a match: nothing above. */
-+		if (code < BCH_ERR_START)
-+			return false;
-+
-+		code = bch2_errcode_parents[code - BCH_ERR_START];
-+	}
-+}
-+
-+/*
-+ * Map the negative error @err to its outermost class: follow parent links
-+ * until reaching a code below BCH_ERR_START or a private code whose parent is
-+ * 0. The result is returned negated, like the input.
-+ */
-+int __bch2_err_class(int err)
-+{
-+	int code = -err;
-+
-+	BUG_ON((unsigned) code >= BCH_ERR_MAX);
-+
-+	while (code >= BCH_ERR_START) {
-+		unsigned parent = bch2_errcode_parents[code - BCH_ERR_START];
-+
-+		if (!parent)
-+			break;
-+		code = parent;
-+	}
-+
-+	return -code;
-+}
-+
-+/*
-+ * Like blk_status_to_str(), but also knows the bcachefs-private
-+ * BLK_STS_REMOVED status.
-+ */
-+const char *bch2_blk_status_to_str(blk_status_t status)
-+{
-+	return status == BLK_STS_REMOVED
-+		? "device removed"
-+		: blk_status_to_str(status);
-+}
-diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
-new file mode 100644
-index 000000000000..68a1a96bb7ca
---- /dev/null
-+++ b/fs/bcachefs/errcode.h
-@@ -0,0 +1,269 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_ERRCODE_H
-+#define _BCACHEFS_ERRCODE_H
-+
-+/*
-+ * X-macro list of the bcachefs private error codes. Each x(class, err) entry
-+ * names one code (BCH_ERR_<err> once expanded by enum bch_errcode) and its
-+ * parent @class: a standard errno, another BCH_ERR_* code, or 0 for none.
-+ */
-+#define BCH_ERRCODES() \
-+ x(ERANGE, ERANGE_option_too_small) \
-+ x(ERANGE, ERANGE_option_too_big) \
-+ x(ENOMEM, ENOMEM_stripe_buf) \
-+ x(ENOMEM, ENOMEM_replicas_table) \
-+ x(ENOMEM, ENOMEM_cpu_replicas) \
-+ x(ENOMEM, ENOMEM_replicas_gc) \
-+ x(ENOMEM, ENOMEM_disk_groups_validate) \
-+ x(ENOMEM, ENOMEM_disk_groups_to_cpu) \
-+ x(ENOMEM, ENOMEM_mark_snapshot) \
-+ x(ENOMEM, ENOMEM_mark_stripe) \
-+ x(ENOMEM, ENOMEM_mark_stripe_ptr) \
-+ x(ENOMEM, ENOMEM_btree_key_cache_create) \
-+ x(ENOMEM, ENOMEM_btree_key_cache_fill) \
-+ x(ENOMEM, ENOMEM_btree_key_cache_insert) \
-+ x(ENOMEM, ENOMEM_trans_kmalloc) \
-+ x(ENOMEM, ENOMEM_trans_log_msg) \
-+ x(ENOMEM, ENOMEM_do_encrypt) \
-+ x(ENOMEM, ENOMEM_ec_read_extent) \
-+ x(ENOMEM, ENOMEM_ec_stripe_mem_alloc) \
-+ x(ENOMEM, ENOMEM_ec_new_stripe_alloc) \
-+ x(ENOMEM, ENOMEM_fs_btree_cache_init) \
-+ x(ENOMEM, ENOMEM_fs_btree_key_cache_init) \
-+ x(ENOMEM, ENOMEM_fs_counters_init) \
-+ x(ENOMEM, ENOMEM_fs_btree_write_buffer_init) \
-+ x(ENOMEM, ENOMEM_io_clock_init) \
-+ x(ENOMEM, ENOMEM_blacklist_table_init) \
-+ x(ENOMEM, ENOMEM_sb_realloc_injected) \
-+ x(ENOMEM, ENOMEM_sb_bio_realloc) \
-+ x(ENOMEM, ENOMEM_sb_buf_realloc) \
-+ x(ENOMEM, ENOMEM_sb_journal_validate) \
-+ x(ENOMEM, ENOMEM_sb_journal_v2_validate) \
-+ x(ENOMEM, ENOMEM_journal_entry_add) \
-+ x(ENOMEM, ENOMEM_journal_read_buf_realloc) \
-+ x(ENOMEM, ENOMEM_btree_interior_update_worker_init)\
-+ x(ENOMEM, ENOMEM_btree_interior_update_pool_init) \
-+ x(ENOMEM, ENOMEM_bio_read_init) \
-+ x(ENOMEM, ENOMEM_bio_read_split_init) \
-+ x(ENOMEM, ENOMEM_bio_write_init) \
-+ x(ENOMEM, ENOMEM_bio_bounce_pages_init) \
-+ x(ENOMEM, ENOMEM_writepage_bioset_init) \
-+ x(ENOMEM, ENOMEM_dio_read_bioset_init) \
-+ x(ENOMEM, ENOMEM_dio_write_bioset_init) \
-+ x(ENOMEM, ENOMEM_nocow_flush_bioset_init) \
-+ x(ENOMEM, ENOMEM_promote_table_init) \
-+ x(ENOMEM, ENOMEM_compression_bounce_read_init) \
-+ x(ENOMEM, ENOMEM_compression_bounce_write_init) \
-+ x(ENOMEM, ENOMEM_compression_workspace_init) \
-+ x(ENOMEM, ENOMEM_decompression_workspace_init) \
-+ x(ENOMEM, ENOMEM_bucket_gens) \
-+ x(ENOMEM, ENOMEM_buckets_nouse) \
-+ x(ENOMEM, ENOMEM_usage_init) \
-+ x(ENOMEM, ENOMEM_btree_node_read_all_replicas) \
-+ x(ENOMEM, ENOMEM_btree_node_reclaim) \
-+ x(ENOMEM, ENOMEM_btree_node_mem_alloc) \
-+ x(ENOMEM, ENOMEM_btree_cache_cannibalize_lock) \
-+ x(ENOMEM, ENOMEM_buckets_waiting_for_journal_init)\
-+ x(ENOMEM, ENOMEM_buckets_waiting_for_journal_set) \
-+ x(ENOMEM, ENOMEM_set_nr_journal_buckets) \
-+ x(ENOMEM, ENOMEM_dev_journal_init) \
-+ x(ENOMEM, ENOMEM_journal_pin_fifo) \
-+ x(ENOMEM, ENOMEM_journal_buf) \
-+ x(ENOMEM, ENOMEM_gc_start) \
-+ x(ENOMEM, ENOMEM_gc_alloc_start) \
-+ x(ENOMEM, ENOMEM_gc_reflink_start) \
-+ x(ENOMEM, ENOMEM_gc_gens) \
-+ x(ENOMEM, ENOMEM_gc_repair_key) \
-+ x(ENOMEM, ENOMEM_fsck_extent_ends_at) \
-+ x(ENOMEM, ENOMEM_fsck_add_nlink) \
-+ x(ENOMEM, ENOMEM_journal_key_insert) \
-+ x(ENOMEM, ENOMEM_journal_keys_sort) \
-+ x(ENOMEM, ENOMEM_journal_replay) \
-+ x(ENOMEM, ENOMEM_read_superblock_clean) \
-+ x(ENOMEM, ENOMEM_fs_alloc) \
-+ x(ENOMEM, ENOMEM_fs_name_alloc) \
-+ x(ENOMEM, ENOMEM_fs_other_alloc) \
-+ x(ENOMEM, ENOMEM_dev_alloc) \
-+ x(ENOSPC, ENOSPC_disk_reservation) \
-+ x(ENOSPC, ENOSPC_bucket_alloc) \
-+ x(ENOSPC, ENOSPC_disk_label_add) \
-+ x(ENOSPC, ENOSPC_stripe_create) \
-+ x(ENOSPC, ENOSPC_inode_create) \
-+ x(ENOSPC, ENOSPC_str_hash_create) \
-+ x(ENOSPC, ENOSPC_snapshot_create) \
-+ x(ENOSPC, ENOSPC_subvolume_create) \
-+ x(ENOSPC, ENOSPC_sb) \
-+ x(ENOSPC, ENOSPC_sb_journal) \
-+ x(ENOSPC, ENOSPC_sb_journal_seq_blacklist) \
-+ x(ENOSPC, ENOSPC_sb_quota) \
-+ x(ENOSPC, ENOSPC_sb_replicas) \
-+ x(ENOSPC, ENOSPC_sb_members) \
-+ x(ENOSPC, ENOSPC_sb_members_v2) \
-+ x(ENOSPC, ENOSPC_sb_crypt) \
-+ x(ENOSPC, ENOSPC_btree_slot) \
-+ x(ENOSPC, ENOSPC_snapshot_tree) \
-+ x(ENOENT, ENOENT_bkey_type_mismatch) \
-+ x(ENOENT, ENOENT_str_hash_lookup) \
-+ x(ENOENT, ENOENT_str_hash_set_must_replace) \
-+ x(ENOENT, ENOENT_inode) \
-+ x(ENOENT, ENOENT_not_subvol) \
-+ x(ENOENT, ENOENT_not_directory) \
-+ x(ENOENT, ENOENT_directory_dead) \
-+ x(ENOENT, ENOENT_subvolume) \
-+ x(ENOENT, ENOENT_snapshot_tree) \
-+ x(ENOENT, ENOENT_dirent_doesnt_match_inode) \
-+ x(ENOENT, ENOENT_dev_not_found) \
-+ x(ENOENT, ENOENT_dev_idx_not_found) \
-+ x(0, open_buckets_empty) \
-+ x(0, freelist_empty) \
-+ x(BCH_ERR_freelist_empty, no_buckets_found) \
-+ x(0, transaction_restart) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_relock) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_fill_mem_alloc_fail)\
-+ x(BCH_ERR_transaction_restart, transaction_restart_mem_realloced) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_in_traverse_all) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\
-+ x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\
-+ x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\
-+ x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_split_race) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \
-+ x(BCH_ERR_transaction_restart, transaction_restart_nested) \
-+ x(0, no_btree_node) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_relock) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_drop) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_lock_root) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_up) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_down) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_init) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_cached) \
-+ x(BCH_ERR_no_btree_node, no_btree_node_srcu_reset) \
-+ x(0, btree_insert_fail) \
-+ x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \
-+ x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \
-+ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \
-+ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \
-+ x(BCH_ERR_btree_insert_fail, btree_insert_need_flush_buffer) \
-+ x(0, backpointer_to_overwritten_btree_node) \
-+ x(0, lock_fail_root_changed) \
-+ x(0, journal_reclaim_would_deadlock) \
-+ x(EINVAL, fsck) \
-+ x(BCH_ERR_fsck, fsck_fix) \
-+ x(BCH_ERR_fsck, fsck_ignore) \
-+ x(BCH_ERR_fsck, fsck_errors_not_fixed) \
-+ x(BCH_ERR_fsck, fsck_repair_unimplemented) \
-+ x(BCH_ERR_fsck, fsck_repair_impossible) \
-+ x(0, restart_recovery) \
-+ x(0, unwritten_extent_update) \
-+ x(EINVAL, device_state_not_allowed) \
-+ x(EINVAL, member_info_missing) \
-+ x(EINVAL, mismatched_block_size) \
-+ x(EINVAL, block_size_too_small) \
-+ x(EINVAL, bucket_size_too_small) \
-+ x(EINVAL, device_size_too_small) \
-+ x(EINVAL, device_not_a_member_of_filesystem) \
-+ x(EINVAL, device_has_been_removed) \
-+ x(EINVAL, device_already_online) \
-+ x(EINVAL, insufficient_devices_to_start) \
-+ x(EINVAL, invalid) \
-+ x(EINVAL, internal_fsck_err) \
-+ x(EROFS, erofs_trans_commit) \
-+ x(EROFS, erofs_no_writes) \
-+ x(EROFS, erofs_journal_err) \
-+ x(EROFS, erofs_sb_err) \
-+ x(EROFS, erofs_unfixed_errors) \
-+ x(EROFS, erofs_norecovery) \
-+ x(EROFS, erofs_nochanges) \
-+ x(EROFS, insufficient_devices) \
-+ x(0, operation_blocked) \
-+ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \
-+ x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
-+ x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
-+ x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
-+ x(BCH_ERR_operation_blocked, stripe_alloc_blocked) \
-+ x(BCH_ERR_invalid, invalid_sb) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_magic) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_version) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_features) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_too_big) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_csum_type) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_csum) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_block_size) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_uuid) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_too_many_members) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_dev_idx) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_time_precision) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_field_size) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_layout) \
-+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_type) \
-+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_nr_superblocks) \
-+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_superblocks_overlap) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_members_missing) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_members) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_replicas) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_journal) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_clean) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_quota) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_errors) \
-+ x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
-+ x(BCH_ERR_invalid, invalid_bkey) \
-+ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
-+ x(EIO, btree_node_read_err) \
-+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
-+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
-+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
-+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_bad_node) \
-+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_incompatible) \
-+ x(0, nopromote) \
-+ x(BCH_ERR_nopromote, nopromote_may_not) \
-+ x(BCH_ERR_nopromote, nopromote_already_promoted) \
-+ x(BCH_ERR_nopromote, nopromote_unwritten) \
-+ x(BCH_ERR_nopromote, nopromote_congested) \
-+ x(BCH_ERR_nopromote, nopromote_in_flight) \
-+ x(BCH_ERR_nopromote, nopromote_enomem)
-+
-+/*
-+ * Private error codes: numbered consecutively after BCH_ERR_START (2048) in
-+ * BCH_ERRCODES() order. BCH_ERR_MAX is one past the last code.
-+ */
-+enum bch_errcode {
-+ BCH_ERR_START = 2048,
-+#define x(class, err) BCH_ERR_##err,
-+ BCH_ERRCODES()
-+#undef x
-+ BCH_ERR_MAX
-+};
-+
-+const char *bch2_err_str(int);
-+bool __bch2_err_matches(int, int);
-+
-+/* Only negative values are errors; anything else never matches a class. */
-+static inline bool _bch2_err_matches(int err, int class)
-+{
-+	if (err >= 0)
-+		return false;
-+
-+	return __bch2_err_matches(err, class);
-+}
-+
-+/*
-+ * True if the negative error @_err is @_class or descends from it. @_class
-+ * must be a compile-time constant (enforced by BUILD_BUG_ON); the result is
-+ * hinted as unlikely.
-+ */
-+#define bch2_err_matches(_err, _class) \
-+({ \
-+ BUILD_BUG_ON(!__builtin_constant_p(_class)); \
-+ unlikely(_bch2_err_matches(_err, _class)); \
-+})
-+
-+int __bch2_err_class(int);
-+
-+/*
-+ * Translate a negative error to its outermost class via __bch2_err_class();
-+ * non-negative values pass through unchanged.
-+ */
-+static inline long bch2_err_class(long err)
-+{
-+	if (err >= 0)
-+		return err;
-+
-+	return __bch2_err_class(err);
-+}
-+
-+#define BLK_STS_REMOVED ((__force blk_status_t)128)
-+
-+const char *bch2_blk_status_to_str(blk_status_t);
-+
-+#endif /* _BCACHEFS_ERRCODE_H */
-diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
-new file mode 100644
-index 000000000000..7b28d37922fd
---- /dev/null
-+++ b/fs/bcachefs/error.c
-@@ -0,0 +1,299 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "error.h"
-+#include "super.h"
-+
-+#define FSCK_ERR_RATELIMIT_NR 10
-+
-+/*
-+ * Record that the filesystem hit an inconsistency and apply the configured
-+ * errors= policy: continue, go emergency read-only, or panic.
-+ *
-+ * Returns false for the continue policy and true otherwise.
-+ */
-+bool bch2_inconsistent_error(struct bch_fs *c)
-+{
-+	set_bit(BCH_FS_ERROR, &c->flags);
-+
-+	switch (c->opts.errors) {
-+	case BCH_ON_ERROR_panic:
-+		panic(bch2_fmt(c, "panic after error"));
-+		return true;
-+	case BCH_ON_ERROR_ro:
-+		if (bch2_fs_emergency_read_only(c))
-+			bch_err(c, "inconsistency detected - emergency read only");
-+		return true;
-+	case BCH_ON_ERROR_continue:
-+		return false;
-+	default:
-+		BUG();
-+	}
-+}
-+
-+/*
-+ * Flag a topology error. It is escalated to an inconsistency only once
-+ * BCH_FS_FSCK_DONE is set.
-+ */
-+void bch2_topology_error(struct bch_fs *c)
-+{
-+	set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags);
-+
-+	if (!test_bit(BCH_FS_FSCK_DONE, &c->flags))
-+		return;
-+
-+	bch2_inconsistent_error(c);
-+}
-+
-+/*
-+ * Force the filesystem into emergency read-only mode, logging a message when
-+ * bch2_fs_emergency_read_only() returns true.
-+ */
-+void bch2_fatal_error(struct bch_fs *c)
-+{
-+	if (!bch2_fs_emergency_read_only(c))
-+		return;
-+
-+	bch_err(c, "fatal error - emergency read only");
-+}
-+
-+/*
-+ * Work item run for a device with too many IO errors: under state_lock, set
-+ * the device read-only if that state change is allowed, otherwise take the
-+ * whole filesystem emergency read-only. A message is logged when the chosen
-+ * call returns nonzero.
-+ */
-+void bch2_io_error_work(struct work_struct *work)
-+{
-+	struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
-+	struct bch_fs *c = ca->fs;
-+	bool dev, report;
-+
-+	down_write(&c->state_lock);
-+
-+	dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro,
-+				     BCH_FORCE_IF_DEGRADED);
-+	if (dev)
-+		report = __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
-+					      BCH_FORCE_IF_DEGRADED);
-+	else
-+		report = bch2_fs_emergency_read_only(c);
-+
-+	if (report)
-+		bch_err(ca,
-+			"too many IO errors, setting %s RO",
-+			dev ? "device" : "filesystem");
-+
-+	up_write(&c->state_lock);
-+}
-+
-+/*
-+ * Account one IO error of kind @type against device @ca.
-+ *
-+ * Only the per-device counter is bumped: the queue_work() call below is
-+ * commented out, so this function does not schedule bch2_io_error_work().
-+ */
-+void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
-+{
-+ atomic64_inc(&ca->errors[type]);
-+ //queue_work(system_long_wq, &ca->io_error_work);
-+}
-+
-+/*
-+ * Answer to an interactive "fix?" prompt. The numeric values matter:
-+ * bch2_fsck_err() tests bit 0 for "yes" (YN_YES, YN_ALLYES) and treats values
-+ * >= YN_ALLNO as applying to all further errors of the same type.
-+ */
-+enum ask_yn {
-+ YN_NO,
-+ YN_YES,
-+ YN_ALLNO,
-+ YN_ALLYES,
-+};
-+
-+#ifdef __KERNEL__
-+#define bch2_fsck_ask_yn() YN_NO
-+#else
-+
-+#include "tools-util.h"
-+
-+/*
-+ * Userspace build only: prompt on stdout and read lines from stdin until the
-+ * user enters exactly one of 'y', 'n', 'Y' or 'N'.
-+ *
-+ * Returns the matching enum ask_yn value. Dies if stdin cannot be read.
-+ *
-+ * The line buffer allocated by getline() is released on the single exit
-+ * path; returning straight out of the switch would leak it on every call.
-+ */
-+enum ask_yn bch2_fsck_ask_yn(void)
-+{
-+	char *buf = NULL;
-+	size_t buflen = 0;
-+	enum ask_yn ret;
-+
-+	while (true) {
-+		fputs(" (y,n, or Y,N for all errors of this type) ", stdout);
-+		fflush(stdout);
-+
-+		if (getline(&buf, &buflen, stdin) < 0)
-+			die("error reading from standard input");
-+
-+		strim(buf);
-+		if (strlen(buf) != 1)
-+			continue;
-+
-+		switch (buf[0]) {
-+		case 'n':
-+			ret = YN_NO;
-+			goto out;
-+		case 'y':
-+			ret = YN_YES;
-+			goto out;
-+		case 'N':
-+			ret = YN_ALLNO;
-+			goto out;
-+		case 'Y':
-+			ret = YN_ALLYES;
-+			goto out;
-+		}
-+		/* Any other single character: ask again. */
-+	}
-+out:
-+	free(buf);
-+	return ret;
-+}
-+
-+#endif
-+
-+/*
-+ * Look up, or create, the per-format-string fsck error state for @fmt.
-+ *
-+ * Entries are matched by comparing the @fmt pointer itself, not its contents.
-+ * Returns NULL once BCH_FS_FSCK_DONE is set, or when allocating a new entry
-+ * fails (the failure is logged only the first time).
-+ */
-+static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
-+{
-+	struct fsck_err_state *state;
-+
-+	if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
-+		return NULL;
-+
-+	list_for_each_entry(state, &c->fsck_error_msgs, list) {
-+		if (state->fmt != fmt)
-+			continue;
-+
-+		/*
-+		 * Repeated fsck errors are common, so keep the most recently
-+		 * seen entry at the head of the list.
-+		 */
-+		list_move(&state->list, &c->fsck_error_msgs);
-+		return state;
-+	}
-+
-+	state = kzalloc(sizeof(*state), GFP_NOFS);
-+	if (state) {
-+		INIT_LIST_HEAD(&state->list);
-+		state->fmt = fmt;
-+		list_add(&state->list, &c->fsck_error_msgs);
-+		return state;
-+	}
-+
-+	if (!c->fsck_alloc_msgs_err)
-+		bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
-+	c->fsck_alloc_msgs_err = true;
-+	return NULL;
-+}
-+
-+/*
-+ * Report one fsck error and decide how the caller should proceed.
-+ *
-+ * Counts @err via bch2_sb_error_count(), formats @fmt into a message, then
-+ * picks a result from @flags, the fix_errors/errors/nochanges options and
-+ * whether BCH_FS_FSCK_DONE is set. Per-format-string state from
-+ * fsck_err_get() provides ratelimiting and memoizes the previous answer.
-+ *
-+ * Returns -BCH_ERR_fsck_fix, -BCH_ERR_fsck_ignore or
-+ * -BCH_ERR_fsck_errors_not_fixed.
-+ */
-+int bch2_fsck_err(struct bch_fs *c,
-+ enum bch_fsck_flags flags,
-+ enum bch_sb_error_id err,
-+ const char *fmt, ...)
-+{
-+ struct fsck_err_state *s = NULL;
-+ va_list args;
-+ bool print = true, suppressing = false, inconsistent = false;
-+ struct printbuf buf = PRINTBUF, *out = &buf;
-+ int ret = -BCH_ERR_fsck_ignore;
-+
-+ bch2_sb_error_count(c, err);
-+
-+ va_start(args, fmt);
-+ prt_vprintf(out, fmt, args);
-+ va_end(args);
-+
-+ mutex_lock(&c->fsck_error_msgs_lock);
-+ s = fsck_err_get(c, fmt);
-+ if (s) {
-+ /*
-+ * We may be called multiple times for the same error on
-+ * transaction restart - this memoizes instead of asking the user
-+ * multiple times for the same error:
-+ */
-+ if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
-+ ret = s->ret;
-+ mutex_unlock(&c->fsck_error_msgs_lock);
-+ printbuf_exit(&buf);
-+ return ret;
-+ }
-+
-+ kfree(s->last_msg);
-+ s->last_msg = kstrdup(buf.buf, GFP_KERNEL);
-+
-+ /*
-+ * The FSCK_ERR_RATELIMIT_NR'th repeat is still printed, followed by a
-+ * "Ratelimiting" notice; later repeats are not printed.
-+ * NOTE(review): s->ratelimited is never set in this file, although
-+ * bch2_flush_fsck_errs() tests it - confirm whether that is intended.
-+ */
-+ if (c->opts.ratelimit_errors &&
-+ !(flags & FSCK_NO_RATELIMIT) &&
-+ s->nr >= FSCK_ERR_RATELIMIT_NR) {
-+ if (s->nr == FSCK_ERR_RATELIMIT_NR)
-+ suppressing = true;
-+ else
-+ print = false;
-+ }
-+
-+ s->nr++;
-+ }
-+
-+#ifdef BCACHEFS_LOG_PREFIX
-+ if (!strncmp(fmt, "bcachefs:", 9))
-+ prt_printf(out, bch2_log_msg(c, ""));
-+#endif
-+
-+ if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
-+ if (c->opts.errors != BCH_ON_ERROR_continue ||
-+ !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
-+ prt_str(out, ", shutting down");
-+ inconsistent = true;
-+ ret = -BCH_ERR_fsck_errors_not_fixed;
-+ } else if (flags & FSCK_CAN_FIX) {
-+ prt_str(out, ", fixing");
-+ ret = -BCH_ERR_fsck_fix;
-+ } else {
-+ prt_str(out, ", continuing");
-+ ret = -BCH_ERR_fsck_ignore;
-+ }
-+ } else if (c->opts.fix_errors == FSCK_FIX_exit) {
-+ prt_str(out, ", exiting");
-+ ret = -BCH_ERR_fsck_errors_not_fixed;
-+ } else if (flags & FSCK_CAN_FIX) {
-+ /* A remembered "all" answer for this error type overrides the option. */
-+ int fix = s && s->fix
-+ ? s->fix
-+ : c->opts.fix_errors;
-+
-+ if (fix == FSCK_FIX_ask) {
-+ int ask;
-+
-+ prt_str(out, ": fix?");
-+ bch2_print_string_as_lines(KERN_ERR, out->buf);
-+ print = false;
-+
-+ ask = bch2_fsck_ask_yn();
-+
-+ if (ask >= YN_ALLNO && s)
-+ s->fix = ask == YN_ALLNO
-+ ? FSCK_FIX_no
-+ : FSCK_FIX_yes;
-+
-+ /* YN_YES and YN_ALLYES are the odd values of enum ask_yn. */
-+ ret = ask & 1
-+ ? -BCH_ERR_fsck_fix
-+ : -BCH_ERR_fsck_ignore;
-+ } else if (fix == FSCK_FIX_yes ||
-+ (c->opts.nochanges &&
-+ !(flags & FSCK_CAN_IGNORE))) {
-+ prt_str(out, ", fixing");
-+ ret = -BCH_ERR_fsck_fix;
-+ } else {
-+ prt_str(out, ", not fixing");
-+ }
-+ } else if (flags & FSCK_NEED_FSCK) {
-+ prt_str(out, " (run fsck to correct)");
-+ } else {
-+ prt_str(out, " (repair unimplemented)");
-+ }
-+
-+ /* An error that may not be ignored cannot end up as "ignore". */
-+ if (ret == -BCH_ERR_fsck_ignore &&
-+ (c->opts.fix_errors == FSCK_FIX_exit ||
-+ !(flags & FSCK_CAN_IGNORE)))
-+ ret = -BCH_ERR_fsck_errors_not_fixed;
-+
-+ if (print)
-+ bch2_print_string_as_lines(KERN_ERR, out->buf);
-+
-+ if (!test_bit(BCH_FS_FSCK_DONE, &c->flags) &&
-+ (ret != -BCH_ERR_fsck_fix &&
-+ ret != -BCH_ERR_fsck_ignore))
-+ bch_err(c, "Unable to continue, halting");
-+ else if (suppressing)
-+ bch_err(c, "Ratelimiting new instances of previous error");
-+
-+ /* Remember the decision for the memoization check above. */
-+ if (s)
-+ s->ret = ret;
-+
-+ mutex_unlock(&c->fsck_error_msgs_lock);
-+
-+ printbuf_exit(&buf);
-+
-+ if (inconsistent)
-+ bch2_inconsistent_error(c);
-+
-+ if (ret == -BCH_ERR_fsck_fix) {
-+ set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
-+ } else {
-+ set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
-+ set_bit(BCH_FS_ERROR, &c->flags);
-+ }
-+
-+ return ret;
-+}
-+
-+/*
-+ * Free every recorded fsck error state. For entries marked ratelimited that
-+ * still hold a last message, a summary with the total count is logged first.
-+ */
-+void bch2_flush_fsck_errs(struct bch_fs *c)
-+{
-+	struct fsck_err_state *state, *next;
-+
-+	mutex_lock(&c->fsck_error_msgs_lock);
-+
-+	list_for_each_entry_safe(state, next, &c->fsck_error_msgs, list) {
-+		bool summarize = state->ratelimited && state->last_msg;
-+
-+		if (summarize)
-+			bch_err(c, "Saw %llu errors like:\n %s", state->nr, state->last_msg);
-+
-+		list_del(&state->list);
-+		kfree(state->last_msg);
-+		kfree(state);
-+	}
-+
-+	mutex_unlock(&c->fsck_error_msgs_lock);
-+}
-diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
-new file mode 100644
-index 000000000000..d167d65986e0
---- /dev/null
-+++ b/fs/bcachefs/error.h
-@@ -0,0 +1,242 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_ERROR_H
-+#define _BCACHEFS_ERROR_H
-+
-+#include <linux/list.h>
-+#include <linux/printk.h>
-+#include "sb-errors.h"
-+
-+struct bch_dev;
-+struct bch_fs;
-+struct work_struct;
-+
-+/*
-+ * XXX: separate out errors that indicate on disk data is inconsistent, and flag
-+ * superblock as such
-+ */
-+
-+/* Error messages: */
-+
-+/*
-+ * Inconsistency errors: The on disk data is inconsistent. If these occur during
-+ * initial recovery, they don't indicate a bug in the running code - we walk all
-+ * the metadata before modifying anything. If they occur at runtime, they
-+ * indicate either a bug in the running code or (less likely) data is being
-+ * silently corrupted under us.
-+ *
-+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in
-+ * BCH_ON_ERROR_continue mode
-+ */
-+
-+bool bch2_inconsistent_error(struct bch_fs *);
-+
-+void bch2_topology_error(struct bch_fs *);
-+
-+/*
-+ * Log a message against filesystem @c, then apply the inconsistency policy.
-+ * Evaluates to the result of bch2_inconsistent_error().
-+ */
-+#define bch2_fs_inconsistent(c, ...) \
-+({ \
-+ bch_err(c, __VA_ARGS__); \
-+ bch2_inconsistent_error(c); \
-+})
-+
-+/*
-+ * Conditional form: reports only when @cond is true. Evaluates to the truth
-+ * value of @cond, not to the result of bch2_inconsistent_error().
-+ */
-+#define bch2_fs_inconsistent_on(cond, c, ...) \
-+({ \
-+ bool _ret = unlikely(!!(cond)); \
-+ \
-+ if (_ret) \
-+ bch2_fs_inconsistent(c, __VA_ARGS__); \
-+ _ret; \
-+})
-+
-+/*
-+ * Later we might want to mark only the particular device inconsistent, not the
-+ * entire filesystem:
-+ */
-+
-+/*
-+ * Log a message against device @ca, then apply the inconsistency policy to
-+ * the filesystem that owns it. Statement macro: yields no value.
-+ */
-+#define bch2_dev_inconsistent(ca, ...) \
-+do { \
-+ bch_err(ca, __VA_ARGS__); \
-+ bch2_inconsistent_error((ca)->fs); \
-+} while (0)
-+
-+/* Conditional form: reports only when @cond is true; evaluates to @cond. */
-+#define bch2_dev_inconsistent_on(cond, ca, ...) \
-+({ \
-+ bool _ret = unlikely(!!(cond)); \
-+ \
-+ if (_ret) \
-+ bch2_dev_inconsistent(ca, __VA_ARGS__); \
-+ _ret; \
-+})
-+
-+/*
-+ * When a transaction update discovers or is causing a fs inconsistency, it's
-+ * helpful to also dump the pending updates:
-+ */
-+/*
-+ * Log a message, dump the transaction's pending updates, then apply the
-+ * inconsistency policy. Evaluates to the result of bch2_inconsistent_error().
-+ * Note that @trans is expanded unparenthesized, so pass a plain identifier.
-+ */
-+#define bch2_trans_inconsistent(trans, ...) \
-+({ \
-+ bch_err(trans->c, __VA_ARGS__); \
-+ bch2_dump_trans_updates(trans); \
-+ bch2_inconsistent_error(trans->c); \
-+})
-+
-+/* Conditional form: reports only when @cond is true; evaluates to @cond. */
-+#define bch2_trans_inconsistent_on(cond, trans, ...) \
-+({ \
-+ bool _ret = unlikely(!!(cond)); \
-+ \
-+ if (_ret) \
-+ bch2_trans_inconsistent(trans, __VA_ARGS__); \
-+ _ret; \
-+})
-+
-+/*
-+ * Fsck errors: inconsistency errors we detect at mount time, and should ideally
-+ * be able to repair:
-+ */
-+
-+/*
-+ * State kept per fsck error format string (see fsck_err_get() in error.c):
-+ * @list:        link in bch_fs.fsck_error_msgs
-+ * @fmt:         format string identifying the error; compared by pointer
-+ * @nr:          number of times this error has been seen
-+ * @ratelimited: tested by bch2_flush_fsck_errs() before printing a summary
-+ * @ret:         result returned for the most recent occurrence
-+ * @fix:         remembered "all yes"/"all no" answer, 0 if none
-+ * @last_msg:    kstrdup()ed copy of the last formatted message, may be NULL
-+ */
-+struct fsck_err_state {
-+ struct list_head list;
-+ const char *fmt;
-+ u64 nr;
-+ bool ratelimited;
-+ int ret;
-+ int fix;
-+ char *last_msg;
-+};
-+
-+/*
-+ * Bit flags passed to bch2_fsck_err():
-+ * FSCK_CAN_FIX       - a repair exists and may be applied
-+ * FSCK_CAN_IGNORE    - the error may be ignored and the operation continued
-+ * FSCK_NEED_FSCK     - message tells the user to run fsck to correct it
-+ * FSCK_NO_RATELIMIT  - never suppress repeated instances of this message
-+ */
-+enum bch_fsck_flags {
-+ FSCK_CAN_FIX = 1 << 0,
-+ FSCK_CAN_IGNORE = 1 << 1,
-+ FSCK_NEED_FSCK = 1 << 2,
-+ FSCK_NO_RATELIMIT = 1 << 3,
-+};
-+
-+#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
-+
-+__printf(4, 5) __cold
-+int bch2_fsck_err(struct bch_fs *,
-+ enum bch_fsck_flags,
-+ enum bch_sb_error_id,
-+ const char *, ...);
-+void bch2_flush_fsck_errs(struct bch_fs *);
-+
-+/*
-+ * Core fsck error macro. Calls bch2_fsck_err() and evaluates to true if the
-+ * error should be fixed, false if it should be ignored.
-+ *
-+ * For any other result it assigns to a variable named `ret` and jumps to a
-+ * label named `fsck_err`; both must exist in the calling function.
-+ */
-+#define __fsck_err(c, _flags, _err_type, ...) \
-+({ \
-+ int _ret = bch2_fsck_err(c, _flags, BCH_FSCK_ERR_##_err_type, \
-+ __VA_ARGS__); \
-+ \
-+ if (_ret != -BCH_ERR_fsck_fix && \
-+ _ret != -BCH_ERR_fsck_ignore) { \
-+ ret = _ret; \
-+ goto fsck_err; \
-+ } \
-+ \
-+ _ret == -BCH_ERR_fsck_fix; \
-+})
-+
-+/* These macros return true if error should be fixed: */
-+
-+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
-+
-+/* Conditional form of __fsck_err(): false without reporting if !@cond. */
-+#define __fsck_err_on(cond, c, _flags, _err_type, ...) \
-+ (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false)
-+
-+/* need_fsck_*: may be ignored, but not fixed here (FSCK_NEED_FSCK). */
-+#define need_fsck_err_on(cond, c, _err_type, ...) \
-+ __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__)
-+
-+#define need_fsck_err(c, _err_type, ...) \
-+ __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__)
-+
-+/* mustfix_*: can be fixed, but may not be ignored. */
-+#define mustfix_fsck_err(c, _err_type, ...) \
-+ __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__)
-+
-+#define mustfix_fsck_err_on(cond, c, _err_type, ...) \
-+ __fsck_err_on(cond, c, FSCK_CAN_FIX, _err_type, __VA_ARGS__)
-+
-+/* fsck_err*: can be either fixed or ignored. */
-+#define fsck_err(c, _err_type, ...) \
-+ __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
-+
-+#define fsck_err_on(cond, c, _err_type, ...) \
-+ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
-+
-+/*
-+ * Append the formatted message to @err_msg. @c and @err_type are accepted
-+ * but not used by this function.
-+ */
-+static inline void bch2_bkey_fsck_err(struct bch_fs *c,
-+				      struct printbuf *err_msg,
-+				      enum bch_sb_error_id err_type,
-+				      const char *fmt, ...)
-+{
-+	va_list ap;
-+
-+	va_start(ap, fmt);
-+	prt_vprintf(err_msg, fmt, ap);
-+	va_end(ap);
-+}
-+
-+/*
-+ * Report an invalid bkey: append the message to @_err_msg, count the error in
-+ * the superblock, set the caller's `ret` to -BCH_ERR_invalid_bkey and jump to
-+ * the caller's `fsck_err` label. Both `ret` and `fsck_err` must exist in the
-+ * calling function.
-+ */
-+#define bkey_fsck_err(c, _err_msg, _err_type, ...) \
-+do { \
-+ prt_printf(_err_msg, __VA_ARGS__); \
-+ bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \
-+ ret = -BCH_ERR_invalid_bkey; \
-+ goto fsck_err; \
-+} while (0)
-+
-+/* Conditional form: does nothing unless @cond is true. */
-+#define bkey_fsck_err_on(cond, ...) \
-+do { \
-+ if (unlikely(cond)) \
-+ bkey_fsck_err(__VA_ARGS__); \
-+} while (0)
-+
-+/*
-+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW
-+ * mode - pretty much just due to metadata IO errors:
-+ */
-+
-+void bch2_fatal_error(struct bch_fs *);
-+
-+/* Log a message against @c, then call bch2_fatal_error(). Yields no value. */
-+#define bch2_fs_fatal_error(c, ...) \
-+do { \
-+ bch_err(c, __VA_ARGS__); \
-+ bch2_fatal_error(c); \
-+} while (0)
-+
-+/* Conditional form: reports only when @cond is true; evaluates to @cond. */
-+#define bch2_fs_fatal_err_on(cond, c, ...) \
-+({ \
-+ bool _ret = unlikely(!!(cond)); \
-+ \
-+ if (_ret) \
-+ bch2_fs_fatal_error(c, __VA_ARGS__); \
-+ _ret; \
-+})
-+
-+/*
-+ * IO errors: either recoverable metadata IO (because we have replicas), or data
-+ * IO - we need to log it and print out a message, but we don't (necessarily)
-+ * want to shut down the fs:
-+ */
-+
-+void bch2_io_error_work(struct work_struct *);
-+
-+/* Does the error handling without logging a message */
-+void bch2_io_error(struct bch_dev *, enum bch_member_error_type);
-+
-+/*
-+ * If @cond is true, log a ratelimited per-device message and account an IO
-+ * error of kind @_type against @ca. Evaluates to @cond.
-+ */
-+#define bch2_dev_io_err_on(cond, ca, _type, ...) \
-+({ \
-+ bool _ret = (cond); \
-+ \
-+ if (_ret) { \
-+ bch_err_dev_ratelimited(ca, __VA_ARGS__); \
-+ bch2_io_error(ca, _type); \
-+ } \
-+ _ret; \
-+})
-+
-+/*
-+ * Same as bch2_dev_io_err_on(), but logs through
-+ * bch_err_inum_offset_ratelimited().
-+ */
-+#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \
-+({ \
-+ bool _ret = (cond); \
-+ \
-+ if (_ret) { \
-+ bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \
-+ bch2_io_error(ca, _type); \
-+ } \
-+ _ret; \
-+})
-+
-+#endif /* _BCACHEFS_ERROR_H */
-diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
-new file mode 100644
-index 000000000000..21af6fb8cecf
---- /dev/null
-+++ b/fs/bcachefs/extent_update.c
-@@ -0,0 +1,173 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "buckets.h"
-+#include "debug.h"
-+#include "extents.h"
-+#include "extent_update.h"
-+
-+/*
-+ * This counts the number of iterators to the alloc & ec btrees we'll need
-+ * inserting/removing this extent:
-+ */
-+/*
-+ * Estimate how many btree iterators updating key @k will need: every pointer
-+ * and stripe pointer entry counts twice (an alloc btree update may also
-+ * touch the freespace or discard btrees), and every cached pointer adds one
-+ * more for a possible LRU btree update.
-+ */
-+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
-+{
-+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+	const union bch_extent_entry *entry;
-+	unsigned nr_alloc = 0, nr_lru = 0;
-+
-+	bkey_extent_entry_for_each(ptrs, entry) {
-+		switch (__extent_entry_type(entry)) {
-+		case BCH_EXTENT_ENTRY_ptr:
-+			/* Might also be updating LRU btree */
-+			if (entry->ptr.cached)
-+				nr_lru++;
-+			nr_alloc++;
-+			break;
-+		case BCH_EXTENT_ENTRY_stripe_ptr:
-+			nr_alloc++;
-+			break;
-+		default:
-+			break;
-+		}
-+	}
-+
-+	return nr_lru + 2 * nr_alloc;
-+}
-+
-+/*
-+ * Add to *@nr_iters the number of btree iterators that touching key @k will
-+ * need, and pull *@end back once the count reaches @max_iters.
-+ *
-+ * For extent and reflink_v keys the cost comes from the key's own pointers.
-+ * For reflink_p keys the indirect extents it references are walked in the
-+ * reflink btree, starting @offset sectors into the referenced range.
-+ *
-+ * Returns 1 if the limit was reached (and *@end possibly clamped), 0 if not.
-+ * A nonzero result from walking the reflink btree takes precedence.
-+ */
-+static int count_iters_for_insert(struct btree_trans *trans,
-+ struct bkey_s_c k,
-+ unsigned offset,
-+ struct bpos *end,
-+ unsigned *nr_iters,
-+ unsigned max_iters)
-+{
-+ int ret = 0, ret2 = 0;
-+
-+ /* Already over budget before looking at this key. */
-+ if (*nr_iters >= max_iters) {
-+ *end = bpos_min(*end, k.k->p);
-+ ret = 1;
-+ }
-+
-+ switch (k.k->type) {
-+ case KEY_TYPE_extent:
-+ case KEY_TYPE_reflink_v:
-+ *nr_iters += bch2_bkey_nr_alloc_ptrs(k);
-+
-+ if (*nr_iters >= max_iters) {
-+ *end = bpos_min(*end, k.k->p);
-+ ret = 1;
-+ }
-+
-+ break;
-+ case KEY_TYPE_reflink_p: {
-+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-+ u64 idx = le64_to_cpu(p.v->idx);
-+ unsigned sectors = bpos_min(*end, p.k->p).offset -
-+ bkey_start_offset(p.k);
-+ struct btree_iter iter;
-+ struct bkey_s_c r_k;
-+
-+ for_each_btree_key_norestart(trans, iter,
-+ BTREE_ID_reflink, POS(0, idx + offset),
-+ BTREE_ITER_SLOTS, r_k, ret2) {
-+ if (bkey_ge(bkey_start_pos(r_k.k), POS(0, idx + sectors)))
-+ break;
-+
-+ /* extent_update_to_keys(), for the reflink_v update */
-+ *nr_iters += 1;
-+
-+ *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
-+
-+ if (*nr_iters >= max_iters) {
-+ /* Clamp *end to where this indirect extent ends within @k. */
-+ struct bpos pos = bkey_start_pos(k.k);
-+ pos.offset += min_t(u64, k.k->size,
-+ r_k.k->p.offset - idx);
-+
-+ *end = bpos_min(*end, pos);
-+ ret = 1;
-+ break;
-+ }
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ break;
-+ }
-+ }
-+
-+ return ret2 ?: ret;
-+}
-+
-+#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3)
-+
-+/*
-+ * Compute in *@end how much of @insert can be handled without exceeding the
-+ * iterator budget.
-+ *
-+ * *@end starts at the end of @insert and is pulled back by
-+ * count_iters_for_insert(): first for @insert itself (with half the budget),
-+ * then for each existing key it overlaps, walked via a copy of @iter.
-+ *
-+ * Returns 0 on success; a nonzero result from traversing @iter or a negative
-+ * result from the counting/iteration steps is returned as the error.
-+ */
-+int bch2_extent_atomic_end(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_i *insert,
-+ struct bpos *end)
-+{
-+ struct btree_iter copy;
-+ struct bkey_s_c k;
-+ unsigned nr_iters = 0;
-+ int ret;
-+
-+ ret = bch2_btree_iter_traverse(iter);
-+ if (ret)
-+ return ret;
-+
-+ *end = insert->k.p;
-+
-+ /* extent_update_to_keys(): */
-+ nr_iters += 1;
-+
-+ ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
-+ &nr_iters, EXTENT_ITERS_MAX / 2);
-+ if (ret < 0)
-+ return ret;
-+
-+ bch2_trans_copy_iter(&copy, iter);
-+
-+ for_each_btree_key_upto_continue_norestart(copy, insert->k.p, 0, k, ret) {
-+ unsigned offset = 0;
-+
-+ /* How far into the existing key @k the insert begins. */
-+ if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k)))
-+ offset = bkey_start_offset(&insert->k) -
-+ bkey_start_offset(k.k);
-+
-+ /* extent_handle_overwrites(): */
-+ switch (bch2_extent_overlap(&insert->k, k.k)) {
-+ case BCH_EXTENT_OVERLAP_ALL:
-+ case BCH_EXTENT_OVERLAP_FRONT:
-+ nr_iters += 1;
-+ break;
-+ case BCH_EXTENT_OVERLAP_BACK:
-+ case BCH_EXTENT_OVERLAP_MIDDLE:
-+ nr_iters += 2;
-+ break;
-+ }
-+
-+ ret = count_iters_for_insert(trans, k, offset, end,
-+ &nr_iters, EXTENT_ITERS_MAX);
-+ if (ret)
-+ break;
-+ }
-+
-+ bch2_trans_iter_exit(trans, &copy);
-+ /* A positive ret only means "limit reached", which is not an error. */
-+ return ret < 0 ? ret : 0;
-+}
-+
-+/*
-+ * Trim @k so it ends no later than the position computed by
-+ * bch2_extent_atomic_end(). Returns 0 on success, or the error from
-+ * bch2_extent_atomic_end(), in which case @k is left untouched.
-+ */
-+int bch2_extent_trim_atomic(struct btree_trans *trans,
-+			    struct btree_iter *iter,
-+			    struct bkey_i *k)
-+{
-+	struct bpos atomic_end;
-+	int ret = bch2_extent_atomic_end(trans, iter, k, &atomic_end);
-+
-+	if (!ret)
-+		bch2_cut_back(atomic_end, k);
-+
-+	return ret;
-+}
-diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h
-new file mode 100644
-index 000000000000..6f5cf449361a
---- /dev/null
-+++ b/fs/bcachefs/extent_update.h
-@@ -0,0 +1,12 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_EXTENT_UPDATE_H
-+#define _BCACHEFS_EXTENT_UPDATE_H
-+
-+#include "bcachefs.h"
-+
-+int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
-+ struct bkey_i *, struct bpos *);
-+int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *,
-+ struct bkey_i *);
-+
-+#endif /* _BCACHEFS_EXTENT_UPDATE_H */
-diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
-new file mode 100644
-index 000000000000..a864de231b69
---- /dev/null
-+++ b/fs/bcachefs/extents.c
-@@ -0,0 +1,1516 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
-+ *
-+ * Code for managing the extent btree and dynamically updating the writeback
-+ * dirty sector count.
-+ */
-+
-+#include "bcachefs.h"
-+#include "bkey_methods.h"
-+#include "btree_gc.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "buckets.h"
-+#include "checksum.h"
-+#include "compress.h"
-+#include "debug.h"
-+#include "disk_groups.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "inode.h"
-+#include "journal.h"
-+#include "replicas.h"
-+#include "super.h"
-+#include "super-io.h"
-+#include "trace.h"
-+#include "util.h"
-+
-+static unsigned bch2_crc_field_size_max[] = {
-+ [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
-+ [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
-+ [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX,
-+};
-+
-+static void bch2_extent_crc_pack(union bch_extent_crc *,
-+ struct bch_extent_crc_unpacked,
-+ enum bch_extent_entry_type);
-+
-+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
-+ unsigned dev)
-+{
-+ struct bch_dev_io_failures *i;
-+
-+ for (i = f->devs; i < f->devs + f->nr; i++)
-+ if (i->dev == dev)
-+ return i;
-+
-+ return NULL;
-+}
-+
-+void bch2_mark_io_failure(struct bch_io_failures *failed,
-+ struct extent_ptr_decoded *p)
-+{
-+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
-+
-+ if (!f) {
-+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
-+
-+ f = &failed->devs[failed->nr++];
-+ f->dev = p->ptr.dev;
-+ f->idx = p->idx;
-+ f->nr_failed = 1;
-+ f->nr_retries = 0;
-+ } else if (p->idx != f->idx) {
-+ f->idx = p->idx;
-+ f->nr_failed = 1;
-+ f->nr_retries = 0;
-+ } else {
-+ f->nr_failed++;
-+ }
-+}
-+
-+/*
-+ * returns true if p1 is better than p2:
-+ */
-+static inline bool ptr_better(struct bch_fs *c,
-+ const struct extent_ptr_decoded p1,
-+ const struct extent_ptr_decoded p2)
-+{
-+ if (likely(!p1.idx && !p2.idx)) {
-+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
-+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
-+
-+ u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
-+ u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
-+
-+ /* Pick at random, biased in favor of the faster device: */
-+
-+ return bch2_rand_range(l1 + l2) > l1;
-+ }
-+
-+ if (bch2_force_reconstruct_read)
-+ return p1.idx > p2.idx;
-+
-+ return p1.idx < p2.idx;
-+}
-+
-+/*
-+ * This picks a non-stale pointer, preferably from a device other than @avoid.
-+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to
-+ * other devices, it will still pick a pointer from avoid.
-+ */
-+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
-+ struct bch_io_failures *failed,
-+ struct extent_ptr_decoded *pick)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ struct bch_dev_io_failures *f;
-+ struct bch_dev *ca;
-+ int ret = 0;
-+
-+ if (k.k->type == KEY_TYPE_error)
-+ return -EIO;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ /*
-+ * Unwritten extent: no need to actually read, treat it as a
-+ * hole and return 0s:
-+ */
-+ if (p.ptr.unwritten)
-+ return 0;
-+
-+ ca = bch_dev_bkey_exists(c, p.ptr.dev);
-+
-+ /*
-+ * If there are any dirty pointers it's an error if we can't
-+ * read:
-+ */
-+ if (!ret && !p.ptr.cached)
-+ ret = -EIO;
-+
-+ if (p.ptr.cached && ptr_stale(ca, &p.ptr))
-+ continue;
-+
-+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
-+ if (f)
-+ p.idx = f->nr_failed < f->nr_retries
-+ ? f->idx
-+ : f->idx + 1;
-+
-+ if (!p.idx &&
-+ !bch2_dev_is_readable(ca))
-+ p.idx++;
-+
-+ if (bch2_force_reconstruct_read &&
-+ !p.idx && p.has_ec)
-+ p.idx++;
-+
-+ if (p.idx >= (unsigned) p.has_ec + 1)
-+ continue;
-+
-+ if (ret > 0 && !ptr_better(c, p, *pick))
-+ continue;
-+
-+ *pick = p;
-+ ret = 1;
-+ }
-+
-+ return ret;
-+}
-+
-+/* KEY_TYPE_btree_ptr: */
-+
-+int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err,
-+ btree_ptr_val_too_big,
-+ "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
-+
-+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ bch2_bkey_ptrs_to_text(out, c, k);
-+}
-+
-+int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err,
-+ btree_ptr_v2_val_too_big,
-+ "value too big (%zu > %zu)",
-+ bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
-+
-+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
-+
-+ prt_printf(out, "seq %llx written %u min_key %s",
-+ le64_to_cpu(bp.v->seq),
-+ le16_to_cpu(bp.v->sectors_written),
-+ BTREE_PTR_RANGE_UPDATED(bp.v) ? "R " : "");
-+
-+ bch2_bpos_to_text(out, bp.v->min_key);
-+ prt_printf(out, " ");
-+ bch2_bkey_ptrs_to_text(out, c, k);
-+}
-+
-+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
-+ unsigned big_endian, int write,
-+ struct bkey_s k)
-+{
-+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k);
-+
-+ compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key);
-+
-+ if (version < bcachefs_metadata_version_inode_btree_change &&
-+ btree_id_is_extents(btree_id) &&
-+ !bkey_eq(bp.v->min_key, POS_MIN))
-+ bp.v->min_key = write
-+ ? bpos_nosnap_predecessor(bp.v->min_key)
-+ : bpos_nosnap_successor(bp.v->min_key);
-+}
-+
-+/* KEY_TYPE_extent: */
-+
-+bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
-+{
-+ struct bkey_ptrs l_ptrs = bch2_bkey_ptrs(l);
-+ struct bkey_ptrs_c r_ptrs = bch2_bkey_ptrs_c(r);
-+ union bch_extent_entry *en_l;
-+ const union bch_extent_entry *en_r;
-+ struct extent_ptr_decoded lp, rp;
-+ bool use_right_ptr;
-+ struct bch_dev *ca;
-+
-+ en_l = l_ptrs.start;
-+ en_r = r_ptrs.start;
-+ while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
-+ if (extent_entry_type(en_l) != extent_entry_type(en_r))
-+ return false;
-+
-+ en_l = extent_entry_next(en_l);
-+ en_r = extent_entry_next(en_r);
-+ }
-+
-+ if (en_l < l_ptrs.end || en_r < r_ptrs.end)
-+ return false;
-+
-+ en_l = l_ptrs.start;
-+ en_r = r_ptrs.start;
-+ lp.crc = bch2_extent_crc_unpack(l.k, NULL);
-+ rp.crc = bch2_extent_crc_unpack(r.k, NULL);
-+
-+ while (__bkey_ptr_next_decode(l.k, l_ptrs.end, lp, en_l) &&
-+ __bkey_ptr_next_decode(r.k, r_ptrs.end, rp, en_r)) {
-+ if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size !=
-+ rp.ptr.offset + rp.crc.offset ||
-+ lp.ptr.dev != rp.ptr.dev ||
-+ lp.ptr.gen != rp.ptr.gen ||
-+ lp.ptr.unwritten != rp.ptr.unwritten ||
-+ lp.has_ec != rp.has_ec)
-+ return false;
-+
-+ /* Extents may not straddle buckets: */
-+ ca = bch_dev_bkey_exists(c, lp.ptr.dev);
-+ if (PTR_BUCKET_NR(ca, &lp.ptr) != PTR_BUCKET_NR(ca, &rp.ptr))
-+ return false;
-+
-+ if (lp.has_ec != rp.has_ec ||
-+ (lp.has_ec &&
-+ (lp.ec.block != rp.ec.block ||
-+ lp.ec.redundancy != rp.ec.redundancy ||
-+ lp.ec.idx != rp.ec.idx)))
-+ return false;
-+
-+ if (lp.crc.compression_type != rp.crc.compression_type ||
-+ lp.crc.nonce != rp.crc.nonce)
-+ return false;
-+
-+ if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
-+ lp.crc.uncompressed_size) {
-+ /* can use left extent's crc entry */
-+ } else if (lp.crc.live_size <= rp.crc.offset) {
-+ /* can use right extent's crc entry */
-+ } else {
-+ /* check if checksums can be merged: */
-+ if (lp.crc.csum_type != rp.crc.csum_type ||
-+ lp.crc.nonce != rp.crc.nonce ||
-+ crc_is_compressed(lp.crc) ||
-+ !bch2_checksum_mergeable(lp.crc.csum_type))
-+ return false;
-+
-+ if (lp.crc.offset + lp.crc.live_size != lp.crc.compressed_size ||
-+ rp.crc.offset)
-+ return false;
-+
-+ if (lp.crc.csum_type &&
-+ lp.crc.uncompressed_size +
-+ rp.crc.uncompressed_size > (c->opts.encoded_extent_max >> 9))
-+ return false;
-+ }
-+
-+ en_l = extent_entry_next(en_l);
-+ en_r = extent_entry_next(en_r);
-+ }
-+
-+ en_l = l_ptrs.start;
-+ en_r = r_ptrs.start;
-+ while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
-+ if (extent_entry_is_crc(en_l)) {
-+ struct bch_extent_crc_unpacked crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
-+ struct bch_extent_crc_unpacked crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
-+
-+ if (crc_l.uncompressed_size + crc_r.uncompressed_size >
-+ bch2_crc_field_size_max[extent_entry_type(en_l)])
-+ return false;
-+ }
-+
-+ en_l = extent_entry_next(en_l);
-+ en_r = extent_entry_next(en_r);
-+ }
-+
-+ use_right_ptr = false;
-+ en_l = l_ptrs.start;
-+ en_r = r_ptrs.start;
-+ while (en_l < l_ptrs.end) {
-+ if (extent_entry_type(en_l) == BCH_EXTENT_ENTRY_ptr &&
-+ use_right_ptr)
-+ en_l->ptr = en_r->ptr;
-+
-+ if (extent_entry_is_crc(en_l)) {
-+ struct bch_extent_crc_unpacked crc_l =
-+ bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
-+ struct bch_extent_crc_unpacked crc_r =
-+ bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
-+
-+ use_right_ptr = false;
-+
-+ if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
-+ crc_l.uncompressed_size) {
-+ /* can use left extent's crc entry */
-+ } else if (crc_l.live_size <= crc_r.offset) {
-+ /* can use right extent's crc entry */
-+ crc_r.offset -= crc_l.live_size;
-+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
-+ extent_entry_type(en_l));
-+ use_right_ptr = true;
-+ } else {
-+ crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
-+ crc_l.csum,
-+ crc_r.csum,
-+ crc_r.uncompressed_size << 9);
-+
-+ crc_l.uncompressed_size += crc_r.uncompressed_size;
-+ crc_l.compressed_size += crc_r.compressed_size;
-+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
-+ extent_entry_type(en_l));
-+ }
-+ }
-+
-+ en_l = extent_entry_next(en_l);
-+ en_r = extent_entry_next(en_r);
-+ }
-+
-+ bch2_key_resize(l.k, l.k->size + r.k->size);
-+ return true;
-+}
-+
-+/* KEY_TYPE_reservation: */
-+
-+int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err,
-+ reservation_key_nr_replicas_invalid,
-+ "invalid nr_replicas (%u)", r.v->nr_replicas);
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
-+
-+ prt_printf(out, "generation %u replicas %u",
-+ le32_to_cpu(r.v->generation),
-+ r.v->nr_replicas);
-+}
-+
-+bool bch2_reservation_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
-+{
-+ struct bkey_s_reservation l = bkey_s_to_reservation(_l);
-+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(_r);
-+
-+ if (l.v->generation != r.v->generation ||
-+ l.v->nr_replicas != r.v->nr_replicas)
-+ return false;
-+
-+ bch2_key_resize(l.k, l.k->size + r.k->size);
-+ return true;
-+}
-+
-+/* Extent checksum entries: */
-+
-+/* returns true if not equal */
-+static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
-+ struct bch_extent_crc_unpacked r)
-+{
-+ return (l.csum_type != r.csum_type ||
-+ l.compression_type != r.compression_type ||
-+ l.compressed_size != r.compressed_size ||
-+ l.uncompressed_size != r.uncompressed_size ||
-+ l.offset != r.offset ||
-+ l.live_size != r.live_size ||
-+ l.nonce != r.nonce ||
-+ bch2_crc_cmp(l.csum, r.csum));
-+}
-+
-+static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
-+ struct bch_extent_crc_unpacked n)
-+{
-+ return !crc_is_compressed(u) &&
-+ u.csum_type &&
-+ u.uncompressed_size > u.live_size &&
-+ bch2_csum_type_is_encryption(u.csum_type) ==
-+ bch2_csum_type_is_encryption(n.csum_type);
-+}
-+
-+bool bch2_can_narrow_extent_crcs(struct bkey_s_c k,
-+ struct bch_extent_crc_unpacked n)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ struct bch_extent_crc_unpacked crc;
-+ const union bch_extent_entry *i;
-+
-+ if (!n.csum_type)
-+ return false;
-+
-+ bkey_for_each_crc(k.k, ptrs, crc, i)
-+ if (can_narrow_crc(crc, n))
-+ return true;
-+
-+ return false;
-+}
-+
-+/*
-+ * We're writing another replica for this extent, so while we've got the data in
-+ * memory we'll be computing a new checksum for the currently live data.
-+ *
-+ * If there are other replicas we aren't moving, and they are checksummed but
-+ * not compressed, we can modify them to point to only the data that is
-+ * currently live (so that readers won't have to bounce) while we've got the
-+ * checksum we need:
-+ */
-+bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n)
-+{
-+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
-+ struct bch_extent_crc_unpacked u;
-+ struct extent_ptr_decoded p;
-+ union bch_extent_entry *i;
-+ bool ret = false;
-+
-+ /* Find a checksum entry that covers only live data: */
-+ if (!n.csum_type) {
-+ bkey_for_each_crc(&k->k, ptrs, u, i)
-+ if (!crc_is_compressed(u) &&
-+ u.csum_type &&
-+ u.live_size == u.uncompressed_size) {
-+ n = u;
-+ goto found;
-+ }
-+ return false;
-+ }
-+found:
-+ BUG_ON(crc_is_compressed(n));
-+ BUG_ON(n.offset);
-+ BUG_ON(n.live_size != k->k.size);
-+
-+restart_narrow_pointers:
-+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
-+
-+ bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
-+ if (can_narrow_crc(p.crc, n)) {
-+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(k), &i->ptr);
-+ p.ptr.offset += p.crc.offset;
-+ p.crc = n;
-+ bch2_extent_ptr_decoded_append(k, &p);
-+ ret = true;
-+ goto restart_narrow_pointers;
-+ }
-+
-+ return ret;
-+}
-+
-+static void bch2_extent_crc_pack(union bch_extent_crc *dst,
-+ struct bch_extent_crc_unpacked src,
-+ enum bch_extent_entry_type type)
-+{
-+#define set_common_fields(_dst, _src) \
-+ _dst.type = 1 << type; \
-+ _dst.csum_type = _src.csum_type, \
-+ _dst.compression_type = _src.compression_type, \
-+ _dst._compressed_size = _src.compressed_size - 1, \
-+ _dst._uncompressed_size = _src.uncompressed_size - 1, \
-+ _dst.offset = _src.offset
-+
-+ switch (type) {
-+ case BCH_EXTENT_ENTRY_crc32:
-+ set_common_fields(dst->crc32, src);
-+ dst->crc32.csum = (u32 __force) *((__le32 *) &src.csum.lo);
-+ break;
-+ case BCH_EXTENT_ENTRY_crc64:
-+ set_common_fields(dst->crc64, src);
-+ dst->crc64.nonce = src.nonce;
-+ dst->crc64.csum_lo = (u64 __force) src.csum.lo;
-+ dst->crc64.csum_hi = (u64 __force) *((__le16 *) &src.csum.hi);
-+ break;
-+ case BCH_EXTENT_ENTRY_crc128:
-+ set_common_fields(dst->crc128, src);
-+ dst->crc128.nonce = src.nonce;
-+ dst->crc128.csum = src.csum;
-+ break;
-+ default:
-+ BUG();
-+ }
-+#undef set_common_fields
-+}
-+
-+void bch2_extent_crc_append(struct bkey_i *k,
-+ struct bch_extent_crc_unpacked new)
-+{
-+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
-+ union bch_extent_crc *crc = (void *) ptrs.end;
-+ enum bch_extent_entry_type type;
-+
-+ if (bch_crc_bytes[new.csum_type] <= 4 &&
-+ new.uncompressed_size <= CRC32_SIZE_MAX &&
-+ new.nonce <= CRC32_NONCE_MAX)
-+ type = BCH_EXTENT_ENTRY_crc32;
-+ else if (bch_crc_bytes[new.csum_type] <= 10 &&
-+ new.uncompressed_size <= CRC64_SIZE_MAX &&
-+ new.nonce <= CRC64_NONCE_MAX)
-+ type = BCH_EXTENT_ENTRY_crc64;
-+ else if (bch_crc_bytes[new.csum_type] <= 16 &&
-+ new.uncompressed_size <= CRC128_SIZE_MAX &&
-+ new.nonce <= CRC128_NONCE_MAX)
-+ type = BCH_EXTENT_ENTRY_crc128;
-+ else
-+ BUG();
-+
-+ bch2_extent_crc_pack(crc, new, type);
-+
-+ k->k.u64s += extent_entry_u64s(ptrs.end);
-+
-+ EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX);
-+}
-+
-+/* Generic code for keys with pointers: */
-+
-+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k)
-+{
-+ return bch2_bkey_devs(k).nr;
-+}
-+
-+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k)
-+{
-+ return k.k->type == KEY_TYPE_reservation
-+ ? bkey_s_c_to_reservation(k).v->nr_replicas
-+ : bch2_bkey_dirty_devs(k).nr;
-+}
-+
-+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k)
-+{
-+ unsigned ret = 0;
-+
-+ if (k.k->type == KEY_TYPE_reservation) {
-+ ret = bkey_s_c_to_reservation(k).v->nr_replicas;
-+ } else {
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-+ ret += !p.ptr.cached && !crc_is_compressed(p.crc);
-+ }
-+
-+ return ret;
-+}
-+
-+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ unsigned ret = 0;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-+ if (!p.ptr.cached && crc_is_compressed(p.crc))
-+ ret += p.crc.compressed_size;
-+
-+ return ret;
-+}
-+
-+bool bch2_bkey_is_incompressible(struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct bch_extent_crc_unpacked crc;
-+
-+ bkey_for_each_crc(k.k, ptrs, crc, entry)
-+ if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
-+ return true;
-+ return false;
-+}
-+
-+unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p = { 0 };
-+ unsigned replicas = 0;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ if (p.ptr.cached)
-+ continue;
-+
-+ if (p.has_ec)
-+ replicas += p.ec.redundancy;
-+
-+ replicas++;
-+
-+ }
-+
-+ return replicas;
-+}
-+
-+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
-+{
-+ struct bch_dev *ca;
-+
-+ if (p->ptr.cached)
-+ return 0;
-+
-+ ca = bch_dev_bkey_exists(c, p->ptr.dev);
-+
-+ return ca->mi.durability +
-+ (p->has_ec
-+ ? p->ec.redundancy
-+ : 0);
-+}
-+
-+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
-+{
-+ struct bch_dev *ca;
-+
-+ if (p->ptr.cached)
-+ return 0;
-+
-+ ca = bch_dev_bkey_exists(c, p->ptr.dev);
-+
-+ if (ca->mi.state == BCH_MEMBER_STATE_failed)
-+ return 0;
-+
-+ return ca->mi.durability +
-+ (p->has_ec
-+ ? p->ec.redundancy
-+ : 0);
-+}
-+
-+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ unsigned durability = 0;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-+ durability += bch2_extent_ptr_durability(c, &p);
-+
-+ return durability;
-+}
-+
-+static unsigned bch2_bkey_durability_safe(struct bch_fs *c, struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ unsigned durability = 0;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-+ if (p.ptr.dev < c->sb.nr_devices && c->devs[p.ptr.dev])
-+ durability += bch2_extent_ptr_durability(c, &p);
-+
-+ return durability;
-+}
-+
-+void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
-+{
-+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
-+ union bch_extent_entry *next = extent_entry_next(entry);
-+
-+ memmove_u64s(entry, next, (u64 *) end - (u64 *) next);
-+ k->k.u64s -= extent_entry_u64s(entry);
-+}
-+
-+void bch2_extent_ptr_decoded_append(struct bkey_i *k,
-+ struct extent_ptr_decoded *p)
-+{
-+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
-+ struct bch_extent_crc_unpacked crc =
-+ bch2_extent_crc_unpack(&k->k, NULL);
-+ union bch_extent_entry *pos;
-+
-+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
-+ pos = ptrs.start;
-+ goto found;
-+ }
-+
-+ bkey_for_each_crc(&k->k, ptrs, crc, pos)
-+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
-+ pos = extent_entry_next(pos);
-+ goto found;
-+ }
-+
-+ bch2_extent_crc_append(k, p->crc);
-+ pos = bkey_val_end(bkey_i_to_s(k));
-+found:
-+ p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
-+ __extent_entry_insert(k, pos, to_entry(&p->ptr));
-+
-+ if (p->has_ec) {
-+ p->ec.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr;
-+ __extent_entry_insert(k, pos, to_entry(&p->ec));
-+ }
-+}
-+
-+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
-+ union bch_extent_entry *entry)
-+{
-+ union bch_extent_entry *i = ptrs.start;
-+
-+ if (i == entry)
-+ return NULL;
-+
-+ while (extent_entry_next(i) != entry)
-+ i = extent_entry_next(i);
-+ return i;
-+}
-+
-+/*
-+ * Returns pointer to the next entry after the one being dropped:
-+ */
-+union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
-+ struct bch_extent_ptr *ptr)
-+{
-+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
-+ union bch_extent_entry *entry = to_entry(ptr), *next;
-+ union bch_extent_entry *ret = entry;
-+ bool drop_crc = true;
-+
-+ EBUG_ON(ptr < &ptrs.start->ptr ||
-+ ptr >= &ptrs.end->ptr);
-+ EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
-+
-+ for (next = extent_entry_next(entry);
-+ next != ptrs.end;
-+ next = extent_entry_next(next)) {
-+ if (extent_entry_is_crc(next)) {
-+ break;
-+ } else if (extent_entry_is_ptr(next)) {
-+ drop_crc = false;
-+ break;
-+ }
-+ }
-+
-+ extent_entry_drop(k, entry);
-+
-+ while ((entry = extent_entry_prev(ptrs, entry))) {
-+ if (extent_entry_is_ptr(entry))
-+ break;
-+
-+ if ((extent_entry_is_crc(entry) && drop_crc) ||
-+ extent_entry_is_stripe_ptr(entry)) {
-+ ret = (void *) ret - extent_entry_bytes(entry);
-+ extent_entry_drop(k, entry);
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
-+ struct bch_extent_ptr *ptr)
-+{
-+ bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
-+ union bch_extent_entry *ret =
-+ bch2_bkey_drop_ptr_noerror(k, ptr);
-+
-+ /*
-+ * If we deleted all the dirty pointers and there's still cached
-+ * pointers, we could set the cached pointers to dirty if they're not
-+ * stale - but to do that correctly we'd need to grab an open_bucket
-+ * reference so that we don't race with bucket reuse:
-+ */
-+ if (have_dirty &&
-+ !bch2_bkey_dirty_devs(k.s_c).nr) {
-+ k.k->type = KEY_TYPE_error;
-+ set_bkey_val_u64s(k.k, 0);
-+ ret = NULL;
-+ } else if (!bch2_bkey_nr_ptrs(k.s_c)) {
-+ k.k->type = KEY_TYPE_deleted;
-+ set_bkey_val_u64s(k.k, 0);
-+ ret = NULL;
-+ }
-+
-+ return ret;
-+}
-+
-+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
-+{
-+ struct bch_extent_ptr *ptr;
-+
-+ bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
-+}
-+
-+void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
-+{
-+ struct bch_extent_ptr *ptr = bch2_bkey_has_device(k, dev);
-+
-+ if (ptr)
-+ bch2_bkey_drop_ptr_noerror(k, ptr);
-+}
-+
-+const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+
-+ bkey_for_each_ptr(ptrs, ptr)
-+ if (ptr->dev == dev)
-+ return ptr;
-+
-+ return NULL;
-+}
-+
-+bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+
-+ bkey_for_each_ptr(ptrs, ptr)
-+ if (bch2_dev_in_target(c, ptr->dev, target) &&
-+ (!ptr->cached ||
-+ !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
-+ return true;
-+
-+ return false;
-+}
-+
-+bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
-+ struct bch_extent_ptr m, u64 offset)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-+ if (p.ptr.dev == m.dev &&
-+ p.ptr.gen == m.gen &&
-+ (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) ==
-+ (s64) m.offset - offset)
-+ return true;
-+
-+ return false;
-+}
-+
-+/*
-+ * Returns true if two extents refer to the same data:
-+ */
-+bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
-+{
-+ if (k1.k->type != k2.k->type)
-+ return false;
-+
-+ if (bkey_extent_is_direct_data(k1.k)) {
-+ struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1);
-+ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
-+ const union bch_extent_entry *entry1, *entry2;
-+ struct extent_ptr_decoded p1, p2;
-+
-+ if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2))
-+ return false;
-+
-+ bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
-+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
-+ if (p1.ptr.dev == p2.ptr.dev &&
-+ p1.ptr.gen == p2.ptr.gen &&
-+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
-+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
-+ return true;
-+
-+ return false;
-+ } else {
-+ /* KEY_TYPE_deleted, etc. */
-+ return true;
-+ }
-+}
-+
-+struct bch_extent_ptr *
-+bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bkey_s k2)
-+{
-+ struct bkey_ptrs ptrs2 = bch2_bkey_ptrs(k2);
-+ union bch_extent_entry *entry2;
-+ struct extent_ptr_decoded p2;
-+
-+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
-+ if (p1.ptr.dev == p2.ptr.dev &&
-+ p1.ptr.gen == p2.ptr.gen &&
-+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
-+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
-+ return &entry2->ptr;
-+
-+ return NULL;
-+}
-+
-+void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
-+{
-+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
-+ union bch_extent_entry *entry;
-+ union bch_extent_entry *ec = NULL;
-+
-+ bkey_extent_entry_for_each(ptrs, entry) {
-+ if (&entry->ptr == ptr) {
-+ ptr->cached = true;
-+ if (ec)
-+ extent_entry_drop(k, ec);
-+ return;
-+ }
-+
-+ if (extent_entry_is_stripe_ptr(entry))
-+ ec = entry;
-+ else if (extent_entry_is_ptr(entry))
-+ ec = NULL;
-+ }
-+
-+ BUG();
-+}
-+
-+/*
-+ * bch_extent_normalize - clean up an extent, dropping stale pointers etc.
-+ *
-+ * Returns true if @k should be dropped entirely
-+ *
-+ * For existing keys, only called when btree nodes are being rewritten, not when
-+ * they're merely being compacted/resorted in memory.
-+ */
-+bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
-+{
-+ struct bch_extent_ptr *ptr;
-+
-+ bch2_bkey_drop_ptrs(k, ptr,
-+ ptr->cached &&
-+ ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
-+
-+ return bkey_deleted(k.k);
-+}
-+
-+void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ bool first = true;
-+
-+ if (c)
-+ prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));
-+
-+ bkey_extent_entry_for_each(ptrs, entry) {
-+ if (!first)
-+ prt_printf(out, " ");
-+
-+ switch (__extent_entry_type(entry)) {
-+ case BCH_EXTENT_ENTRY_ptr: {
-+ const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
-+ struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
-+ ? bch_dev_bkey_exists(c, ptr->dev)
-+ : NULL;
-+
-+ if (!ca) {
-+ prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
-+ (u64) ptr->offset, ptr->gen,
-+ ptr->cached ? " cached" : "");
-+ } else {
-+ u32 offset;
-+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-+
-+ prt_printf(out, "ptr: %u:%llu:%u gen %u",
-+ ptr->dev, b, offset, ptr->gen);
-+ if (ptr->cached)
-+ prt_str(out, " cached");
-+ if (ptr->unwritten)
-+ prt_str(out, " unwritten");
-+ if (ca && ptr_stale(ca, ptr))
-+ prt_printf(out, " stale");
-+ }
-+ break;
-+ }
-+ case BCH_EXTENT_ENTRY_crc32:
-+ case BCH_EXTENT_ENTRY_crc64:
-+ case BCH_EXTENT_ENTRY_crc128: {
-+ struct bch_extent_crc_unpacked crc =
-+ bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
-+
-+ prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
-+ crc.compressed_size,
-+ crc.uncompressed_size,
-+ crc.offset, crc.nonce,
-+ bch2_csum_types[crc.csum_type],
-+ bch2_compression_types[crc.compression_type]);
-+ break;
-+ }
-+ case BCH_EXTENT_ENTRY_stripe_ptr: {
-+ const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr;
-+
-+ prt_printf(out, "ec: idx %llu block %u",
-+ (u64) ec->idx, ec->block);
-+ break;
-+ }
-+ case BCH_EXTENT_ENTRY_rebalance: {
-+ const struct bch_extent_rebalance *r = &entry->rebalance;
-+
-+ prt_str(out, "rebalance: target ");
-+ if (c)
-+ bch2_target_to_text(out, c, r->target);
-+ else
-+ prt_printf(out, "%u", r->target);
-+ prt_str(out, " compression ");
-+ bch2_compression_opt_to_text(out, r->compression);
-+ break;
-+ }
-+ default:
-+ prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
-+ return;
-+ }
-+
-+ first = false;
-+ }
-+}
-+
-+static int extent_ptr_invalid(struct bch_fs *c,
-+ struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ const struct bch_extent_ptr *ptr,
-+ unsigned size_ondisk,
-+ bool metadata,
-+ struct printbuf *err)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr2;
-+ u64 bucket;
-+ u32 bucket_offset;
-+ struct bch_dev *ca;
-+ int ret = 0;
-+
-+ if (!bch2_dev_exists2(c, ptr->dev)) {
-+ /*
-+ * If we're in the write path this key might have already been
-+ * overwritten, and we could be seeing a device that doesn't
-+ * exist anymore due to racing with device removal:
-+ */
-+ if (flags & BKEY_INVALID_WRITE)
-+ return 0;
-+
-+ bkey_fsck_err(c, err, ptr_to_invalid_device,
-+ "pointer to invalid device (%u)", ptr->dev);
-+ }
-+
-+ ca = bch_dev_bkey_exists(c, ptr->dev);
-+ bkey_for_each_ptr(ptrs, ptr2)
-+ bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err,
-+ ptr_to_duplicate_device,
-+ "multiple pointers to same device (%u)", ptr->dev);
-+
-+ bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset);
-+
-+ bkey_fsck_err_on(bucket >= ca->mi.nbuckets, c, err,
-+ ptr_after_last_bucket,
-+ "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets);
-+ bkey_fsck_err_on(ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket), c, err,
-+ ptr_before_first_bucket,
-+ "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket);
-+ bkey_fsck_err_on(bucket_offset + size_ondisk > ca->mi.bucket_size, c, err,
-+ ptr_spans_multiple_buckets,
-+ "pointer spans multiple buckets (%u + %u > %u)",
-+ bucket_offset, size_ondisk, ca->mi.bucket_size);
-+fsck_err:
-+ return ret;
-+}
-+
-+int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct bch_extent_crc_unpacked crc;
-+ unsigned size_ondisk = k.k->size;
-+ unsigned nonce = UINT_MAX;
-+ unsigned nr_ptrs = 0;
-+ bool have_written = false, have_unwritten = false, have_ec = false, crc_since_last_ptr = false;
-+ int ret = 0;
-+
-+ if (bkey_is_btree_ptr(k.k))
-+ size_ondisk = btree_sectors(c);
-+
-+ bkey_extent_entry_for_each(ptrs, entry) {
-+ bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err,
-+ extent_ptrs_invalid_entry,
-+ "invalid extent entry type (got %u, max %u)",
-+ __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX);
-+
-+ bkey_fsck_err_on(bkey_is_btree_ptr(k.k) &&
-+ !extent_entry_is_ptr(entry), c, err,
-+ btree_ptr_has_non_ptr,
-+ "has non ptr field");
-+
-+ switch (extent_entry_type(entry)) {
-+ case BCH_EXTENT_ENTRY_ptr:
-+ ret = extent_ptr_invalid(c, k, flags, &entry->ptr,
-+ size_ondisk, false, err);
-+ if (ret)
-+ return ret;
-+
-+ bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err,
-+ ptr_cached_and_erasure_coded,
-+ "cached, erasure coded ptr");
-+
-+ if (!entry->ptr.unwritten)
-+ have_written = true;
-+ else
-+ have_unwritten = true;
-+
-+ have_ec = false;
-+ crc_since_last_ptr = false;
-+ nr_ptrs++;
-+ break;
-+ case BCH_EXTENT_ENTRY_crc32:
-+ case BCH_EXTENT_ENTRY_crc64:
-+ case BCH_EXTENT_ENTRY_crc128:
-+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
-+
-+ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err,
-+ ptr_crc_uncompressed_size_too_small,
-+ "checksum offset + key size > uncompressed size");
-+ bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err,
-+ ptr_crc_csum_type_unknown,
-+ "invalid checksum type");
-+ bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err,
-+ ptr_crc_compression_type_unknown,
-+ "invalid compression type");
-+
-+ if (bch2_csum_type_is_encryption(crc.csum_type)) {
-+ if (nonce == UINT_MAX)
-+ nonce = crc.offset + crc.nonce;
-+ else if (nonce != crc.offset + crc.nonce)
-+ bkey_fsck_err(c, err, ptr_crc_nonce_mismatch,
-+ "incorrect nonce");
-+ }
-+
-+ bkey_fsck_err_on(crc_since_last_ptr, c, err,
-+ ptr_crc_redundant,
-+ "redundant crc entry");
-+ crc_since_last_ptr = true;
-+
-+ bkey_fsck_err_on(crc_is_encoded(crc) &&
-+ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
-+ (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT)), c, err,
-+ ptr_crc_uncompressed_size_too_big,
-+ "too large encoded extent");
-+
-+ size_ondisk = crc.compressed_size;
-+ break;
-+ case BCH_EXTENT_ENTRY_stripe_ptr:
-+ bkey_fsck_err_on(have_ec, c, err,
-+ ptr_stripe_redundant,
-+ "redundant stripe entry");
-+ have_ec = true;
-+ break;
-+ case BCH_EXTENT_ENTRY_rebalance: {
-+ const struct bch_extent_rebalance *r = &entry->rebalance;
-+
-+ if (!bch2_compression_opt_valid(r->compression)) {
-+ struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
-+ prt_printf(err, "invalid compression opt %u:%u",
-+ opt.type, opt.level);
-+ return -BCH_ERR_invalid_bkey;
-+ }
-+ break;
-+ }
-+ }
-+ }
-+
-+ bkey_fsck_err_on(!nr_ptrs, c, err,
-+ extent_ptrs_no_ptrs,
-+ "no ptrs");
-+ bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err,
-+ extent_ptrs_too_many_ptrs,
-+ "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX);
-+ bkey_fsck_err_on(have_written && have_unwritten, c, err,
-+ extent_ptrs_written_and_unwritten,
-+ "extent with unwritten and written ptrs");
-+ bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err,
-+ extent_ptrs_unwritten,
-+ "has unwritten ptrs");
-+ bkey_fsck_err_on(crc_since_last_ptr, c, err,
-+ extent_ptrs_redundant_crc,
-+ "redundant crc entry");
-+ bkey_fsck_err_on(have_ec, c, err,
-+ extent_ptrs_redundant_stripe,
-+ "redundant stripe entry");
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_ptr_swab(struct bkey_s k)
-+{
-+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
-+ union bch_extent_entry *entry;
-+ u64 *d;
-+
-+ for (d = (u64 *) ptrs.start;
-+ d != (u64 *) ptrs.end;
-+ d++)
-+ *d = swab64(*d);
-+
-+ for (entry = ptrs.start;
-+ entry < ptrs.end;
-+ entry = extent_entry_next(entry)) {
-+ switch (extent_entry_type(entry)) {
-+ case BCH_EXTENT_ENTRY_ptr:
-+ break;
-+ case BCH_EXTENT_ENTRY_crc32:
-+ entry->crc32.csum = swab32(entry->crc32.csum);
-+ break;
-+ case BCH_EXTENT_ENTRY_crc64:
-+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
-+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
-+ break;
-+ case BCH_EXTENT_ENTRY_crc128:
-+ entry->crc128.csum.hi = (__force __le64)
-+ swab64((__force u64) entry->crc128.csum.hi);
-+ entry->crc128.csum.lo = (__force __le64)
-+ swab64((__force u64) entry->crc128.csum.lo);
-+ break;
-+ case BCH_EXTENT_ENTRY_stripe_ptr:
-+ break;
-+ case BCH_EXTENT_ENTRY_rebalance:
-+ break;
-+ }
-+ }
-+}
-+
-+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+
-+ bkey_extent_entry_for_each(ptrs, entry)
-+ if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance)
-+ return &entry->rebalance;
-+
-+ return NULL;
-+}
-+
-+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
-+ unsigned target, unsigned compression)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ unsigned rewrite_ptrs = 0;
-+
-+ if (compression) {
-+ unsigned compression_type = bch2_compression_opt_to_type(compression);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ unsigned i = 0;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) {
-+ rewrite_ptrs = 0;
-+ goto incompressible;
-+ }
-+
-+ if (!p.ptr.cached && p.crc.compression_type != compression_type)
-+ rewrite_ptrs |= 1U << i;
-+ i++;
-+ }
-+ }
-+incompressible:
-+ if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
-+ const struct bch_extent_ptr *ptr;
-+ unsigned i = 0;
-+
-+ bkey_for_each_ptr(ptrs, ptr) {
-+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target))
-+ rewrite_ptrs |= 1U << i;
-+ i++;
-+ }
-+ }
-+
-+ return rewrite_ptrs;
-+}
-+
-+bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
-+{
-+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
-+
-+ /*
-+ * If it's an indirect extent, we don't delete the rebalance entry when
-+ * done so that we know what options were applied - check if it still
-+ * needs work done:
-+ */
-+ if (r &&
-+ k.k->type == KEY_TYPE_reflink_v &&
-+ !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression))
-+ r = NULL;
-+
-+ return r != NULL;
-+}
-+
-+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
-+ unsigned target, unsigned compression)
-+{
-+ struct bkey_s k = bkey_i_to_s(_k);
-+ struct bch_extent_rebalance *r;
-+ bool needs_rebalance;
-+
-+ if (!bkey_extent_is_direct_data(k.k))
-+ return 0;
-+
-+ /* get existing rebalance entry: */
-+ r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);
-+ if (r) {
-+ if (k.k->type == KEY_TYPE_reflink_v) {
-+ /*
-+ * indirect extents: existing options take precedence,
-+ * so that we don't move extents back and forth if
-+ * they're referenced by different inodes with different
-+ * options:
-+ */
-+ if (r->target)
-+ target = r->target;
-+ if (r->compression)
-+ compression = r->compression;
-+ }
-+
-+ r->target = target;
-+ r->compression = compression;
-+ }
-+
-+ needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression);
-+
-+ if (needs_rebalance && !r) {
-+ union bch_extent_entry *new = bkey_val_end(k);
-+
-+ new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance;
-+ new->rebalance.compression = compression;
-+ new->rebalance.target = target;
-+ new->rebalance.unused = 0;
-+ k.k->u64s += extent_entry_u64s(new);
-+ } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) {
-+ /*
-+ * For indirect extents, don't delete the rebalance entry when
-+ * we're finished so that we know we specifically moved it or
-+ * compressed it to its current location/compression type
-+ */
-+ extent_entry_drop(k, (union bch_extent_entry *) r);
-+ }
-+
-+ return 0;
-+}
-+
-+/* Generic extent code: */
-+
-+int bch2_cut_front_s(struct bpos where, struct bkey_s k)
-+{
-+ unsigned new_val_u64s = bkey_val_u64s(k.k);
-+ int val_u64s_delta;
-+ u64 sub;
-+
-+ if (bkey_le(where, bkey_start_pos(k.k)))
-+ return 0;
-+
-+ EBUG_ON(bkey_gt(where, k.k->p));
-+
-+ sub = where.offset - bkey_start_offset(k.k);
-+
-+ k.k->size -= sub;
-+
-+ if (!k.k->size) {
-+ k.k->type = KEY_TYPE_deleted;
-+ new_val_u64s = 0;
-+ }
-+
-+ switch (k.k->type) {
-+ case KEY_TYPE_extent:
-+ case KEY_TYPE_reflink_v: {
-+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
-+ union bch_extent_entry *entry;
-+ bool seen_crc = false;
-+
-+ bkey_extent_entry_for_each(ptrs, entry) {
-+ switch (extent_entry_type(entry)) {
-+ case BCH_EXTENT_ENTRY_ptr:
-+ if (!seen_crc)
-+ entry->ptr.offset += sub;
-+ break;
-+ case BCH_EXTENT_ENTRY_crc32:
-+ entry->crc32.offset += sub;
-+ break;
-+ case BCH_EXTENT_ENTRY_crc64:
-+ entry->crc64.offset += sub;
-+ break;
-+ case BCH_EXTENT_ENTRY_crc128:
-+ entry->crc128.offset += sub;
-+ break;
-+ case BCH_EXTENT_ENTRY_stripe_ptr:
-+ break;
-+ case BCH_EXTENT_ENTRY_rebalance:
-+ break;
-+ }
-+
-+ if (extent_entry_is_crc(entry))
-+ seen_crc = true;
-+ }
-+
-+ break;
-+ }
-+ case KEY_TYPE_reflink_p: {
-+ struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k);
-+
-+ le64_add_cpu(&p.v->idx, sub);
-+ break;
-+ }
-+ case KEY_TYPE_inline_data:
-+ case KEY_TYPE_indirect_inline_data: {
-+ void *p = bkey_inline_data_p(k);
-+ unsigned bytes = bkey_inline_data_bytes(k.k);
-+
-+ sub = min_t(u64, sub << 9, bytes);
-+
-+ memmove(p, p + sub, bytes - sub);
-+
-+ new_val_u64s -= sub >> 3;
-+ break;
-+ }
-+ }
-+
-+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s;
-+ BUG_ON(val_u64s_delta < 0);
-+
-+ set_bkey_val_u64s(k.k, new_val_u64s);
-+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
-+ return -val_u64s_delta;
-+}
-+
-+int bch2_cut_back_s(struct bpos where, struct bkey_s k)
-+{
-+ unsigned new_val_u64s = bkey_val_u64s(k.k);
-+ int val_u64s_delta;
-+ u64 len = 0;
-+
-+ if (bkey_ge(where, k.k->p))
-+ return 0;
-+
-+ EBUG_ON(bkey_lt(where, bkey_start_pos(k.k)));
-+
-+ len = where.offset - bkey_start_offset(k.k);
-+
-+ k.k->p.offset = where.offset;
-+ k.k->size = len;
-+
-+ if (!len) {
-+ k.k->type = KEY_TYPE_deleted;
-+ new_val_u64s = 0;
-+ }
-+
-+ switch (k.k->type) {
-+ case KEY_TYPE_inline_data:
-+ case KEY_TYPE_indirect_inline_data:
-+ new_val_u64s = (bkey_inline_data_offset(k.k) +
-+ min(bkey_inline_data_bytes(k.k), k.k->size << 9)) >> 3;
-+ break;
-+ }
-+
-+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s;
-+ BUG_ON(val_u64s_delta < 0);
-+
-+ set_bkey_val_u64s(k.k, new_val_u64s);
-+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
-+ return -val_u64s_delta;
-+}
-diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
-new file mode 100644
-index 000000000000..a2ce8a3be13c
---- /dev/null
-+++ b/fs/bcachefs/extents.h
-@@ -0,0 +1,765 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_EXTENTS_H
-+#define _BCACHEFS_EXTENTS_H
-+
-+#include "bcachefs.h"
-+#include "bkey.h"
-+#include "extents_types.h"
-+
-+struct bch_fs;
-+struct btree_trans;
-+enum bkey_invalid_flags;
-+
-+/* extent entries: */
-+
-+#define extent_entry_last(_e) \
-+ ((typeof(&(_e).v->start[0])) bkey_val_end(_e))
-+
-+#define entry_to_ptr(_entry) \
-+({ \
-+ EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \
-+ \
-+ __builtin_choose_expr( \
-+ type_is_exact(_entry, const union bch_extent_entry *), \
-+ (const struct bch_extent_ptr *) (_entry), \
-+ (struct bch_extent_ptr *) (_entry)); \
-+})
-+
-+/* downcast, preserves const */
-+#define to_entry(_entry) \
-+({ \
-+ BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \
-+ !type_is(_entry, struct bch_extent_ptr *) && \
-+ !type_is(_entry, struct bch_extent_stripe_ptr *)); \
-+ \
-+ __builtin_choose_expr( \
-+ (type_is_exact(_entry, const union bch_extent_crc *) || \
-+ type_is_exact(_entry, const struct bch_extent_ptr *) ||\
-+ type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
-+ (const union bch_extent_entry *) (_entry), \
-+ (union bch_extent_entry *) (_entry)); \
-+})
-+
-+#define extent_entry_next(_entry) \
-+ ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry)))
-+
-+static inline unsigned
-+__extent_entry_type(const union bch_extent_entry *e)
-+{
-+ return e->type ? __ffs(e->type) : BCH_EXTENT_ENTRY_MAX;
-+}
-+
-+static inline enum bch_extent_entry_type
-+extent_entry_type(const union bch_extent_entry *e)
-+{
-+ int ret = __ffs(e->type);
-+
-+ EBUG_ON(ret < 0 || ret >= BCH_EXTENT_ENTRY_MAX);
-+
-+ return ret;
-+}
-+
-+static inline size_t extent_entry_bytes(const union bch_extent_entry *entry)
-+{
-+ switch (extent_entry_type(entry)) {
-+#define x(f, n) \
-+ case BCH_EXTENT_ENTRY_##f: \
-+ return sizeof(struct bch_extent_##f);
-+ BCH_EXTENT_ENTRY_TYPES()
-+#undef x
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
-+{
-+ return extent_entry_bytes(entry) / sizeof(u64);
-+}
-+
-+static inline void __extent_entry_insert(struct bkey_i *k,
-+ union bch_extent_entry *dst,
-+ union bch_extent_entry *new)
-+{
-+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
-+
-+ memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
-+ dst, (u64 *) end - (u64 *) dst);
-+ k->k.u64s += extent_entry_u64s(new);
-+ memcpy_u64s_small(dst, new, extent_entry_u64s(new));
-+}
-+
-+static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
-+{
-+ union bch_extent_entry *next = extent_entry_next(entry);
-+
-+ /* stripes have ptrs, but their layout doesn't work with this code */
-+ BUG_ON(k.k->type == KEY_TYPE_stripe);
-+
-+ memmove_u64s_down(entry, next,
-+ (u64 *) bkey_val_end(k) - (u64 *) next);
-+ k.k->u64s -= (u64 *) next - (u64 *) entry;
-+}
-+
-+static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
-+{
-+ return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
-+}
-+
-+static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e)
-+{
-+ return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
-+}
-+
-+static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
-+{
-+ switch (extent_entry_type(e)) {
-+ case BCH_EXTENT_ENTRY_crc32:
-+ case BCH_EXTENT_ENTRY_crc64:
-+ case BCH_EXTENT_ENTRY_crc128:
-+ return true;
-+ default:
-+ return false;
-+ }
-+}
-+
-+union bch_extent_crc {
-+ u8 type;
-+ struct bch_extent_crc32 crc32;
-+ struct bch_extent_crc64 crc64;
-+ struct bch_extent_crc128 crc128;
-+};
-+
-+#define __entry_to_crc(_entry) \
-+ __builtin_choose_expr( \
-+ type_is_exact(_entry, const union bch_extent_entry *), \
-+ (const union bch_extent_crc *) (_entry), \
-+ (union bch_extent_crc *) (_entry))
-+
-+#define entry_to_crc(_entry) \
-+({ \
-+ EBUG_ON((_entry) && !extent_entry_is_crc(_entry)); \
-+ \
-+ __entry_to_crc(_entry); \
-+})
-+
-+static inline struct bch_extent_crc_unpacked
-+bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
-+{
-+#define common_fields(_crc) \
-+ .csum_type = _crc.csum_type, \
-+ .compression_type = _crc.compression_type, \
-+ .compressed_size = _crc._compressed_size + 1, \
-+ .uncompressed_size = _crc._uncompressed_size + 1, \
-+ .offset = _crc.offset, \
-+ .live_size = k->size
-+
-+ if (!crc)
-+ return (struct bch_extent_crc_unpacked) {
-+ .compressed_size = k->size,
-+ .uncompressed_size = k->size,
-+ .live_size = k->size,
-+ };
-+
-+ switch (extent_entry_type(to_entry(crc))) {
-+ case BCH_EXTENT_ENTRY_crc32: {
-+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
-+ common_fields(crc->crc32),
-+ };
-+
-+ *((__le32 *) &ret.csum.lo) = (__le32 __force) crc->crc32.csum;
-+ return ret;
-+ }
-+ case BCH_EXTENT_ENTRY_crc64: {
-+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
-+ common_fields(crc->crc64),
-+ .nonce = crc->crc64.nonce,
-+ .csum.lo = (__force __le64) crc->crc64.csum_lo,
-+ };
-+
-+ *((__le16 *) &ret.csum.hi) = (__le16 __force) crc->crc64.csum_hi;
-+
-+ return ret;
-+ }
-+ case BCH_EXTENT_ENTRY_crc128: {
-+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
-+ common_fields(crc->crc128),
-+ .nonce = crc->crc128.nonce,
-+ .csum = crc->crc128.csum,
-+ };
-+
-+ return ret;
-+ }
-+ default:
-+ BUG();
-+ }
-+#undef common_fields
-+}
-+
-+static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc)
-+{
-+ return (crc.compression_type != BCH_COMPRESSION_TYPE_none &&
-+ crc.compression_type != BCH_COMPRESSION_TYPE_incompressible);
-+}
-+
-+static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc)
-+{
-+ return crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc);
-+}
-+
-+/* bkey_ptrs: generically over any key type that has ptrs */
-+
-+struct bkey_ptrs_c {
-+ const union bch_extent_entry *start;
-+ const union bch_extent_entry *end;
-+};
-+
-+struct bkey_ptrs {
-+ union bch_extent_entry *start;
-+ union bch_extent_entry *end;
-+};
-+
-+static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
-+{
-+ switch (k.k->type) {
-+ case KEY_TYPE_btree_ptr: {
-+ struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k);
-+
-+ return (struct bkey_ptrs_c) {
-+ to_entry(&e.v->start[0]),
-+ to_entry(extent_entry_last(e))
-+ };
-+ }
-+ case KEY_TYPE_extent: {
-+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-+
-+ return (struct bkey_ptrs_c) {
-+ e.v->start,
-+ extent_entry_last(e)
-+ };
-+ }
-+ case KEY_TYPE_stripe: {
-+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-+
-+ return (struct bkey_ptrs_c) {
-+ to_entry(&s.v->ptrs[0]),
-+ to_entry(&s.v->ptrs[s.v->nr_blocks]),
-+ };
-+ }
-+ case KEY_TYPE_reflink_v: {
-+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
-+
-+ return (struct bkey_ptrs_c) {
-+ r.v->start,
-+ bkey_val_end(r),
-+ };
-+ }
-+ case KEY_TYPE_btree_ptr_v2: {
-+ struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k);
-+
-+ return (struct bkey_ptrs_c) {
-+ to_entry(&e.v->start[0]),
-+ to_entry(extent_entry_last(e))
-+ };
-+ }
-+ default:
-+ return (struct bkey_ptrs_c) { NULL, NULL };
-+ }
-+}
-+
-+static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
-+{
-+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c);
-+
-+ return (struct bkey_ptrs) {
-+ (void *) p.start,
-+ (void *) p.end
-+ };
-+}
-+
-+#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \
-+ for ((_entry) = (_start); \
-+ (_entry) < (_end); \
-+ (_entry) = extent_entry_next(_entry))
-+
-+#define __bkey_ptr_next(_ptr, _end) \
-+({ \
-+ typeof(_end) _entry; \
-+ \
-+ __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \
-+ if (extent_entry_is_ptr(_entry)) \
-+ break; \
-+ \
-+ _entry < (_end) ? entry_to_ptr(_entry) : NULL; \
-+})
-+
-+#define bkey_extent_entry_for_each_from(_p, _entry, _start) \
-+ __bkey_extent_entry_for_each_from(_start, (_p).end, _entry)
-+
-+#define bkey_extent_entry_for_each(_p, _entry) \
-+ bkey_extent_entry_for_each_from(_p, _entry, _p.start)
-+
-+#define __bkey_for_each_ptr(_start, _end, _ptr) \
-+ for ((_ptr) = (_start); \
-+ ((_ptr) = __bkey_ptr_next(_ptr, _end)); \
-+ (_ptr)++)
-+
-+#define bkey_ptr_next(_p, _ptr) \
-+ __bkey_ptr_next(_ptr, (_p).end)
-+
-+#define bkey_for_each_ptr(_p, _ptr) \
-+ __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr)
-+
-+#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \
-+({ \
-+ __label__ out; \
-+ \
-+ (_ptr).idx = 0; \
-+ (_ptr).has_ec = false; \
-+ \
-+ __bkey_extent_entry_for_each_from(_entry, _end, _entry) \
-+ switch (extent_entry_type(_entry)) { \
-+ case BCH_EXTENT_ENTRY_ptr: \
-+ (_ptr).ptr = _entry->ptr; \
-+ goto out; \
-+ case BCH_EXTENT_ENTRY_crc32: \
-+ case BCH_EXTENT_ENTRY_crc64: \
-+ case BCH_EXTENT_ENTRY_crc128: \
-+ (_ptr).crc = bch2_extent_crc_unpack(_k, \
-+ entry_to_crc(_entry)); \
-+ break; \
-+ case BCH_EXTENT_ENTRY_stripe_ptr: \
-+ (_ptr).ec = _entry->stripe_ptr; \
-+ (_ptr).has_ec = true; \
-+ break; \
-+ default: \
-+ /* nothing */ \
-+ break; \
-+ } \
-+out: \
-+ _entry < (_end); \
-+})
-+
-+#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \
-+ for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \
-+ (_entry) = _start; \
-+ __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \
-+ (_entry) = extent_entry_next(_entry))
-+
-+#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \
-+ __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \
-+ _ptr, _entry)
-+
-+#define bkey_crc_next(_k, _start, _end, _crc, _iter) \
-+({ \
-+ __bkey_extent_entry_for_each_from(_iter, _end, _iter) \
-+ if (extent_entry_is_crc(_iter)) { \
-+ (_crc) = bch2_extent_crc_unpack(_k, \
-+ entry_to_crc(_iter)); \
-+ break; \
-+ } \
-+ \
-+ (_iter) < (_end); \
-+})
-+
-+#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \
-+ for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \
-+ (_iter) = (_start); \
-+ bkey_crc_next(_k, _start, _end, _crc, _iter); \
-+ (_iter) = extent_entry_next(_iter))
-+
-+#define bkey_for_each_crc(_k, _p, _crc, _iter) \
-+ __bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter)
-+
-+/* Iterate over pointers in KEY_TYPE_extent: */
-+
-+#define extent_for_each_entry_from(_e, _entry, _start) \
-+ __bkey_extent_entry_for_each_from(_start, \
-+ extent_entry_last(_e), _entry)
-+
-+#define extent_for_each_entry(_e, _entry) \
-+ extent_for_each_entry_from(_e, _entry, (_e).v->start)
-+
-+#define extent_ptr_next(_e, _ptr) \
-+ __bkey_ptr_next(_ptr, extent_entry_last(_e))
-+
-+#define extent_for_each_ptr(_e, _ptr) \
-+ __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr)
-+
-+#define extent_for_each_ptr_decode(_e, _ptr, _entry) \
-+ __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \
-+ extent_entry_last(_e), _ptr, _entry)
-+
-+/* utility code common to all keys with pointers: */
-+
-+void bch2_mark_io_failure(struct bch_io_failures *,
-+ struct extent_ptr_decoded *);
-+int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
-+ struct bch_io_failures *,
-+ struct extent_ptr_decoded *);
-+
-+/* KEY_TYPE_btree_ptr: */
-+
-+int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
-+ struct bkey_s_c);
-+
-+int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
-+ int, struct bkey_s);
-+
-+#define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \
-+ .key_invalid = bch2_btree_ptr_invalid, \
-+ .val_to_text = bch2_btree_ptr_to_text, \
-+ .swab = bch2_ptr_swab, \
-+ .trans_trigger = bch2_trans_mark_extent, \
-+ .atomic_trigger = bch2_mark_extent, \
-+})
-+
-+#define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \
-+ .key_invalid = bch2_btree_ptr_v2_invalid, \
-+ .val_to_text = bch2_btree_ptr_v2_to_text, \
-+ .swab = bch2_ptr_swab, \
-+ .compat = bch2_btree_ptr_v2_compat, \
-+ .trans_trigger = bch2_trans_mark_extent, \
-+ .atomic_trigger = bch2_mark_extent, \
-+ .min_val_size = 40, \
-+})
-+
-+/* KEY_TYPE_extent: */
-+
-+bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_extent ((struct bkey_ops) { \
-+ .key_invalid = bch2_bkey_ptrs_invalid, \
-+ .val_to_text = bch2_bkey_ptrs_to_text, \
-+ .swab = bch2_ptr_swab, \
-+ .key_normalize = bch2_extent_normalize, \
-+ .key_merge = bch2_extent_merge, \
-+ .trans_trigger = bch2_trans_mark_extent, \
-+ .atomic_trigger = bch2_mark_extent, \
-+})
-+
-+/* KEY_TYPE_reservation: */
-+
-+int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_reservation ((struct bkey_ops) { \
-+ .key_invalid = bch2_reservation_invalid, \
-+ .val_to_text = bch2_reservation_to_text, \
-+ .key_merge = bch2_reservation_merge, \
-+ .trans_trigger = bch2_trans_mark_reservation, \
-+ .atomic_trigger = bch2_mark_reservation, \
-+ .min_val_size = 8, \
-+})
-+
-+/* Extent checksum entries: */
-+
-+bool bch2_can_narrow_extent_crcs(struct bkey_s_c,
-+ struct bch_extent_crc_unpacked);
-+bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked);
-+void bch2_extent_crc_append(struct bkey_i *,
-+ struct bch_extent_crc_unpacked);
-+
-+/* Generic code for keys with pointers: */
-+
-+static inline bool bkey_is_btree_ptr(const struct bkey *k)
-+{
-+ switch (k->type) {
-+ case KEY_TYPE_btree_ptr:
-+ case KEY_TYPE_btree_ptr_v2:
-+ return true;
-+ default:
-+ return false;
-+ }
-+}
-+
-+static inline bool bkey_extent_is_direct_data(const struct bkey *k)
-+{
-+ switch (k->type) {
-+ case KEY_TYPE_btree_ptr:
-+ case KEY_TYPE_btree_ptr_v2:
-+ case KEY_TYPE_extent:
-+ case KEY_TYPE_reflink_v:
-+ return true;
-+ default:
-+ return false;
-+ }
-+}
-+
-+static inline bool bkey_extent_is_inline_data(const struct bkey *k)
-+{
-+ return k->type == KEY_TYPE_inline_data ||
-+ k->type == KEY_TYPE_indirect_inline_data;
-+}
-+
-+static inline unsigned bkey_inline_data_offset(const struct bkey *k)
-+{
-+ switch (k->type) {
-+ case KEY_TYPE_inline_data:
-+ return sizeof(struct bch_inline_data);
-+ case KEY_TYPE_indirect_inline_data:
-+ return sizeof(struct bch_indirect_inline_data);
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static inline unsigned bkey_inline_data_bytes(const struct bkey *k)
-+{
-+ return bkey_val_bytes(k) - bkey_inline_data_offset(k);
-+}
-+
-+#define bkey_inline_data_p(_k) (((void *) (_k).v) + bkey_inline_data_offset((_k).k))
-+
-+static inline bool bkey_extent_is_data(const struct bkey *k)
-+{
-+ return bkey_extent_is_direct_data(k) ||
-+ bkey_extent_is_inline_data(k) ||
-+ k->type == KEY_TYPE_reflink_p;
-+}
-+
-+/*
-+ * Should extent be counted under inode->i_sectors?
-+ */
-+static inline bool bkey_extent_is_allocation(const struct bkey *k)
-+{
-+ switch (k->type) {
-+ case KEY_TYPE_extent:
-+ case KEY_TYPE_reservation:
-+ case KEY_TYPE_reflink_p:
-+ case KEY_TYPE_reflink_v:
-+ case KEY_TYPE_inline_data:
-+ case KEY_TYPE_indirect_inline_data:
-+ case KEY_TYPE_error:
-+ return true;
-+ default:
-+ return false;
-+ }
-+}
-+
-+static inline bool bkey_extent_is_unwritten(struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+
-+ bkey_for_each_ptr(ptrs, ptr)
-+ if (ptr->unwritten)
-+ return true;
-+ return false;
-+}
-+
-+static inline bool bkey_extent_is_reservation(struct bkey_s_c k)
-+{
-+ return k.k->type == KEY_TYPE_reservation ||
-+ bkey_extent_is_unwritten(k);
-+}
-+
-+static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
-+{
-+ struct bch_devs_list ret = (struct bch_devs_list) { 0 };
-+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+
-+ bkey_for_each_ptr(p, ptr)
-+ ret.devs[ret.nr++] = ptr->dev;
-+
-+ return ret;
-+}
-+
-+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
-+{
-+ struct bch_devs_list ret = (struct bch_devs_list) { 0 };
-+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+
-+ bkey_for_each_ptr(p, ptr)
-+ if (!ptr->cached)
-+ ret.devs[ret.nr++] = ptr->dev;
-+
-+ return ret;
-+}
-+
-+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
-+{
-+ struct bch_devs_list ret = (struct bch_devs_list) { 0 };
-+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
-+ const struct bch_extent_ptr *ptr;
-+
-+ bkey_for_each_ptr(p, ptr)
-+ if (ptr->cached)
-+ ret.devs[ret.nr++] = ptr->dev;
-+
-+ return ret;
-+}
-+
-+static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr)
-+{
-+ switch (k.k->type) {
-+ case KEY_TYPE_btree_ptr:
-+ case KEY_TYPE_btree_ptr_v2:
-+ return BCH_DATA_btree;
-+ case KEY_TYPE_extent:
-+ case KEY_TYPE_reflink_v:
-+ return BCH_DATA_user;
-+ case KEY_TYPE_stripe: {
-+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-+
-+ BUG_ON(ptr < s.v->ptrs ||
-+ ptr >= s.v->ptrs + s.v->nr_blocks);
-+
-+ return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
-+ ? BCH_DATA_parity
-+ : BCH_DATA_user;
-+ }
-+ default:
-+ BUG();
-+ }
-+}
-+
-+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
-+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
-+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
-+bool bch2_bkey_is_incompressible(struct bkey_s_c);
-+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
-+
-+unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
-+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *, struct extent_ptr_decoded *);
-+unsigned bch2_extent_ptr_durability(struct bch_fs *, struct extent_ptr_decoded *);
-+unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
-+
-+void bch2_bkey_drop_device(struct bkey_s, unsigned);
-+void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
-+
-+const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c, unsigned);
-+
-+static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsigned dev)
-+{
-+ return (void *) bch2_bkey_has_device_c(k.s_c, dev);
-+}
-+
-+bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
-+
-+void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
-+
-+static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr ptr)
-+{
-+ struct bch_extent_ptr *dest;
-+
-+ EBUG_ON(bch2_bkey_has_device(bkey_i_to_s(k), ptr.dev));
-+
-+ switch (k->k.type) {
-+ case KEY_TYPE_btree_ptr:
-+ case KEY_TYPE_btree_ptr_v2:
-+ case KEY_TYPE_extent:
-+ EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
-+
-+ ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
-+ dest = (struct bch_extent_ptr *)((void *) &k->v + bkey_val_bytes(&k->k));
-+ *dest = ptr;
-+ k->k.u64s++;
-+ break;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+void bch2_extent_ptr_decoded_append(struct bkey_i *,
-+ struct extent_ptr_decoded *);
-+union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s,
-+ struct bch_extent_ptr *);
-+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
-+ struct bch_extent_ptr *);
-+
-+#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \
-+do { \
-+ struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \
-+ \
-+ _ptr = &_ptrs.start->ptr; \
-+ \
-+ while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \
-+ if (_cond) { \
-+ _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \
-+ _ptrs = bch2_bkey_ptrs(_k); \
-+ continue; \
-+ } \
-+ \
-+ (_ptr)++; \
-+ } \
-+} while (0)
-+
-+bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
-+ struct bch_extent_ptr, u64);
-+bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
-+struct bch_extent_ptr *
-+bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
-+
-+void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
-+
-+bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
-+void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
-+ struct bkey_s_c);
-+int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+
-+void bch2_ptr_swab(struct bkey_s);
-+
-+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c);
-+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
-+ unsigned, unsigned);
-+bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
-+
-+int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
-+ unsigned, unsigned);
-+
-+/* Generic extent code: */
-+
-+enum bch_extent_overlap {
-+ BCH_EXTENT_OVERLAP_ALL = 0,
-+ BCH_EXTENT_OVERLAP_BACK = 1,
-+ BCH_EXTENT_OVERLAP_FRONT = 2,
-+ BCH_EXTENT_OVERLAP_MIDDLE = 3,
-+};
-+
-+/* Returns how k overlaps with m */
-+static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
-+ const struct bkey *m)
-+{
-+ int cmp1 = bkey_lt(k->p, m->p);
-+ int cmp2 = bkey_gt(bkey_start_pos(k), bkey_start_pos(m));
-+
-+ return (cmp1 << 1) + cmp2;
-+}
-+
-+int bch2_cut_front_s(struct bpos, struct bkey_s);
-+int bch2_cut_back_s(struct bpos, struct bkey_s);
-+
-+static inline void bch2_cut_front(struct bpos where, struct bkey_i *k)
-+{
-+ bch2_cut_front_s(where, bkey_i_to_s(k));
-+}
-+
-+static inline void bch2_cut_back(struct bpos where, struct bkey_i *k)
-+{
-+ bch2_cut_back_s(where, bkey_i_to_s(k));
-+}
-+
-+/**
-+ * bch_key_resize - adjust size of @k
-+ *
-+ * bkey_start_offset(k) will be preserved, modifies where the extent ends
-+ */
-+static inline void bch2_key_resize(struct bkey *k, unsigned new_size)
-+{
-+ k->p.offset -= k->size;
-+ k->p.offset += new_size;
-+ k->size = new_size;
-+}
-+
-+#endif /* _BCACHEFS_EXTENTS_H */
-diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h
-new file mode 100644
-index 000000000000..43d6c341ecca
---- /dev/null
-+++ b/fs/bcachefs/extents_types.h
-@@ -0,0 +1,40 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_EXTENTS_TYPES_H
-+#define _BCACHEFS_EXTENTS_TYPES_H
-+
-+#include "bcachefs_format.h"
-+
-+/*
-+ * Checksum/compression description of an extent held as a plain struct.
-+ * NOTE(review): the per-field meanings are only implied by the field names
-+ * here - confirm against the on-disk definitions in bcachefs_format.h.
-+ */
-+struct bch_extent_crc_unpacked {
-+ u32 compressed_size;
-+ u32 uncompressed_size;
-+ u32 live_size;
-+
-+ u8 csum_type;
-+ u8 compression_type;
-+
-+ u16 offset;
-+
-+ u16 nonce;
-+
-+ struct bch_csum csum;
-+};
-+
-+/*
-+ * One extent pointer bundled with the crc entry and stripe pointer that go
-+ * with it.
-+ * NOTE(review): presumably @ec is only meaningful when @has_ec is set, and
-+ * @idx is the pointer's position within its key - confirm against users.
-+ */
-+struct extent_ptr_decoded {
-+ unsigned idx;
-+ bool has_ec;
-+ struct bch_extent_crc_unpacked crc;
-+ struct bch_extent_ptr ptr;
-+ struct bch_extent_stripe_ptr ec;
-+};
-+
-+/*
-+ * Per-device I/O failure bookkeeping, with room for BCH_REPLICAS_MAX devices.
-+ * NOTE(review): @nr is presumably the number of devs[] entries in use -
-+ * confirm against the code that fills this in.
-+ */
-+struct bch_io_failures {
-+ u8 nr;
-+ struct bch_dev_io_failures {
-+ u8 dev;
-+ u8 idx;
-+ u8 nr_failed;
-+ u8 nr_retries;
-+ } devs[BCH_REPLICAS_MAX];
-+};
-+
-+#endif /* _BCACHEFS_EXTENTS_TYPES_H */
-diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
-new file mode 100644
-index 000000000000..05429c9631cd
---- /dev/null
-+++ b/fs/bcachefs/eytzinger.h
-@@ -0,0 +1,281 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _EYTZINGER_H
-+#define _EYTZINGER_H
-+
-+#include <linux/bitops.h>
-+#include <linux/log2.h>
-+
-+#include "util.h"
-+
-+/*
-+ * Traversal for trees in eytzinger layout - a full binary tree laid out in an
-+ * array
-+ */
-+
-+/*
-+ * One based indexing version:
-+ *
-+ * With one based indexing each level of the tree starts at a power of two -
-+ * good for cacheline alignment:
-+ */
-+
-+/*
-+ * Index of a child of node @i (one based): @child selects the left (0) or
-+ * right (1) child; any other value trips the EBUG_ON() debug assertion.
-+ */
-+static inline unsigned eytzinger1_child(unsigned i, unsigned child)
-+{
-+ EBUG_ON(child > 1);
-+
-+ return (i << 1) + child;
-+}
-+
-+/* Left child of node @i (one based): same value as eytzinger1_child(i, 0). */
-+static inline unsigned eytzinger1_left_child(unsigned i)
-+{
-+ return i << 1;
-+}
-+
-+/* Right child of node @i (one based): same value as eytzinger1_child(i, 1). */
-+static inline unsigned eytzinger1_right_child(unsigned i)
-+{
-+ return (i << 1) + 1;
-+}
-+
-+/*
-+ * Index of the first node visited by eytzinger1_for_each() for a tree of
-+ * @size nodes: the largest power of two not above @size.
-+ * NOTE(review): behaviour for @size == 0 depends on rounddown_pow_of_two().
-+ */
-+static inline unsigned eytzinger1_first(unsigned size)
-+{
-+ return rounddown_pow_of_two(size);
-+}
-+
-+/*
-+ * Index of the last node in traversal order for a tree of @size nodes:
-+ * one less than the largest power of two not above @size + 1.
-+ */
-+static inline unsigned eytzinger1_last(unsigned size)
-+{
-+ return rounddown_pow_of_two(size + 1) - 1;
-+}
-+
-+/*
-+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that
-+ *
-+ * eytzinger1_next(0) == eytzinger1_first()
-+ * eytzinger1_prev(0) == eytzinger1_last()
-+ *
-+ * eytzinger1_prev(eytzinger1_first()) == 0
-+ * eytzinger1_next(eytzinger1_last()) == 0
-+ */
-+
-+/*
-+ * Successor of node @i in traversal order for a tree of @size nodes (one
-+ * based). Index 0 acts as the "before first / after last" sentinel: it is
-+ * what eytzinger1_for_each() tests for to stop.
-+ */
-+static inline unsigned eytzinger1_next(unsigned i, unsigned size)
-+{
-+ EBUG_ON(i > size);
-+
-+ if (eytzinger1_right_child(i) <= size) {
-+ /* Step to the right child, then as far left as the tree goes: */
-+ i = eytzinger1_right_child(i);
-+
-+ /* each left-child step is a shift by one; jump to the deepest level */
-+ i <<= __fls(size + 1) - __fls(i);
-+ /* back up one level if that overshot the last valid index */
-+ i >>= i > size;
-+ } else {
-+ /* No right child: drop the trailing 1 bits (right-child steps), then go to the parent */
-+ i >>= ffz(i) + 1;
-+ }
-+
-+ return i;
-+}
-+
-+/*
-+ * Predecessor of node @i in traversal order for a tree of @size nodes (one
-+ * based); mirror image of eytzinger1_next(). Index 0 is the sentinel.
-+ */
-+static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
-+{
-+ EBUG_ON(i > size);
-+
-+ if (eytzinger1_left_child(i) <= size) {
-+ /* Step to the left child, then as far right as the tree goes: */
-+ i = eytzinger1_left_child(i) + 1;
-+
-+ i <<= __fls(size + 1) - __fls(i);
-+ i -= 1;
-+ /* back up one level if that overshot the last valid index */
-+ i >>= i > size;
-+ } else {
-+ /* No left child: drop the trailing 0 bits (left-child steps), then go to the parent */
-+ i >>= __ffs(i) + 1;
-+ }
-+
-+ return i;
-+}
-+
-+/*
-+ * Precomputed threshold used by the eytzinger <-> inorder conversions:
-+ * (@size + 1 - rounddown_pow_of_two(@size)) * 2. It is passed as the @extra
-+ * argument of __eytzinger1_to_inorder() and __inorder_to_eytzinger1().
-+ */
-+static inline unsigned eytzinger1_extra(unsigned size)
-+{
-+ return (size + 1 - rounddown_pow_of_two(size)) << 1;
-+}
-+
-+/*
-+ * Convert eytzinger index @i (1..@size) to an inorder index, given @extra as
-+ * computed by eytzinger1_extra(@size).
-+ *
-+ * NOTE(review): __fls(i) is evaluated in the initializers, i.e. before the
-+ * EBUG_ON(!i) check below runs. The sign bit trick relies on a 32 bit int
-+ * and on >> of a negative int being an arithmetic shift.
-+ */
-+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
-+ unsigned extra)
-+{
-+ unsigned b = __fls(i);
-+ unsigned shift = __fls(size) - b;
-+ int s;
-+
-+ EBUG_ON(!i || i > size);
-+
-+ /* clear the top set bit, append a 1 bit, then shift by the level difference */
-+ i ^= 1U << b;
-+ i <<= 1;
-+ i |= 1;
-+ i <<= shift;
-+
-+ /*
-+ * sign bit trick:
-+ *
-+ * if (i > extra)
-+ * i -= (i - extra) >> 1;
-+ */
-+ s = extra - i;
-+ i += (s >> 1) & (s >> 31);
-+
-+ return i;
-+}
-+
-+/*
-+ * Convert inorder index @i (1..@size) to an eytzinger index, given @extra as
-+ * computed by eytzinger1_extra(@size).
-+ *
-+ * NOTE(review): the sign bit trick relies on a 32 bit int and on >> of a
-+ * negative int being an arithmetic shift.
-+ */
-+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
-+ unsigned extra)
-+{
-+ unsigned shift;
-+ int s;
-+
-+ EBUG_ON(!i || i > size);
-+
-+ /*
-+ * sign bit trick:
-+ *
-+ * if (i > extra)
-+ * i += i - extra;
-+ */
-+ s = extra - i;
-+ i -= s & (s >> 31);
-+
-+ /* strip the trailing zeros and the lowest set bit, then set a new top bit */
-+ shift = __ffs(i);
-+
-+ i >>= shift + 1;
-+ i |= 1U << (__fls(size) - shift);
-+
-+ return i;
-+}
-+
-+/* Eytzinger index (one based) -> inorder index for a tree of @size nodes. */
-+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
-+{
-+ unsigned extra = eytzinger1_extra(size);
-+
-+ return __eytzinger1_to_inorder(i, size, extra);
-+}
-+
-+/* Inorder index -> eytzinger index (one based) for a tree of @size nodes. */
-+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
-+{
-+ unsigned extra = eytzinger1_extra(size);
-+
-+ return __inorder_to_eytzinger1(i, size, extra);
-+}
-+
-+/*
-+ * Visit every index of a one based eytzinger tree of @_size nodes, starting
-+ * at eytzinger1_first() and following eytzinger1_next() until it yields 0.
-+ * @_i and @_size are evaluated more than once.
-+ */
-+#define eytzinger1_for_each(_i, _size) \
-+ for ((_i) = eytzinger1_first((_size)); \
-+ (_i) != 0; \
-+ (_i) = eytzinger1_next((_i), (_size)))
-+
-+/* Zero based indexing version: */
-+
-+/*
-+ * Index of a child of node @i (zero based): @child selects the left (0) or
-+ * right (1) child; any other value trips the EBUG_ON() debug assertion.
-+ */
-+static inline unsigned eytzinger0_child(unsigned i, unsigned child)
-+{
-+ EBUG_ON(child > 1);
-+
-+ return (i << 1) + 1 + child;
-+}
-+
-+/* Left child of node @i (zero based): same value as eytzinger0_child(i, 0). */
-+static inline unsigned eytzinger0_left_child(unsigned i)
-+{
-+ return (i << 1) + 1;
-+}
-+
-+/* Right child of node @i (zero based): same value as eytzinger0_child(i, 1). */
-+static inline unsigned eytzinger0_right_child(unsigned i)
-+{
-+ return (i << 1) + 2;
-+}
-+
-+/* Zero based counterpart of eytzinger1_first(). */
-+static inline unsigned eytzinger0_first(unsigned size)
-+{
-+ unsigned first1 = eytzinger1_first(size);
-+
-+ return first1 - 1;
-+}
-+
-+/* Zero based counterpart of eytzinger1_last(). */
-+static inline unsigned eytzinger0_last(unsigned size)
-+{
-+ unsigned last1 = eytzinger1_last(size);
-+
-+ return last1 - 1;
-+}
-+
-+/*
-+ * Zero based counterpart of eytzinger1_next(); the one based sentinel 0
-+ * becomes (unsigned) -1 here.
-+ */
-+static inline unsigned eytzinger0_next(unsigned i, unsigned size)
-+{
-+ unsigned next1 = eytzinger1_next(i + 1, size);
-+
-+ return next1 - 1;
-+}
-+
-+/*
-+ * Zero based counterpart of eytzinger1_prev(); the one based sentinel 0
-+ * becomes (unsigned) -1 here.
-+ */
-+static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
-+{
-+ unsigned prev1 = eytzinger1_prev(i + 1, size);
-+
-+ return prev1 - 1;
-+}
-+
-+/* The @extra threshold is the same for the zero and one based layouts. */
-+static inline unsigned eytzinger0_extra(unsigned size)
-+{
-+ unsigned extra = eytzinger1_extra(size);
-+
-+ return extra;
-+}
-+
-+/* Zero based wrapper: shift to one based, convert, shift the result back. */
-+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
-+ unsigned extra)
-+{
-+ unsigned inorder1 = __eytzinger1_to_inorder(i + 1, size, extra);
-+
-+ return inorder1 - 1;
-+}
-+
-+/* Zero based wrapper: shift to one based, convert, shift the result back. */
-+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
-+ unsigned extra)
-+{
-+ unsigned eytzinger1 = __inorder_to_eytzinger1(i + 1, size, extra);
-+
-+ return eytzinger1 - 1;
-+}
-+
-+/* Eytzinger index (zero based) -> inorder index for a tree of @size nodes. */
-+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
-+{
-+ unsigned extra = eytzinger0_extra(size);
-+
-+ return __eytzinger0_to_inorder(i, size, extra);
-+}
-+
-+/* Inorder index -> eytzinger index (zero based) for a tree of @size nodes. */
-+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
-+{
-+ unsigned extra = eytzinger0_extra(size);
-+
-+ return __inorder_to_eytzinger0(i, size, extra);
-+}
-+
-+/*
-+ * Visit every index of a zero based eytzinger tree of @_size nodes, starting
-+ * at eytzinger0_first() and following eytzinger0_next() until it yields -1.
-+ * @_i and @_size are evaluated more than once.
-+ */
-+#define eytzinger0_for_each(_i, _size) \
-+ for ((_i) = eytzinger0_first((_size)); \
-+ (_i) != -1; \
-+ (_i) = eytzinger0_next((_i), (_size)))
-+
-+typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
-+
-+/*
-+ * return greatest node <= @search, or -1 if not found
-+ *
-+ * @base points at @nr elements of @size bytes each, in zero based eytzinger
-+ * order; @cmp is called as cmp(search, element, size).
-+ */
-+static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
-+ eytzinger_cmp_fn cmp, const void *search)
-+{
-+ unsigned i, n = 0;
-+
-+ if (!nr)
-+ return -1;
-+
-+ /* Descend from the root: go right when @search >= node, left otherwise */
-+ do {
-+ i = n;
-+ n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
-+ } while (n < nr);
-+
-+ /* @n is now out of range; left children have odd indices, so odd means the last step went left */
-+ if (n & 1) {
-+ /* @i was greater than @search, return previous node: */
-+
-+ if (i == eytzinger0_first(nr))
-+ return -1;
-+
-+ return eytzinger0_prev(i, nr);
-+ } else {
-+ return i;
-+ }
-+}
-+
-+/*
-+ * Exact-match search in a zero based eytzinger array of @nr elements of @size
-+ * bytes. Evaluates to the index at which _cmp(search, element, size) returned
-+ * 0; if no element matches, the walk runs off the tree and the result is
-+ * >= @nr, so callers must compare the result against @nr.
-+ */
-+#define eytzinger0_find(base, nr, size, _cmp, search) \
-+({ \
-+ void *_base = (base); \
-+ void *_search = (search); \
-+ size_t _nr = (nr); \
-+ size_t _size = (size); \
-+ size_t _i = 0; \
-+ int _res; \
-+ \
-+ while (_i < _nr && \
-+ (_res = _cmp(_search, _base + _i * _size, _size))) \
-+ _i = eytzinger0_child(_i, _res > 0); \
-+ _i; \
-+})
-+
-+void eytzinger0_sort(void *, size_t, size_t,
-+ int (*cmp_func)(const void *, const void *, size_t),
-+ void (*swap_func)(void *, void *, size_t));
-+
-+#endif /* _EYTZINGER_H */
-diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h
-new file mode 100644
-index 000000000000..66b945be10c2
---- /dev/null
-+++ b/fs/bcachefs/fifo.h
-@@ -0,0 +1,127 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FIFO_H
-+#define _BCACHEFS_FIFO_H
-+
-+#include "util.h"
-+
-+/*
-+ * FIFO(type) expands to an anonymous struct describing a ring buffer of
-+ * @type: @front and @back are indices that the accessors below reduce with
-+ * @mask before indexing @data, and @size is the requested capacity.
-+ * DECLARE_FIFO(type, name) declares a variable/member of that struct type.
-+ */
-+#define FIFO(type) \
-+struct { \
-+ size_t front, back, size, mask; \
-+ type *data; \
-+}
-+
-+#define DECLARE_FIFO(type, name) FIFO(type) name
-+
-+/*
-+ * Size in bytes of the data buffer: the capacity rounded up to a power of
-+ * two times the element size, or 0 for a zero sized fifo.
-+ */
-+#define fifo_buf_size(fifo) \
-+ ((fifo)->size \
-+ ? roundup_pow_of_two((fifo)->size) * sizeof((fifo)->data[0]) \
-+ : 0)
-+
-+/*
-+ * Initialize @fifo for @_size elements and allocate its buffer with
-+ * kvpmalloc() using @_gfp. The statement expression evaluates to the new
-+ * (fifo)->data pointer.
-+ * NOTE(review): presumably NULL on allocation failure - confirm kvpmalloc().
-+ */
-+#define init_fifo(fifo, _size, _gfp) \
-+({ \
-+ (fifo)->front = (fifo)->back = 0; \
-+ (fifo)->size = (_size); \
-+ (fifo)->mask = (fifo)->size \
-+ ? roundup_pow_of_two((fifo)->size) - 1 \
-+ : 0; \
-+ (fifo)->data = kvpmalloc(fifo_buf_size(fifo), (_gfp)); \
-+})
-+
-+/* Release the buffer allocated by init_fifo() and clear the data pointer. */
-+#define free_fifo(fifo) \
-+do { \
-+ kvpfree((fifo)->data, fifo_buf_size(fifo)); \
-+ (fifo)->data = NULL; \
-+} while (0)
-+
-+/* Exchange the complete state (indices, size, mask and buffer) of two fifos. */
-+#define fifo_swap(l, r) \
-+do { \
-+ swap((l)->front, (r)->front); \
-+ swap((l)->back, (r)->back); \
-+ swap((l)->size, (r)->size); \
-+ swap((l)->mask, (r)->mask); \
-+ swap((l)->data, (r)->data); \
-+} while (0)
-+
-+/*
-+ * Move elements from the front of @src to the back of @dest until @dest is
-+ * full or @src is empty. Fullness is tested before each pop, so nothing is
-+ * popped that cannot be pushed.
-+ */
-+#define fifo_move(dest, src) \
-+do { \
-+ typeof(*((dest)->data)) _t; \
-+ while (!fifo_full(dest) && \
-+ fifo_pop(src, _t)) \
-+ fifo_push(dest, _t); \
-+} while (0)
-+
-+/* Number of elements currently stored, and number of free slots: */
-+#define fifo_used(fifo) (((fifo)->back - (fifo)->front))
-+#define fifo_free(fifo) ((fifo)->size - fifo_used(fifo))
-+
-+#define fifo_empty(fifo) ((fifo)->front == (fifo)->back)
-+#define fifo_full(fifo) (fifo_used(fifo) == (fifo)->size)
-+
-+/* Lvalues for the oldest and newest element; no emptiness check is done: */
-+#define fifo_peek_front(fifo) ((fifo)->data[(fifo)->front & (fifo)->mask])
-+#define fifo_peek_back(fifo) ((fifo)->data[((fifo)->back - 1) & (fifo)->mask])
-+
-+/*
-+ * Turn entry pointer @p into an index in the same unmasked space as
-+ * front/back: the bits above the mask come from @front when @p is at or after
-+ * the front slot in the buffer, otherwise from @back; the low part is @p's
-+ * offset in the data array.
-+ */
-+#define fifo_entry_idx_abs(fifo, p) \
-+ ((((p) >= &fifo_peek_front(fifo) \
-+ ? (fifo)->front : (fifo)->back) & ~(fifo)->mask) + \
-+ (((p) - (fifo)->data)))
-+
-+/* Entry pointer -> position relative to the front, and the reverse lookup: */
-+#define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask)
-+#define fifo_idx_entry(fifo, i) ((fifo)->data[((fifo)->front + (i)) & (fifo)->mask])
-+
-+/*
-+ * Reserve a slot at the back (or front) and evaluate to a pointer to it, or
-+ * to NULL when the fifo is full. The index is only modified when a slot is
-+ * handed out.
-+ */
-+#define fifo_push_back_ref(f) \
-+ (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask])
-+
-+#define fifo_push_front_ref(f) \
-+ (fifo_full((f)) ? NULL : &(f)->data[--(f)->front & (f)->mask])
-+
-+/*
-+ * Store @new at the back (or front) of the fifo. Evaluates to true when the
-+ * element was stored and to false when the fifo was full; @new is only
-+ * evaluated in the former case.
-+ */
-+#define fifo_push_back(fifo, new) \
-+({ \
-+ typeof((fifo)->data) _r = fifo_push_back_ref(fifo); \
-+ if (_r) \
-+ *_r = (new); \
-+ _r != NULL; \
-+})
-+
-+#define fifo_push_front(fifo, new) \
-+({ \
-+ typeof((fifo)->data) _r = fifo_push_front_ref(fifo); \
-+ if (_r) \
-+ *_r = (new); \
-+ _r != NULL; \
-+})
-+
-+/*
-+ * Remove the element at the front (or back) into @i. Evaluates to true when
-+ * an element was removed; when the fifo is empty, evaluates to false and
-+ * leaves @i untouched.
-+ */
-+#define fifo_pop_front(fifo, i) \
-+({ \
-+ bool _r = !fifo_empty((fifo)); \
-+ if (_r) \
-+ (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \
-+ _r; \
-+})
-+
-+#define fifo_pop_back(fifo, i) \
-+({ \
-+ bool _r = !fifo_empty((fifo)); \
-+ if (_r) \
-+ (i) = (fifo)->data[--(fifo)->back & (fifo)->mask]; \
-+ _r; \
-+})
-+
-+/* Default queue direction: push at the back, pop and peek at the front. */
-+#define fifo_push_ref(fifo) fifo_push_back_ref(fifo)
-+#define fifo_push(fifo, i) fifo_push_back(fifo, (i))
-+#define fifo_pop(fifo, i) fifo_pop_front(fifo, (i))
-+#define fifo_peek(fifo) fifo_peek_front(fifo)
-+
-+/*
-+ * Iterate from front to back without removing anything. @_iter must have the
-+ * same type as the fifo's front index (enforced by typecheck()); @_entry
-+ * receives a copy of each element.
-+ */
-+#define fifo_for_each_entry(_entry, _fifo, _iter) \
-+ for (typecheck(typeof((_fifo)->front), _iter), \
-+ (_iter) = (_fifo)->front; \
-+ ((_iter != (_fifo)->back) && \
-+ (_entry = (_fifo)->data[(_iter) & (_fifo)->mask], true)); \
-+ (_iter)++)
-+
-+/* As fifo_for_each_entry(), but @_ptr receives a pointer to each element. */
-+#define fifo_for_each_entry_ptr(_ptr, _fifo, _iter) \
-+ for (typecheck(typeof((_fifo)->front), _iter), \
-+ (_iter) = (_fifo)->front; \
-+ ((_iter != (_fifo)->back) && \
-+ (_ptr = &(_fifo)->data[(_iter) & (_fifo)->mask], true)); \
-+ (_iter)++)
-+
-+#endif /* _BCACHEFS_FIFO_H */
-diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c
-new file mode 100644
-index 000000000000..4496cf91a4c1
---- /dev/null
-+++ b/fs/bcachefs/fs-common.c
-@@ -0,0 +1,501 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "acl.h"
-+#include "btree_update.h"
-+#include "dirent.h"
-+#include "fs-common.h"
-+#include "inode.h"
-+#include "subvolume.h"
-+#include "xattr.h"
-+
-+#include <linux/posix_acl.h>
-+
-+/*
-+ * Returns 1 when @inode is a directory that is not a subvolume root
-+ * (bi_subvol == 0), i.e. one that is counted in its parent's bi_nlink by the
-+ * callers in this file; 0 otherwise.
-+ */
-+static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
-+{
-+ if (!S_ISDIR(inode->bi_mode))
-+ return 0;
-+
-+ return !inode->bi_subvol;
-+}
-+
-+/*
-+ * bch2_create_trans - transactional part of creating a file, directory,
-+ * tmpfile, subvolume or snapshot named @name in directory @dir.
-+ *
-+ * @dir_u and @new_inode are filled in/updated as the operation proceeds.
-+ * @flags is a mask of BCH_CREATE_*:
-+ * - without BCH_CREATE_SNAPSHOT a new inode is initialized and allocated;
-+ * with it, the root inode of the @snapshot_src subvolume is used instead
-+ * and BCH_CREATE_SUBVOL is implied
-+ * - BCH_CREATE_SUBVOL creates a new subvolume and makes the dirent point at
-+ * it (type DT_SUBVOL)
-+ * - BCH_CREATE_TMPFILE marks the inode unlinked and skips the dirent
-+ *
-+ * Returns 0 or a negative error code; both iterators are released on every
-+ * path through the err label.
-+ */
-+int bch2_create_trans(struct btree_trans *trans,
-+ subvol_inum dir,
-+ struct bch_inode_unpacked *dir_u,
-+ struct bch_inode_unpacked *new_inode,
-+ const struct qstr *name,
-+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
-+ struct posix_acl *default_acl,
-+ struct posix_acl *acl,
-+ subvol_inum snapshot_src,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter dir_iter = { NULL };
-+ struct btree_iter inode_iter = { NULL };
-+ subvol_inum new_inum = dir;
-+ u64 now = bch2_current_time(c);
-+ u64 cpu = raw_smp_processor_id();
-+ u64 dir_target;
-+ u32 snapshot;
-+ unsigned dir_type = mode_to_type(mode);
-+ int ret;
-+
-+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ if (!(flags & BCH_CREATE_SNAPSHOT)) {
-+ /* Normal create path - allocate a new inode: */
-+ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
-+
-+ if (flags & BCH_CREATE_TMPFILE)
-+ new_inode->bi_flags |= BCH_INODE_unlinked;
-+
-+ ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
-+ if (ret)
-+ goto err;
-+
-+ /* not snapshotting anything: the new subvolume (if any) has no source */
-+ snapshot_src = (subvol_inum) { 0 };
-+ } else {
-+ /*
-+ * Creating a snapshot - we're not allocating a new inode, but
-+ * we do have to lookup the root inode of the subvolume we're
-+ * snapshotting and update it (in the new snapshot):
-+ */
-+
-+ if (!snapshot_src.inum) {
-+ /* Inode wasn't specified, just snapshot: */
-+ struct bch_subvolume s;
-+
-+ ret = bch2_subvolume_get(trans, snapshot_src.subvol, true,
-+ BTREE_ITER_CACHED, &s);
-+ if (ret)
-+ goto err;
-+
-+ snapshot_src.inum = le64_to_cpu(s.inode);
-+ }
-+
-+ ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
-+ BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ if (new_inode->bi_subvol != snapshot_src.subvol) {
-+ /* Not a subvolume root: */
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ /*
-+ * If we're not root, we have to own the subvolume being
-+ * snapshotted:
-+ */
-+ if (uid && new_inode->bi_uid != uid) {
-+ ret = -EPERM;
-+ goto err;
-+ }
-+
-+ flags |= BCH_CREATE_SUBVOL;
-+ }
-+
-+ new_inum.inum = new_inode->bi_inum;
-+ dir_target = new_inode->bi_inum;
-+
-+ if (flags & BCH_CREATE_SUBVOL) {
-+ u32 new_subvol, dir_snapshot;
-+
-+ /* note: @snapshot is overwritten here with the new subvolume's snapshot */
-+ ret = bch2_subvolume_create(trans, new_inode->bi_inum,
-+ snapshot_src.subvol,
-+ &new_subvol, &snapshot,
-+ (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
-+ if (ret)
-+ goto err;
-+
-+ new_inode->bi_parent_subvol = dir.subvol;
-+ new_inode->bi_subvol = new_subvol;
-+ new_inum.subvol = new_subvol;
-+ dir_target = new_subvol;
-+ dir_type = DT_SUBVOL;
-+
-+ /* re-read the directory's snapshot and re-point dir_iter at it */
-+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
-+ if (ret)
-+ goto err;
-+
-+ bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot);
-+ ret = bch2_btree_iter_traverse(&dir_iter);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ /* ACLs are only applied to newly created inodes, not to snapshots */
-+ if (!(flags & BCH_CREATE_SNAPSHOT)) {
-+ if (default_acl) {
-+ ret = bch2_set_acl_trans(trans, new_inum, new_inode,
-+ default_acl, ACL_TYPE_DEFAULT);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ if (acl) {
-+ ret = bch2_set_acl_trans(trans, new_inum, new_inode,
-+ acl, ACL_TYPE_ACCESS);
-+ if (ret)
-+ goto err;
-+ }
-+ }
-+
-+ /* Everything but a tmpfile gets a dirent and updates the parent directory */
-+ if (!(flags & BCH_CREATE_TMPFILE)) {
-+ struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
-+ u64 dir_offset;
-+
-+ if (is_subdir_for_nlink(new_inode))
-+ dir_u->bi_nlink++;
-+ dir_u->bi_mtime = dir_u->bi_ctime = now;
-+
-+ ret = bch2_inode_write(trans, &dir_iter, dir_u);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_dirent_create(trans, dir, &dir_hash,
-+ dir_type,
-+ name,
-+ dir_target,
-+ &dir_offset,
-+ BCH_HASH_SET_MUST_CREATE);
-+ if (ret)
-+ goto err;
-+
-+ /* record the backpointer to the dirent when the on-disk version has them */
-+ if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
-+ new_inode->bi_dir = dir_u->bi_inum;
-+ new_inode->bi_dir_offset = dir_offset;
-+ }
-+ }
-+
-+ /* write the new inode in @snapshot (the new subvolume's snapshot if one was created) */
-+ inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
-+ bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
-+
-+ ret = bch2_btree_iter_traverse(&inode_iter) ?:
-+ bch2_inode_write(trans, &inode_iter, new_inode);
-+err:
-+ bch2_trans_iter_exit(trans, &inode_iter);
-+ bch2_trans_iter_exit(trans, &dir_iter);
-+ return ret;
-+}
-+
-+/*
-+ * bch2_link_trans - transactional part of creating a hard link @name in
-+ * directory @dir that refers to the existing inode @inum.
-+ *
-+ * @dir_u and @inode_u are filled in from the btree and updated (link count,
-+ * timestamps, dirent backpointer) before being written back.
-+ *
-+ * Returns 0 or a negative error code:
-+ * -EXDEV when @dir and @inum are in different subvolumes, or when the inode
-+ * would have to re-inherit options from the new parent directory.
-+ *
-+ * Every path taken after an iterator has been initialized leaves through
-+ * the err label so that both iterators are released.
-+ */
-+int bch2_link_trans(struct btree_trans *trans,
-+ subvol_inum dir, struct bch_inode_unpacked *dir_u,
-+ subvol_inum inum, struct bch_inode_unpacked *inode_u,
-+ const struct qstr *name)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter dir_iter = { NULL };
-+ struct btree_iter inode_iter = { NULL };
-+ struct bch_hash_info dir_hash;
-+ u64 now = bch2_current_time(c);
-+ u64 dir_offset = 0;
-+ int ret;
-+
-+ /* nothing has been acquired yet, so a plain return is fine here */
-+ if (dir.subvol != inum.subvol)
-+ return -EXDEV;
-+
-+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ inode_u->bi_ctime = now;
-+ ret = bch2_inode_nlink_inc(inode_u);
-+ if (ret)
-+ goto err; /* inode_iter is live: must not bypass the cleanup below */
-+
-+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ if (bch2_reinherit_attrs(inode_u, dir_u)) {
-+ ret = -EXDEV;
-+ goto err;
-+ }
-+
-+ dir_u->bi_mtime = dir_u->bi_ctime = now;
-+
-+ dir_hash = bch2_hash_info_init(c, dir_u);
-+
-+ ret = bch2_dirent_create(trans, dir, &dir_hash,
-+ mode_to_type(inode_u->bi_mode),
-+ name, inum.inum, &dir_offset,
-+ BCH_HASH_SET_MUST_CREATE);
-+ if (ret)
-+ goto err;
-+
-+ /* record the backpointer to the dirent when the on-disk version has them */
-+ if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
-+ inode_u->bi_dir = dir.inum;
-+ inode_u->bi_dir_offset = dir_offset;
-+ }
-+
-+ ret = bch2_inode_write(trans, &dir_iter, dir_u) ?:
-+ bch2_inode_write(trans, &inode_iter, inode_u);
-+err:
-+ bch2_trans_iter_exit(trans, &dir_iter);
-+ bch2_trans_iter_exit(trans, &inode_iter);
-+ return ret;
-+}
-+
-+/*
-+ * bch2_unlink_trans - transactional part of removing the entry @name from
-+ * directory @dir.
-+ *
-+ * @dir_u and @inode_u are filled in from the btree and updated.
-+ * @deleting_snapshot: the caller is deleting a subvolume/snapshot; the target
-+ * must then be a subvolume root (else -BCH_ERR_ENOENT_not_subvol) and the
-+ * empty-directory check is skipped.
-+ *
-+ * Returns 0 or a negative error code; all iterators are released on every
-+ * path through the err label.
-+ */
-+int bch2_unlink_trans(struct btree_trans *trans,
-+ subvol_inum dir,
-+ struct bch_inode_unpacked *dir_u,
-+ struct bch_inode_unpacked *inode_u,
-+ const struct qstr *name,
-+ bool deleting_snapshot)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter dir_iter = { NULL };
-+ struct btree_iter dirent_iter = { NULL };
-+ struct btree_iter inode_iter = { NULL };
-+ struct bch_hash_info dir_hash;
-+ subvol_inum inum;
-+ u64 now = bch2_current_time(c);
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ dir_hash = bch2_hash_info_init(c, dir_u);
-+
-+ /* find the dirent; this also yields the target inode number in @inum */
-+ ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
-+ name, &inum, BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum,
-+ BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ /* the error from the empty-directory check is passed through unchanged */
-+ if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) {
-+ ret = bch2_empty_dir_trans(trans, inum);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ if (deleting_snapshot && !inode_u->bi_subvol) {
-+ ret = -BCH_ERR_ENOENT_not_subvol;
-+ goto err;
-+ }
-+
-+ if (deleting_snapshot || inode_u->bi_subvol) {
-+ ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
-+ if (ret)
-+ goto err;
-+
-+ k = bch2_btree_iter_peek_slot(&dirent_iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * If we're deleting a subvolume, we need to really delete the
-+ * dirent, not just emit a whiteout in the current snapshot:
-+ */
-+ bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot);
-+ ret = bch2_btree_iter_traverse(&dirent_iter);
-+ if (ret)
-+ goto err;
-+ } else {
-+ bch2_inode_nlink_dec(trans, inode_u);
-+ }
-+
-+ /* clear the inode's backpointer if it referred to the dirent being removed */
-+ if (inode_u->bi_dir == dirent_iter.pos.inode &&
-+ inode_u->bi_dir_offset == dirent_iter.pos.offset) {
-+ inode_u->bi_dir = 0;
-+ inode_u->bi_dir_offset = 0;
-+ }
-+
-+ dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
-+ dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
-+
-+ ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
-+ &dir_hash, &dirent_iter,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-+ bch2_inode_write(trans, &dir_iter, dir_u) ?:
-+ bch2_inode_write(trans, &inode_iter, inode_u);
-+err:
-+ bch2_trans_iter_exit(trans, &inode_iter);
-+ bch2_trans_iter_exit(trans, &dirent_iter);
-+ bch2_trans_iter_exit(trans, &dir_iter);
-+ return ret;
-+}
-+
-+/*
-+ * Copy every inode option from @src_u to @dst_u, except options whose bit is
-+ * set in dst_u->bi_fields_set (explicitly set on @dst_u).
-+ *
-+ * Returns true if at least one option value in @dst_u was changed.
-+ */
-+bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
-+ struct bch_inode_unpacked *src_u)
-+{
-+ bool changed = false;
-+ unsigned id;
-+
-+ for (id = 0; id < Inode_opt_nr; id++) {
-+ u64 inherited, current_val;
-+
-+ /* Options set explicitly on the destination are left alone */
-+ if (dst_u->bi_fields_set & (1 << id))
-+ continue;
-+
-+ inherited = bch2_inode_opt_get(src_u, id);
-+ current_val = bch2_inode_opt_get(dst_u, id);
-+
-+ if (inherited != current_val) {
-+ bch2_inode_opt_set(dst_u, id, inherited);
-+ changed = true;
-+ }
-+ }
-+
-+ return changed;
-+}
-+
-+/*
-+ * bch2_rename_trans - transactional part of rename: move @src_name in
-+ * @src_dir to @dst_name in @dst_dir according to @mode.
-+ *
-+ * The four inode_unpacked arguments are filled in from the btree and updated.
-+ * When source and destination directory are the same, @dst_dir_u is not
-+ * filled in; the function works on @src_dir_u for both.
-+ *
-+ * Returns 0 or a negative error code, including:
-+ * -ENOTDIR BCH_RENAME_OVERWRITE between a directory and a non-directory
-+ * -EXDEV a directory would have to re-inherit options
-+ * plus whatever bch2_empty_dir_trans() reports for an overwritten directory.
-+ * That error is passed through unchanged, as bch2_unlink_trans() does,
-+ * rather than being rewritten to -ENOTEMPTY, so other failures of the check
-+ * are not misreported as "directory not empty".
-+ *
-+ * All iterators are released on every path through the err label.
-+ */
-+int bch2_rename_trans(struct btree_trans *trans,
-+ subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
-+ subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
-+ struct bch_inode_unpacked *src_inode_u,
-+ struct bch_inode_unpacked *dst_inode_u,
-+ const struct qstr *src_name,
-+ const struct qstr *dst_name,
-+ enum bch_rename_mode mode)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter src_dir_iter = { NULL };
-+ struct btree_iter dst_dir_iter = { NULL };
-+ struct btree_iter src_inode_iter = { NULL };
-+ struct btree_iter dst_inode_iter = { NULL };
-+ struct bch_hash_info src_hash, dst_hash;
-+ subvol_inum src_inum, dst_inum;
-+ u64 src_offset, dst_offset;
-+ u64 now = bch2_current_time(c);
-+ int ret;
-+
-+ ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir,
-+ BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ src_hash = bch2_hash_info_init(c, src_dir_u);
-+
-+ if (dst_dir.inum != src_dir.inum ||
-+ dst_dir.subvol != src_dir.subvol) {
-+ ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
-+ BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ dst_hash = bch2_hash_info_init(c, dst_dir_u);
-+ } else {
-+ /* same directory: alias the source so updates land in one copy */
-+ dst_dir_u = src_dir_u;
-+ dst_hash = src_hash;
-+ }
-+
-+ ret = bch2_dirent_rename(trans,
-+ src_dir, &src_hash,
-+ dst_dir, &dst_hash,
-+ src_name, &src_inum, &src_offset,
-+ dst_name, &dst_inum, &dst_offset,
-+ mode);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum,
-+ BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+
-+ /* dst_inum.inum == 0 means there is no existing destination inode */
-+ if (dst_inum.inum) {
-+ ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum,
-+ BTREE_ITER_INTENT);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ /* update the dirent backpointers when the on-disk version has them */
-+ if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
-+ src_inode_u->bi_dir = dst_dir_u->bi_inum;
-+ src_inode_u->bi_dir_offset = dst_offset;
-+
-+ if (mode == BCH_RENAME_EXCHANGE) {
-+ dst_inode_u->bi_dir = src_dir_u->bi_inum;
-+ dst_inode_u->bi_dir_offset = src_offset;
-+ }
-+
-+ if (mode == BCH_RENAME_OVERWRITE &&
-+ dst_inode_u->bi_dir == dst_dir_u->bi_inum &&
-+ dst_inode_u->bi_dir_offset == src_offset) {
-+ dst_inode_u->bi_dir = 0;
-+ dst_inode_u->bi_dir_offset = 0;
-+ }
-+ }
-+
-+ if (mode == BCH_RENAME_OVERWRITE) {
-+ if (S_ISDIR(src_inode_u->bi_mode) !=
-+ S_ISDIR(dst_inode_u->bi_mode)) {
-+ ret = -ENOTDIR;
-+ goto err;
-+ }
-+
-+ /* a directory may only be overwritten when it is empty */
-+ if (S_ISDIR(dst_inode_u->bi_mode)) {
-+ ret = bch2_empty_dir_trans(trans, dst_inum);
-+ if (ret)
-+ goto err;
-+ }
-+ }
-+
-+ if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
-+ S_ISDIR(src_inode_u->bi_mode)) {
-+ ret = -EXDEV;
-+ goto err;
-+ }
-+
-+ if (mode == BCH_RENAME_EXCHANGE &&
-+ bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
-+ S_ISDIR(dst_inode_u->bi_mode)) {
-+ ret = -EXDEV;
-+ goto err;
-+ }
-+
-+ /* directory link counts follow subdirectories that change parent */
-+ if (is_subdir_for_nlink(src_inode_u)) {
-+ src_dir_u->bi_nlink--;
-+ dst_dir_u->bi_nlink++;
-+ }
-+
-+ if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
-+ dst_dir_u->bi_nlink--;
-+ src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
-+ }
-+
-+ if (mode == BCH_RENAME_OVERWRITE)
-+ bch2_inode_nlink_dec(trans, dst_inode_u);
-+
-+ src_dir_u->bi_mtime = now;
-+ src_dir_u->bi_ctime = now;
-+
-+ if (src_dir.inum != dst_dir.inum) {
-+ dst_dir_u->bi_mtime = now;
-+ dst_dir_u->bi_ctime = now;
-+ }
-+
-+ src_inode_u->bi_ctime = now;
-+
-+ if (dst_inum.inum)
-+ dst_inode_u->bi_ctime = now;
-+
-+ ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
-+ (src_dir.inum != dst_dir.inum
-+ ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
-+ : 0) ?:
-+ bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
-+ (dst_inum.inum
-+ ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
-+ : 0);
-+err:
-+ bch2_trans_iter_exit(trans, &dst_inode_iter);
-+ bch2_trans_iter_exit(trans, &src_inode_iter);
-+ bch2_trans_iter_exit(trans, &dst_dir_iter);
-+ bch2_trans_iter_exit(trans, &src_dir_iter);
-+ return ret;
-+}
-diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h
-new file mode 100644
-index 000000000000..dde237859514
---- /dev/null
-+++ b/fs/bcachefs/fs-common.h
-@@ -0,0 +1,43 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_COMMON_H
-+#define _BCACHEFS_FS_COMMON_H
-+
-+struct posix_acl;
-+
-+/*
-+ * Flags for the last argument of bch2_create_trans():
-+ * TMPFILE - inode is marked unlinked and no dirent is created
-+ * SUBVOL - create a new subvolume; the dirent points at the subvolume
-+ * SNAPSHOT - snapshot an existing subvolume instead of allocating a new
-+ * inode (implies SUBVOL)
-+ * SNAPSHOT_RO - passed on to bch2_subvolume_create() as its final boolean
-+ */
-+#define BCH_CREATE_TMPFILE (1U << 0)
-+#define BCH_CREATE_SUBVOL (1U << 1)
-+#define BCH_CREATE_SNAPSHOT (1U << 2)
-+#define BCH_CREATE_SNAPSHOT_RO (1U << 3)
-+
-+int bch2_create_trans(struct btree_trans *, subvol_inum,
-+ struct bch_inode_unpacked *,
-+ struct bch_inode_unpacked *,
-+ const struct qstr *,
-+ uid_t, gid_t, umode_t, dev_t,
-+ struct posix_acl *,
-+ struct posix_acl *,
-+ subvol_inum, unsigned);
-+
-+int bch2_link_trans(struct btree_trans *,
-+ subvol_inum, struct bch_inode_unpacked *,
-+ subvol_inum, struct bch_inode_unpacked *,
-+ const struct qstr *);
-+
-+int bch2_unlink_trans(struct btree_trans *, subvol_inum,
-+ struct bch_inode_unpacked *,
-+ struct bch_inode_unpacked *,
-+ const struct qstr *, bool);
-+
-+int bch2_rename_trans(struct btree_trans *,
-+ subvol_inum, struct bch_inode_unpacked *,
-+ subvol_inum, struct bch_inode_unpacked *,
-+ struct bch_inode_unpacked *,
-+ struct bch_inode_unpacked *,
-+ const struct qstr *,
-+ const struct qstr *,
-+ enum bch_rename_mode);
-+
-+bool bch2_reinherit_attrs(struct bch_inode_unpacked *,
-+ struct bch_inode_unpacked *);
-+
-+#endif /* _BCACHEFS_FS_COMMON_H */
-diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
-new file mode 100644
-index 000000000000..52f0e7acda3d
---- /dev/null
-+++ b/fs/bcachefs/fs-io-buffered.c
-@@ -0,0 +1,1106 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_buf.h"
-+#include "fs-io.h"
-+#include "fs-io-buffered.h"
-+#include "fs-io-direct.h"
-+#include "fs-io-pagecache.h"
-+#include "io_read.h"
-+#include "io_write.h"
-+
-+#include <linux/backing-dev.h>
-+#include <linux/pagemap.h>
-+#include <linux/writeback.h>
-+
-+/*
-+ * Returns true if @bio cannot take another segment of @len bytes: either all
-+ * of its vecs are in use, or adding @len would overflow bi_size past UINT_MAX.
-+ */
-+static inline bool bio_full(struct bio *bio, unsigned len)
-+{
-+ return bio->bi_vcnt >= bio->bi_max_vecs ||
-+ bio->bi_iter.bi_size > UINT_MAX - len;
-+}
-+
-+/* readpage(s): */
-+
-+/*
-+ * Completion handler for readahead bios: every folio in @bio is marked
-+ * uptodate on success, or not-uptodate plus error on failure, and then
-+ * unlocked; finally the bio reference is dropped.
-+ */
-+static void bch2_readpages_end_io(struct bio *bio)
-+{
-+ struct folio_iter fi;
-+
-+ bio_for_each_folio_all(fi, bio) {
-+ if (bio->bi_status) {
-+ folio_clear_uptodate(fi.folio);
-+ folio_set_error(fi.folio);
-+ } else {
-+ folio_mark_uptodate(fi.folio);
-+ }
-+ folio_unlock(fi.folio);
-+ }
-+
-+ bio_put(bio);
-+}
-+
-+/*
-+ * Cursor over the folios of one readahead request: @folios holds the folios
-+ * collected by readpages_iter_init() and @idx is the next one to hand out
-+ * (see readpage_iter_peek() / readpage_iter_advance()).
-+ */
-+struct readpages_iter {
-+ struct address_space *mapping;
-+ unsigned idx;
-+ folios folios;
-+};
-+
-+/*
-+ * Zero @iter and fill iter->folios with the contiguous folios covering the
-+ * readahead window described by @ractl. For each folio collected, @ractl's
-+ * remaining page count is reduced by the folio's page count and bcachefs
-+ * per-folio state is created.
-+ *
-+ * Returns 0, or the error from bch2_filemap_get_contig_folios_d().
-+ */
-+static int readpages_iter_init(struct readpages_iter *iter,
-+ struct readahead_control *ractl)
-+{
-+ struct folio **fi;
-+ int ret;
-+
-+ memset(iter, 0, sizeof(*iter));
-+
-+ iter->mapping = ractl->mapping;
-+
-+ ret = bch2_filemap_get_contig_folios_d(iter->mapping,
-+ ractl->_index << PAGE_SHIFT,
-+ (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT,
-+ 0, mapping_gfp_mask(iter->mapping),
-+ &iter->folios);
-+ if (ret)
-+ return ret;
-+
-+ darray_for_each(iter->folios, fi) {
-+ ractl->_nr_pages -= 1U << folio_order(*fi);
-+ __bch2_folio_create(*fi, __GFP_NOFAIL|GFP_KERNEL);
-+ /*
-+ * NOTE(review): two references are dropped per folio - presumably
-+ * one taken by the lookup above and one held on behalf of the
-+ * readahead request; confirm before touching this.
-+ */
-+ folio_put(*fi);
-+ folio_put(*fi);
-+ }
-+
-+ return 0;
-+}
-+
-+/* Next folio of @iter without consuming it, or NULL when none are left. */
-+static inline struct folio *readpage_iter_peek(struct readpages_iter *iter)
-+{
-+ return iter->idx < iter->folios.nr
-+ ? iter->folios.data[iter->idx]
-+ : NULL;
-+}
-+
-+/* Consume the folio last returned by readpage_iter_peek(). */
-+static inline void readpage_iter_advance(struct readpages_iter *iter)
-+{
-+ iter->idx += 1;
-+}
-+
-+/*
-+ * Returns true if any crc entry of extent @k has a checksum type or a
-+ * compression type set. bchfs_read() passes the result as the get_more
-+ * argument of readpage_bio_extend().
-+ */
-+static bool extent_partial_reads_expensive(struct bkey_s_c k)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ struct bch_extent_crc_unpacked crc;
-+ const union bch_extent_entry *i;
-+
-+ bkey_for_each_crc(k.k, ptrs, crc, i)
-+ if (crc.csum_type || crc.compression_type)
-+ return true;
-+ return false;
-+}
-+
-+/*
-+ * Grow @bio with further folios until it covers @sectors_this_extent sectors
-+ * or runs out of vecs. Folios come from @iter first; once @iter is exhausted
-+ * and @get_more is set, new order-0 folios are allocated and inserted into
-+ * the page cache directly after the bio's current end.
-+ *
-+ * Btree locks are dropped for the duration; the return value is that of
-+ * bch2_trans_relock().
-+ */
-+static int readpage_bio_extend(struct btree_trans *trans,
-+ struct readpages_iter *iter,
-+ struct bio *bio,
-+ unsigned sectors_this_extent,
-+ bool get_more)
-+{
-+ /* Don't hold btree locks while allocating memory: */
-+ bch2_trans_unlock(trans);
-+
-+ while (bio_sectors(bio) < sectors_this_extent &&
-+ bio->bi_vcnt < bio->bi_max_vecs) {
-+ struct folio *folio = readpage_iter_peek(iter);
-+ int ret;
-+
-+ if (folio) {
-+ readpage_iter_advance(iter);
-+ } else {
-+ pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT;
-+
-+ if (!get_more)
-+ break;
-+
-+ /* stop if the page cache already has a real folio at this index */
-+ folio = xa_load(&iter->mapping->i_pages, folio_offset);
-+ if (folio && !xa_is_value(folio))
-+ break;
-+
-+ folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0);
-+ if (!folio)
-+ break;
-+
-+ if (!__bch2_folio_create(folio, GFP_KERNEL)) {
-+ folio_put(folio);
-+ break;
-+ }
-+
-+ ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_KERNEL);
-+ if (ret) {
-+ __bch2_folio_release(folio);
-+ folio_put(folio);
-+ break;
-+ }
-+
-+ folio_put(folio);
-+ }
-+
-+ /* the folio must start exactly where the bio currently ends */
-+ BUG_ON(folio_sector(folio) != bio_end_sector(bio));
-+
-+ BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0));
-+ }
-+
-+ return bch2_trans_relock(trans);
-+}
-+
-+/*
-+ * Issue the reads needed to fill @rbio for inode @inum: walk the extents
-+ * btree from the bio's starting sector and hand one fragment per extent to
-+ * bch2_read_extent(), advancing the bio until the fragment that covers the
-+ * rest of it has been submitted (BCH_READ_LAST_FRAGMENT).
-+ *
-+ * @readpages_iter is optional; when non-NULL the bio is first extended with
-+ * further folios for each extent (readahead path).
-+ *
-+ * Transaction restarts retry from the top. Any other btree lookup error is
-+ * logged and completes the bio with BLK_STS_IOERR.
-+ */
-+static void bchfs_read(struct btree_trans *trans,
-+ struct bch_read_bio *rbio,
-+ subvol_inum inum,
-+ struct readpages_iter *readpages_iter)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_buf sk;
-+ int flags = BCH_READ_RETRY_IF_STALE|
-+ BCH_READ_MAY_PROMOTE;
-+ u32 snapshot;
-+ int ret = 0;
-+
-+ rbio->c = c;
-+ rbio->start_time = local_clock();
-+ rbio->subvol = inum.subvol;
-+
-+ bch2_bkey_buf_init(&sk);
-+retry:
-+ bch2_trans_begin(trans);
-+ /* zeroed so that the iter_exit at err: is safe if we bail out before iter_init */
-+ iter = (struct btree_iter) { NULL };
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot),
-+ BTREE_ITER_SLOTS);
-+ while (1) {
-+ struct bkey_s_c k;
-+ unsigned bytes, sectors, offset_into_extent;
-+ enum btree_id data_btree = BTREE_ID_extents;
-+
-+ /*
-+ * read_extent -> io_time_reset may cause a transaction restart
-+ * without returning an error, we need to check for that here:
-+ */
-+ ret = bch2_trans_relock(trans);
-+ if (ret)
-+ break;
-+
-+ bch2_btree_iter_set_pos(&iter,
-+ POS(inum.inum, rbio->bio.bi_iter.bi_sector));
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ break;
-+
-+ offset_into_extent = iter.pos.offset -
-+ bkey_start_offset(k.k);
-+ sectors = k.k->size - offset_into_extent;
-+
-+ /* copy the key into @sk, then let the indirect-extent lookup update it */
-+ bch2_bkey_buf_reassemble(&sk, c, k);
-+
-+ ret = bch2_read_indirect_extent(trans, &data_btree,
-+ &offset_into_extent, &sk);
-+ if (ret)
-+ break;
-+
-+ k = bkey_i_to_s_c(sk.k);
-+
-+ sectors = min(sectors, k.k->size - offset_into_extent);
-+
-+ if (readpages_iter) {
-+ ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors,
-+ extent_partial_reads_expensive(k));
-+ if (ret)
-+ break;
-+ }
-+
-+ /*
-+ * Temporarily shrink the bio to this fragment; after the swap
-+ * @bytes holds the bio's full remaining size.
-+ */
-+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
-+ swap(rbio->bio.bi_iter.bi_size, bytes);
-+
-+ if (rbio->bio.bi_iter.bi_size == bytes)
-+ flags |= BCH_READ_LAST_FRAGMENT;
-+
-+ bch2_bio_page_state_set(&rbio->bio, k);
-+
-+ bch2_read_extent(trans, rbio, iter.pos,
-+ data_btree, k, offset_into_extent, flags);
-+
-+ if (flags & BCH_READ_LAST_FRAGMENT)
-+ break;
-+
-+ /* restore the full size and step past the fragment just submitted */
-+ swap(rbio->bio.bi_iter.bi_size, bytes);
-+ bio_advance(&rbio->bio, bytes);
-+
-+ ret = btree_trans_too_many_iters(trans);
-+ if (ret)
-+ break;
-+ }
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ if (ret) {
-+ bch_err_inum_offset_ratelimited(c,
-+ iter.pos.inode,
-+ iter.pos.offset << 9,
-+ "read error %i from btree lookup", ret);
-+ rbio->bio.bi_status = BLK_STS_IOERR;
-+ bio_endio(&rbio->bio);
-+ }
-+
-+ bch2_bkey_buf_exit(&sk, c);
-+}
-+
-+/*
-+ * Readahead entry point: collect the folios of @ractl, then repeatedly start
-+ * a read bio at the next unconsumed folio and let bchfs_read() extend and
-+ * submit it, until every folio has been consumed.
-+ *
-+ * NOTE(review): a failure of readpages_iter_init() trips BUG_ON() rather
-+ * than being handled.
-+ */
-+void bch2_readahead(struct readahead_control *ractl)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_io_opts opts;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct folio *folio;
-+ struct readpages_iter readpages_iter;
-+ int ret;
-+
-+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
-+
-+ ret = readpages_iter_init(&readpages_iter, ractl);
-+ BUG_ON(ret);
-+
-+ bch2_pagecache_add_get(inode);
-+
-+ while ((folio = readpage_iter_peek(&readpages_iter))) {
-+ /* size the bio for the folios still pending, capped at BIO_MAX_VECS */
-+ unsigned n = min_t(unsigned,
-+ readpages_iter.folios.nr -
-+ readpages_iter.idx,
-+ BIO_MAX_VECS);
-+ struct bch_read_bio *rbio =
-+ rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ,
-+ GFP_KERNEL, &c->bio_read),
-+ opts);
-+
-+ readpage_iter_advance(&readpages_iter);
-+
-+ rbio->bio.bi_iter.bi_sector = folio_sector(folio);
-+ rbio->bio.bi_end_io = bch2_readpages_end_io;
-+ BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
-+
-+ bchfs_read(trans, rbio, inode_inum(inode),
-+ &readpages_iter);
-+ bch2_trans_unlock(trans);
-+ }
-+
-+ bch2_pagecache_add_put(inode);
-+
-+ bch2_trans_put(trans);
-+ darray_exit(&readpages_iter.folios);
-+}
-+
-+static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio,
-+ subvol_inum inum, struct folio *folio)
-+{
-+ bch2_folio_create(folio, __GFP_NOFAIL);
-+
-+ rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
-+ rbio->bio.bi_iter.bi_sector = folio_sector(folio);
-+ BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
-+
-+ bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0));
-+}
-+
-+static void bch2_read_single_folio_end_io(struct bio *bio)
-+{
-+ complete(bio->bi_private);
-+}
-+
-+int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(mapping->host);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_read_bio *rbio;
-+ struct bch_io_opts opts;
-+ int ret;
-+ DECLARE_COMPLETION_ONSTACK(done);
-+
-+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
-+
-+ rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read),
-+ opts);
-+ rbio->bio.bi_private = &done;
-+ rbio->bio.bi_end_io = bch2_read_single_folio_end_io;
-+
-+ __bchfs_readfolio(c, rbio, inode_inum(inode), folio);
-+ wait_for_completion(&done);
-+
-+ ret = blk_status_to_errno(rbio->bio.bi_status);
-+ bio_put(&rbio->bio);
-+
-+ if (ret < 0)
-+ return ret;
-+
-+ folio_mark_uptodate(folio);
-+ return 0;
-+}
-+
-+int bch2_read_folio(struct file *file, struct folio *folio)
-+{
-+ int ret;
-+
-+ ret = bch2_read_single_folio(folio, folio->mapping);
-+ folio_unlock(folio);
-+ return bch2_err_class(ret);
-+}
-+
-+/* writepages: */
-+
-+struct bch_writepage_io {
-+ struct bch_inode_info *inode;
-+
-+ /* must be last: */
-+ struct bch_write_op op;
-+};
-+
-+struct bch_writepage_state {
-+ struct bch_writepage_io *io;
-+ struct bch_io_opts opts;
-+ struct bch_folio_sector *tmp;
-+ unsigned tmp_sectors;
-+};
-+
-+static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c,
-+ struct bch_inode_info *inode)
-+{
-+ struct bch_writepage_state ret = { 0 };
-+
-+ bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode);
-+ return ret;
-+}
-+
-+/*
-+ * Determine when a writepage io is full. We have to limit writepage bios to a
-+ * single page per bvec (i.e. 1MB with 4k pages) because that is the limit to
-+ * what the bounce path in bch2_write_extent() can handle. In theory we could
-+ * loosen this restriction for non-bounce I/O, but we don't have that context
-+ * here. Ideally, we can up this limit and make it configurable in the future
-+ * when the bounce path can be enhanced to accommodate larger source bios.
-+ */
-+static inline bool bch_io_full(struct bch_writepage_io *io, unsigned len)
-+{
-+ struct bio *bio = &io->op.wbio.bio;
-+ return bio_full(bio, len) ||
-+ (bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE);
-+}
-+
-+static void bch2_writepage_io_done(struct bch_write_op *op)
-+{
-+ struct bch_writepage_io *io =
-+ container_of(op, struct bch_writepage_io, op);
-+ struct bch_fs *c = io->op.c;
-+ struct bio *bio = &io->op.wbio.bio;
-+ struct folio_iter fi;
-+ unsigned i;
-+
-+ if (io->op.error) {
-+ set_bit(EI_INODE_ERROR, &io->inode->ei_flags);
-+
-+ bio_for_each_folio_all(fi, bio) {
-+ struct bch_folio *s;
-+
-+ folio_set_error(fi.folio);
-+ mapping_set_error(fi.folio->mapping, -EIO);
-+
-+ s = __bch2_folio(fi.folio);
-+ spin_lock(&s->lock);
-+ for (i = 0; i < folio_sectors(fi.folio); i++)
-+ s->s[i].nr_replicas = 0;
-+ spin_unlock(&s->lock);
-+ }
-+ }
-+
-+ if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
-+ bio_for_each_folio_all(fi, bio) {
-+ struct bch_folio *s;
-+
-+ s = __bch2_folio(fi.folio);
-+ spin_lock(&s->lock);
-+ for (i = 0; i < folio_sectors(fi.folio); i++)
-+ s->s[i].nr_replicas = 0;
-+ spin_unlock(&s->lock);
-+ }
-+ }
-+
-+ /*
-+ * racing with fallocate can cause us to add fewer sectors than
-+ * expected - but we shouldn't add more sectors than expected:
-+ */
-+ WARN_ON_ONCE(io->op.i_sectors_delta > 0);
-+
-+ /*
-+ * (error (due to going RO) halfway through a page can screw that up
-+ * slightly)
-+ * XXX wtf?
-+ BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS);
-+ */
-+
-+ /*
-+ * PageWriteback is effectively our ref on the inode - fixup i_blocks
-+ * before calling end_page_writeback:
-+ */
-+ bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
-+
-+ bio_for_each_folio_all(fi, bio) {
-+ struct bch_folio *s = __bch2_folio(fi.folio);
-+
-+ if (atomic_dec_and_test(&s->write_count))
-+ folio_end_writeback(fi.folio);
-+ }
-+
-+ bio_put(&io->op.wbio.bio);
-+}
-+
-+static void bch2_writepage_do_io(struct bch_writepage_state *w)
-+{
-+ struct bch_writepage_io *io = w->io;
-+
-+ w->io = NULL;
-+ closure_call(&io->op.cl, bch2_write, NULL, NULL);
-+}
-+
-+/*
-+ * Get a bch_writepage_io and add @page to it - appending to an existing one if
-+ * possible, else allocating a new one:
-+ */
-+static void bch2_writepage_io_alloc(struct bch_fs *c,
-+ struct writeback_control *wbc,
-+ struct bch_writepage_state *w,
-+ struct bch_inode_info *inode,
-+ u64 sector,
-+ unsigned nr_replicas)
-+{
-+ struct bch_write_op *op;
-+
-+ w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS,
-+ REQ_OP_WRITE,
-+ GFP_KERNEL,
-+ &c->writepage_bioset),
-+ struct bch_writepage_io, op.wbio.bio);
-+
-+ w->io->inode = inode;
-+ op = &w->io->op;
-+ bch2_write_op_init(op, c, w->opts);
-+ op->target = w->opts.foreground_target;
-+ op->nr_replicas = nr_replicas;
-+ op->res.nr_replicas = nr_replicas;
-+ op->write_point = writepoint_hashed(inode->ei_last_dirtied);
-+ op->subvol = inode->ei_subvol;
-+ op->pos = POS(inode->v.i_ino, sector);
-+ op->end_io = bch2_writepage_io_done;
-+ op->devs_need_flush = &inode->ei_devs_need_flush;
-+ op->wbio.bio.bi_iter.bi_sector = sector;
-+ op->wbio.bio.bi_opf = wbc_to_write_flags(wbc);
-+}
-+
-+static int __bch2_writepage(struct folio *folio,
-+ struct writeback_control *wbc,
-+ void *data)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_writepage_state *w = data;
-+ struct bch_folio *s;
-+ unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX;
-+ loff_t i_size = i_size_read(&inode->v);
-+ int ret;
-+
-+ EBUG_ON(!folio_test_uptodate(folio));
-+
-+ /* Is the folio fully inside i_size? */
-+ if (folio_end_pos(folio) <= i_size)
-+ goto do_io;
-+
-+ /* Is the folio fully outside i_size? (truncate in progress) */
-+ if (folio_pos(folio) >= i_size) {
-+ folio_unlock(folio);
-+ return 0;
-+ }
-+
-+ /*
-+ * The folio straddles i_size. It must be zeroed out on each and every
-+ * writepage invocation because it may be mmapped. "A file is mapped
-+ * in multiples of the folio size. For a file that is not a multiple of
-+ * the folio size, the remaining memory is zeroed when mapped, and
-+ * writes to that region are not written out to the file."
-+ */
-+ folio_zero_segment(folio,
-+ i_size - folio_pos(folio),
-+ folio_size(folio));
-+do_io:
-+ f_sectors = folio_sectors(folio);
-+ s = bch2_folio(folio);
-+
-+ if (f_sectors > w->tmp_sectors) {
-+ kfree(w->tmp);
-+ w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL);
-+ w->tmp_sectors = f_sectors;
-+ }
-+
-+ /*
-+ * Things get really hairy with errors during writeback:
-+ */
-+ ret = bch2_get_folio_disk_reservation(c, inode, folio, false);
-+ BUG_ON(ret);
-+
-+ /* Before unlocking the page, get copy of reservations: */
-+ spin_lock(&s->lock);
-+ memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors);
-+
-+ for (i = 0; i < f_sectors; i++) {
-+ if (s->s[i].state < SECTOR_dirty)
-+ continue;
-+
-+ nr_replicas_this_write =
-+ min_t(unsigned, nr_replicas_this_write,
-+ s->s[i].nr_replicas +
-+ s->s[i].replicas_reserved);
-+ }
-+
-+ for (i = 0; i < f_sectors; i++) {
-+ if (s->s[i].state < SECTOR_dirty)
-+ continue;
-+
-+ s->s[i].nr_replicas = w->opts.compression
-+ ? 0 : nr_replicas_this_write;
-+
-+ s->s[i].replicas_reserved = 0;
-+ bch2_folio_sector_set(folio, s, i, SECTOR_allocated);
-+ }
-+ spin_unlock(&s->lock);
-+
-+ BUG_ON(atomic_read(&s->write_count));
-+ atomic_set(&s->write_count, 1);
-+
-+ BUG_ON(folio_test_writeback(folio));
-+ folio_start_writeback(folio);
-+
-+ folio_unlock(folio);
-+
-+ offset = 0;
-+ while (1) {
-+ unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0;
-+ u64 sector;
-+
-+ while (offset < f_sectors &&
-+ w->tmp[offset].state < SECTOR_dirty)
-+ offset++;
-+
-+ if (offset == f_sectors)
-+ break;
-+
-+ while (offset + sectors < f_sectors &&
-+ w->tmp[offset + sectors].state >= SECTOR_dirty) {
-+ reserved_sectors += w->tmp[offset + sectors].replicas_reserved;
-+ dirty_sectors += w->tmp[offset + sectors].state == SECTOR_dirty;
-+ sectors++;
-+ }
-+ BUG_ON(!sectors);
-+
-+ sector = folio_sector(folio) + offset;
-+
-+ if (w->io &&
-+ (w->io->op.res.nr_replicas != nr_replicas_this_write ||
-+ bch_io_full(w->io, sectors << 9) ||
-+ bio_end_sector(&w->io->op.wbio.bio) != sector))
-+ bch2_writepage_do_io(w);
-+
-+ if (!w->io)
-+ bch2_writepage_io_alloc(c, wbc, w, inode, sector,
-+ nr_replicas_this_write);
-+
-+ atomic_inc(&s->write_count);
-+
-+ BUG_ON(inode != w->io->inode);
-+ BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio,
-+ sectors << 9, offset << 9));
-+
-+ /* Check for writing past i_size: */
-+ WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
-+ round_up(i_size, block_bytes(c)) &&
-+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags),
-+ "writing past i_size: %llu > %llu (unrounded %llu)\n",
-+ bio_end_sector(&w->io->op.wbio.bio) << 9,
-+ round_up(i_size, block_bytes(c)),
-+ i_size);
-+
-+ w->io->op.res.sectors += reserved_sectors;
-+ w->io->op.i_sectors_delta -= dirty_sectors;
-+ w->io->op.new_i_size = i_size;
-+
-+ offset += sectors;
-+ }
-+
-+ if (atomic_dec_and_test(&s->write_count))
-+ folio_end_writeback(folio);
-+
-+ return 0;
-+}
-+
-+int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc)
-+{
-+ struct bch_fs *c = mapping->host->i_sb->s_fs_info;
-+ struct bch_writepage_state w =
-+ bch_writepage_state_init(c, to_bch_ei(mapping->host));
-+ struct blk_plug plug;
-+ int ret;
-+
-+ blk_start_plug(&plug);
-+ ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w);
-+ if (w.io)
-+ bch2_writepage_do_io(&w);
-+ blk_finish_plug(&plug);
-+ kfree(w.tmp);
-+ return bch2_err_class(ret);
-+}
-+
-+/* buffered writes: */
-+
-+int bch2_write_begin(struct file *file, struct address_space *mapping,
-+ loff_t pos, unsigned len,
-+ struct page **pagep, void **fsdata)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(mapping->host);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch2_folio_reservation *res;
-+ struct folio *folio;
-+ unsigned offset;
-+ int ret = -ENOMEM;
-+
-+ res = kmalloc(sizeof(*res), GFP_KERNEL);
-+ if (!res)
-+ return -ENOMEM;
-+
-+ bch2_folio_reservation_init(c, inode, res);
-+ *fsdata = res;
-+
-+ bch2_pagecache_add_get(inode);
-+
-+ folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT,
-+ FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE,
-+ mapping_gfp_mask(mapping));
-+ if (IS_ERR_OR_NULL(folio))
-+ goto err_unlock;
-+
-+ offset = pos - folio_pos(folio);
-+ len = min_t(size_t, len, folio_end_pos(folio) - pos);
-+
-+ if (folio_test_uptodate(folio))
-+ goto out;
-+
-+ /* If we're writing entire folio, don't need to read it in first: */
-+ if (!offset && len == folio_size(folio))
-+ goto out;
-+
-+ if (!offset && pos + len >= inode->v.i_size) {
-+ folio_zero_segment(folio, len, folio_size(folio));
-+ flush_dcache_folio(folio);
-+ goto out;
-+ }
-+
-+ if (folio_pos(folio) >= inode->v.i_size) {
-+ folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio));
-+ flush_dcache_folio(folio);
-+ goto out;
-+ }
-+readpage:
-+ ret = bch2_read_single_folio(folio, mapping);
-+ if (ret)
-+ goto err;
-+out:
-+ ret = bch2_folio_set(c, inode_inum(inode), &folio, 1);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len);
-+ if (ret) {
-+ if (!folio_test_uptodate(folio)) {
-+ /*
-+ * If the folio hasn't been read in, we won't know if we
-+ * actually need a reservation - we don't actually need
-+ * to read here, we just need to check if the folio is
-+ * fully backed by uncompressed data:
-+ */
-+ goto readpage;
-+ }
-+
-+ goto err;
-+ }
-+
-+ *pagep = &folio->page;
-+ return 0;
-+err:
-+ folio_unlock(folio);
-+ folio_put(folio);
-+ *pagep = NULL;
-+err_unlock:
-+ bch2_pagecache_add_put(inode);
-+ kfree(res);
-+ *fsdata = NULL;
-+ return bch2_err_class(ret);
-+}
-+
-+int bch2_write_end(struct file *file, struct address_space *mapping,
-+ loff_t pos, unsigned len, unsigned copied,
-+ struct page *page, void *fsdata)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(mapping->host);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch2_folio_reservation *res = fsdata;
-+ struct folio *folio = page_folio(page);
-+ unsigned offset = pos - folio_pos(folio);
-+
-+ lockdep_assert_held(&inode->v.i_rwsem);
-+ BUG_ON(offset + copied > folio_size(folio));
-+
-+ if (unlikely(copied < len && !folio_test_uptodate(folio))) {
-+ /*
-+ * The folio needs to be read in, but that would destroy
-+ * our partial write - simplest thing is to just force
-+ * userspace to redo the write:
-+ */
-+ folio_zero_range(folio, 0, folio_size(folio));
-+ flush_dcache_folio(folio);
-+ copied = 0;
-+ }
-+
-+ spin_lock(&inode->v.i_lock);
-+ if (pos + copied > inode->v.i_size)
-+ i_size_write(&inode->v, pos + copied);
-+ spin_unlock(&inode->v.i_lock);
-+
-+ if (copied) {
-+ if (!folio_test_uptodate(folio))
-+ folio_mark_uptodate(folio);
-+
-+ bch2_set_folio_dirty(c, inode, folio, res, offset, copied);
-+
-+ inode->ei_last_dirtied = (unsigned long) current;
-+ }
-+
-+ folio_unlock(folio);
-+ folio_put(folio);
-+ bch2_pagecache_add_put(inode);
-+
-+ bch2_folio_reservation_put(c, inode, res);
-+ kfree(res);
-+
-+ return copied;
-+}
-+
-+static noinline void folios_trunc(folios *fs, struct folio **fi)
-+{
-+ while (fs->data + fs->nr > fi) {
-+ struct folio *f = darray_pop(fs);
-+
-+ folio_unlock(f);
-+ folio_put(f);
-+ }
-+}
-+
-+static int __bch2_buffered_write(struct bch_inode_info *inode,
-+ struct address_space *mapping,
-+ struct iov_iter *iter,
-+ loff_t pos, unsigned len)
-+{
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch2_folio_reservation res;
-+ folios fs;
-+ struct folio **fi, *f;
-+ unsigned copied = 0, f_offset, f_copied;
-+ u64 end = pos + len, f_pos, f_len;
-+ loff_t last_folio_pos = inode->v.i_size;
-+ int ret = 0;
-+
-+ BUG_ON(!len);
-+
-+ bch2_folio_reservation_init(c, inode, &res);
-+ darray_init(&fs);
-+
-+ ret = bch2_filemap_get_contig_folios_d(mapping, pos, end,
-+ FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT,
-+ mapping_gfp_mask(mapping),
-+ &fs);
-+ if (ret)
-+ goto out;
-+
-+ BUG_ON(!fs.nr);
-+
-+ f = darray_first(fs);
-+ if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
-+ ret = bch2_read_single_folio(f, mapping);
-+ if (ret)
-+ goto out;
-+ }
-+
-+ f = darray_last(fs);
-+ end = min(end, folio_end_pos(f));
-+ last_folio_pos = folio_pos(f);
-+ if (end != folio_end_pos(f) && !folio_test_uptodate(f)) {
-+ if (end >= inode->v.i_size) {
-+ folio_zero_range(f, 0, folio_size(f));
-+ } else {
-+ ret = bch2_read_single_folio(f, mapping);
-+ if (ret)
-+ goto out;
-+ }
-+ }
-+
-+ ret = bch2_folio_set(c, inode_inum(inode), fs.data, fs.nr);
-+ if (ret)
-+ goto out;
-+
-+ f_pos = pos;
-+ f_offset = pos - folio_pos(darray_first(fs));
-+ darray_for_each(fs, fi) {
-+ f = *fi;
-+ f_len = min(end, folio_end_pos(f)) - f_pos;
-+
-+ /*
-+ * XXX: per POSIX and fstests generic/275, on -ENOSPC we're
-+ * supposed to write as much as we have disk space for.
-+ *
-+ * On failure here we should still write out a partial page if
-+ * we aren't completely out of disk space - we don't do that
-+ * yet:
-+ */
-+ ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len);
-+ if (unlikely(ret)) {
-+ folios_trunc(&fs, fi);
-+ if (!fs.nr)
-+ goto out;
-+
-+ end = min(end, folio_end_pos(darray_last(fs)));
-+ break;
-+ }
-+
-+ f_pos = folio_end_pos(f);
-+ f_offset = 0;
-+ }
-+
-+ if (mapping_writably_mapped(mapping))
-+ darray_for_each(fs, fi)
-+ flush_dcache_folio(*fi);
-+
-+ f_pos = pos;
-+ f_offset = pos - folio_pos(darray_first(fs));
-+ darray_for_each(fs, fi) {
-+ f = *fi;
-+ f_len = min(end, folio_end_pos(f)) - f_pos;
-+ f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
-+ if (!f_copied) {
-+ folios_trunc(&fs, fi);
-+ break;
-+ }
-+
-+ if (!folio_test_uptodate(f) &&
-+ f_copied != folio_size(f) &&
-+ pos + copied + f_copied < inode->v.i_size) {
-+ iov_iter_revert(iter, f_copied);
-+ folio_zero_range(f, 0, folio_size(f));
-+ folios_trunc(&fs, fi);
-+ break;
-+ }
-+
-+ flush_dcache_folio(f);
-+ copied += f_copied;
-+
-+ if (f_copied != f_len) {
-+ folios_trunc(&fs, fi + 1);
-+ break;
-+ }
-+
-+ f_pos = folio_end_pos(f);
-+ f_offset = 0;
-+ }
-+
-+ if (!copied)
-+ goto out;
-+
-+ end = pos + copied;
-+
-+ spin_lock(&inode->v.i_lock);
-+ if (end > inode->v.i_size)
-+ i_size_write(&inode->v, end);
-+ spin_unlock(&inode->v.i_lock);
-+
-+ f_pos = pos;
-+ f_offset = pos - folio_pos(darray_first(fs));
-+ darray_for_each(fs, fi) {
-+ f = *fi;
-+ f_len = min(end, folio_end_pos(f)) - f_pos;
-+
-+ if (!folio_test_uptodate(f))
-+ folio_mark_uptodate(f);
-+
-+ bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len);
-+
-+ f_pos = folio_end_pos(f);
-+ f_offset = 0;
-+ }
-+
-+ inode->ei_last_dirtied = (unsigned long) current;
-+out:
-+ darray_for_each(fs, fi) {
-+ folio_unlock(*fi);
-+ folio_put(*fi);
-+ }
-+
-+ /*
-+ * If the last folio added to the mapping starts beyond current EOF, we
-+ * performed a short write but left around at least one post-EOF folio.
-+ * Clean up the mapping before we return.
-+ */
-+ if (last_folio_pos >= inode->v.i_size)
-+ truncate_pagecache(&inode->v, inode->v.i_size);
-+
-+ darray_exit(&fs);
-+ bch2_folio_reservation_put(c, inode, &res);
-+
-+ return copied ?: ret;
-+}
-+
-+static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
-+{
-+ struct file *file = iocb->ki_filp;
-+ struct address_space *mapping = file->f_mapping;
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ loff_t pos = iocb->ki_pos;
-+ ssize_t written = 0;
-+ int ret = 0;
-+
-+ bch2_pagecache_add_get(inode);
-+
-+ do {
-+ unsigned offset = pos & (PAGE_SIZE - 1);
-+ unsigned bytes = iov_iter_count(iter);
-+again:
-+ /*
-+ * Bring in the user page that we will copy from _first_.
-+ * Otherwise there's a nasty deadlock on copying from the
-+ * same page as we're writing to, without it being marked
-+ * up-to-date.
-+ *
-+ * Not only is this an optimisation, but it is also required
-+ * to check that the address is actually valid, when atomic
-+ * usercopies are used, below.
-+ */
-+ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
-+ bytes = min_t(unsigned long, iov_iter_count(iter),
-+ PAGE_SIZE - offset);
-+
-+ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
-+ ret = -EFAULT;
-+ break;
-+ }
-+ }
-+
-+ if (unlikely(fatal_signal_pending(current))) {
-+ ret = -EINTR;
-+ break;
-+ }
-+
-+ ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
-+ if (unlikely(ret < 0))
-+ break;
-+
-+ cond_resched();
-+
-+ if (unlikely(ret == 0)) {
-+ /*
-+ * If we were unable to copy any data at all, we must
-+ * fall back to a single segment length write.
-+ *
-+ * If we didn't fallback here, we could livelock
-+ * because not all segments in the iov can be copied at
-+ * once without a pagefault.
-+ */
-+ bytes = min_t(unsigned long, PAGE_SIZE - offset,
-+ iov_iter_single_seg_count(iter));
-+ goto again;
-+ }
-+ pos += ret;
-+ written += ret;
-+ ret = 0;
-+
-+ balance_dirty_pages_ratelimited(mapping);
-+ } while (iov_iter_count(iter));
-+
-+ bch2_pagecache_add_put(inode);
-+
-+ return written ? written : ret;
-+}
-+
-+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
-+{
-+ struct file *file = iocb->ki_filp;
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ ssize_t ret;
-+
-+ if (iocb->ki_flags & IOCB_DIRECT) {
-+ ret = bch2_direct_write(iocb, from);
-+ goto out;
-+ }
-+
-+ inode_lock(&inode->v);
-+
-+ ret = generic_write_checks(iocb, from);
-+ if (ret <= 0)
-+ goto unlock;
-+
-+ ret = file_remove_privs(file);
-+ if (ret)
-+ goto unlock;
-+
-+ ret = file_update_time(file);
-+ if (ret)
-+ goto unlock;
-+
-+ ret = bch2_buffered_write(iocb, from);
-+ if (likely(ret > 0))
-+ iocb->ki_pos += ret;
-+unlock:
-+ inode_unlock(&inode->v);
-+
-+ if (ret > 0)
-+ ret = generic_write_sync(iocb, ret);
-+out:
-+ return bch2_err_class(ret);
-+}
-+
-+void bch2_fs_fs_io_buffered_exit(struct bch_fs *c)
-+{
-+ bioset_exit(&c->writepage_bioset);
-+}
-+
-+int bch2_fs_fs_io_buffered_init(struct bch_fs *c)
-+{
-+ if (bioset_init(&c->writepage_bioset,
-+ 4, offsetof(struct bch_writepage_io, op.wbio.bio),
-+ BIOSET_NEED_BVECS))
-+ return -BCH_ERR_ENOMEM_writepage_bioset_init;
-+
-+ return 0;
-+}
-+
-+#endif /* NO_BCACHEFS_FS */
-diff --git a/fs/bcachefs/fs-io-buffered.h b/fs/bcachefs/fs-io-buffered.h
-new file mode 100644
-index 000000000000..a6126ff790e6
---- /dev/null
-+++ b/fs/bcachefs/fs-io-buffered.h
-@@ -0,0 +1,27 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_IO_BUFFERED_H
-+#define _BCACHEFS_FS_IO_BUFFERED_H
-+
-+#ifndef NO_BCACHEFS_FS
-+
-+int bch2_read_single_folio(struct folio *, struct address_space *);
-+int bch2_read_folio(struct file *, struct folio *);
-+
-+int bch2_writepages(struct address_space *, struct writeback_control *);
-+void bch2_readahead(struct readahead_control *);
-+
-+int bch2_write_begin(struct file *, struct address_space *, loff_t,
-+ unsigned, struct page **, void **);
-+int bch2_write_end(struct file *, struct address_space *, loff_t,
-+ unsigned, unsigned, struct page *, void *);
-+
-+ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
-+
-+void bch2_fs_fs_io_buffered_exit(struct bch_fs *);
-+int bch2_fs_fs_io_buffered_init(struct bch_fs *);
-+#else
-+static inline void bch2_fs_fs_io_buffered_exit(struct bch_fs *c) {}
-+static inline int bch2_fs_fs_io_buffered_init(struct bch_fs *c) { return 0; }
-+#endif
-+
-+#endif /* _BCACHEFS_FS_IO_BUFFERED_H */
-diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
-new file mode 100644
-index 000000000000..5b42a76c4796
---- /dev/null
-+++ b/fs/bcachefs/fs-io-direct.c
-@@ -0,0 +1,680 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "fs.h"
-+#include "fs-io.h"
-+#include "fs-io-direct.h"
-+#include "fs-io-pagecache.h"
-+#include "io_read.h"
-+#include "io_write.h"
-+
-+#include <linux/kthread.h>
-+#include <linux/pagemap.h>
-+#include <linux/prefetch.h>
-+#include <linux/task_io_accounting_ops.h>
-+
-+/* O_DIRECT reads */
-+
-+struct dio_read {
-+ struct closure cl;
-+ struct kiocb *req;
-+ long ret;
-+ bool should_dirty;
-+ struct bch_read_bio rbio;
-+};
-+
-+static void bio_check_or_release(struct bio *bio, bool check_dirty)
-+{
-+ if (check_dirty) {
-+ bio_check_pages_dirty(bio);
-+ } else {
-+ bio_release_pages(bio, false);
-+ bio_put(bio);
-+ }
-+}
-+
-+static void bch2_dio_read_complete(struct closure *cl)
-+{
-+ struct dio_read *dio = container_of(cl, struct dio_read, cl);
-+
-+ dio->req->ki_complete(dio->req, dio->ret);
-+ bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
-+}
-+
-+static void bch2_direct_IO_read_endio(struct bio *bio)
-+{
-+ struct dio_read *dio = bio->bi_private;
-+
-+ if (bio->bi_status)
-+ dio->ret = blk_status_to_errno(bio->bi_status);
-+
-+ closure_put(&dio->cl);
-+}
-+
-+static void bch2_direct_IO_read_split_endio(struct bio *bio)
-+{
-+ struct dio_read *dio = bio->bi_private;
-+ bool should_dirty = dio->should_dirty;
-+
-+ bch2_direct_IO_read_endio(bio);
-+ bio_check_or_release(bio, should_dirty);
-+}
-+
-+static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
-+{
-+ struct file *file = req->ki_filp;
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_io_opts opts;
-+ struct dio_read *dio;
-+ struct bio *bio;
-+ loff_t offset = req->ki_pos;
-+ bool sync = is_sync_kiocb(req);
-+ size_t shorten;
-+ ssize_t ret;
-+
-+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
-+
-+ if ((offset|iter->count) & (block_bytes(c) - 1))
-+ return -EINVAL;
-+
-+ ret = min_t(loff_t, iter->count,
-+ max_t(loff_t, 0, i_size_read(&inode->v) - offset));
-+
-+ if (!ret)
-+ return ret;
-+
-+ shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c));
-+ iter->count -= shorten;
-+
-+ bio = bio_alloc_bioset(NULL,
-+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
-+ REQ_OP_READ,
-+ GFP_KERNEL,
-+ &c->dio_read_bioset);
-+
-+ bio->bi_end_io = bch2_direct_IO_read_endio;
-+
-+ dio = container_of(bio, struct dio_read, rbio.bio);
-+ closure_init(&dio->cl, NULL);
-+
-+ /*
-+ * this is a _really_ horrible hack just to avoid an atomic sub at the
-+ * end:
-+ */
-+ if (!sync) {
-+ set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL);
-+ atomic_set(&dio->cl.remaining,
-+ CLOSURE_REMAINING_INITIALIZER -
-+ CLOSURE_RUNNING +
-+ CLOSURE_DESTRUCTOR);
-+ } else {
-+ atomic_set(&dio->cl.remaining,
-+ CLOSURE_REMAINING_INITIALIZER + 1);
-+ dio->cl.closure_get_happened = true;
-+ }
-+
-+ dio->req = req;
-+ dio->ret = ret;
-+ /*
-+ * This is one of the sketchier things I've encountered: we have to skip
-+ * the dirtying of requests that are internal from the kernel (i.e. from
-+ * loopback), because we'll deadlock on page_lock.
-+ */
-+ dio->should_dirty = iter_is_iovec(iter);
-+
-+ goto start;
-+ while (iter->count) {
-+ bio = bio_alloc_bioset(NULL,
-+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
-+ REQ_OP_READ,
-+ GFP_KERNEL,
-+ &c->bio_read);
-+ bio->bi_end_io = bch2_direct_IO_read_split_endio;
-+start:
-+ bio->bi_opf = REQ_OP_READ|REQ_SYNC;
-+ bio->bi_iter.bi_sector = offset >> 9;
-+ bio->bi_private = dio;
-+
-+ ret = bio_iov_iter_get_pages(bio, iter);
-+ if (ret < 0) {
-+ /* XXX: fault inject this path */
-+ bio->bi_status = BLK_STS_RESOURCE;
-+ bio_endio(bio);
-+ break;
-+ }
-+
-+ offset += bio->bi_iter.bi_size;
-+
-+ if (dio->should_dirty)
-+ bio_set_pages_dirty(bio);
-+
-+ if (iter->count)
-+ closure_get(&dio->cl);
-+
-+ bch2_read(c, rbio_init(bio, opts), inode_inum(inode));
-+ }
-+
-+ iter->count += shorten;
-+
-+ if (sync) {
-+ closure_sync(&dio->cl);
-+ closure_debug_destroy(&dio->cl);
-+ ret = dio->ret;
-+ bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
-+ return ret;
-+ } else {
-+ return -EIOCBQUEUED;
-+ }
-+}
-+
-+ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
-+{
-+ struct file *file = iocb->ki_filp;
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ struct address_space *mapping = file->f_mapping;
-+ size_t count = iov_iter_count(iter);
-+ ssize_t ret;
-+
-+ if (!count)
-+ return 0; /* skip atime */
-+
-+ if (iocb->ki_flags & IOCB_DIRECT) {
-+ struct blk_plug plug;
-+
-+ if (unlikely(mapping->nrpages)) {
-+ ret = filemap_write_and_wait_range(mapping,
-+ iocb->ki_pos,
-+ iocb->ki_pos + count - 1);
-+ if (ret < 0)
-+ goto out;
-+ }
-+
-+ file_accessed(file);
-+
-+ blk_start_plug(&plug);
-+ ret = bch2_direct_IO_read(iocb, iter);
-+ blk_finish_plug(&plug);
-+
-+ if (ret >= 0)
-+ iocb->ki_pos += ret;
-+ } else {
-+ bch2_pagecache_add_get(inode);
-+ ret = generic_file_read_iter(iocb, iter);
-+ bch2_pagecache_add_put(inode);
-+ }
-+out:
-+ return bch2_err_class(ret);
-+}
-+
-+/* O_DIRECT writes */
-+
-+struct dio_write {
-+ struct kiocb *req;
-+ struct address_space *mapping;
-+ struct bch_inode_info *inode;
-+ struct mm_struct *mm;
-+ unsigned loop:1,
-+ extending:1,
-+ sync:1,
-+ flush:1,
-+ free_iov:1;
-+ struct quota_res quota_res;
-+ u64 written;
-+
-+ struct iov_iter iter;
-+ struct iovec inline_vecs[2];
-+
-+ /* must be last: */
-+ struct bch_write_op op;
-+};
-+
-+static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum,
-+ u64 offset, u64 size,
-+ unsigned nr_replicas, bool compressed)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ u64 end = offset + size;
-+ u32 snapshot;
-+ bool ret = true;
-+ int err;
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ err = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (err)
-+ goto err;
-+
-+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
-+ SPOS(inum.inum, offset, snapshot),
-+ BTREE_ITER_SLOTS, k, err) {
-+ if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end)))
-+ break;
-+
-+ if (k.k->p.snapshot != snapshot ||
-+ nr_replicas > bch2_bkey_replicas(c, k) ||
-+ (!compressed && bch2_bkey_sectors_compressed(k))) {
-+ ret = false;
-+ break;
-+ }
-+ }
-+
-+ offset = iter.pos.offset;
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(err, BCH_ERR_transaction_restart))
-+ goto retry;
-+ bch2_trans_put(trans);
-+
-+ return err ? false : ret;
-+}
-+
-+static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio)
-+{
-+ struct bch_fs *c = dio->op.c;
-+ struct bch_inode_info *inode = dio->inode;
-+ struct bio *bio = &dio->op.wbio.bio;
-+
-+ return bch2_check_range_allocated(c, inode_inum(inode),
-+ dio->op.pos.offset, bio_sectors(bio),
-+ dio->op.opts.data_replicas,
-+ dio->op.opts.compression != 0);
-+}
-+
-+static void bch2_dio_write_loop_async(struct bch_write_op *);
-+static __always_inline long bch2_dio_write_done(struct dio_write *dio);
-+
-+/*
-+ * We're going to return -EIOCBQUEUED, but we haven't finished consuming the
-+ * iov_iter yet, so we need to stash a copy of the iovec: it might be on the
-+ * caller's stack, we're not guaranteed that it will live for the duration of
-+ * the IO:
-+ */
-+static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
-+{
-+ struct iovec *iov = dio->inline_vecs;
-+
-+ /*
-+ * iov_iter has a single embedded iovec - nothing to do:
-+ */
-+ if (iter_is_ubuf(&dio->iter))
-+ return 0;
-+
-+ /*
-+ * We don't currently handle non-iovec iov_iters here - return an error,
-+ * and we'll fall back to doing the IO synchronously:
-+ */
-+ if (!iter_is_iovec(&dio->iter))
-+ return -1;
-+
-+ if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
-+ iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
-+ GFP_KERNEL);
-+ if (unlikely(!iov))
-+ return -ENOMEM;
-+
-+ dio->free_iov = true;
-+ }
-+
-+ memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov));
-+ dio->iter.__iov = iov;
-+ return 0;
-+}
-+
-+static void bch2_dio_write_flush_done(struct closure *cl)
-+{
-+ struct dio_write *dio = container_of(cl, struct dio_write, op.cl);
-+ struct bch_fs *c = dio->op.c;
-+
-+ closure_debug_destroy(cl);
-+
-+ dio->op.error = bch2_journal_error(&c->journal);
-+
-+ bch2_dio_write_done(dio);
-+}
-+
-+static noinline void bch2_dio_write_flush(struct dio_write *dio)
-+{
-+ struct bch_fs *c = dio->op.c;
-+ struct bch_inode_unpacked inode;
-+ int ret;
-+
-+ dio->flush = 0;
-+
-+ closure_init(&dio->op.cl, NULL);
-+
-+ if (!dio->op.error) {
-+ ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode);
-+ if (ret) {
-+ dio->op.error = ret;
-+ } else {
-+ bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq,
-+ &dio->op.cl);
-+ bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl);
-+ }
-+ }
-+
-+ if (dio->sync) {
-+ closure_sync(&dio->op.cl);
-+ closure_debug_destroy(&dio->op.cl);
-+ } else {
-+ continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL);
-+ }
-+}
-+
-+static __always_inline long bch2_dio_write_done(struct dio_write *dio)
-+{
-+ struct kiocb *req = dio->req;
-+ struct bch_inode_info *inode = dio->inode;
-+ bool sync = dio->sync;
-+ long ret;
-+
-+ if (unlikely(dio->flush)) {
-+ bch2_dio_write_flush(dio);
-+ if (!sync)
-+ return -EIOCBQUEUED;
-+ }
-+
-+ bch2_pagecache_block_put(inode);
-+
-+ if (dio->free_iov)
-+ kfree(dio->iter.__iov);
-+
-+ ret = dio->op.error ?: ((long) dio->written << 9);
-+ bio_put(&dio->op.wbio.bio);
-+
-+ /* inode->i_dio_count is our ref on inode and thus bch_fs */
-+ inode_dio_end(&inode->v);
-+
-+ if (ret < 0)
-+ ret = bch2_err_class(ret);
-+
-+ if (!sync) {
-+ req->ki_complete(req, ret);
-+ ret = -EIOCBQUEUED;
-+ }
-+ return ret;
-+}
-+
-+static __always_inline void bch2_dio_write_end(struct dio_write *dio)
-+{
-+ struct bch_fs *c = dio->op.c;
-+ struct kiocb *req = dio->req;
-+ struct bch_inode_info *inode = dio->inode;
-+ struct bio *bio = &dio->op.wbio.bio;
-+
-+ req->ki_pos += (u64) dio->op.written << 9;
-+ dio->written += dio->op.written;
-+
-+ if (dio->extending) {
-+ spin_lock(&inode->v.i_lock);
-+ if (req->ki_pos > inode->v.i_size)
-+ i_size_write(&inode->v, req->ki_pos);
-+ spin_unlock(&inode->v.i_lock);
-+ }
-+
-+ if (dio->op.i_sectors_delta || dio->quota_res.sectors) {
-+ mutex_lock(&inode->ei_quota_lock);
-+ __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta);
-+ __bch2_quota_reservation_put(c, inode, &dio->quota_res);
-+ mutex_unlock(&inode->ei_quota_lock);
-+ }
-+
-+ bio_release_pages(bio, false);
-+
-+ if (unlikely(dio->op.error))
-+ set_bit(EI_INODE_ERROR, &inode->ei_flags);
-+}
-+
-+static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
-+{
-+ struct bch_fs *c = dio->op.c;
-+ struct kiocb *req = dio->req;
-+ struct address_space *mapping = dio->mapping;
-+ struct bch_inode_info *inode = dio->inode;
-+ struct bch_io_opts opts;
-+ struct bio *bio = &dio->op.wbio.bio;
-+ unsigned unaligned, iter_count;
-+ bool sync = dio->sync, dropped_locks;
-+ long ret;
-+
-+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
-+
-+ while (1) {
-+ iter_count = dio->iter.count;
-+
-+ EBUG_ON(current->faults_disabled_mapping);
-+ current->faults_disabled_mapping = mapping;
-+
-+ ret = bio_iov_iter_get_pages(bio, &dio->iter);
-+
-+ dropped_locks = fdm_dropped_locks();
-+
-+ current->faults_disabled_mapping = NULL;
-+
-+ /*
-+ * If the fault handler returned an error but also signalled
-+ * that it dropped & retook ei_pagecache_lock, we just need to
-+ * re-shoot down the page cache and retry:
-+ */
-+ if (dropped_locks && ret)
-+ ret = 0;
-+
-+ if (unlikely(ret < 0))
-+ goto err;
-+
-+ if (unlikely(dropped_locks)) {
-+ ret = bch2_write_invalidate_inode_pages_range(mapping,
-+ req->ki_pos,
-+ req->ki_pos + iter_count - 1);
-+ if (unlikely(ret))
-+ goto err;
-+
-+ if (!bio->bi_iter.bi_size)
-+ continue;
-+ }
-+
-+ unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
-+ bio->bi_iter.bi_size -= unaligned;
-+ iov_iter_revert(&dio->iter, unaligned);
-+
-+ if (!bio->bi_iter.bi_size) {
-+ /*
-+ * bio_iov_iter_get_pages was only able to get <
-+ * blocksize worth of pages:
-+ */
-+ ret = -EFAULT;
-+ goto err;
-+ }
-+
-+ bch2_write_op_init(&dio->op, c, opts);
-+ dio->op.end_io = sync
-+ ? NULL
-+ : bch2_dio_write_loop_async;
-+ dio->op.target = dio->op.opts.foreground_target;
-+ dio->op.write_point = writepoint_hashed((unsigned long) current);
-+ dio->op.nr_replicas = dio->op.opts.data_replicas;
-+ dio->op.subvol = inode->ei_subvol;
-+ dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
-+ dio->op.devs_need_flush = &inode->ei_devs_need_flush;
-+
-+ if (sync)
-+ dio->op.flags |= BCH_WRITE_SYNC;
-+ dio->op.flags |= BCH_WRITE_CHECK_ENOSPC;
-+
-+ ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
-+ bio_sectors(bio), true);
-+ if (unlikely(ret))
-+ goto err;
-+
-+ ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
-+ dio->op.opts.data_replicas, 0);
-+ if (unlikely(ret) &&
-+ !bch2_dio_write_check_allocated(dio))
-+ goto err;
-+
-+ task_io_account_write(bio->bi_iter.bi_size);
-+
-+ if (unlikely(dio->iter.count) &&
-+ !dio->sync &&
-+ !dio->loop &&
-+ bch2_dio_write_copy_iov(dio))
-+ dio->sync = sync = true;
-+
-+ dio->loop = true;
-+ closure_call(&dio->op.cl, bch2_write, NULL, NULL);
-+
-+ if (!sync)
-+ return -EIOCBQUEUED;
-+
-+ bch2_dio_write_end(dio);
-+
-+ if (likely(!dio->iter.count) || dio->op.error)
-+ break;
-+
-+ bio_reset(bio, NULL, REQ_OP_WRITE);
-+ }
-+out:
-+ return bch2_dio_write_done(dio);
-+err:
-+ dio->op.error = ret;
-+
-+ bio_release_pages(bio, false);
-+
-+ bch2_quota_reservation_put(c, inode, &dio->quota_res);
-+ goto out;
-+}
-+
-+static noinline __cold void bch2_dio_write_continue(struct dio_write *dio)
-+{
-+ struct mm_struct *mm = dio->mm;
-+
-+ bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE);
-+
-+ if (mm)
-+ kthread_use_mm(mm);
-+ bch2_dio_write_loop(dio);
-+ if (mm)
-+ kthread_unuse_mm(mm);
-+}
-+
-+static void bch2_dio_write_loop_async(struct bch_write_op *op)
-+{
-+ struct dio_write *dio = container_of(op, struct dio_write, op);
-+
-+ bch2_dio_write_end(dio);
-+
-+ if (likely(!dio->iter.count) || dio->op.error)
-+ bch2_dio_write_done(dio);
-+ else
-+ bch2_dio_write_continue(dio);
-+}
-+
-+ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
-+{
-+ struct file *file = req->ki_filp;
-+ struct address_space *mapping = file->f_mapping;
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct dio_write *dio;
-+ struct bio *bio;
-+ bool locked = true, extending;
-+ ssize_t ret;
-+
-+ prefetch(&c->opts);
-+ prefetch((void *) &c->opts + 64);
-+ prefetch(&inode->ei_inode);
-+ prefetch((void *) &inode->ei_inode + 64);
-+
-+ inode_lock(&inode->v);
-+
-+ ret = generic_write_checks(req, iter);
-+ if (unlikely(ret <= 0))
-+ goto err;
-+
-+ ret = file_remove_privs(file);
-+ if (unlikely(ret))
-+ goto err;
-+
-+ ret = file_update_time(file);
-+ if (unlikely(ret))
-+ goto err;
-+
-+ if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
-+ goto err;
-+
-+ inode_dio_begin(&inode->v);
-+ bch2_pagecache_block_get(inode);
-+
-+ extending = req->ki_pos + iter->count > inode->v.i_size;
-+ if (!extending) {
-+ inode_unlock(&inode->v);
-+ locked = false;
-+ }
-+
-+ bio = bio_alloc_bioset(NULL,
-+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
-+ REQ_OP_WRITE,
-+ GFP_KERNEL,
-+ &c->dio_write_bioset);
-+ dio = container_of(bio, struct dio_write, op.wbio.bio);
-+ dio->req = req;
-+ dio->mapping = mapping;
-+ dio->inode = inode;
-+ dio->mm = current->mm;
-+ dio->loop = false;
-+ dio->extending = extending;
-+ dio->sync = is_sync_kiocb(req) || extending;
-+ dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
-+ dio->free_iov = false;
-+ dio->quota_res.sectors = 0;
-+ dio->written = 0;
-+ dio->iter = *iter;
-+ dio->op.c = c;
-+
-+ if (unlikely(mapping->nrpages)) {
-+ ret = bch2_write_invalidate_inode_pages_range(mapping,
-+ req->ki_pos,
-+ req->ki_pos + iter->count - 1);
-+ if (unlikely(ret))
-+ goto err_put_bio;
-+ }
-+
-+ ret = bch2_dio_write_loop(dio);
-+err:
-+ if (locked)
-+ inode_unlock(&inode->v);
-+ return ret;
-+err_put_bio:
-+ bch2_pagecache_block_put(inode);
-+ bio_put(bio);
-+ inode_dio_end(&inode->v);
-+ goto err;
-+}
-+
-+void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
-+{
-+ bioset_exit(&c->dio_write_bioset);
-+ bioset_exit(&c->dio_read_bioset);
-+}
-+
-+int bch2_fs_fs_io_direct_init(struct bch_fs *c)
-+{
-+ if (bioset_init(&c->dio_read_bioset,
-+ 4, offsetof(struct dio_read, rbio.bio),
-+ BIOSET_NEED_BVECS))
-+ return -BCH_ERR_ENOMEM_dio_read_bioset_init;
-+
-+ if (bioset_init(&c->dio_write_bioset,
-+ 4, offsetof(struct dio_write, op.wbio.bio),
-+ BIOSET_NEED_BVECS))
-+ return -BCH_ERR_ENOMEM_dio_write_bioset_init;
-+
-+ return 0;
-+}
-+
-+#endif /* NO_BCACHEFS_FS */
-diff --git a/fs/bcachefs/fs-io-direct.h b/fs/bcachefs/fs-io-direct.h
-new file mode 100644
-index 000000000000..814621ec7f81
---- /dev/null
-+++ b/fs/bcachefs/fs-io-direct.h
-@@ -0,0 +1,16 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_IO_DIRECT_H
-+#define _BCACHEFS_FS_IO_DIRECT_H
-+
-+#ifndef NO_BCACHEFS_FS
-+ssize_t bch2_direct_write(struct kiocb *, struct iov_iter *);
-+ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *);
-+
-+void bch2_fs_fs_io_direct_exit(struct bch_fs *);
-+int bch2_fs_fs_io_direct_init(struct bch_fs *);
-+#else
-+static inline void bch2_fs_fs_io_direct_exit(struct bch_fs *c) {}
-+static inline int bch2_fs_fs_io_direct_init(struct bch_fs *c) { return 0; }
-+#endif
-+
-+#endif /* _BCACHEFS_FS_IO_DIRECT_H */
-diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
-new file mode 100644
-index 000000000000..8bd9bcdd27f7
---- /dev/null
-+++ b/fs/bcachefs/fs-io-pagecache.c
-@@ -0,0 +1,791 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "bcachefs.h"
-+#include "btree_iter.h"
-+#include "extents.h"
-+#include "fs-io.h"
-+#include "fs-io-pagecache.h"
-+#include "subvolume.h"
-+
-+#include <linux/pagevec.h>
-+#include <linux/writeback.h>
-+
-+int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
-+ loff_t start, u64 end,
-+ int fgp_flags, gfp_t gfp,
-+ folios *fs)
-+{
-+ struct folio *f;
-+ u64 pos = start;
-+ int ret = 0;
-+
-+ while (pos < end) {
-+ if ((u64) pos >= (u64) start + (1ULL << 20))
-+ fgp_flags &= ~FGP_CREAT;
-+
-+ ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
-+ if (ret)
-+ break;
-+
-+ f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
-+ if (IS_ERR_OR_NULL(f))
-+ break;
-+
-+ BUG_ON(fs->nr && folio_pos(f) != pos);
-+
-+ pos = folio_end_pos(f);
-+ darray_push(fs, f);
-+ }
-+
-+ if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
-+ ret = -ENOMEM;
-+
-+ return fs->nr ? 0 : ret;
-+}
-+
-+/* pagecache_block must be held */
-+int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
-+ loff_t start, loff_t end)
-+{
-+ int ret;
-+
-+ /*
-+ * XXX: the way this is currently implemented, we can spin if a process
-+ * is continually redirtying a specific page
-+ */
-+ do {
-+ if (!mapping->nrpages)
-+ return 0;
-+
-+ ret = filemap_write_and_wait_range(mapping, start, end);
-+ if (ret)
-+ break;
-+
-+ if (!mapping->nrpages)
-+ return 0;
-+
-+ ret = invalidate_inode_pages2_range(mapping,
-+ start >> PAGE_SHIFT,
-+ end >> PAGE_SHIFT);
-+ } while (ret == -EBUSY);
-+
-+ return ret;
-+}
-+
-+#if 0
-+/* Useful for debug tracing: */
-+static const char * const bch2_folio_sector_states[] = {
-+#define x(n) #n,
-+ BCH_FOLIO_SECTOR_STATE()
-+#undef x
-+ NULL
-+};
-+#endif
-+
-+static inline enum bch_folio_sector_state
-+folio_sector_dirty(enum bch_folio_sector_state state)
-+{
-+ switch (state) {
-+ case SECTOR_unallocated:
-+ return SECTOR_dirty;
-+ case SECTOR_reserved:
-+ return SECTOR_dirty_reserved;
-+ default:
-+ return state;
-+ }
-+}
-+
-+static inline enum bch_folio_sector_state
-+folio_sector_undirty(enum bch_folio_sector_state state)
-+{
-+ switch (state) {
-+ case SECTOR_dirty:
-+ return SECTOR_unallocated;
-+ case SECTOR_dirty_reserved:
-+ return SECTOR_reserved;
-+ default:
-+ return state;
-+ }
-+}
-+
-+static inline enum bch_folio_sector_state
-+folio_sector_reserve(enum bch_folio_sector_state state)
-+{
-+ switch (state) {
-+ case SECTOR_unallocated:
-+ return SECTOR_reserved;
-+ case SECTOR_dirty:
-+ return SECTOR_dirty_reserved;
-+ default:
-+ return state;
-+ }
-+}
-+
-+/* for newly allocated folios: */
-+struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
-+{
-+ struct bch_folio *s;
-+
-+ s = kzalloc(sizeof(*s) +
-+ sizeof(struct bch_folio_sector) *
-+ folio_sectors(folio), gfp);
-+ if (!s)
-+ return NULL;
-+
-+ spin_lock_init(&s->lock);
-+ folio_attach_private(folio, s);
-+ return s;
-+}
-+
-+struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
-+{
-+ return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
-+}
-+
-+static unsigned bkey_to_sector_state(struct bkey_s_c k)
-+{
-+ if (bkey_extent_is_reservation(k))
-+ return SECTOR_reserved;
-+ if (bkey_extent_is_allocation(k.k))
-+ return SECTOR_allocated;
-+ return SECTOR_unallocated;
-+}
-+
-+static void __bch2_folio_set(struct folio *folio,
-+ unsigned pg_offset, unsigned pg_len,
-+ unsigned nr_ptrs, unsigned state)
-+{
-+ struct bch_folio *s = bch2_folio(folio);
-+ unsigned i, sectors = folio_sectors(folio);
-+
-+ BUG_ON(pg_offset >= sectors);
-+ BUG_ON(pg_offset + pg_len > sectors);
-+
-+ spin_lock(&s->lock);
-+
-+ for (i = pg_offset; i < pg_offset + pg_len; i++) {
-+ s->s[i].nr_replicas = nr_ptrs;
-+ bch2_folio_sector_set(folio, s, i, state);
-+ }
-+
-+ if (i == sectors)
-+ s->uptodate = true;
-+
-+ spin_unlock(&s->lock);
-+}
-+
-+/*
-+ * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
-+ * extents btree:
-+ */
-+int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
-+ struct folio **fs, unsigned nr_folios)
-+{
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_folio *s;
-+ u64 offset = folio_sector(fs[0]);
-+ unsigned folio_idx;
-+ u32 snapshot;
-+ bool need_set = false;
-+ int ret;
-+
-+ for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
-+ s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
-+ if (!s)
-+ return -ENOMEM;
-+
-+ need_set |= !s->uptodate;
-+ }
-+
-+ if (!need_set)
-+ return 0;
-+
-+ folio_idx = 0;
-+ trans = bch2_trans_get(c);
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
-+ SPOS(inum.inum, offset, snapshot),
-+ BTREE_ITER_SLOTS, k, ret) {
-+ unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
-+ unsigned state = bkey_to_sector_state(k);
-+
-+ while (folio_idx < nr_folios) {
-+ struct folio *folio = fs[folio_idx];
-+ u64 folio_start = folio_sector(folio);
-+ u64 folio_end = folio_end_sector(folio);
-+ unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
-+ folio_start;
-+ unsigned folio_len = min(k.k->p.offset, folio_end) -
-+ folio_offset - folio_start;
-+
-+ BUG_ON(k.k->p.offset < folio_start);
-+ BUG_ON(bkey_start_offset(k.k) > folio_end);
-+
-+ if (!bch2_folio(folio)->uptodate)
-+ __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);
-+
-+ if (k.k->p.offset < folio_end)
-+ break;
-+ folio_idx++;
-+ }
-+
-+ if (folio_idx == nr_folios)
-+ break;
-+ }
-+
-+ offset = iter.pos.offset;
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+ bch2_trans_put(trans);
-+
-+ return ret;
-+}
-+
-+void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
-+{
-+ struct bvec_iter iter;
-+ struct folio_vec fv;
-+ unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
-+ ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
-+ unsigned state = bkey_to_sector_state(k);
-+
-+ bio_for_each_folio(fv, bio, iter)
-+ __bch2_folio_set(fv.fv_folio,
-+ fv.fv_offset >> 9,
-+ fv.fv_len >> 9,
-+ nr_ptrs, state);
-+}
-+
-+void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
-+ u64 start, u64 end)
-+{
-+ pgoff_t index = start >> PAGE_SECTORS_SHIFT;
-+ pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
-+ struct folio_batch fbatch;
-+ unsigned i, j;
-+
-+ if (end <= start)
-+ return;
-+
-+ folio_batch_init(&fbatch);
-+
-+ while (filemap_get_folios(inode->v.i_mapping,
-+ &index, end_index, &fbatch)) {
-+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
-+ struct folio *folio = fbatch.folios[i];
-+ u64 folio_start = folio_sector(folio);
-+ u64 folio_end = folio_end_sector(folio);
-+ unsigned folio_offset = max(start, folio_start) - folio_start;
-+ unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
-+ struct bch_folio *s;
-+
-+ BUG_ON(end <= folio_start);
-+
-+ folio_lock(folio);
-+ s = bch2_folio(folio);
-+
-+ if (s) {
-+ spin_lock(&s->lock);
-+ for (j = folio_offset; j < folio_offset + folio_len; j++)
-+ s->s[j].nr_replicas = 0;
-+ spin_unlock(&s->lock);
-+ }
-+
-+ folio_unlock(folio);
-+ }
-+ folio_batch_release(&fbatch);
-+ cond_resched();
-+ }
-+}
-+
-+void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
-+ u64 start, u64 end)
-+{
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ pgoff_t index = start >> PAGE_SECTORS_SHIFT;
-+ pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
-+ struct folio_batch fbatch;
-+ s64 i_sectors_delta = 0;
-+ unsigned i, j;
-+
-+ if (end <= start)
-+ return;
-+
-+ folio_batch_init(&fbatch);
-+
-+ while (filemap_get_folios(inode->v.i_mapping,
-+ &index, end_index, &fbatch)) {
-+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
-+ struct folio *folio = fbatch.folios[i];
-+ u64 folio_start = folio_sector(folio);
-+ u64 folio_end = folio_end_sector(folio);
-+ unsigned folio_offset = max(start, folio_start) - folio_start;
-+ unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
-+ struct bch_folio *s;
-+
-+ BUG_ON(end <= folio_start);
-+
-+ folio_lock(folio);
-+ s = bch2_folio(folio);
-+
-+ if (s) {
-+ spin_lock(&s->lock);
-+ for (j = folio_offset; j < folio_offset + folio_len; j++) {
-+ i_sectors_delta -= s->s[j].state == SECTOR_dirty;
-+ bch2_folio_sector_set(folio, s, j,
-+ folio_sector_reserve(s->s[j].state));
-+ }
-+ spin_unlock(&s->lock);
-+ }
-+
-+ folio_unlock(folio);
-+ }
-+ folio_batch_release(&fbatch);
-+ cond_resched();
-+ }
-+
-+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+}
-+
-+static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
-+ unsigned nr_replicas)
-+{
-+ return max(0, (int) nr_replicas -
-+ s->nr_replicas -
-+ s->replicas_reserved);
-+}
-+
-+int bch2_get_folio_disk_reservation(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ struct folio *folio, bool check_enospc)
-+{
-+ struct bch_folio *s = bch2_folio_create(folio, 0);
-+ unsigned nr_replicas = inode_nr_replicas(c, inode);
-+ struct disk_reservation disk_res = { 0 };
-+ unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
-+ int ret;
-+
-+ if (!s)
-+ return -ENOMEM;
-+
-+ for (i = 0; i < sectors; i++)
-+ disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
-+
-+ if (!disk_res_sectors)
-+ return 0;
-+
-+ ret = bch2_disk_reservation_get(c, &disk_res,
-+ disk_res_sectors, 1,
-+ !check_enospc
-+ ? BCH_DISK_RESERVATION_NOFAIL
-+ : 0);
-+ if (unlikely(ret))
-+ return ret;
-+
-+ for (i = 0; i < sectors; i++)
-+ s->s[i].replicas_reserved +=
-+ sectors_to_reserve(&s->s[i], nr_replicas);
-+
-+ return 0;
-+}
-+
-+void bch2_folio_reservation_put(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ struct bch2_folio_reservation *res)
-+{
-+ bch2_disk_reservation_put(c, &res->disk);
-+ bch2_quota_reservation_put(c, inode, &res->quota);
-+}
-+
-+int bch2_folio_reservation_get(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ struct folio *folio,
-+ struct bch2_folio_reservation *res,
-+ unsigned offset, unsigned len)
-+{
-+ struct bch_folio *s = bch2_folio_create(folio, 0);
-+ unsigned i, disk_sectors = 0, quota_sectors = 0;
-+ int ret;
-+
-+ if (!s)
-+ return -ENOMEM;
-+
-+ BUG_ON(!s->uptodate);
-+
-+ for (i = round_down(offset, block_bytes(c)) >> 9;
-+ i < round_up(offset + len, block_bytes(c)) >> 9;
-+ i++) {
-+ disk_sectors += sectors_to_reserve(&s->s[i],
-+ res->disk.nr_replicas);
-+ quota_sectors += s->s[i].state == SECTOR_unallocated;
-+ }
-+
-+ if (disk_sectors) {
-+ ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
-+ if (unlikely(ret))
-+ return ret;
-+ }
-+
-+ if (quota_sectors) {
-+ ret = bch2_quota_reservation_add(c, inode, &res->quota,
-+ quota_sectors, true);
-+ if (unlikely(ret)) {
-+ struct disk_reservation tmp = {
-+ .sectors = disk_sectors
-+ };
-+
-+ bch2_disk_reservation_put(c, &tmp);
-+ res->disk.sectors -= disk_sectors;
-+ return ret;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+static void bch2_clear_folio_bits(struct folio *folio)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_folio *s = bch2_folio(folio);
-+ struct disk_reservation disk_res = { 0 };
-+ int i, sectors = folio_sectors(folio), dirty_sectors = 0;
-+
-+ if (!s)
-+ return;
-+
-+ EBUG_ON(!folio_test_locked(folio));
-+ EBUG_ON(folio_test_writeback(folio));
-+
-+ for (i = 0; i < sectors; i++) {
-+ disk_res.sectors += s->s[i].replicas_reserved;
-+ s->s[i].replicas_reserved = 0;
-+
-+ dirty_sectors -= s->s[i].state == SECTOR_dirty;
-+ bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
-+ }
-+
-+ bch2_disk_reservation_put(c, &disk_res);
-+
-+ bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);
-+
-+ bch2_folio_release(folio);
-+}
-+
-+void bch2_set_folio_dirty(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ struct folio *folio,
-+ struct bch2_folio_reservation *res,
-+ unsigned offset, unsigned len)
-+{
-+ struct bch_folio *s = bch2_folio(folio);
-+ unsigned i, dirty_sectors = 0;
-+
-+ WARN_ON((u64) folio_pos(folio) + offset + len >
-+ round_up((u64) i_size_read(&inode->v), block_bytes(c)));
-+
-+ BUG_ON(!s->uptodate);
-+
-+ spin_lock(&s->lock);
-+
-+ for (i = round_down(offset, block_bytes(c)) >> 9;
-+ i < round_up(offset + len, block_bytes(c)) >> 9;
-+ i++) {
-+ unsigned sectors = sectors_to_reserve(&s->s[i],
-+ res->disk.nr_replicas);
-+
-+ /*
-+ * This can happen if we race with the error path in
-+ * bch2_writepage_io_done():
-+ */
-+ sectors = min_t(unsigned, sectors, res->disk.sectors);
-+
-+ s->s[i].replicas_reserved += sectors;
-+ res->disk.sectors -= sectors;
-+
-+ dirty_sectors += s->s[i].state == SECTOR_unallocated;
-+
-+ bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
-+ }
-+
-+ spin_unlock(&s->lock);
-+
-+ bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);
-+
-+ if (!folio_test_dirty(folio))
-+ filemap_dirty_folio(inode->v.i_mapping, folio);
-+}
-+
-+vm_fault_t bch2_page_fault(struct vm_fault *vmf)
-+{
-+ struct file *file = vmf->vma->vm_file;
-+ struct address_space *mapping = file->f_mapping;
-+ struct address_space *fdm = faults_disabled_mapping();
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ vm_fault_t ret;
-+
-+ if (fdm == mapping)
-+ return VM_FAULT_SIGBUS;
-+
-+ /* Lock ordering: */
-+ if (fdm > mapping) {
-+ struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
-+
-+ if (bch2_pagecache_add_tryget(inode))
-+ goto got_lock;
-+
-+ bch2_pagecache_block_put(fdm_host);
-+
-+ bch2_pagecache_add_get(inode);
-+ bch2_pagecache_add_put(inode);
-+
-+ bch2_pagecache_block_get(fdm_host);
-+
-+ /* Signal that lock has been dropped: */
-+ set_fdm_dropped_locks();
-+ return VM_FAULT_SIGBUS;
-+ }
-+
-+ bch2_pagecache_add_get(inode);
-+got_lock:
-+ ret = filemap_fault(vmf);
-+ bch2_pagecache_add_put(inode);
-+
-+ return ret;
-+}
-+
-+vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
-+{
-+ struct folio *folio = page_folio(vmf->page);
-+ struct file *file = vmf->vma->vm_file;
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ struct address_space *mapping = file->f_mapping;
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch2_folio_reservation res;
-+ unsigned len;
-+ loff_t isize;
-+ vm_fault_t ret;
-+
-+ bch2_folio_reservation_init(c, inode, &res);
-+
-+ sb_start_pagefault(inode->v.i_sb);
-+ file_update_time(file);
-+
-+ /*
-+ * Not strictly necessary, but helps avoid dio writes livelocking in
-+ * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
-+ * a bch2_write_invalidate_inode_pages_range() that works without dropping
-+ * page lock before invalidating page
-+ */
-+ bch2_pagecache_add_get(inode);
-+
-+ folio_lock(folio);
-+ isize = i_size_read(&inode->v);
-+
-+ if (folio->mapping != mapping || folio_pos(folio) >= isize) {
-+ folio_unlock(folio);
-+ ret = VM_FAULT_NOPAGE;
-+ goto out;
-+ }
-+
-+ len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
-+
-+ if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
-+ bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
-+ folio_unlock(folio);
-+ ret = VM_FAULT_SIGBUS;
-+ goto out;
-+ }
-+
-+ bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
-+ bch2_folio_reservation_put(c, inode, &res);
-+
-+ folio_wait_stable(folio);
-+ ret = VM_FAULT_LOCKED;
-+out:
-+ bch2_pagecache_add_put(inode);
-+ sb_end_pagefault(inode->v.i_sb);
-+
-+ return ret;
-+}
-+
-+void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
-+{
-+ if (offset || length < folio_size(folio))
-+ return;
-+
-+ bch2_clear_folio_bits(folio);
-+}
-+
-+bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
-+{
-+ if (folio_test_dirty(folio) || folio_test_writeback(folio))
-+ return false;
-+
-+ bch2_clear_folio_bits(folio);
-+ return true;
-+}
-+
-+/* fseek: */
-+
-+static int folio_data_offset(struct folio *folio, loff_t pos,
-+ unsigned min_replicas)
-+{
-+ struct bch_folio *s = bch2_folio(folio);
-+ unsigned i, sectors = folio_sectors(folio);
-+
-+ if (s)
-+ for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
-+ if (s->s[i].state >= SECTOR_dirty &&
-+ s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
-+ return i << SECTOR_SHIFT;
-+
-+ return -1;
-+}
-+
-+loff_t bch2_seek_pagecache_data(struct inode *vinode,
-+ loff_t start_offset,
-+ loff_t end_offset,
-+ unsigned min_replicas,
-+ bool nonblock)
-+{
-+ struct folio_batch fbatch;
-+ pgoff_t start_index = start_offset >> PAGE_SHIFT;
-+ pgoff_t end_index = end_offset >> PAGE_SHIFT;
-+ pgoff_t index = start_index;
-+ unsigned i;
-+ loff_t ret;
-+ int offset;
-+
-+ folio_batch_init(&fbatch);
-+
-+ while (filemap_get_folios(vinode->i_mapping,
-+ &index, end_index, &fbatch)) {
-+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
-+ struct folio *folio = fbatch.folios[i];
-+
-+ if (!nonblock) {
-+ folio_lock(folio);
-+ } else if (!folio_trylock(folio)) {
-+ folio_batch_release(&fbatch);
-+ return -EAGAIN;
-+ }
-+
-+ offset = folio_data_offset(folio,
-+ max(folio_pos(folio), start_offset),
-+ min_replicas);
-+ if (offset >= 0) {
-+ ret = clamp(folio_pos(folio) + offset,
-+ start_offset, end_offset);
-+ folio_unlock(folio);
-+ folio_batch_release(&fbatch);
-+ return ret;
-+ }
-+ folio_unlock(folio);
-+ }
-+ folio_batch_release(&fbatch);
-+ cond_resched();
-+ }
-+
-+ return end_offset;
-+}
-+
-+/*
-+ * Search for a hole in a folio.
-+ *
-+ * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
-+ * code to indicate a pagecache hole exists at the returned offset. Otherwise
-+ * return 0 if the folio is filled with data, or an error code. This function
-+ * can return -EAGAIN if nonblock is specified.
-+ */
-+static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
-+ unsigned min_replicas, bool nonblock)
-+{
-+ struct folio *folio;
-+ struct bch_folio *s;
-+ unsigned i, sectors;
-+ int ret = -ENOENT;
-+
-+ folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
-+ FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
-+ if (IS_ERR(folio))
-+ return PTR_ERR(folio);
-+
-+ s = bch2_folio(folio);
-+ if (!s)
-+ goto unlock;
-+
-+ sectors = folio_sectors(folio);
-+ for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
-+ if (s->s[i].state < SECTOR_dirty ||
-+ s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
-+ *offset = max(*offset,
-+ folio_pos(folio) + (i << SECTOR_SHIFT));
-+ goto unlock;
-+ }
-+
-+ *offset = folio_end_pos(folio);
-+ ret = 0;
-+unlock:
-+ folio_unlock(folio);
-+ folio_put(folio);
-+ return ret;
-+}
-+
-+loff_t bch2_seek_pagecache_hole(struct inode *vinode,
-+ loff_t start_offset,
-+ loff_t end_offset,
-+ unsigned min_replicas,
-+ bool nonblock)
-+{
-+ struct address_space *mapping = vinode->i_mapping;
-+ loff_t offset = start_offset;
-+ loff_t ret = 0;
-+
-+ while (!ret && offset < end_offset)
-+ ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
-+
-+ if (ret && ret != -ENOENT)
-+ return ret;
-+ return min(offset, end_offset);
-+}
-+
-+int bch2_clamp_data_hole(struct inode *inode,
-+ u64 *hole_start,
-+ u64 *hole_end,
-+ unsigned min_replicas,
-+ bool nonblock)
-+{
-+ loff_t ret;
-+
-+ ret = bch2_seek_pagecache_hole(inode,
-+ *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
-+ if (ret < 0)
-+ return ret;
-+
-+ *hole_start = ret;
-+
-+ if (*hole_start == *hole_end)
-+ return 0;
-+
-+ ret = bch2_seek_pagecache_data(inode,
-+ *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
-+ if (ret < 0)
-+ return ret;
-+
-+ *hole_end = ret;
-+ return 0;
-+}
-+
-+#endif /* NO_BCACHEFS_FS */
-diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h
-new file mode 100644
-index 000000000000..a2222ad586e9
---- /dev/null
-+++ b/fs/bcachefs/fs-io-pagecache.h
-@@ -0,0 +1,176 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_IO_PAGECACHE_H
-+#define _BCACHEFS_FS_IO_PAGECACHE_H
-+
-+#include <linux/pagemap.h>
-+
-+typedef DARRAY(struct folio *) folios;
-+
-+int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
-+ u64, int, gfp_t, folios *);
-+int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
-+
-+/*
-+ * Use u64 for the end pos and sector helpers because if the folio covers the
-+ * max supported range of the mapping, the start offset of the next folio
-+ * overflows loff_t. This breaks much of the range based processing in the
-+ * buffered write path.
-+ */
-+static inline u64 folio_end_pos(struct folio *folio)
-+{
-+ return folio_pos(folio) + folio_size(folio);
-+}
-+
-+static inline size_t folio_sectors(struct folio *folio)
-+{
-+ return PAGE_SECTORS << folio_order(folio);
-+}
-+
-+static inline loff_t folio_sector(struct folio *folio)
-+{
-+ return folio_pos(folio) >> 9;
-+}
-+
-+static inline u64 folio_end_sector(struct folio *folio)
-+{
-+ return folio_end_pos(folio) >> 9;
-+}
-+
-+#define BCH_FOLIO_SECTOR_STATE() \
-+ x(unallocated) \
-+ x(reserved) \
-+ x(dirty) \
-+ x(dirty_reserved) \
-+ x(allocated)
-+
-+enum bch_folio_sector_state {
-+#define x(n) SECTOR_##n,
-+ BCH_FOLIO_SECTOR_STATE()
-+#undef x
-+};
-+
-+struct bch_folio_sector {
-+ /* Uncompressed, fully allocated replicas (or on disk reservation): */
-+ unsigned nr_replicas:4;
-+
-+ /* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */
-+ unsigned replicas_reserved:4;
-+
-+ /* i_sectors: */
-+ enum bch_folio_sector_state state:8;
-+};
-+
-+struct bch_folio {
-+ spinlock_t lock;
-+ atomic_t write_count;
-+ /*
-+ * Is the sector state up to date with the btree?
-+ * (Not the data itself)
-+ */
-+ bool uptodate;
-+ struct bch_folio_sector s[];
-+};
-+
-+/* Helper for when we need to add debug instrumentation: */
-+static inline void bch2_folio_sector_set(struct folio *folio,
-+ struct bch_folio *s,
-+ unsigned i, unsigned n)
-+{
-+ s->s[i].state = n;
-+}
-+
-+/* file offset (to folio offset) to bch_folio_sector index */
-+static inline int folio_pos_to_s(struct folio *folio, loff_t pos)
-+{
-+ u64 f_offset = pos - folio_pos(folio);
-+
-+ BUG_ON(pos < folio_pos(folio) || pos >= folio_end_pos(folio));
-+ return f_offset >> SECTOR_SHIFT;
-+}
-+
-+/* for newly allocated folios: */
-+static inline void __bch2_folio_release(struct folio *folio)
-+{
-+ kfree(folio_detach_private(folio));
-+}
-+
-+static inline void bch2_folio_release(struct folio *folio)
-+{
-+ EBUG_ON(!folio_test_locked(folio));
-+ __bch2_folio_release(folio);
-+}
-+
-+static inline struct bch_folio *__bch2_folio(struct folio *folio)
-+{
-+ return folio_has_private(folio)
-+ ? (struct bch_folio *) folio_get_private(folio)
-+ : NULL;
-+}
-+
-+static inline struct bch_folio *bch2_folio(struct folio *folio)
-+{
-+ EBUG_ON(!folio_test_locked(folio));
-+
-+ return __bch2_folio(folio);
-+}
-+
-+struct bch_folio *__bch2_folio_create(struct folio *, gfp_t);
-+struct bch_folio *bch2_folio_create(struct folio *, gfp_t);
-+
-+/*
-+ * A disk reservation paired with a quota reservation; initialised by
-+ * bch2_folio_reservation_init().
-+ */
-+struct bch2_folio_reservation {
-+ struct disk_reservation disk;
-+ struct quota_res quota;
-+};
-+
-+/*
-+ * Number of data replicas for @inode: a non-zero bi_data_replicas is used
-+ * minus one, and zero falls back to the filesystem-wide data_replicas option.
-+ */
-+static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
-+{
-+	/* XXX: this should not be open coded */
-+	if (inode->ei_inode.bi_data_replicas)
-+		return inode->ei_inode.bi_data_replicas - 1;
-+
-+	return c->opts.data_replicas;
-+}
-+
-+/* Zero @res and record how many replicas its disk reservation is for. */
-+static inline void bch2_folio_reservation_init(struct bch_fs *c,
-+			struct bch_inode_info *inode,
-+			struct bch2_folio_reservation *res)
-+{
-+	unsigned nr_replicas = inode_nr_replicas(c, inode);
-+
-+	memset(res, 0, sizeof(*res));
-+	res->disk.nr_replicas = nr_replicas;
-+}
-+
-+int bch2_folio_set(struct bch_fs *, subvol_inum, struct folio **, unsigned);
-+void bch2_bio_page_state_set(struct bio *, struct bkey_s_c);
-+
-+void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64);
-+void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64);
-+
-+int bch2_get_folio_disk_reservation(struct bch_fs *,
-+ struct bch_inode_info *,
-+ struct folio *, bool);
-+
-+void bch2_folio_reservation_put(struct bch_fs *,
-+ struct bch_inode_info *,
-+ struct bch2_folio_reservation *);
-+int bch2_folio_reservation_get(struct bch_fs *,
-+ struct bch_inode_info *,
-+ struct folio *,
-+ struct bch2_folio_reservation *,
-+ unsigned, unsigned);
-+
-+void bch2_set_folio_dirty(struct bch_fs *,
-+ struct bch_inode_info *,
-+ struct folio *,
-+ struct bch2_folio_reservation *,
-+ unsigned, unsigned);
-+
-+vm_fault_t bch2_page_fault(struct vm_fault *);
-+vm_fault_t bch2_page_mkwrite(struct vm_fault *);
-+void bch2_invalidate_folio(struct folio *, size_t, size_t);
-+bool bch2_release_folio(struct folio *, gfp_t);
-+
-+loff_t bch2_seek_pagecache_data(struct inode *, loff_t, loff_t, unsigned, bool);
-+loff_t bch2_seek_pagecache_hole(struct inode *, loff_t, loff_t, unsigned, bool);
-+int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool);
-+
-+#endif /* _BCACHEFS_FS_IO_PAGECACHE_H */
-diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
-new file mode 100644
-index 000000000000..b0e8144ec550
---- /dev/null
-+++ b/fs/bcachefs/fs-io.c
-@@ -0,0 +1,1072 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "extent_update.h"
-+#include "fs.h"
-+#include "fs-io.h"
-+#include "fs-io-buffered.h"
-+#include "fs-io-pagecache.h"
-+#include "fsck.h"
-+#include "inode.h"
-+#include "journal.h"
-+#include "io_misc.h"
-+#include "keylist.h"
-+#include "quota.h"
-+#include "reflink.h"
-+#include "trace.h"
-+
-+#include <linux/aio.h>
-+#include <linux/backing-dev.h>
-+#include <linux/falloc.h>
-+#include <linux/migrate.h>
-+#include <linux/mmu_context.h>
-+#include <linux/pagevec.h>
-+#include <linux/rmap.h>
-+#include <linux/sched/signal.h>
-+#include <linux/task_io_accounting_ops.h>
-+#include <linux/uio.h>
-+
-+#include <trace/events/writeback.h>
-+
-+/*
-+ * A flush bio plus the state its completion handler needs. The bio is the
-+ * last member: bch2_fs_fsio_init() passes offsetof(struct nocow_flush, bio)
-+ * as the bioset front pad, and container_of() recovers this struct.
-+ */
-+struct nocow_flush {
-+ struct closure *cl;
-+ struct bch_dev *ca;
-+ struct bio bio;
-+};
-+
-+/*
-+ * Completion handler for flush bios: drops the closure ref, then the
-+ * device io_ref taken at submission, then the bio itself.
-+ */
-+static void nocow_flush_endio(struct bio *_bio)
-+{
-+	struct nocow_flush *flush = container_of(_bio, struct nocow_flush, bio);
-+	struct bch_dev *ca = flush->ca;
-+
-+	closure_put(flush->cl);
-+	percpu_ref_put(&ca->io_ref);
-+	bio_put(&flush->bio);
-+}
-+
-+/*
-+ * Submit a REQ_OP_FLUSH bio to every device marked in
-+ * inode->ei_devs_need_flush, tracking completion through @cl.
-+ *
-+ * Returns immediately if no bit is set. Otherwise the mask is copied to a
-+ * local and cleared in the inode before any bio is submitted. Devices that
-+ * are gone, or whose io_ref cannot be taken, are skipped.
-+ */
-+void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ struct closure *cl)
-+{
-+ struct nocow_flush *bio;
-+ struct bch_dev *ca;
-+ struct bch_devs_mask devs;
-+ unsigned dev;
-+
-+ dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX);
-+ if (dev == BCH_SB_MEMBERS_MAX)
-+ return;
-+
-+ devs = inode->ei_devs_need_flush;
-+ memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
-+
-+ for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
-+ /* Pin the device with an io_ref while under RCU; released in nocow_flush_endio() */
-+ rcu_read_lock();
-+ ca = rcu_dereference(c->devs[dev]);
-+ if (ca && !percpu_ref_tryget(&ca->io_ref))
-+ ca = NULL;
-+ rcu_read_unlock();
-+
-+ if (!ca)
-+ continue;
-+
-+ bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0,
-+ REQ_OP_FLUSH,
-+ GFP_KERNEL,
-+ &c->nocow_flush_bioset),
-+ struct nocow_flush, bio);
-+ bio->cl = cl;
-+ bio->ca = ca;
-+ bio->bio.bi_end_io = nocow_flush_endio;
-+ closure_bio_submit(&bio->bio, cl);
-+ }
-+}
-+
-+/*
-+ * Synchronous wrapper around bch2_inode_flush_nocow_writes_async(): runs it
-+ * against an on-stack closure and waits with closure_sync(). Always returns 0.
-+ */
-+static int bch2_inode_flush_nocow_writes(struct bch_fs *c,
-+ struct bch_inode_info *inode)
-+{
-+ struct closure cl;
-+
-+ closure_init_stack(&cl);
-+ bch2_inode_flush_nocow_writes_async(c, inode, &cl);
-+ closure_sync(&cl);
-+
-+ return 0;
-+}
-+
-+/* i_size updates: */
-+
-+/* Argument block handed to inode_set_size() via bch2_write_inode(). */
-+struct inode_new_size {
-+ loff_t new_size; /* value stored into bi_size */
-+ u64 now; /* timestamp applied to the selected time fields */
-+ unsigned fields; /* ATTR_ATIME / ATTR_MTIME / ATTR_CTIME mask */
-+};
-+
-+/*
-+ * Inode update callback: store the new size from @p (a struct
-+ * inode_new_size) into @bi and stamp whichever of atime/mtime/ctime the
-+ * fields mask selects. Always returns 0.
-+ */
-+static int inode_set_size(struct btree_trans *trans,
-+			  struct bch_inode_info *inode,
-+			  struct bch_inode_unpacked *bi,
-+			  void *p)
-+{
-+	struct inode_new_size *new = p;
-+	unsigned fields = new->fields;
-+
-+	bi->bi_size = new->new_size;
-+
-+	if (fields & ATTR_ATIME)
-+		bi->bi_atime = new->now;
-+	if (fields & ATTR_MTIME)
-+		bi->bi_mtime = new->now;
-+	if (fields & ATTR_CTIME)
-+		bi->bi_ctime = new->now;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Write @new_size to the inode through bch2_write_inode(), stamping the time
-+ * fields selected by @fields with the current filesystem time.
-+ */
-+int __must_check bch2_write_inode_size(struct bch_fs *c,
-+				       struct bch_inode_info *inode,
-+				       loff_t new_size, unsigned fields)
-+{
-+	struct inode_new_size args;
-+
-+	args.new_size	= new_size;
-+	args.now	= bch2_current_time(c);
-+	args.fields	= fields;
-+
-+	return bch2_write_inode(c, inode, inode_set_size, &args, fields);
-+}
-+
-+/*
-+ * Apply a signed change of @sectors to inode->v.i_blocks, reporting a
-+ * filesystem inconsistency if the result would be negative.
-+ *
-+ * With CONFIG_BCACHEFS_QUOTA: if @quota_res is given, the inode is not a
-+ * snapshot inode and @sectors is positive, the sectors are consumed from the
-+ * reservation (which must cover them); otherwise the change is accounted
-+ * directly with bch2_quota_acct().
-+ */
-+void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
-+ struct quota_res *quota_res, s64 sectors)
-+{
-+ bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c,
-+ "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
-+ inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
-+ inode->ei_inode.bi_sectors);
-+ inode->v.i_blocks += sectors;
-+
-+#ifdef CONFIG_BCACHEFS_QUOTA
-+ if (quota_res &&
-+ !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) &&
-+ sectors > 0) {
-+ BUG_ON(sectors > quota_res->sectors);
-+ BUG_ON(sectors > inode->ei_quota_reserved);
-+
-+ quota_res->sectors -= sectors;
-+ inode->ei_quota_reserved -= sectors;
-+ } else {
-+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN);
-+ }
-+#endif
-+}
-+
-+/* fsync: */
-+
-+/*
-+ * Flush the journal up to the inode's journal sequence number, then flush
-+ * any pending nocow writes. Does nothing when journal flushing is disabled.
-+ *
-+ * The sequence number is read from the btree inode because
-+ * inode->ei_inode.bi_journal_seq is set in an insert trigger and so won't be
-+ * up to date.
-+ */
-+static int bch2_flush_inode(struct bch_fs *c,
-+			    struct bch_inode_info *inode)
-+{
-+	struct bch_inode_unpacked u;
-+	int ret;
-+
-+	if (c->opts.journal_flush_disabled)
-+		return 0;
-+
-+	ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
-+	if (ret)
-+		return ret;
-+
-+	ret = bch2_journal_flush_seq(&c->journal, u.bi_journal_seq);
-+	if (ret)
-+		return ret;
-+
-+	return bch2_inode_flush_nocow_writes(c, inode);
-+}
-+
-+/*
-+ * fsync: write back and wait on the data range, sync the inode metadata and
-+ * flush the inode. All three steps always run; the first non-zero result is
-+ * returned, mapped through bch2_err_class().
-+ */
-+int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-+{
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	int data_ret = file_write_and_wait_range(file, start, end);
-+	int meta_ret = sync_inode_metadata(&inode->v, 1);
-+	int flush_ret = bch2_flush_inode(c, inode);
-+	int ret = data_ret;
-+
-+	if (!ret)
-+		ret = meta_ret;
-+	if (!ret)
-+		ret = flush_ret;
-+
-+	return bch2_err_class(ret);
-+}
-+
-+/* truncate: */
-+
-+/*
-+ * Scan the extents btree between @start and @end in the current snapshot of
-+ * @subvol.
-+ *
-+ * Returns 1 if a data extent that is not unwritten is found, 0 if none is
-+ * found, or a negative error. Transaction restarts are retried, resuming
-+ * from the position the iterator had reached.
-+ */
-+static inline int range_has_data(struct bch_fs *c, u32 subvol,
-+ struct bpos start,
-+ struct bpos end)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, subvol, &start.snapshot);
-+ if (ret)
-+ goto err;
-+
-+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
-+ if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) {
-+ ret = 1;
-+ break;
-+ }
-+ /* Remember how far we got so a retry does not rescan from the beginning */
-+ start = iter.pos;
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+/*
-+ * Zero the part of the folio at page index @index that overlaps the byte
-+ * range [@start, @end), and mark the whole blocks inside that part as
-+ * unallocated in the folio's sector state.
-+ *
-+ * If the folio is not in the page cache it is only created (and read) when
-+ * range_has_data() reports data for that page; otherwise the result of
-+ * range_has_data() (0 or an error) is returned directly.
-+ *
-+ * Returns a negative error, 0 when there was nothing to do (the range covers
-+ * the whole folio), or otherwise 0/1 where 1 means the last sector of the
-+ * folio within i_size is in a state >= SECTOR_dirty (see the comment before
-+ * the WARN_ON_ONCE below for why callers care).
-+ */
-+static int __bch2_truncate_folio(struct bch_inode_info *inode,
-+ pgoff_t index, loff_t start, loff_t end)
-+{
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct address_space *mapping = inode->v.i_mapping;
-+ struct bch_folio *s;
-+ unsigned start_offset;
-+ unsigned end_offset;
-+ unsigned i;
-+ struct folio *folio;
-+ s64 i_sectors_delta = 0;
-+ int ret = 0;
-+ u64 end_pos;
-+
-+ folio = filemap_lock_folio(mapping, index);
-+ if (IS_ERR_OR_NULL(folio)) {
-+ /*
-+ * XXX: we're doing two index lookups when we end up reading the
-+ * folio
-+ */
-+ ret = range_has_data(c, inode->ei_subvol,
-+ POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),
-+ POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS));
-+ if (ret <= 0)
-+ return ret;
-+
-+ folio = __filemap_get_folio(mapping, index,
-+ FGP_LOCK|FGP_CREAT, GFP_KERNEL);
-+ if (IS_ERR_OR_NULL(folio)) {
-+ ret = -ENOMEM;
-+ goto out;
-+ }
-+ }
-+
-+ BUG_ON(start >= folio_end_pos(folio));
-+ BUG_ON(end <= folio_pos(folio));
-+
-+ /* Clamp the byte range to this folio, as offsets relative to its start */
-+ start_offset = max(start, folio_pos(folio)) - folio_pos(folio);
-+ end_offset = min_t(u64, end, folio_end_pos(folio)) - folio_pos(folio);
-+
-+ /* Folio boundary? Nothing to do */
-+ if (start_offset == 0 &&
-+ end_offset == folio_size(folio)) {
-+ ret = 0;
-+ goto unlock;
-+ }
-+
-+ s = bch2_folio_create(folio, 0);
-+ if (!s) {
-+ ret = -ENOMEM;
-+ goto unlock;
-+ }
-+
-+ if (!folio_test_uptodate(folio)) {
-+ ret = bch2_read_single_folio(folio, mapping);
-+ if (ret)
-+ goto unlock;
-+ }
-+
-+ ret = bch2_folio_set(c, inode_inum(inode), &folio, 1);
-+ if (ret)
-+ goto unlock;
-+
-+ /* Only blocks lying entirely inside the zeroed range are released */
-+ for (i = round_up(start_offset, block_bytes(c)) >> 9;
-+ i < round_down(end_offset, block_bytes(c)) >> 9;
-+ i++) {
-+ s->s[i].nr_replicas = 0;
-+
-+ i_sectors_delta -= s->s[i].state == SECTOR_dirty;
-+ bch2_folio_sector_set(folio, s, i, SECTOR_unallocated);
-+ }
-+
-+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+
-+ /*
-+ * Caller needs to know whether this folio will be written out by
-+ * writeback - doing an i_size update if necessary - or whether it will
-+ * be responsible for the i_size update.
-+ *
-+ * Note that we shouldn't ever see a folio beyond EOF, but check and
-+ * warn if so. This has been observed by failure to clean up folios
-+ * after a short write and there's still a chance reclaim will fix
-+ * things up.
-+ */
-+ WARN_ON_ONCE(folio_pos(folio) >= inode->v.i_size);
-+ end_pos = folio_end_pos(folio);
-+ if (inode->v.i_size > folio_pos(folio))
-+ end_pos = min_t(u64, inode->v.i_size, end_pos);
-+ ret = s->s[folio_pos_to_s(folio, end_pos - 1)].state >= SECTOR_dirty;
-+
-+ folio_zero_segment(folio, start_offset, end_offset);
-+
-+ /*
-+ * Bit of a hack - we don't want truncate to fail due to -ENOSPC.
-+ *
-+ * XXX: because we aren't currently tracking whether the folio has actual
-+ * data in it (vs. just 0s, or only partially written) this is wrong. ick.
-+ */
-+ BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false));
-+
-+ /*
-+ * This removes any writeable userspace mappings; we need to force
-+ * .page_mkwrite to be called again before any mmapped writes, to
-+ * redirty the full page:
-+ */
-+ folio_mkclean(folio);
-+ filemap_dirty_folio(mapping, folio);
-+unlock:
-+ folio_unlock(folio);
-+ folio_put(folio);
-+out:
-+ return ret;
-+}
-+
-+/*
-+ * Truncate helper: process the folio containing byte offset @from, zeroing
-+ * from @from to the end of that folio.
-+ */
-+static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from)
-+{
-+	pgoff_t index = from >> PAGE_SHIFT;
-+
-+	return __bch2_truncate_folio(inode, index, from, ANYSINT_MAX(loff_t));
-+}
-+
-+/*
-+ * Process the folio at each edge of [@start, @end): the one containing
-+ * @start and, when the shifted page indices of @start and @end differ, the
-+ * one containing @end - 1. Returns the result for the last folio processed,
-+ * or the first error.
-+ */
-+static int bch2_truncate_folios(struct bch_inode_info *inode,
-+				loff_t start, loff_t end)
-+{
-+	int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT,
-+					start, end);
-+
-+	if (ret < 0 ||
-+	    start >> PAGE_SHIFT == end >> PAGE_SHIFT)
-+		return ret;
-+
-+	return __bch2_truncate_folio(inode, (end - 1) >> PAGE_SHIFT,
-+				     start, end);
-+}
-+
-+/*
-+ * Grow the file to iattr->ia_size: write back everything from the on-disk
-+ * size (@inode_u->bi_size) onwards, update the in-memory size with
-+ * truncate_setsize(), then hand the attribute change to
-+ * bch2_setattr_nonsize().
-+ */
-+static int bch2_extend(struct mnt_idmap *idmap,
-+ struct bch_inode_info *inode,
-+ struct bch_inode_unpacked *inode_u,
-+ struct iattr *iattr)
-+{
-+ struct address_space *mapping = inode->v.i_mapping;
-+ int ret;
-+
-+ /*
-+ * sync appends:
-+ *
-+ * this has to be done _before_ extending i_size:
-+ */
-+ ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX);
-+ if (ret)
-+ return ret;
-+
-+ truncate_setsize(&inode->v, iattr->ia_size);
-+
-+ return bch2_setattr_nonsize(idmap, inode, iattr);
-+}
-+
-+/*
-+ * Change the file size to iattr->ia_size.
-+ *
-+ * A size larger than the current in-memory i_size is handled by
-+ * bch2_extend(). Otherwise the folio straddling the new size is zeroed, the
-+ * page cache is truncated, affected ranges are written back, the extents are
-+ * truncated in the btree and finally the remaining attributes are applied.
-+ *
-+ * Runs with direct I/O drained and the pagecache block lock held for the
-+ * duration. The return value is mapped through bch2_err_class().
-+ */
-+int bchfs_truncate(struct mnt_idmap *idmap,
-+ struct bch_inode_info *inode, struct iattr *iattr)
-+{
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct address_space *mapping = inode->v.i_mapping;
-+ struct bch_inode_unpacked inode_u;
-+ s64 i_sectors_delta = 0;
-+ int ret = 0;
-+
-+ /*
-+ * If the truncate call will change the size of the file, the
-+ * cmtimes should be updated. If the size will not change, we
-+ * do not need to update the cmtimes.
-+ */
-+ if (iattr->ia_size != inode->v.i_size) {
-+ if (!(iattr->ia_valid & ATTR_MTIME))
-+ ktime_get_coarse_real_ts64(&iattr->ia_mtime);
-+ if (!(iattr->ia_valid & ATTR_CTIME))
-+ ktime_get_coarse_real_ts64(&iattr->ia_ctime);
-+ iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME;
-+ }
-+
-+ inode_dio_wait(&inode->v);
-+ bch2_pagecache_block_get(inode);
-+
-+ ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * check this before next assertion; on filesystem error our normal
-+ * invariants are a bit broken (truncate has to truncate the page cache
-+ * before the inode).
-+ */
-+ ret = bch2_journal_error(&c->journal);
-+ if (ret)
-+ goto err;
-+
-+ WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) &&
-+ inode->v.i_size < inode_u.bi_size,
-+ "truncate spotted in mem i_size < btree i_size: %llu < %llu\n",
-+ (u64) inode->v.i_size, inode_u.bi_size);
-+
-+ if (iattr->ia_size > inode->v.i_size) {
-+ ret = bch2_extend(idmap, inode, &inode_u, iattr);
-+ goto err;
-+ }
-+
-+ /* Size is handled here; the final bch2_setattr_nonsize() must not see it */
-+ iattr->ia_valid &= ~ATTR_SIZE;
-+
-+ /* A positive return is informational only and is not used on this path */
-+ ret = bch2_truncate_folio(inode, iattr->ia_size);
-+ if (unlikely(ret < 0))
-+ goto err;
-+
-+ truncate_setsize(&inode->v, iattr->ia_size);
-+
-+ /*
-+ * When extending, we're going to write the new i_size to disk
-+ * immediately so we need to flush anything above the current on disk
-+ * i_size first:
-+ *
-+ * Also, when extending we need to flush the page that i_size currently
-+ * straddles - if it's mapped to userspace, we need to ensure that
-+ * userspace has to redirty it and call .mkwrite -> set_page_dirty
-+ * again to allocate the part of the page that was extended.
-+ */
-+ if (iattr->ia_size > inode_u.bi_size)
-+ ret = filemap_write_and_wait_range(mapping,
-+ inode_u.bi_size,
-+ iattr->ia_size - 1);
-+ else if (iattr->ia_size & (PAGE_SIZE - 1))
-+ ret = filemap_write_and_wait_range(mapping,
-+ round_down(iattr->ia_size, PAGE_SIZE),
-+ iattr->ia_size - 1);
-+ if (ret)
-+ goto err;
-+
-+ /* The sector delta is accounted even if bch2_truncate() reports an error */
-+ ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta);
-+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+
-+ if (unlikely(ret)) {
-+ /*
-+ * If we error here, VFS caches are now inconsistent with btree
-+ */
-+ set_bit(EI_INODE_ERROR, &inode->ei_flags);
-+ goto err;
-+ }
-+
-+ bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
-+ !bch2_journal_error(&c->journal), c,
-+ "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
-+ inode->v.i_ino, (u64) inode->v.i_blocks,
-+ inode->ei_inode.bi_sectors);
-+
-+ ret = bch2_setattr_nonsize(idmap, inode, iattr);
-+err:
-+ bch2_pagecache_block_put(inode);
-+ return bch2_err_class(ret);
-+}
-+
-+/* fallocate: */
-+
-+/*
-+ * Inode update callback: set both ctime and mtime of @bi to the current
-+ * filesystem time. @trans and @p are unused. Always returns 0.
-+ */
-+static int inode_update_times_fn(struct btree_trans *trans,
-+				 struct bch_inode_info *inode,
-+				 struct bch_inode_unpacked *bi, void *p)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+
-+	bi->bi_ctime = bch2_current_time(c);
-+	bi->bi_mtime = bi->bi_ctime;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Punch a hole over the byte range [@offset, @offset + @len).
-+ *
-+ * The partial folios at either edge are zeroed, the page cache for the range
-+ * is dropped, bch2_fpunch() is called on the block-aligned interior (if any),
-+ * and finally the inode is updated: its times always, and its size when the
-+ * hole reaches i_size and writeback of the last folio won't do it for us.
-+ *
-+ * Returns 0 on success or a negative error. The inode update is still
-+ * attempted after a failed bch2_fpunch() (part of the range may already have
-+ * been punched), but the punch error is what gets returned - previously it
-+ * was overwritten by the result of the inode update and silently lost.
-+ */
-+static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	u64 end = offset + len;
-+	u64 block_start = round_up(offset, block_bytes(c));
-+	u64 block_end = round_down(end, block_bytes(c));
-+	bool truncated_last_page;
-+	int ret, ret2;
-+
-+	ret = bch2_truncate_folios(inode, offset, end);
-+	if (unlikely(ret < 0))
-+		return ret;
-+
-+	/*
-+	 * A positive return is not an error: it reports that the last folio
-+	 * is dirty and will be written out (with any i_size update) by
-+	 * writeback. Don't let it leak into the return value.
-+	 */
-+	truncated_last_page = ret;
-+	ret = 0;
-+
-+	truncate_pagecache_range(&inode->v, offset, end - 1);
-+
-+	if (block_start < block_end) {
-+		s64 i_sectors_delta = 0;
-+
-+		ret = bch2_fpunch(c, inode_inum(inode),
-+				  block_start >> 9, block_end >> 9,
-+				  &i_sectors_delta);
-+		/* Account whatever was freed, even on a partial failure */
-+		bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+	}
-+
-+	mutex_lock(&inode->ei_update_lock);
-+	if (end >= inode->v.i_size && !truncated_last_page) {
-+		ret2 = bch2_write_inode_size(c, inode, inode->v.i_size,
-+					     ATTR_MTIME|ATTR_CTIME);
-+	} else {
-+		ret2 = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
-+					ATTR_MTIME|ATTR_CTIME);
-+	}
-+	mutex_unlock(&inode->ei_update_lock);
-+
-+	/* First error wins */
-+	return ret ?: ret2;
-+}
-+
-+/*
-+ * Shared implementation of insert-range (@insert true) and collapse-range
-+ * (@insert false); see bch2_fallocate_dispatch() for the mode mapping.
-+ *
-+ * @offset and @len must both be multiples of the block size. For insert,
-+ * @offset must be below i_size; for collapse, @offset + @len must be below
-+ * i_size. Violations return -EINVAL.
-+ *
-+ * The in-memory i_size is raised before the btree operation when inserting,
-+ * and lowered only after it succeeds when collapsing.
-+ */
-+static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
-+ loff_t offset, loff_t len,
-+ bool insert)
-+{
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct address_space *mapping = inode->v.i_mapping;
-+ s64 i_sectors_delta = 0;
-+ int ret = 0;
-+
-+ /* Both values must be block aligned */
-+ if ((offset | len) & (block_bytes(c) - 1))
-+ return -EINVAL;
-+
-+ if (insert) {
-+ if (offset >= inode->v.i_size)
-+ return -EINVAL;
-+ } else {
-+ if (offset + len >= inode->v.i_size)
-+ return -EINVAL;
-+ }
-+
-+ ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
-+ if (ret)
-+ return ret;
-+
-+ if (insert)
-+ i_size_write(&inode->v, inode->v.i_size + len);
-+
-+ ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9,
-+ insert, &i_sectors_delta);
-+ if (!ret && !insert)
-+ i_size_write(&inode->v, inode->v.i_size - len);
-+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Core of fallocate: walk the extents btree slot by slot over the sector
-+ * range [@start_sector, @end_sector) of @inode and call
-+ * bch2_extent_fallocate() on each slot that needs it.
-+ *
-+ * Slots that are already reservations with enough fully allocated replicas
-+ * are skipped, as are data extents unless FALLOC_FL_ZERO_RANGE is set in
-+ * @mode. Transaction restarts are swallowed and the loop simply continues.
-+ *
-+ * If the loop ends with ENOSPC in ZERO_RANGE mode, bch2_fpunch_at() is called
-+ * from the current iterator position up to @end_sector.
-+ *
-+ * Returns 0 or the error that terminated the loop.
-+ */
-+static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
-+ u64 start_sector, u64 end_sector)
-+{
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bpos end_pos = POS(inode->v.i_ino, end_sector);
-+ struct bch_io_opts opts;
-+ int ret = 0;
-+
-+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ POS(inode->v.i_ino, start_sector),
-+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-+
-+ while (!ret && bkey_lt(iter.pos, end_pos)) {
-+ s64 i_sectors_delta = 0;
-+ struct quota_res quota_res = { 0 };
-+ struct bkey_s_c k;
-+ unsigned sectors;
-+ bool is_allocation;
-+ u64 hole_start, hole_end;
-+ u32 snapshot;
-+
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans,
-+ inode->ei_subvol, &snapshot);
-+ if (ret)
-+ goto bkey_err;
-+
-+ bch2_btree_iter_set_snapshot(&iter, snapshot);
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ if ((ret = bkey_err(k)))
-+ goto bkey_err;
-+
-+ /* Candidate range: this slot, clipped to the requested end */
-+ hole_start = iter.pos.offset;
-+ hole_end = bpos_min(k.k->p, end_pos).offset;
-+ is_allocation = bkey_extent_is_allocation(k.k);
-+
-+ /* already reserved */
-+ if (bkey_extent_is_reservation(k) &&
-+ bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
-+ bch2_btree_iter_advance(&iter);
-+ continue;
-+ }
-+
-+ if (bkey_extent_is_data(k.k) &&
-+ !(mode & FALLOC_FL_ZERO_RANGE)) {
-+ bch2_btree_iter_advance(&iter);
-+ continue;
-+ }
-+
-+ if (!(mode & FALLOC_FL_ZERO_RANGE)) {
-+ /*
-+ * Lock ordering - can't be holding btree locks while
-+ * blocking on a folio lock:
-+ */
-+ /*
-+ * NOTE(review): the last argument (true, then false) presumably
-+ * selects a non-blocking first attempt, retried with btree locks
-+ * dropped - confirm against bch2_clamp_data_hole().
-+ */
-+ if (bch2_clamp_data_hole(&inode->v,
-+ &hole_start,
-+ &hole_end,
-+ opts.data_replicas, true))
-+ ret = drop_locks_do(trans,
-+ (bch2_clamp_data_hole(&inode->v,
-+ &hole_start,
-+ &hole_end,
-+ opts.data_replicas, false), 0));
-+ bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
-+
-+ if (ret)
-+ goto bkey_err;
-+
-+ if (hole_start == hole_end)
-+ continue;
-+ }
-+
-+ sectors = hole_end - hole_start;
-+
-+ /* Quota is only reserved for ranges not already counted as allocated */
-+ if (!is_allocation) {
-+ ret = bch2_quota_reservation_add(c, inode,
-+ &quota_res, sectors, true);
-+ if (unlikely(ret))
-+ goto bkey_err;
-+ }
-+
-+ ret = bch2_extent_fallocate(trans, inode_inum(inode), &iter,
-+ sectors, opts, &i_sectors_delta,
-+ writepoint_hashed((unsigned long) current));
-+ if (ret)
-+ goto bkey_err;
-+
-+ bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
-+
-+ drop_locks_do(trans,
-+ (bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
-+bkey_err:
-+ /* Return whatever part of the reservation was not consumed above */
-+ bch2_quota_reservation_put(c, inode, &quota_res);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ ret = 0;
-+ }
-+
-+ if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
-+ struct quota_res quota_res = { 0 };
-+ s64 i_sectors_delta = 0;
-+
-+ bch2_fpunch_at(trans, &iter, inode_inum(inode),
-+ end_sector, &i_sectors_delta);
-+ bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
-+ bch2_quota_reservation_put(c, inode, &quota_res);
-+ }
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+/*
-+ * Handle fallocate modes made up of FALLOC_FL_KEEP_SIZE and/or
-+ * FALLOC_FL_ZERO_RANGE (including plain mode 0) for the byte range
-+ * [@offset, @offset + @len).
-+ *
-+ * By default the range is widened to block boundaries. For ZERO_RANGE the
-+ * edge folios are zeroed, the page cache for the range is dropped and the
-+ * range is narrowed to whole blocks instead.
-+ *
-+ * After the allocation pass, i_size is updated in memory and on disk when
-+ * the range reaches it and either KEEP_SIZE is not set or this is a
-+ * ZERO_RANGE whose last folio won't be handled by writeback.
-+ *
-+ * Returns the allocation error if any, else the inode update result.
-+ */
-+static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
-+ loff_t offset, loff_t len)
-+{
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ u64 end = offset + len;
-+ u64 block_start = round_down(offset, block_bytes(c));
-+ u64 block_end = round_up(end, block_bytes(c));
-+ bool truncated_last_page = false;
-+ int ret, ret2 = 0;
-+
-+ if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) {
-+ ret = inode_newsize_ok(&inode->v, end);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (mode & FALLOC_FL_ZERO_RANGE) {
-+ ret = bch2_truncate_folios(inode, offset, end);
-+ if (unlikely(ret < 0))
-+ return ret;
-+
-+ truncated_last_page = ret;
-+
-+ truncate_pagecache_range(&inode->v, offset, end - 1);
-+
-+ block_start = round_up(offset, block_bytes(c));
-+ block_end = round_down(end, block_bytes(c));
-+ }
-+
-+ ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9);
-+
-+ /*
-+ * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update,
-+ * so that the VFS cache i_size is consistent with the btree i_size:
-+ */
-+ if (ret &&
-+ !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)))
-+ return ret;
-+
-+ /* With KEEP_SIZE the size written below is never larger than the current one */
-+ if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size)
-+ end = inode->v.i_size;
-+
-+ if (end >= inode->v.i_size &&
-+ (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) ||
-+ !(mode & FALLOC_FL_KEEP_SIZE))) {
-+ spin_lock(&inode->v.i_lock);
-+ i_size_write(&inode->v, end);
-+ spin_unlock(&inode->v.i_lock);
-+
-+ mutex_lock(&inode->ei_update_lock);
-+ ret2 = bch2_write_inode_size(c, inode, end, 0);
-+ mutex_unlock(&inode->ei_update_lock);
-+ }
-+
-+ return ret ?: ret2;
-+}
-+
-+/*
-+ * fallocate entry point: takes a filesystem write ref, the inode lock and
-+ * the pagecache block lock, waits for direct I/O, then dispatches on @mode:
-+ *
-+ *  - only KEEP_SIZE and/or ZERO_RANGE bits (or 0): bchfs_fallocate()
-+ *  - exactly PUNCH_HOLE|KEEP_SIZE:                 bchfs_fpunch()
-+ *  - exactly INSERT_RANGE / COLLAPSE_RANGE:        bchfs_fcollapse_finsert()
-+ *  - anything else:                                -EOPNOTSUPP
-+ *
-+ * Returns -EROFS if the write ref cannot be taken; other results are mapped
-+ * through bch2_err_class().
-+ */
-+long bch2_fallocate_dispatch(struct file *file, int mode,
-+ loff_t offset, loff_t len)
-+{
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ long ret;
-+
-+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
-+ return -EROFS;
-+
-+ inode_lock(&inode->v);
-+ inode_dio_wait(&inode->v);
-+ bch2_pagecache_block_get(inode);
-+
-+ ret = file_modified(file);
-+ if (ret)
-+ goto err;
-+
-+ if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
-+ ret = bchfs_fallocate(inode, mode, offset, len);
-+ else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
-+ ret = bchfs_fpunch(inode, offset, len);
-+ else if (mode == FALLOC_FL_INSERT_RANGE)
-+ ret = bchfs_fcollapse_finsert(inode, offset, len, true);
-+ else if (mode == FALLOC_FL_COLLAPSE_RANGE)
-+ ret = bchfs_fcollapse_finsert(inode, offset, len, false);
-+ else
-+ ret = -EOPNOTSUPP;
-+err:
-+ /* Release in the reverse order of acquisition */
-+ bch2_pagecache_block_put(inode);
-+ inode_unlock(&inode->v);
-+ bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);
-+
-+ return bch2_err_class(ret);
-+}
-+
-+/*
-+ * Take a quota reservation for unallocated blocks in a given file range
-+ * Does not check pagecache
-+ *
-+ * @start and @end are sector offsets. The reservation size starts at
-+ * @end - @start and is reduced by the overlap of every extent in the range
-+ * that already counts as an allocation. Transaction restarts are retried
-+ * from the position reached so far.
-+ *
-+ * Returns a btree error, or the result of bch2_quota_reservation_add().
-+ */
-+static int quota_reserve_range(struct bch_inode_info *inode,
-+ struct quota_res *res,
-+ u64 start, u64 end)
-+{
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ u32 snapshot;
-+ u64 sectors = end - start;
-+ u64 pos = start;
-+ int ret;
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ SPOS(inode->v.i_ino, pos, snapshot), 0);
-+
-+ while (!(ret = btree_trans_too_many_iters(trans)) &&
-+ (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k &&
-+ !(ret = bkey_err(k))) {
-+ if (bkey_extent_is_allocation(k.k)) {
-+ /* Part of this extent that lies inside [start, end) */
-+ u64 s = min(end, k.k->p.offset) -
-+ max(start, bkey_start_offset(k.k));
-+ BUG_ON(s > sectors);
-+ sectors -= s;
-+ }
-+ bch2_btree_iter_advance(&iter);
-+ }
-+ /* Resume point if the transaction has to be restarted */
-+ pos = iter.pos.offset;
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_put(trans);
-+
-+ return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true);
-+}
-+
-+/*
-+ * Remap (reflink) @len bytes from @file_src at @pos_src to @file_dst at
-+ * @pos_dst.
-+ *
-+ * REMAP_FILE_DEDUP is not supported (-EOPNOTSUPP); flags other than
-+ * REMAP_FILE_DEDUP and REMAP_FILE_ADVISORY are rejected with -EINVAL, as are
-+ * positions that are not block aligned and overlapping ranges within a
-+ * single inode.
-+ *
-+ * Returns the number of bytes remapped (at most @len) or a negative error
-+ * mapped through bch2_err_class(). For synchronous destinations the inode is
-+ * flushed before returning; a flush failure is returned as the error, but a
-+ * successful flush leaves the byte count in place (it used to be replaced
-+ * by the flush's 0, making a successful sync remap report 0 bytes).
-+ */
-+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
-+			     struct file *file_dst, loff_t pos_dst,
-+			     loff_t len, unsigned remap_flags)
-+{
-+	struct bch_inode_info *src = file_bch_inode(file_src);
-+	struct bch_inode_info *dst = file_bch_inode(file_dst);
-+	struct bch_fs *c = src->v.i_sb->s_fs_info;
-+	struct quota_res quota_res = { 0 };
-+	s64 i_sectors_delta = 0;
-+	u64 aligned_len;
-+	loff_t ret = 0;
-+
-+	if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
-+		return -EINVAL;
-+
-+	if (remap_flags & REMAP_FILE_DEDUP)
-+		return -EOPNOTSUPP;
-+
-+	if ((pos_src & (block_bytes(c) - 1)) ||
-+	    (pos_dst & (block_bytes(c) - 1)))
-+		return -EINVAL;
-+
-+	if (src == dst &&
-+	    abs(pos_src - pos_dst) < len)
-+		return -EINVAL;
-+
-+	bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
-+
-+	inode_dio_wait(&src->v);
-+	inode_dio_wait(&dst->v);
-+
-+	/* May shorten len */
-+	ret = generic_remap_file_range_prep(file_src, pos_src,
-+					    file_dst, pos_dst,
-+					    &len, remap_flags);
-+	if (ret < 0 || len == 0)
-+		goto err;
-+
-+	aligned_len = round_up((u64) len, block_bytes(c));
-+
-+	ret = bch2_write_invalidate_inode_pages_range(dst->v.i_mapping,
-+				pos_dst, pos_dst + len - 1);
-+	if (ret)
-+		goto err;
-+
-+	ret = quota_reserve_range(dst, &quota_res, pos_dst >> 9,
-+				  (pos_dst + aligned_len) >> 9);
-+	if (ret)
-+		goto err;
-+
-+	file_update_time(file_dst);
-+
-+	bch2_mark_pagecache_unallocated(src, pos_src >> 9,
-+				   (pos_src + aligned_len) >> 9);
-+
-+	ret = bch2_remap_range(c,
-+			       inode_inum(dst), pos_dst >> 9,
-+			       inode_inum(src), pos_src >> 9,
-+			       aligned_len >> 9,
-+			       pos_dst + len, &i_sectors_delta);
-+	if (ret < 0)
-+		goto err;
-+
-+	/*
-+	 * due to alignment, we might have remapped slightly more than requested
-+	 */
-+	ret = min((u64) ret << 9, (u64) len);
-+
-+	bch2_i_sectors_acct(c, dst, &quota_res, i_sectors_delta);
-+
-+	spin_lock(&dst->v.i_lock);
-+	if (pos_dst + ret > dst->v.i_size)
-+		i_size_write(&dst->v, pos_dst + ret);
-+	spin_unlock(&dst->v.i_lock);
-+
-+	if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
-+	    IS_SYNC(file_inode(file_dst))) {
-+		int flush_ret = bch2_flush_inode(c, dst);
-+
-+		/* Only a flush failure may replace the byte count */
-+		if (flush_ret)
-+			ret = flush_ret;
-+	}
-+err:
-+	bch2_quota_reservation_put(c, dst, &quota_res);
-+	bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
-+
-+	return bch2_err_class(ret);
-+}
-+
-+/* fseek: */
-+
-+/*
-+ * SEEK_DATA: find the first byte at or after @offset that has data, looking
-+ * at both the extents btree and the page cache.
-+ *
-+ * Returns the new file position (set through vfs_setpos()), -ENXIO if
-+ * @offset is at or past i_size or there is no data before i_size, or a
-+ * btree error.
-+ */
-+static loff_t bch2_seek_data(struct file *file, u64 offset)
-+{
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct btree_trans *trans;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	subvol_inum inum = inode_inum(inode);
-+	u64 isize, next_data = MAX_LFS_FILESIZE;
-+	u32 snapshot;
-+	int ret;
-+
-+	isize = i_size_read(&inode->v);
-+	if (offset >= isize)
-+		return -ENXIO;
-+
-+	trans = bch2_trans_get(c);
-+retry:
-+	bch2_trans_begin(trans);
-+
-+	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+	if (ret)
-+		goto err;
-+
-+	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents,
-+			   SPOS(inode->v.i_ino, offset >> 9, snapshot),
-+			   POS(inode->v.i_ino, U64_MAX),
-+			   0, k, ret) {
-+		if (bkey_extent_is_data(k.k)) {
-+			next_data = max(offset, bkey_start_offset(k.k) << 9);
-+			break;
-+		} else if (k.k->p.offset > isize >> 9) {
-+			/*
-+			 * Key offsets are in sectors, isize is in bytes:
-+			 * compare in sectors. Once a non-data key ends past
-+			 * i_size nothing further can matter, because any
-+			 * result >= isize is reported as -ENXIO below.
-+			 */
-+			break;
-+		}
-+	}
-+	bch2_trans_iter_exit(trans, &iter);
-+err:
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		goto retry;
-+
-+	bch2_trans_put(trans);
-+	if (ret)
-+		return ret;
-+
-+	/* Data may exist only in the page cache before the first data extent */
-+	if (next_data > offset)
-+		next_data = bch2_seek_pagecache_data(&inode->v,
-+					offset, next_data, 0, false);
-+
-+	if (next_data >= isize)
-+		return -ENXIO;
-+
-+	return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
-+}
-+
-+/*
-+ * SEEK_HOLE: find the first hole at or after @offset, consulting the extents
-+ * btree slot by slot and the page cache for every slot that is not data.
-+ *
-+ * Returns the new file position (set through vfs_setpos(), clamped to
-+ * i_size), -ENXIO if @offset is at or past i_size, or a btree error.
-+ */
-+static loff_t bch2_seek_hole(struct file *file, u64 offset)
-+{
-+ struct bch_inode_info *inode = file_bch_inode(file);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ subvol_inum inum = inode_inum(inode);
-+ u64 isize, next_hole = MAX_LFS_FILESIZE;
-+ u32 snapshot;
-+ int ret;
-+
-+ isize = i_size_read(&inode->v);
-+ if (offset >= isize)
-+ return -ENXIO;
-+
-+ trans = bch2_trans_get(c);
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
-+ SPOS(inode->v.i_ino, offset >> 9, snapshot),
-+ BTREE_ITER_SLOTS, k, ret) {
-+ if (k.k->p.inode != inode->v.i_ino) {
-+ /* Walked past this inode's keys: only the page cache is left to check */
-+ next_hole = bch2_seek_pagecache_hole(&inode->v,
-+ offset, MAX_LFS_FILESIZE, 0, false);
-+ break;
-+ } else if (!bkey_extent_is_data(k.k)) {
-+ next_hole = bch2_seek_pagecache_hole(&inode->v,
-+ max(offset, bkey_start_offset(k.k) << 9),
-+ k.k->p.offset << 9, 0, false);
-+
-+ /* A result before the end of this slot is taken as the hole; else keep scanning */
-+ if (next_hole < k.k->p.offset << 9)
-+ break;
-+ } else {
-+ offset = max(offset, bkey_start_offset(k.k) << 9);
-+ }
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_put(trans);
-+ if (ret)
-+ return ret;
-+
-+ if (next_hole > isize)
-+ next_hole = isize;
-+
-+ return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE);
-+}
-+
-+/*
-+ * llseek entry point: SEEK_SET/SEEK_CUR/SEEK_END go to the generic helper,
-+ * SEEK_DATA and SEEK_HOLE to the bcachefs implementations, and any other
-+ * @whence yields -EINVAL. The result is mapped through bch2_err_class().
-+ */
-+loff_t bch2_llseek(struct file *file, loff_t offset, int whence)
-+{
-+	loff_t ret;
-+
-+	if (whence == SEEK_SET ||
-+	    whence == SEEK_CUR ||
-+	    whence == SEEK_END)
-+		ret = generic_file_llseek(file, offset, whence);
-+	else if (whence == SEEK_DATA)
-+		ret = bch2_seek_data(file, offset);
-+	else if (whence == SEEK_HOLE)
-+		ret = bch2_seek_hole(file, offset);
-+	else
-+		ret = -EINVAL;
-+
-+	return bch2_err_class(ret);
-+}
-+
-+/* Tear down what bch2_fs_fsio_init() set up: the nocow flush bioset. */
-+void bch2_fs_fsio_exit(struct bch_fs *c)
-+{
-+ bioset_exit(&c->nocow_flush_bioset);
-+}
-+
-+/*
-+ * Initialise the bioset used for nocow flush bios. The front pad is the
-+ * offset of the bio inside struct nocow_flush, so each allocated bio is
-+ * embedded in one. Returns 0 or -BCH_ERR_ENOMEM_nocow_flush_bioset_init.
-+ */
-+int bch2_fs_fsio_init(struct bch_fs *c)
-+{
-+	int err = bioset_init(&c->nocow_flush_bioset, 1,
-+			      offsetof(struct nocow_flush, bio), 0);
-+
-+	if (err)
-+		return -BCH_ERR_ENOMEM_nocow_flush_bioset_init;
-+
-+	return 0;
-+}
-+
-+#endif /* NO_BCACHEFS_FS */
-diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h
-new file mode 100644
-index 000000000000..ca70346e68dc
---- /dev/null
-+++ b/fs/bcachefs/fs-io.h
-@@ -0,0 +1,184 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_IO_H
-+#define _BCACHEFS_FS_IO_H
-+
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "buckets.h"
-+#include "fs.h"
-+#include "io_write_types.h"
-+#include "quota.h"
-+
-+#include <linux/uio.h>
-+
-+/* Folio-based counterpart of struct bio_vec: a byte range within one folio. */
-+struct folio_vec {
-+ struct folio *fv_folio;
-+ size_t fv_offset; /* byte offset from the start of fv_folio */
-+ size_t fv_len; /* length in bytes */
-+};
-+
-+/*
-+ * Convert a page-based bio_vec into a folio_vec: the offset becomes relative
-+ * to the start of the containing folio and the length is capped at the end
-+ * of that folio.
-+ */
-+static inline struct folio_vec biovec_to_foliovec(struct bio_vec bv)
-+{
-+	struct folio_vec fv;
-+
-+	fv.fv_folio	= page_folio(bv.bv_page);
-+	fv.fv_offset	= (folio_page_idx(fv.fv_folio, bv.bv_page) << PAGE_SHIFT) +
-+			  bv.bv_offset;
-+	fv.fv_len	= min_t(size_t, folio_size(fv.fv_folio) - fv.fv_offset,
-+				bv.bv_len);
-+
-+	return fv;
-+}
-+
-+/* folio_vec for the segment of @bio that @iter currently points at. */
-+static inline struct folio_vec bio_iter_iovec_folio(struct bio *bio,
-+						    struct bvec_iter iter)
-+{
-+	struct bio_vec bv = bio_iter_iovec(bio, iter);
-+
-+	return biovec_to_foliovec(bv);
-+}
-+
-+/*
-+ * Iterate @bvl (a struct folio_vec) over @bio starting from iterator value
-+ * @start, using @iter as the loop cursor. Each step advances @iter by the
-+ * length of the folio_vec just visited; the loop ends when (iter).bi_size
-+ * reaches zero.
-+ */
-+#define __bio_for_each_folio(bvl, bio, iter, start) \
-+ for (iter = (start); \
-+ (iter).bi_size && \
-+ ((bvl = bio_iter_iovec_folio((bio), (iter))), 1); \
-+ bio_advance_iter_single((bio), &(iter), (bvl).fv_len))
-+
-+/**
-+ * bio_for_each_folio - iterate over folios within a bio
-+ *
-+ * Like other non-_all versions, this iterates over what bio->bi_iter currently
-+ * points to. This version is for drivers, where the bio may have previously
-+ * been split or cloned.
-+ */
-+#define bio_for_each_folio(bvl, bio, iter) \
-+ __bio_for_each_folio(bvl, bio, iter, (bio)->bi_iter)
-+
-+struct quota_res {
-+ u64 sectors;
-+};
-+
-+#ifdef CONFIG_BCACHEFS_QUOTA
-+
-+/*
-+ * Release the whole of @res: un-account its sectors from the inode's quota
-+ * and from inode->ei_quota_reserved, then zero the reservation.  The locked
-+ * wrapper bch2_quota_reservation_put() calls this with ei_quota_lock held.
-+ */
-+static inline void __bch2_quota_reservation_put(struct bch_fs *c,
-+					 struct bch_inode_info *inode,
-+					 struct quota_res *res)
-+{
-+	u64 sectors = res->sectors;
-+
-+	BUG_ON(sectors > inode->ei_quota_reserved);
-+
-+	bch2_quota_acct(c, inode->ei_qid, Q_SPC,
-+			-((s64) sectors), KEY_TYPE_QUOTA_PREALLOC);
-+	inode->ei_quota_reserved -= sectors;
-+	res->sectors = 0;
-+}
-+
-+/*
-+ * Locked variant of __bch2_quota_reservation_put(); a no-op (and no lock
-+ * taken) when the reservation is already empty.
-+ */
-+static inline void bch2_quota_reservation_put(struct bch_fs *c,
-+				       struct bch_inode_info *inode,
-+				       struct quota_res *res)
-+{
-+	if (!res->sectors)
-+		return;
-+
-+	mutex_lock(&inode->ei_quota_lock);
-+	__bch2_quota_reservation_put(c, inode, res);
-+	mutex_unlock(&inode->ei_quota_lock);
-+}
-+
-+/*
-+ * Add @sectors to the quota reservation @res for @inode.
-+ *
-+ * Inodes flagged EI_INODE_SNAPSHOT are skipped (returns 0 without reserving).
-+ * @check_enospc selects KEY_TYPE_QUOTA_PREALLOC vs KEY_TYPE_QUOTA_NOCHECK
-+ * accounting.  On success both inode->ei_quota_reserved and res->sectors
-+ * grow by @sectors; on failure neither is touched and the error from
-+ * bch2_quota_acct() is returned.
-+ */
-+static inline int bch2_quota_reservation_add(struct bch_fs *c,
-+				      struct bch_inode_info *inode,
-+				      struct quota_res *res,
-+				      u64 sectors,
-+				      bool check_enospc)
-+{
-+	int ret;
-+
-+	if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags))
-+		return 0;
-+
-+	mutex_lock(&inode->ei_quota_lock);
-+
-+	ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors,
-+			      check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK);
-+	if (unlikely(ret))
-+		goto out;
-+
-+	inode->ei_quota_reserved += sectors;
-+	res->sectors += sectors;
-+out:
-+	mutex_unlock(&inode->ei_quota_lock);
-+
-+	return ret;
-+}
-+
-+#else
-+
-+static inline void __bch2_quota_reservation_put(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ struct quota_res *res) {}
-+
-+static inline void bch2_quota_reservation_put(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ struct quota_res *res) {}
-+
-+/*
-+ * Stub used when quota support is compiled out: nothing is reserved and the
-+ * call always succeeds.  @sectors is u64 so the prototype matches the real
-+ * implementation and callers' sector counts are not silently truncated.
-+ */
-+static inline int bch2_quota_reservation_add(struct bch_fs *c,
-+				      struct bch_inode_info *inode,
-+				      struct quota_res *res,
-+				      u64 sectors,
-+				      bool check_enospc)
-+{
-+	return 0;
-+}
-+
-+#endif
-+
-+void __bch2_i_sectors_acct(struct bch_fs *, struct bch_inode_info *,
-+ struct quota_res *, s64);
-+
-+/*
-+ * Locked wrapper around __bch2_i_sectors_acct(): takes ei_quota_lock around
-+ * the call, and does nothing at all when @sectors is zero.
-+ */
-+static inline void bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
-+				       struct quota_res *quota_res, s64 sectors)
-+{
-+	if (!sectors)
-+		return;
-+
-+	mutex_lock(&inode->ei_quota_lock);
-+	__bch2_i_sectors_acct(c, inode, quota_res, sectors);
-+	mutex_unlock(&inode->ei_quota_lock);
-+}
-+
-+/*
-+ * Return current->faults_disabled_mapping with bit 0 masked off; bit 0 is
-+ * the flag set by set_fdm_dropped_locks().
-+ */
-+static inline struct address_space *faults_disabled_mapping(void)
-+{
-+	unsigned long tagged = (unsigned long) current->faults_disabled_mapping;
-+
-+	return (void *) (tagged & ~1UL);
-+}
-+
-+/* Set bit 0 of current->faults_disabled_mapping (the "dropped locks" flag). */
-+static inline void set_fdm_dropped_locks(void)
-+{
-+	unsigned long tagged = (unsigned long) current->faults_disabled_mapping;
-+
-+	current->faults_disabled_mapping = (void *) (tagged|1);
-+}
-+
-+/* Test bit 0 of current->faults_disabled_mapping. */
-+static inline bool fdm_dropped_locks(void)
-+{
-+	unsigned long tagged = (unsigned long) current->faults_disabled_mapping;
-+
-+	return tagged & 1;
-+}
-+
-+void bch2_inode_flush_nocow_writes_async(struct bch_fs *,
-+ struct bch_inode_info *, struct closure *);
-+
-+int __must_check bch2_write_inode_size(struct bch_fs *,
-+ struct bch_inode_info *,
-+ loff_t, unsigned);
-+
-+int bch2_fsync(struct file *, loff_t, loff_t, int);
-+
-+int bchfs_truncate(struct mnt_idmap *,
-+ struct bch_inode_info *, struct iattr *);
-+long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
-+
-+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *,
-+ loff_t, loff_t, unsigned);
-+
-+loff_t bch2_llseek(struct file *, loff_t, int);
-+
-+void bch2_fs_fsio_exit(struct bch_fs *);
-+int bch2_fs_fsio_init(struct bch_fs *);
-+#else
-+static inline void bch2_fs_fsio_exit(struct bch_fs *c) {}
-+static inline int bch2_fs_fsio_init(struct bch_fs *c) { return 0; }
-+#endif
-+
-+#endif /* _BCACHEFS_FS_IO_H */
-diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
-new file mode 100644
-index 000000000000..5a39bcb597a3
---- /dev/null
-+++ b/fs/bcachefs/fs-ioctl.c
-@@ -0,0 +1,572 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "bcachefs.h"
-+#include "chardev.h"
-+#include "dirent.h"
-+#include "fs.h"
-+#include "fs-common.h"
-+#include "fs-ioctl.h"
-+#include "quota.h"
-+
-+#include <linux/compat.h>
-+#include <linux/fsnotify.h>
-+#include <linux/mount.h>
-+#include <linux/namei.h>
-+#include <linux/security.h>
-+#include <linux/writeback.h>
-+
-+#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32)
-+#define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */
-+#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
-+#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
-+
-+struct flags_set {
-+ unsigned mask;
-+ unsigned flags;
-+
-+ unsigned projid;
-+
-+ bool set_projinherit;
-+ bool projinherit;
-+};
-+
-+/*
-+ * inode_set_fn callback: apply the flag change described by the
-+ * struct flags_set in @p to the unpacked inode @bi.
-+ *
-+ * Returns -EPERM if the append/immutable bits would change without
-+ * CAP_LINUX_IMMUTABLE, -EINVAL if anything other than nodump/noatime is
-+ * requested on an inode that is neither a regular file nor a directory,
-+ * and 0 after updating bi_flags, bi_ctime and (optionally) the project
-+ * bit of bi_fields_set.
-+ */
-+static int bch2_inode_flags_set(struct btree_trans *trans,
-+				struct bch_inode_info *inode,
-+				struct bch_inode_unpacked *bi,
-+				void *p)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	/*
-+	 * We're relying on btree locking here for exclusion with other ioctl
-+	 * calls - use the flags in the btree (@bi), not inode->i_flags:
-+	 */
-+	struct flags_set *s = p;
-+	unsigned newflags = s->flags;
-+	unsigned oldflags = bi->bi_flags & s->mask;
-+	unsigned changed = newflags ^ oldflags;
-+	bool file_or_dir = S_ISREG(bi->bi_mode) || S_ISDIR(bi->bi_mode);
-+
-+	/* capable() is only consulted when a protected bit actually flips */
-+	if ((changed & (BCH_INODE_append|BCH_INODE_immutable)) &&
-+	    !capable(CAP_LINUX_IMMUTABLE))
-+		return -EPERM;
-+
-+	if (!file_or_dir &&
-+	    (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
-+		return -EINVAL;
-+
-+	if (s->set_projinherit) {
-+		bi->bi_fields_set &= ~(1 << Inode_opt_project);
-+		bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
-+	}
-+
-+	bi->bi_flags &= ~s->mask;
-+	bi->bi_flags |= newflags;
-+
-+	bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
-+	return 0;
-+}
-+
-+static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
-+{
-+ unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);
-+
-+ return put_user(flags, arg);
-+}
-+
-+/*
-+ * FS_IOC_SETFLAGS: read an int of FS_*_FL flags from userspace and store the
-+ * corresponding bcachefs inode flags.
-+ *
-+ * Returns -EFAULT if the argument cannot be read, -EOPNOTSUPP if any
-+ * requested flag has no bcachefs equivalent, -EACCES if the caller neither
-+ * owns the inode nor is capable, otherwise the result of the inode write.
-+ */
-+static int bch2_ioc_setflags(struct bch_fs *c,
-+			     struct file *file,
-+			     struct bch_inode_info *inode,
-+			     void __user *arg)
-+{
-+	struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
-+	unsigned uflags;
-+	int ret;
-+
-+	if (get_user(uflags, (int __user *) arg))
-+		return -EFAULT;
-+
-+	/*
-+	 * map_flags_rev() clears every bit it translates out of uflags, so
-+	 * whatever is left afterwards is a flag we don't support:
-+	 */
-+	s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
-+	if (uflags)
-+		return -EOPNOTSUPP;
-+
-+	ret = mnt_want_write_file(file);
-+	if (ret)
-+		return ret;
-+
-+	inode_lock(&inode->v);
-+
-+	if (inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
-+		mutex_lock(&inode->ei_update_lock);
-+		ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
-+				       ATTR_CTIME);
-+		mutex_unlock(&inode->ei_update_lock);
-+	} else {
-+		ret = -EACCES;
-+	}
-+
-+	inode_unlock(&inode->v);
-+	mnt_drop_write_file(file);
-+	return ret;
-+}
-+
-+/*
-+ * FS_IOC_FSGETXATTR: report the inode's flags (as FS_XFLAG_* bits, plus
-+ * FS_XFLAG_PROJINHERIT when the project option is set on the inode) and its
-+ * project id.  Returns -EFAULT if the result cannot be copied out.
-+ */
-+static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
-+			       struct fsxattr __user *arg)
-+{
-+	struct fsxattr fa = { 0 };
-+
-+	fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
-+	fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
-+
-+	if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
-+		fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
-+
-+	return copy_to_user(arg, &fa, sizeof(fa)) ? -EFAULT : 0;
-+}
-+
-+/*
-+ * inode_set_fn callback for FS_IOC_FSSETXATTR: store the new project id (and
-+ * mark the project option as explicitly set) when it differs from the one in
-+ * @bi, then apply the flag update via bch2_inode_flags_set().
-+ */
-+static int fssetxattr_inode_update_fn(struct btree_trans *trans,
-+				      struct bch_inode_info *inode,
-+				      struct bch_inode_unpacked *bi,
-+				      void *p)
-+{
-+	struct flags_set *s = p;
-+
-+	if (bi->bi_project != s->projid) {
-+		bi->bi_project = s->projid;
-+		bi->bi_fields_set |= 1U << Inode_opt_project;
-+	}
-+
-+	return bch2_inode_flags_set(trans, inode, bi, p);
-+}
-+
-+/*
-+ * FS_IOC_FSSETXATTR: set inode flags and project id from a struct fsxattr.
-+ *
-+ * Returns -EFAULT if the argument cannot be read, -EOPNOTSUPP for xflags
-+ * with no bcachefs equivalent, -EINVAL for a project id >= U32_MAX, -EACCES
-+ * if the caller neither owns the inode nor is capable; otherwise the result
-+ * of bch2_set_projid() / bch2_write_inode().
-+ */
-+static int bch2_ioc_fssetxattr(struct bch_fs *c,
-+			       struct file *file,
-+			       struct bch_inode_info *inode,
-+			       struct fsxattr __user *arg)
-+{
-+	struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
-+	struct fsxattr fa;
-+	int ret;
-+
-+	if (copy_from_user(&fa, arg, sizeof(fa)))
-+		return -EFAULT;
-+
-+	/* PROJINHERIT is tracked separately from the mapped flag bits */
-+	s.set_projinherit = true;
-+	s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
-+	fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
-+
-+	/* map_flags_rev() strips translated bits; leftovers are unsupported */
-+	s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
-+	if (fa.fsx_xflags)
-+		return -EOPNOTSUPP;
-+
-+	if (fa.fsx_projid >= U32_MAX)
-+		return -EINVAL;
-+
-+	/*
-+	 * inode fields accessible via the xattr interface are stored with a +1
-+	 * bias, so that 0 means unset:
-+	 */
-+	s.projid = fa.fsx_projid + 1;
-+
-+	ret = mnt_want_write_file(file);
-+	if (ret)
-+		return ret;
-+
-+	inode_lock(&inode->v);
-+	if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
-+		ret = -EACCES;
-+		goto out_unlock_inode;
-+	}
-+
-+	mutex_lock(&inode->ei_update_lock);
-+	ret = bch2_set_projid(c, inode, fa.fsx_projid);
-+	if (!ret)
-+		ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
-+				       ATTR_CTIME);
-+	mutex_unlock(&inode->ei_update_lock);
-+
-+out_unlock_inode:
-+	inode_unlock(&inode->v);
-+	mnt_drop_write_file(file);
-+	return ret;
-+}
-+
-+static int bch2_reinherit_attrs_fn(struct btree_trans *trans,
-+ struct bch_inode_info *inode,
-+ struct bch_inode_unpacked *bi,
-+ void *p)
-+{
-+ struct bch_inode_info *dir = p;
-+
-+ return !bch2_reinherit_attrs(bi, &dir->ei_inode);
-+}
-+
-+/*
-+ * BCHFS_IOC_REINHERIT_ATTRS: look up the entry @name in directory @src and
-+ * rewrite that inode via bch2_reinherit_attrs_fn(), with @src as the
-+ * directory to inherit from.
-+ *
-+ * Returns a negative error code on failure.  On success the value returned
-+ * is !ret of the bch2_write_inode() result (see the "did work" comment
-+ * below).
-+ */
-+static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
-+ struct file *file,
-+ struct bch_inode_info *src,
-+ const char __user *name)
-+{
-+ struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode);
-+ struct bch_inode_info *dst;
-+ struct inode *vinode = NULL;
-+ char *kname = NULL;
-+ struct qstr qstr;
-+ int ret = 0;
-+ subvol_inum inum;
-+
-+ kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL);
-+ if (!kname)
-+ return -ENOMEM;
-+
-+ /* on success ret is the number of bytes copied, used as the qstr length */
-+ ret = strncpy_from_user(kname, name, BCH_NAME_MAX);
-+ if (unlikely(ret < 0))
-+ goto err1;
-+
-+ qstr.len = ret;
-+ qstr.name = kname;
-+
-+ ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum);
-+ if (ret)
-+ goto err1;
-+
-+ vinode = bch2_vfs_inode_get(c, inum);
-+ ret = PTR_ERR_OR_ZERO(vinode);
-+ if (ret)
-+ goto err1;
-+
-+ dst = to_bch_ei(vinode);
-+
-+ ret = mnt_want_write_file(file);
-+ if (ret)
-+ goto err2;
-+
-+ bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);
-+
-+ /* move dst's project quota over to src's project before rewriting dst */
-+ if (inode_attr_changing(src, dst, Inode_opt_project)) {
-+ ret = bch2_fs_quota_transfer(c, dst,
-+ src->ei_qid,
-+ 1 << QTYP_PRJ,
-+ KEY_TYPE_QUOTA_PREALLOC);
-+ if (ret)
-+ goto err3;
-+ }
-+
-+ ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0);
-+err3:
-+ bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);
-+
-+ /* return true if we did work */
-+ if (ret >= 0)
-+ ret = !ret;
-+
-+ mnt_drop_write_file(file);
-+err2:
-+ iput(vinode);
-+err1:
-+ kfree(kname);
-+
-+ return ret;
-+}
-+
-+/*
-+ * FS_IOC_GOINGDOWN: force the filesystem read-only.
-+ *
-+ * Requires CAP_SYS_ADMIN (-EPERM otherwise).  @arg points to a u32 selecting
-+ * one of the FSOP_GOING_FLAGS_* modes; an unknown mode returns -EINVAL.
-+ * The whole operation runs with the superblock's s_umount held for write.
-+ */
-+static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
-+{
-+ u32 flags;
-+ int ret = 0;
-+
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EPERM;
-+
-+ if (get_user(flags, arg))
-+ return -EFAULT;
-+
-+ bch_notice(c, "shutdown by ioctl type %u", flags);
-+
-+ down_write(&c->vfs_sb->s_umount);
-+
-+ switch (flags) {
-+ case FSOP_GOING_FLAGS_DEFAULT:
-+ /* freeze the block device around the journal flush + read-only switch */
-+ ret = freeze_bdev(c->vfs_sb->s_bdev);
-+ if (ret)
-+ goto err;
-+
-+ bch2_journal_flush(&c->journal);
-+ c->vfs_sb->s_flags |= SB_RDONLY;
-+ bch2_fs_emergency_read_only(c);
-+ thaw_bdev(c->vfs_sb->s_bdev);
-+ break;
-+
-+ case FSOP_GOING_FLAGS_LOGFLUSH:
-+ /* flush the journal, then share the read-only switch below */
-+ bch2_journal_flush(&c->journal);
-+ fallthrough;
-+
-+ case FSOP_GOING_FLAGS_NOLOGFLUSH:
-+ c->vfs_sb->s_flags |= SB_RDONLY;
-+ bch2_fs_emergency_read_only(c);
-+ break;
-+ default:
-+ ret = -EINVAL;
-+ break;
-+ }
-+err:
-+ up_write(&c->vfs_sb->s_umount);
-+ return ret;
-+}
-+
-+static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
-+ struct bch_ioctl_subvolume arg)
-+{
-+ struct inode *dir;
-+ struct bch_inode_info *inode;
-+ struct user_namespace *s_user_ns;
-+ struct dentry *dst_dentry;
-+ struct path src_path, dst_path;
-+ int how = LOOKUP_FOLLOW;
-+ int error;
-+ subvol_inum snapshot_src = { 0 };
-+ unsigned lookup_flags = 0;
-+ unsigned create_flags = BCH_CREATE_SUBVOL;
-+
-+ if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE|
-+ BCH_SUBVOL_SNAPSHOT_RO))
-+ return -EINVAL;
-+
-+ if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
-+ (arg.src_ptr ||
-+ (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)))
-+ return -EINVAL;
-+
-+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
-+ create_flags |= BCH_CREATE_SNAPSHOT;
-+
-+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
-+ create_flags |= BCH_CREATE_SNAPSHOT_RO;
-+
-+ /* why do we need this lock? */
-+ down_read(&c->vfs_sb->s_umount);
-+
-+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
-+ sync_inodes_sb(c->vfs_sb);
-+retry:
-+ if (arg.src_ptr) {
-+ error = user_path_at(arg.dirfd,
-+ (const char __user *)(unsigned long)arg.src_ptr,
-+ how, &src_path);
-+ if (error)
-+ goto err1;
-+
-+ if (src_path.dentry->d_sb->s_fs_info != c) {
-+ path_put(&src_path);
-+ error = -EXDEV;
-+ goto err1;
-+ }
-+
-+ snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode));
-+ }
-+
-+ dst_dentry = user_path_create(arg.dirfd,
-+ (const char __user *)(unsigned long)arg.dst_ptr,
-+ &dst_path, lookup_flags);
-+ error = PTR_ERR_OR_ZERO(dst_dentry);
-+ if (error)
-+ goto err2;
-+
-+ if (dst_dentry->d_sb->s_fs_info != c) {
-+ error = -EXDEV;
-+ goto err3;
-+ }
-+
-+ if (dst_dentry->d_inode) {
-+ error = -EEXIST;
-+ goto err3;
-+ }
-+
-+ dir = dst_path.dentry->d_inode;
-+ if (IS_DEADDIR(dir)) {
-+ error = -BCH_ERR_ENOENT_directory_dead;
-+ goto err3;
-+ }
-+
-+ s_user_ns = dir->i_sb->s_user_ns;
-+ if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-+ !kgid_has_mapping(s_user_ns, current_fsgid())) {
-+ error = -EOVERFLOW;
-+ goto err3;
-+ }
-+
-+ error = inode_permission(file_mnt_idmap(filp),
-+ dir, MAY_WRITE | MAY_EXEC);
-+ if (error)
-+ goto err3;
-+
-+ if (!IS_POSIXACL(dir))
-+ arg.mode &= ~current_umask();
-+
-+ error = security_path_mkdir(&dst_path, dst_dentry, arg.mode);
-+ if (error)
-+ goto err3;
-+
-+ if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
-+ !arg.src_ptr)
-+ snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol;
-+
-+ inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
-+ dst_dentry, arg.mode|S_IFDIR,
-+ 0, snapshot_src, create_flags);
-+ error = PTR_ERR_OR_ZERO(inode);
-+ if (error)
-+ goto err3;
-+
-+ d_instantiate(dst_dentry, &inode->v);
-+ fsnotify_mkdir(dir, dst_dentry);
-+err3:
-+ done_path_create(&dst_path, dst_dentry);
-+err2:
-+ if (arg.src_ptr)
-+ path_put(&src_path);
-+
-+ if (retry_estale(error, lookup_flags)) {
-+ lookup_flags |= LOOKUP_REVAL;
-+ goto retry;
-+ }
-+err1:
-+ up_read(&c->vfs_sb->s_umount);
-+
-+ return error;
-+}
-+
-+/*
-+ * Run __bch2_ioctl_subvolume_create() with c->snapshot_create_lock held for
-+ * write, returning whatever it returns.
-+ */
-+static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
-+					struct bch_ioctl_subvolume arg)
-+{
-+	long ret;
-+
-+	down_write(&c->snapshot_create_lock);
-+	ret = __bch2_ioctl_subvolume_create(c, filp, arg);
-+	up_write(&c->snapshot_create_lock);
-+
-+	return ret;
-+}
-+
-+/*
-+ * BCH_IOCTL_SUBVOLUME_DESTROY: resolve arg.dst_ptr relative to arg.dirfd and
-+ * unlink it as a snapshot/subvolume deletion.
-+ *
-+ * Returns -EINVAL if any flags are set, -EXDEV if the path is not on this
-+ * filesystem, otherwise the result of the path lookup / __bch2_unlink().
-+ */
-+static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
-+				struct bch_ioctl_subvolume arg)
-+{
-+	struct path path;
-+	struct inode *dir;
-+	int ret;
-+
-+	if (arg.flags)
-+		return -EINVAL;
-+
-+	ret = user_path_at(arg.dirfd,
-+			(const char __user *)(unsigned long)arg.dst_ptr,
-+			LOOKUP_FOLLOW, &path);
-+	if (ret)
-+		return ret;
-+
-+	if (path.dentry->d_sb->s_fs_info == c) {
-+		dir = path.dentry->d_parent->d_inode;
-+
-+		ret = __bch2_unlink(dir, path.dentry, true);
-+		if (!ret) {
-+			fsnotify_rmdir(dir, path.dentry);
-+			d_delete(path.dentry);
-+		}
-+	} else {
-+		ret = -EXDEV;
-+	}
-+
-+	path_put(&path);
-+	return ret;
-+}
-+
-+/*
-+ * ioctl entry point for bcachefs files: dispatch the flag, fsxattr,
-+ * reinherit-attrs, goingdown and subvolume commands to their handlers and
-+ * hand anything else to bch2_fs_ioctl().  FS_IOC_GETVERSION and
-+ * FS_IOC_SETVERSION are answered with -ENOTTY.  The result is passed
-+ * through bch2_err_class() before being returned.
-+ */
-+long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
-+{
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	void __user *uarg = (void __user *) arg;
-+	struct bch_ioctl_subvolume subvol_arg;
-+	long ret;
-+
-+	switch (cmd) {
-+	case FS_IOC_GETFLAGS:
-+		ret = bch2_ioc_getflags(inode, uarg);
-+		break;
-+
-+	case FS_IOC_SETFLAGS:
-+		ret = bch2_ioc_setflags(c, file, inode, uarg);
-+		break;
-+
-+	case FS_IOC_FSGETXATTR:
-+		ret = bch2_ioc_fsgetxattr(inode, uarg);
-+		break;
-+
-+	case FS_IOC_FSSETXATTR:
-+		ret = bch2_ioc_fssetxattr(c, file, inode, uarg);
-+		break;
-+
-+	case BCHFS_IOC_REINHERIT_ATTRS:
-+		ret = bch2_ioc_reinherit_attrs(c, file, inode, uarg);
-+		break;
-+
-+	case FS_IOC_GETVERSION:
-+	case FS_IOC_SETVERSION:
-+		ret = -ENOTTY;
-+		break;
-+
-+	case FS_IOC_GOINGDOWN:
-+		ret = bch2_ioc_goingdown(c, uarg);
-+		break;
-+
-+	case BCH_IOCTL_SUBVOLUME_CREATE:
-+		if (copy_from_user(&subvol_arg, uarg, sizeof(subvol_arg)))
-+			ret = -EFAULT;
-+		else
-+			ret = bch2_ioctl_subvolume_create(c, file, subvol_arg);
-+		break;
-+
-+	case BCH_IOCTL_SUBVOLUME_DESTROY:
-+		if (copy_from_user(&subvol_arg, uarg, sizeof(subvol_arg)))
-+			ret = -EFAULT;
-+		else
-+			ret = bch2_ioctl_subvolume_destroy(c, file, subvol_arg);
-+		break;
-+
-+	default:
-+		ret = bch2_fs_ioctl(c, cmd, uarg);
-+		break;
-+	}
-+
-+	return bch2_err_class(ret);
-+}
-+
-+#ifdef CONFIG_COMPAT
-+/*
-+ * 32-bit compat entry point for the file ioctls.
-+ *
-+ * Only the flags ioctls are translated: FS_IOC32_GETFLAGS and
-+ * FS_IOC32_SETFLAGS are rewritten to their native command numbers and
-+ * forwarded to bch2_fs_file_ioctl() with the argument converted by
-+ * compat_ptr().  Every other command is rejected with -ENOIOCTLCMD.
-+ */
-+long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg)
-+{
-+	/* These are just misnamed, they actually get/put from/to user an int */
-+	switch (cmd) {
-+	/*
-+	 * Match the 32-bit command number: a compat task issues
-+	 * FS_IOC32_GETFLAGS, never the native FS_IOC_GETFLAGS encoding.
-+	 */
-+	case FS_IOC32_GETFLAGS:
-+		cmd = FS_IOC_GETFLAGS;
-+		break;
-+	case FS_IOC32_SETFLAGS:
-+		cmd = FS_IOC_SETFLAGS;
-+		break;
-+	default:
-+		return -ENOIOCTLCMD;
-+	}
-+	return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
-+}
-+#endif
-+
-+#endif /* NO_BCACHEFS_FS */
-diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h
-new file mode 100644
-index 000000000000..d30f9bb056fd
---- /dev/null
-+++ b/fs/bcachefs/fs-ioctl.h
-@@ -0,0 +1,81 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_IOCTL_H
-+#define _BCACHEFS_FS_IOCTL_H
-+
-+/* Inode flags: */
-+
-+/* bcachefs inode flags -> vfs inode flags: */
-+static const __maybe_unused unsigned bch_flags_to_vfs[] = {
-+ [__BCH_INODE_sync] = S_SYNC,
-+ [__BCH_INODE_immutable] = S_IMMUTABLE,
-+ [__BCH_INODE_append] = S_APPEND,
-+ [__BCH_INODE_noatime] = S_NOATIME,
-+};
-+
-+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
-+static const __maybe_unused unsigned bch_flags_to_uflags[] = {
-+ [__BCH_INODE_sync] = FS_SYNC_FL,
-+ [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
-+ [__BCH_INODE_append] = FS_APPEND_FL,
-+ [__BCH_INODE_nodump] = FS_NODUMP_FL,
-+ [__BCH_INODE_noatime] = FS_NOATIME_FL,
-+};
-+
-+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
-+static const __maybe_unused unsigned bch_flags_to_xflags[] = {
-+ [__BCH_INODE_sync] = FS_XFLAG_SYNC,
-+ [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
-+ [__BCH_INODE_append] = FS_XFLAG_APPEND,
-+ [__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
-+ [__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
-+ //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
-+};
-+
-+#define set_flags(_map, _in, _out) \
-+do { \
-+ unsigned _i; \
-+ \
-+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
-+ if ((_in) & (1 << _i)) \
-+ (_out) |= _map[_i]; \
-+ else \
-+ (_out) &= ~_map[_i]; \
-+} while (0)
-+
-+#define map_flags(_map, _in) \
-+({ \
-+ unsigned _out = 0; \
-+ \
-+ set_flags(_map, _in, _out); \
-+ _out; \
-+})
-+
-+/*
-+ * Reverse mapping: for each entry of @_map that has a bit in common with
-+ * @_in, set bit <index> in the result.
-+ *
-+ * NOTE: @_in must be an lvalue and is modified - the bits of every matched
-+ * entry are cleared from it, so whatever is left in @_in afterwards is the
-+ * set of input bits that no entry of @_map covers.
-+ */
-+#define map_flags_rev(_map, _in) \
-+({ \
-+ unsigned _i, _out = 0; \
-+ \
-+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
-+ if ((_in) & _map[_i]) { \
-+ (_out) |= 1 << _i; \
-+ (_in) &= ~_map[_i]; \
-+ } \
-+ (_out); \
-+})
-+
-+#define map_defined(_map) \
-+({ \
-+ unsigned _in = ~0; \
-+ \
-+ map_flags_rev(_map, _in); \
-+})
-+
-+/* Set VFS inode flags from bcachefs inode: */
-+static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
-+{
-+ set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
-+}
-+
-+long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long);
-+long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long);
-+
-+#endif /* _BCACHEFS_FS_IOCTL_H */
-diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
-new file mode 100644
-index 000000000000..82b668ea20aa
---- /dev/null
-+++ b/fs/bcachefs/fs.c
-@@ -0,0 +1,1977 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "bcachefs.h"
-+#include "acl.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "chardev.h"
-+#include "dirent.h"
-+#include "errcode.h"
-+#include "extents.h"
-+#include "fs.h"
-+#include "fs-common.h"
-+#include "fs-io.h"
-+#include "fs-ioctl.h"
-+#include "fs-io-buffered.h"
-+#include "fs-io-direct.h"
-+#include "fs-io-pagecache.h"
-+#include "fsck.h"
-+#include "inode.h"
-+#include "io_read.h"
-+#include "journal.h"
-+#include "keylist.h"
-+#include "quota.h"
-+#include "snapshot.h"
-+#include "super.h"
-+#include "xattr.h"
-+
-+#include <linux/aio.h>
-+#include <linux/backing-dev.h>
-+#include <linux/exportfs.h>
-+#include <linux/fiemap.h>
-+#include <linux/module.h>
-+#include <linux/pagemap.h>
-+#include <linux/posix_acl.h>
-+#include <linux/random.h>
-+#include <linux/seq_file.h>
-+#include <linux/statfs.h>
-+#include <linux/string.h>
-+#include <linux/xattr.h>
-+
-+static struct kmem_cache *bch2_inode_cache;
-+
-+static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
-+ struct bch_inode_info *,
-+ struct bch_inode_unpacked *,
-+ struct bch_subvolume *);
-+
-+/*
-+ * Refresh the in-memory VFS inode from the just-written unpacked inode @bi:
-+ * link count, uid, gid and mode are always copied; atime/mtime/ctime only
-+ * when the matching ATTR_* bit is set in @fields.  Finally the cached
-+ * ei_inode is replaced with @bi and the VFS flags are re-derived from it.
-+ */
-+void bch2_inode_update_after_write(struct btree_trans *trans,
-+				   struct bch_inode_info *inode,
-+				   struct bch_inode_unpacked *bi,
-+				   unsigned fields)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct inode *vinode = &inode->v;
-+
-+	BUG_ON(bi->bi_inum != vinode->i_ino);
-+
-+	bch2_assert_pos_locked(trans, BTREE_ID_inodes,
-+			       POS(0, bi->bi_inum),
-+			       c->opts.inodes_use_key_cache);
-+
-+	set_nlink(vinode, bch2_inode_nlink_get(bi));
-+	i_uid_write(vinode, bi->bi_uid);
-+	i_gid_write(vinode, bi->bi_gid);
-+	vinode->i_mode = bi->bi_mode;
-+
-+	if (fields & ATTR_ATIME)
-+		vinode->i_atime = bch2_time_to_timespec(c, bi->bi_atime);
-+	if (fields & ATTR_MTIME)
-+		vinode->i_mtime = bch2_time_to_timespec(c, bi->bi_mtime);
-+	if (fields & ATTR_CTIME)
-+		inode_set_ctime_to_ts(vinode, bch2_time_to_timespec(c, bi->bi_ctime));
-+
-+	/* flags are derived from ei_inode, so update it first */
-+	inode->ei_inode = *bi;
-+
-+	bch2_inode_flags_to_vfs(inode);
-+}
-+
-+/*
-+ * Read-modify-write @inode's btree entry in a single transaction.
-+ *
-+ * @set, if non-NULL, is called with the unpacked inode and @p to apply the
-+ * modification; a nonzero return from it stops the sequence before anything
-+ * is written.  @fields is the ATTR_* mask passed on to
-+ * bch2_inode_update_after_write() when the commit succeeds.
-+ *
-+ * The sequence is retried on transaction restart.  Returns a negative error
-+ * code, or 0 - note that a positive value from @set is also reported as 0.
-+ */
-+int __must_check bch2_write_inode(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ inode_set_fn set,
-+ void *p, unsigned fields)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter = { NULL };
-+ struct bch_inode_unpacked inode_u;
-+ int ret;
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ /* each step runs only if every previous one returned 0 */
-+ ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
-+ BTREE_ITER_INTENT) ?:
-+ (set ? set(trans, inode, &inode_u, p) : 0) ?:
-+ bch2_inode_write(trans, &iter, &inode_u) ?:
-+ bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
-+
-+ /*
-+ * the btree node lock protects inode->ei_inode, not ei_update_lock;
-+ * this is important for inode updates via bchfs_write_index_update
-+ */
-+ if (!ret)
-+ bch2_inode_update_after_write(trans, inode, &inode_u, fields);
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
-+ "inode %u:%llu not found when updating",
-+ inode_inum(inode).subvol,
-+ inode_inum(inode).inum);
-+
-+ bch2_trans_put(trans);
-+ return ret < 0 ? ret : 0;
-+}
-+
-+/*
-+ * Move @inode's quota usage (i_blocks plus reserved sectors) to @new_qid for
-+ * the quota types in @qtypes.  Types that are not enabled, or whose id is
-+ * already equal to the inode's, are dropped from the mask first; if nothing
-+ * is left the call returns 0 without taking any lock.  On success the
-+ * inode's ei_qid is updated for each transferred type.
-+ */
-+int bch2_fs_quota_transfer(struct bch_fs *c,
-+			   struct bch_inode_info *inode,
-+			   struct bch_qid new_qid,
-+			   unsigned qtypes,
-+			   enum quota_acct_mode mode)
-+{
-+	unsigned i;
-+	int ret;
-+
-+	qtypes &= enabled_qtypes(c);
-+
-+	for (i = 0; i < QTYP_NR; i++) {
-+		if (new_qid.q[i] != inode->ei_qid.q[i])
-+			continue;
-+		qtypes &= ~(1U << i);
-+	}
-+
-+	if (!qtypes)
-+		return 0;
-+
-+	mutex_lock(&inode->ei_quota_lock);
-+
-+	ret = bch2_quota_transfer(c, qtypes, new_qid,
-+				  inode->ei_qid,
-+				  inode->v.i_blocks +
-+				  inode->ei_quota_reserved,
-+				  mode);
-+	if (ret)
-+		goto out;
-+
-+	for (i = 0; i < QTYP_NR; i++)
-+		if (qtypes & (1 << i))
-+			inode->ei_qid.q[i] = new_qid.q[i];
-+out:
-+	mutex_unlock(&inode->ei_quota_lock);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Inode-cache match callback: nonzero when @vinode has the subvolume and
-+ * inode number of the subvol_inum pointed to by @p.
-+ */
-+static int bch2_iget5_test(struct inode *vinode, void *p)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(vinode);
-+	subvol_inum *inum = p;
-+
-+	if (inode->ei_subvol != inum->subvol)
-+		return 0;
-+
-+	return inode->ei_inode.bi_inum == inum->inum;
-+}
-+
-+/*
-+ * Inode-cache init callback: stamp @vinode with the subvolume and inode
-+ * number from the subvol_inum pointed to by @p.  Always returns 0.
-+ */
-+static int bch2_iget5_set(struct inode *vinode, void *p)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(vinode);
-+	subvol_inum *inum = p;
-+
-+	inode->ei_subvol		= inum->subvol;
-+	inode->ei_inode.bi_inum		= inum->inum;
-+	inode->v.i_ino			= inum->inum;
-+	return 0;
-+}
-+
-+/*
-+ * Hash a (subvolume, inode number) pair: the subvolume id and the high and
-+ * low 32 bits of the inode number are mixed with jhash_3words().
-+ */
-+static unsigned bch2_inode_hash(subvol_inum inum)
-+{
-+	u32 inum_hi = inum.inum >> 32;
-+	u32 inum_lo = inum.inum;
-+
-+	return jhash_3words(inum.subvol, inum_hi, inum_lo, JHASH_INITVAL);
-+}
-+
-+/*
-+ * Get the VFS inode for @inum, reading it from the btree if it is not
-+ * already in the inode cache.
-+ *
-+ * Returns the inode, ERR_PTR(-ENOMEM) if iget5_locked() yields nothing, or
-+ * an ERR_PTR carrying the bch2_err_class() of the lookup error.
-+ */
-+struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
-+{
-+	struct bch_inode_unpacked inode_u;
-+	struct bch_inode_info *inode;
-+	struct btree_trans *trans;
-+	struct bch_subvolume subvol;
-+	int ret;
-+
-+	inode = to_bch_ei(iget5_locked(c->vfs_sb,
-+				       bch2_inode_hash(inum),
-+				       bch2_iget5_test,
-+				       bch2_iget5_set,
-+				       &inum));
-+	if (unlikely(!inode))
-+		return ERR_PTR(-ENOMEM);
-+
-+	/* already cached and initialised: nothing more to do */
-+	if (!(inode->v.i_state & I_NEW))
-+		return &inode->v;
-+
-+	trans = bch2_trans_get(c);
-+	ret = lockrestart_do(trans,
-+		bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
-+		bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
-+	if (ret) {
-+		bch2_trans_put(trans);
-+		iget_failed(&inode->v);
-+		return ERR_PTR(bch2_err_class(ret));
-+	}
-+
-+	bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
-+	bch2_trans_put(trans);
-+
-+	mutex_lock(&c->vfs_inodes_lock);
-+	list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
-+	mutex_unlock(&c->vfs_inodes_lock);
-+
-+	unlock_new_inode(&inode->v);
-+
-+	return &inode->v;
-+}
-+
-+struct bch_inode_info *
-+__bch2_create(struct mnt_idmap *idmap,
-+ struct bch_inode_info *dir, struct dentry *dentry,
-+ umode_t mode, dev_t rdev, subvol_inum snapshot_src,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = dir->v.i_sb->s_fs_info;
-+ struct btree_trans *trans;
-+ struct bch_inode_unpacked dir_u;
-+ struct bch_inode_info *inode, *old;
-+ struct bch_inode_unpacked inode_u;
-+ struct posix_acl *default_acl = NULL, *acl = NULL;
-+ subvol_inum inum;
-+ struct bch_subvolume subvol;
-+ u64 journal_seq = 0;
-+ int ret;
-+
-+ /*
-+ * preallocate acls + vfs inode before btree transaction, so that
-+ * nothing can fail after the transaction succeeds:
-+ */
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+ ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
-+ if (ret)
-+ return ERR_PTR(ret);
-+#endif
-+ inode = to_bch_ei(new_inode(c->vfs_sb));
-+ if (unlikely(!inode)) {
-+ inode = ERR_PTR(-ENOMEM);
-+ goto err;
-+ }
-+
-+ bch2_inode_init_early(c, &inode_u);
-+
-+ if (!(flags & BCH_CREATE_TMPFILE))
-+ mutex_lock(&dir->ei_update_lock);
-+
-+ trans = bch2_trans_get(c);
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_create_trans(trans,
-+ inode_inum(dir), &dir_u, &inode_u,
-+ !(flags & BCH_CREATE_TMPFILE)
-+ ? &dentry->d_name : NULL,
-+ from_kuid(i_user_ns(&dir->v), current_fsuid()),
-+ from_kgid(i_user_ns(&dir->v), current_fsgid()),
-+ mode, rdev,
-+ default_acl, acl, snapshot_src, flags) ?:
-+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
-+ KEY_TYPE_QUOTA_PREALLOC);
-+ if (unlikely(ret))
-+ goto err_before_quota;
-+
-+ inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
-+ inum.inum = inode_u.bi_inum;
-+
-+ ret = bch2_subvolume_get(trans, inum.subvol, true,
-+ BTREE_ITER_WITH_UPDATES, &subvol) ?:
-+ bch2_trans_commit(trans, NULL, &journal_seq, 0);
-+ if (unlikely(ret)) {
-+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
-+ KEY_TYPE_QUOTA_WARN);
-+err_before_quota:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+ goto err_trans;
-+ }
-+
-+ if (!(flags & BCH_CREATE_TMPFILE)) {
-+ bch2_inode_update_after_write(trans, dir, &dir_u,
-+ ATTR_MTIME|ATTR_CTIME);
-+ mutex_unlock(&dir->ei_update_lock);
-+ }
-+
-+ bch2_iget5_set(&inode->v, &inum);
-+ bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
-+
-+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
-+ set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
-+
-+ /*
-+ * we must insert the new inode into the inode cache before calling
-+ * bch2_trans_exit() and dropping locks, else we could race with another
-+ * thread pulling the inode in and modifying it:
-+ */
-+
-+ inode->v.i_state |= I_CREATING;
-+
-+ old = to_bch_ei(inode_insert5(&inode->v,
-+ bch2_inode_hash(inum),
-+ bch2_iget5_test,
-+ bch2_iget5_set,
-+ &inum));
-+ BUG_ON(!old);
-+
-+ if (unlikely(old != inode)) {
-+ /*
-+ * We raced, another process pulled the new inode into cache
-+ * before us:
-+ */
-+ make_bad_inode(&inode->v);
-+ iput(&inode->v);
-+
-+ inode = old;
-+ } else {
-+ mutex_lock(&c->vfs_inodes_lock);
-+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
-+ mutex_unlock(&c->vfs_inodes_lock);
-+ /*
-+ * we really don't want insert_inode_locked2() to be setting
-+ * I_NEW...
-+ */
-+ unlock_new_inode(&inode->v);
-+ }
-+
-+ bch2_trans_put(trans);
-+err:
-+ posix_acl_release(default_acl);
-+ posix_acl_release(acl);
-+ return inode;
-+err_trans:
-+ if (!(flags & BCH_CREATE_TMPFILE))
-+ mutex_unlock(&dir->ei_update_lock);
-+
-+ bch2_trans_put(trans);
-+ make_bad_inode(&inode->v);
-+ iput(&inode->v);
-+ inode = ERR_PTR(ret);
-+ goto err;
-+}
-+
-+/* methods */
-+
-+/*
-+ * ->lookup: resolve @dentry's name in directory @vdir.  When the dirent
-+ * lookup succeeds the target inode is fetched; in every case the result
-+ * (possibly NULL) is handed to d_splice_alias().
-+ */
-+static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
-+				  unsigned int flags)
-+{
-+	struct bch_fs *c = vdir->i_sb->s_fs_info;
-+	struct bch_inode_info *dir = to_bch_ei(vdir);
-+	struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
-+	subvol_inum inum = { .subvol = 1 };
-+	struct inode *vinode = NULL;
-+
-+	if (!bch2_dirent_lookup(c, inode_inum(dir), &hash,
-+				&dentry->d_name, &inum))
-+		vinode = bch2_vfs_inode_get(c, inum);
-+
-+	return d_splice_alias(vinode, dentry);
-+}
-+
-+/*
-+ * ->mknod: create a new inode for @dentry in @vdir via __bch2_create() and
-+ * instantiate the dentry.  Returns 0, or the bch2_err_class() of the
-+ * creation error.
-+ */
-+static int bch2_mknod(struct mnt_idmap *idmap,
-+		      struct inode *vdir, struct dentry *dentry,
-+		      umode_t mode, dev_t rdev)
-+{
-+	struct bch_inode_info *dir = to_bch_ei(vdir);
-+	struct bch_inode_info *inode;
-+
-+	inode = __bch2_create(idmap, dir, dentry, mode, rdev,
-+			      (subvol_inum) { 0 }, 0);
-+	if (IS_ERR(inode))
-+		return bch2_err_class(PTR_ERR(inode));
-+
-+	d_instantiate(dentry, &inode->v);
-+	return 0;
-+}
-+
-+static int bch2_create(struct mnt_idmap *idmap,
-+ struct inode *vdir, struct dentry *dentry,
-+ umode_t mode, bool excl)
-+{
-+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
-+}
-+
-+/*
-+ * Add a directory entry for @inode named after @dentry in @dir, under
-+ * inode->ei_update_lock.  On success both in-memory inodes are refreshed
-+ * from the written values.  Returns the commit_do() result.
-+ */
-+static int __bch2_link(struct bch_fs *c,
-+		       struct bch_inode_info *inode,
-+		       struct bch_inode_info *dir,
-+		       struct dentry *dentry)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct bch_inode_unpacked dir_u, inode_u;
-+	int ret;
-+
-+	mutex_lock(&inode->ei_update_lock);
-+
-+	ret = commit_do(trans, NULL, NULL, 0,
-+			bch2_link_trans(trans,
-+					inode_inum(dir),   &dir_u,
-+					inode_inum(inode), &inode_u,
-+					&dentry->d_name));
-+	if (unlikely(ret))
-+		goto out;
-+
-+	bch2_inode_update_after_write(trans, dir, &dir_u,
-+				      ATTR_MTIME|ATTR_CTIME);
-+	bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
-+out:
-+	bch2_trans_put(trans);
-+	mutex_unlock(&inode->ei_update_lock);
-+	return ret;
-+}
-+
-+/*
-+ * ->link: hard-link the inode behind @old_dentry into @vdir as @dentry.  On
-+ * success an extra inode reference is taken for the new dentry.
-+ */
-+static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
-+		     struct dentry *dentry)
-+{
-+	struct bch_fs *c = vdir->i_sb->s_fs_info;
-+	struct bch_inode_info *dir = to_bch_ei(vdir);
-+	struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
-+	int ret;
-+
-+	lockdep_assert_held(&inode->v.i_rwsem);
-+
-+	ret = __bch2_link(c, inode, dir, dentry);
-+	if (likely(!ret)) {
-+		ihold(&inode->v);
-+		d_instantiate(dentry, &inode->v);
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * Remove @dentry from directory @vdir, with both inodes' update locks held.
-+ * @deleting_snapshot is passed through to bch2_unlink_trans().  On success
-+ * the in-memory inodes are refreshed, and when the removed inode is a
-+ * subvolume root its link count is forced to zero.  Returns the commit_do()
-+ * result.
-+ */
-+int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
-+		  bool deleting_snapshot)
-+{
-+	struct bch_fs *c = vdir->i_sb->s_fs_info;
-+	struct bch_inode_info *dir = to_bch_ei(vdir);
-+	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
-+	struct bch_inode_unpacked dir_u, inode_u;
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	int ret;
-+
-+	bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
-+
-+	ret = commit_do(trans, NULL, NULL,
-+			BTREE_INSERT_NOFAIL,
-+		bch2_unlink_trans(trans,
-+				  inode_inum(dir), &dir_u,
-+				  &inode_u, &dentry->d_name,
-+				  deleting_snapshot));
-+	if (likely(!ret)) {
-+		bch2_inode_update_after_write(trans, dir, &dir_u,
-+					      ATTR_MTIME|ATTR_CTIME);
-+		bch2_inode_update_after_write(trans, inode, &inode_u,
-+					      ATTR_MTIME);
-+
-+		/*
-+		 * Subvolume deletion is asynchronous, but we still want to tell
-+		 * the VFS that it's been deleted here:
-+		 */
-+		if (inode_u.bi_subvol)
-+			set_nlink(&inode->v, 0);
-+	}
-+
-+	bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
-+	bch2_trans_put(trans);
-+
-+	return ret;
-+}
-+
-+/*
-+ * ->unlink / ->rmdir: remove @dentry from @vdir (not a snapshot deletion).
-+ *
-+ * The result of __bch2_unlink() is passed through bch2_err_class() so that
-+ * filesystem-internal error codes are not returned to the VFS, matching the
-+ * other entry points in this file.
-+ */
-+static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
-+{
-+	int ret = __bch2_unlink(vdir, dentry, false);
-+
-+	return bch2_err_class(ret);
-+}
-+
-+/*
-+ * ->symlink: create a symbolic link @dentry in @vdir whose target is
-+ * @symname.
-+ *
-+ * The inode is first created with BCH_CREATE_TMPFILE, the target string is
-+ * written into its page cache and flushed, and only then is the inode linked
-+ * into the directory with __bch2_link().
-+ *
-+ * Returns 0, or a negative errno.  Errors are passed through
-+ * bch2_err_class() so that an internal code coming back from __bch2_link()
-+ * is not returned to the VFS.
-+ */
-+static int bch2_symlink(struct mnt_idmap *idmap,
-+			struct inode *vdir, struct dentry *dentry,
-+			const char *symname)
-+{
-+	struct bch_fs *c = vdir->i_sb->s_fs_info;
-+	struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
-+	int ret;
-+
-+	inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
-+			      (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
-+	if (IS_ERR(inode))
-+		return bch2_err_class(PTR_ERR(inode));
-+
-+	/* Store the target, including its terminating NUL. */
-+	inode_lock(&inode->v);
-+	ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
-+	inode_unlock(&inode->v);
-+
-+	if (unlikely(ret))
-+		goto err;
-+
-+	ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
-+	if (unlikely(ret))
-+		goto err;
-+
-+	ret = __bch2_link(c, inode, dir, dentry);
-+	if (unlikely(ret))
-+		goto err;
-+
-+	d_instantiate(dentry, &inode->v);
-+	return 0;
-+err:
-+	iput(&inode->v);
-+	return bch2_err_class(ret);
-+}
-+
-+/* ->mkdir: create a directory by delegating to bch2_mknod() with S_IFDIR. */
-+static int bch2_mkdir(struct mnt_idmap *idmap,
-+		      struct inode *vdir, struct dentry *dentry, umode_t mode)
-+{
-+	umode_t dir_mode = mode|S_IFDIR;
-+
-+	return bch2_mknod(idmap, vdir, dentry, dir_mode, 0);
-+}
-+
-+/*
-+ * ->rename: move @src_dentry (in @src_vdir) to @dst_dentry (in @dst_vdir).
-+ *
-+ * @flags may contain RENAME_NOREPLACE and/or RENAME_EXCHANGE; anything else
-+ * is rejected with -EINVAL.  The rename mode is EXCHANGE when requested,
-+ * OVERWRITE when the destination dentry already has an inode, and a plain
-+ * rename otherwise.
-+ *
-+ * All four inodes involved (dst_inode may be NULL) are locked with
-+ * INODE_UPDATE_LOCK around the btree update.  Project quota is transferred
-+ * up front with KEY_TYPE_QUOTA_PREALLOC when an inode moves under a
-+ * directory with a different project, and re-transferred with
-+ * KEY_TYPE_QUOTA_NOCHECK on the way out (success or failure) using the
-+ * project ID found in the in-memory inode at that point.
-+ *
-+ * Returns 0, or a negative errno.  The final result is passed through
-+ * bch2_err_class() so that filesystem-internal error codes are not returned
-+ * to the VFS.
-+ */
-+static int bch2_rename2(struct mnt_idmap *idmap,
-+			struct inode *src_vdir, struct dentry *src_dentry,
-+			struct inode *dst_vdir, struct dentry *dst_dentry,
-+			unsigned flags)
-+{
-+	struct bch_fs *c = src_vdir->i_sb->s_fs_info;
-+	struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
-+	struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
-+	struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
-+	struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
-+	struct bch_inode_unpacked dst_dir_u, src_dir_u;
-+	struct bch_inode_unpacked src_inode_u, dst_inode_u;
-+	struct btree_trans *trans;
-+	enum bch_rename_mode mode = flags & RENAME_EXCHANGE
-+		? BCH_RENAME_EXCHANGE
-+		: dst_dentry->d_inode
-+		? BCH_RENAME_OVERWRITE : BCH_RENAME;
-+	int ret;
-+
-+	if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
-+		return -EINVAL;
-+
-+	/* Flush the source's dirty pages before it replaces another file. */
-+	if (mode == BCH_RENAME_OVERWRITE) {
-+		ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
-+						   0, LLONG_MAX);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	trans = bch2_trans_get(c);
-+
-+	bch2_lock_inodes(INODE_UPDATE_LOCK,
-+			 src_dir,
-+			 dst_dir,
-+			 src_inode,
-+			 dst_inode);
-+
-+	if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
-+		ret = bch2_fs_quota_transfer(c, src_inode,
-+					     dst_dir->ei_qid,
-+					     1 << QTYP_PRJ,
-+					     KEY_TYPE_QUOTA_PREALLOC);
-+		if (ret)
-+			goto err;
-+	}
-+
-+	if (mode == BCH_RENAME_EXCHANGE &&
-+	    inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
-+		ret = bch2_fs_quota_transfer(c, dst_inode,
-+					     src_dir->ei_qid,
-+					     1 << QTYP_PRJ,
-+					     KEY_TYPE_QUOTA_PREALLOC);
-+		if (ret)
-+			goto err;
-+	}
-+
-+	ret = commit_do(trans, NULL, NULL, 0,
-+			bch2_rename_trans(trans,
-+					  inode_inum(src_dir), &src_dir_u,
-+					  inode_inum(dst_dir), &dst_dir_u,
-+					  &src_inode_u,
-+					  &dst_inode_u,
-+					  &src_dentry->d_name,
-+					  &dst_dentry->d_name,
-+					  mode));
-+	if (unlikely(ret))
-+		goto err;
-+
-+	BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
-+	BUG_ON(dst_inode &&
-+	       dst_inode->v.i_ino != dst_inode_u.bi_inum);
-+
-+	bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
-+				      ATTR_MTIME|ATTR_CTIME);
-+
-+	if (src_dir != dst_dir)
-+		bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
-+					      ATTR_MTIME|ATTR_CTIME);
-+
-+	bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
-+				      ATTR_CTIME);
-+
-+	if (dst_inode)
-+		bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
-+					      ATTR_CTIME);
-+err:
-+	bch2_trans_put(trans);
-+
-+	/*
-+	 * Best effort, return values deliberately ignored: bring quota
-+	 * accounting in line with the project ID each in-memory inode has now.
-+	 */
-+	bch2_fs_quota_transfer(c, src_inode,
-+			       bch_qid(&src_inode->ei_inode),
-+			       1 << QTYP_PRJ,
-+			       KEY_TYPE_QUOTA_NOCHECK);
-+	if (dst_inode)
-+		bch2_fs_quota_transfer(c, dst_inode,
-+				       bch_qid(&dst_inode->ei_inode),
-+				       1 << QTYP_PRJ,
-+				       KEY_TYPE_QUOTA_NOCHECK);
-+
-+	bch2_unlock_inodes(INODE_UPDATE_LOCK,
-+			   src_dir,
-+			   dst_dir,
-+			   src_inode,
-+			   dst_inode);
-+
-+	return bch2_err_class(ret);
-+}
-+
-+/*
-+ * Copy the fields selected by attr->ia_valid from @attr into the unpacked
-+ * inode @bi.
-+ *
-+ * When the mode is being set, S_ISGID is stripped unless the caller is a
-+ * member of the file's group (the new group, if ATTR_GID is also set) or
-+ * has CAP_FSETID over the inode.
-+ */
-+static void bch2_setattr_copy(struct mnt_idmap *idmap,
-+			      struct bch_inode_info *inode,
-+			      struct bch_inode_unpacked *bi,
-+			      struct iattr *attr)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	unsigned int valid = attr->ia_valid;
-+	umode_t new_mode;
-+	kgid_t group;
-+
-+	/* Ownership */
-+	if (valid & ATTR_UID)
-+		bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
-+	if (valid & ATTR_GID)
-+		bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
-+
-+	/* Size */
-+	if (valid & ATTR_SIZE)
-+		bi->bi_size = attr->ia_size;
-+
-+	/* Timestamps */
-+	if (valid & ATTR_ATIME)
-+		bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
-+	if (valid & ATTR_MTIME)
-+		bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
-+	if (valid & ATTR_CTIME)
-+		bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
-+
-+	if (!(valid & ATTR_MODE))
-+		return;
-+
-+	/* Mode, with the setgid bit filtered as described above */
-+	new_mode = attr->ia_mode;
-+	group = inode->v.i_gid;
-+	if (valid & ATTR_GID)
-+		group = attr->ia_gid;
-+
-+	if (!in_group_p(group) &&
-+	    !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
-+		new_mode &= ~S_ISGID;
-+
-+	bi->bi_mode = new_mode;
-+}
-+
-+int bch2_setattr_nonsize(struct mnt_idmap *idmap,
-+ struct bch_inode_info *inode,
-+ struct iattr *attr)
-+{ /*
-+ * Apply the attribute changes described by @attr to @inode (called from
-+ * bch2_setattr() when ATTR_SIZE is not set).  Holds ei_update_lock
-+ * throughout: transfers quota to the new uid/gid, rewrites the inode in
-+ * the btree (calling bch2_acl_chmod() when the mode changes), then
-+ * refreshes the in-memory inode.  Returns bch2_err_class() of the result.
-+ */
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_qid qid;
-+ struct btree_trans *trans;
-+ struct btree_iter inode_iter = { NULL };
-+ struct bch_inode_unpacked inode_u;
-+ struct posix_acl *acl = NULL;
-+ int ret;
-+
-+ mutex_lock(&inode->ei_update_lock);
-+
-+ qid = inode->ei_qid; /* start from the current quota IDs, then override uid/gid below */
-+
-+ if (attr->ia_valid & ATTR_UID)
-+ qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
-+
-+ if (attr->ia_valid & ATTR_GID)
-+ qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
-+
-+ ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
-+ KEY_TYPE_QUOTA_PREALLOC);
-+ if (ret)
-+ goto err; /* no transaction allocated yet, so skip bch2_trans_put() */
-+
-+ trans = bch2_trans_get(c);
-+retry:
-+ bch2_trans_begin(trans);
-+ kfree(acl); /* discard the ACL produced by an earlier, restarted attempt */
-+ acl = NULL;
-+
-+ ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
-+ BTREE_ITER_INTENT);
-+ if (ret)
-+ goto btree_err;
-+
-+ bch2_setattr_copy(idmap, inode, &inode_u, attr);
-+
-+ if (attr->ia_valid & ATTR_MODE) {
-+ ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
-+ inode_u.bi_mode, &acl);
-+ if (ret)
-+ goto btree_err;
-+ }
-+
-+ ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL);
-+btree_err:
-+ bch2_trans_iter_exit(trans, &inode_iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+ if (unlikely(ret))
-+ goto err_trans; /* NOTE(review): acl is not freed on this path - confirm whether it can be non-NULL here */
-+
-+ bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
-+
-+ if (acl)
-+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
-+err_trans:
-+ bch2_trans_put(trans);
-+err:
-+ mutex_unlock(&inode->ei_update_lock);
-+
-+ return bch2_err_class(ret);
-+}
-+
-+/*
-+ * ->getattr: fill @stat from the in-memory inode behind @path.
-+ *
-+ * Basic fields come from the VFS inode; the block size comes from the
-+ * filesystem.  The birth time is reported only when STATX_BTIME is in
-+ * @request_mask.  The immutable/append/nodump attribute bits are always
-+ * advertised in attributes_mask and set in attributes when the matching
-+ * BCH_INODE_* flag is present.  Always returns 0.
-+ */
-+static int bch2_getattr(struct mnt_idmap *idmap,
-+			const struct path *path, struct kstat *stat,
-+			u32 request_mask, unsigned query_flags)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+
-+	/* Identity and ownership */
-+	stat->dev	= inode->v.i_sb->s_dev;
-+	stat->ino	= inode->v.i_ino;
-+	stat->rdev	= inode->v.i_rdev;
-+	stat->mode	= inode->v.i_mode;
-+	stat->nlink	= inode->v.i_nlink;
-+	stat->uid	= inode->v.i_uid;
-+	stat->gid	= inode->v.i_gid;
-+
-+	/* Size */
-+	stat->size	= i_size_read(&inode->v);
-+	stat->blksize	= block_bytes(c);
-+	stat->blocks	= inode->v.i_blocks;
-+
-+	/* Timestamps */
-+	stat->atime	= inode->v.i_atime;
-+	stat->mtime	= inode->v.i_mtime;
-+	stat->ctime	= inode_get_ctime(&inode->v);
-+
-+	if (request_mask & STATX_BTIME) {
-+		stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
-+		stat->result_mask |= STATX_BTIME;
-+	}
-+
-+	/* Attribute bits this filesystem can report */
-+	stat->attributes_mask |= STATX_ATTR_IMMUTABLE|
-+				 STATX_ATTR_APPEND|
-+				 STATX_ATTR_NODUMP;
-+
-+	if (inode->ei_inode.bi_flags & BCH_INODE_immutable)
-+		stat->attributes |= STATX_ATTR_IMMUTABLE;
-+	if (inode->ei_inode.bi_flags & BCH_INODE_append)
-+		stat->attributes |= STATX_ATTR_APPEND;
-+	if (inode->ei_inode.bi_flags & BCH_INODE_nodump)
-+		stat->attributes |= STATX_ATTR_NODUMP;
-+
-+	return 0;
-+}
-+
-+/*
-+ * ->setattr: validate @iattr with setattr_prepare(), then dispatch to
-+ * bchfs_truncate() for size changes or bch2_setattr_nonsize() for
-+ * everything else.  The inode's i_rwsem must be held by the caller.
-+ */
-+static int bch2_setattr(struct mnt_idmap *idmap,
-+			struct dentry *dentry, struct iattr *iattr)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
-+	int err;
-+
-+	lockdep_assert_held(&inode->v.i_rwsem);
-+
-+	err = setattr_prepare(idmap, dentry, iattr);
-+	if (err)
-+		return err;
-+
-+	if (iattr->ia_valid & ATTR_SIZE)
-+		return bchfs_truncate(idmap, inode, iattr);
-+
-+	return bch2_setattr_nonsize(idmap, inode, iattr);
-+}
-+
-+/*
-+ * ->tmpfile: create an inode with BCH_CREATE_TMPFILE for @file's dentry,
-+ * mark the dentry as a tmpfile, instantiate it and finish the open.
-+ */
-+static int bch2_tmpfile(struct mnt_idmap *idmap,
-+			struct inode *vdir, struct file *file, umode_t mode)
-+{
-+	struct dentry *dentry = file->f_path.dentry;
-+	struct bch_inode_info *inode;
-+
-+	inode = __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, 0,
-+			      (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
-+	if (IS_ERR(inode))
-+		return bch2_err_class(PTR_ERR(inode));
-+
-+	d_mark_tmpfile(file, &inode->v);
-+	d_instantiate(dentry, &inode->v);
-+
-+	return finish_open_simple(file, 0);
-+}
-+
-+/*
-+ * Report key @k to fiemap via fiemap_fill_next_extent().
-+ *
-+ * Inline data and reservations produce a single record with a physical
-+ * offset of 0.  Keys with direct data produce one record per decoded
-+ * pointer, flagged as unwritten, encoded (compressed) and/or not block
-+ * aligned as appropriate; reflink_v keys are additionally flagged shared.
-+ * Any other key type is a BUG().
-+ *
-+ * @flags is or'ed into every record.  Returns the first nonzero value
-+ * returned by fiemap_fill_next_extent(), else 0.
-+ */
-+static int bch2_fill_extent(struct bch_fs *c,
-+			    struct fiemap_extent_info *info,
-+			    struct bkey_s_c k, unsigned flags)
-+{
-+	struct bkey_ptrs_c ptrs;
-+	const union bch_extent_entry *entry;
-+	struct extent_ptr_decoded p;
-+	int ret;
-+
-+	if (!bkey_extent_is_direct_data(k.k)) {
-+		if (bkey_extent_is_inline_data(k.k))
-+			return fiemap_fill_next_extent(info,
-+						bkey_start_offset(k.k) << 9,
-+						0, k.k->size << 9,
-+						flags|
-+						FIEMAP_EXTENT_DATA_INLINE);
-+
-+		if (k.k->type == KEY_TYPE_reservation)
-+			return fiemap_fill_next_extent(info,
-+						bkey_start_offset(k.k) << 9,
-+						0, k.k->size << 9,
-+						flags|
-+						FIEMAP_EXTENT_DELALLOC|
-+						FIEMAP_EXTENT_UNWRITTEN);
-+
-+		BUG();
-+	}
-+
-+	ptrs = bch2_bkey_ptrs_c(k);
-+
-+	if (k.k->type == KEY_TYPE_reflink_v)
-+		flags |= FIEMAP_EXTENT_SHARED;
-+
-+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+		int ptr_flags = 0;
-+		u64 phys = p.ptr.offset;
-+
-+		if (p.ptr.unwritten)
-+			ptr_flags |= FIEMAP_EXTENT_UNWRITTEN;
-+
-+		/* The crc offset is only added for uncompressed data. */
-+		if (p.crc.compression_type)
-+			ptr_flags |= FIEMAP_EXTENT_ENCODED;
-+		else
-+			phys += p.crc.offset;
-+
-+		if ((phys & (block_sectors(c) - 1)) ||
-+		    (k.k->size & (block_sectors(c) - 1)))
-+			ptr_flags |= FIEMAP_EXTENT_NOT_ALIGNED;
-+
-+		ret = fiemap_fill_next_extent(info,
-+					bkey_start_offset(k.k) << 9,
-+					phys << 9,
-+					k.k->size << 9, flags|ptr_flags);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
-+ u64 start, u64 len)
-+{ /*
-+ * ->fiemap: walk the extents btree for this inode over the byte range
-+ * [start, start + len) and report each data or reservation key through
-+ * bch2_fill_extent().  Reporting runs one extent behind the iterator so
-+ * that the final extent can be passed with FIEMAP_EXTENT_LAST.
-+ */
-+ struct bch_fs *c = vinode->i_sb->s_fs_info;
-+ struct bch_inode_info *ei = to_bch_ei(vinode);
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_buf cur, prev;
-+ struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); /* computed from the caller's len, before fiemap_prep() sees it */
-+ unsigned offset_into_extent, sectors;
-+ bool have_extent = false;
-+ u32 snapshot;
-+ int ret = 0;
-+
-+ ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
-+ if (ret)
-+ return ret;
-+
-+ if (start + len < start) /* u64 wrap-around */
-+ return -EINVAL;
-+
-+ start >>= 9; /* bytes -> units of 512 bytes, matching the << 9 in bch2_fill_extent() */
-+
-+ bch2_bkey_buf_init(&cur);
-+ bch2_bkey_buf_init(&prev);
-+ trans = bch2_trans_get(c);
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ SPOS(ei->v.i_ino, start, snapshot), 0);
-+
-+ while (!(ret = btree_trans_too_many_iters(trans)) &&
-+ (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
-+ !(ret = bkey_err(k))) {
-+ enum btree_id data_btree = BTREE_ID_extents;
-+
-+ if (!bkey_extent_is_data(k.k) &&
-+ k.k->type != KEY_TYPE_reservation) { /* not reportable: skip */
-+ bch2_btree_iter_advance(&iter);
-+ continue;
-+ }
-+
-+ offset_into_extent = iter.pos.offset -
-+ bkey_start_offset(k.k);
-+ sectors = k.k->size - offset_into_extent;
-+
-+ bch2_bkey_buf_reassemble(&cur, c, k);
-+
-+ ret = bch2_read_indirect_extent(trans, &data_btree,
-+ &offset_into_extent, &cur);
-+ if (ret)
-+ break;
-+
-+ k = bkey_i_to_s_c(cur.k);
-+ bch2_bkey_buf_realloc(&prev, c, k.k->u64s); /* make sure prev can hold the bkey_copy() below */
-+
-+ sectors = min(sectors, k.k->size - offset_into_extent);
-+
-+ bch2_cut_front(POS(k.k->p.inode,
-+ bkey_start_offset(k.k) +
-+ offset_into_extent),
-+ cur.k);
-+ bch2_key_resize(&cur.k->k, sectors);
-+ cur.k->k.p = iter.pos;
-+ cur.k->k.p.offset += cur.k->k.size;
-+
-+ if (have_extent) { /* report the extent buffered on the previous iteration */
-+ bch2_trans_unlock(trans);
-+ ret = bch2_fill_extent(c, info,
-+ bkey_i_to_s_c(prev.k), 0);
-+ if (ret)
-+ break;
-+ }
-+
-+ bkey_copy(prev.k, cur.k);
-+ have_extent = true;
-+
-+ bch2_btree_iter_set_pos(&iter,
-+ POS(iter.pos.inode, iter.pos.offset + sectors));
-+ }
-+ start = iter.pos.offset; /* a retry re-initialises the iterator from this offset */
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ if (!ret && have_extent) { /* flush the final buffered extent, flagged as last */
-+ bch2_trans_unlock(trans);
-+ ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
-+ FIEMAP_EXTENT_LAST);
-+ }
-+
-+ bch2_trans_put(trans);
-+ bch2_bkey_buf_exit(&cur, c);
-+ bch2_bkey_buf_exit(&prev, c);
-+ return ret < 0 ? ret : 0; /* a positive ret is reported to the caller as success */
-+}
-+
-+static const struct vm_operations_struct bch_vm_ops = { /* installed on every mapping by bch2_mmap() */
-+ .fault = bch2_page_fault,
-+ .map_pages = filemap_map_pages,
-+ .page_mkwrite = bch2_page_mkwrite,
-+};
-+
-+/*
-+ * ->mmap: record the access on @file and point the new mapping at
-+ * bch_vm_ops.  Always returns 0.
-+ */
-+static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
-+{
-+	file_accessed(file);
-+	vma->vm_ops = &bch_vm_ops;
-+
-+	return 0;
-+}
-+
-+/* Directories: */
-+
-+/*
-+ * ->llseek for directories: generic seek with both limits passed to
-+ * generic_file_llseek_size() set to S64_MAX.
-+ */
-+static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
-+{
-+	const loff_t limit = S64_MAX;
-+
-+	return generic_file_llseek_size(file, offset, whence, limit, limit);
-+}
-+
-+/*
-+ * ->iterate_shared: emit "." and "..", then the directory's entries via
-+ * bch2_readdir().  Returns 0 early if the dot entries could not be emitted;
-+ * otherwise logs any error and returns it through bch2_err_class().
-+ */
-+static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
-+{
-+	struct bch_inode_info *dir = file_bch_inode(file);
-+	struct bch_fs *c = dir->v.i_sb->s_fs_info;
-+	int ret;
-+
-+	if (dir_emit_dots(file, ctx)) {
-+		ret = bch2_readdir(c, inode_inum(dir), ctx);
-+		if (ret)
-+			bch_err_fn(c, ret);
-+
-+		return bch2_err_class(ret);
-+	}
-+
-+	return 0;
-+}
-+
-+static const struct file_operations bch_file_operations = { /* i_fop for regular files; see bch2_vfs_inode_init() */
-+ .llseek = bch2_llseek,
-+ .read_iter = bch2_read_iter,
-+ .write_iter = bch2_write_iter,
-+ .mmap = bch2_mmap,
-+ .open = generic_file_open,
-+ .fsync = bch2_fsync,
-+ .splice_read = filemap_splice_read,
-+ .splice_write = iter_file_splice_write,
-+ .fallocate = bch2_fallocate_dispatch,
-+ .unlocked_ioctl = bch2_fs_file_ioctl,
-+#ifdef CONFIG_COMPAT
-+ .compat_ioctl = bch2_compat_fs_ioctl,
-+#endif
-+ .remap_file_range = bch2_remap_file_range,
-+};
-+
-+static const struct inode_operations bch_file_inode_operations = { /* i_op for regular files; see bch2_vfs_inode_init() */
-+ .getattr = bch2_getattr,
-+ .setattr = bch2_setattr,
-+ .fiemap = bch2_fiemap,
-+ .listxattr = bch2_xattr_list,
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+ .get_acl = bch2_get_acl,
-+ .set_acl = bch2_set_acl,
-+#endif
-+};
-+
-+static const struct inode_operations bch_dir_inode_operations = { /* i_op for directories; see bch2_vfs_inode_init() */
-+ .lookup = bch2_lookup,
-+ .create = bch2_create,
-+ .link = bch2_link,
-+ .unlink = bch2_unlink,
-+ .symlink = bch2_symlink,
-+ .mkdir = bch2_mkdir,
-+ .rmdir = bch2_unlink, /* same handler as .unlink */
-+ .mknod = bch2_mknod,
-+ .rename = bch2_rename2,
-+ .getattr = bch2_getattr,
-+ .setattr = bch2_setattr,
-+ .tmpfile = bch2_tmpfile,
-+ .listxattr = bch2_xattr_list,
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+ .get_acl = bch2_get_acl,
-+ .set_acl = bch2_set_acl,
-+#endif
-+};
-+
-+static const struct file_operations bch_dir_file_operations = { /* i_fop for directories; see bch2_vfs_inode_init() */
-+ .llseek = bch2_dir_llseek,
-+ .read = generic_read_dir,
-+ .iterate_shared = bch2_vfs_readdir,
-+ .fsync = bch2_fsync,
-+ .unlocked_ioctl = bch2_fs_file_ioctl,
-+#ifdef CONFIG_COMPAT
-+ .compat_ioctl = bch2_compat_fs_ioctl,
-+#endif
-+};
-+
-+static const struct inode_operations bch_symlink_inode_operations = { /* i_op for symlinks; see bch2_vfs_inode_init() */
-+ .get_link = page_get_link, /* target is read from the page cache, where bch2_symlink() stored it with page_symlink() */
-+ .getattr = bch2_getattr,
-+ .setattr = bch2_setattr,
-+ .listxattr = bch2_xattr_list,
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+ .get_acl = bch2_get_acl,
-+ .set_acl = bch2_set_acl,
-+#endif
-+};
-+
-+static const struct inode_operations bch_special_inode_operations = { /* i_op for every other file type (default case in bch2_vfs_inode_init()) */
-+ .getattr = bch2_getattr,
-+ .setattr = bch2_setattr,
-+ .listxattr = bch2_xattr_list,
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+ .get_acl = bch2_get_acl,
-+ .set_acl = bch2_set_acl,
-+#endif
-+};
-+
-+static const struct address_space_operations bch_address_space_operations = { /* a_ops set on every inode's i_mapping by bch2_vfs_inode_init() */
-+ .read_folio = bch2_read_folio,
-+ .writepages = bch2_writepages,
-+ .readahead = bch2_readahead,
-+ .dirty_folio = filemap_dirty_folio,
-+ .write_begin = bch2_write_begin,
-+ .write_end = bch2_write_end,
-+ .invalidate_folio = bch2_invalidate_folio,
-+ .release_folio = bch2_release_folio,
-+ .direct_IO = noop_direct_IO,
-+#ifdef CONFIG_MIGRATION
-+ .migrate_folio = filemap_migrate_folio,
-+#endif
-+ .error_remove_page = generic_error_remove_page,
-+};
-+
-+struct bcachefs_fid { /* file-handle payload naming one inode; filled in by bch2_inode_to_fid() */
-+ u64 inum; /* inode number (ei_inode.bi_inum) */
-+ u32 subvol; /* subvolume the inode was opened through (ei_subvol) */
-+ u32 gen; /* inode generation (ei_inode.bi_generation); checked in bch2_nfs_get_inode() */
-+} __packed;
-+
-+struct bcachefs_fid_with_parent { /* file-handle payload for FILEID_BCACHEFS_WITH_PARENT */
-+ struct bcachefs_fid fid; /* the inode itself */
-+ struct bcachefs_fid dir; /* the directory passed to bch2_encode_fh() */
-+} __packed;
-+
-+/*
-+ * Check that a file handle of type @fh_type has exactly the length (in u32
-+ * words) expected for that type.  Returns nonzero when valid, zero for a
-+ * length mismatch or an unknown type.
-+ */
-+static int bcachefs_fid_valid(int fh_len, int fh_type)
-+{
-+	size_t want_words;
-+
-+	switch (fh_type) {
-+	case FILEID_BCACHEFS_WITHOUT_PARENT:
-+		want_words = sizeof(struct bcachefs_fid) / sizeof(u32);
-+		break;
-+	case FILEID_BCACHEFS_WITH_PARENT:
-+		want_words = sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
-+		break;
-+	default:
-+		return false;
-+	}
-+
-+	return fh_len == want_words;
-+}
-+
-+/* Build the file-handle payload (inode number, subvolume, generation) for @inode. */
-+static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
-+{
-+	struct bcachefs_fid fid = {
-+		.inum	= inode->ei_inode.bi_inum,
-+		.subvol	= inode->ei_subvol,
-+		.gen	= inode->ei_inode.bi_generation,
-+	};
-+
-+	return fid;
-+}
-+
-+/*
-+ * ->encode_fh: encode @vinode (and, for non-directories when @vdir is
-+ * given, its parent directory) into the file handle buffer @fh.
-+ *
-+ * @len is in units of u32 words: on entry the space available in @fh, on
-+ * return the number of words used.  If the buffer is too small for the
-+ * handle being produced, *@len is set to the required size and
-+ * FILEID_INVALID is returned, so the caller can retry with a larger buffer.
-+ *
-+ * The size check is made against the handle type actually emitted, so a
-+ * handle without a parent does not require room for the larger form.  The
-+ * comparison is done in int, so a negative *@len is rejected.
-+ *
-+ * Returns FILEID_BCACHEFS_WITH_PARENT, FILEID_BCACHEFS_WITHOUT_PARENT or
-+ * FILEID_INVALID.
-+ */
-+static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
-+			  struct inode *vdir)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(vinode);
-+	struct bch_inode_info *dir = to_bch_ei(vdir);
-+	int min_len;
-+
-+	if (!S_ISDIR(inode->v.i_mode) && dir) {
-+		struct bcachefs_fid_with_parent *fid = (void *) fh;
-+
-+		min_len = sizeof(*fid) / sizeof(u32);
-+		if (*len < min_len) {
-+			*len = min_len;
-+			return FILEID_INVALID;
-+		}
-+
-+		fid->fid = bch2_inode_to_fid(inode);
-+		fid->dir = bch2_inode_to_fid(dir);
-+
-+		*len = min_len;
-+		return FILEID_BCACHEFS_WITH_PARENT;
-+	} else {
-+		struct bcachefs_fid *fid = (void *) fh;
-+
-+		min_len = sizeof(*fid) / sizeof(u32);
-+		if (*len < min_len) {
-+			*len = min_len;
-+			return FILEID_INVALID;
-+		}
-+
-+		*fid = bch2_inode_to_fid(inode);
-+
-+		*len = min_len;
-+		return FILEID_BCACHEFS_WITHOUT_PARENT;
-+	}
-+}
-+
-+/*
-+ * Look up the inode named by @fid.  Returns the inode, the ERR_PTR from
-+ * bch2_vfs_inode_get(), or ERR_PTR(-ESTALE) when the inode's generation
-+ * does not match the one recorded in the handle.
-+ */
-+static struct inode *bch2_nfs_get_inode(struct super_block *sb,
-+					struct bcachefs_fid fid)
-+{
-+	struct bch_fs *c = sb->s_fs_info;
-+	subvol_inum inum = {
-+		.subvol = fid.subvol,
-+		.inum	= fid.inum,
-+	};
-+	struct inode *vinode = bch2_vfs_inode_get(c, inum);
-+
-+	if (IS_ERR(vinode))
-+		return vinode;
-+
-+	if (vinode->i_generation == fid.gen)
-+		return vinode;
-+
-+	/* Same inode number, different generation: the handle is stale. */
-+	iput(vinode);
-+	return ERR_PTR(-ESTALE);
-+}
-+
-+/*
-+ * ->fh_to_dentry: decode the inode part of a file handle into a dentry.
-+ * Returns NULL when the handle's length/type combination is not valid.
-+ */
-+static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
-+		int fh_len, int fh_type)
-+{
-+	struct bcachefs_fid *fid = (void *) _fid;
-+
-+	if (bcachefs_fid_valid(fh_len, fh_type))
-+		return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
-+
-+	return NULL;
-+}
-+
-+/*
-+ * ->fh_to_parent: decode the parent-directory part of a file handle into a
-+ * dentry.  Returns NULL unless the handle is a valid
-+ * FILEID_BCACHEFS_WITH_PARENT handle.
-+ */
-+static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
-+		int fh_len, int fh_type)
-+{
-+	struct bcachefs_fid_with_parent *fid = (void *) _fid;
-+
-+	if (bcachefs_fid_valid(fh_len, fh_type) &&
-+	    fh_type == FILEID_BCACHEFS_WITH_PARENT)
-+		return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
-+
-+	return NULL;
-+}
-+
-+/*
-+ * ->get_parent: return a dentry for the directory recorded in @child's
-+ * inode (bi_dir).  The subvolume used is bi_parent_subvol when it is
-+ * nonzero, otherwise the child's own subvolume.
-+ */
-+static struct dentry *bch2_get_parent(struct dentry *child)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(child->d_inode);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	subvol_inum parent_inum;
-+
-+	if (inode->ei_inode.bi_parent_subvol)
-+		parent_inum.subvol = inode->ei_inode.bi_parent_subvol;
-+	else
-+		parent_inum.subvol = inode->ei_subvol;
-+	parent_inum.inum = inode->ei_inode.bi_dir;
-+
-+	return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
-+}
-+
-+/*
-+ * ->get_name: find the name under which @child is linked in directory
-+ * @parent and copy it, NUL terminated and truncated to NAME_MAX bytes,
-+ * into @name.
-+ *
-+ * If the inode's back-reference (bi_dir/bi_dir_offset) points into @parent,
-+ * only that one dirent is checked.  Otherwise the directory's dirents are
-+ * scanned linearly for one whose target is the child.
-+ *
-+ * Returns 0 on success, -EINVAL if @parent is not a directory, -ENOENT if no
-+ * matching dirent exists, or another negative errno.  An error hit during
-+ * the search is kept rather than being replaced by -ENOENT, so that a
-+ * transaction restart reaches the retry check below and real failures are
-+ * reported as such.  The result is passed through bch2_err_class() so that
-+ * filesystem-internal error codes are not returned to the caller.
-+ */
-+static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(child->d_inode);
-+	struct bch_inode_info *dir = to_bch_ei(parent->d_inode);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct btree_trans *trans;
-+	struct btree_iter iter1;
-+	struct btree_iter iter2;
-+	struct bkey_s_c k;
-+	struct bkey_s_c_dirent d;
-+	struct bch_inode_unpacked inode_u;
-+	subvol_inum target;
-+	u32 snapshot;
-+	struct qstr dirent_name;
-+	unsigned name_len = 0;
-+	int ret;
-+
-+	if (!S_ISDIR(dir->v.i_mode))
-+		return -EINVAL;
-+
-+	trans = bch2_trans_get(c);
-+
-+	/* iter1: back-reference lookup; iter2: linear scan of the directory */
-+	bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
-+			     POS(dir->ei_inode.bi_inum, 0), 0);
-+	bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
-+			     POS(dir->ei_inode.bi_inum, 0), 0);
-+retry:
-+	bch2_trans_begin(trans);
-+
-+	ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
-+	if (ret)
-+		goto err;
-+
-+	bch2_btree_iter_set_snapshot(&iter1, snapshot);
-+	bch2_btree_iter_set_snapshot(&iter2, snapshot);
-+
-+	ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
-+	if (ret)
-+		goto err;
-+
-+	if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
-+		bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
-+
-+		k = bch2_btree_iter_peek_slot(&iter1);
-+		ret = bkey_err(k);
-+		if (ret)
-+			goto err;
-+
-+		if (k.k->type != KEY_TYPE_dirent) {
-+			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
-+			goto err;
-+		}
-+
-+		d = bkey_s_c_to_dirent(k);
-+		ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
-+		if (ret > 0)
-+			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
-+		if (ret)
-+			goto err;
-+
-+		if (target.subvol == inode->ei_subvol &&
-+		    target.inum == inode->ei_inode.bi_inum)
-+			goto found;
-+	} else {
-+		/*
-+		 * File with multiple hardlinks and our backref is to the wrong
-+		 * directory - linear search:
-+		 */
-+		for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
-+			if (k.k->p.inode > dir->ei_inode.bi_inum)
-+				break;
-+
-+			if (k.k->type != KEY_TYPE_dirent)
-+				continue;
-+
-+			d = bkey_s_c_to_dirent(k);
-+			ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
-+			if (ret < 0)
-+				break;
-+			if (ret)
-+				continue;
-+
-+			if (target.subvol == inode->ei_subvol &&
-+			    target.inum == inode->ei_inode.bi_inum)
-+				goto found;
-+		}
-+	}
-+
-+	/* Nothing matched: report -ENOENT, but keep any error from the search. */
-+	if (ret >= 0)
-+		ret = -ENOENT;
-+	goto err;
-+found:
-+	dirent_name = bch2_dirent_get_name(d);
-+
-+	name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
-+	memcpy(name, dirent_name.name, name_len);
-+	name[name_len] = '\0';
-+err:
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		goto retry;
-+
-+	bch2_trans_iter_exit(trans, &iter1);
-+	bch2_trans_iter_exit(trans, &iter2);
-+	bch2_trans_put(trans);
-+
-+	return bch2_err_class(ret);
-+}
-+
-+static const struct export_operations bch_export_ops = { /* file-handle encode/decode and parent/name lookup callbacks defined above */
-+ .encode_fh = bch2_encode_fh,
-+ .fh_to_dentry = bch2_fh_to_dentry,
-+ .fh_to_parent = bch2_fh_to_parent,
-+ .get_parent = bch2_get_parent,
-+ .get_name = bch2_get_name,
-+};
-+
-+/*
-+ * Initialise the in-memory inode @inode from the unpacked btree inode @bi.
-+ *
-+ * Copies the inode fields, resets the per-inode flags and quota state,
-+ * records the subvolume from @inum, and installs the address-space, inode
-+ * and file operation tables that match the file type.
-+ *
-+ * EI_INODE_SNAPSHOT is set after ei_flags has been reset, so the flag
-+ * survives initialisation when @subvol is a snapshot.
-+ */
-+static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
-+				struct bch_inode_info *inode,
-+				struct bch_inode_unpacked *bi,
-+				struct bch_subvolume *subvol)
-+{
-+	bch2_inode_update_after_write(trans, inode, bi, ~0);
-+
-+	inode->v.i_blocks	= bi->bi_sectors;
-+	inode->v.i_ino		= bi->bi_inum;
-+	inode->v.i_rdev		= bi->bi_dev;
-+	inode->v.i_generation	= bi->bi_generation;
-+	inode->v.i_size		= bi->bi_size;
-+
-+	inode->ei_flags		= 0;
-+	inode->ei_quota_reserved = 0;
-+	inode->ei_qid		= bch_qid(bi);
-+	inode->ei_subvol	= inum.subvol;
-+
-+	/* Must come after the ei_flags reset above, or the bit is lost. */
-+	if (BCH_SUBVOLUME_SNAP(subvol))
-+		set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
-+
-+	inode->v.i_mapping->a_ops = &bch_address_space_operations;
-+
-+	switch (inode->v.i_mode & S_IFMT) {
-+	case S_IFREG:
-+		inode->v.i_op	= &bch_file_inode_operations;
-+		inode->v.i_fop	= &bch_file_operations;
-+		break;
-+	case S_IFDIR:
-+		inode->v.i_op	= &bch_dir_inode_operations;
-+		inode->v.i_fop	= &bch_dir_file_operations;
-+		break;
-+	case S_IFLNK:
-+		inode_nohighmem(&inode->v);
-+		inode->v.i_op	= &bch_symlink_inode_operations;
-+		break;
-+	default:
-+		init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
-+		inode->v.i_op	= &bch_special_inode_operations;
-+		break;
-+	}
-+
-+	mapping_set_large_folios(inode->v.i_mapping);
-+}
-+
-+/*
-+ * Allocate a bch_inode_info from bch2_inode_cache and initialise its VFS
-+ * inode, locks and list head.  Returns the embedded VFS inode, or NULL if
-+ * the allocation fails.
-+ */
-+static struct inode *bch2_alloc_inode(struct super_block *sb)
-+{
-+	struct bch_inode_info *ei = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
-+
-+	if (ei) {
-+		inode_init_once(&ei->v);
-+		mutex_init(&ei->ei_update_lock);
-+		two_state_lock_init(&ei->ei_pagecache_lock);
-+		INIT_LIST_HEAD(&ei->ei_vfs_inode_list);
-+		mutex_init(&ei->ei_quota_lock);
-+
-+		return &ei->v;
-+	}
-+
-+	return NULL;
-+}
-+
-+/* RCU callback: return the bch_inode_info that embeds @head to bch2_inode_cache. */
-+static void bch2_i_callback(struct rcu_head *head)
-+{
-+	struct bch_inode_info *ei =
-+		to_bch_ei(container_of(head, struct inode, i_rcu));
-+
-+	kmem_cache_free(bch2_inode_cache, ei);
-+}
-+
-+/* Defer freeing of @vinode to bch2_i_callback() via call_rcu(). */
-+static void bch2_destroy_inode(struct inode *vinode)
-+{
-+	struct rcu_head *head = &vinode->i_rcu;
-+
-+	call_rcu(head, bch2_i_callback);
-+}
-+
-+/*
-+ * Inode-update callback used by bch2_vfs_write_inode(): copy the VFS
-+ * inode's atime, mtime and ctime into the unpacked inode @bi.  @p is
-+ * unused.  Always returns 0.
-+ */
-+static int inode_update_times_fn(struct btree_trans *trans,
-+				 struct bch_inode_info *inode,
-+				 struct bch_inode_unpacked *bi,
-+				 void *p)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+
-+	bi->bi_ctime = timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
-+	bi->bi_mtime = timespec_to_bch2_time(c, inode->v.i_mtime);
-+	bi->bi_atime = timespec_to_bch2_time(c, inode->v.i_atime);
-+
-+	return 0;
-+}
-+
-+/*
-+ * ->write_inode: write the VFS inode's timestamps back to the btree inode
-+ * under ei_update_lock.  @wbc is not consulted.  Returns the result through
-+ * bch2_err_class().
-+ */
-+static int bch2_vfs_write_inode(struct inode *vinode,
-+				struct writeback_control *wbc)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(vinode);
-+	struct bch_fs *c = vinode->i_sb->s_fs_info;
-+	unsigned fields = ATTR_ATIME|ATTR_MTIME|ATTR_CTIME;
-+	int ret;
-+
-+	mutex_lock(&inode->ei_update_lock);
-+	ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, fields);
-+	mutex_unlock(&inode->ei_update_lock);
-+
-+	return bch2_err_class(ret);
-+}
-+
-+/*
-+ * ->evict_inode: drop the page cache and clear the VFS inode.  If the inode
-+ * has no links left (and is not a bad inode), release its space and inode
-+ * quota and remove it from the btree.  Finally unhook it from the
-+ * filesystem's list of VFS inodes.
-+ */
-+static void bch2_evict_inode(struct inode *vinode)
-+{
-+	struct bch_fs *c = vinode->i_sb->s_fs_info;
-+	struct bch_inode_info *inode = to_bch_ei(vinode);
-+	bool bad;
-+
-+	truncate_inode_pages_final(&inode->v.i_data);
-+
-+	clear_inode(&inode->v);
-+
-+	bad = is_bad_inode(&inode->v);
-+
-+	BUG_ON(!bad && inode->ei_quota_reserved);
-+
-+	if (!(inode->v.i_nlink || bad)) {
-+		bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
-+				KEY_TYPE_QUOTA_WARN);
-+		bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
-+				KEY_TYPE_QUOTA_WARN);
-+		bch2_inode_rm(c, inode_inum(inode));
-+	}
-+
-+	mutex_lock(&c->vfs_inodes_lock);
-+	list_del_init(&inode->ei_vfs_inode_list);
-+	mutex_unlock(&c->vfs_inodes_lock);
-+}
-+
-+void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
-+{ /*
-+ * Push every cached VFS inode whose ei_subvol is listed in @s out of the
-+ * inode cache, repeating passes over c->vfs_inodes_list until two
-+ * consecutive passes find nothing left to grab.
-+ */
-+ struct bch_inode_info *inode, **i;
-+ DARRAY(struct bch_inode_info *) grabbed;
-+ bool clean_pass = false, this_pass_clean;
-+
-+ /*
-+ * Initially, we scan for inodes without I_DONTCACHE, then mark them to
-+ * be pruned with d_mark_dontcache().
-+ *
-+ * Once we've had a clean pass where we didn't find any inodes without
-+ * I_DONTCACHE, we wait for them to be freed:
-+ */
-+
-+ darray_init(&grabbed);
-+ darray_make_room(&grabbed, 1024); /* return value not checked; pushes below use GFP_ATOMIC and handle failure */
-+again:
-+ cond_resched();
-+ this_pass_clean = true;
-+
-+ mutex_lock(&c->vfs_inodes_lock);
-+ list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
-+ if (!snapshot_list_has_id(s, inode->ei_subvol))
-+ continue;
-+
-+ if (!(inode->v.i_state & I_DONTCACHE) &&
-+ !(inode->v.i_state & I_FREEING) &&
-+ igrab(&inode->v)) {
-+ this_pass_clean = false;
-+
-+ if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) { /* push failed: drop the ref and process what we have */
-+ iput(&inode->v);
-+ break;
-+ }
-+ } else if (clean_pass && this_pass_clean) { /* already marked: sleep on the inode's __I_NEW bit waitqueue, then rescan */
-+ wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
-+ DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
-+
-+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-+ mutex_unlock(&c->vfs_inodes_lock);
-+
-+ schedule();
-+ finish_wait(wq, &wait.wq_entry);
-+ goto again;
-+ }
-+ }
-+ mutex_unlock(&c->vfs_inodes_lock);
-+
-+ darray_for_each(grabbed, i) { /* done outside vfs_inodes_lock */
-+ inode = *i;
-+ d_mark_dontcache(&inode->v);
-+ d_prune_aliases(&inode->v);
-+ iput(&inode->v);
-+ }
-+ grabbed.nr = 0; /* reuse the array on the next pass */
-+
-+ if (!clean_pass || !this_pass_clean) { /* stop only after two clean passes in a row */
-+ clean_pass = this_pass_clean;
-+ goto again;
-+ }
-+
-+ darray_exit(&grabbed);
-+}
-+
-+/*
-+ * ->statfs: report filesystem usage in @buf.
-+ *
-+ * Block counts are the short usage figures shifted down by
-+ * (s_blocksize_bits - 9).  The filesystem ID is the XOR of the two 64-bit
-+ * halves of the user UUID, split into two 32-bit words.  Always returns 0.
-+ */
-+static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
-+{
-+	struct super_block *sb = dentry->d_sb;
-+	struct bch_fs *c = sb->s_fs_info;
-+	struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
-+	unsigned shift = sb->s_blocksize_bits - 9;
-+	/*
-+	 * this assumes inodes take up 64 bytes, which is a decent average
-+	 * number:
-+	 */
-+	u64 avail_inodes = ((usage.capacity - usage.used) << 3);
-+	u64 uuid_lo = le64_to_cpup((void *) c->sb.user_uuid.b);
-+	u64 uuid_hi = le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
-+	u64 fsid = uuid_lo ^ uuid_hi;
-+
-+	buf->f_type	= BCACHEFS_STATFS_MAGIC;
-+	buf->f_bsize	= sb->s_blocksize;
-+	buf->f_namelen	= BCH_NAME_MAX;
-+
-+	buf->f_blocks	= usage.capacity >> shift;
-+	buf->f_bfree	= usage.free >> shift;
-+	buf->f_bavail	= avail_factor(usage.free) >> shift;
-+
-+	buf->f_files	= usage.nr_inodes + avail_inodes;
-+	buf->f_ffree	= avail_inodes;
-+
-+	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
-+	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
-+
-+	return 0;
-+}
-+
-+/*
-+ * ->sync_fs: flush the journal.  Does nothing when the
-+ * journal_flush_disabled option is set.  With @wait zero the flush is only
-+ * started; otherwise it is waited for and its result returned through
-+ * bch2_err_class().
-+ */
-+static int bch2_sync_fs(struct super_block *sb, int wait)
-+{
-+	struct bch_fs *c = sb->s_fs_info;
-+
-+	if (c->opts.journal_flush_disabled)
-+		return 0;
-+
-+	if (wait)
-+		return bch2_err_class(bch2_journal_flush(&c->journal));
-+
-+	bch2_journal_flush_async(&c->journal, NULL);
-+	return 0;
-+}
-+
-+/*
-+ * Map a block device path to the bch_fs using that device.
-+ *
-+ * Returns the filesystem with the closure reference obtained by
-+ * bch2_dev_to_fs() already dropped, ERR_PTR(-ENOENT) if no filesystem uses
-+ * the device, or the ERR_PTR of the lookup_bdev() error.
-+ */
-+static struct bch_fs *bch2_path_to_fs(const char *path)
-+{
-+	struct bch_fs *c;
-+	dev_t dev;
-+	int err = lookup_bdev(path, &dev);
-+
-+	if (err)
-+		return ERR_PTR(err);
-+
-+	c = bch2_dev_to_fs(dev);
-+	if (!c)
-+		return ERR_PTR(-ENOENT);
-+
-+	closure_put(&c->cl);
-+	return c;
-+}
-+
-+/*
-+ * Split a colon separated device list into an array of strings.
-+ *
-+ * @_dev_name is duplicated and the copy is cut up in place, so every
-+ * returned string points into one allocation whose start is devs[0].
-+ * The array has one spare zeroed slot after the last entry.
-+ *
-+ * *@nr is set to the number of entries stored, which is always the number
-+ * of ':' characters plus one (an empty string yields one empty entry, and
-+ * a leading or doubled ':' yields an empty entry).  The count is taken by
-+ * scanning the string up to its terminator, so the scan never steps past
-+ * the end of an empty string.
-+ *
-+ * Returns the array, or NULL on allocation failure (*@nr is then left
-+ * untouched).
-+ */
-+static char **split_devs(const char *_dev_name, unsigned *nr)
-+{
-+	char *dev_name = NULL, **devs = NULL, *s;
-+	size_t i = 0, nr_devs = 1;
-+
-+	dev_name = kstrdup(_dev_name, GFP_KERNEL);
-+	if (!dev_name)
-+		return NULL;
-+
-+	/* One entry, plus one more for every separator. */
-+	for (s = dev_name; *s; s++)
-+		if (*s == ':')
-+			nr_devs++;
-+
-+	devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL);
-+	if (!devs) {
-+		kfree(dev_name);
-+		return NULL;
-+	}
-+
-+	/* strsep() yields exactly nr_devs tokens for nr_devs - 1 separators. */
-+	while ((s = strsep(&dev_name, ":")))
-+		devs[i++] = s;
-+
-+	*nr = nr_devs;
-+	return devs;
-+}
-+
-+/*
-+ * ->remount_fs: apply new mount options in @data.
-+ *
-+ * Only two things are acted on: a change of the read-only state (done
-+ * under c->state_lock, updating sb->s_flags to match) and, if given, the
-+ * "errors" option.  A failure to go read-write is logged and reported as
-+ * -EINVAL.  The result is returned through bch2_err_class().
-+ */
-+static int bch2_remount(struct super_block *sb, int *flags, char *data)
-+{
-+	struct bch_fs *c = sb->s_fs_info;
-+	struct bch_opts opts = bch2_opts_empty();
-+	int ret;
-+
-+	opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
-+
-+	ret = bch2_parse_mount_opts(c, &opts, data);
-+	if (ret)
-+		goto err;
-+
-+	if (opts.read_only != c->opts.read_only) {
-+		down_write(&c->state_lock);
-+
-+		if (!opts.read_only) {
-+			ret = bch2_fs_read_write(c);
-+			if (ret) {
-+				bch_err(c, "error going rw: %i", ret);
-+				up_write(&c->state_lock);
-+				ret = -EINVAL;
-+				goto err;
-+			}
-+
-+			sb->s_flags &= ~SB_RDONLY;
-+		} else {
-+			bch2_fs_read_only(c);
-+
-+			sb->s_flags |= SB_RDONLY;
-+		}
-+
-+		c->opts.read_only = opts.read_only;
-+
-+		up_write(&c->state_lock);
-+	}
-+
-+	if (opt_defined(opts, errors))
-+		c->opts.errors = opts.errors;
-+err:
-+	return bch2_err_class(ret);
-+}
-+
-+/*
-+ * ->show_devname: print the online member devices as a ':' separated list
-+ * of "/dev/<name>" entries.  Always returns 0.
-+ */
-+static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
-+{
-+	struct bch_fs *c = root->d_sb->s_fs_info;
-+	struct bch_dev *ca;
-+	unsigned i;
-+	bool need_sep = false;
-+
-+	for_each_online_member(ca, c, i) {
-+		if (need_sep)
-+			seq_putc(seq, ':');
-+		need_sep = true;
-+
-+		seq_puts(seq, "/dev/");
-+		seq_puts(seq, ca->name);
-+	}
-+
-+	return 0;
-+}
-+
-+static int bch2_show_options(struct seq_file *seq, struct dentry *root)
-+{
-+ struct bch_fs *c = root->d_sb->s_fs_info;
-+ enum bch_opt_id i;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ for (i = 0; i < bch2_opts_nr; i++) {
-+ const struct bch_option *opt = &bch2_opt_table[i];
-+ u64 v = bch2_opt_get_by_id(&c->opts, i);
-+
-+ if (!(opt->flags & OPT_MOUNT))
-+ continue;
-+
-+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
-+ continue;
-+
-+ printbuf_reset(&buf);
-+ bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
-+ OPT_SHOW_MOUNT_STYLE);
-+ seq_putc(seq, ',');
-+ seq_puts(seq, buf.buf);
-+ }
-+
-+ if (buf.allocation_failure)
-+ ret = -ENOMEM;
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static void bch2_put_super(struct super_block *sb)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+
-+ __bch2_fs_stop(c);
-+}
-+
-+/*
-+ * bcachefs doesn't currently integrate intwrite freeze protection but the
-+ * internal write references serve the same purpose. Therefore reuse the
-+ * read-only transition code to perform the quiesce. The caveat is that we don't
-+ * currently have the ability to block tasks that want a write reference while
-+ * the superblock is frozen. This is fine for now, but we should either add
-+ * blocking support or find a way to integrate sb_start_intwrite() and friends.
-+ */
-+static int bch2_freeze(struct super_block *sb)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+
-+ down_write(&c->state_lock);
-+ bch2_fs_read_only(c);
-+ up_write(&c->state_lock);
-+ return 0;
-+}
-+
-+static int bch2_unfreeze(struct super_block *sb)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+ int ret;
-+
-+ down_write(&c->state_lock);
-+ ret = bch2_fs_read_write(c);
-+ up_write(&c->state_lock);
-+ return ret;
-+}
-+
-+static const struct super_operations bch_super_operations = {
-+ .alloc_inode = bch2_alloc_inode,
-+ .destroy_inode = bch2_destroy_inode,
-+ .write_inode = bch2_vfs_write_inode,
-+ .evict_inode = bch2_evict_inode,
-+ .sync_fs = bch2_sync_fs,
-+ .statfs = bch2_statfs,
-+ .show_devname = bch2_show_devname,
-+ .show_options = bch2_show_options,
-+ .remount_fs = bch2_remount,
-+ .put_super = bch2_put_super,
-+ .freeze_fs = bch2_freeze,
-+ .unfreeze_fs = bch2_unfreeze,
-+};
-+
-+static int bch2_set_super(struct super_block *s, void *data)
-+{
-+ s->s_fs_info = data;
-+ return 0;
-+}
-+
-+static int bch2_noset_super(struct super_block *s, void *data)
-+{
-+ return -EBUSY;
-+}
-+
-+static int bch2_test_super(struct super_block *s, void *data)
-+{
-+ struct bch_fs *c = s->s_fs_info;
-+ struct bch_fs **devs = data;
-+ unsigned i;
-+
-+ if (!c)
-+ return false;
-+
-+ for (i = 0; devs[i]; i++)
-+ if (c != devs[i])
-+ return false;
-+ return true;
-+}
-+
-+static struct dentry *bch2_mount(struct file_system_type *fs_type,
-+ int flags, const char *dev_name, void *data)
-+{
-+ struct bch_fs *c;
-+ struct bch_dev *ca;
-+ struct super_block *sb;
-+ struct inode *vinode;
-+ struct bch_opts opts = bch2_opts_empty();
-+ char **devs;
-+ struct bch_fs **devs_to_fs = NULL;
-+ unsigned i, nr_devs;
-+ int ret;
-+
-+ opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
-+
-+ ret = bch2_parse_mount_opts(NULL, &opts, data);
-+ if (ret)
-+ return ERR_PTR(ret);
-+
-+ if (!dev_name || strlen(dev_name) == 0)
-+ return ERR_PTR(-EINVAL);
-+
-+ devs = split_devs(dev_name, &nr_devs);
-+ if (!devs)
-+ return ERR_PTR(-ENOMEM);
-+
-+ devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL);
-+ if (!devs_to_fs) {
-+ sb = ERR_PTR(-ENOMEM);
-+ goto got_sb;
-+ }
-+
-+ for (i = 0; i < nr_devs; i++)
-+ devs_to_fs[i] = bch2_path_to_fs(devs[i]);
-+
-+ sb = sget(fs_type, bch2_test_super, bch2_noset_super,
-+ flags|SB_NOSEC, devs_to_fs);
-+ if (!IS_ERR(sb))
-+ goto got_sb;
-+
-+ c = bch2_fs_open(devs, nr_devs, opts);
-+ if (IS_ERR(c)) {
-+ sb = ERR_CAST(c);
-+ goto got_sb;
-+ }
-+
-+ /* Some options can't be parsed until after the fs is started: */
-+ ret = bch2_parse_mount_opts(c, &opts, data);
-+ if (ret) {
-+ bch2_fs_stop(c);
-+ sb = ERR_PTR(ret);
-+ goto got_sb;
-+ }
-+
-+ bch2_opts_apply(&c->opts, opts);
-+
-+ sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
-+ if (IS_ERR(sb))
-+ bch2_fs_stop(c);
-+got_sb:
-+ kfree(devs_to_fs);
-+ kfree(devs[0]);
-+ kfree(devs);
-+
-+ if (IS_ERR(sb)) {
-+ ret = PTR_ERR(sb);
-+ ret = bch2_err_class(ret);
-+ return ERR_PTR(ret);
-+ }
-+
-+ c = sb->s_fs_info;
-+
-+ if (sb->s_root) {
-+ if ((flags ^ sb->s_flags) & SB_RDONLY) {
-+ ret = -EBUSY;
-+ goto err_put_super;
-+ }
-+ goto out;
-+ }
-+
-+ sb->s_blocksize = block_bytes(c);
-+ sb->s_blocksize_bits = ilog2(block_bytes(c));
-+ sb->s_maxbytes = MAX_LFS_FILESIZE;
-+ sb->s_op = &bch_super_operations;
-+ sb->s_export_op = &bch_export_ops;
-+#ifdef CONFIG_BCACHEFS_QUOTA
-+ sb->s_qcop = &bch2_quotactl_operations;
-+ sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
-+#endif
-+ sb->s_xattr = bch2_xattr_handlers;
-+ sb->s_magic = BCACHEFS_STATFS_MAGIC;
-+ sb->s_time_gran = c->sb.nsec_per_time_unit;
-+ sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
-+ sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
-+ c->vfs_sb = sb;
-+ strscpy(sb->s_id, c->name, sizeof(sb->s_id));
-+
-+ ret = super_setup_bdi(sb);
-+ if (ret)
-+ goto err_put_super;
-+
-+ sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
-+
-+ for_each_online_member(ca, c, i) {
-+ struct block_device *bdev = ca->disk_sb.bdev;
-+
-+ /* XXX: create an anonymous device for multi device filesystems */
-+ sb->s_bdev = bdev;
-+ sb->s_dev = bdev->bd_dev;
-+ percpu_ref_put(&ca->io_ref);
-+ break;
-+ }
-+
-+ c->dev = sb->s_dev;
-+
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+ if (c->opts.acl)
-+ sb->s_flags |= SB_POSIXACL;
-+#endif
-+
-+ sb->s_shrink.seeks = 0;
-+
-+ vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
-+ ret = PTR_ERR_OR_ZERO(vinode);
-+ if (ret) {
-+ bch_err_msg(c, ret, "mounting: error getting root inode");
-+ goto err_put_super;
-+ }
-+
-+ sb->s_root = d_make_root(vinode);
-+ if (!sb->s_root) {
-+ bch_err(c, "error mounting: error allocating root dentry");
-+ ret = -ENOMEM;
-+ goto err_put_super;
-+ }
-+
-+ sb->s_flags |= SB_ACTIVE;
-+out:
-+ return dget(sb->s_root);
-+
-+err_put_super:
-+ sb->s_fs_info = NULL;
-+ c->vfs_sb = NULL;
-+ deactivate_locked_super(sb);
-+ bch2_fs_stop(c);
-+ return ERR_PTR(bch2_err_class(ret));
-+}
-+
-+static void bch2_kill_sb(struct super_block *sb)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+
-+ if (c)
-+ c->vfs_sb = NULL;
-+ generic_shutdown_super(sb);
-+ if (c)
-+ bch2_fs_free(c);
-+}
-+
-+static struct file_system_type bcache_fs_type = {
-+ .owner = THIS_MODULE,
-+ .name = "bcachefs",
-+ .mount = bch2_mount,
-+ .kill_sb = bch2_kill_sb,
-+ .fs_flags = FS_REQUIRES_DEV,
-+};
-+
-+MODULE_ALIAS_FS("bcachefs");
-+
-+void bch2_vfs_exit(void)
-+{
-+ unregister_filesystem(&bcache_fs_type);
-+ kmem_cache_destroy(bch2_inode_cache);
-+}
-+
-+int __init bch2_vfs_init(void)
-+{
-+ int ret = -ENOMEM;
-+
-+ bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
-+ if (!bch2_inode_cache)
-+ goto err;
-+
-+ ret = register_filesystem(&bcache_fs_type);
-+ if (ret)
-+ goto err;
-+
-+ return 0;
-+err:
-+ bch2_vfs_exit();
-+ return ret;
-+}
-+
-+#endif /* NO_BCACHEFS_FS */
-diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
-new file mode 100644
-index 000000000000..5edf1d4b9e6b
---- /dev/null
-+++ b/fs/bcachefs/fs.h
-@@ -0,0 +1,209 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_H
-+#define _BCACHEFS_FS_H
-+
-+#include "inode.h"
-+#include "opts.h"
-+#include "str_hash.h"
-+#include "quota_types.h"
-+#include "two_state_shared_lock.h"
-+
-+#include <linux/seqlock.h>
-+#include <linux/stat.h>
-+
-+struct bch_inode_info {
-+ struct inode v;
-+ struct list_head ei_vfs_inode_list;
-+ unsigned long ei_flags;
-+
-+ struct mutex ei_update_lock;
-+ u64 ei_quota_reserved;
-+ unsigned long ei_last_dirtied;
-+ two_state_lock_t ei_pagecache_lock;
-+
-+ struct mutex ei_quota_lock;
-+ struct bch_qid ei_qid;
-+
-+ u32 ei_subvol;
-+
-+ /*
-+ * When we've been doing nocow writes we'll need to issue flushes to the
-+ * underlying block devices
-+ *
-+ * XXX: a device may have had a flush issued by some other codepath. It
-+ * would be better to keep for each device a sequence number that's
-+ * incremented when we isusue a cache flush, and track here the sequence
-+ * number that needs flushing.
-+ */
-+ struct bch_devs_mask ei_devs_need_flush;
-+
-+ /* copy of inode in btree: */
-+ struct bch_inode_unpacked ei_inode;
-+};
-+
-+#define bch2_pagecache_add_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 0)
-+#define bch2_pagecache_add_tryget(i) bch2_two_state_trylock(&i->ei_pagecache_lock, 0)
-+#define bch2_pagecache_add_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 0)
-+
-+#define bch2_pagecache_block_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 1)
-+#define bch2_pagecache_block_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 1)
-+
-+static inline subvol_inum inode_inum(struct bch_inode_info *inode)
-+{
-+ return (subvol_inum) {
-+ .subvol = inode->ei_subvol,
-+ .inum = inode->ei_inode.bi_inum,
-+ };
-+}
-+
-+/*
-+ * Set if we've gotten a btree error for this inode, and thus the vfs inode and
-+ * btree inode may be inconsistent:
-+ */
-+#define EI_INODE_ERROR 0
-+
-+/*
-+ * Set in the inode is in a snapshot subvolume - we don't do quota accounting in
-+ * those:
-+ */
-+#define EI_INODE_SNAPSHOT 1
-+
-+#define to_bch_ei(_inode) \
-+ container_of_or_null(_inode, struct bch_inode_info, v)
-+
-+static inline int ptrcmp(void *l, void *r)
-+{
-+ return cmp_int(l, r);
-+}
-+
-+enum bch_inode_lock_op {
-+ INODE_LOCK = (1U << 0),
-+ INODE_PAGECACHE_BLOCK = (1U << 1),
-+ INODE_UPDATE_LOCK = (1U << 2),
-+};
-+
-+#define bch2_lock_inodes(_locks, ...) \
-+do { \
-+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \
-+ unsigned i; \
-+ \
-+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \
-+ \
-+ for (i = 1; i < ARRAY_SIZE(a); i++) \
-+ if (a[i] != a[i - 1]) { \
-+ if ((_locks) & INODE_LOCK) \
-+ down_write_nested(&a[i]->v.i_rwsem, i); \
-+ if ((_locks) & INODE_PAGECACHE_BLOCK) \
-+ bch2_pagecache_block_get(a[i]);\
-+ if ((_locks) & INODE_UPDATE_LOCK) \
-+ mutex_lock_nested(&a[i]->ei_update_lock, i);\
-+ } \
-+} while (0)
-+
-+#define bch2_unlock_inodes(_locks, ...) \
-+do { \
-+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \
-+ unsigned i; \
-+ \
-+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \
-+ \
-+ for (i = 1; i < ARRAY_SIZE(a); i++) \
-+ if (a[i] != a[i - 1]) { \
-+ if ((_locks) & INODE_LOCK) \
-+ up_write(&a[i]->v.i_rwsem); \
-+ if ((_locks) & INODE_PAGECACHE_BLOCK) \
-+ bch2_pagecache_block_put(a[i]);\
-+ if ((_locks) & INODE_UPDATE_LOCK) \
-+ mutex_unlock(&a[i]->ei_update_lock); \
-+ } \
-+} while (0)
-+
-+static inline struct bch_inode_info *file_bch_inode(struct file *file)
-+{
-+ return to_bch_ei(file_inode(file));
-+}
-+
-+static inline bool inode_attr_changing(struct bch_inode_info *dir,
-+ struct bch_inode_info *inode,
-+ enum inode_opt_id id)
-+{
-+ return !(inode->ei_inode.bi_fields_set & (1 << id)) &&
-+ bch2_inode_opt_get(&dir->ei_inode, id) !=
-+ bch2_inode_opt_get(&inode->ei_inode, id);
-+}
-+
-+static inline bool inode_attrs_changing(struct bch_inode_info *dir,
-+ struct bch_inode_info *inode)
-+{
-+ unsigned id;
-+
-+ for (id = 0; id < Inode_opt_nr; id++)
-+ if (inode_attr_changing(dir, inode, id))
-+ return true;
-+
-+ return false;
-+}
-+
-+struct bch_inode_unpacked;
-+
-+#ifndef NO_BCACHEFS_FS
-+
-+struct bch_inode_info *
-+__bch2_create(struct mnt_idmap *, struct bch_inode_info *,
-+ struct dentry *, umode_t, dev_t, subvol_inum, unsigned);
-+
-+int bch2_fs_quota_transfer(struct bch_fs *,
-+ struct bch_inode_info *,
-+ struct bch_qid,
-+ unsigned,
-+ enum quota_acct_mode);
-+
-+static inline int bch2_set_projid(struct bch_fs *c,
-+ struct bch_inode_info *inode,
-+ u32 projid)
-+{
-+ struct bch_qid qid = inode->ei_qid;
-+
-+ qid.q[QTYP_PRJ] = projid;
-+
-+ return bch2_fs_quota_transfer(c, inode, qid,
-+ 1 << QTYP_PRJ,
-+ KEY_TYPE_QUOTA_PREALLOC);
-+}
-+
-+struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum);
-+
-+/* returns 0 if we want to do the update, or error is passed up */
-+typedef int (*inode_set_fn)(struct btree_trans *,
-+ struct bch_inode_info *,
-+ struct bch_inode_unpacked *, void *);
-+
-+void bch2_inode_update_after_write(struct btree_trans *,
-+ struct bch_inode_info *,
-+ struct bch_inode_unpacked *,
-+ unsigned);
-+int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
-+ inode_set_fn, void *, unsigned);
-+
-+int bch2_setattr_nonsize(struct mnt_idmap *,
-+ struct bch_inode_info *,
-+ struct iattr *);
-+int __bch2_unlink(struct inode *, struct dentry *, bool);
-+
-+void bch2_evict_subvolume_inodes(struct bch_fs *, snapshot_id_list *);
-+
-+void bch2_vfs_exit(void);
-+int bch2_vfs_init(void);
-+
-+#else
-+
-+#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
-+
-+static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
-+ snapshot_id_list *s) {}
-+static inline void bch2_vfs_exit(void) {}
-+static inline int bch2_vfs_init(void) { return 0; }
-+
-+#endif /* NO_BCACHEFS_FS */
-+
-+#endif /* _BCACHEFS_FS_H */
-diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
-new file mode 100644
-index 000000000000..9f3e9bd3d767
---- /dev/null
-+++ b/fs/bcachefs/fsck.c
-@@ -0,0 +1,2490 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "btree_cache.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "darray.h"
-+#include "dirent.h"
-+#include "error.h"
-+#include "fs-common.h"
-+#include "fsck.h"
-+#include "inode.h"
-+#include "keylist.h"
-+#include "recovery.h"
-+#include "snapshot.h"
-+#include "super.h"
-+#include "xattr.h"
-+
-+#include <linux/bsearch.h>
-+#include <linux/dcache.h> /* struct qstr */
-+
-+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-+
-+/*
-+ * XXX: this is handling transaction restarts without returning
-+ * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
-+ */
-+static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
-+ u32 snapshot)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ u64 sectors = 0;
-+ int ret;
-+
-+ for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
-+ SPOS(inum, 0, snapshot),
-+ POS(inum, U64_MAX),
-+ 0, k, ret)
-+ if (bkey_extent_is_allocation(k.k))
-+ sectors += k.k->size;
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ return ret ?: sectors;
-+}
-+
-+static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
-+ u32 snapshot)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_s_c_dirent d;
-+ u64 subdirs = 0;
-+ int ret;
-+
-+ for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
-+ SPOS(inum, 0, snapshot),
-+ POS(inum, U64_MAX),
-+ 0, k, ret) {
-+ if (k.k->type != KEY_TYPE_dirent)
-+ continue;
-+
-+ d = bkey_s_c_to_dirent(k);
-+ if (d.v->d_type == DT_DIR)
-+ subdirs++;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ return ret ?: subdirs;
-+}
-+
-+static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
-+ u32 *subvol)
-+{
-+ struct bch_snapshot s;
-+ int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots,
-+ POS(0, snapshot), 0,
-+ snapshot, &s);
-+ if (!ret)
-+ *subvol = le32_to_cpu(s.subvol);
-+ else if (bch2_err_matches(ret, ENOENT))
-+ bch_err(trans->c, "snapshot %u not found", snapshot);
-+ return ret;
-+
-+}
-+
-+static int __subvol_lookup(struct btree_trans *trans, u32 subvol,
-+ u32 *snapshot, u64 *inum)
-+{
-+ struct bch_subvolume s;
-+ int ret;
-+
-+ ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
-+
-+ *snapshot = le32_to_cpu(s.snapshot);
-+ *inum = le64_to_cpu(s.inode);
-+ return ret;
-+}
-+
-+static int subvol_lookup(struct btree_trans *trans, u32 subvol,
-+ u32 *snapshot, u64 *inum)
-+{
-+ return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum));
-+}
-+
-+static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
-+ struct bch_inode_unpacked *inode)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
-+ POS(0, inode_nr),
-+ BTREE_ITER_ALL_SNAPSHOTS);
-+ k = bch2_btree_iter_peek(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
-+ ret = -BCH_ERR_ENOENT_inode;
-+ goto err;
-+ }
-+
-+ ret = bch2_inode_unpack(k, inode);
-+err:
-+ bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
-+ struct bch_inode_unpacked *inode,
-+ u32 *snapshot)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
-+ SPOS(0, inode_nr, *snapshot), 0);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ ret = bkey_is_inode(k.k)
-+ ? bch2_inode_unpack(k, inode)
-+ : -BCH_ERR_ENOENT_inode;
-+ if (!ret)
-+ *snapshot = iter.pos.snapshot;
-+err:
-+ bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
-+ struct bch_inode_unpacked *inode,
-+ u32 *snapshot)
-+{
-+ return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
-+}
-+
-+static int __lookup_dirent(struct btree_trans *trans,
-+ struct bch_hash_info hash_info,
-+ subvol_inum dir, struct qstr *name,
-+ u64 *target, unsigned *type)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c_dirent d;
-+ int ret;
-+
-+ ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
-+ &hash_info, dir, name, 0);
-+ if (ret)
-+ return ret;
-+
-+ d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
-+ *target = le64_to_cpu(d.v->d_inum);
-+ *type = d.v->d_type;
-+ bch2_trans_iter_exit(trans, &iter);
-+ return 0;
-+}
-+
-+static int __write_inode(struct btree_trans *trans,
-+ struct bch_inode_unpacked *inode,
-+ u32 snapshot)
-+{
-+ struct bkey_inode_buf *inode_p =
-+ bch2_trans_kmalloc(trans, sizeof(*inode_p));
-+
-+ if (IS_ERR(inode_p))
-+ return PTR_ERR(inode_p);
-+
-+ bch2_inode_pack(inode_p, inode);
-+ inode_p->inode.k.p.snapshot = snapshot;
-+
-+ return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
-+ &inode_p->inode.k_i,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+}
-+
-+static int fsck_write_inode(struct btree_trans *trans,
-+ struct bch_inode_unpacked *inode,
-+ u32 snapshot)
-+{
-+ int ret = commit_do(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_LAZY_RW,
-+ __write_inode(trans, inode, snapshot));
-+ if (ret)
-+ bch_err_fn(trans->c, ret);
-+ return ret;
-+}
-+
-+static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bch_inode_unpacked dir_inode;
-+ struct bch_hash_info dir_hash_info;
-+ int ret;
-+
-+ ret = lookup_first_inode(trans, pos.inode, &dir_inode);
-+ if (ret)
-+ goto err;
-+
-+ dir_hash_info = bch2_hash_info_init(c, &dir_inode);
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
-+
-+ ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
-+ &dir_hash_info, &iter,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/* Get lost+found, create if it doesn't exist: */
-+static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
-+ struct bch_inode_unpacked *lostfound)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_inode_unpacked root;
-+ struct bch_hash_info root_hash_info;
-+ struct qstr lostfound_str = QSTR("lost+found");
-+ subvol_inum root_inum = { .subvol = subvol };
-+ u64 inum = 0;
-+ unsigned d_type = 0;
-+ u32 snapshot;
-+ int ret;
-+
-+ ret = __subvol_lookup(trans, subvol, &snapshot, &root_inum.inum);
-+ if (ret)
-+ return ret;
-+
-+ ret = __lookup_inode(trans, root_inum.inum, &root, &snapshot);
-+ if (ret)
-+ return ret;
-+
-+ root_hash_info = bch2_hash_info_init(c, &root);
-+
-+ ret = __lookup_dirent(trans, root_hash_info, root_inum,
-+ &lostfound_str, &inum, &d_type);
-+ if (bch2_err_matches(ret, ENOENT)) {
-+ bch_notice(c, "creating lost+found");
-+ goto create_lostfound;
-+ }
-+
-+ bch_err_fn(c, ret);
-+ if (ret)
-+ return ret;
-+
-+ if (d_type != DT_DIR) {
-+ bch_err(c, "error looking up lost+found: not a directory");
-+ return -BCH_ERR_ENOENT_not_directory;
-+ }
-+
-+ /*
-+ * The bch2_check_dirents pass has already run, dangling dirents
-+ * shouldn't exist here:
-+ */
-+ return __lookup_inode(trans, inum, lostfound, &snapshot);
-+
-+create_lostfound:
-+ bch2_inode_init_early(c, lostfound);
-+
-+ ret = bch2_create_trans(trans, root_inum, &root,
-+ lostfound, &lostfound_str,
-+ 0, 0, S_IFDIR|0700, 0, NULL, NULL,
-+ (subvol_inum) { }, 0);
-+ bch_err_msg(c, ret, "creating lost+found");
-+ return ret;
-+}
-+
-+static int __reattach_inode(struct btree_trans *trans,
-+ struct bch_inode_unpacked *inode,
-+ u32 inode_snapshot)
-+{
-+ struct bch_hash_info dir_hash;
-+ struct bch_inode_unpacked lostfound;
-+ char name_buf[20];
-+ struct qstr name;
-+ u64 dir_offset = 0;
-+ u32 subvol;
-+ int ret;
-+
-+ ret = __snapshot_lookup_subvol(trans, inode_snapshot, &subvol);
-+ if (ret)
-+ return ret;
-+
-+ ret = lookup_lostfound(trans, subvol, &lostfound);
-+ if (ret)
-+ return ret;
-+
-+ if (S_ISDIR(inode->bi_mode)) {
-+ lostfound.bi_nlink++;
-+
-+ ret = __write_inode(trans, &lostfound, U32_MAX);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ dir_hash = bch2_hash_info_init(trans->c, &lostfound);
-+
-+ snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
-+ name = (struct qstr) QSTR(name_buf);
-+
-+ ret = bch2_dirent_create(trans,
-+ (subvol_inum) {
-+ .subvol = subvol,
-+ .inum = lostfound.bi_inum,
-+ },
-+ &dir_hash,
-+ inode_d_type(inode),
-+ &name, inode->bi_inum, &dir_offset,
-+ BCH_HASH_SET_MUST_CREATE);
-+ if (ret)
-+ return ret;
-+
-+ inode->bi_dir = lostfound.bi_inum;
-+ inode->bi_dir_offset = dir_offset;
-+
-+ return __write_inode(trans, inode, inode_snapshot);
-+}
-+
-+static int reattach_inode(struct btree_trans *trans,
-+ struct bch_inode_unpacked *inode,
-+ u32 inode_snapshot)
-+{
-+ int ret = commit_do(trans, NULL, NULL,
-+ BTREE_INSERT_LAZY_RW|
-+ BTREE_INSERT_NOFAIL,
-+ __reattach_inode(trans, inode, inode_snapshot));
-+ bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum);
-+ return ret;
-+}
-+
-+static int remove_backpointer(struct btree_trans *trans,
-+ struct bch_inode_unpacked *inode)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c_dirent d;
-+ int ret;
-+
-+ d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
-+ POS(inode->bi_dir, inode->bi_dir_offset), 0,
-+ dirent);
-+ ret = bkey_err(d) ?:
-+ __remove_dirent(trans, d.k->p);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+struct snapshots_seen_entry {
-+ u32 id;
-+ u32 equiv;
-+};
-+
-+struct snapshots_seen {
-+ struct bpos pos;
-+ DARRAY(struct snapshots_seen_entry) ids;
-+};
-+
-+static inline void snapshots_seen_exit(struct snapshots_seen *s)
-+{
-+ darray_exit(&s->ids);
-+}
-+
-+static inline void snapshots_seen_init(struct snapshots_seen *s)
-+{
-+ memset(s, 0, sizeof(*s));
-+}
-+
-+static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
-+{
-+ struct snapshots_seen_entry *i, n = {
-+ .id = id,
-+ .equiv = bch2_snapshot_equiv(c, id),
-+ };
-+ int ret = 0;
-+
-+ darray_for_each(s->ids, i) {
-+ if (i->id == id)
-+ return 0;
-+ if (i->id > id)
-+ break;
-+ }
-+
-+ ret = darray_insert_item(&s->ids, i - s->ids.data, n);
-+ if (ret)
-+ bch_err(c, "error reallocating snapshots_seen table (size %zu)",
-+ s->ids.size);
-+ return ret;
-+}
-+
-+static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
-+ enum btree_id btree_id, struct bpos pos)
-+{
-+ struct snapshots_seen_entry *i, n = {
-+ .id = pos.snapshot,
-+ .equiv = bch2_snapshot_equiv(c, pos.snapshot),
-+ };
-+ int ret = 0;
-+
-+ if (!bkey_eq(s->pos, pos))
-+ s->ids.nr = 0;
-+
-+ s->pos = pos;
-+ s->pos.snapshot = n.equiv;
-+
-+ darray_for_each(s->ids, i) {
-+ if (i->id == n.id)
-+ return 0;
-+
-+ /*
-+ * We currently don't rigorously track for snapshot cleanup
-+ * needing to be run, so it shouldn't be a fsck error yet:
-+ */
-+ if (i->equiv == n.equiv) {
-+ bch_err(c, "snapshot deletion did not finish:\n"
-+ " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
-+ bch2_btree_id_str(btree_id),
-+ pos.inode, pos.offset,
-+ i->id, n.id, n.equiv);
-+ set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
-+ return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots);
-+ }
-+ }
-+
-+ ret = darray_push(&s->ids, n);
-+ if (ret)
-+ bch_err(c, "error reallocating snapshots_seen table (size %zu)",
-+ s->ids.size);
-+ return ret;
-+}
-+
-+/**
-+ * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
-+ * and @ancestor hasn't been overwritten in @seen
-+ *
-+ * @c: filesystem handle
-+ * @seen: list of snapshot ids already seen at current position
-+ * @id: descendent snapshot id
-+ * @ancestor: ancestor snapshot id
-+ *
-+ * Returns: whether key in @ancestor snapshot is visible in @id snapshot
-+ */
-+static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
-+ u32 id, u32 ancestor)
-+{
-+ ssize_t i;
-+
-+ EBUG_ON(id > ancestor);
-+ EBUG_ON(!bch2_snapshot_is_equiv(c, id));
-+ EBUG_ON(!bch2_snapshot_is_equiv(c, ancestor));
-+
-+ /* @ancestor should be the snapshot most recently added to @seen */
-+ EBUG_ON(ancestor != seen->pos.snapshot);
-+ EBUG_ON(ancestor != seen->ids.data[seen->ids.nr - 1].equiv);
-+
-+ if (id == ancestor)
-+ return true;
-+
-+ if (!bch2_snapshot_is_ancestor(c, id, ancestor))
-+ return false;
-+
-+ /*
-+ * We know that @id is a descendant of @ancestor, we're checking if
-+ * we've seen a key that overwrote @ancestor - i.e. also a descendent of
-+ * @ascestor and with @id as a descendent.
-+ *
-+ * But we already know that we're scanning IDs between @id and @ancestor
-+ * numerically, since snapshot ID lists are kept sorted, so if we find
-+ * an id that's an ancestor of @id we're done:
-+ */
-+
-+ for (i = seen->ids.nr - 2;
-+ i >= 0 && seen->ids.data[i].equiv >= id;
-+ --i)
-+ if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv))
-+ return false;
-+
-+ return true;
-+}
-+
-+/**
-+ * ref_visible - given a key with snapshot id @src that points to a key with
-+ * snapshot id @dst, test whether there is some snapshot in which @dst is
-+ * visible.
-+ *
-+ * @c: filesystem handle
-+ * @s: list of snapshot IDs already seen at @src
-+ * @src: snapshot ID of src key
-+ * @dst: snapshot ID of dst key
-+ * Returns: true if there is some snapshot in which @dst is visible
-+ *
-+ * Assumes we're visiting @src keys in natural key order
-+ */
-+static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
-+ u32 src, u32 dst)
-+{
-+ return dst <= src
-+ ? key_visible_in_snapshot(c, s, dst, src)
-+ : bch2_snapshot_is_ancestor(c, src, dst);
-+}
-+
-+static int ref_visible2(struct bch_fs *c,
-+ u32 src, struct snapshots_seen *src_seen,
-+ u32 dst, struct snapshots_seen *dst_seen)
-+{
-+ src = bch2_snapshot_equiv(c, src);
-+ dst = bch2_snapshot_equiv(c, dst);
-+
-+ if (dst > src) {
-+ swap(dst, src);
-+ swap(dst_seen, src_seen);
-+ }
-+ return key_visible_in_snapshot(c, src_seen, dst, src);
-+}
-+
-+#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
-+ for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
-+ (_i)->snapshot <= (_snapshot); _i++) \
-+ if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
-+
-+struct inode_walker_entry {
-+ struct bch_inode_unpacked inode;
-+ u32 snapshot;
-+ bool seen_this_pos;
-+ u64 count;
-+};
-+
-+struct inode_walker {
-+ bool first_this_inode;
-+ bool recalculate_sums;
-+ struct bpos last_pos;
-+
-+ DARRAY(struct inode_walker_entry) inodes;
-+};
-+
-+static void inode_walker_exit(struct inode_walker *w)
-+{
-+ darray_exit(&w->inodes);
-+}
-+
-+static struct inode_walker inode_walker_init(void)
-+{
-+ return (struct inode_walker) { 0, };
-+}
-+
-+static int add_inode(struct bch_fs *c, struct inode_walker *w,
-+ struct bkey_s_c inode)
-+{
-+ struct bch_inode_unpacked u;
-+
-+ BUG_ON(bch2_inode_unpack(inode, &u));
-+
-+ return darray_push(&w->inodes, ((struct inode_walker_entry) {
-+ .inode = u,
-+ .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot),
-+ }));
-+}
-+
-+static int get_inodes_all_snapshots(struct btree_trans *trans,
-+ struct inode_walker *w, u64 inum)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ u32 restart_count = trans->restart_count;
-+ int ret;
-+
-+ w->recalculate_sums = false;
-+ w->inodes.nr = 0;
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
-+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+ if (k.k->p.offset != inum)
-+ break;
-+
-+ if (bkey_is_inode(k.k))
-+ add_inode(c, w, k);
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (ret)
-+ return ret;
-+
-+ w->first_this_inode = true;
-+
-+ return trans_was_restarted(trans, restart_count);
-+}
-+
-+static struct inode_walker_entry *
-+lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w,
-+ u32 snapshot, bool is_whiteout)
-+{
-+ struct inode_walker_entry *i;
-+
-+ snapshot = bch2_snapshot_equiv(c, snapshot);
-+
-+ darray_for_each(w->inodes, i)
-+ if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
-+ goto found;
-+
-+ return NULL;
-+found:
-+ BUG_ON(snapshot > i->snapshot);
-+
-+ if (snapshot != i->snapshot && !is_whiteout) {
-+ struct inode_walker_entry new = *i;
-+ size_t pos;
-+ int ret;
-+
-+ new.snapshot = snapshot;
-+ new.count = 0;
-+
-+ bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
-+ w->last_pos.inode, snapshot, i->snapshot);
-+
-+ while (i > w->inodes.data && i[-1].snapshot > snapshot)
-+ --i;
-+
-+ pos = i - w->inodes.data;
-+ ret = darray_insert_item(&w->inodes, pos, new);
-+ if (ret)
-+ return ERR_PTR(ret);
-+
-+ i = w->inodes.data + pos;
-+ }
-+
-+ return i;
-+}
-+
-+static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
-+ struct inode_walker *w, struct bpos pos,
-+ bool is_whiteout)
-+{
-+ if (w->last_pos.inode != pos.inode) {
-+ int ret = get_inodes_all_snapshots(trans, w, pos.inode);
-+ if (ret)
-+ return ERR_PTR(ret);
-+ } else if (bkey_cmp(w->last_pos, pos)) {
-+ struct inode_walker_entry *i;
-+
-+ darray_for_each(w->inodes, i)
-+ i->seen_this_pos = false;
-+
-+ }
-+
-+ w->last_pos = pos;
-+
-+ return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout);
-+}
-+
-+static int __get_visible_inodes(struct btree_trans *trans,
-+ struct inode_walker *w,
-+ struct snapshots_seen *s,
-+ u64 inum)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ w->inodes.nr = 0;
-+
-+ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
-+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+ u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
-+
-+ if (k.k->p.offset != inum)
-+ break;
-+
-+ if (!ref_visible(c, s, s->pos.snapshot, equiv))
-+ continue;
-+
-+ if (bkey_is_inode(k.k))
-+ add_inode(c, w, k);
-+
-+ if (equiv >= s->pos.snapshot)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ return ret;
-+}
-+ /*
-+ * Check that the snapshot of @k resolves through bch2_snapshot_equiv().
-+ * If it resolves to 0 and the fsck error is acted on, the key at @iter is
-+ * deleted: the result is then the delete's error code, or 1 if the delete
-+ * returned 0 (GNU "?:" operator). Otherwise ret keeps its initial 0.
-+ * NOTE(review): presumably the fsck error macro can also set ret before
-+ * jumping to the fsck_err label - confirm against its definition.
-+ * Callers in this file treat a positive result as "key was deleted".
-+ */
-+ static int check_key_has_snapshot(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
-+ bkey_in_missing_snapshot,
-+ "key in missing snapshot: %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
-+ ret = bch2_btree_delete_at(trans, iter,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1;
-+ fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+ }
-+ /*
-+ * Move hash table key @k to the slot its hash selects: queue a deletion
-+ * at @k_iter's position, re-insert a mutable copy of @k through
-+ * bch2_hash_set_snapshot() with BCH_HASH_SET_MUST_CREATE, then commit.
-+ * Returns 0 on success or the first error in the chain.
-+ */
-+ static int hash_redo_key(struct btree_trans *trans,
-+ const struct bch_hash_desc desc,
-+ struct bch_hash_info *hash_info,
-+ struct btree_iter *k_iter, struct bkey_s_c k)
-+ {
-+ struct bkey_i *delete;
-+ struct bkey_i *tmp;
-+
-+ delete = bch2_trans_kmalloc(trans, sizeof(*delete));
-+ if (IS_ERR(delete))
-+ return PTR_ERR(delete);
-+ /* Mutable copy of @k that is re-inserted below */
-+ tmp = bch2_bkey_make_mut_noupdate(trans, k);
-+ if (IS_ERR(tmp))
-+ return PTR_ERR(tmp);
-+ /* bkey_init() plus the position assignment build the deletion key */
-+ bkey_init(&delete->k);
-+ delete->k.p = k_iter->pos;
-+ return bch2_btree_iter_traverse(k_iter) ?:
-+ bch2_trans_update(trans, k_iter, delete, 0) ?:
-+ bch2_hash_set_snapshot(trans, desc, hash_info,
-+ (subvol_inum) { 0, k.k->p.inode },
-+ k.k->p.snapshot, tmp,
-+ BCH_HASH_SET_MUST_CREATE,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_LAZY_RW);
-+ }
-+
-+ /*
-+ * Verify that hash table key @hash_k sits at a slot consistent with its
-+ * hash value.
-+ *
-+ * Keys of a type other than desc.key_type, and keys whose offset equals
-+ * their hash, are accepted immediately. Otherwise the slots from the hashed
-+ * offset up to the key are scanned: a matching key of the same type found
-+ * on the way is a duplicate (the key at @k_iter is deleted), and a deleted
-+ * slot on the way means the key is at the wrong offset and is re-inserted
-+ * with hash_redo_key().
-+ *
-+ * Returns 0 if the key is fine, 1 if the key at @k_iter was deleted as a
-+ * duplicate, -BCH_ERR_transaction_restart_nested after a successful
-+ * re-insert, or a negative error code.
-+ */
-+ static int hash_check_key(struct btree_trans *trans,
-+ const struct bch_hash_desc desc,
-+ struct bch_hash_info *hash_info,
-+ struct btree_iter *k_iter, struct bkey_s_c hash_k)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter = { NULL };
-+ struct printbuf buf = PRINTBUF;
-+ struct bkey_s_c k;
-+ u64 hash;
-+ int ret = 0;
-+
-+ if (hash_k.k->type != desc.key_type)
-+ return 0;
-+
-+ hash = desc.hash_bkey(hash_info, hash_k);
-+
-+ if (likely(hash == hash_k.k->p.offset))
-+ return 0;
-+
-+ /* A key stored below its own hash value can never be reached by probing */
-+ if (hash_k.k->p.offset < hash)
-+ goto bad_hash;
-+
-+ for_each_btree_key_norestart(trans, iter, desc.btree_id,
-+ SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
-+ BTREE_ITER_SLOTS, k, ret) {
-+ if (bkey_eq(k.k->p, hash_k.k->p))
-+ break;
-+
-+ if (fsck_err_on(k.k->type == desc.key_type &&
-+ !desc.cmp_bkey(k, hash_k), c,
-+ hash_table_key_duplicate,
-+ "duplicate hash table keys:\n%s",
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, hash_k),
-+ buf.buf))) {
-+ ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
-+ break;
-+ }
-+
-+ if (bkey_deleted(k.k)) {
-+ bch2_trans_iter_exit(trans, &iter);
-+ goto bad_hash;
-+ }
-+ }
-+ out:
-+ bch2_trans_iter_exit(trans, &iter);
-+ printbuf_exit(&buf);
-+ return ret;
-+ bad_hash:
-+ if (fsck_err(c, hash_table_key_wrong_offset,
-+ "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
-+ bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
-+ ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
-+ bch_err_fn(c, ret);
-+ /*
-+ * Leave through the common exit so that buf, which was filled in
-+ * for the message above, is released on failure as well.
-+ */
-+ if (ret)
-+ goto out;
-+ ret = -BCH_ERR_transaction_restart_nested;
-+ }
-+ fsck_err:
-+ goto out;
-+ }
-+
-+ /*
-+ * Check and repair a single key from the inodes btree.
-+ *
-+ * @prev: unpacked inode used to compare versions of the same inode number
-+ * across snapshots; replaced whenever the inode number changes
-+ * @s: snapshots seen so far, updated with this key's position
-+ * @full: when false, only inodes with i_size_dirty, i_sectors_dirty or
-+ * unlinked set are examined
-+ *
-+ * Returns 0 on success (including "key deleted" and "not an inode") or a
-+ * negative error code.
-+ */
-+ static int check_inode(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ struct bch_inode_unpacked *prev,
-+ struct snapshots_seen *s,
-+ bool full)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct bch_inode_unpacked u;
-+ bool do_update = false;
-+ int ret;
-+
-+ ret = check_key_has_snapshot(trans, iter, k);
-+ if (ret < 0)
-+ goto err;
-+ /* Positive: the key was deleted, nothing left to check */
-+ if (ret)
-+ return 0;
-+
-+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
-+ if (ret)
-+ goto err;
-+
-+ if (!bkey_is_inode(k.k))
-+ return 0;
-+
-+ BUG_ON(bch2_inode_unpack(k, &u));
-+
-+ if (!full &&
-+ !(u.bi_flags & (BCH_INODE_i_size_dirty|
-+ BCH_INODE_i_sectors_dirty|
-+ BCH_INODE_unlinked)))
-+ return 0;
-+
-+ if (prev->bi_inum != u.bi_inum)
-+ *prev = u;
-+
-+ if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed ||
-+ inode_d_type(prev) != inode_d_type(&u),
-+ c, inode_snapshot_mismatch,
-+ "inodes in different snapshots don't match")) {
-+ bch_err(c, "repair not implemented yet");
-+ return -EINVAL;
-+ }
-+
-+ if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) &&
-+ bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) {
-+ struct bpos new_min_pos;
-+
-+ ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * Clear both flags tested above. Unary ~ binds tighter than |, so
-+ * the mask has to be parenthesized; without the parentheses only
-+ * i_size_dirty was cleared.
-+ */
-+ u.bi_flags &= ~(BCH_INODE_i_size_dirty|BCH_INODE_unlinked);
-+
-+ ret = __write_inode(trans, &u, iter->pos.snapshot);
-+ bch_err_msg(c, ret, "in fsck updating inode");
-+ if (ret)
-+ return ret;
-+
-+ if (!bpos_eq(new_min_pos, POS_MIN))
-+ bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos));
-+ return 0;
-+ }
-+
-+ if (u.bi_flags & BCH_INODE_unlinked &&
-+ (!c->sb.clean ||
-+ fsck_err(c, inode_unlinked_but_clean,
-+ "filesystem marked clean, but inode %llu unlinked",
-+ u.bi_inum))) {
-+ bch2_trans_unlock(trans);
-+ bch2_fs_lazy_rw(c);
-+
-+ ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
-+ bch_err_msg(c, ret, "in fsck deleting inode");
-+ return ret;
-+ }
-+
-+ if (u.bi_flags & BCH_INODE_i_size_dirty &&
-+ (!c->sb.clean ||
-+ fsck_err(c, inode_i_size_dirty_but_clean,
-+ "filesystem marked clean, but inode %llu has i_size dirty",
-+ u.bi_inum))) {
-+ bch_verbose(c, "truncating inode %llu", u.bi_inum);
-+
-+ bch2_trans_unlock(trans);
-+ bch2_fs_lazy_rw(c);
-+
-+ /*
-+ * XXX: need to truncate partial blocks too here - or ideally
-+ * just switch units to bytes and that issue goes away
-+ */
-+ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
-+ SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
-+ iter->pos.snapshot),
-+ POS(u.bi_inum, U64_MAX),
-+ 0, NULL);
-+ bch_err_msg(c, ret, "in fsck truncating inode");
-+ if (ret)
-+ return ret;
-+
-+ /*
-+ * We truncated without our normal sector accounting hook, just
-+ * make sure we recalculate it:
-+ */
-+ u.bi_flags |= BCH_INODE_i_sectors_dirty;
-+
-+ u.bi_flags &= ~BCH_INODE_i_size_dirty;
-+ do_update = true;
-+ }
-+
-+ if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
-+ (!c->sb.clean ||
-+ fsck_err(c, inode_i_sectors_dirty_but_clean,
-+ "filesystem marked clean, but inode %llu has i_sectors dirty",
-+ u.bi_inum))) {
-+ s64 sectors;
-+
-+ bch_verbose(c, "recounting sectors for inode %llu",
-+ u.bi_inum);
-+
-+ sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
-+ if (sectors < 0) {
-+ bch_err_msg(c, sectors, "in fsck recounting inode sectors");
-+ return sectors;
-+ }
-+
-+ u.bi_sectors = sectors;
-+ u.bi_flags &= ~BCH_INODE_i_sectors_dirty;
-+ do_update = true;
-+ }
-+
-+ /* An untrusted backpointer is reset to zero */
-+ if (u.bi_flags & BCH_INODE_backptr_untrusted) {
-+ u.bi_dir = 0;
-+ u.bi_dir_offset = 0;
-+ u.bi_flags &= ~BCH_INODE_backptr_untrusted;
-+ do_update = true;
-+ }
-+
-+ if (do_update) {
-+ ret = __write_inode(trans, &u, iter->pos.snapshot);
-+ bch_err_msg(c, ret, "in fsck updating inode");
-+ if (ret)
-+ return ret;
-+ }
-+ err:
-+ fsck_err:
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+ /*
-+ * Run check_inode() over every key in the inodes btree, across all
-+ * snapshots, starting at POS_MIN. The "full" argument passed to
-+ * check_inode() is taken from c->opts.fsck. Returns the iteration's result.
-+ */
-+ noinline_for_stack
-+ int bch2_check_inodes(struct bch_fs *c)
-+ {
-+ bool full = c->opts.fsck;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bch_inode_unpacked prev = { 0 };
-+ struct snapshots_seen s;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ snapshots_seen_init(&s);
-+
-+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
-+ POS_MIN,
-+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
-+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+ check_inode(trans, &iter, k, &prev, &s, full));
-+ /* Release the snapshot tracking state and the transaction */
-+ snapshots_seen_exit(&s);
-+ bch2_trans_put(trans);
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+ /*
-+ * Look up the key at @pos in the dirents btree as a dirent, initializing
-+ * @iter. Callers in this file check the result with bkey_err() and release
-+ * @iter with bch2_trans_iter_exit() on success.
-+ */
-+ static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bpos pos)
-+ {
-+ return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
-+ }
-+
-+ /*
-+ * True when the backpointer stored in @inode (bi_dir, bi_dir_offset)
-+ * names the position of dirent @d.
-+ */
-+ static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
-+ struct bkey_s_c_dirent d)
-+ {
-+ if (inode->bi_dir != d.k->p.inode)
-+ return false;
-+
-+ return inode->bi_dir_offset == d.k->p.offset;
-+ }
-+
-+ /*
-+ * True when dirent @d refers to @inode: subvolume dirents are matched on
-+ * d_child_subvol against bi_subvol, all others on d_inum against bi_inum.
-+ */
-+ static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
-+ struct bch_inode_unpacked *inode)
-+ {
-+ if (d.v->d_type == DT_SUBVOL)
-+ return le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol;
-+
-+ return le64_to_cpu(d.v->d_inum) == inode->bi_inum;
-+ }
-+
-+ /*
-+ * Check whether the dirent named by @inode's backpointer exists in
-+ * @snapshot and points back at @inode.
-+ *
-+ * Returns 1 if it does, 0 if it does not or the lookup reports ENOENT,
-+ * and any other lookup error unchanged.
-+ */
-+ static int inode_backpointer_exists(struct btree_trans *trans,
-+ struct bch_inode_unpacked *inode,
-+ u32 snapshot)
-+ {
-+ struct btree_iter iter;
-+ struct bkey_s_c_dirent dirent;
-+ int err, matches;
-+
-+ dirent = dirent_get_by_pos(trans, &iter,
-+ SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot));
-+ err = bkey_err(dirent);
-+ if (err) {
-+ if (bch2_err_matches(err, ENOENT))
-+ return 0;
-+ return err;
-+ }
-+
-+ matches = dirent_points_to_inode(dirent, inode);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return matches;
-+ }
-+
-+ /*
-+ * Compare the sector count accumulated in each cached inode entry of @w
-+ * (i->count) with the inode's bi_sectors field and repair mismatches.
-+ *
-+ * For every mismatching entry the count is recomputed with
-+ * bch2_count_inode_sectors(); when w->recalculate_sums is set the
-+ * recomputed value replaces the accumulated one, otherwise a difference
-+ * between the two is reported as an internal fsck error.
-+ *
-+ * Returns 0, a negative error code, or the result of
-+ * trans_was_restarted() for the restart count sampled on entry.
-+ */
-+ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct inode_walker_entry *i;
-+ u32 restart_count = trans->restart_count;
-+ int ret = 0;
-+ s64 count2;
-+
-+ darray_for_each(w->inodes, i) {
-+ if (i->inode.bi_sectors == i->count)
-+ continue;
-+
-+ count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);
-+ /*
-+ * A negative value is an error code rather than a sector count
-+ * (check_inode() treats it the same way); never store it in
-+ * i->count or compare it against the accumulated total.
-+ */
-+ if (count2 < 0)
-+ return count2;
-+
-+ if (w->recalculate_sums)
-+ i->count = count2;
-+
-+ if (i->count != count2) {
-+ bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
-+ w->last_pos.inode, i->snapshot, i->count, count2);
-+ return -BCH_ERR_internal_fsck_err;
-+ }
-+
-+ /* Inodes already flagged i_sectors_dirty are not reported here */
-+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
-+ c, inode_i_sectors_wrong,
-+ "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
-+ w->last_pos.inode, i->snapshot,
-+ i->inode.bi_sectors, i->count)) {
-+ i->inode.bi_sectors = i->count;
-+ ret = fsck_write_inode(trans, &i->inode, i->snapshot);
-+ if (ret)
-+ break;
-+ }
-+ }
-+ fsck_err:
-+ bch_err_fn(c, ret);
-+ return ret ?: trans_was_restarted(trans, restart_count);
-+ }
-+ /* Where a previously seen extent ended; entries are filled in by extent_ends_at() */
-+ struct extent_end {
-+ u32 snapshot; /* k.k->p.snapshot of the extent */
-+ u64 offset; /* k.k->p.offset of the extent */
-+ struct snapshots_seen seen; /* copy of the snapshots_seen passed in, with its own ids array */
-+ };
-+ /* Set of extent ends recorded while walking the extents of one inode */
-+ struct extent_ends {
-+ struct bpos last_pos; /* position of the last key recorded by check_overlapping_extents() */
-+ DARRAY(struct extent_end) e; /* at most one entry per snapshot, see extent_ends_at() */
-+ };
-+
-+ /*
-+ * Forget all recorded extent ends: release the snapshots_seen copy held by
-+ * each entry, then set the entry count back to zero.
-+ */
-+ static void extent_ends_reset(struct extent_ends *extent_ends)
-+ {
-+ struct extent_end *end;
-+
-+ darray_for_each(extent_ends->e, end) {
-+ snapshots_seen_exit(&end->seen);
-+ }
-+
-+ extent_ends->e.nr = 0;
-+ }
-+ /* Tear down @extent_ends: reset every entry, then release the array with darray_exit() */
-+ static void extent_ends_exit(struct extent_ends *extent_ends)
-+ {
-+ extent_ends_reset(extent_ends);
-+ darray_exit(&extent_ends->e);
-+ }
-+ /* Initialize @extent_ends by zeroing the whole structure */
-+ static void extent_ends_init(struct extent_ends *extent_ends)
-+ {
-+ memset(extent_ends, 0, sizeof(*extent_ends));
-+ }
-+
-+ /*
-+ * Record the end of extent @k in @extent_ends, together with a private
-+ * copy of @seen.
-+ *
-+ * An existing entry for the same snapshot is replaced in place; otherwise
-+ * the new entry is inserted before the first entry whose snapshot is not
-+ * smaller than @k's.
-+ *
-+ * Returns 0 on success, -BCH_ERR_ENOMEM_fsck_extent_ends_at if the copy of
-+ * the snapshot ID array cannot be allocated, or the error from
-+ * darray_insert_item().
-+ */
-+ static int extent_ends_at(struct bch_fs *c,
-+ struct extent_ends *extent_ends,
-+ struct snapshots_seen *seen,
-+ struct bkey_s_c k)
-+ {
-+ struct extent_end *i, n = (struct extent_end) {
-+ .offset = k.k->p.offset,
-+ .snapshot = k.k->p.snapshot,
-+ .seen = *seen,
-+ };
-+ int ret;
-+
-+ /* The struct copy above shares @seen's array; give the entry its own */
-+ n.seen.ids.data = kmemdup(seen->ids.data,
-+ sizeof(seen->ids.data[0]) * seen->ids.size,
-+ GFP_KERNEL);
-+ if (!n.seen.ids.data)
-+ return -BCH_ERR_ENOMEM_fsck_extent_ends_at;
-+
-+ darray_for_each(extent_ends->e, i) {
-+ if (i->snapshot == k.k->p.snapshot) {
-+ snapshots_seen_exit(&i->seen);
-+ *i = n;
-+ return 0;
-+ }
-+
-+ if (i->snapshot >= k.k->p.snapshot)
-+ break;
-+ }
-+
-+ ret = darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n);
-+ /* The entry was not stored, so nothing else owns the duplicated array */
-+ if (ret)
-+ snapshots_seen_exit(&n.seen);
-+ return ret;
-+ }
-+ /*
-+ * Repair a pair of overlapping extents in @btree.
-+ *
-+ * @pos1 is the position (end) of the first extent and @pos1_seen its
-+ * snapshots_seen copy; @pos2 is the key of the second extent. Both keys are
-+ * looked up again, and if the fsck error is acted on one extent is
-+ * overwritten by the other and the change is committed. On a committed
-+ * repair *@fixed is set to true and @extent_end / @pos1_seen are adjusted
-+ * depending on which snapshot the overwrite happened in.
-+ *
-+ * Returns 0, -BCH_ERR_internal_fsck_err if either extent cannot be found
-+ * again, -BCH_ERR_transaction_restart_nested after overwriting the second
-+ * extent, or another negative error code.
-+ */
-+ static int overlapping_extents_found(struct btree_trans *trans,
-+ enum btree_id btree,
-+ struct bpos pos1, struct snapshots_seen *pos1_seen,
-+ struct bkey pos2,
-+ bool *fixed,
-+ struct extent_end *extent_end)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct printbuf buf = PRINTBUF;
-+ struct btree_iter iter1, iter2 = { NULL };
-+ struct bkey_s_c k1, k2;
-+ int ret;
-+ /* The caller must only pass extents that really overlap */
-+ BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));
-+
-+ bch2_trans_iter_init(trans, &iter1, btree, pos1,
-+ BTREE_ITER_ALL_SNAPSHOTS|
-+ BTREE_ITER_NOT_EXTENTS);
-+ k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX));
-+ ret = bkey_err(k1);
-+ if (ret)
-+ goto err;
-+
-+ prt_str(&buf, "\n ");
-+ bch2_bkey_val_to_text(&buf, c, k1);
-+ /* The first extent must be found exactly at @pos1 */
-+ if (!bpos_eq(pos1, k1.k->p)) {
-+ prt_str(&buf, "\n wanted\n ");
-+ bch2_bpos_to_text(&buf, pos1);
-+ prt_str(&buf, "\n ");
-+ bch2_bkey_to_text(&buf, &pos2);
-+
-+ bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
-+ __func__, buf.buf);
-+ ret = -BCH_ERR_internal_fsck_err;
-+ goto err;
-+ }
-+
-+ bch2_trans_copy_iter(&iter2, &iter1);
-+ /* Advance a second iterator until it reaches or passes @pos2's position */
-+ while (1) {
-+ bch2_btree_iter_advance(&iter2);
-+
-+ k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX));
-+ ret = bkey_err(k2);
-+ if (ret)
-+ goto err;
-+
-+ if (bpos_ge(k2.k->p, pos2.p))
-+ break;
-+ }
-+
-+ prt_str(&buf, "\n ");
-+ bch2_bkey_val_to_text(&buf, c, k2);
-+ /* The second extent must match @pos2 in both position and size */
-+ if (bpos_gt(k2.k->p, pos2.p) ||
-+ pos2.size != k2.k->size) {
-+ bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
-+ __func__, buf.buf);
-+ ret = -BCH_ERR_internal_fsck_err;
-+ goto err;
-+ }
-+
-+ prt_printf(&buf, "\n overwriting %s extent",
-+ pos1.snapshot >= pos2.p.snapshot ? "first" : "second");
-+
-+ if (fsck_err(c, extent_overlapping,
-+ "overlapping extents%s", buf.buf)) {
-+ struct btree_iter *old_iter = &iter1;
-+ struct disk_reservation res = { 0 };
-+ /* When the second extent is the one to overwrite, swap the roles of the two keys */
-+ if (pos1.snapshot < pos2.p.snapshot) {
-+ old_iter = &iter2;
-+ swap(k1, k2);
-+ }
-+
-+ trans->extra_journal_res += bch2_bkey_sectors_compressed(k2);
-+
-+ ret = bch2_trans_update_extent_overwrite(trans, old_iter,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
-+ k1, k2) ?:
-+ bch2_trans_commit(trans, &res, NULL,
-+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL);
-+ bch2_disk_reservation_put(c, &res);
-+
-+ if (ret)
-+ goto err;
-+
-+ *fixed = true;
-+
-+ if (pos1.snapshot == pos2.p.snapshot) {
-+ /*
-+ * We overwrote the first extent, and did the overwrite
-+ * in the same snapshot:
-+ */
-+ extent_end->offset = bkey_start_offset(&pos2);
-+ } else if (pos1.snapshot > pos2.p.snapshot) {
-+ /*
-+ * We overwrote the first extent in pos2's snapshot:
-+ */
-+ ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot);
-+ } else {
-+ /*
-+ * We overwrote the second extent - restart
-+ * check_extent() from the top:
-+ */
-+ ret = -BCH_ERR_transaction_restart_nested;
-+ }
-+ }
-+ fsck_err:
-+ err:
-+ bch2_trans_iter_exit(trans, &iter2);
-+ bch2_trans_iter_exit(trans, &iter1);
-+ printbuf_exit(&buf);
-+ return ret;
-+ }
-+ /*
-+ * Check extent @k against the extent ends recorded for the current inode
-+ * and repair any overlap via overlapping_extents_found(), then record @k's
-+ * own end with extent_ends_at().
-+ *
-+ * The recorded ends are reset when @k belongs to a different inode than
-+ * the last key processed. @equiv is not referenced in the body.
-+ * Returns 0 or a negative error code.
-+ */
-+ static int check_overlapping_extents(struct btree_trans *trans,
-+ struct snapshots_seen *seen,
-+ struct extent_ends *extent_ends,
-+ struct bkey_s_c k,
-+ u32 equiv,
-+ struct btree_iter *iter,
-+ bool *fixed)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct extent_end *i;
-+ int ret = 0;
-+
-+ /* transaction restart, running again */
-+ if (bpos_eq(extent_ends->last_pos, k.k->p))
-+ return 0;
-+
-+ if (extent_ends->last_pos.inode != k.k->p.inode)
-+ extent_ends_reset(extent_ends);
-+
-+ darray_for_each(extent_ends->e, i) {
-+ if (i->offset <= bkey_start_offset(k.k)) /* recorded extent ends at or before @k starts: no overlap */
-+ continue;
-+
-+ if (!ref_visible2(c,
-+ k.k->p.snapshot, seen,
-+ i->snapshot, &i->seen))
-+ continue;
-+
-+ ret = overlapping_extents_found(trans, iter->btree_id,
-+ SPOS(iter->pos.inode,
-+ i->offset,
-+ i->snapshot),
-+ &i->seen,
-+ *k.k, fixed, i);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = extent_ends_at(c, extent_ends, seen, k);
-+ if (ret)
-+ goto err;
-+
-+ extent_ends->last_pos = k.k->p;
-+ err:
-+ return ret;
-+ }
-+ /*
-+ * Log an error for every encoded crc entry of @k whose uncompressed size
-+ * exceeds c->opts.encoded_extent_max (converted to 512-byte sectors by the
-+ * ">> 9"). Nothing is repaired and the function always returns 0; @iter is
-+ * not referenced in the body.
-+ */
-+ static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c k)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ struct bch_extent_crc_unpacked crc;
-+ const union bch_extent_entry *i;
-+ unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;
-+
-+ bkey_for_each_crc(k.k, ptrs, crc, i)
-+ if (crc_is_encoded(crc) &&
-+ crc.uncompressed_size > encoded_extent_max_sectors) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf);
-+ printbuf_exit(&buf);
-+ }
-+
-+ return 0;
-+ }
-+ /*
-+ * Check a single key from the extents btree.
-+ *
-+ * @inode: walker caching the inodes for the inode number being walked;
-+ * sector counts are accumulated in its entries
-+ * @s: snapshots seen so far, updated with this key's position
-+ * @extent_ends: extent ends recorded so far, used for overlap detection
-+ *
-+ * Extents without an inode, or in an inode that is neither a regular file
-+ * nor a symlink, are deleted when the fsck error is acted on. Returns 0 or
-+ * a negative error code.
-+ */
-+ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ struct inode_walker *inode,
-+ struct snapshots_seen *s,
-+ struct extent_ends *extent_ends)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct inode_walker_entry *i;
-+ struct printbuf buf = PRINTBUF;
-+ struct bpos equiv = k.k->p;
-+ int ret = 0;
-+
-+ equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
-+
-+ ret = check_key_has_snapshot(trans, iter, k);
-+ if (ret) {
-+ ret = ret < 0 ? ret : 0; /* positive means the key was deleted */
-+ goto out;
-+ }
-+ /* Moving on to a new inode number: settle the sector counts of the previous one first */
-+ if (inode->last_pos.inode != k.k->p.inode) {
-+ ret = check_i_sectors(trans, inode);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout);
-+ ret = PTR_ERR_OR_ZERO(i);
-+ if (ret)
-+ goto err;
-+
-+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
-+ if (ret)
-+ goto err;
-+
-+ if (k.k->type != KEY_TYPE_whiteout) {
-+ if (fsck_err_on(!i, c, extent_in_missing_inode,
-+ "extent in missing inode:\n %s",
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
-+ goto delete;
-+
-+ if (fsck_err_on(i &&
-+ !S_ISREG(i->inode.bi_mode) &&
-+ !S_ISLNK(i->inode.bi_mode),
-+ c, extent_in_non_reg_inode,
-+ "extent in non regular inode mode %o:\n %s",
-+ i->inode.bi_mode,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
-+ goto delete;
-+
-+ ret = check_overlapping_extents(trans, s, extent_ends, k,
-+ equiv.snapshot, iter,
-+ &inode->recalculate_sums);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ /*
-+ * Check inodes in reverse order, from oldest snapshots to newest,
-+ * starting from the inode that matches this extent's snapshot. If we
-+ * didn't have one, iterate over all inodes:
-+ */
-+ if (!i)
-+ i = inode->inodes.data + inode->inodes.nr - 1;
-+
-+ for (;
-+ inode->inodes.data && i >= inode->inodes.data;
-+ --i) {
-+ if (i->snapshot > equiv.snapshot ||
-+ !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot))
-+ continue;
-+
-+ if (k.k->type != KEY_TYPE_whiteout) {
-+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
-+ k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
-+ !bkey_extent_is_reservation(k),
-+ c, extent_past_end_of_inode,
-+ "extent type past end of inode %llu:%u, i_size %llu\n %s",
-+ i->inode.bi_inum, i->snapshot, i->inode.bi_size,
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ struct btree_iter iter2;
-+ /* Delete the extent through a copy of the iterator set to this inode's snapshot */
-+ bch2_trans_copy_iter(&iter2, iter);
-+ bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
-+ ret = bch2_btree_iter_traverse(&iter2) ?:
-+ bch2_btree_delete_at(trans, &iter2,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+ bch2_trans_iter_exit(trans, &iter2);
-+ if (ret)
-+ goto err;
-+
-+ iter->k.type = KEY_TYPE_whiteout;
-+ }
-+ /* Accumulate sectors for the later comparison in check_i_sectors() */
-+ if (bkey_extent_is_allocation(k.k))
-+ i->count += k.k->size;
-+ }
-+
-+ i->seen_this_pos = true;
-+ }
-+ out:
-+ err:
-+ fsck_err:
-+ printbuf_exit(&buf);
-+ bch_err_fn(c, ret);
-+ return ret;
-+ delete:
-+ ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+ goto out;
-+ }
-+
-+ /*
-+ * Walk extents: verify that extents have a corresponding S_ISREG inode, and
-+ * that i_size and i_sectors are consistent.
-+ *
-+ * Every key of the extents btree, starting at the root inode number, is
-+ * passed to check_extent() and then check_extent_overbig(); a final
-+ * check_i_sectors() call covers the last inode walked.
-+ */
-+ int bch2_check_extents(struct bch_fs *c)
-+ {
-+ struct inode_walker w = inode_walker_init();
-+ struct snapshots_seen s;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct extent_ends extent_ends;
-+ struct disk_reservation res = { 0 };
-+ int ret = 0;
-+
-+ snapshots_seen_init(&s);
-+ extent_ends_init(&extent_ends);
-+
-+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
-+ POS(BCACHEFS_ROOT_INO, 0),
-+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
-+ &res, NULL,
-+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
-+ bch2_disk_reservation_put(c, &res);
-+ check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
-+ check_extent_overbig(trans, &iter, k);
-+ })) ?:
-+ check_i_sectors(trans, &w);
-+ /* Release everything acquired above */
-+ bch2_disk_reservation_put(c, &res);
-+ extent_ends_exit(&extent_ends);
-+ inode_walker_exit(&w);
-+ snapshots_seen_exit(&s);
-+ bch2_trans_put(trans);
-+
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+ /*
-+ * Run check_extent_overbig() over every key in the reflink btree, starting
-+ * at POS_MIN. Returns the iteration's result.
-+ */
-+ int bch2_check_indirect_extents(struct bch_fs *c)
-+ {
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct disk_reservation res = { 0 };
-+ int ret = 0;
-+
-+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
-+ POS_MIN,
-+ BTREE_ITER_PREFETCH, k,
-+ &res, NULL,
-+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
-+ bch2_disk_reservation_put(c, &res);
-+ check_extent_overbig(trans, &iter, k);
-+ }));
-+
-+ bch2_disk_reservation_put(c, &res);
-+ bch2_trans_put(trans);
-+
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+ /*
-+ * Compare the subdirectory count accumulated in each cached inode entry of
-+ * @w (i->count) with the inode's bi_nlink field and repair mismatches.
-+ *
-+ * A mismatching count is recomputed with bch2_count_subdirs(); if the
-+ * accumulated value was wrong, that is logged and the recomputed value is
-+ * used instead. Returns 0, a negative error code, or the result of
-+ * trans_was_restarted() for the restart count sampled on entry.
-+ */
-+ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct inode_walker_entry *i;
-+ u32 restart_count = trans->restart_count;
-+ int ret = 0;
-+ s64 count2;
-+
-+ darray_for_each(w->inodes, i) {
-+ if (i->inode.bi_nlink == i->count)
-+ continue;
-+
-+ count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
-+ if (count2 < 0) /* negative values are error codes */
-+ return count2;
-+
-+ if (i->count != count2) {
-+ bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
-+ i->count, count2);
-+ i->count = count2;
-+ if (i->inode.bi_nlink == i->count)
-+ continue;
-+ }
-+
-+ if (fsck_err_on(i->inode.bi_nlink != i->count,
-+ c, inode_dir_wrong_nlink,
-+ "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
-+ w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
-+ i->inode.bi_nlink = i->count;
-+ ret = fsck_write_inode(trans, &i->inode, i->snapshot);
-+ if (ret)
-+ break;
-+ }
-+ }
-+ fsck_err:
-+ bch_err_fn(c, ret);
-+ return ret ?: trans_was_restarted(trans, restart_count);
-+ }
-+ /*
-+ * Check that dirent @d and the inode it points to (@target, living in
-+ * @target_snapshot) agree with each other.
-+ *
-+ * Covers the inode's backpointer (bi_dir, bi_dir_offset), directories with
-+ * more than one link, inodes with links but bi_nlink 0, the dirent's
-+ * d_type, and for subvolume dirents the d_parent_subvol field. Repairs are
-+ * written through __write_inode(), __remove_dirent() or an update of the
-+ * dirent at @iter. Returns 0 or a negative error code.
-+ */
-+ static int check_dirent_target(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c_dirent d,
-+ struct bch_inode_unpacked *target,
-+ u32 target_snapshot)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct bkey_i_dirent *n;
-+ bool backpointer_exists = true;
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+ /* An all-zero backpointer is simply filled in from this dirent */
-+ if (!target->bi_dir &&
-+ !target->bi_dir_offset) {
-+ target->bi_dir = d.k->p.inode;
-+ target->bi_dir_offset = d.k->p.offset;
-+
-+ ret = __write_inode(trans, target, target_snapshot);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ if (!inode_points_to_dirent(target, d)) {
-+ ret = inode_backpointer_exists(trans, target, d.k->p.snapshot);
-+ if (ret < 0)
-+ goto err;
-+
-+ backpointer_exists = ret;
-+ ret = 0;
-+
-+ if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists,
-+ c, inode_dir_multiple_links,
-+ "directory %llu with multiple links",
-+ target->bi_inum)) {
-+ ret = __remove_dirent(trans, d.k->p);
-+ goto out;
-+ }
-+
-+ if (fsck_err_on(backpointer_exists && !target->bi_nlink,
-+ c, inode_multiple_links_but_nlink_0,
-+ "inode %llu type %s has multiple links but i_nlink 0",
-+ target->bi_inum, bch2_d_types[d.v->d_type])) {
-+ target->bi_nlink++;
-+ target->bi_flags &= ~BCH_INODE_unlinked;
-+
-+ ret = __write_inode(trans, target, target_snapshot);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ if (fsck_err_on(!backpointer_exists,
-+ c, inode_wrong_backpointer,
-+ "inode %llu:%u has wrong backpointer:\n"
-+ "got %llu:%llu\n"
-+ "should be %llu:%llu",
-+ target->bi_inum, target_snapshot,
-+ target->bi_dir,
-+ target->bi_dir_offset,
-+ d.k->p.inode,
-+ d.k->p.offset)) {
-+ target->bi_dir = d.k->p.inode;
-+ target->bi_dir_offset = d.k->p.offset;
-+
-+ ret = __write_inode(trans, target, target_snapshot);
-+ if (ret)
-+ goto err;
-+ }
-+ }
-+
-+ if (fsck_err_on(d.v->d_type != inode_d_type(target),
-+ c, dirent_d_type_wrong,
-+ "incorrect d_type: got %s, should be %s:\n%s",
-+ bch2_d_type_str(d.v->d_type),
-+ bch2_d_type_str(inode_d_type(target)),
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
-+ n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
-+ ret = PTR_ERR_OR_ZERO(n);
-+ if (ret)
-+ goto err;
-+
-+ bkey_reassemble(&n->k_i, d.s_c);
-+ n->v.d_type = inode_d_type(target);
-+
-+ ret = bch2_trans_update(trans, iter, &n->k_i, 0);
-+ if (ret)
-+ goto err;
-+ /* Later checks must look at the corrected copy */
-+ d = dirent_i_to_s_c(n);
-+ }
-+
-+ if (d.v->d_type == DT_SUBVOL &&
-+ target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) &&
-+ (c->sb.version < bcachefs_metadata_version_subvol_dirent ||
-+ fsck_err(c, dirent_d_parent_subvol_wrong,
-+ "dirent has wrong d_parent_subvol field: got %u, should be %u",
-+ le32_to_cpu(d.v->d_parent_subvol),
-+ target->bi_parent_subvol))) {
-+ n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
-+ ret = PTR_ERR_OR_ZERO(n);
-+ if (ret)
-+ goto err;
-+
-+ bkey_reassemble(&n->k_i, d.s_c);
-+ n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
-+
-+ ret = bch2_trans_update(trans, iter, &n->k_i, 0);
-+ if (ret)
-+ goto err;
-+
-+ d = dirent_i_to_s_c(n);
-+ }
-+ out:
-+ err:
-+ fsck_err:
-+ printbuf_exit(&buf);
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+ /*
-+ * Check a single key from the dirents btree.
-+ *
-+ * @hash_info: hash parameters of the containing directory; (re)initialized
-+ * from the first cached inode when dir->first_this_inode is set
-+ * @dir: walker caching the inodes of the containing directory;
-+ * subdirectory counts are accumulated in its entries
-+ * @target: walker refilled with the inodes the dirent points to
-+ * @s: snapshots seen so far, updated with this key's position
-+ *
-+ * Returns 0 (also when the key was deleted) or a negative error code.
-+ */
-+ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ struct bch_hash_info *hash_info,
-+ struct inode_walker *dir,
-+ struct inode_walker *target,
-+ struct snapshots_seen *s)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct bkey_s_c_dirent d;
-+ struct inode_walker_entry *i;
-+ struct printbuf buf = PRINTBUF;
-+ struct bpos equiv;
-+ int ret = 0;
-+
-+ ret = check_key_has_snapshot(trans, iter, k);
-+ if (ret) {
-+ ret = ret < 0 ? ret : 0; /* positive means the key was deleted */
-+ goto out;
-+ }
-+
-+ equiv = k.k->p;
-+ equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
-+
-+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
-+ if (ret)
-+ goto err;
-+
-+ if (k.k->type == KEY_TYPE_whiteout)
-+ goto out;
-+ /* Moving on to a new directory: settle the subdirectory counts of the previous one first */
-+ if (dir->last_pos.inode != k.k->p.inode) {
-+ ret = check_subdir_count(trans, dir);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ BUG_ON(!iter->path->should_be_locked);
-+
-+ i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout); /* whiteouts already left above, so the last argument is false here */
-+ ret = PTR_ERR_OR_ZERO(i);
-+ if (ret < 0)
-+ goto err;
-+
-+ if (dir->first_this_inode && dir->inodes.nr)
-+ *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
-+ dir->first_this_inode = false;
-+
-+ if (fsck_err_on(!i, c, dirent_in_missing_dir_inode,
-+ "dirent in nonexisting directory:\n%s",
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ ret = bch2_btree_delete_at(trans, iter,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+ goto out;
-+ }
-+
-+ if (!i)
-+ goto out;
-+
-+ if (fsck_err_on(!S_ISDIR(i->inode.bi_mode),
-+ c, dirent_in_non_dir_inode,
-+ "dirent in non directory inode type %s:\n%s",
-+ bch2_d_type_str(inode_d_type(&i->inode)),
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ ret = bch2_btree_delete_at(trans, iter, 0);
-+ goto out;
-+ }
-+
-+ ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k);
-+ if (ret < 0)
-+ goto err;
-+ if (ret) {
-+ /* dirent has been deleted */
-+ ret = 0;
-+ goto out;
-+ }
-+
-+ if (k.k->type != KEY_TYPE_dirent)
-+ goto out;
-+
-+ d = bkey_s_c_to_dirent(k);
-+
-+ if (d.v->d_type == DT_SUBVOL) {
-+ struct bch_inode_unpacked subvol_root;
-+ u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
-+ u32 target_snapshot;
-+ u64 target_inum;
-+
-+ ret = __subvol_lookup(trans, target_subvol,
-+ &target_snapshot, &target_inum);
-+ if (ret && !bch2_err_matches(ret, ENOENT))
-+ goto err;
-+ /* Only ENOENT can be left in ret at this point */
-+ if (fsck_err_on(ret, c, dirent_to_missing_subvol,
-+ "dirent points to missing subvolume %u",
-+ le32_to_cpu(d.v->d_child_subvol))) {
-+ ret = __remove_dirent(trans, d.k->p);
-+ goto err;
-+ }
-+
-+ ret = __lookup_inode(trans, target_inum,
-+ &subvol_root, &target_snapshot);
-+ if (ret && !bch2_err_matches(ret, ENOENT))
-+ goto err;
-+
-+ if (fsck_err_on(ret, c, subvol_to_missing_root,
-+ "subvolume %u points to missing subvolume root %llu",
-+ target_subvol,
-+ target_inum)) {
-+ bch_err(c, "repair not implemented yet");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
-+ c, subvol_root_wrong_bi_subvol,
-+ "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
-+ target_inum,
-+ subvol_root.bi_subvol, target_subvol)) {
-+ subvol_root.bi_subvol = target_subvol;
-+ ret = __write_inode(trans, &subvol_root, target_snapshot);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = check_dirent_target(trans, iter, d, &subvol_root,
-+ target_snapshot);
-+ if (ret)
-+ goto err;
-+ } else {
-+ ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
-+ if (ret)
-+ goto err;
-+
-+ if (fsck_err_on(!target->inodes.nr,
-+ c, dirent_to_missing_inode,
-+ "dirent points to missing inode: (equiv %u)\n%s",
-+ equiv.snapshot,
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, k),
-+ buf.buf))) {
-+ ret = __remove_dirent(trans, d.k->p);
-+ if (ret)
-+ goto err;
-+ }
-+ /* Check the dirent against every target inode that was collected */
-+ darray_for_each(target->inodes, i) {
-+ ret = check_dirent_target(trans, iter, d,
-+ &i->inode, i->snapshot);
-+ if (ret)
-+ goto err;
-+ }
-+ }
-+ /* Count this subdirectory for the later comparison in check_subdir_count() */
-+ if (d.v->d_type == DT_DIR)
-+ for_each_visible_inode(c, s, dir, equiv.snapshot, i)
-+ i->count++;
-+
-+ out:
-+ err:
-+ fsck_err:
-+ printbuf_exit(&buf);
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+
-+ /*
-+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
-+ * validate d_type
-+ *
-+ * Every key of the dirents btree, starting at the root inode number, is
-+ * passed to check_dirent(). Returns the iteration's result.
-+ */
-+ int bch2_check_dirents(struct bch_fs *c)
-+ {
-+ struct inode_walker dir = inode_walker_init();
-+ struct inode_walker target = inode_walker_init();
-+ struct snapshots_seen s;
-+ struct bch_hash_info hash_info;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ snapshots_seen_init(&s);
-+
-+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
-+ POS(BCACHEFS_ROOT_INO, 0),
-+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
-+ k,
-+ NULL, NULL,
-+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+ check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s));
-+ /* Release the transaction, the snapshot state and both walkers */
-+ bch2_trans_put(trans);
-+ snapshots_seen_exit(&s);
-+ inode_walker_exit(&dir);
-+ inode_walker_exit(&target);
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+
-+ /*
-+ * Check a single key from the xattrs btree.
-+ *
-+ * @hash_info: hash parameters of the owning inode; (re)initialized from the
-+ * first cached inode when inode->first_this_inode is set
-+ * @inode: walker caching the inodes for the inode number being walked
-+ *
-+ * Xattrs without an inode are deleted when the fsck error is acted on.
-+ * Returns 0 (also when the key was deleted) or a negative error code.
-+ */
-+ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ struct bch_hash_info *hash_info,
-+ struct inode_walker *inode)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct inode_walker_entry *i;
-+ int ret;
-+
-+ ret = check_key_has_snapshot(trans, iter, k);
-+ if (ret < 0)
-+ return ret;
-+ /*
-+ * Positive means the key was deleted: that is not an error, so report
-+ * success like check_inode(), check_extent() and check_dirent() do.
-+ */
-+ if (ret)
-+ return 0;
-+
-+ i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout);
-+ ret = PTR_ERR_OR_ZERO(i);
-+ if (ret)
-+ return ret;
-+
-+ if (inode->first_this_inode && inode->inodes.nr)
-+ *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode);
-+ inode->first_this_inode = false;
-+
-+ if (fsck_err_on(!i, c, xattr_in_missing_inode,
-+ "xattr for missing inode %llu",
-+ k.k->p.inode))
-+ return bch2_btree_delete_at(trans, iter, 0);
-+
-+ if (!i)
-+ return 0;
-+
-+ ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
-+ /* hash_check_key() returns 1 after deleting a duplicate key; not an error */
-+ if (ret > 0)
-+ ret = 0;
-+ fsck_err:
-+ bch_err_fn(c, ret);
-+ return ret;
-+ }
-+
-+/*
-+ * Walk xattrs: verify that they all have a corresponding inode
-+ */
-+int bch2_check_xattrs(struct bch_fs *c)
-+{
-+ struct inode_walker inode = inode_walker_init();
-+ struct bch_hash_info hash_info;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ ret = bch2_trans_run(c,
-+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
-+ POS(BCACHEFS_ROOT_INO, 0),
-+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
-+ k,
-+ NULL, NULL,
-+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+ check_xattr(trans, &iter, k, &hash_info, &inode)));
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+ /*
-+ * Make sure the root subvolume and the root directory inode exist.
-+ *
-+ * A missing root subvolume is recreated pointing at BCACHEFS_ROOT_INO in
-+ * snapshot U32_MAX; a missing root inode, or one that is not a directory,
-+ * is replaced by a freshly initialized directory inode.
-+ * Returns 0 or a negative error code.
-+ */
-+ static int check_root_trans(struct btree_trans *trans)
-+ {
-+ struct bch_fs *c = trans->c;
-+ struct bch_inode_unpacked root_inode;
-+ u32 snapshot;
-+ u64 inum;
-+ int ret;
-+
-+ ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
-+ if (ret && !bch2_err_matches(ret, ENOENT))
-+ return ret;
-+ /* Only ENOENT can be left in ret at this point */
-+ if (mustfix_fsck_err_on(ret, c, root_subvol_missing,
-+ "root subvol missing")) {
-+ struct bkey_i_subvolume root_subvol;
-+
-+ snapshot = U32_MAX;
-+ inum = BCACHEFS_ROOT_INO;
-+
-+ bkey_subvolume_init(&root_subvol.k_i);
-+ root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
-+ root_subvol.v.flags = 0;
-+ root_subvol.v.snapshot = cpu_to_le32(snapshot);
-+ root_subvol.v.inode = cpu_to_le64(inum);
-+ ret = commit_do(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_LAZY_RW,
-+ bch2_btree_insert_trans(trans, BTREE_ID_subvolumes,
-+ &root_subvol.k_i, 0));
-+ bch_err_msg(c, ret, "writing root subvol");
-+ if (ret)
-+ goto err;
-+
-+ }
-+
-+ ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
-+ if (ret && !bch2_err_matches(ret, ENOENT))
-+ return ret;
-+ /* The second check only runs when the first one did not trigger (short-circuit ||) */
-+ if (mustfix_fsck_err_on(ret, c, root_dir_missing,
-+ "root directory missing") ||
-+ mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode),
-+ c, root_inode_not_dir,
-+ "root inode not a directory")) {
-+ bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
-+ 0, NULL);
-+ root_inode.bi_inum = inum;
-+
-+ ret = __write_inode(trans, &root_inode, snapshot);
-+ bch_err_msg(c, ret, "writing root inode");
-+ }
-+ err:
-+ fsck_err:
-+ return ret;
-+ }
-+
-+/*
-+ * Get root directory, create if it doesn't exist:
-+ *
-+ * Runs check_root_trans() via bch2_trans_do(); the result is passed to
-+ * bch_err_fn() and returned.
-+ */
-+int bch2_check_root(struct bch_fs *c)
-+{
-+ int ret;
-+
-+ ret = bch2_trans_do(c, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_LAZY_RW,
-+ check_root_trans(trans));
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/* One directory recorded by check_path() while walking towards the root: */
-+struct pathbuf_entry {
-+ u64 inum;
-+ u32 snapshot;
-+};
-+
-+/* Dynamic array of visited (inum, snapshot) pairs: */
-+typedef DARRAY(struct pathbuf_entry) pathbuf;
-+
-+/* Return true if @p already holds an entry matching both @inum and @snapshot. */
-+static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
-+{
-+	struct pathbuf_entry *ent;
-+
-+	darray_for_each(*p, ent) {
-+		if (ent->inum != inum)
-+			continue;
-+		if (ent->snapshot == snapshot)
-+			return true;
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Append an (@inum, @snapshot) entry to @p.
-+ *
-+ * Returns the darray_push() result; a nonzero result is also logged.
-+ */
-+static int path_down(struct bch_fs *c, pathbuf *p,
-+		     u64 inum, u32 snapshot)
-+{
-+	struct pathbuf_entry visited = {
-+		.inum		= inum,
-+		.snapshot	= snapshot,
-+	};
-+	int ret = darray_push(p, visited);
-+
-+	if (!ret)
-+		return 0;
-+
-+	bch_err(c, "fsck: error allocating memory for pathbuf, size %zu",
-+		p->size);
-+	return ret;
-+}
-+
-+/*
-+ * Check that a given inode is reachable from the root:
-+ *
-+ * Starting at @inode, repeatedly looks up the dirent named by the inode's
-+ * bi_dir/bi_dir_offset back-reference and then moves to the parent directory,
-+ * until the root inode of the root subvolume is reached. Directories visited
-+ * on the way are recorded in @p (which is reset on entry) so that a loop in
-+ * the directory structure can be detected.
-+ *
-+ * Note that @inode is overwritten with each parent in turn.
-+ *
-+ * XXX: we should also be verifying that inodes are in the right subvolumes
-+ */
-+static int check_path(struct btree_trans *trans,
-+ pathbuf *p,
-+ struct bch_inode_unpacked *inode,
-+ u32 snapshot)
-+{
-+ struct bch_fs *c = trans->c;
-+ int ret = 0;
-+
-+ snapshot = bch2_snapshot_equiv(c, snapshot);
-+ p->nr = 0;
-+
-+ while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
-+ inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
-+ struct btree_iter dirent_iter;
-+ struct bkey_s_c_dirent d;
-+ u32 parent_snapshot = snapshot;
-+
-+ /* For an inode with bi_subvol set, take the snapshot from its parent subvolume: */
-+ if (inode->bi_subvol) {
-+ u64 inum;
-+
-+ ret = subvol_lookup(trans, inode->bi_parent_subvol,
-+ &parent_snapshot, &inum);
-+ if (ret)
-+ break;
-+ }
-+
-+ /* Fetch the dirent the inode's back-reference points at: */
-+ ret = lockrestart_do(trans,
-+ PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter,
-+ SPOS(inode->bi_dir, inode->bi_dir_offset,
-+ parent_snapshot))).k));
-+ if (ret && !bch2_err_matches(ret, ENOENT))
-+ break;
-+
-+ /* A dirent that exists but points elsewhere is treated like a missing one: */
-+ if (!ret && !dirent_points_to_inode(d, inode)) {
-+ bch2_trans_iter_exit(trans, &dirent_iter);
-+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
-+ }
-+
-+ if (bch2_err_matches(ret, ENOENT)) {
-+ if (fsck_err(c, inode_unreachable,
-+ "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
-+ inode->bi_inum, snapshot,
-+ bch2_d_type_str(inode_d_type(inode)),
-+ inode->bi_nlink,
-+ inode->bi_dir,
-+ inode->bi_dir_offset))
-+ ret = reattach_inode(trans, inode, snapshot);
-+ break;
-+ }
-+
-+ bch2_trans_iter_exit(trans, &dirent_iter);
-+
-+ /* Nothing more to record for a non-directory; stop here: */
-+ if (!S_ISDIR(inode->bi_mode))
-+ break;
-+
-+ /*
-+ * NOTE(review): path_down() already logs its failure, so this
-+ * logs twice; the early return also skips bch_err_fn() below.
-+ */
-+ ret = path_down(c, p, inode->bi_inum, snapshot);
-+ if (ret) {
-+ bch_err(c, "memory allocation failure");
-+ return ret;
-+ }
-+
-+ snapshot = parent_snapshot;
-+
-+ /* Step up: @inode now describes the parent directory */
-+ ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
-+ if (ret) {
-+ /* Should have been caught in dirents pass */
-+ bch_err(c, "error looking up parent directory: %i", ret);
-+ break;
-+ }
-+
-+ /* Seeing the same (inum, snapshot) twice means the path loops: */
-+ if (path_is_dup(p, inode->bi_inum, snapshot)) {
-+ struct pathbuf_entry *i;
-+
-+ /* XXX print path */
-+ bch_err(c, "directory structure loop");
-+
-+ darray_for_each(*p, i)
-+ pr_err("%llu:%u", i->inum, i->snapshot);
-+ pr_err("%llu:%u", inode->bi_inum, snapshot);
-+
-+ if (!fsck_err(c, dir_loop,
-+ "directory structure loop"))
-+ return 0;
-+
-+ /* Break the loop: drop the back-reference, then reattach the inode */
-+ ret = commit_do(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL|
-+ BTREE_INSERT_LAZY_RW,
-+ remove_backpointer(trans, inode));
-+ if (ret) {
-+ bch_err(c, "error removing dirent: %i", ret);
-+ break;
-+ }
-+
-+ ret = reattach_inode(trans, inode, snapshot);
-+ }
-+ }
-+fsck_err:
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/*
-+ * Check for unreachable inodes, as well as loops in the directory structure:
-+ * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's
-+ * unreachable:
-+ *
-+ * Walks every key in BTREE_ID_inodes (all snapshots) and runs check_path()
-+ * on each inode that is not flagged BCH_INODE_unlinked. Stops at the first
-+ * error, which is passed to bch_err_fn() and returned.
-+ */
-+int bch2_check_directory_structure(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_inode_unpacked u;
-+ /* Scratch path buffer reused across check_path() calls, freed below: */
-+ pathbuf path = { 0, };
-+ int ret;
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN,
-+ BTREE_ITER_INTENT|
-+ BTREE_ITER_PREFETCH|
-+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+ if (!bkey_is_inode(k.k))
-+ continue;
-+
-+ ret = bch2_inode_unpack(k, &u);
-+ if (ret) {
-+ /* Should have been caught earlier in fsck: */
-+ bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
-+ break;
-+ }
-+
-+ /* Unlinked inodes are skipped by this pass: */
-+ if (u.bi_flags & BCH_INODE_unlinked)
-+ continue;
-+
-+ ret = check_path(trans, &path, &u, iter.pos.snapshot);
-+ if (ret)
-+ break;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ darray_exit(&path);
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/* Growable array of per-inode link counters, filled by add_nlink(): */
-+struct nlink_table {
-+ size_t nr; /* entries in use */
-+ size_t size; /* entries allocated in @d */
-+
-+ struct nlink {
-+ u64 inum;
-+ u32 snapshot;
-+ u32 count; /* bumped by inc_link() */
-+ } *d;
-+};
-+
-+/*
-+ * Append an entry for (@inum, @snapshot) with a zero count to @t, growing the
-+ * backing array (doubling, with a minimum of 128 entries) when it is full.
-+ *
-+ * Returns 0, or -BCH_ERR_ENOMEM_fsck_add_nlink if the array could not grow.
-+ */
-+static int add_nlink(struct bch_fs *c, struct nlink_table *t,
-+		     u64 inum, u32 snapshot)
-+{
-+	struct nlink *slot;
-+
-+	if (t->nr == t->size) {
-+		size_t grown = max_t(size_t, 128UL, t->size * 2);
-+		struct nlink *bigger = kvmalloc_array(grown, sizeof(*bigger), GFP_KERNEL);
-+
-+		if (!bigger) {
-+			bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
-+				grown);
-+			return -BCH_ERR_ENOMEM_fsck_add_nlink;
-+		}
-+
-+		/* Carry the existing entries over before releasing the old array: */
-+		if (t->d)
-+			memcpy(bigger, t->d, t->size * sizeof(*bigger));
-+		kvfree(t->d);
-+
-+		t->d	= bigger;
-+		t->size	= grown;
-+	}
-+
-+	slot = &t->d[t->nr++];
-+	slot->inum	= inum;
-+	slot->snapshot	= snapshot;
-+	slot->count	= 0;
-+
-+	return 0;
-+}
-+
-+/*
-+ * bsearch comparator for the nlink table: orders entries by inode number only.
-+ *
-+ * inc_link() needs to find *any* entry for an inode and then scans all of that
-+ * inode's snapshot entries itself. Comparing the snapshot as well would make
-+ * the search succeed only for an entry whose snapshot equals the one in the
-+ * search key, so hardlinks that exist only in other snapshots were never
-+ * counted.
-+ */
-+static int nlink_cmp(const void *_l, const void *_r)
-+{
-+	const struct nlink *l = _l;
-+	const struct nlink *r = _r;
-+
-+	return cmp_int(l->inum, r->inum);
-+}
-+
-+/*
-+ * Account for one dirent that references @inum as seen from @snapshot.
-+ *
-+ * Does nothing if @inum is outside [@range_start, @range_end) or has no entry
-+ * in @links. Otherwise walks every table entry for @inum in array order and
-+ * increments the count of each one for which ref_visible() holds, stopping
-+ * after the first such entry whose snapshot is >= @snapshot.
-+ */
-+static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
-+		     struct nlink_table *links,
-+		     u64 range_start, u64 range_end, u64 inum, u32 snapshot)
-+{
-+	struct nlink *link, key = {
-+		.inum = inum, .snapshot = U32_MAX,
-+	};
-+
-+	if (inum < range_start || inum >= range_end)
-+		return;
-+
-+	link = __inline_bsearch(&key, links->d, links->nr,
-+				sizeof(links->d[0]), nlink_cmp);
-+	if (!link)
-+		return;
-+
-+	/* The search may land anywhere in the run for @inum; rewind to its start: */
-+	while (link > links->d && link[0].inum == link[-1].inum)
-+		--link;
-+
-+	for (; link < links->d + links->nr && link->inum == inum; link++)
-+		if (ref_visible(c, s, snapshot, link->snapshot)) {
-+			link->count++;
-+			if (link->snapshot >= snapshot)
-+				break;
-+		}
-+}
-+
-+/*
-+ * First pass of bch2_check_nlinks(): add an entry to @t for every
-+ * non-directory inode with a nonzero bi_nlink, walking BTREE_ID_inodes from
-+ * offset @start.
-+ *
-+ * If add_nlink() fails, *@end is set to the offset of the inode that did not
-+ * fit and the failure is cleared, so the caller can process [@start, *@end)
-+ * and resume from there. *@end is left untouched otherwise.
-+ *
-+ * Returns 0, or the btree iteration error.
-+ */
-+noinline_for_stack
-+static int check_nlinks_find_hardlinks(struct bch_fs *c,
-+ struct nlink_table *t,
-+ u64 start, u64 *end)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_inode_unpacked u;
-+ int ret = 0;
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_inodes,
-+ POS(0, start),
-+ BTREE_ITER_INTENT|
-+ BTREE_ITER_PREFETCH|
-+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+ if (!bkey_is_inode(k.k))
-+ continue;
-+
-+ /* Should never fail, checked by bch2_inode_invalid: */
-+ BUG_ON(bch2_inode_unpack(k, &u));
-+
-+ /*
-+ * Backpointer and directory structure checks are sufficient for
-+ * directories, since they can't have hardlinks:
-+ */
-+ if (S_ISDIR(u.bi_mode))
-+ continue;
-+
-+ if (!u.bi_nlink)
-+ continue;
-+
-+ ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
-+ if (ret) {
-+ /* Table is full: report where to resume rather than failing */
-+ *end = k.k->p.offset;
-+ ret = 0;
-+ break;
-+ }
-+
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+
-+ if (ret)
-+ bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Second pass of bch2_check_nlinks(): walk every key in BTREE_ID_dirents and
-+ * call inc_link() for each dirent whose d_type is neither DT_DIR nor
-+ * DT_SUBVOL, so that @links ends up holding the observed link counts for
-+ * inodes in [@range_start, @range_end).
-+ *
-+ * Returns 0, or the error that stopped the walk.
-+ */
-+noinline_for_stack
-+static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
-+ u64 range_start, u64 range_end)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct snapshots_seen s;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_s_c_dirent d;
-+ int ret;
-+
-+ snapshots_seen_init(&s);
-+
-+ for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
-+ BTREE_ITER_INTENT|
-+ BTREE_ITER_PREFETCH|
-+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+ /* Keep @s current; inc_link() hands it to ref_visible(): */
-+ ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
-+ if (ret)
-+ break;
-+
-+ switch (k.k->type) {
-+ case KEY_TYPE_dirent:
-+ d = bkey_s_c_to_dirent(k);
-+
-+ if (d.v->d_type != DT_DIR &&
-+ d.v->d_type != DT_SUBVOL)
-+ inc_link(c, &s, links, range_start, range_end,
-+ le64_to_cpu(d.v->d_inum),
-+ bch2_snapshot_equiv(c, d.k->p.snapshot));
-+ break;
-+ }
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (ret)
-+ bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
-+
-+ bch2_trans_put(trans);
-+ snapshots_seen_exit(&s);
-+ return ret;
-+}
-+
-+/*
-+ * Third pass of bch2_check_nlinks(), called once per inode key: compare the
-+ * inode's stored link count with the count accumulated in @links and rewrite
-+ * the inode if fsck decides to fix a mismatch.
-+ *
-+ * @idx is a cursor into @links that persists across calls.
-+ *
-+ * Returns 1 once the key is at or past @range_end (the caller treats only
-+ * negative values as errors), 0 when there is nothing to do, otherwise the
-+ * result of __write_inode().
-+ */
-+static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ struct nlink_table *links,
-+ size_t *idx, u64 range_end)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_inode_unpacked u;
-+ struct nlink *link = &links->d[*idx];
-+ int ret = 0;
-+
-+ if (k.k->p.offset >= range_end)
-+ return 1;
-+
-+ if (!bkey_is_inode(k.k))
-+ return 0;
-+
-+ BUG_ON(bch2_inode_unpack(k, &u));
-+
-+ /* Same filter as check_nlinks_find_hardlinks() uses when filling the table: */
-+ if (S_ISDIR(u.bi_mode))
-+ return 0;
-+
-+ if (!u.bi_nlink)
-+ return 0;
-+
-+ /*
-+ * Advance the cursor to the first entry not ordered before this key.
-+ *
-+ * NOTE(review): link is dereferenced before any bounds check, and the
-+ * BUG_ON() tests *idx before the increment, so links->d[links->nr] can
-+ * be read once the table is exhausted - confirm that a matching entry
-+ * is always present.
-+ */
-+ while ((cmp_int(link->inum, k.k->p.offset) ?:
-+ cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
-+ BUG_ON(*idx == links->nr);
-+ link = &links->d[++*idx];
-+ }
-+
-+ if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count,
-+ c, inode_wrong_nlink,
-+ "inode %llu type %s has wrong i_nlink (%u, should be %u)",
-+ u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
-+ bch2_inode_nlink_get(&u), link->count)) {
-+ bch2_inode_nlink_set(&u, link->count);
-+ ret = __write_inode(trans, &u, k.k->p.snapshot);
-+ }
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Run check_nlinks_update_inode() over the inodes btree starting at offset
-+ * @range_start, committing after each key.
-+ *
-+ * Only a negative result from the walk is an error (logged and returned);
-+ * anything else is reported as 0.
-+ */
-+noinline_for_stack
-+static int check_nlinks_update_hardlinks(struct bch_fs *c,
-+					 struct nlink_table *links,
-+					 u64 range_start, u64 range_end)
-+{
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	size_t cursor = 0;
-+	int ret;
-+
-+	ret = bch2_trans_run(c,
-+		for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
-+				POS(0, range_start),
-+				BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
-+				NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+			check_nlinks_update_inode(trans, &iter, k, links, &cursor, range_end)));
-+	if (ret >= 0)
-+		return 0;
-+
-+	bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
-+	return ret;
-+}
-+
-+/*
-+ * Verify the link counts of non-directory inodes against the dirents that
-+ * reference them.
-+ *
-+ * Works in ranges of inode numbers so the link table need not fit in memory
-+ * all at once. Each round:
-+ *  1. check_nlinks_find_hardlinks() fills the table starting at the current
-+ *     range start, and lowers next_iter_range_start from U64_MAX if the table
-+ *     could not grow;
-+ *  2. check_nlinks_walk_dirents() counts the dirents pointing into the range;
-+ *  3. check_nlinks_update_hardlinks() compares and repairs the inodes.
-+ * The loop ends once a round covered everything up to U64_MAX.
-+ *
-+ * Returns 0 on success or the first error from any pass.
-+ */
-+int bch2_check_nlinks(struct bch_fs *c)
-+{
-+	struct nlink_table links = { 0 };
-+	u64 this_iter_range_start, next_iter_range_start = 0;
-+	int ret = 0;
-+
-+	do {
-+		this_iter_range_start = next_iter_range_start;
-+		next_iter_range_start = U64_MAX;
-+
-+		ret = check_nlinks_find_hardlinks(c, &links,
-+						  this_iter_range_start,
-+						  &next_iter_range_start);
-+		/*
-+		 * Stop here on failure: otherwise the next pass would overwrite
-+		 * the error and then run against an incomplete table.
-+		 */
-+		if (ret)
-+			break;
-+
-+		ret = check_nlinks_walk_dirents(c, &links,
-+						this_iter_range_start,
-+						next_iter_range_start);
-+		if (ret)
-+			break;
-+
-+		ret = check_nlinks_update_hardlinks(c, &links,
-+						    this_iter_range_start,
-+						    next_iter_range_start);
-+		if (ret)
-+			break;
-+
-+		/* Reuse the allocation for the next range: */
-+		links.nr = 0;
-+	} while (next_iter_range_start != U64_MAX);
-+
-+	kvfree(links.d);
-+	bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+/*
-+ * Clear front_pad and back_pad on a reflink_p key.
-+ *
-+ * Keys of any other type, and reflink_p keys whose pads are already zero, are
-+ * left alone (returns 0). Otherwise a copy of the key with both pads zeroed
-+ * is queued with BTREE_TRIGGER_NORUN and the bch2_trans_update() result is
-+ * returned.
-+ */
-+static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter,
-+			     struct bkey_s_c k)
-+{
-+	struct bkey_s_c_reflink_p old;
-+	struct bkey_i_reflink_p *fixed;
-+
-+	if (k.k->type != KEY_TYPE_reflink_p)
-+		return 0;
-+
-+	old = bkey_s_c_to_reflink_p(k);
-+	if (!(old.v->front_pad || old.v->back_pad))
-+		return 0;
-+
-+	fixed = bch2_trans_kmalloc(trans, sizeof(*fixed));
-+	if (IS_ERR(fixed))
-+		return PTR_ERR(fixed);
-+
-+	bkey_reassemble(&fixed->k_i, k);
-+	fixed->v.front_pad	= 0;
-+	fixed->v.back_pad	= 0;
-+
-+	return bch2_trans_update(trans, iter, &fixed->k_i, BTREE_TRIGGER_NORUN);
-+}
-+
-+/*
-+ * Run fix_reflink_p_key() over every key in BTREE_ID_extents (all snapshots).
-+ *
-+ * Does nothing when the superblock version is already at or beyond
-+ * bcachefs_metadata_version_reflink_p_fix. The walk's result is passed to
-+ * bch_err_fn() and returned.
-+ */
-+int bch2_fix_reflink_p(struct bch_fs *c)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
-+ return 0;
-+
-+ ret = bch2_trans_run(c,
-+ for_each_btree_key_commit(trans, iter,
-+ BTREE_ID_extents, POS_MIN,
-+ BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|
-+ BTREE_ITER_ALL_SNAPSHOTS, k,
-+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
-+ fix_reflink_p_key(trans, &iter, k)));
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h
-new file mode 100644
-index 000000000000..da991e8cf27e
---- /dev/null
-+++ b/fs/bcachefs/fsck.h
-@@ -0,0 +1,15 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FSCK_H
-+#define _BCACHEFS_FSCK_H
-+
-+/* fsck passes; each takes the filesystem and returns an int status. */
-+int bch2_check_inodes(struct bch_fs *);
-+int bch2_check_extents(struct bch_fs *);
-+int bch2_check_indirect_extents(struct bch_fs *);
-+int bch2_check_dirents(struct bch_fs *);
-+int bch2_check_xattrs(struct bch_fs *);
-+int bch2_check_root(struct bch_fs *);
-+int bch2_check_directory_structure(struct bch_fs *);
-+int bch2_check_nlinks(struct bch_fs *);
-+int bch2_fix_reflink_p(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_FSCK_H */
-diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
-new file mode 100644
-index 000000000000..def77f2d8802
---- /dev/null
-+++ b/fs/bcachefs/inode.c
-@@ -0,0 +1,1198 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_key_cache.h"
-+#include "btree_write_buffer.h"
-+#include "bkey_methods.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "compress.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "extent_update.h"
-+#include "inode.h"
-+#include "str_hash.h"
-+#include "snapshot.h"
-+#include "subvolume.h"
-+#include "varint.h"
-+
-+#include <linux/random.h>
-+
-+#include <asm/unaligned.h>
-+
-+/*
-+ * x() stringifies the first argument of each list entry, so the two tables
-+ * below expand to NULL-terminated arrays of names.
-+ */
-+#define x(name, ...) #name,
-+const char * const bch2_inode_opts[] = {
-+ BCH_INODE_OPTS()
-+ NULL,
-+};
-+
-+/* Passed to prt_bitflags() when printing bi_flags: */
-+static const char * const bch2_inode_flag_strs[] = {
-+ BCH_INODE_FLAGS()
-+ NULL
-+};
-+#undef x
-+
-+/* Encoded field length in bytes, indexed by (shift - 1) in inode_decode_field(): */
-+static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
-+
-+/*
-+ * Decode one variable-length field, as used by bch2_inode_unpack_v1().
-+ *
-+ * @in:       first byte of the encoded field
-+ * @end:      end of the input buffer
-+ * @out:      decoded value as two u64s; out[0] is the high word, out[1] the low
-+ * @out_bits: number of significant bits in the decoded value
-+ *
-+ * Returns the number of input bytes consumed, or -1 if the input is
-+ * exhausted, the first byte is zero, or the field would run past @end.
-+ */
-+static int inode_decode_field(const u8 *in, const u8 *end,
-+ u64 out[2], unsigned *out_bits)
-+{
-+ __be64 be[2] = { 0, 0 };
-+ unsigned bytes, shift;
-+ u8 *p;
-+
-+ if (in >= end)
-+ return -1;
-+
-+ if (!*in)
-+ return -1;
-+
-+ /*
-+ * position of highest set bit indicates number of bytes:
-+ * shift = number of bits to remove in high byte:
-+ */
-+ shift = 8 - __fls(*in); /* 1 <= shift <= 8 */
-+ bytes = byte_table[shift - 1];
-+
-+ if (in + bytes > end)
-+ return -1;
-+
-+ /* Right-align the field in the 16-byte big-endian buffer: */
-+ p = (u8 *) be + 16 - bytes;
-+ memcpy(p, in, bytes);
-+ /* (1 << 8) >> shift is the highest set bit of *in, i.e. the length marker: */
-+ *p ^= (1 << 8) >> shift;
-+
-+ out[0] = be64_to_cpu(be[0]);
-+ out[1] = be64_to_cpu(be[1]);
-+ *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]);
-+
-+ return bytes;
-+}
-+
-+/*
-+ * Encode @inode into @packed as an inode_v3 key.
-+ *
-+ * The fixed members are stored directly; every entry of BCH_INODE_FIELDS_v3()
-+ * is then appended as a varint. Trailing zero-valued fields are dropped, and
-+ * the count of fields kept is recorded with SET_INODEv3_NR_FIELDS().
-+ *
-+ * With CONFIG_BCACHEFS_DEBUG the result is unpacked again and compared with
-+ * @inode; any mismatch is a BUG()/panic().
-+ */
-+static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
-+ const struct bch_inode_unpacked *inode)
-+{
-+ struct bkey_i_inode_v3 *k = &packed->inode;
-+ u8 *out = k->v.fields;
-+ /* One past the end of *packed: */
-+ u8 *end = (void *) &packed[1];
-+ u8 *last_nonzero_field = out;
-+ unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
-+ unsigned bytes;
-+ int ret;
-+
-+ bkey_inode_v3_init(&packed->inode.k_i);
-+ packed->inode.k.p.offset = inode->bi_inum;
-+ packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq);
-+ packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
-+ packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
-+ packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors);
-+ packed->inode.v.bi_size = cpu_to_le64(inode->bi_size);
-+ packed->inode.v.bi_version = cpu_to_le64(inode->bi_version);
-+ SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
-+ SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);
-+
-+
-+ /*
-+ * Per field: a nonzero value is written as a varint, a zero value as a
-+ * single 0 byte; fields declared wider than 64 bits get one extra 0
-+ * byte. The end of the last nonzero field is remembered so that
-+ * trailing zero fields can be cut off below.
-+ */
-+#define x(_name, _bits) \
-+ nr_fields++; \
-+ \
-+ if (inode->_name) { \
-+ ret = bch2_varint_encode_fast(out, inode->_name); \
-+ out += ret; \
-+ \
-+ if (_bits > 64) \
-+ *out++ = 0; \
-+ \
-+ last_nonzero_field = out; \
-+ last_nonzero_fieldnr = nr_fields; \
-+ } else { \
-+ *out++ = 0; \
-+ \
-+ if (_bits > 64) \
-+ *out++ = 0; \
-+ }
-+
-+ BCH_INODE_FIELDS_v3()
-+#undef x
-+ /* NOTE(review): this check runs only after all fields were written */
-+ BUG_ON(out > end);
-+
-+ /* Truncate after the last nonzero field: */
-+ out = last_nonzero_field;
-+ nr_fields = last_nonzero_fieldnr;
-+
-+ bytes = out - (u8 *) &packed->inode.v;
-+ set_bkey_val_bytes(&packed->inode.k, bytes);
-+ /* presumably zeroes the padding up to the next u64 boundary - confirm */
-+ memset_u64s_tail(&packed->inode.v, 0, bytes);
-+
-+ SET_INODEv3_NR_FIELDS(&k->v, nr_fields);
-+
-+ /* Debug builds: round-trip through bch2_inode_unpack() and compare */
-+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
-+ struct bch_inode_unpacked unpacked;
-+
-+ ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked);
-+ BUG_ON(ret);
-+ BUG_ON(unpacked.bi_inum != inode->bi_inum);
-+ BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
-+ BUG_ON(unpacked.bi_sectors != inode->bi_sectors);
-+ BUG_ON(unpacked.bi_size != inode->bi_size);
-+ BUG_ON(unpacked.bi_version != inode->bi_version);
-+ BUG_ON(unpacked.bi_mode != inode->bi_mode);
-+
-+#define x(_name, _bits) if (unpacked._name != inode->_name) \
-+ panic("unpacked %llu should be %llu", \
-+ (u64) unpacked._name, (u64) inode->_name);
-+ BCH_INODE_FIELDS_v3()
-+#undef x
-+ }
-+}
-+
-+/* Out-of-line entry point for bch2_inode_pack_inlined(). */
-+void bch2_inode_pack(struct bkey_inode_buf *packed,
-+ const struct bch_inode_unpacked *inode)
-+{
-+ bch2_inode_pack_inlined(packed, inode);
-+}
-+
-+/*
-+ * Decode the variable-length fields of a KEY_TYPE_inode key in the encoding
-+ * handled by inode_decode_field().
-+ *
-+ * Returns 0 on success, or a negative value if a field fails to decode or
-+ * does not fit in its destination member.
-+ */
-+static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
-+ struct bch_inode_unpacked *unpacked)
-+{
-+ const u8 *in = inode.v->fields;
-+ const u8 *end = bkey_val_end(inode);
-+ u64 field[2];
-+ unsigned fieldnr = 0, field_bits;
-+ int ret;
-+
-+ /*
-+ * Per field: once INODE_NR_FIELDS() fields have been consumed, zero
-+ * this member and everything after it in *unpacked and stop. Otherwise
-+ * decode the field, reject values wider than the member, and store the
-+ * low 64 bits.
-+ */
-+#define x(_name, _bits) \
-+ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \
-+ unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
-+ memset((void *) unpacked + offset, 0, \
-+ sizeof(*unpacked) - offset); \
-+ return 0; \
-+ } \
-+ \
-+ ret = inode_decode_field(in, end, field, &field_bits); \
-+ if (ret < 0) \
-+ return ret; \
-+ \
-+ if (field_bits > sizeof(unpacked->_name) * 8) \
-+ return -1; \
-+ \
-+ unpacked->_name = field[1]; \
-+ in += ret;
-+
-+ BCH_INODE_FIELDS_v2()
-+#undef x
-+
-+ /* XXX: signal if there were more fields than expected? */
-+ return 0;
-+}
-+
-+/*
-+ * Decode up to @nr_fields varint-encoded fields from [@in, @end) into the
-+ * members listed by BCH_INODE_FIELDS_v2(); members beyond @nr_fields are set
-+ * to zero.
-+ *
-+ * Returns 0 on success, a negative value from bch2_varint_decode_fast(), or
-+ * -1 if a decoded value does not fit in its member.
-+ */
-+static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
-+ const u8 *in, const u8 *end,
-+ unsigned nr_fields)
-+{
-+ unsigned fieldnr = 0;
-+ int ret;
-+ u64 v[2];
-+
-+ /*
-+ * Per field: fields declared wider than 64 bits carry a second varint
-+ * in v[1]. After the store, a nonzero v[1] or a value that changed
-+ * when assigned to the member means it did not fit.
-+ */
-+#define x(_name, _bits) \
-+ if (fieldnr < nr_fields) { \
-+ ret = bch2_varint_decode_fast(in, end, &v[0]); \
-+ if (ret < 0) \
-+ return ret; \
-+ in += ret; \
-+ \
-+ if (_bits > 64) { \
-+ ret = bch2_varint_decode_fast(in, end, &v[1]); \
-+ if (ret < 0) \
-+ return ret; \
-+ in += ret; \
-+ } else { \
-+ v[1] = 0; \
-+ } \
-+ } else { \
-+ v[0] = v[1] = 0; \
-+ } \
-+ \
-+ unpacked->_name = v[0]; \
-+ if (v[1] || v[0] != unpacked->_name) \
-+ return -1; \
-+ fieldnr++;
-+
-+ BCH_INODE_FIELDS_v2()
-+#undef x
-+
-+ /* XXX: signal if there were more fields than expected? */
-+ return 0;
-+}
-+
-+/*
-+ * Unpack a KEY_TYPE_inode_v3 key: copy the fixed members, then decode up to
-+ * INODEv3_NR_FIELDS() varint fields into the members listed by
-+ * BCH_INODE_FIELDS_v3(); members beyond that count are set to zero.
-+ *
-+ * Returns 0 on success, a negative value from bch2_varint_decode_fast(), or
-+ * -1 if a decoded value does not fit in its member.
-+ */
-+static int bch2_inode_unpack_v3(struct bkey_s_c k,
-+ struct bch_inode_unpacked *unpacked)
-+{
-+ struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
-+ const u8 *in = inode.v->fields;
-+ const u8 *end = bkey_val_end(inode);
-+ unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
-+ unsigned fieldnr = 0;
-+ int ret;
-+ u64 v[2];
-+
-+ unpacked->bi_inum = inode.k->p.offset;
-+ unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
-+ unpacked->bi_hash_seed = inode.v->bi_hash_seed;
-+ unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
-+ unpacked->bi_sectors = le64_to_cpu(inode.v->bi_sectors);
-+ unpacked->bi_size = le64_to_cpu(inode.v->bi_size);
-+ unpacked->bi_version = le64_to_cpu(inode.v->bi_version);
-+ unpacked->bi_mode = INODEv3_MODE(inode.v);
-+
-+ /* Same per-field decoding as bch2_inode_unpack_v2(), over the v3 field list: */
-+#define x(_name, _bits) \
-+ if (fieldnr < nr_fields) { \
-+ ret = bch2_varint_decode_fast(in, end, &v[0]); \
-+ if (ret < 0) \
-+ return ret; \
-+ in += ret; \
-+ \
-+ if (_bits > 64) { \
-+ ret = bch2_varint_decode_fast(in, end, &v[1]); \
-+ if (ret < 0) \
-+ return ret; \
-+ in += ret; \
-+ } else { \
-+ v[1] = 0; \
-+ } \
-+ } else { \
-+ v[0] = v[1] = 0; \
-+ } \
-+ \
-+ unpacked->_name = v[0]; \
-+ if (v[1] || v[0] != unpacked->_name) \
-+ return -1; \
-+ fieldnr++;
-+
-+ BCH_INODE_FIELDS_v3()
-+#undef x
-+
-+ /* XXX: signal if there were more fields than expected? */
-+ return 0;
-+}
-+
-+/*
-+ * Unpack the older inode key types (KEY_TYPE_inode and KEY_TYPE_inode_v2).
-+ * *unpacked is zeroed first, so members the old format does not carry stay 0.
-+ * Any other key type is a BUG().
-+ */
-+static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
-+ struct bch_inode_unpacked *unpacked)
-+{
-+ memset(unpacked, 0, sizeof(*unpacked));
-+
-+ switch (k.k->type) {
-+ case KEY_TYPE_inode: {
-+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
-+
-+ unpacked->bi_inum = inode.k->p.offset;
-+ unpacked->bi_journal_seq= 0;
-+ unpacked->bi_hash_seed = inode.v->bi_hash_seed;
-+ unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
-+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
-+
-+ /* INODE_NEW_VARINT() selects between the two field encodings: */
-+ if (INODE_NEW_VARINT(inode.v)) {
-+ return bch2_inode_unpack_v2(unpacked, inode.v->fields,
-+ bkey_val_end(inode),
-+ INODE_NR_FIELDS(inode.v));
-+ } else {
-+ return bch2_inode_unpack_v1(inode, unpacked);
-+ }
-+ break;
-+ }
-+ case KEY_TYPE_inode_v2: {
-+ struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
-+
-+ unpacked->bi_inum = inode.k->p.offset;
-+ unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
-+ unpacked->bi_hash_seed = inode.v->bi_hash_seed;
-+ unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
-+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
-+
-+ return bch2_inode_unpack_v2(unpacked, inode.v->fields,
-+ bkey_val_end(inode),
-+ INODEv2_NR_FIELDS(inode.v));
-+ }
-+ default:
-+ BUG();
-+ }
-+}
-+
-+/*
-+ * Unpack any inode key into @unpacked: inode_v3 keys take the fast path,
-+ * every other type goes through bch2_inode_unpack_slowpath().
-+ */
-+int bch2_inode_unpack(struct bkey_s_c k,
-+		      struct bch_inode_unpacked *unpacked)
-+{
-+	return likely(k.k->type == KEY_TYPE_inode_v3)
-+		? bch2_inode_unpack_v3(k, unpacked)
-+		: bch2_inode_unpack_slowpath(k, unpacked);
-+}
-+
-+/*
-+ * Look up and unpack the inode @inum, resolving its subvolume to a snapshot
-+ * first.
-+ *
-+ * On success returns 0 with @iter still pointing at the inode key. If the key
-+ * found is not an inode (-BCH_ERR_ENOENT_inode) or fails to unpack, @iter is
-+ * exited before the error is returned. Errors from the subvolume or key
-+ * lookup are returned directly.
-+ */
-+static int bch2_inode_peek_nowarn(struct btree_trans *trans,
-+				  struct btree_iter *iter,
-+				  struct bch_inode_unpacked *inode,
-+				  subvol_inum inum, unsigned flags)
-+{
-+	struct bkey_s_c k;
-+	u32 snapshot;
-+	int ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+
-+	if (ret)
-+		return ret;
-+
-+	k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes,
-+			       SPOS(0, inum.inum, snapshot),
-+			       flags|BTREE_ITER_CACHED);
-+	ret = bkey_err(k);
-+	if (ret)
-+		return ret;
-+
-+	ret = bkey_is_inode(k.k)
-+		? bch2_inode_unpack(k, inode)
-+		: -BCH_ERR_ENOENT_inode;
-+	if (ret)
-+		bch2_trans_iter_exit(trans, iter);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Same lookup as bch2_inode_peek_nowarn(), with the result additionally
-+ * passed to bch_err_msg() together with the subvolume and inode number.
-+ */
-+int bch2_inode_peek(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bch_inode_unpacked *inode,
-+ subvol_inum inum, unsigned flags)
-+{
-+ int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
-+ bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum);
-+ return ret;
-+}
-+
-+/*
-+ * Pack @inode into transaction-allocated memory and queue it as an update at
-+ * @iter, using the iterator's snapshot for the key position.
-+ *
-+ * Returns the bch2_trans_update() result, or the allocation error.
-+ */
-+int bch2_inode_write_flags(struct btree_trans *trans,
-+			   struct btree_iter *iter,
-+			   struct bch_inode_unpacked *inode,
-+			   enum btree_update_flags flags)
-+{
-+	struct bkey_inode_buf *buf = bch2_trans_kmalloc(trans, sizeof(*buf));
-+	int ret = PTR_ERR_OR_ZERO(buf);
-+
-+	if (ret)
-+		return ret;
-+
-+	bch2_inode_pack_inlined(buf, inode);
-+	buf->inode.k.p.snapshot = iter->snapshot;
-+
-+	return bch2_trans_update(trans, iter, &buf->inode.k_i, flags);
-+}
-+
-+/*
-+ * Re-encode inode key @k as an inode_v3 key in transaction-allocated memory.
-+ *
-+ * Returns the new key, ERR_PTR(-ENOENT) if @k is not an inode key, or an
-+ * ERR_PTR carrying the allocation or unpack error.
-+ */
-+struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
-+{
-+	struct bch_inode_unpacked unpacked;
-+	struct bkey_inode_buf *buf;
-+	int ret;
-+
-+	if (!bkey_is_inode(&k->k))
-+		return ERR_PTR(-ENOENT);
-+
-+	buf = bch2_trans_kmalloc(trans, sizeof(*buf));
-+	if (IS_ERR(buf))
-+		return ERR_CAST(buf);
-+
-+	ret = bch2_inode_unpack(bkey_i_to_s_c(k), &unpacked);
-+	if (ret)
-+		return ERR_PTR(ret);
-+
-+	bch2_inode_pack(buf, &unpacked);
-+
-+	return &buf->inode.k_i;
-+}
-+
-+/*
-+ * Validation shared by all inode key versions: position sanity, that the
-+ * variable-length fields unpack, and consistency of a few unpacked members.
-+ *
-+ * Each bkey_fsck_err_on() that fires reports through @err; the value left in
-+ * ret at the fsck_err label is returned (0 when nothing fired).
-+ */
-+static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err)
-+{
-+	struct bch_inode_unpacked unpacked;
-+	int ret = 0;
-+
-+	bkey_fsck_err_on(k.k->p.inode, c, err,
-+			 inode_pos_inode_nonzero,
-+			 "nonzero k.p.inode");
-+
-+	bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err,
-+			 inode_pos_blockdev_range,
-+			 "fs inode in blockdev range");
-+
-+	bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err,
-+			 inode_unpack_error,
-+			 "invalid variable length fields");
-+
-+	/* Message now closes its parenthesis, like the other "(%llu >= %u)" messages: */
-+	bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err,
-+			 inode_checksum_type_invalid,
-+			 "invalid data checksum type (%u >= %u)",
-+			 unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1);
-+
-+	/* bi_compression is stored off by one; 0 skips the check entirely: */
-+	bkey_fsck_err_on(unpacked.bi_compression &&
-+			 !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err,
-+			 inode_compression_type_invalid,
-+			 "invalid compression opt %u", unpacked.bi_compression - 1);
-+
-+	bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) &&
-+			 unpacked.bi_nlink != 0, c, err,
-+			 inode_unlinked_but_nlink_nonzero,
-+			 "flagged as unlinked but bi_nlink != 0");
-+
-+	bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err,
-+			 inode_subvol_root_but_not_dir,
-+			 "subvolume root but not a directory");
-+fsck_err:
-+	return ret;
-+}
-+
-+/*
-+ * Validate a KEY_TYPE_inode key: check the string hash type, then run the
-+ * checks shared by all versions in __bch2_inode_invalid().
-+ */
-+int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
-+ inode_str_hash_invalid,
-+ "invalid str hash type (%llu >= %u)",
-+ INODE_STR_HASH(inode.v), BCH_STR_HASH_NR);
-+
-+ ret = __bch2_inode_invalid(c, k, err);
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Validate a KEY_TYPE_inode_v2 key: check the string hash type, then run the
-+ * checks shared by all versions in __bch2_inode_invalid().
-+ */
-+int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
-+ inode_str_hash_invalid,
-+ "invalid str hash type (%llu >= %u)",
-+ INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR);
-+
-+ ret = __bch2_inode_invalid(c, k, err);
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Validate a KEY_TYPE_inode_v3 key: fields_start must lie between
-+ * INODEv3_FIELDS_START_INITIAL and the value size in u64s, and the string
-+ * hash type must be known; then run the checks shared by all versions in
-+ * __bch2_inode_invalid().
-+ */
-+int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
-+ INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err,
-+ inode_v3_fields_start_bad,
-+ "invalid fields_start (got %llu, min %u max %zu)",
-+ INODEv3_FIELDS_START(inode.v),
-+ INODEv3_FIELDS_START_INITIAL,
-+ bkey_val_u64s(inode.k));
-+
-+ bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
-+ inode_str_hash_invalid,
-+ "invalid str hash type (%llu >= %u)",
-+ INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
-+
-+ ret = __bch2_inode_invalid(c, k, err);
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Print an unpacked inode to @out: mode, flags (by name and raw), the fixed
-+ * counters, then one " name=value" pair per BCH_INODE_FIELDS_v3() entry.
-+ * The inode number is not printed here.
-+ */
-+static void __bch2_inode_unpacked_to_text(struct printbuf *out,
-+ struct bch_inode_unpacked *inode)
-+{
-+ prt_printf(out, "mode=%o ", inode->bi_mode);
-+
-+ prt_str(out, "flags=");
-+ /*
-+ * Only the low 20 bits are decoded by name.
-+ * NOTE(review): presumably the higher bits hold non-flag data such as
-+ * the str hash type - confirm.
-+ */
-+ prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
-+ prt_printf(out, " (%x)", inode->bi_flags);
-+
-+ prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu",
-+ inode->bi_journal_seq,
-+ inode->bi_size,
-+ inode->bi_sectors,
-+ inode->bi_version);
-+
-+#define x(_name, _bits) \
-+ prt_printf(out, " "#_name "=%llu", (u64) inode->_name);
-+ BCH_INODE_FIELDS_v3()
-+#undef x
-+}
-+
-+/* Print "inum: N " followed by the fields printed by __bch2_inode_unpacked_to_text(). */
-+void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
-+{
-+ prt_printf(out, "inum: %llu ", inode->bi_inum);
-+ __bch2_inode_unpacked_to_text(out, inode);
-+}
-+
-+/*
-+ * Print inode key @k to @out in unpacked form, or "(unpack error)" if the
-+ * key cannot be unpacked.
-+ */
-+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
-+{
-+	struct bch_inode_unpacked unpacked;
-+
-+	if (!bch2_inode_unpack(k, &unpacked))
-+		__bch2_inode_unpacked_to_text(out, &unpacked);
-+	else
-+		prt_printf(out, "(unpack error)");
-+}
-+
-+/*
-+ * Read bi_flags straight out of a packed inode key of any version.
-+ * Returns 0 for every other key type.
-+ */
-+static inline u64 bkey_inode_flags(struct bkey_s_c k)
-+{
-+	u64 flags = 0;
-+
-+	switch (k.k->type) {
-+	case KEY_TYPE_inode:
-+		flags = le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
-+		break;
-+	case KEY_TYPE_inode_v2:
-+		flags = le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
-+		break;
-+	case KEY_TYPE_inode_v3:
-+		flags = le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
-+		break;
-+	default:
-+		break;
-+	}
-+
-+	return flags;
-+}
-+
-+/* True if @k is an inode key with BCH_INODE_unlinked set in its flags. */
-+static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
-+{
-+	u64 flags = bkey_inode_flags(k);
-+
-+	return (flags & BCH_INODE_unlinked) != 0;
-+}
-+
-+/*
-+ * Transactional trigger for inode keys.
-+ *
-+ * Adds the change in "is an inode" between @old and @new to the transaction's
-+ * nr_inodes delta, and calls bch2_btree_bit_mod() on BTREE_ID_deleted_inodes
-+ * whenever the unlinked flag differs between the two keys.
-+ *
-+ * Returns 0 or the first error encountered.
-+ */
-+int bch2_trans_mark_inode(struct btree_trans *trans,
-+			  enum btree_id btree_id, unsigned level,
-+			  struct bkey_s_c old,
-+			  struct bkey_i *new,
-+			  unsigned flags)
-+{
-+	int delta = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
-+	bool was_unlinked = bkey_is_deleted_inode(old);
-+	bool now_unlinked = bkey_is_deleted_inode(bkey_i_to_s_c(new));
-+	int ret;
-+
-+	if (delta) {
-+		ret = bch2_replicas_deltas_realloc(trans, 0);
-+		if (ret)
-+			return ret;
-+
-+		trans->fs_usage_deltas->nr_inodes += delta;
-+	}
-+
-+	if (was_unlinked == now_unlinked)
-+		return 0;
-+
-+	return bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes,
-+				  new->k.p, now_unlinked);
-+}
-+
-+/*
-+ * Non-transactional trigger for inode keys.
-+ *
-+ * With BTREE_TRIGGER_INSERT, stamps the transaction's journal sequence number
-+ * into the new key (which must be an inode_v3). With BTREE_TRIGGER_GC,
-+ * adjusts the nr_inodes counter of the usage struct returned by
-+ * fs_usage_ptr().
-+ * Always returns 0.
-+ */
-+int bch2_mark_inode(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_s_c new,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_fs_usage *fs_usage;
-+ u64 journal_seq = trans->journal_res.seq;
-+
-+ if (flags & BTREE_TRIGGER_INSERT) {
-+ /* Casts away const: the key value is updated in place */
-+ struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
-+
-+ BUG_ON(!journal_seq);
-+ BUG_ON(new.k->type != KEY_TYPE_inode_v3);
-+
-+ v->bi_journal_seq = cpu_to_le64(journal_seq);
-+ }
-+
-+ if (flags & BTREE_TRIGGER_GC) {
-+ /* Counter update runs under mark_lock with preemption disabled: */
-+ percpu_down_read(&c->mark_lock);
-+ preempt_disable();
-+
-+ fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC);
-+ fs_usage->nr_inodes += bkey_is_inode(new.k);
-+ fs_usage->nr_inodes -= bkey_is_inode(old.k);
-+
-+ preempt_enable();
-+ percpu_up_read(&c->mark_lock);
-+ }
-+ return 0;
-+}
-+
-+/* Validate an inode_generation key: the only check is that k.p.inode is zero. */
-+int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(k.k->p.inode, c, err,
-+ inode_pos_inode_nonzero,
-+ "nonzero k.p.inode");
-+fsck_err:
-+ return ret;
-+}
-+
-+/* Print the bi_generation value of an inode_generation key to @out. */
-+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
-+				   struct bkey_s_c k)
-+{
-+	u32 generation = le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
-+
-+	prt_printf(out, "generation: %u", generation);
-+}
-+
-+/*
-+ * First half of inode initialisation: zero @inode_u, record the filesystem's
-+ * configured string hash type in bi_flags, and pick a random hash seed.
-+ */
-+void bch2_inode_init_early(struct bch_fs *c,
-+			   struct bch_inode_unpacked *inode_u)
-+{
-+	enum bch_str_hash_type hash_type =
-+		bch2_str_hash_opt_to_type(c, c->opts.str_hash);
-+
-+	memset(inode_u, 0, sizeof(*inode_u));
-+
-+	/* The hash type is stored in bi_flags at INODE_STR_HASH_OFFSET: */
-+	inode_u->bi_flags |= hash_type << INODE_STR_HASH_OFFSET;
-+
-+	get_random_bytes(&inode_u->bi_hash_seed,
-+			 sizeof(inode_u->bi_hash_seed));
-+}
-+
-+/*
-+ * Second half of inode initialisation: set ownership, mode, device and all
-+ * four timestamps (to @now), then inherit from @parent when one is given:
-+ *  - if the parent has S_ISGID set, its gid is taken and, for directories,
-+ *    S_ISGID is set on the new inode too;
-+ *  - every option listed in BCH_INODE_OPTS() is copied from the parent.
-+ */
-+void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
-+			  uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
-+			  struct bch_inode_unpacked *parent)
-+{
-+	inode_u->bi_mode	= mode;
-+	inode_u->bi_uid		= uid;
-+	inode_u->bi_gid		= gid;
-+	inode_u->bi_dev		= rdev;
-+	inode_u->bi_atime	= now;
-+	inode_u->bi_mtime	= now;
-+	inode_u->bi_ctime	= now;
-+	inode_u->bi_otime	= now;
-+
-+	if (!parent)
-+		return;
-+
-+	if (parent->bi_mode & S_ISGID) {
-+		inode_u->bi_gid = parent->bi_gid;
-+		if (S_ISDIR(mode))
-+			inode_u->bi_mode |= S_ISGID;
-+	}
-+
-+#define x(_name, ...)	inode_u->bi_##_name = parent->bi_##_name;
-+	BCH_INODE_OPTS()
-+#undef x
-+}
-+
-+/*
-+ * Fully initialise a new in-memory inode: bch2_inode_init_early() followed by
-+ * bch2_inode_init_late() with the filesystem's current time.
-+ */
-+void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
-+		     uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
-+		     struct bch_inode_unpacked *parent)
-+{
-+	bch2_inode_init_early(c, inode_u);
-+
-+	u64 now = bch2_current_time(c);
-+
-+	bch2_inode_init_late(inode_u, now, uid, gid, mode, rdev, parent);
-+}
-+
-+/*
-+ * Generation number recorded in the slot described by @k: the value stored in
-+ * an inode_generation key, or 0 for any other key type. The slot must not
-+ * hold a KEY_TYPE_inode or KEY_TYPE_inode_v2 key (that is a BUG).
-+ */
-+static inline u32 bkey_generation(struct bkey_s_c k)
-+{
-+	if (k.k->type == KEY_TYPE_inode ||
-+	    k.k->type == KEY_TYPE_inode_v2)
-+		BUG();
-+
-+	if (k.k->type == KEY_TYPE_inode_generation)
-+		return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
-+
-+	return 0;
-+}
-+
-+/*
-+ * This just finds an empty slot:
-+ *
-+ * Picks an unused inode number for @inode_u in @snapshot. On success (return
-+ * 0) bi_inum and bi_generation are filled in and @iter is left initialised,
-+ * positioned at the chosen slot; on error @iter has already been exited.
-+ * Returns -BCH_ERR_ENOSPC_inode_create when a scan that started from the
-+ * minimum inode number finds no free slot.
-+ */
-+int bch2_inode_create(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bch_inode_unpacked *inode_u,
-+ u32 snapshot, u64 cpu)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_s_c k;
-+ u64 min, max, start, pos, *hint;
-+ int ret = 0;
-+ /* Inode numbers use 31 or 63 bits, depending on the inodes_32bit option: */
-+ unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
-+
-+ if (c->opts.shard_inode_numbers) {
-+ /* The top inode_shard_bits bits select the range belonging to @cpu: */
-+ bits -= c->inode_shard_bits;
-+
-+ min = (cpu << bits);
-+ max = (cpu << bits) | ~(ULLONG_MAX << bits);
-+
-+ min = max_t(u64, min, BLOCKDEV_INODE_MAX);
-+ hint = c->unused_inode_hints + cpu;
-+ } else {
-+ min = BLOCKDEV_INODE_MAX;
-+ max = ~(ULLONG_MAX << bits);
-+ hint = c->unused_inode_hints;
-+ }
-+
-+ /* Begin at the position recorded in the hint, if it is within range: */
-+ start = READ_ONCE(*hint);
-+
-+ if (start >= max || start < min)
-+ start = min;
-+
-+ pos = start;
-+ bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
-+ BTREE_ITER_ALL_SNAPSHOTS|
-+ BTREE_ITER_INTENT);
-+again:
-+ while ((k = bch2_btree_iter_peek(iter)).k &&
-+ !(ret = bkey_err(k)) &&
-+ bkey_lt(k.k->p, POS(0, max))) {
-+ /* The iterator moved past pos, so pos itself is unused: */
-+ if (pos < iter->pos.offset)
-+ goto found_slot;
-+
-+ /*
-+ * We don't need to iterate over keys in every snapshot once
-+ * we've found just one:
-+ */
-+ pos = iter->pos.offset + 1;
-+ bch2_btree_iter_set_pos(iter, POS(0, pos));
-+ }
-+
-+ /* The loop ended without error while pos is still below max: */
-+ if (!ret && pos < max)
-+ goto found_slot;
-+
-+ /* This scan already started from min, so there is nothing left to try: */
-+ if (!ret && start == min)
-+ ret = -BCH_ERR_ENOSPC_inode_create;
-+
-+ if (ret) {
-+ bch2_trans_iter_exit(trans, iter);
-+ return ret;
-+ }
-+
-+ /* Retry from start */
-+ pos = start = min;
-+ bch2_btree_iter_set_pos(iter, POS(0, pos));
-+ goto again;
-+found_slot:
-+ /* Re-point the iterator at the slot in the requested snapshot: */
-+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
-+ k = bch2_btree_iter_peek_slot(iter);
-+ ret = bkey_err(k);
-+ if (ret) {
-+ bch2_trans_iter_exit(trans, iter);
-+ return ret;
-+ }
-+
-+ /* Record the slot in the hint that the next call will start from: */
-+ *hint = k.k->p.offset;
-+ inode_u->bi_inum = k.k->p.offset;
-+ inode_u->bi_generation = bkey_generation(k);
-+ return 0;
-+}
-+
-+/*
-+ * Delete every key in btree @id that belongs to inode @inum, as seen from the
-+ * subvolume's current snapshot. Each loop iteration deletes one key with its
-+ * own bch2_trans_commit(); transaction restarts are retried inside the loop.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+static int bch2_inode_delete_keys(struct btree_trans *trans,
-+ subvol_inum inum, enum btree_id id)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_i delete;
-+ struct bpos end = POS(inum.inum, U64_MAX);
-+ u32 snapshot;
-+ int ret = 0;
-+
-+ /*
-+ * We're never going to be deleting partial extents, no need to use an
-+ * extent iterator:
-+ */
-+ bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0),
-+ BTREE_ITER_INTENT);
-+
-+ while (1) {
-+ bch2_trans_begin(trans);
-+
-+ /* Looked up again on every iteration, including after a restart: */
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ bch2_btree_iter_set_snapshot(&iter, snapshot);
-+
-+ k = bch2_btree_iter_peek_upto(&iter, end);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ /* No more keys for this inode: done */
-+ if (!k.k)
-+ break;
-+
-+ bkey_init(&delete.k);
-+ delete.k.p = iter.pos;
-+
-+ if (iter.flags & BTREE_ITER_IS_EXTENTS)
-+ bch2_key_resize(&delete.k,
-+ bpos_min(end, k.k->p).offset -
-+ iter.pos.offset);
-+
-+ ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL);
-+err:
-+ /* Only a transaction restart keeps the loop going after an error: */
-+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ break;
-+ }
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/*
-+ * Delete inode @inum: first removes its extents, xattrs and dirents, then
-+ * replaces the inode key itself with an inode_generation key carrying
-+ * bi_generation + 1.
-+ *
-+ * Returns 0 on success or a negative error code (-EIO if no inode key is
-+ * found at the expected position).
-+ */
-+int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter = { NULL };
-+ struct bkey_i_inode_generation delete;
-+ struct bch_inode_unpacked inode_u;
-+ struct bkey_s_c k;
-+ u32 snapshot;
-+ int ret;
-+
-+ /*
-+ * If this was a directory, there shouldn't be any real dirents left -
-+ * but there could be whiteouts (from hash collisions) that we should
-+ * delete:
-+ *
-+ * XXX: the dirent code ideally would delete whiteouts when they're no
-+ * longer needed
-+ */
-+ ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
-+ bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
-+ bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
-+ if (ret)
-+ goto err;
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
-+ SPOS(0, inum.inum, snapshot),
-+ BTREE_ITER_INTENT|BTREE_ITER_CACHED);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (!bkey_is_inode(k.k)) {
-+ bch2_fs_inconsistent(c,
-+ "inode %llu:%u not found when deleting",
-+ inum.inum, snapshot);
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ bch2_inode_unpack(k, &inode_u);
-+
-+ /* Leave an inode_generation key behind, with the generation bumped: */
-+ bkey_inode_generation_init(&delete.k_i);
-+ delete.k.p = iter.pos;
-+ delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
-+
-+ ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL);
-+err:
-+ /* iter is exited on every pass; the retry path initialises it again: */
-+ bch2_trans_iter_exit(trans, &iter);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+/*
-+ * Look up inode @inum with bch2_inode_peek_nowarn() and unpack it into
-+ * @inode. The iterator is only exited here when the lookup succeeded.
-+ *
-+ * Returns 0 on success or the error from bch2_inode_peek_nowarn().
-+ */
-+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
-+					 subvol_inum inum,
-+					 struct bch_inode_unpacked *inode)
-+{
-+	struct btree_iter iter;
-+	int ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);
-+
-+	if (ret)
-+		return ret;
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return 0;
-+}
-+
-+/*
-+ * Look up inode @inum with bch2_inode_peek() and unpack it into @inode. The
-+ * iterator is only exited here when the lookup succeeded.
-+ *
-+ * Returns 0 on success or the error from bch2_inode_peek().
-+ */
-+int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
-+				  subvol_inum inum,
-+				  struct bch_inode_unpacked *inode)
-+{
-+	struct btree_iter iter;
-+	int ret = bch2_inode_peek(trans, &iter, inode, inum, 0);
-+
-+	if (ret)
-+		return ret;
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return 0;
-+}
-+
-+/*
-+ * Look up inode @inum and unpack it into @inode, for callers that do not
-+ * have a btree_trans: runs bch2_inode_find_by_inum_trans() through
-+ * bch2_trans_do().
-+ *
-+ * NOTE(review): the `trans` identifier used below is presumably declared by
-+ * the bch2_trans_do() macro - confirm against its definition.
-+ */
-+int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
-+ struct bch_inode_unpacked *inode)
-+{
-+ return bch2_trans_do(c, NULL, NULL, 0,
-+ bch2_inode_find_by_inum_trans(trans, inum, inode));
-+}
-+
-+/*
-+ * Add one link to @bi. An inode flagged BCH_INODE_unlinked just has the flag
-+ * cleared; otherwise bi_nlink is incremented.
-+ *
-+ * Returns 0, or -EINVAL if bi_nlink is already U32_MAX.
-+ */
-+int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
-+{
-+	if (bi->bi_flags & BCH_INODE_unlinked) {
-+		bi->bi_flags &= ~BCH_INODE_unlinked;
-+		return 0;
-+	}
-+
-+	if (bi->bi_nlink == U32_MAX)
-+		return -EINVAL;
-+
-+	bi->bi_nlink++;
-+	return 0;
-+}
-+
-+/*
-+ * Drop one link from @bi. A nonzero bi_nlink is decremented; when it is
-+ * already zero the inode is flagged BCH_INODE_unlinked instead. An inode that
-+ * already carries the flag is reported as inconsistent and left untouched.
-+ */
-+void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi)
-+{
-+	if (bi->bi_flags & BCH_INODE_unlinked) {
-+		if (bi->bi_nlink) {
-+			bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero",
-+						bi->bi_inum);
-+		} else {
-+			bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum);
-+		}
-+		return;
-+	}
-+
-+	if (!bi->bi_nlink) {
-+		bi->bi_flags |= BCH_INODE_unlinked;
-+		return;
-+	}
-+
-+	bi->bi_nlink--;
-+}
-+
-+/*
-+ * Build a struct bch_opts from the per-inode option fields of @inode. A field
-+ * value of 0 leaves the option unset; any other value sets the option to the
-+ * stored value minus one.
-+ */
-+struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode)
-+{
-+ struct bch_opts ret = { 0 };
-+ /* Expanded once for every entry of BCH_INODE_OPTS(): */
-+#define x(_name, _bits) \
-+ if (inode->bi_##_name) \
-+ opt_set(ret, _name, inode->bi_##_name - 1);
-+ BCH_INODE_OPTS()
-+#undef x
-+ return ret;
-+}
-+
-+/*
-+ * Fill @opts with the IO options for @inode, one inode_opt_get() per entry of
-+ * BCH_INODE_OPTS(). When nocow ends up set, compression, background
-+ * compression, data checksumming and erasure coding are all forced to 0.
-+ */
-+void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
-+			 struct bch_inode_unpacked *inode)
-+{
-+#define x(_name, _bits)	opts->_name = inode_opt_get(c, inode, _name);
-+	BCH_INODE_OPTS()
-+#undef x
-+
-+	if (!opts->nocow)
-+		return;
-+
-+	opts->erasure_code		= 0;
-+	opts->data_checksum		= 0;
-+	opts->background_compression	= 0;
-+	opts->compression		= 0;
-+}
-+
-+/*
-+ * Look up inode @inum (under lockrestart_do()) and fill @opts from it with
-+ * bch2_inode_opts_get().
-+ *
-+ * Returns 0 on success or the lookup error, in which case @opts is untouched.
-+ */
-+int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts)
-+{
-+	struct bch_inode_unpacked inode;
-+	int ret;
-+
-+	ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode));
-+	if (!ret)
-+		bch2_inode_opts_get(opts, trans->c, &inode);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Delete inode @inum in one specific @snapshot: removes its extents, dirents
-+ * and xattrs at that snapshot, then replaces the inode key with an
-+ * inode_generation key carrying bi_generation + 1.
-+ *
-+ * Never returns 0: on success the return value is
-+ * -BCH_ERR_transaction_restart_nested; otherwise it is a negative error code
-+ * (-EIO if no inode key is found).
-+ */
-+int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter = { NULL };
-+ struct bkey_i_inode_generation delete;
-+ struct bch_inode_unpacked inode_u;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ /* Repeat the range deletes for as long as they report a nested restart: */
-+ do {
-+ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
-+ SPOS(inum, 0, snapshot),
-+ SPOS(inum, U64_MAX, snapshot),
-+ 0, NULL) ?:
-+ bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
-+ SPOS(inum, 0, snapshot),
-+ SPOS(inum, U64_MAX, snapshot),
-+ 0, NULL) ?:
-+ bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
-+ SPOS(inum, 0, snapshot),
-+ SPOS(inum, U64_MAX, snapshot),
-+ 0, NULL);
-+ } while (ret == -BCH_ERR_transaction_restart_nested);
-+ if (ret)
-+ goto err;
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
-+ SPOS(0, inum, snapshot), BTREE_ITER_INTENT);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (!bkey_is_inode(k.k)) {
-+ bch2_fs_inconsistent(c,
-+ "inode %llu:%u not found when deleting",
-+ inum, snapshot);
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ bch2_inode_unpack(k, &inode_u);
-+
-+ /* Subvolume root? */
-+ if (inode_u.bi_subvol)
-+ bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum);
-+
-+ /* Leave an inode_generation key behind, with the generation bumped: */
-+ bkey_inode_generation_init(&delete.k_i);
-+ delete.k.p = iter.pos;
-+ delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
-+
-+ ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ /* Success is deliberately reported as a nested transaction restart: */
-+ return ret ?: -BCH_ERR_transaction_restart_nested;
-+}
-+
-+/*
-+ * Decide what to do with the deleted_inodes btree entry at @pos.
-+ *
-+ * Returns 1 if the inode at @pos should be deleted by the caller, 0 if there
-+ * is nothing for the caller to delete, or a negative error code.
-+ *
-+ * Entries that fail one of the checks (no inode at @pos, a directory, an
-+ * inode not flagged unlinked) are cleared from the deleted_inodes btree
-+ * instead, and the result of that update is returned.
-+ *
-+ * If @pos is in an internal snapshot node, the key is propagated to the
-+ * snapshot leaves, the unlinked flag is cleared on this inode and
-+ * *need_another_pass is set so that the caller scans the btree again.
-+ *
-+ * inode_iter is exited on every path that returns after it has been
-+ * initialised.
-+ */
-+static int may_delete_deleted_inode(struct btree_trans *trans,
-+				    struct btree_iter *iter,
-+				    struct bpos pos,
-+				    bool *need_another_pass)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter inode_iter;
-+	struct bkey_s_c k;
-+	struct bch_inode_unpacked inode;
-+	int ret;
-+
-+	k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED);
-+	ret = bkey_err(k);
-+	if (ret)
-+		return ret;
-+
-+	ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
-+	if (fsck_err_on(!bkey_is_inode(k.k), c,
-+			deleted_inode_missing,
-+			"nonexistent inode %llu:%u in deleted_inodes btree",
-+			pos.offset, pos.snapshot))
-+		goto delete;
-+
-+	ret = bch2_inode_unpack(k, &inode);
-+	if (ret)
-+		goto out;
-+
-+	if (fsck_err_on(S_ISDIR(inode.bi_mode), c,
-+			deleted_inode_is_dir,
-+			"directory %llu:%u in deleted_inodes btree",
-+			pos.offset, pos.snapshot))
-+		goto delete;
-+
-+	if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c,
-+			deleted_inode_not_unlinked,
-+			"non-deleted inode %llu:%u in deleted_inodes btree",
-+			pos.offset, pos.snapshot))
-+		goto delete;
-+
-+	if (c->sb.clean &&
-+	    !fsck_err(c,
-+		      deleted_inode_but_clean,
-+		      "filesystem marked as clean but have deleted inode %llu:%u",
-+		      pos.offset, pos.snapshot)) {
-+		ret = 0;
-+		goto out;
-+	}
-+
-+	if (bch2_snapshot_is_internal_node(c, pos.snapshot)) {
-+		struct bpos new_min_pos;
-+
-+		ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos);
-+		if (ret)
-+			goto out;
-+
-+		inode.bi_flags &= ~BCH_INODE_unlinked;
-+
-+		ret = bch2_inode_write_flags(trans, &inode_iter, &inode,
-+					     BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+		bch_err_msg(c, ret, "clearing inode unlinked flag");
-+		if (ret)
-+			goto out;
-+
-+		/*
-+		 * We'll need another write buffer flush to pick up the new
-+		 * unlinked inodes in the snapshot leaves:
-+		 */
-+		*need_another_pass = true;
-+
-+		/*
-+		 * ret is 0 here; leave through the common exit so that
-+		 * inode_iter is released rather than leaked:
-+		 */
-+		goto out;
-+	}
-+
-+	ret = 1;
-+out:
-+fsck_err:
-+	bch2_trans_iter_exit(trans, &inode_iter);
-+	return ret;
-+delete:
-+	ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
-+	goto out;
-+}
-+
-+/*
-+ * Walk the deleted_inodes btree and delete every inode that
-+ * may_delete_deleted_inode() approves. The whole walk is repeated, after
-+ * another write buffer flush, for as long as a pass sets need_another_pass.
-+ *
-+ * Returns the final ret value, negative on error.
-+ */
-+int bch2_delete_dead_inodes(struct bch_fs *c)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ bool need_another_pass;
-+ int ret;
-+again:
-+ need_another_pass = false;
-+
-+ ret = bch2_btree_write_buffer_flush_sync(trans);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * Weird transaction restart handling here because on successful delete,
-+ * bch2_inode_rm_snapshot() will return a nested transaction restart,
-+ * but we can't retry because the btree write buffer won't have been
-+ * flushed and we'd spin:
-+ */
-+ for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
-+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+ ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p,
-+ &need_another_pass));
-+ if (ret < 0)
-+ break;
-+
-+ /* A positive return means this inode should be deleted: */
-+ if (ret) {
-+ /* Locks are dropped before bch2_fs_lazy_rw() when not yet read-write: */
-+ if (!test_bit(BCH_FS_RW, &c->flags)) {
-+ bch2_trans_unlock(trans);
-+ bch2_fs_lazy_rw(c);
-+ }
-+
-+ bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot);
-+
-+ ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
-+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ break;
-+ }
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (!ret && need_another_pass)
-+ goto again;
-+err:
-+ bch2_trans_put(trans);
-+
-+ return ret;
-+}
-diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
-new file mode 100644
-index 000000000000..88818a332b1e
---- /dev/null
-+++ b/fs/bcachefs/inode.h
-@@ -0,0 +1,217 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_INODE_H
-+#define _BCACHEFS_INODE_H
-+
-+#include "bkey.h"
-+#include "bkey_methods.h"
-+#include "opts.h"
-+
-+enum bkey_invalid_flags;
-+extern const char * const bch2_inode_opts[];
-+
-+int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_i *, unsigned);
-+int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_s_c, unsigned);
-+
-+/*
-+ * bkey_ops tables for the three inode key versions. They share the same
-+ * to_text and trigger callbacks and differ only in the validity check and in
-+ * min_val_size (16, 32 and 48).
-+ */
-+#define bch2_bkey_ops_inode ((struct bkey_ops) { \
-+ .key_invalid = bch2_inode_invalid, \
-+ .val_to_text = bch2_inode_to_text, \
-+ .trans_trigger = bch2_trans_mark_inode, \
-+ .atomic_trigger = bch2_mark_inode, \
-+ .min_val_size = 16, \
-+})
-+
-+#define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \
-+ .key_invalid = bch2_inode_v2_invalid, \
-+ .val_to_text = bch2_inode_to_text, \
-+ .trans_trigger = bch2_trans_mark_inode, \
-+ .atomic_trigger = bch2_mark_inode, \
-+ .min_val_size = 32, \
-+})
-+
-+#define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \
-+ .key_invalid = bch2_inode_v3_invalid, \
-+ .val_to_text = bch2_inode_to_text, \
-+ .trans_trigger = bch2_trans_mark_inode, \
-+ .atomic_trigger = bch2_mark_inode, \
-+ .min_val_size = 48, \
-+})
-+
-+/* True if @k is one of the three inode key versions. */
-+static inline bool bkey_is_inode(const struct bkey *k)
-+{
-+	switch (k->type) {
-+	case KEY_TYPE_inode:
-+	case KEY_TYPE_inode_v2:
-+	case KEY_TYPE_inode_v3:
-+		return true;
-+	default:
-+		return false;
-+	}
-+}
-+
-+int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+/* bkey_ops table for inode_generation keys; it sets no trigger callbacks. */
-+#define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \
-+ .key_invalid = bch2_inode_generation_invalid, \
-+ .val_to_text = bch2_inode_generation_to_text, \
-+ .min_val_size = 8, \
-+})
-+
-+/*
-+ * u96: the packed 64 + 32 bit struct below is compiled out, so u96 is
-+ * currently just another name for u64.
-+ */
-+#if 0
-+typedef struct {
-+ u64 lo;
-+ u32 hi;
-+} __packed __aligned(4) u96;
-+#endif
-+typedef u64 u96;
-+
-+/*
-+ * In-memory (unpacked) form of an inode: a few fixed members followed by one
-+ * member per entry of BCH_INODE_FIELDS_v3(), each declared with type u<bits>.
-+ */
-+struct bch_inode_unpacked {
-+ u64 bi_inum;
-+ u64 bi_journal_seq;
-+ __le64 bi_hash_seed;
-+ u64 bi_size;
-+ u64 bi_sectors;
-+ u64 bi_version;
-+ u32 bi_flags;
-+ u16 bi_mode;
-+
-+ /* One "u<bits> <name>;" member per field in the list: */
-+#define x(_name, _bits) u##_bits _name;
-+ BCH_INODE_FIELDS_v3()
-+#undef x
-+};
-+
-+/*
-+ * Buffer holding an inode_v3 key followed by padding. The padding is sized
-+ * by summing 8 + bits / 8 bytes over every field in BCH_INODE_FIELDS_v3().
-+ */
-+struct bkey_inode_buf {
-+ struct bkey_i_inode_v3 inode;
-+
-+#define x(_name, _bits) + 8 + _bits / 8
-+ u8 _pad[0 + BCH_INODE_FIELDS_v3()];
-+#undef x
-+} __packed __aligned(8);
-+
-+void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
-+int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
-+struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *);
-+
-+void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
-+
-+int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
-+ struct bch_inode_unpacked *, subvol_inum, unsigned);
-+
-+int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *,
-+ struct bch_inode_unpacked *, enum btree_update_flags);
-+
-+/* bch2_inode_write_flags() with no update flags (0). */
-+static inline int bch2_inode_write(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bch_inode_unpacked *inode)
-+{
-+ return bch2_inode_write_flags(trans, iter, inode, 0);
-+}
-+
-+void bch2_inode_init_early(struct bch_fs *,
-+ struct bch_inode_unpacked *);
-+void bch2_inode_init_late(struct bch_inode_unpacked *, u64,
-+ uid_t, gid_t, umode_t, dev_t,
-+ struct bch_inode_unpacked *);
-+void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
-+ uid_t, gid_t, umode_t, dev_t,
-+ struct bch_inode_unpacked *);
-+
-+int bch2_inode_create(struct btree_trans *, struct btree_iter *,
-+ struct bch_inode_unpacked *, u32, u64);
-+
-+int bch2_inode_rm(struct bch_fs *, subvol_inum);
-+
-+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *,
-+ subvol_inum,
-+ struct bch_inode_unpacked *);
-+int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum,
-+ struct bch_inode_unpacked *);
-+int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,
-+ struct bch_inode_unpacked *);
-+
-+/*
-+ * Value of option _name for _inode: the per-inode field minus one when the
-+ * field is nonzero, otherwise the value from (_c)->opts.
-+ */
-+#define inode_opt_get(_c, _inode, _name) \
-+ ((_inode)->bi_##_name ? (_inode)->bi_##_name - 1 : (_c)->opts._name)
-+
-+/*
-+ * Store @v directly in the per-inode option field selected by @id. The value
-+ * is written as given, with no adjustment. An @id that is not listed in
-+ * BCH_INODE_OPTS() is a BUG.
-+ */
-+static inline void bch2_inode_opt_set(struct bch_inode_unpacked *inode,
-+ enum inode_opt_id id, u64 v)
-+{
-+ switch (id) {
-+ /* One case per entry of BCH_INODE_OPTS(): */
-+#define x(_name, ...) \
-+ case Inode_opt_##_name: \
-+ inode->bi_##_name = v; \
-+ break;
-+ BCH_INODE_OPTS()
-+#undef x
-+ default:
-+ BUG();
-+ }
-+}
-+
-+/*
-+ * Return the per-inode option field selected by @id exactly as stored, with
-+ * no adjustment. An @id that is not listed in BCH_INODE_OPTS() is a BUG.
-+ */
-+static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode,
-+ enum inode_opt_id id)
-+{
-+ switch (id) {
-+ /* One case per entry of BCH_INODE_OPTS(): */
-+#define x(_name, ...) \
-+ case Inode_opt_##_name: \
-+ return inode->bi_##_name;
-+ BCH_INODE_OPTS()
-+#undef x
-+ default:
-+ BUG();
-+ }
-+}
-+
-+/* Extract bits 12-15 of @mode. */
-+static inline u8 mode_to_type(umode_t mode)
-+{
-+	return (mode >> 12) & 0xf;
-+}
-+
-+/*
-+ * Directory entry type for @inode: DT_SUBVOL when bi_subvol is set,
-+ * otherwise the type derived from bi_mode.
-+ */
-+static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
-+{
-+	if (inode->bi_subvol)
-+		return DT_SUBVOL;
-+
-+	return mode_to_type(inode->bi_mode);
-+}
-+
-+/* i_nlink: */
-+
-+/*
-+ * Offset between the stored bi_nlink and the link count reported by
-+ * bch2_inode_nlink_get(): 2 for directories, 1 for everything else.
-+ */
-+static inline unsigned nlink_bias(umode_t mode)
-+{
-+	if (S_ISDIR(mode))
-+		return 2;
-+
-+	return 1;
-+}
-+
-+/*
-+ * Link count of @bi: 0 when the inode is flagged BCH_INODE_unlinked,
-+ * otherwise the stored bi_nlink plus nlink_bias().
-+ */
-+static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi)
-+{
-+	if (bi->bi_flags & BCH_INODE_unlinked)
-+		return 0;
-+
-+	return bi->bi_nlink + nlink_bias(bi->bi_mode);
-+}
-+
-+/*
-+ * Set the link count of @bi to @nlink. Zero is stored as bi_nlink == 0 plus
-+ * the BCH_INODE_unlinked flag; any other value is stored with nlink_bias()
-+ * subtracted and the flag cleared.
-+ */
-+static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
-+					unsigned nlink)
-+{
-+	if (!nlink) {
-+		bi->bi_nlink = 0;
-+		bi->bi_flags |= BCH_INODE_unlinked;
-+		return;
-+	}
-+
-+	bi->bi_nlink = nlink - nlink_bias(bi->bi_mode);
-+	bi->bi_flags &= ~BCH_INODE_unlinked;
-+}
-+
-+int bch2_inode_nlink_inc(struct bch_inode_unpacked *);
-+void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *);
-+
-+struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
-+void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
-+ struct bch_inode_unpacked *);
-+int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *);
-+
-+int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32);
-+int bch2_delete_dead_inodes(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_INODE_H */
-diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
-new file mode 100644
-index 000000000000..bebc11444ef5
---- /dev/null
-+++ b/fs/bcachefs/io_misc.c
-@@ -0,0 +1,524 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * io_misc.c - fallocate, fpunch, truncate:
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "extent_update.h"
-+#include "inode.h"
-+#include "io_misc.h"
-+#include "io_write.h"
-+#include "logged_ops.h"
-+#include "rebalance.h"
-+#include "subvolume.h"
-+
-+/*
-+ * Overwrites whatever was present with zeroes:
-+ *
-+ * Handles the slot at @iter's current position, covering at most @sectors
-+ * sectors. It inserts either a reservation key or, when opts.nocow is set and
-+ * the superblock version supports unwritten extents, a newly allocated extent
-+ * whose pointers are all marked unwritten.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+int bch2_extent_fallocate(struct btree_trans *trans,
-+ subvol_inum inum,
-+ struct btree_iter *iter,
-+ u64 sectors,
-+ struct bch_io_opts opts,
-+ s64 *i_sectors_delta,
-+ struct write_point_specifier write_point)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct disk_reservation disk_res = { 0 };
-+ struct closure cl;
-+ struct open_buckets open_buckets = { 0 };
-+ struct bkey_s_c k;
-+ struct bkey_buf old, new;
-+ unsigned sectors_allocated = 0;
-+ bool have_reservation = false;
-+ bool unwritten = opts.nocow &&
-+ c->sb.version >= bcachefs_metadata_version_unwritten_extents;
-+ int ret;
-+
-+ bch2_bkey_buf_init(&old);
-+ bch2_bkey_buf_init(&new);
-+ closure_init_stack(&cl);
-+
-+ k = bch2_btree_iter_peek_slot(iter);
-+ ret = bkey_err(k);
-+ /* NOTE(review): this early return skips the cleanup under err: - confirm nothing needs releasing yet */
-+ if (ret)
-+ return ret;
-+
-+ /* Never go past the end of the key found in this slot: */
-+ sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);
-+
-+ /*
-+ * NOTE(review): have_reservation is still false at this point (it is
-+ * only set further down), so this branch is always taken and the
-+ * "if (have_reservation)" branch below is never taken.
-+ */
-+ if (!have_reservation) {
-+ unsigned new_replicas =
-+ max(0, (int) opts.data_replicas -
-+ (int) bch2_bkey_nr_ptrs_fully_allocated(k));
-+ /*
-+ * Get a disk reservation before (in the nocow case) calling
-+ * into the allocator:
-+ */
-+ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
-+ if (unlikely(ret))
-+ goto err;
-+
-+ bch2_bkey_buf_reassemble(&old, c, k);
-+ }
-+
-+ if (have_reservation) {
-+ if (!bch2_extents_match(k, bkey_i_to_s_c(old.k)))
-+ goto err;
-+
-+ bch2_key_resize(&new.k->k, sectors);
-+ } else if (!unwritten) {
-+ /* Not using unwritten extents: insert a reservation key */
-+ struct bkey_i_reservation *reservation;
-+
-+ bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
-+ reservation = bkey_reservation_init(new.k);
-+ reservation->k.p = iter->pos;
-+ bch2_key_resize(&reservation->k, sectors);
-+ reservation->v.nr_replicas = opts.data_replicas;
-+ } else {
-+ /* Allocate space now and insert an extent flagged as unwritten */
-+ struct bkey_i_extent *e;
-+ struct bch_devs_list devs_have;
-+ struct write_point *wp;
-+ struct bch_extent_ptr *ptr;
-+
-+ devs_have.nr = 0;
-+
-+ bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);
-+
-+ e = bkey_extent_init(new.k);
-+ e->k.p = iter->pos;
-+
-+ ret = bch2_alloc_sectors_start_trans(trans,
-+ opts.foreground_target,
-+ false,
-+ write_point,
-+ &devs_have,
-+ opts.data_replicas,
-+ opts.data_replicas,
-+ BCH_WATERMARK_normal, 0, &cl, &wp);
-+ /* A blocked allocation is reported to the caller as a nested restart: */
-+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
-+ ret = -BCH_ERR_transaction_restart_nested;
-+ if (ret)
-+ goto err;
-+
-+ /* Limited further by what the write point has free: */
-+ sectors = min_t(u64, sectors, wp->sectors_free);
-+ sectors_allocated = sectors;
-+
-+ bch2_key_resize(&e->k, sectors);
-+
-+ bch2_open_bucket_get(c, wp, &open_buckets);
-+ bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
-+ bch2_alloc_sectors_done(c, wp);
-+
-+ extent_for_each_ptr(extent_i_to_s(e), ptr)
-+ ptr->unwritten = true;
-+ }
-+
-+ have_reservation = true;
-+
-+ ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
-+ 0, i_sectors_delta, true);
-+err:
-+ if (!ret && sectors_allocated)
-+ bch2_increment_clock(c, sectors_allocated, WRITE);
-+
-+ bch2_open_buckets_put(c, &open_buckets);
-+ bch2_disk_reservation_put(c, &disk_res);
-+ bch2_bkey_buf_exit(&new, c);
-+ bch2_bkey_buf_exit(&old, c);
-+
-+ /*
-+ * NOTE(review): presumably a count other than 1 means something else
-+ * still holds a ref on cl; locks are dropped before waiting for it.
-+ */
-+ if (closure_nr_remaining(&cl) != 1) {
-+ bch2_trans_unlock(trans);
-+ closure_sync(&cl);
-+ }
-+
-+ return ret;
-+}
-+
-+/*
-+ * Delete the extents of @inum from @iter's current position up to offset
-+ * @end, issuing one bch2_extent_update() per loop iteration.
-+ *
-+ * Returns -BCH_ERR_transaction_restart if we had to drop locks: a restart
-+ * seen along the way is remembered and returned in place of 0 once the range
-+ * has been punched. Any other error ends the loop and is returned as is.
-+ */
-+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
-+		   subvol_inum inum, u64 end,
-+		   s64 *i_sectors_delta)
-+{
-+	struct bch_fs *c = trans->c;
-+	/*
-+	 * Largest key size with the low block_bits bits cleared. The mask is
-+	 * built from an unsigned constant: left-shifting the signed value ~0
-+	 * (i.e. -1) is undefined behaviour in ISO C.
-+	 */
-+	unsigned max_sectors = KEY_SIZE_MAX & (~0U << c->block_bits);
-+	struct bpos end_pos = POS(inum.inum, end);
-+	struct bkey_s_c k;
-+	int ret = 0, ret2 = 0;
-+	u32 snapshot;
-+
-+	while (!ret ||
-+	       bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
-+		struct disk_reservation disk_res =
-+			bch2_disk_reservation_init(c, 0);
-+		struct bkey_i delete;
-+
-+		/* Only a restart can get us here with ret set; remember it: */
-+		if (ret)
-+			ret2 = ret;
-+
-+		bch2_trans_begin(trans);
-+
-+		ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+		if (ret)
-+			continue;
-+
-+		bch2_btree_iter_set_snapshot(iter, snapshot);
-+
-+		/*
-+		 * peek_upto() doesn't have ideal semantics for extents:
-+		 */
-+		k = bch2_btree_iter_peek_upto(iter, end_pos);
-+		if (!k.k)
-+			break;
-+
-+		ret = bkey_err(k);
-+		if (ret)
-+			continue;
-+
-+		bkey_init(&delete.k);
-+		delete.k.p = iter->pos;
-+
-+		/* create the biggest key we can */
-+		bch2_key_resize(&delete.k, max_sectors);
-+		bch2_cut_back(end_pos, &delete);
-+
-+		ret = bch2_extent_update(trans, inum, iter, &delete,
-+					 &disk_res, 0, i_sectors_delta, false);
-+		bch2_disk_reservation_put(c, &disk_res);
-+	}
-+
-+	return ret ?: ret2;
-+}
-+
-+/*
-+ * Punch out the extents of @inum between offsets @start and @end using a
-+ * transaction of its own. A transaction restart reported by bch2_fpunch_at()
-+ * is turned into success.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
-+		s64 *i_sectors_delta)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	int ret;
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+			     POS(inum.inum, start),
-+			     BTREE_ITER_INTENT);
-+	ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta);
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	bch2_trans_put(trans);
-+
-+	return bch2_err_matches(ret, BCH_ERR_transaction_restart) ? 0 : ret;
-+}
-+
-+/* truncate: */
-+
-+/* Print the subvol, inum and new_i_size fields of a logged truncate key to @out. */
-+void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
-+{
-+	const struct bch_logged_op_truncate *v = bkey_s_c_to_logged_op_truncate(k).v;
-+
-+	prt_printf(out, "subvol=%u",		le32_to_cpu(v->subvol));
-+	prt_printf(out, " inum=%llu",		le64_to_cpu(v->inum));
-+	prt_printf(out, " new_i_size=%llu",	le64_to_cpu(v->new_i_size));
-+}
-+
-+/*
-+ * Set bi_size of inode @inum to @new_i_size and write the inode back within
-+ * @trans. The iterator is exited whether or not the lookup succeeded.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+static int truncate_set_isize(struct btree_trans *trans,
-+			      subvol_inum inum,
-+			      u64 new_i_size)
-+{
-+	struct btree_iter iter = { NULL };
-+	struct bch_inode_unpacked inode_u;
-+	int ret;
-+
-+	ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
-+	if (!ret) {
-+		inode_u.bi_size = new_i_size;
-+		ret = bch2_inode_write(trans, &iter, &inode_u);
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/*
-+ * Carry out the logged truncate described by @op_k: commit the new i_size,
-+ * then punch out every extent from the new size (rounded up to the block
-+ * size) to the end of the file. bch2_logged_op_finish() is called whether or
-+ * not the truncate succeeded.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ *
-+ * NOTE(review): @i_sectors_delta is a u64 * here but is passed to
-+ * bch2_fpunch_at(), which takes an s64 *.
-+ */
-+static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
-+ struct bkey_i *op_k,
-+ u64 *i_sectors_delta)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter fpunch_iter;
-+ struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k);
-+ subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
-+ u64 new_i_size = le64_to_cpu(op->v.new_i_size);
-+ int ret;
-+
-+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ truncate_set_isize(trans, inum, new_i_size));
-+ if (ret)
-+ goto err;
-+
-+ /* Start at the new size rounded up to a block; >> 9 converts bytes to 512-byte units */
-+ bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents,
-+ POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9),
-+ BTREE_ITER_INTENT);
-+ ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta);
-+ bch2_trans_iter_exit(trans, &fpunch_iter);
-+
-+ /* A restart reported by bch2_fpunch_at() is not treated as a failure: */
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ ret = 0;
-+err:
-+ bch2_logged_op_finish(trans, op_k);
-+ return ret;
-+}
-+
-+/* Resume a logged truncate; no i_sectors_delta is collected (NULL is passed). */
-+int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k)
-+{
-+ return __bch2_resume_logged_op_truncate(trans, op_k, NULL);
-+}
-+
-+/*
-+ * Truncate inode @inum to @new_i_size as a logged operation: a logged_op key
-+ * describing the truncate is started and then carried out, all while holding
-+ * snapshot_create_lock for read.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta)
-+{
-+	struct bkey_i_logged_op_truncate op;
-+	int ret;
-+
-+	bkey_logged_op_truncate_init(&op.k_i);
-+	op.v.subvol	= cpu_to_le32(inum.subvol);
-+	op.v.inum	= cpu_to_le64(inum.inum);
-+	op.v.new_i_size	= cpu_to_le64(new_i_size);
-+
-+	/*
-+	 * Logged ops aren't atomic w.r.t. snapshot creation: creating a
-+	 * snapshot while they're in progress, then crashing, will result in the
-+	 * resume only proceeding in one of the snapshots
-+	 */
-+	down_read(&c->snapshot_create_lock);
-+	ret = bch2_trans_run(c,
-+			bch2_logged_op_start(trans, &op.k_i) ?:
-+			__bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta));
-+	up_read(&c->snapshot_create_lock);
-+
-+	return ret;
-+}
-+
-+/* finsert/fcollapse: */
-+
-+/*
-+ * Print the subvol, inum, dst_offset and src_offset fields of a logged
-+ * finsert key to @out.
-+ */
-+void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
-+{
-+	const struct bch_logged_op_finsert *v = bkey_s_c_to_logged_op_finsert(k).v;
-+
-+	prt_printf(out, "subvol=%u",		le32_to_cpu(v->subvol));
-+	prt_printf(out, " inum=%llu",		le64_to_cpu(v->inum));
-+	prt_printf(out, " dst_offset=%lli",	le64_to_cpu(v->dst_offset));
-+	prt_printf(out, " src_offset=%llu",	le64_to_cpu(v->src_offset));
-+}
-+
-+/*
-+ * Add @len to the i_size of inode @inum and set mtime and ctime to the
-+ * current time. @offset and @len are both shifted left by 9 before use.
-+ *
-+ * For a positive @len, fails with -EFBIG if the new size would exceed
-+ * MAX_LFS_FILESIZE, and with -EINVAL if @offset is not below the current
-+ * i_size. Returns 0 on success or a negative error code.
-+ */
-+static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len)
-+{
-+	struct btree_iter iter;
-+	struct bch_inode_unpacked inode_u;
-+	int ret;
-+
-+	offset	<<= 9;
-+	len	<<= 9;
-+
-+	ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
-+	if (ret)
-+		return ret;
-+
-+	if (len > 0 && MAX_LFS_FILESIZE - inode_u.bi_size < len) {
-+		ret = -EFBIG;
-+		goto err;
-+	}
-+
-+	if (len > 0 && offset >= inode_u.bi_size) {
-+		ret = -EINVAL;
-+		goto err;
-+	}
-+
-+	inode_u.bi_size += len;
-+	inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c);
-+
-+	ret = bch2_inode_write(trans, &iter, &inode_u);
-+err:
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/*
-+ * Run, or resume, the logged finsert/fcollapse operation described by @op_k.
-+ *
-+ * shift = dst_offset - src_offset: a positive shift is an insert, anything
-+ * else is a collapse. Progress is tracked in op->v.state:
-+ *
-+ * start         - insert: grow i_size by len; collapse: punch out the range
-+ *                 from src_offset to src_offset + len. Then falls through.
-+ * shift_extents - move extents by shift, one per commit, recording the
-+ *                 position reached in op->v.pos; afterwards the state
-+ *                 becomes finish and the inode is updated.
-+ * finish        - nothing left to do.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
-+ struct bkey_i *op_k,
-+ u64 *i_sectors_delta)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
-+ subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
-+ struct bch_io_opts opts;
-+ u64 dst_offset = le64_to_cpu(op->v.dst_offset);
-+ u64 src_offset = le64_to_cpu(op->v.src_offset);
-+ s64 shift = dst_offset - src_offset;
-+ u64 len = abs(shift);
-+ u64 pos = le64_to_cpu(op->v.pos);
-+ bool insert = shift > 0;
-+ int ret = 0;
-+
-+ ret = bch2_inum_opts_get(trans, inum, &opts);
-+ /* NOTE(review): this early return skips bch2_logged_op_finish(), unlike the err: path */
-+ if (ret)
-+ return ret;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ POS(inum.inum, 0),
-+ BTREE_ITER_INTENT);
-+
-+ switch (op->v.state) {
-+case LOGGED_OP_FINSERT_start:
-+ op->v.state = LOGGED_OP_FINSERT_shift_extents;
-+
-+ if (insert) {
-+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ adjust_i_size(trans, inum, src_offset, len) ?:
-+ bch2_logged_op_update(trans, &op->k_i));
-+ if (ret)
-+ goto err;
-+ } else {
-+ bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset));
-+
-+ ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta);
-+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto err;
-+
-+ /*
-+ * NOTE(review): the result of this commit is not checked; ret is
-+ * overwritten at the top of the shift loop below.
-+ */
-+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ bch2_logged_op_update(trans, &op->k_i));
-+ }
-+
-+ fallthrough;
-+case LOGGED_OP_FINSERT_shift_extents:
-+ while (1) {
-+ struct disk_reservation disk_res =
-+ bch2_disk_reservation_init(c, 0);
-+ struct bkey_i delete, *copy;
-+ struct bkey_s_c k;
-+ struct bpos src_pos = POS(inum.inum, src_offset);
-+ u32 snapshot;
-+
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (ret)
-+ goto btree_err;
-+
-+ bch2_btree_iter_set_snapshot(&iter, snapshot);
-+ bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot));
-+
-+ /* Insert walks backwards from pos, collapse walks forwards: */
-+ k = insert
-+ ? bch2_btree_iter_peek_prev(&iter)
-+ : bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
-+ if ((ret = bkey_err(k)))
-+ goto btree_err;
-+
-+ /* Stop when there is no key left past src_offset in this inode: */
-+ if (!k.k ||
-+ k.k->p.inode != inum.inum ||
-+ bkey_le(k.k->p, POS(inum.inum, src_offset)))
-+ break;
-+
-+ copy = bch2_bkey_make_mut_noupdate(trans, k);
-+ if ((ret = PTR_ERR_OR_ZERO(copy)))
-+ goto btree_err;
-+
-+ /* An extent straddling src_pos: only the part after it is moved */
-+ if (insert &&
-+ bkey_lt(bkey_start_pos(k.k), src_pos)) {
-+ bch2_cut_front(src_pos, copy);
-+
-+ /* Splitting compressed extent? */
-+ bch2_disk_reservation_add(c, &disk_res,
-+ copy->k.size *
-+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)),
-+ BCH_DISK_RESERVATION_NOFAIL);
-+ }
-+
-+ /* delete covers the extent's old range; copy is re-inserted shifted: */
-+ bkey_init(&delete.k);
-+ delete.k.p = copy->k.p;
-+ delete.k.p.snapshot = snapshot;
-+ delete.k.size = copy->k.size;
-+
-+ copy->k.p.offset += shift;
-+ copy->k.p.snapshot = snapshot;
-+
-+ /* Record where the next iteration (or a resume) should continue: */
-+ op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
-+
-+ ret = bch2_bkey_set_needs_rebalance(c, copy,
-+ opts.background_target,
-+ opts.background_compression) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
-+ bch2_logged_op_update(trans, &op->k_i) ?:
-+ bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL);
-+btree_err:
-+ bch2_disk_reservation_put(c, &disk_res);
-+
-+ /* On a restart, retry from the unchanged local pos: */
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ continue;
-+ if (ret)
-+ goto err;
-+
-+ pos = le64_to_cpu(op->v.pos);
-+ }
-+
-+ op->v.state = LOGGED_OP_FINSERT_finish;
-+
-+ if (!insert) {
-+ /* Collapse: shift is not positive here, so this shrinks i_size */
-+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ adjust_i_size(trans, inum, src_offset, shift) ?:
-+ bch2_logged_op_update(trans, &op->k_i));
-+ } else {
-+ /* We need an inode update to update bi_journal_seq for fsync: */
-+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ adjust_i_size(trans, inum, 0, 0) ?:
-+ bch2_logged_op_update(trans, &op->k_i));
-+ }
-+
-+ break;
-+case LOGGED_OP_FINSERT_finish:
-+ break;
-+ }
-+err:
-+ bch2_logged_op_finish(trans, op_k);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k)
-+{
-+ return __bch2_resume_logged_op_finsert(trans, op_k, NULL);
-+}
-+
-+int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
-+ u64 offset, u64 len, bool insert,
-+ s64 *i_sectors_delta)
-+{
-+ struct bkey_i_logged_op_finsert op;
-+ s64 shift = insert ? len : -len;
-+
-+ bkey_logged_op_finsert_init(&op.k_i);
-+ op.v.subvol = cpu_to_le32(inum.subvol);
-+ op.v.inum = cpu_to_le64(inum.inum);
-+ op.v.dst_offset = cpu_to_le64(offset + shift);
-+ op.v.src_offset = cpu_to_le64(offset);
-+ op.v.pos = cpu_to_le64(insert ? U64_MAX : offset);
-+
-+ /*
-+ * Logged ops aren't atomic w.r.t. snapshot creation: creating a
-+ * snapshot while they're in progress, then crashing, will result in the
-+ * resume only proceeding in one of the snapshots
-+ */
-+ down_read(&c->snapshot_create_lock);
-+ int ret = bch2_trans_run(c,
-+ bch2_logged_op_start(trans, &op.k_i) ?:
-+ __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta));
-+ up_read(&c->snapshot_create_lock);
-+
-+ return ret;
-+}
-diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h
-new file mode 100644
-index 000000000000..9cb44a7c43c1
---- /dev/null
-+++ b/fs/bcachefs/io_misc.h
-@@ -0,0 +1,34 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_IO_MISC_H
-+#define _BCACHEFS_IO_MISC_H
-+
-+int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
-+ u64, struct bch_io_opts, s64 *,
-+ struct write_point_specifier);
-+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
-+ subvol_inum, u64, s64 *);
-+int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
-+
-+void bch2_logged_op_truncate_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_logged_op_truncate ((struct bkey_ops) { \
-+ .val_to_text = bch2_logged_op_truncate_to_text, \
-+ .min_val_size = 24, \
-+})
-+
-+int bch2_resume_logged_op_truncate(struct btree_trans *, struct bkey_i *);
-+
-+int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *);
-+
-+void bch2_logged_op_finsert_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_logged_op_finsert ((struct bkey_ops) { \
-+ .val_to_text = bch2_logged_op_finsert_to_text, \
-+ .min_val_size = 24, \
-+})
-+
-+int bch2_resume_logged_op_finsert(struct btree_trans *, struct bkey_i *);
-+
-+int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *);
-+
-+#endif /* _BCACHEFS_IO_MISC_H */
-diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
-new file mode 100644
-index 000000000000..a56ed553dc15
---- /dev/null
-+++ b/fs/bcachefs/io_read.c
-@@ -0,0 +1,1210 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Some low level IO code, and hacks for various block layer limitations
-+ *
-+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "checksum.h"
-+#include "clock.h"
-+#include "compress.h"
-+#include "data_update.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "io_read.h"
-+#include "io_misc.h"
-+#include "io_write.h"
-+#include "subvolume.h"
-+#include "trace.h"
-+
-+#include <linux/sched/mm.h>
-+
-+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-+
-+static bool bch2_target_congested(struct bch_fs *c, u16 target)
-+{
-+ const struct bch_devs_mask *devs;
-+ unsigned d, nr = 0, total = 0;
-+ u64 now = local_clock(), last;
-+ s64 congested;
-+ struct bch_dev *ca;
-+
-+ if (!target)
-+ return false;
-+
-+ rcu_read_lock();
-+ devs = bch2_target_to_mask(c, target) ?:
-+ &c->rw_devs[BCH_DATA_user];
-+
-+ for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
-+ ca = rcu_dereference(c->devs[d]);
-+ if (!ca)
-+ continue;
-+
-+ congested = atomic_read(&ca->congested);
-+ last = READ_ONCE(ca->congested_last);
-+ if (time_after64(now, last))
-+ congested -= (now - last) >> 12;
-+
-+ total += max(congested, 0LL);
-+ nr++;
-+ }
-+ rcu_read_unlock();
-+
-+ return bch2_rand_range(nr * CONGESTED_MAX) < total;
-+}
-+
-+#else
-+
-+static bool bch2_target_congested(struct bch_fs *c, u16 target)
-+{
-+ return false;
-+}
-+
-+#endif
-+
-+/* Cache promotion on read */
-+
-+struct promote_op {
-+ struct rcu_head rcu;
-+ u64 start_time;
-+
-+ struct rhash_head hash;
-+ struct bpos pos;
-+
-+ struct data_update write;
-+ struct bio_vec bi_inline_vecs[0]; /* must be last */
-+};
-+
-+static const struct rhashtable_params bch_promote_params = {
-+ .head_offset = offsetof(struct promote_op, hash),
-+ .key_offset = offsetof(struct promote_op, pos),
-+ .key_len = sizeof(struct bpos),
-+};
-+
-+static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
-+ struct bpos pos,
-+ struct bch_io_opts opts,
-+ unsigned flags)
-+{
-+ BUG_ON(!opts.promote_target);
-+
-+ if (!(flags & BCH_READ_MAY_PROMOTE))
-+ return -BCH_ERR_nopromote_may_not;
-+
-+ if (bch2_bkey_has_target(c, k, opts.promote_target))
-+ return -BCH_ERR_nopromote_already_promoted;
-+
-+ if (bkey_extent_is_unwritten(k))
-+ return -BCH_ERR_nopromote_unwritten;
-+
-+ if (bch2_target_congested(c, opts.promote_target))
-+ return -BCH_ERR_nopromote_congested;
-+
-+ if (rhashtable_lookup_fast(&c->promote_table, &pos,
-+ bch_promote_params))
-+ return -BCH_ERR_nopromote_in_flight;
-+
-+ return 0;
-+}
-+
-+static void promote_free(struct bch_fs *c, struct promote_op *op)
-+{
-+ int ret;
-+
-+ bch2_data_update_exit(&op->write);
-+
-+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
-+ bch_promote_params);
-+ BUG_ON(ret);
-+ bch2_write_ref_put(c, BCH_WRITE_REF_promote);
-+ kfree_rcu(op, rcu);
-+}
-+
-+static void promote_done(struct bch_write_op *wop)
-+{
-+ struct promote_op *op =
-+ container_of(wop, struct promote_op, write.op);
-+ struct bch_fs *c = op->write.op.c;
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
-+ op->start_time);
-+ promote_free(c, op);
-+}
-+
-+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
-+{
-+ struct bio *bio = &op->write.op.wbio.bio;
-+
-+ trace_and_count(op->write.op.c, read_promote, &rbio->bio);
-+
-+ /* we now own pages: */
-+ BUG_ON(!rbio->bounce);
-+ BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
-+
-+ memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
-+ sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
-+ swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
-+
-+ bch2_data_update_read_done(&op->write, rbio->pick.crc);
-+}
-+
-+static struct promote_op *__promote_alloc(struct btree_trans *trans,
-+ enum btree_id btree_id,
-+ struct bkey_s_c k,
-+ struct bpos pos,
-+ struct extent_ptr_decoded *pick,
-+ struct bch_io_opts opts,
-+ unsigned sectors,
-+ struct bch_read_bio **rbio)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct promote_op *op = NULL;
-+ struct bio *bio;
-+ unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
-+ int ret;
-+
-+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
-+ return NULL;
-+
-+ op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS);
-+ if (!op)
-+ goto err;
-+
-+ op->start_time = local_clock();
-+ op->pos = pos;
-+
-+ /*
-+ * We don't use the mempool here because extents that aren't
-+ * checksummed or compressed can be too big for the mempool:
-+ */
-+ *rbio = kzalloc(sizeof(struct bch_read_bio) +
-+ sizeof(struct bio_vec) * pages,
-+ GFP_NOFS);
-+ if (!*rbio)
-+ goto err;
-+
-+ rbio_init(&(*rbio)->bio, opts);
-+ bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
-+
-+ if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
-+ GFP_NOFS))
-+ goto err;
-+
-+ (*rbio)->bounce = true;
-+ (*rbio)->split = true;
-+ (*rbio)->kmalloc = true;
-+
-+ if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
-+ bch_promote_params))
-+ goto err;
-+
-+ bio = &op->write.op.wbio.bio;
-+ bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
-+
-+ ret = bch2_data_update_init(trans, NULL, &op->write,
-+ writepoint_hashed((unsigned long) current),
-+ opts,
-+ (struct data_update_opts) {
-+ .target = opts.promote_target,
-+ .extra_replicas = 1,
-+ .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
-+ },
-+ btree_id, k);
-+ /*
-+ * possible errors: -BCH_ERR_nocow_lock_blocked,
-+ * -BCH_ERR_ENOSPC_disk_reservation:
-+ */
-+ if (ret) {
-+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
-+ bch_promote_params);
-+ BUG_ON(ret);
-+ goto err;
-+ }
-+
-+ op->write.op.end_io = promote_done;
-+
-+ return op;
-+err:
-+ if (*rbio)
-+ bio_free_pages(&(*rbio)->bio);
-+ kfree(*rbio);
-+ *rbio = NULL;
-+ kfree(op);
-+ bch2_write_ref_put(c, BCH_WRITE_REF_promote);
-+ return NULL;
-+}
-+
-+noinline
-+static struct promote_op *promote_alloc(struct btree_trans *trans,
-+ struct bvec_iter iter,
-+ struct bkey_s_c k,
-+ struct extent_ptr_decoded *pick,
-+ struct bch_io_opts opts,
-+ unsigned flags,
-+ struct bch_read_bio **rbio,
-+ bool *bounce,
-+ bool *read_full)
-+{
-+ struct bch_fs *c = trans->c;
-+ bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
-+ /* data might have to be decompressed in the write path: */
-+ unsigned sectors = promote_full
-+ ? max(pick->crc.compressed_size, pick->crc.live_size)
-+ : bvec_iter_sectors(iter);
-+ struct bpos pos = promote_full
-+ ? bkey_start_pos(k.k)
-+ : POS(k.k->p.inode, iter.bi_sector);
-+ struct promote_op *promote;
-+ int ret;
-+
-+ ret = should_promote(c, k, pos, opts, flags);
-+ if (ret)
-+ goto nopromote;
-+
-+ promote = __promote_alloc(trans,
-+ k.k->type == KEY_TYPE_reflink_v
-+ ? BTREE_ID_reflink
-+ : BTREE_ID_extents,
-+ k, pos, pick, opts, sectors, rbio);
-+ if (!promote) {
-+ ret = -BCH_ERR_nopromote_enomem;
-+ goto nopromote;
-+ }
-+
-+ *bounce = true;
-+ *read_full = promote_full;
-+ return promote;
-+nopromote:
-+ trace_read_nopromote(c, ret);
-+ return NULL;
-+}
-+
-+/* Read */
-+
-+#define READ_RETRY_AVOID 1
-+#define READ_RETRY 2
-+#define READ_ERR 3
-+
-+enum rbio_context {
-+ RBIO_CONTEXT_NULL,
-+ RBIO_CONTEXT_HIGHPRI,
-+ RBIO_CONTEXT_UNBOUND,
-+};
-+
-+static inline struct bch_read_bio *
-+bch2_rbio_parent(struct bch_read_bio *rbio)
-+{
-+ return rbio->split ? rbio->parent : rbio;
-+}
-+
-+__always_inline
-+static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn,
-+ enum rbio_context context,
-+ struct workqueue_struct *wq)
-+{
-+ if (context <= rbio->context) {
-+ fn(&rbio->work);
-+ } else {
-+ rbio->work.func = fn;
-+ rbio->context = context;
-+ queue_work(wq, &rbio->work);
-+ }
-+}
-+
-+static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
-+{
-+ BUG_ON(rbio->bounce && !rbio->split);
-+
-+ if (rbio->promote)
-+ promote_free(rbio->c, rbio->promote);
-+ rbio->promote = NULL;
-+
-+ if (rbio->bounce)
-+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
-+
-+ if (rbio->split) {
-+ struct bch_read_bio *parent = rbio->parent;
-+
-+ if (rbio->kmalloc)
-+ kfree(rbio);
-+ else
-+ bio_put(&rbio->bio);
-+
-+ rbio = parent;
-+ }
-+
-+ return rbio;
-+}
-+
-+/*
-+ * Only called on a top level bch_read_bio to complete an entire read request,
-+ * not a split:
-+ */
-+static void bch2_rbio_done(struct bch_read_bio *rbio)
-+{
-+ if (rbio->start_time)
-+ bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
-+ rbio->start_time);
-+ bio_endio(&rbio->bio);
-+}
-+
-+static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
-+ struct bvec_iter bvec_iter,
-+ struct bch_io_failures *failed,
-+ unsigned flags)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_buf sk;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ flags &= ~BCH_READ_LAST_FRAGMENT;
-+ flags |= BCH_READ_MUST_CLONE;
-+
-+ bch2_bkey_buf_init(&sk);
-+
-+ bch2_trans_iter_init(trans, &iter, rbio->data_btree,
-+ rbio->read_pos, BTREE_ITER_SLOTS);
-+retry:
-+ rbio->bio.bi_status = 0;
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ if (bkey_err(k))
-+ goto err;
-+
-+ bch2_bkey_buf_reassemble(&sk, c, k);
-+ k = bkey_i_to_s_c(sk.k);
-+ bch2_trans_unlock(trans);
-+
-+ if (!bch2_bkey_matches_ptr(c, k,
-+ rbio->pick.ptr,
-+ rbio->data_pos.offset -
-+ rbio->pick.crc.offset)) {
-+ /* extent we wanted to read no longer exists: */
-+ rbio->hole = true;
-+ goto out;
-+ }
-+
-+ ret = __bch2_read_extent(trans, rbio, bvec_iter,
-+ rbio->read_pos,
-+ rbio->data_btree,
-+ k, 0, failed, flags);
-+ if (ret == READ_RETRY)
-+ goto retry;
-+ if (ret)
-+ goto err;
-+out:
-+ bch2_rbio_done(rbio);
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ bch2_bkey_buf_exit(&sk, c);
-+ return;
-+err:
-+ rbio->bio.bi_status = BLK_STS_IOERR;
-+ goto out;
-+}
-+
-+static void bch2_rbio_retry(struct work_struct *work)
-+{
-+ struct bch_read_bio *rbio =
-+ container_of(work, struct bch_read_bio, work);
-+ struct bch_fs *c = rbio->c;
-+ struct bvec_iter iter = rbio->bvec_iter;
-+ unsigned flags = rbio->flags;
-+ subvol_inum inum = {
-+ .subvol = rbio->subvol,
-+ .inum = rbio->read_pos.inode,
-+ };
-+ struct bch_io_failures failed = { .nr = 0 };
-+
-+ trace_and_count(c, read_retry, &rbio->bio);
-+
-+ if (rbio->retry == READ_RETRY_AVOID)
-+ bch2_mark_io_failure(&failed, &rbio->pick);
-+
-+ rbio->bio.bi_status = 0;
-+
-+ rbio = bch2_rbio_free(rbio);
-+
-+ flags |= BCH_READ_IN_RETRY;
-+ flags &= ~BCH_READ_MAY_PROMOTE;
-+
-+ if (flags & BCH_READ_NODECODE) {
-+ bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
-+ } else {
-+ flags &= ~BCH_READ_LAST_FRAGMENT;
-+ flags |= BCH_READ_MUST_CLONE;
-+
-+ __bch2_read(c, rbio, iter, inum, &failed, flags);
-+ }
-+}
-+
-+static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
-+ blk_status_t error)
-+{
-+ rbio->retry = retry;
-+
-+ if (rbio->flags & BCH_READ_IN_RETRY)
-+ return;
-+
-+ if (retry == READ_ERR) {
-+ rbio = bch2_rbio_free(rbio);
-+
-+ rbio->bio.bi_status = error;
-+ bch2_rbio_done(rbio);
-+ } else {
-+ bch2_rbio_punt(rbio, bch2_rbio_retry,
-+ RBIO_CONTEXT_UNBOUND, system_unbound_wq);
-+ }
-+}
-+
-+static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
-+ struct bch_read_bio *rbio)
-+{
-+ struct bch_fs *c = rbio->c;
-+ u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
-+ struct bch_extent_crc_unpacked new_crc;
-+ struct btree_iter iter;
-+ struct bkey_i *new;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ if (crc_is_compressed(rbio->pick.crc))
-+ return 0;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
-+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-+ if ((ret = bkey_err(k)))
-+ goto out;
-+
-+ if (bversion_cmp(k.k->version, rbio->version) ||
-+ !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
-+ goto out;
-+
-+ /* Extent was merged? */
-+ if (bkey_start_offset(k.k) < data_offset ||
-+ k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
-+ goto out;
-+
-+ if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
-+ rbio->pick.crc, NULL, &new_crc,
-+ bkey_start_offset(k.k) - data_offset, k.k->size,
-+ rbio->pick.crc.csum_type)) {
-+ bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
-+ ret = 0;
-+ goto out;
-+ }
-+
-+ /*
-+ * going to be temporarily appending another checksum entry:
-+ */
-+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
-+ sizeof(struct bch_extent_crc128));
-+ if ((ret = PTR_ERR_OR_ZERO(new)))
-+ goto out;
-+
-+ bkey_reassemble(new, k);
-+
-+ if (!bch2_bkey_narrow_crcs(new, new_crc))
-+ goto out;
-+
-+ ret = bch2_trans_update(trans, &iter, new,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+out:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
-+{
-+ bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ __bch2_rbio_narrow_crcs(trans, rbio));
-+}
-+
-+/* Inner part that may run in process context */
-+static void __bch2_read_endio(struct work_struct *work)
-+{
-+ struct bch_read_bio *rbio =
-+ container_of(work, struct bch_read_bio, work);
-+ struct bch_fs *c = rbio->c;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
-+ struct bio *src = &rbio->bio;
-+ struct bio *dst = &bch2_rbio_parent(rbio)->bio;
-+ struct bvec_iter dst_iter = rbio->bvec_iter;
-+ struct bch_extent_crc_unpacked crc = rbio->pick.crc;
-+ struct nonce nonce = extent_nonce(rbio->version, crc);
-+ unsigned nofs_flags;
-+ struct bch_csum csum;
-+ int ret;
-+
-+ nofs_flags = memalloc_nofs_save();
-+
-+ /* Reset iterator for checksumming and copying bounced data: */
-+ if (rbio->bounce) {
-+ src->bi_iter.bi_size = crc.compressed_size << 9;
-+ src->bi_iter.bi_idx = 0;
-+ src->bi_iter.bi_bvec_done = 0;
-+ } else {
-+ src->bi_iter = rbio->bvec_iter;
-+ }
-+
-+ csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
-+ if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
-+ goto csum_err;
-+
-+ /*
-+ * XXX
-+ * We need to rework the narrow_crcs path to deliver the read completion
-+ * first, and then punt to a different workqueue, otherwise we're
-+ * holding up reads while doing btree updates which is bad for memory
-+ * reclaim.
-+ */
-+ if (unlikely(rbio->narrow_crcs))
-+ bch2_rbio_narrow_crcs(rbio);
-+
-+ if (rbio->flags & BCH_READ_NODECODE)
-+ goto nodecode;
-+
-+ /* Adjust crc to point to subset of data we want: */
-+ crc.offset += rbio->offset_into_extent;
-+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
-+
-+ if (crc_is_compressed(crc)) {
-+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-+ if (ret)
-+ goto decrypt_err;
-+
-+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
-+ !c->opts.no_data_io)
-+ goto decompression_err;
-+ } else {
-+ /* don't need to decrypt the entire bio: */
-+ nonce = nonce_add(nonce, crc.offset << 9);
-+ bio_advance(src, crc.offset << 9);
-+
-+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
-+ src->bi_iter.bi_size = dst_iter.bi_size;
-+
-+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-+ if (ret)
-+ goto decrypt_err;
-+
-+ if (rbio->bounce) {
-+ struct bvec_iter src_iter = src->bi_iter;
-+
-+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
-+ }
-+ }
-+
-+ if (rbio->promote) {
-+ /*
-+ * Re encrypt data we decrypted, so it's consistent with
-+ * rbio->crc:
-+ */
-+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-+ if (ret)
-+ goto decrypt_err;
-+
-+ promote_start(rbio->promote, rbio);
-+ rbio->promote = NULL;
-+ }
-+nodecode:
-+ if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
-+ rbio = bch2_rbio_free(rbio);
-+ bch2_rbio_done(rbio);
-+ }
-+out:
-+ memalloc_nofs_restore(nofs_flags);
-+ return;
-+csum_err:
-+ /*
-+ * Checksum error: if the bio wasn't bounced, we may have been
-+ * reading into buffers owned by userspace (that userspace can
-+ * scribble over) - retry the read, bouncing it this time:
-+ */
-+ if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
-+ rbio->flags |= BCH_READ_MUST_BOUNCE;
-+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
-+ goto out;
-+ }
-+
-+ bch_err_inum_offset_ratelimited(ca,
-+ rbio->read_pos.inode,
-+ rbio->read_pos.offset << 9,
-+ "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
-+ rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
-+ csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
-+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
-+ goto out;
-+decompression_err:
-+ bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
-+ rbio->read_pos.offset << 9,
-+ "decompression error");
-+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
-+ goto out;
-+decrypt_err:
-+ bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
-+ rbio->read_pos.offset << 9,
-+ "decrypt error");
-+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
-+ goto out;
-+}
-+
-+static void bch2_read_endio(struct bio *bio)
-+{
-+ struct bch_read_bio *rbio =
-+ container_of(bio, struct bch_read_bio, bio);
-+ struct bch_fs *c = rbio->c;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
-+ struct workqueue_struct *wq = NULL;
-+ enum rbio_context context = RBIO_CONTEXT_NULL;
-+
-+ if (rbio->have_ioref) {
-+ bch2_latency_acct(ca, rbio->submit_time, READ);
-+ percpu_ref_put(&ca->io_ref);
-+ }
-+
-+ if (!rbio->split)
-+ rbio->bio.bi_end_io = rbio->end_io;
-+
-+ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
-+ rbio->read_pos.inode,
-+ rbio->read_pos.offset,
-+ "data read error: %s",
-+ bch2_blk_status_to_str(bio->bi_status))) {
-+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
-+ return;
-+ }
-+
-+ if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
-+ ptr_stale(ca, &rbio->pick.ptr)) {
-+ trace_and_count(c, read_reuse_race, &rbio->bio);
-+
-+ if (rbio->flags & BCH_READ_RETRY_IF_STALE)
-+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
-+ else
-+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
-+ return;
-+ }
-+
-+ if (rbio->narrow_crcs ||
-+ rbio->promote ||
-+ crc_is_compressed(rbio->pick.crc) ||
-+ bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
-+ context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
-+ else if (rbio->pick.crc.csum_type)
-+ context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;
-+
-+ bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
-+}
-+
-+int __bch2_read_indirect_extent(struct btree_trans *trans,
-+ unsigned *offset_into_extent,
-+ struct bkey_buf *orig_k)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ u64 reflink_offset;
-+ int ret;
-+
-+ reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
-+ *offset_into_extent;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink,
-+ POS(0, reflink_offset), 0);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (k.k->type != KEY_TYPE_reflink_v &&
-+ k.k->type != KEY_TYPE_indirect_inline_data) {
-+ bch_err_inum_offset_ratelimited(trans->c,
-+ orig_k->k->k.p.inode,
-+ orig_k->k->k.p.offset << 9,
-+ "%llu len %u points to nonexistent indirect extent %llu",
-+ orig_k->k->k.p.offset,
-+ orig_k->k->k.size,
-+ reflink_offset);
-+ bch2_inconsistent_error(trans->c);
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
-+ bch2_bkey_buf_reassemble(orig_k, trans->c, k);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
-+ struct bkey_s_c k,
-+ struct bch_extent_ptr ptr)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
-+ struct btree_iter iter;
-+ struct printbuf buf = PRINTBUF;
-+ int ret;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
-+ PTR_BUCKET_POS(c, &ptr),
-+ BTREE_ITER_CACHED);
-+
-+ prt_printf(&buf, "Attempting to read from stale dirty pointer:");
-+ printbuf_indent_add(&buf, 2);
-+ prt_newline(&buf);
-+
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ prt_newline(&buf);
-+
-+ prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
-+
-+ ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
-+ if (!ret) {
-+ prt_newline(&buf);
-+ bch2_bkey_val_to_text(&buf, c, k);
-+ }
-+
-+ bch2_fs_inconsistent(c, "%s", buf.buf);
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ printbuf_exit(&buf);
-+}
-+
-+int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
-+ struct bvec_iter iter, struct bpos read_pos,
-+ enum btree_id data_btree, struct bkey_s_c k,
-+ unsigned offset_into_extent,
-+ struct bch_io_failures *failed, unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct extent_ptr_decoded pick;
-+ struct bch_read_bio *rbio = NULL;
-+ struct bch_dev *ca = NULL;
-+ struct promote_op *promote = NULL;
-+ bool bounce = false, read_full = false, narrow_crcs = false;
-+ struct bpos data_pos = bkey_start_pos(k.k);
-+ int pick_ret;
-+
-+ if (bkey_extent_is_inline_data(k.k)) {
-+ unsigned bytes = min_t(unsigned, iter.bi_size,
-+ bkey_inline_data_bytes(k.k));
-+
-+ swap(iter.bi_size, bytes);
-+ memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
-+ swap(iter.bi_size, bytes);
-+ bio_advance_iter(&orig->bio, &iter, bytes);
-+ zero_fill_bio_iter(&orig->bio, iter);
-+ goto out_read_done;
-+ }
-+retry_pick:
-+ pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
-+
-+ /* hole or reservation - just zero fill: */
-+ if (!pick_ret)
-+ goto hole;
-+
-+ if (pick_ret < 0) {
-+ bch_err_inum_offset_ratelimited(c,
-+ read_pos.inode, read_pos.offset << 9,
-+ "no device to read from");
-+ goto err;
-+ }
-+
-+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-+
-+ /*
-+ * Stale dirty pointers are treated as IO errors, but @failed isn't
-+ * allocated unless we're in the retry path - so if we're not in the
-+ * retry path, don't check here, it'll be caught in bch2_read_endio()
-+ * and we'll end up in the retry path:
-+ */
-+ if ((flags & BCH_READ_IN_RETRY) &&
-+ !pick.ptr.cached &&
-+ unlikely(ptr_stale(ca, &pick.ptr))) {
-+ read_from_stale_dirty_pointer(trans, k, pick.ptr);
-+ bch2_mark_io_failure(failed, &pick);
-+ goto retry_pick;
-+ }
-+
-+ /*
-+ * Unlock the iterator while the btree node's lock is still in
-+ * cache, before doing the IO:
-+ */
-+ bch2_trans_unlock(trans);
-+
-+ if (flags & BCH_READ_NODECODE) {
-+ /*
-+ * can happen if we retry, and the extent we were going to read
-+ * has been merged in the meantime:
-+ */
-+ if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
-+ goto hole;
-+
-+ iter.bi_size = pick.crc.compressed_size << 9;
-+ goto get_bio;
-+ }
-+
-+ if (!(flags & BCH_READ_LAST_FRAGMENT) ||
-+ bio_flagged(&orig->bio, BIO_CHAIN))
-+ flags |= BCH_READ_MUST_CLONE;
-+
-+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
-+ bch2_can_narrow_extent_crcs(k, pick.crc);
-+
-+ if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
-+ flags |= BCH_READ_MUST_BOUNCE;
-+
-+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-+
-+ if (crc_is_compressed(pick.crc) ||
-+ (pick.crc.csum_type != BCH_CSUM_none &&
-+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
-+ (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
-+ (flags & BCH_READ_USER_MAPPED)) ||
-+ (flags & BCH_READ_MUST_BOUNCE)))) {
-+ read_full = true;
-+ bounce = true;
-+ }
-+
-+ if (orig->opts.promote_target)
-+ promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
-+ &rbio, &bounce, &read_full);
-+
-+ if (!read_full) {
-+ EBUG_ON(crc_is_compressed(pick.crc));
-+ EBUG_ON(pick.crc.csum_type &&
-+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
-+ bvec_iter_sectors(iter) != pick.crc.live_size ||
-+ pick.crc.offset ||
-+ offset_into_extent));
-+
-+ data_pos.offset += offset_into_extent;
-+ pick.ptr.offset += pick.crc.offset +
-+ offset_into_extent;
-+ offset_into_extent = 0;
-+ pick.crc.compressed_size = bvec_iter_sectors(iter);
-+ pick.crc.uncompressed_size = bvec_iter_sectors(iter);
-+ pick.crc.offset = 0;
-+ pick.crc.live_size = bvec_iter_sectors(iter);
-+ }
-+get_bio:
-+ if (rbio) {
-+ /*
-+ * promote already allocated bounce rbio:
-+ * promote needs to allocate a bio big enough for uncompressing
-+ * data in the write path, but we're not going to use it all
-+ * here:
-+ */
-+ EBUG_ON(rbio->bio.bi_iter.bi_size <
-+ pick.crc.compressed_size << 9);
-+ rbio->bio.bi_iter.bi_size =
-+ pick.crc.compressed_size << 9;
-+ } else if (bounce) {
-+ unsigned sectors = pick.crc.compressed_size;
-+
-+ rbio = rbio_init(bio_alloc_bioset(NULL,
-+ DIV_ROUND_UP(sectors, PAGE_SECTORS),
-+ 0,
-+ GFP_NOFS,
-+ &c->bio_read_split),
-+ orig->opts);
-+
-+ bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
-+ rbio->bounce = true;
-+ rbio->split = true;
-+ } else if (flags & BCH_READ_MUST_CLONE) {
-+ /*
-+ * Have to clone if there were any splits, due to error
-+ * reporting issues (if a split errored, and retrying didn't
-+ * work, when it reports the error to its parent (us) we don't
-+ * know if the error was from our bio, and we should retry, or
-+ * from the whole bio, in which case we don't want to retry and
-+ * lose the error)
-+ */
-+ rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
-+ &c->bio_read_split),
-+ orig->opts);
-+ rbio->bio.bi_iter = iter;
-+ rbio->split = true;
-+ } else {
-+ rbio = orig;
-+ rbio->bio.bi_iter = iter;
-+ EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
-+ }
-+
-+ EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
-+
-+ rbio->c = c;
-+ rbio->submit_time = local_clock();
-+ if (rbio->split)
-+ rbio->parent = orig;
-+ else
-+ rbio->end_io = orig->bio.bi_end_io;
-+ rbio->bvec_iter = iter;
-+ rbio->offset_into_extent= offset_into_extent;
-+ rbio->flags = flags;
-+ rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
-+ rbio->narrow_crcs = narrow_crcs;
-+ rbio->hole = 0;
-+ rbio->retry = 0;
-+ rbio->context = 0;
-+ /* XXX: only initialize this if needed */
-+ rbio->devs_have = bch2_bkey_devs(k);
-+ rbio->pick = pick;
-+ rbio->subvol = orig->subvol;
-+ rbio->read_pos = read_pos;
-+ rbio->data_btree = data_btree;
-+ rbio->data_pos = data_pos;
-+ rbio->version = k.k->version;
-+ rbio->promote = promote;
-+ INIT_WORK(&rbio->work, NULL);
-+
-+ rbio->bio.bi_opf = orig->bio.bi_opf;
-+ rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
-+ rbio->bio.bi_end_io = bch2_read_endio;
-+
-+ if (rbio->bounce)
-+ trace_and_count(c, read_bounce, &rbio->bio);
-+
-+ this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
-+ bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
-+
-+ /*
-+ * If it's being moved internally, we don't want to flag it as a cache
-+ * hit:
-+ */
-+ if (pick.ptr.cached && !(flags & BCH_READ_NODECODE))
-+ bch2_bucket_io_time_reset(trans, pick.ptr.dev,
-+ PTR_BUCKET_NR(ca, &pick.ptr), READ);
-+
-+ if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
-+ bio_inc_remaining(&orig->bio);
-+ trace_and_count(c, read_split, &orig->bio);
-+ }
-+
-+ if (!rbio->pick.idx) {
-+ if (!rbio->have_ioref) {
-+ bch_err_inum_offset_ratelimited(c,
-+ read_pos.inode,
-+ read_pos.offset << 9,
-+ "no device to read from");
-+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
-+ goto out;
-+ }
-+
-+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
-+ bio_sectors(&rbio->bio));
-+ bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
-+
-+ if (unlikely(c->opts.no_data_io)) {
-+ if (likely(!(flags & BCH_READ_IN_RETRY)))
-+ bio_endio(&rbio->bio);
-+ } else {
-+ if (likely(!(flags & BCH_READ_IN_RETRY)))
-+ submit_bio(&rbio->bio);
-+ else
-+ submit_bio_wait(&rbio->bio);
-+ }
-+
-+ /*
-+ * We just submitted IO which may block, we expect relock fail
-+ * events and shouldn't count them:
-+ */
-+ trans->notrace_relock_fail = true;
-+ } else {
-+ /* Attempting reconstruct read: */
-+ if (bch2_ec_read_extent(trans, rbio)) {
-+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
-+ goto out;
-+ }
-+
-+ if (likely(!(flags & BCH_READ_IN_RETRY)))
-+ bio_endio(&rbio->bio);
-+ }
-+out:
-+ if (likely(!(flags & BCH_READ_IN_RETRY))) {
-+ return 0;
-+ } else {
-+ int ret;
-+
-+ rbio->context = RBIO_CONTEXT_UNBOUND;
-+ bch2_read_endio(&rbio->bio);
-+
-+ ret = rbio->retry;
-+ rbio = bch2_rbio_free(rbio);
-+
-+ if (ret == READ_RETRY_AVOID) {
-+ bch2_mark_io_failure(failed, &pick);
-+ ret = READ_RETRY;
-+ }
-+
-+ if (!ret)
-+ goto out_read_done;
-+
-+ return ret;
-+ }
-+
-+err:
-+ if (flags & BCH_READ_IN_RETRY)
-+ return READ_ERR;
-+
-+ orig->bio.bi_status = BLK_STS_IOERR;
-+ goto out_read_done;
-+
-+hole:
-+ /*
-+ * won't normally happen in the BCH_READ_NODECODE
-+ * (bch2_move_extent()) path, but if we retry and the extent we wanted
-+ * to read no longer exists we have to signal that:
-+ */
-+ if (flags & BCH_READ_NODECODE)
-+ orig->hole = true;
-+
-+ zero_fill_bio_iter(&orig->bio, iter);
-+out_read_done:
-+ if (flags & BCH_READ_LAST_FRAGMENT)
-+ bch2_rbio_done(orig);
-+ return 0;
-+}
-+
-+void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
-+ struct bvec_iter bvec_iter, subvol_inum inum,
-+ struct bch_io_failures *failed, unsigned flags)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_buf sk;
-+ struct bkey_s_c k;
-+ u32 snapshot;
-+ int ret;
-+
-+ BUG_ON(flags & BCH_READ_NODECODE);
-+
-+ bch2_bkey_buf_init(&sk);
-+retry:
-+ bch2_trans_begin(trans);
-+ iter = (struct btree_iter) { NULL };
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
-+ BTREE_ITER_SLOTS);
-+ while (1) {
-+ unsigned bytes, sectors, offset_into_extent;
-+ enum btree_id data_btree = BTREE_ID_extents;
-+
-+ /*
-+ * read_extent -> io_time_reset may cause a transaction restart
-+ * without returning an error, we need to check for that here:
-+ */
-+ ret = bch2_trans_relock(trans);
-+ if (ret)
-+ break;
-+
-+ bch2_btree_iter_set_pos(&iter,
-+ POS(inum.inum, bvec_iter.bi_sector));
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ break;
-+
-+ offset_into_extent = iter.pos.offset -
-+ bkey_start_offset(k.k);
-+ sectors = k.k->size - offset_into_extent;
-+
-+ bch2_bkey_buf_reassemble(&sk, c, k);
-+
-+ ret = bch2_read_indirect_extent(trans, &data_btree,
-+ &offset_into_extent, &sk);
-+ if (ret)
-+ break;
-+
-+ k = bkey_i_to_s_c(sk.k);
-+
-+ /*
-+ * With indirect extents, the amount of data to read is the min
-+ * of the original extent and the indirect extent:
-+ */
-+ sectors = min(sectors, k.k->size - offset_into_extent);
-+
-+ bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
-+ swap(bvec_iter.bi_size, bytes);
-+
-+ if (bvec_iter.bi_size == bytes)
-+ flags |= BCH_READ_LAST_FRAGMENT;
-+
-+ ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
-+ data_btree, k,
-+ offset_into_extent, failed, flags);
-+ if (ret)
-+ break;
-+
-+ if (flags & BCH_READ_LAST_FRAGMENT)
-+ break;
-+
-+ swap(bvec_iter.bi_size, bytes);
-+ bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
-+
-+ ret = btree_trans_too_many_iters(trans);
-+ if (ret)
-+ break;
-+ }
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-+ ret == READ_RETRY ||
-+ ret == READ_RETRY_AVOID)
-+ goto retry;
-+
-+ bch2_trans_put(trans);
-+ bch2_bkey_buf_exit(&sk, c);
-+
-+ if (ret) {
-+ bch_err_inum_offset_ratelimited(c, inum.inum,
-+ bvec_iter.bi_sector << 9,
-+ "read error %i from btree lookup", ret);
-+ rbio->bio.bi_status = BLK_STS_IOERR;
-+ bch2_rbio_done(rbio);
-+ }
-+}
-+
-+void bch2_fs_io_read_exit(struct bch_fs *c)
-+{
-+ if (c->promote_table.tbl)
-+ rhashtable_destroy(&c->promote_table);
-+ bioset_exit(&c->bio_read_split);
-+ bioset_exit(&c->bio_read);
-+}
-+
-+int bch2_fs_io_read_init(struct bch_fs *c)
-+{
-+ if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
-+ BIOSET_NEED_BVECS))
-+ return -BCH_ERR_ENOMEM_bio_read_init;
-+
-+ if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
-+ BIOSET_NEED_BVECS))
-+ return -BCH_ERR_ENOMEM_bio_read_split_init;
-+
-+ if (rhashtable_init(&c->promote_table, &bch_promote_params))
-+ return -BCH_ERR_ENOMEM_promote_table_init;
-+
-+ return 0;
-+}
-diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
-new file mode 100644
-index 000000000000..d9c18bb7d403
---- /dev/null
-+++ b/fs/bcachefs/io_read.h
-@@ -0,0 +1,158 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_IO_READ_H
-+#define _BCACHEFS_IO_READ_H
-+
-+#include "bkey_buf.h"
-+
-+struct bch_read_bio {
-+ struct bch_fs *c;
-+ u64 start_time;
-+ u64 submit_time;
-+
-+ /*
-+ * Reads will often have to be split, and if the extent being read from
-+ * was checksummed or compressed we'll also have to allocate bounce
-+ * buffers and copy the data back into the original bio.
-+ *
-+ * If we didn't have to split, we have to save and restore the original
-+ * bi_end_io - @split below indicates which:
-+ */
-+ union {
-+ struct bch_read_bio *parent;
-+ bio_end_io_t *end_io;
-+ };
-+
-+ /*
-+ * Saved copy of bio->bi_iter, from submission time - allows us to
-+ * resubmit on IO error, and also to copy data back to the original bio
-+ * when we're bouncing:
-+ */
-+ struct bvec_iter bvec_iter;
-+
-+ unsigned offset_into_extent;
-+
-+ u16 flags;
-+ union {
-+ struct {
-+ u16 bounce:1,
-+ split:1,
-+ kmalloc:1,
-+ have_ioref:1,
-+ narrow_crcs:1,
-+ hole:1,
-+ retry:2,
-+ context:2;
-+ };
-+ u16 _state;
-+ };
-+
-+ struct bch_devs_list devs_have;
-+
-+ struct extent_ptr_decoded pick;
-+
-+ /*
-+ * pos we read from - different from data_pos for indirect extents:
-+ */
-+ u32 subvol;
-+ struct bpos read_pos;
-+
-+ /*
-+ * start pos of data we read (may not be pos of data we want) - for
-+ * promote, narrow extents paths:
-+ */
-+ enum btree_id data_btree;
-+ struct bpos data_pos;
-+ struct bversion version;
-+
-+ struct promote_op *promote;
-+
-+ struct bch_io_opts opts;
-+
-+ struct work_struct work;
-+
-+ struct bio bio;
-+};
-+
-+#define to_rbio(_bio) container_of((_bio), struct bch_read_bio, bio)
-+
-+struct bch_devs_mask;
-+struct cache_promote_op;
-+struct extent_ptr_decoded;
-+
-+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
-+ struct bkey_buf *);
-+
-+static inline int bch2_read_indirect_extent(struct btree_trans *trans,
-+ enum btree_id *data_btree,
-+ unsigned *offset_into_extent,
-+ struct bkey_buf *k)
-+{
-+ if (k->k->k.type != KEY_TYPE_reflink_p)
-+ return 0;
-+
-+ *data_btree = BTREE_ID_reflink;
-+ return __bch2_read_indirect_extent(trans, offset_into_extent, k);
-+}
-+
-+enum bch_read_flags {
-+ BCH_READ_RETRY_IF_STALE = 1 << 0,
-+ BCH_READ_MAY_PROMOTE = 1 << 1,
-+ BCH_READ_USER_MAPPED = 1 << 2,
-+ BCH_READ_NODECODE = 1 << 3,
-+ BCH_READ_LAST_FRAGMENT = 1 << 4,
-+
-+ /* internal: */
-+ BCH_READ_MUST_BOUNCE = 1 << 5,
-+ BCH_READ_MUST_CLONE = 1 << 6,
-+ BCH_READ_IN_RETRY = 1 << 7,
-+};
-+
-+int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
-+ struct bvec_iter, struct bpos, enum btree_id,
-+ struct bkey_s_c, unsigned,
-+ struct bch_io_failures *, unsigned);
-+
-+static inline void bch2_read_extent(struct btree_trans *trans,
-+ struct bch_read_bio *rbio, struct bpos read_pos,
-+ enum btree_id data_btree, struct bkey_s_c k,
-+ unsigned offset_into_extent, unsigned flags)
-+{
-+ __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
-+ data_btree, k, offset_into_extent, NULL, flags);
-+}
-+
-+void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
-+ subvol_inum, struct bch_io_failures *, unsigned flags);
-+
-+static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
-+ subvol_inum inum)
-+{
-+ struct bch_io_failures failed = { .nr = 0 };
-+
-+ BUG_ON(rbio->_state);
-+
-+ rbio->c = c;
-+ rbio->start_time = local_clock();
-+ rbio->subvol = inum.subvol;
-+
-+ __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
-+ BCH_READ_RETRY_IF_STALE|
-+ BCH_READ_MAY_PROMOTE|
-+ BCH_READ_USER_MAPPED);
-+}
-+
-+static inline struct bch_read_bio *rbio_init(struct bio *bio,
-+ struct bch_io_opts opts)
-+{
-+ struct bch_read_bio *rbio = to_rbio(bio);
-+
-+ rbio->_state = 0;
-+ rbio->promote = NULL;
-+ rbio->opts = opts;
-+ return rbio;
-+}
-+
-+void bch2_fs_io_read_exit(struct bch_fs *);
-+int bch2_fs_io_read_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_IO_READ_H */
-diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
-new file mode 100644
-index 000000000000..f02b3f7d26a0
---- /dev/null
-+++ b/fs/bcachefs/io_write.c
-@@ -0,0 +1,1675 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_buf.h"
-+#include "bset.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "checksum.h"
-+#include "clock.h"
-+#include "compress.h"
-+#include "debug.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "extent_update.h"
-+#include "inode.h"
-+#include "io_write.h"
-+#include "journal.h"
-+#include "keylist.h"
-+#include "move.h"
-+#include "nocow_locking.h"
-+#include "rebalance.h"
-+#include "subvolume.h"
-+#include "super.h"
-+#include "super-io.h"
-+#include "trace.h"
-+
-+#include <linux/blkdev.h>
-+#include <linux/prefetch.h>
-+#include <linux/random.h>
-+#include <linux/sched/mm.h>
-+
-+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-+
-+static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency,
-+ u64 now, int rw)
-+{
-+ u64 latency_capable =
-+ ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m;
-+ /* ideally we'd be taking into account the device's variance here: */
-+ u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3);
-+ s64 latency_over = io_latency - latency_threshold;
-+
-+ if (latency_threshold && latency_over > 0) {
-+ /*
-+ * bump up congested by approximately latency_over * 4 /
-+ * latency_threshold - we don't need much accuracy here so don't
-+ * bother with the divide:
-+ */
-+ if (atomic_read(&ca->congested) < CONGESTED_MAX)
-+ atomic_add(latency_over >>
-+ max_t(int, ilog2(latency_threshold) - 2, 0),
-+ &ca->congested);
-+
-+ ca->congested_last = now;
-+ } else if (atomic_read(&ca->congested) > 0) {
-+ atomic_dec(&ca->congested);
-+ }
-+}
-+
-+void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
-+{
-+ atomic64_t *latency = &ca->cur_latency[rw];
-+ u64 now = local_clock();
-+ u64 io_latency = time_after64(now, submit_time)
-+ ? now - submit_time
-+ : 0;
-+ u64 old, new, v = atomic64_read(latency);
-+
-+ do {
-+ old = v;
-+
-+ /*
-+ * If the io latency was reasonably close to the current
-+ * latency, skip doing the update and atomic operation - most of
-+ * the time:
-+ */
-+ if (abs((int) (old - io_latency)) < (old >> 1) &&
-+ now & ~(~0U << 5))
-+ break;
-+
-+ new = ewma_add(old, io_latency, 5);
-+ } while ((v = atomic64_cmpxchg(latency, old, new)) != old);
-+
-+ bch2_congested_acct(ca, io_latency, now, rw);
-+
-+ __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now);
-+}
-+
-+#endif
-+
-+/* Allocate, free from mempool: */
-+
-+void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
-+{
-+ struct bvec_iter_all iter;
-+ struct bio_vec *bv;
-+
-+ bio_for_each_segment_all(bv, bio, iter)
-+ if (bv->bv_page != ZERO_PAGE(0))
-+ mempool_free(bv->bv_page, &c->bio_bounce_pages);
-+ bio->bi_vcnt = 0;
-+}
-+
-+static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool)
-+{
-+ struct page *page;
-+
-+ if (likely(!*using_mempool)) {
-+ page = alloc_page(GFP_NOFS);
-+ if (unlikely(!page)) {
-+ mutex_lock(&c->bio_bounce_pages_lock);
-+ *using_mempool = true;
-+ goto pool_alloc;
-+
-+ }
-+ } else {
-+pool_alloc:
-+ page = mempool_alloc(&c->bio_bounce_pages, GFP_NOFS);
-+ }
-+
-+ return page;
-+}
-+
-+void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
-+ size_t size)
-+{
-+ bool using_mempool = false;
-+
-+ while (size) {
-+ struct page *page = __bio_alloc_page_pool(c, &using_mempool);
-+ unsigned len = min_t(size_t, PAGE_SIZE, size);
-+
-+ BUG_ON(!bio_add_page(bio, page, len, 0));
-+ size -= len;
-+ }
-+
-+ if (using_mempool)
-+ mutex_unlock(&c->bio_bounce_pages_lock);
-+}
-+
-+/* Extent update path: */
-+
-+int bch2_sum_sector_overwrites(struct btree_trans *trans,
-+ struct btree_iter *extent_iter,
-+ struct bkey_i *new,
-+ bool *usage_increasing,
-+ s64 *i_sectors_delta,
-+ s64 *disk_sectors_delta)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_s_c old;
-+ unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new));
-+ bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new));
-+ int ret = 0;
-+
-+ *usage_increasing = false;
-+ *i_sectors_delta = 0;
-+ *disk_sectors_delta = 0;
-+
-+ bch2_trans_copy_iter(&iter, extent_iter);
-+
-+ for_each_btree_key_upto_continue_norestart(iter,
-+ new->k.p, BTREE_ITER_SLOTS, old, ret) {
-+ s64 sectors = min(new->k.p.offset, old.k->p.offset) -
-+ max(bkey_start_offset(&new->k),
-+ bkey_start_offset(old.k));
-+
-+ *i_sectors_delta += sectors *
-+ (bkey_extent_is_allocation(&new->k) -
-+ bkey_extent_is_allocation(old.k));
-+
-+ *disk_sectors_delta += sectors * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new));
-+ *disk_sectors_delta -= new->k.p.snapshot == old.k->p.snapshot
-+ ? sectors * bch2_bkey_nr_ptrs_fully_allocated(old)
-+ : 0;
-+
-+ if (!*usage_increasing &&
-+ (new->k.p.snapshot != old.k->p.snapshot ||
-+ new_replicas > bch2_bkey_replicas(c, old) ||
-+ (!new_compressed && bch2_bkey_sectors_compressed(old))))
-+ *usage_increasing = true;
-+
-+ if (bkey_ge(old.k->p, new->k.p))
-+ break;
-+ }
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
-+ struct btree_iter *extent_iter,
-+ u64 new_i_size,
-+ s64 i_sectors_delta)
-+{
-+ struct btree_iter iter;
-+ struct bkey_i *k;
-+ struct bkey_i_inode_v3 *inode;
-+ /*
-+ * Crazy performance optimization:
-+ * Every extent update needs to also update the inode: the inode trigger
-+ * will set bi->journal_seq to the journal sequence number of this
-+ * transaction - for fsync.
-+ *
-+ * But if that's the only reason we're updating the inode (we're not
-+ * updating bi_size or bi_sectors), then we don't need the inode update
-+ * to be journalled - if we crash, the bi_journal_seq update will be
-+ * lost, but that's fine.
-+ */
-+ unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
-+ int ret;
-+
-+ k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes,
-+ SPOS(0,
-+ extent_iter->pos.inode,
-+ extent_iter->snapshot),
-+ BTREE_ITER_CACHED);
-+ ret = PTR_ERR_OR_ZERO(k);
-+ if (unlikely(ret))
-+ return ret;
-+
-+ if (unlikely(k->k.type != KEY_TYPE_inode_v3)) {
-+ k = bch2_inode_to_v3(trans, k);
-+ ret = PTR_ERR_OR_ZERO(k);
-+ if (unlikely(ret))
-+ goto err;
-+ }
-+
-+ inode = bkey_i_to_inode_v3(k);
-+
-+ if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) &&
-+ new_i_size > le64_to_cpu(inode->v.bi_size)) {
-+ inode->v.bi_size = cpu_to_le64(new_i_size);
-+ inode_update_flags = 0;
-+ }
-+
-+ if (i_sectors_delta) {
-+ le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta);
-+ inode_update_flags = 0;
-+ }
-+
-+ if (inode->k.p.snapshot != iter.snapshot) {
-+ inode->k.p.snapshot = iter.snapshot;
-+ inode_update_flags = 0;
-+ }
-+
-+ ret = bch2_trans_update(trans, &iter, &inode->k_i,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
-+ inode_update_flags);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+int bch2_extent_update(struct btree_trans *trans,
-+ subvol_inum inum,
-+ struct btree_iter *iter,
-+ struct bkey_i *k,
-+ struct disk_reservation *disk_res,
-+ u64 new_i_size,
-+ s64 *i_sectors_delta_total,
-+ bool check_enospc)
-+{
-+ struct bpos next_pos;
-+ bool usage_increasing;
-+ s64 i_sectors_delta = 0, disk_sectors_delta = 0;
-+ int ret;
-+
-+ /*
-+ * This traverses the iterator without changing iter->path->pos to
-+ * search_key() (which is pos + 1 for extents): we want there to be a
-+ * path already traversed at iter->pos because
-+ * bch2_trans_extent_update() will use it to attempt extent merging
-+ */
-+ ret = __bch2_btree_iter_traverse(iter);
-+ if (ret)
-+ return ret;
-+
-+ ret = bch2_extent_trim_atomic(trans, iter, k);
-+ if (ret)
-+ return ret;
-+
-+ next_pos = k->k.p;
-+
-+ ret = bch2_sum_sector_overwrites(trans, iter, k,
-+ &usage_increasing,
-+ &i_sectors_delta,
-+ &disk_sectors_delta);
-+ if (ret)
-+ return ret;
-+
-+ if (disk_res &&
-+ disk_sectors_delta > (s64) disk_res->sectors) {
-+ ret = bch2_disk_reservation_add(trans->c, disk_res,
-+ disk_sectors_delta - disk_res->sectors,
-+ !check_enospc || !usage_increasing
-+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ /*
-+ * Note:
-+ * We always have to do an inode update - even when i_size/i_sectors
-+ * aren't changing - for fsync to work properly; fsync relies on
-+ * inode->bi_journal_seq which is updated by the trigger code:
-+ */
-+ ret = bch2_extent_update_i_size_sectors(trans, iter,
-+ min(k->k.p.offset << 9, new_i_size),
-+ i_sectors_delta) ?:
-+ bch2_trans_update(trans, iter, k, 0) ?:
-+ bch2_trans_commit(trans, disk_res, NULL,
-+ BTREE_INSERT_NOCHECK_RW|
-+ BTREE_INSERT_NOFAIL);
-+ if (unlikely(ret))
-+ return ret;
-+
-+ if (i_sectors_delta_total)
-+ *i_sectors_delta_total += i_sectors_delta;
-+ bch2_btree_iter_set_pos(iter, next_pos);
-+ return 0;
-+}
-+
-+static int bch2_write_index_default(struct bch_write_op *op)
-+{
-+ struct bch_fs *c = op->c;
-+ struct bkey_buf sk;
-+ struct keylist *keys = &op->insert_keys;
-+ struct bkey_i *k = bch2_keylist_front(keys);
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ subvol_inum inum = {
-+ .subvol = op->subvol,
-+ .inum = k->k.p.inode,
-+ };
-+ int ret;
-+
-+ BUG_ON(!inum.subvol);
-+
-+ bch2_bkey_buf_init(&sk);
-+
-+ do {
-+ bch2_trans_begin(trans);
-+
-+ k = bch2_keylist_front(keys);
-+ bch2_bkey_buf_copy(&sk, c, k);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol,
-+ &sk.k->k.p.snapshot);
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ continue;
-+ if (ret)
-+ break;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ bkey_start_pos(&sk.k->k),
-+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-+
-+ ret = bch2_bkey_set_needs_rebalance(c, sk.k,
-+ op->opts.background_target,
-+ op->opts.background_compression) ?:
-+ bch2_extent_update(trans, inum, &iter, sk.k,
-+ &op->res,
-+ op->new_i_size, &op->i_sectors_delta,
-+ op->flags & BCH_WRITE_CHECK_ENOSPC);
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ continue;
-+ if (ret)
-+ break;
-+
-+ if (bkey_ge(iter.pos, k->k.p))
-+ bch2_keylist_pop_front(&op->insert_keys);
-+ else
-+ bch2_cut_front(iter.pos, k);
-+ } while (!bch2_keylist_empty(keys));
-+
-+ bch2_trans_put(trans);
-+ bch2_bkey_buf_exit(&sk, c);
-+
-+ return ret;
-+}
-+
-+/* Writes */
-+
-+void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
-+ enum bch_data_type type,
-+ const struct bkey_i *k,
-+ bool nocow)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
-+ const struct bch_extent_ptr *ptr;
-+ struct bch_write_bio *n;
-+ struct bch_dev *ca;
-+
-+ BUG_ON(c->opts.nochanges);
-+
-+ bkey_for_each_ptr(ptrs, ptr) {
-+ BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX ||
-+ !c->devs[ptr->dev]);
-+
-+ ca = bch_dev_bkey_exists(c, ptr->dev);
-+
-+ if (to_entry(ptr + 1) < ptrs.end) {
-+ n = to_wbio(bio_alloc_clone(NULL, &wbio->bio,
-+ GFP_NOFS, &ca->replica_set));
-+
-+ n->bio.bi_end_io = wbio->bio.bi_end_io;
-+ n->bio.bi_private = wbio->bio.bi_private;
-+ n->parent = wbio;
-+ n->split = true;
-+ n->bounce = false;
-+ n->put_bio = true;
-+ n->bio.bi_opf = wbio->bio.bi_opf;
-+ bio_inc_remaining(&wbio->bio);
-+ } else {
-+ n = wbio;
-+ n->split = false;
-+ }
-+
-+ n->c = c;
-+ n->dev = ptr->dev;
-+ n->have_ioref = nocow || bch2_dev_get_ioref(ca,
-+ type == BCH_DATA_btree ? READ : WRITE);
-+ n->nocow = nocow;
-+ n->submit_time = local_clock();
-+ n->inode_offset = bkey_start_offset(&k->k);
-+ n->bio.bi_iter.bi_sector = ptr->offset;
-+
-+ if (likely(n->have_ioref)) {
-+ this_cpu_add(ca->io_done->sectors[WRITE][type],
-+ bio_sectors(&n->bio));
-+
-+ bio_set_dev(&n->bio, ca->disk_sb.bdev);
-+
-+ if (type != BCH_DATA_btree && unlikely(c->opts.no_data_io)) {
-+ bio_endio(&n->bio);
-+ continue;
-+ }
-+
-+ submit_bio(&n->bio);
-+ } else {
-+ n->bio.bi_status = BLK_STS_REMOVED;
-+ bio_endio(&n->bio);
-+ }
-+ }
-+}
-+
-+static void __bch2_write(struct bch_write_op *);
-+
-+static void bch2_write_done(struct closure *cl)
-+{
-+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
-+ struct bch_fs *c = op->c;
-+
-+ EBUG_ON(op->open_buckets.nr);
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
-+ bch2_disk_reservation_put(c, &op->res);
-+
-+ if (!(op->flags & BCH_WRITE_MOVE))
-+ bch2_write_ref_put(c, BCH_WRITE_REF_write);
-+ bch2_keylist_free(&op->insert_keys, op->inline_keys);
-+
-+ EBUG_ON(cl->parent);
-+ closure_debug_destroy(cl);
-+ if (op->end_io)
-+ op->end_io(op);
-+}
-+
-+static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
-+{
-+ struct keylist *keys = &op->insert_keys;
-+ struct bch_extent_ptr *ptr;
-+ struct bkey_i *src, *dst = keys->keys, *n;
-+
-+ for (src = keys->keys; src != keys->top; src = n) {
-+ n = bkey_next(src);
-+
-+ if (bkey_extent_is_direct_data(&src->k)) {
-+ bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr,
-+ test_bit(ptr->dev, op->failed.d));
-+
-+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src)))
-+ return -EIO;
-+ }
-+
-+ if (dst != src)
-+ memmove_u64s_down(dst, src, src->k.u64s);
-+ dst = bkey_next(dst);
-+ }
-+
-+ keys->top = dst;
-+ return 0;
-+}
-+
-+/**
-+ * __bch2_write_index - after a write, update index to point to new data
-+ * @op: bch_write_op to process
-+ */
-+static void __bch2_write_index(struct bch_write_op *op)
-+{
-+ struct bch_fs *c = op->c;
-+ struct keylist *keys = &op->insert_keys;
-+ unsigned dev;
-+ int ret = 0;
-+
-+ if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
-+ ret = bch2_write_drop_io_error_ptrs(op);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ if (!bch2_keylist_empty(keys)) {
-+ u64 sectors_start = keylist_sectors(keys);
-+
-+ ret = !(op->flags & BCH_WRITE_MOVE)
-+ ? bch2_write_index_default(op)
-+ : bch2_data_update_index_update(op);
-+
-+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
-+ BUG_ON(keylist_sectors(keys) && !ret);
-+
-+ op->written += sectors_start - keylist_sectors(keys);
-+
-+ if (ret && !bch2_err_matches(ret, EROFS)) {
-+ struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
-+
-+ bch_err_inum_offset_ratelimited(c,
-+ insert->k.p.inode, insert->k.p.offset << 9,
-+ "write error while doing btree update: %s",
-+ bch2_err_str(ret));
-+ }
-+
-+ if (ret)
-+ goto err;
-+ }
-+out:
-+ /* If a bucket wasn't written, we can't erasure code it: */
-+ for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX)
-+ bch2_open_bucket_write_error(c, &op->open_buckets, dev);
-+
-+ bch2_open_buckets_put(c, &op->open_buckets);
-+ return;
-+err:
-+ keys->top = keys->keys;
-+ op->error = ret;
-+ op->flags |= BCH_WRITE_DONE;
-+ goto out;
-+}
-+
-+static inline void __wp_update_state(struct write_point *wp, enum write_point_state state)
-+{
-+ if (state != wp->state) {
-+ u64 now = ktime_get_ns();
-+
-+ if (wp->last_state_change &&
-+ time_after64(now, wp->last_state_change))
-+ wp->time[wp->state] += now - wp->last_state_change;
-+ wp->state = state;
-+ wp->last_state_change = now;
-+ }
-+}
-+
-+static inline void wp_update_state(struct write_point *wp, bool running)
-+{
-+ enum write_point_state state;
-+
-+ state = running ? WRITE_POINT_running :
-+ !list_empty(&wp->writes) ? WRITE_POINT_waiting_io
-+ : WRITE_POINT_stopped;
-+
-+ __wp_update_state(wp, state);
-+}
-+
-+static void bch2_write_index(struct closure *cl)
-+{
-+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
-+ struct write_point *wp = op->wp;
-+ struct workqueue_struct *wq = index_update_wq(op);
-+ unsigned long flags;
-+
-+ if ((op->flags & BCH_WRITE_DONE) &&
-+ (op->flags & BCH_WRITE_MOVE))
-+ bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
-+
-+ spin_lock_irqsave(&wp->writes_lock, flags);
-+ if (wp->state == WRITE_POINT_waiting_io)
-+ __wp_update_state(wp, WRITE_POINT_waiting_work);
-+ list_add_tail(&op->wp_list, &wp->writes);
-+ spin_unlock_irqrestore (&wp->writes_lock, flags);
-+
-+ queue_work(wq, &wp->index_update_work);
-+}
-+
-+static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp)
-+{
-+ op->wp = wp;
-+
-+ if (wp->state == WRITE_POINT_stopped) {
-+ spin_lock_irq(&wp->writes_lock);
-+ __wp_update_state(wp, WRITE_POINT_waiting_io);
-+ spin_unlock_irq(&wp->writes_lock);
-+ }
-+}
-+
-+void bch2_write_point_do_index_updates(struct work_struct *work)
-+{
-+ struct write_point *wp =
-+ container_of(work, struct write_point, index_update_work);
-+ struct bch_write_op *op;
-+
-+ while (1) {
-+ spin_lock_irq(&wp->writes_lock);
-+ op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
-+ if (op)
-+ list_del(&op->wp_list);
-+ wp_update_state(wp, op != NULL);
-+ spin_unlock_irq(&wp->writes_lock);
-+
-+ if (!op)
-+ break;
-+
-+ op->flags |= BCH_WRITE_IN_WORKER;
-+
-+ __bch2_write_index(op);
-+
-+ if (!(op->flags & BCH_WRITE_DONE))
-+ __bch2_write(op);
-+ else
-+ bch2_write_done(&op->cl);
-+ }
-+}
-+
-+static void bch2_write_endio(struct bio *bio)
-+{
-+ struct closure *cl = bio->bi_private;
-+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
-+ struct bch_write_bio *wbio = to_wbio(bio);
-+ struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
-+ struct bch_fs *c = wbio->c;
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
-+
-+ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
-+ op->pos.inode,
-+ wbio->inode_offset << 9,
-+ "data write error: %s",
-+ bch2_blk_status_to_str(bio->bi_status))) {
-+ set_bit(wbio->dev, op->failed.d);
-+ op->flags |= BCH_WRITE_IO_ERROR;
-+ }
-+
-+ if (wbio->nocow)
-+ set_bit(wbio->dev, op->devs_need_flush->d);
-+
-+ if (wbio->have_ioref) {
-+ bch2_latency_acct(ca, wbio->submit_time, WRITE);
-+ percpu_ref_put(&ca->io_ref);
-+ }
-+
-+ if (wbio->bounce)
-+ bch2_bio_free_pages_pool(c, bio);
-+
-+ if (wbio->put_bio)
-+ bio_put(bio);
-+
-+ if (parent)
-+ bio_endio(&parent->bio);
-+ else
-+ closure_put(cl);
-+}
-+
-+static void init_append_extent(struct bch_write_op *op,
-+ struct write_point *wp,
-+ struct bversion version,
-+ struct bch_extent_crc_unpacked crc)
-+{
-+ struct bkey_i_extent *e;
-+
-+ op->pos.offset += crc.uncompressed_size;
-+
-+ e = bkey_extent_init(op->insert_keys.top);
-+ e->k.p = op->pos;
-+ e->k.size = crc.uncompressed_size;
-+ e->k.version = version;
-+
-+ if (crc.csum_type ||
-+ crc.compression_type ||
-+ crc.nonce)
-+ bch2_extent_crc_append(&e->k_i, crc);
-+
-+ bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size,
-+ op->flags & BCH_WRITE_CACHED);
-+
-+ bch2_keylist_push(&op->insert_keys);
-+}
-+
-+static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
-+ struct write_point *wp,
-+ struct bio *src,
-+ bool *page_alloc_failed,
-+ void *buf)
-+{
-+ struct bch_write_bio *wbio;
-+ struct bio *bio;
-+ unsigned output_available =
-+ min(wp->sectors_free << 9, src->bi_iter.bi_size);
-+ unsigned pages = DIV_ROUND_UP(output_available +
-+ (buf
-+ ? ((unsigned long) buf & (PAGE_SIZE - 1))
-+ : 0), PAGE_SIZE);
-+
-+ pages = min(pages, BIO_MAX_VECS);
-+
-+ bio = bio_alloc_bioset(NULL, pages, 0,
-+ GFP_NOFS, &c->bio_write);
-+ wbio = wbio_init(bio);
-+ wbio->put_bio = true;
-+ /* copy WRITE_SYNC flag */
-+ wbio->bio.bi_opf = src->bi_opf;
-+
-+ if (buf) {
-+ bch2_bio_map(bio, buf, output_available);
-+ return bio;
-+ }
-+
-+ wbio->bounce = true;
-+
-+ /*
-+ * We can't use mempool for more than c->opts.encoded_extent_max
-+ * worth of pages, but we'd like to allocate more if we can:
-+ */
-+ bch2_bio_alloc_pages_pool(c, bio,
-+ min_t(unsigned, output_available,
-+ c->opts.encoded_extent_max));
-+
-+ if (bio->bi_iter.bi_size < output_available)
-+ *page_alloc_failed =
-+ bch2_bio_alloc_pages(bio,
-+ output_available -
-+ bio->bi_iter.bi_size,
-+ GFP_NOFS) != 0;
-+
-+ return bio;
-+}
-+
-+static int bch2_write_rechecksum(struct bch_fs *c,
-+ struct bch_write_op *op,
-+ unsigned new_csum_type)
-+{
-+ struct bio *bio = &op->wbio.bio;
-+ struct bch_extent_crc_unpacked new_crc;
-+ int ret;
-+
-+ /* bch2_rechecksum_bio() can't encrypt or decrypt data: */
-+
-+ if (bch2_csum_type_is_encryption(op->crc.csum_type) !=
-+ bch2_csum_type_is_encryption(new_csum_type))
-+ new_csum_type = op->crc.csum_type;
-+
-+ ret = bch2_rechecksum_bio(c, bio, op->version, op->crc,
-+ NULL, &new_crc,
-+ op->crc.offset, op->crc.live_size,
-+ new_csum_type);
-+ if (ret)
-+ return ret;
-+
-+ bio_advance(bio, op->crc.offset << 9);
-+ bio->bi_iter.bi_size = op->crc.live_size << 9;
-+ op->crc = new_crc;
-+ return 0;
-+}
-+
-+static int bch2_write_decrypt(struct bch_write_op *op)
-+{
-+ struct bch_fs *c = op->c;
-+ struct nonce nonce = extent_nonce(op->version, op->crc);
-+ struct bch_csum csum;
-+ int ret;
-+
-+ if (!bch2_csum_type_is_encryption(op->crc.csum_type))
-+ return 0;
-+
-+ /*
-+ * If we need to decrypt data in the write path, we'll no longer be able
-+ * to verify the existing checksum (poly1305 mac, in this case) after
-+ * it's decrypted - this is the last point we'll be able to reverify the
-+ * checksum:
-+ */
-+ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
-+ if (bch2_crc_cmp(op->crc.csum, csum))
-+ return -EIO;
-+
-+ ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
-+ op->crc.csum_type = 0;
-+ op->crc.csum = (struct bch_csum) { 0, 0 };
-+ return ret;
-+}
-+
-+static enum prep_encoded_ret {
-+ PREP_ENCODED_OK,
-+ PREP_ENCODED_ERR,
-+ PREP_ENCODED_CHECKSUM_ERR,
-+ PREP_ENCODED_DO_WRITE,
-+} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp)
-+{
-+ struct bch_fs *c = op->c;
-+ struct bio *bio = &op->wbio.bio;
-+
-+ if (!(op->flags & BCH_WRITE_DATA_ENCODED))
-+ return PREP_ENCODED_OK;
-+
-+ BUG_ON(bio_sectors(bio) != op->crc.compressed_size);
-+
-+ /* Can we just write the entire extent as is? */
-+ if (op->crc.uncompressed_size == op->crc.live_size &&
-+ op->crc.uncompressed_size <= c->opts.encoded_extent_max >> 9 &&
-+ op->crc.compressed_size <= wp->sectors_free &&
-+ (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) ||
-+ op->incompressible)) {
-+ if (!crc_is_compressed(op->crc) &&
-+ op->csum_type != op->crc.csum_type &&
-+ bch2_write_rechecksum(c, op, op->csum_type) &&
-+ !c->opts.no_data_io)
-+ return PREP_ENCODED_CHECKSUM_ERR;
-+
-+ return PREP_ENCODED_DO_WRITE;
-+ }
-+
-+ /*
-+ * If the data is compressed and we couldn't write the entire extent as
-+ * is, we have to decompress it:
-+ */
-+ if (crc_is_compressed(op->crc)) {
-+ struct bch_csum csum;
-+
-+ if (bch2_write_decrypt(op))
-+ return PREP_ENCODED_CHECKSUM_ERR;
-+
-+ /* Last point we can still verify checksum: */
-+ csum = bch2_checksum_bio(c, op->crc.csum_type,
-+ extent_nonce(op->version, op->crc),
-+ bio);
-+ if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
-+ return PREP_ENCODED_CHECKSUM_ERR;
-+
-+ if (bch2_bio_uncompress_inplace(c, bio, &op->crc))
-+ return PREP_ENCODED_ERR;
-+ }
-+
-+ /*
-+ * No longer have compressed data after this point - data might be
-+ * encrypted:
-+ */
-+
-+ /*
-+ * If the data is checksummed and we're only writing a subset,
-+ * rechecksum and adjust bio to point to currently live data:
-+ */
-+ if ((op->crc.live_size != op->crc.uncompressed_size ||
-+ op->crc.csum_type != op->csum_type) &&
-+ bch2_write_rechecksum(c, op, op->csum_type) &&
-+ !c->opts.no_data_io)
-+ return PREP_ENCODED_CHECKSUM_ERR;
-+
-+ /*
-+ * If we want to compress the data, it has to be decrypted:
-+ */
-+ if ((op->compression_opt ||
-+ bch2_csum_type_is_encryption(op->crc.csum_type) !=
-+ bch2_csum_type_is_encryption(op->csum_type)) &&
-+ bch2_write_decrypt(op))
-+ return PREP_ENCODED_CHECKSUM_ERR;
-+
-+ return PREP_ENCODED_OK;
-+}
-+
-+static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
-+ struct bio **_dst)
-+{
-+ struct bch_fs *c = op->c;
-+ struct bio *src = &op->wbio.bio, *dst = src;
-+ struct bvec_iter saved_iter;
-+ void *ec_buf;
-+ unsigned total_output = 0, total_input = 0;
-+ bool bounce = false;
-+ bool page_alloc_failed = false;
-+ int ret, more = 0;
-+
-+ BUG_ON(!bio_sectors(src));
-+
-+ ec_buf = bch2_writepoint_ec_buf(c, wp);
-+
-+ switch (bch2_write_prep_encoded_data(op, wp)) {
-+ case PREP_ENCODED_OK:
-+ break;
-+ case PREP_ENCODED_ERR:
-+ ret = -EIO;
-+ goto err;
-+ case PREP_ENCODED_CHECKSUM_ERR:
-+ goto csum_err;
-+ case PREP_ENCODED_DO_WRITE:
-+ /* XXX look for bug here */
-+ if (ec_buf) {
-+ dst = bch2_write_bio_alloc(c, wp, src,
-+ &page_alloc_failed,
-+ ec_buf);
-+ bio_copy_data(dst, src);
-+ bounce = true;
-+ }
-+ init_append_extent(op, wp, op->version, op->crc);
-+ goto do_write;
-+ }
-+
-+ if (ec_buf ||
-+ op->compression_opt ||
-+ (op->csum_type &&
-+ !(op->flags & BCH_WRITE_PAGES_STABLE)) ||
-+ (bch2_csum_type_is_encryption(op->csum_type) &&
-+ !(op->flags & BCH_WRITE_PAGES_OWNED))) {
-+ dst = bch2_write_bio_alloc(c, wp, src,
-+ &page_alloc_failed,
-+ ec_buf);
-+ bounce = true;
-+ }
-+
-+ saved_iter = dst->bi_iter;
-+
-+ do {
-+ struct bch_extent_crc_unpacked crc = { 0 };
-+ struct bversion version = op->version;
-+ size_t dst_len = 0, src_len = 0;
-+
-+ if (page_alloc_failed &&
-+ dst->bi_iter.bi_size < (wp->sectors_free << 9) &&
-+ dst->bi_iter.bi_size < c->opts.encoded_extent_max)
-+ break;
-+
-+ BUG_ON(op->compression_opt &&
-+ (op->flags & BCH_WRITE_DATA_ENCODED) &&
-+ bch2_csum_type_is_encryption(op->crc.csum_type));
-+ BUG_ON(op->compression_opt && !bounce);
-+
-+ crc.compression_type = op->incompressible
-+ ? BCH_COMPRESSION_TYPE_incompressible
-+ : op->compression_opt
-+ ? bch2_bio_compress(c, dst, &dst_len, src, &src_len,
-+ op->compression_opt)
-+ : 0;
-+ if (!crc_is_compressed(crc)) {
-+ dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
-+ dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9);
-+
-+ if (op->csum_type)
-+ dst_len = min_t(unsigned, dst_len,
-+ c->opts.encoded_extent_max);
-+
-+ if (bounce) {
-+ swap(dst->bi_iter.bi_size, dst_len);
-+ bio_copy_data(dst, src);
-+ swap(dst->bi_iter.bi_size, dst_len);
-+ }
-+
-+ src_len = dst_len;
-+ }
-+
-+ BUG_ON(!src_len || !dst_len);
-+
-+ if (bch2_csum_type_is_encryption(op->csum_type)) {
-+ if (bversion_zero(version)) {
-+ version.lo = atomic64_inc_return(&c->key_version);
-+ } else {
-+ crc.nonce = op->nonce;
-+ op->nonce += src_len >> 9;
-+ }
-+ }
-+
-+ if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
-+ !crc_is_compressed(crc) &&
-+ bch2_csum_type_is_encryption(op->crc.csum_type) ==
-+ bch2_csum_type_is_encryption(op->csum_type)) {
-+ u8 compression_type = crc.compression_type;
-+ u16 nonce = crc.nonce;
-+ /*
-+ * Note: when we're using rechecksum(), we need to be
-+ * checksumming @src because it has all the data our
-+ * existing checksum covers - if we bounced (because we
-+ * were trying to compress), @dst will only have the
-+ * part of the data the new checksum will cover.
-+ *
-+ * But normally we want to be checksumming post bounce,
-+ * because part of the reason for bouncing is so the
-+ * data can't be modified (by userspace) while it's in
-+ * flight.
-+ */
-+ if (bch2_rechecksum_bio(c, src, version, op->crc,
-+ &crc, &op->crc,
-+ src_len >> 9,
-+ bio_sectors(src) - (src_len >> 9),
-+ op->csum_type))
-+ goto csum_err;
-+ /*
-+ * rchecksum_bio sets compression_type on crc from op->crc,
-+ * this isn't always correct as sometimes we're changing
-+ * an extent from uncompressed to incompressible.
-+ */
-+ crc.compression_type = compression_type;
-+ crc.nonce = nonce;
-+ } else {
-+ if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
-+ bch2_rechecksum_bio(c, src, version, op->crc,
-+ NULL, &op->crc,
-+ src_len >> 9,
-+ bio_sectors(src) - (src_len >> 9),
-+ op->crc.csum_type))
-+ goto csum_err;
-+
-+ crc.compressed_size = dst_len >> 9;
-+ crc.uncompressed_size = src_len >> 9;
-+ crc.live_size = src_len >> 9;
-+
-+ swap(dst->bi_iter.bi_size, dst_len);
-+ ret = bch2_encrypt_bio(c, op->csum_type,
-+ extent_nonce(version, crc), dst);
-+ if (ret)
-+ goto err;
-+
-+ crc.csum = bch2_checksum_bio(c, op->csum_type,
-+ extent_nonce(version, crc), dst);
-+ crc.csum_type = op->csum_type;
-+ swap(dst->bi_iter.bi_size, dst_len);
-+ }
-+
-+ init_append_extent(op, wp, version, crc);
-+
-+ if (dst != src)
-+ bio_advance(dst, dst_len);
-+ bio_advance(src, src_len);
-+ total_output += dst_len;
-+ total_input += src_len;
-+ } while (dst->bi_iter.bi_size &&
-+ src->bi_iter.bi_size &&
-+ wp->sectors_free &&
-+ !bch2_keylist_realloc(&op->insert_keys,
-+ op->inline_keys,
-+ ARRAY_SIZE(op->inline_keys),
-+ BKEY_EXTENT_U64s_MAX));
-+
-+ more = src->bi_iter.bi_size != 0;
-+
-+ dst->bi_iter = saved_iter;
-+
-+ if (dst == src && more) {
-+ BUG_ON(total_output != total_input);
-+
-+ dst = bio_split(src, total_input >> 9,
-+ GFP_NOFS, &c->bio_write);
-+ wbio_init(dst)->put_bio = true;
-+ /* copy WRITE_SYNC flag */
-+ dst->bi_opf = src->bi_opf;
-+ }
-+
-+ dst->bi_iter.bi_size = total_output;
-+do_write:
-+ *_dst = dst;
-+ return more;
-+csum_err:
-+ bch_err(c, "error verifying existing checksum while rewriting existing data (memory corruption?)");
-+ ret = -EIO;
-+err:
-+ if (to_wbio(dst)->bounce)
-+ bch2_bio_free_pages_pool(c, dst);
-+ if (to_wbio(dst)->put_bio)
-+ bio_put(dst);
-+
-+ return ret;
-+}
-+
-+static bool bch2_extent_is_writeable(struct bch_write_op *op,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = op->c;
-+ struct bkey_s_c_extent e;
-+ struct extent_ptr_decoded p;
-+ const union bch_extent_entry *entry;
-+ unsigned replicas = 0;
-+
-+ if (k.k->type != KEY_TYPE_extent)
-+ return false;
-+
-+ e = bkey_s_c_to_extent(k);
-+ extent_for_each_ptr_decode(e, p, entry) {
-+ if (crc_is_encoded(p.crc) || p.has_ec)
-+ return false;
-+
-+ replicas += bch2_extent_ptr_durability(c, &p);
-+ }
-+
-+ return replicas >= op->opts.data_replicas;
-+}
-+
-+static inline void bch2_nocow_write_unlock(struct bch_write_op *op)
-+{
-+ struct bch_fs *c = op->c;
-+ const struct bch_extent_ptr *ptr;
-+ struct bkey_i *k;
-+
-+ for_each_keylist_key(&op->insert_keys, k) {
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
-+
-+ bkey_for_each_ptr(ptrs, ptr)
-+ bch2_bucket_nocow_unlock(&c->nocow_locks,
-+ PTR_BUCKET_POS(c, ptr),
-+ BUCKET_NOCOW_LOCK_UPDATE);
-+ }
-+}
-+
-+static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_i *orig,
-+ struct bkey_s_c k,
-+ u64 new_i_size)
-+{
-+ struct bkey_i *new;
-+ struct bkey_ptrs ptrs;
-+ struct bch_extent_ptr *ptr;
-+ int ret;
-+
-+ if (!bch2_extents_match(bkey_i_to_s_c(orig), k)) {
-+ /* trace this */
-+ return 0;
-+ }
-+
-+ new = bch2_bkey_make_mut_noupdate(trans, k);
-+ ret = PTR_ERR_OR_ZERO(new);
-+ if (ret)
-+ return ret;
-+
-+ bch2_cut_front(bkey_start_pos(&orig->k), new);
-+ bch2_cut_back(orig->k.p, new);
-+
-+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
-+ bkey_for_each_ptr(ptrs, ptr)
-+ ptr->unwritten = 0;
-+
-+ /*
-+ * Note that we're not calling bch2_subvol_get_snapshot() in this path -
-+ * that was done when we kicked off the write, and here it's important
-+ * that we update the extent that we wrote to - even if a snapshot has
-+ * since been created. The write is still outstanding, so we're ok
-+ * w.r.t. snapshot atomicity:
-+ */
-+ return bch2_extent_update_i_size_sectors(trans, iter,
-+ min(new->k.p.offset << 9, new_i_size), 0) ?:
-+ bch2_trans_update(trans, iter, new,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+}
-+
-+static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
-+{
-+ struct bch_fs *c = op->c;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_i *orig;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ for_each_keylist_key(&op->insert_keys, orig) {
-+ ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents,
-+ bkey_start_pos(&orig->k), orig->k.p,
-+ BTREE_ITER_INTENT, k,
-+ NULL, NULL, BTREE_INSERT_NOFAIL, ({
-+ bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size);
-+ }));
-+
-+ if (ret && !bch2_err_matches(ret, EROFS)) {
-+ struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
-+
-+ bch_err_inum_offset_ratelimited(c,
-+ insert->k.p.inode, insert->k.p.offset << 9,
-+ "write error while doing btree update: %s",
-+ bch2_err_str(ret));
-+ }
-+
-+ if (ret) {
-+ op->error = ret;
-+ break;
-+ }
-+ }
-+
-+ bch2_trans_put(trans);
-+}
-+
-+static void __bch2_nocow_write_done(struct bch_write_op *op)
-+{
-+ bch2_nocow_write_unlock(op);
-+
-+ if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
-+ op->error = -EIO;
-+ } else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN))
-+ bch2_nocow_write_convert_unwritten(op);
-+}
-+
-+static void bch2_nocow_write_done(struct closure *cl)
-+{
-+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
-+
-+ __bch2_nocow_write_done(op);
-+ bch2_write_done(cl);
-+}
-+
-+static void bch2_nocow_write(struct bch_write_op *op)
-+{
-+ struct bch_fs *c = op->c;
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_ptrs_c ptrs;
-+ const struct bch_extent_ptr *ptr;
-+ struct {
-+ struct bpos b;
-+ unsigned gen;
-+ struct nocow_lock_bucket *l;
-+ } buckets[BCH_REPLICAS_MAX];
-+ unsigned nr_buckets = 0;
-+ u32 snapshot;
-+ int ret, i;
-+
-+ if (op->flags & BCH_WRITE_MOVE)
-+ return;
-+
-+ trans = bch2_trans_get(c);
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, op->subvol, &snapshot);
-+ if (unlikely(ret))
-+ goto err;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ SPOS(op->pos.inode, op->pos.offset, snapshot),
-+ BTREE_ITER_SLOTS);
-+ while (1) {
-+ struct bio *bio = &op->wbio.bio;
-+
-+ nr_buckets = 0;
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ break;
-+
-+ /* fall back to normal cow write path? */
-+ if (unlikely(k.k->p.snapshot != snapshot ||
-+ !bch2_extent_is_writeable(op, k)))
-+ break;
-+
-+ if (bch2_keylist_realloc(&op->insert_keys,
-+ op->inline_keys,
-+ ARRAY_SIZE(op->inline_keys),
-+ k.k->u64s))
-+ break;
-+
-+ /* Get iorefs before dropping btree locks: */
-+ ptrs = bch2_bkey_ptrs_c(k);
-+ bkey_for_each_ptr(ptrs, ptr) {
-+ buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
-+ buckets[nr_buckets].gen = ptr->gen;
-+ buckets[nr_buckets].l =
-+ bucket_nocow_lock(&c->nocow_locks,
-+ bucket_to_u64(buckets[nr_buckets].b));
-+
-+ prefetch(buckets[nr_buckets].l);
-+
-+ if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
-+ goto err_get_ioref;
-+
-+ nr_buckets++;
-+
-+ if (ptr->unwritten)
-+ op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
-+ }
-+
-+ /* Unlock before taking nocow locks, doing IO: */
-+ bkey_reassemble(op->insert_keys.top, k);
-+ bch2_trans_unlock(trans);
-+
-+ bch2_cut_front(op->pos, op->insert_keys.top);
-+ if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
-+ bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
-+
-+ for (i = 0; i < nr_buckets; i++) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
-+ struct nocow_lock_bucket *l = buckets[i].l;
-+ bool stale;
-+
-+ __bch2_bucket_nocow_lock(&c->nocow_locks, l,
-+ bucket_to_u64(buckets[i].b),
-+ BUCKET_NOCOW_LOCK_UPDATE);
-+
-+ rcu_read_lock();
-+ stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
-+ rcu_read_unlock();
-+
-+ if (unlikely(stale))
-+ goto err_bucket_stale;
-+ }
-+
-+ bio = &op->wbio.bio;
-+ if (k.k->p.offset < op->pos.offset + bio_sectors(bio)) {
-+ bio = bio_split(bio, k.k->p.offset - op->pos.offset,
-+ GFP_KERNEL, &c->bio_write);
-+ wbio_init(bio)->put_bio = true;
-+ bio->bi_opf = op->wbio.bio.bi_opf;
-+ } else {
-+ op->flags |= BCH_WRITE_DONE;
-+ }
-+
-+ op->pos.offset += bio_sectors(bio);
-+ op->written += bio_sectors(bio);
-+
-+ bio->bi_end_io = bch2_write_endio;
-+ bio->bi_private = &op->cl;
-+ bio->bi_opf |= REQ_OP_WRITE;
-+ closure_get(&op->cl);
-+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
-+ op->insert_keys.top, true);
-+
-+ bch2_keylist_push(&op->insert_keys);
-+ if (op->flags & BCH_WRITE_DONE)
-+ break;
-+ bch2_btree_iter_advance(&iter);
-+ }
-+out:
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ if (ret) {
-+ bch_err_inum_offset_ratelimited(c,
-+ op->pos.inode,
-+ op->pos.offset << 9,
-+ "%s: btree lookup error %s",
-+ __func__, bch2_err_str(ret));
-+ op->error = ret;
-+ op->flags |= BCH_WRITE_DONE;
-+ }
-+
-+ bch2_trans_put(trans);
-+
-+ /* fallback to cow write path? */
-+ if (!(op->flags & BCH_WRITE_DONE)) {
-+ closure_sync(&op->cl);
-+ __bch2_nocow_write_done(op);
-+ op->insert_keys.top = op->insert_keys.keys;
-+ } else if (op->flags & BCH_WRITE_SYNC) {
-+ closure_sync(&op->cl);
-+ bch2_nocow_write_done(&op->cl);
-+ } else {
-+ /*
-+ * XXX
-+ * needs to run out of process context because ei_quota_lock is
-+ * a mutex
-+ */
-+ continue_at(&op->cl, bch2_nocow_write_done, index_update_wq(op));
-+ }
-+ return;
-+err_get_ioref:
-+ for (i = 0; i < nr_buckets; i++)
-+ percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
-+
-+ /* Fall back to COW path: */
-+ goto out;
-+err_bucket_stale:
-+ while (i >= 0) {
-+ bch2_bucket_nocow_unlock(&c->nocow_locks,
-+ buckets[i].b,
-+ BUCKET_NOCOW_LOCK_UPDATE);
-+ --i;
-+ }
-+ for (i = 0; i < nr_buckets; i++)
-+ percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
-+
-+ /* We can retry this: */
-+ ret = -BCH_ERR_transaction_restart;
-+ goto out;
-+}
-+
-+static void __bch2_write(struct bch_write_op *op)
-+{
-+ struct bch_fs *c = op->c;
-+ struct write_point *wp = NULL;
-+ struct bio *bio = NULL;
-+ unsigned nofs_flags;
-+ int ret;
-+
-+ nofs_flags = memalloc_nofs_save();
-+
-+ if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
-+ bch2_nocow_write(op);
-+ if (op->flags & BCH_WRITE_DONE)
-+ goto out_nofs_restore;
-+ }
-+again:
-+ memset(&op->failed, 0, sizeof(op->failed));
-+
-+ do {
-+ struct bkey_i *key_to_write;
-+ unsigned key_to_write_offset = op->insert_keys.top_p -
-+ op->insert_keys.keys_p;
-+
-+ /* +1 for possible cache device: */
-+ if (op->open_buckets.nr + op->nr_replicas + 1 >
-+ ARRAY_SIZE(op->open_buckets.v))
-+ break;
-+
-+ if (bch2_keylist_realloc(&op->insert_keys,
-+ op->inline_keys,
-+ ARRAY_SIZE(op->inline_keys),
-+ BKEY_EXTENT_U64s_MAX))
-+ break;
-+
-+ /*
-+ * The copygc thread is now global, which means it's no longer
-+ * freeing up space on specific disks, which means that
-+ * allocations for specific disks may hang arbitrarily long:
-+ */
-+ ret = bch2_trans_do(c, NULL, NULL, 0,
-+ bch2_alloc_sectors_start_trans(trans,
-+ op->target,
-+ op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
-+ op->write_point,
-+ &op->devs_have,
-+ op->nr_replicas,
-+ op->nr_replicas_required,
-+ op->watermark,
-+ op->flags,
-+ (op->flags & (BCH_WRITE_ALLOC_NOWAIT|
-+ BCH_WRITE_ONLY_SPECIFIED_DEVS))
-+ ? NULL : &op->cl, &wp));
-+ if (unlikely(ret)) {
-+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
-+ break;
-+
-+ goto err;
-+ }
-+
-+ EBUG_ON(!wp);
-+
-+ bch2_open_bucket_get(c, wp, &op->open_buckets);
-+ ret = bch2_write_extent(op, wp, &bio);
-+
-+ bch2_alloc_sectors_done_inlined(c, wp);
-+err:
-+ if (ret <= 0) {
-+ op->flags |= BCH_WRITE_DONE;
-+
-+ if (ret < 0) {
-+ op->error = ret;
-+ break;
-+ }
-+ }
-+
-+ bio->bi_end_io = bch2_write_endio;
-+ bio->bi_private = &op->cl;
-+ bio->bi_opf |= REQ_OP_WRITE;
-+
-+ closure_get(bio->bi_private);
-+
-+ key_to_write = (void *) (op->insert_keys.keys_p +
-+ key_to_write_offset);
-+
-+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
-+ key_to_write, false);
-+ } while (ret);
-+
-+ /*
-+ * Sync or no?
-+ *
-+ * If we're running asynchronously, wne may still want to block
-+ * synchronously here if we weren't able to submit all of the IO at
-+ * once, as that signals backpressure to the caller.
-+ */
-+ if ((op->flags & BCH_WRITE_SYNC) ||
-+ (!(op->flags & BCH_WRITE_DONE) &&
-+ !(op->flags & BCH_WRITE_IN_WORKER))) {
-+ closure_sync(&op->cl);
-+ __bch2_write_index(op);
-+
-+ if (!(op->flags & BCH_WRITE_DONE))
-+ goto again;
-+ bch2_write_done(&op->cl);
-+ } else {
-+ bch2_write_queue(op, wp);
-+ continue_at(&op->cl, bch2_write_index, NULL);
-+ }
-+out_nofs_restore:
-+ memalloc_nofs_restore(nofs_flags);
-+}
-+
-+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
-+{
-+ struct bio *bio = &op->wbio.bio;
-+ struct bvec_iter iter;
-+ struct bkey_i_inline_data *id;
-+ unsigned sectors;
-+ int ret;
-+
-+ op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
-+ op->flags |= BCH_WRITE_DONE;
-+
-+ bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
-+
-+ ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
-+ ARRAY_SIZE(op->inline_keys),
-+ BKEY_U64s + DIV_ROUND_UP(data_len, 8));
-+ if (ret) {
-+ op->error = ret;
-+ goto err;
-+ }
-+
-+ sectors = bio_sectors(bio);
-+ op->pos.offset += sectors;
-+
-+ id = bkey_inline_data_init(op->insert_keys.top);
-+ id->k.p = op->pos;
-+ id->k.version = op->version;
-+ id->k.size = sectors;
-+
-+ iter = bio->bi_iter;
-+ iter.bi_size = data_len;
-+ memcpy_from_bio(id->v.data, bio, iter);
-+
-+ while (data_len & 7)
-+ id->v.data[data_len++] = '\0';
-+ set_bkey_val_bytes(&id->k, data_len);
-+ bch2_keylist_push(&op->insert_keys);
-+
-+ __bch2_write_index(op);
-+err:
-+ bch2_write_done(&op->cl);
-+}
-+
-+/**
-+ * bch2_write() - handle a write to a cache device or flash only volume
-+ * @cl: &bch_write_op->cl
-+ *
-+ * This is the starting point for any data to end up in a cache device; it could
-+ * be from a normal write, or a writeback write, or a write to a flash only
-+ * volume - it's also used by the moving garbage collector to compact data in
-+ * mostly empty buckets.
-+ *
-+ * It first writes the data to the cache, creating a list of keys to be inserted
-+ * (if the data won't fit in a single open bucket, there will be multiple keys);
-+ * after the data is written it calls bch_journal, and after the keys have been
-+ * added to the next journal write they're inserted into the btree.
-+ *
-+ * If op->discard is true, instead of inserting the data it invalidates the
-+ * region of the cache represented by op->bio and op->inode.
-+ */
-+void bch2_write(struct closure *cl)
-+{
-+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
-+ struct bio *bio = &op->wbio.bio;
-+ struct bch_fs *c = op->c;
-+ unsigned data_len;
-+
-+ EBUG_ON(op->cl.parent);
-+ BUG_ON(!op->nr_replicas);
-+ BUG_ON(!op->write_point.v);
-+ BUG_ON(bkey_eq(op->pos, POS_MAX));
-+
-+ op->start_time = local_clock();
-+ bch2_keylist_init(&op->insert_keys, op->inline_keys);
-+ wbio_init(bio)->put_bio = false;
-+
-+ if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) {
-+ bch_err_inum_offset_ratelimited(c,
-+ op->pos.inode,
-+ op->pos.offset << 9,
-+ "misaligned write");
-+ op->error = -EIO;
-+ goto err;
-+ }
-+
-+ if (c->opts.nochanges) {
-+ op->error = -BCH_ERR_erofs_no_writes;
-+ goto err;
-+ }
-+
-+ if (!(op->flags & BCH_WRITE_MOVE) &&
-+ !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
-+ op->error = -BCH_ERR_erofs_no_writes;
-+ goto err;
-+ }
-+
-+ this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio));
-+ bch2_increment_clock(c, bio_sectors(bio), WRITE);
-+
-+ data_len = min_t(u64, bio->bi_iter.bi_size,
-+ op->new_i_size - (op->pos.offset << 9));
-+
-+ if (c->opts.inline_data &&
-+ data_len <= min(block_bytes(c) / 2, 1024U)) {
-+ bch2_write_data_inline(op, data_len);
-+ return;
-+ }
-+
-+ __bch2_write(op);
-+ return;
-+err:
-+ bch2_disk_reservation_put(c, &op->res);
-+
-+ closure_debug_destroy(&op->cl);
-+ if (op->end_io)
-+ op->end_io(op);
-+}
-+
-+static const char * const bch2_write_flags[] = {
-+#define x(f) #f,
-+ BCH_WRITE_FLAGS()
-+#undef x
-+ NULL
-+};
-+
-+void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
-+{
-+ prt_str(out, "pos: ");
-+ bch2_bpos_to_text(out, op->pos);
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+
-+ prt_str(out, "started: ");
-+ bch2_pr_time_units(out, local_clock() - op->start_time);
-+ prt_newline(out);
-+
-+ prt_str(out, "flags: ");
-+ prt_bitflags(out, bch2_write_flags, op->flags);
-+ prt_newline(out);
-+
-+ prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl));
-+ prt_newline(out);
-+
-+ printbuf_indent_sub(out, 2);
-+}
-+
-+void bch2_fs_io_write_exit(struct bch_fs *c)
-+{
-+ mempool_exit(&c->bio_bounce_pages);
-+ bioset_exit(&c->bio_write);
-+}
-+
-+int bch2_fs_io_write_init(struct bch_fs *c)
-+{
-+ if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
-+ BIOSET_NEED_BVECS))
-+ return -BCH_ERR_ENOMEM_bio_write_init;
-+
-+ if (mempool_init_page_pool(&c->bio_bounce_pages,
-+ max_t(unsigned,
-+ c->opts.btree_node_size,
-+ c->opts.encoded_extent_max) /
-+ PAGE_SIZE, 0))
-+ return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
-+
-+ return 0;
-+}
-diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
-new file mode 100644
-index 000000000000..9323167229ee
---- /dev/null
-+++ b/fs/bcachefs/io_write.h
-@@ -0,0 +1,110 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_IO_WRITE_H
-+#define _BCACHEFS_IO_WRITE_H
-+
-+#include "checksum.h"
-+#include "io_write_types.h"
-+
-+#define to_wbio(_bio) \
-+ container_of((_bio), struct bch_write_bio, bio)
-+
-+void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
-+void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
-+
-+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-+void bch2_latency_acct(struct bch_dev *, u64, int);
-+#else
-+static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
-+#endif
-+
-+void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
-+ enum bch_data_type, const struct bkey_i *, bool);
-+
-+#define BCH_WRITE_FLAGS() \
-+ x(ALLOC_NOWAIT) \
-+ x(CACHED) \
-+ x(DATA_ENCODED) \
-+ x(PAGES_STABLE) \
-+ x(PAGES_OWNED) \
-+ x(ONLY_SPECIFIED_DEVS) \
-+ x(WROTE_DATA_INLINE) \
-+ x(FROM_INTERNAL) \
-+ x(CHECK_ENOSPC) \
-+ x(SYNC) \
-+ x(MOVE) \
-+ x(IN_WORKER) \
-+ x(DONE) \
-+ x(IO_ERROR) \
-+ x(CONVERT_UNWRITTEN)
-+
-+enum __bch_write_flags {
-+#define x(f) __BCH_WRITE_##f,
-+ BCH_WRITE_FLAGS()
-+#undef x
-+};
-+
-+enum bch_write_flags {
-+#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
-+ BCH_WRITE_FLAGS()
-+#undef x
-+};
-+
-+static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
-+{
-+ return op->watermark == BCH_WATERMARK_copygc
-+ ? op->c->copygc_wq
-+ : op->c->btree_update_wq;
-+}
-+
-+int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
-+ struct bkey_i *, bool *, s64 *, s64 *);
-+int bch2_extent_update(struct btree_trans *, subvol_inum,
-+ struct btree_iter *, struct bkey_i *,
-+ struct disk_reservation *, u64, s64 *, bool);
-+
-+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
-+ struct bch_io_opts opts)
-+{
-+ op->c = c;
-+ op->end_io = NULL;
-+ op->flags = 0;
-+ op->written = 0;
-+ op->error = 0;
-+ op->csum_type = bch2_data_checksum_type(c, opts);
-+ op->compression_opt = opts.compression;
-+ op->nr_replicas = 0;
-+ op->nr_replicas_required = c->opts.data_replicas_required;
-+ op->watermark = BCH_WATERMARK_normal;
-+ op->incompressible = 0;
-+ op->open_buckets.nr = 0;
-+ op->devs_have.nr = 0;
-+ op->target = 0;
-+ op->opts = opts;
-+ op->subvol = 0;
-+ op->pos = POS_MAX;
-+ op->version = ZERO_VERSION;
-+ op->write_point = (struct write_point_specifier) { 0 };
-+ op->res = (struct disk_reservation) { 0 };
-+ op->new_i_size = U64_MAX;
-+ op->i_sectors_delta = 0;
-+ op->devs_need_flush = NULL;
-+}
-+
-+void bch2_write(struct closure *);
-+
-+void bch2_write_point_do_index_updates(struct work_struct *);
-+
-+static inline struct bch_write_bio *wbio_init(struct bio *bio)
-+{
-+ struct bch_write_bio *wbio = to_wbio(bio);
-+
-+ memset(&wbio->wbio, 0, sizeof(wbio->wbio));
-+ return wbio;
-+}
-+
-+void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);
-+
-+void bch2_fs_io_write_exit(struct bch_fs *);
-+int bch2_fs_io_write_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_IO_WRITE_H */
-diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h
-new file mode 100644
-index 000000000000..c7f97c2c4805
---- /dev/null
-+++ b/fs/bcachefs/io_write_types.h
-@@ -0,0 +1,96 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_IO_WRITE_TYPES_H
-+#define _BCACHEFS_IO_WRITE_TYPES_H
-+
-+#include "alloc_types.h"
-+#include "btree_types.h"
-+#include "buckets_types.h"
-+#include "extents_types.h"
-+#include "keylist_types.h"
-+#include "opts.h"
-+#include "super_types.h"
-+
-+#include <linux/llist.h>
-+#include <linux/workqueue.h>
-+
-+struct bch_write_bio {
-+ struct_group(wbio,
-+ struct bch_fs *c;
-+ struct bch_write_bio *parent;
-+
-+ u64 submit_time;
-+ u64 inode_offset;
-+
-+ struct bch_devs_list failed;
-+ u8 dev;
-+
-+ unsigned split:1,
-+ bounce:1,
-+ put_bio:1,
-+ have_ioref:1,
-+ nocow:1,
-+ used_mempool:1,
-+ first_btree_write:1;
-+ );
-+
-+ struct bio bio;
-+};
-+
-+struct bch_write_op {
-+ struct closure cl;
-+ struct bch_fs *c;
-+ void (*end_io)(struct bch_write_op *);
-+ u64 start_time;
-+
-+ unsigned written; /* sectors */
-+ u16 flags;
-+ s16 error; /* dio write path expects it to hold -ERESTARTSYS... */
-+
-+ unsigned compression_opt:8;
-+ unsigned csum_type:4;
-+ unsigned nr_replicas:4;
-+ unsigned nr_replicas_required:4;
-+ unsigned watermark:3;
-+ unsigned incompressible:1;
-+ unsigned stripe_waited:1;
-+
-+ struct bch_devs_list devs_have;
-+ u16 target;
-+ u16 nonce;
-+ struct bch_io_opts opts;
-+
-+ u32 subvol;
-+ struct bpos pos;
-+ struct bversion version;
-+
-+ /* For BCH_WRITE_DATA_ENCODED: */
-+ struct bch_extent_crc_unpacked crc;
-+
-+ struct write_point_specifier write_point;
-+
-+ struct write_point *wp;
-+ struct list_head wp_list;
-+
-+ struct disk_reservation res;
-+
-+ struct open_buckets open_buckets;
-+
-+ u64 new_i_size;
-+ s64 i_sectors_delta;
-+
-+ struct bch_devs_mask failed;
-+
-+ struct keylist insert_keys;
-+ u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2];
-+
-+ /*
-+ * Bitmask of devices that have had nocow writes issued to them since
-+ * last flush:
-+ */
-+ struct bch_devs_mask *devs_need_flush;
-+
-+ /* Must be last: */
-+ struct bch_write_bio wbio;
-+};
-+
-+#endif /* _BCACHEFS_IO_WRITE_TYPES_H */
-diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
-new file mode 100644
-index 000000000000..5b5d69f2316b
---- /dev/null
-+++ b/fs/bcachefs/journal.c
-@@ -0,0 +1,1468 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * bcachefs journalling code, for btree insertions
-+ *
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_methods.h"
-+#include "btree_gc.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "error.h"
-+#include "journal.h"
-+#include "journal_io.h"
-+#include "journal_reclaim.h"
-+#include "journal_sb.h"
-+#include "journal_seq_blacklist.h"
-+#include "trace.h"
-+
-+static const char * const bch2_journal_errors[] = {
-+#define x(n) #n,
-+ JOURNAL_ERRORS()
-+#undef x
-+ NULL
-+};
-+
-+static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
-+{
-+ return seq > j->seq_ondisk;
-+}
-+
-+static bool __journal_entry_is_open(union journal_res_state state)
-+{
-+ return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
-+}
-+
-+static inline unsigned nr_unwritten_journal_entries(struct journal *j)
-+{
-+ return atomic64_read(&j->seq) - j->seq_ondisk;
-+}
-+
-+static bool journal_entry_is_open(struct journal *j)
-+{
-+ return __journal_entry_is_open(j->reservations);
-+}
-+
-+static inline struct journal_buf *
-+journal_seq_to_buf(struct journal *j, u64 seq)
-+{
-+ struct journal_buf *buf = NULL;
-+
-+ EBUG_ON(seq > journal_cur_seq(j));
-+
-+ if (journal_seq_unwritten(j, seq)) {
-+ buf = j->buf + (seq & JOURNAL_BUF_MASK);
-+ EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
-+ }
-+ return buf;
-+}
-+
-+static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < ARRAY_SIZE(p->list); i++)
-+ INIT_LIST_HEAD(&p->list[i]);
-+ INIT_LIST_HEAD(&p->flushed);
-+ atomic_set(&p->count, count);
-+ p->devs.nr = 0;
-+}
-+
-+/*
-+ * Detect stuck journal conditions and trigger shutdown. Technically the journal
-+ * can end up stuck for a variety of reasons, such as a blocked I/O, journal
-+ * reservation lockup, etc. Since this is a fatal error with potentially
-+ * unpredictable characteristics, we want to be fairly conservative before we
-+ * decide to shut things down.
-+ *
-+ * Consider the journal stuck when it appears full with no ability to commit
-+ * btree transactions, to discard journal buckets, nor acquire priority
-+ * (reserved watermark) reservation.
-+ */
-+static inline bool
-+journal_error_check_stuck(struct journal *j, int error, unsigned flags)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ bool stuck = false;
-+ struct printbuf buf = PRINTBUF;
-+
-+ if (!(error == JOURNAL_ERR_journal_full ||
-+ error == JOURNAL_ERR_journal_pin_full) ||
-+ nr_unwritten_journal_entries(j) ||
-+ (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim)
-+ return stuck;
-+
-+ spin_lock(&j->lock);
-+
-+ if (j->can_discard) {
-+ spin_unlock(&j->lock);
-+ return stuck;
-+ }
-+
-+ stuck = true;
-+
-+ /*
-+ * The journal shutdown path will set ->err_seq, but do it here first to
-+ * serialize against concurrent failures and avoid duplicate error
-+ * reports.
-+ */
-+ if (j->err_seq) {
-+ spin_unlock(&j->lock);
-+ return stuck;
-+ }
-+ j->err_seq = journal_cur_seq(j);
-+ spin_unlock(&j->lock);
-+
-+ bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)",
-+ bch2_journal_errors[error]);
-+ bch2_journal_debug_to_text(&buf, j);
-+ bch_err(c, "%s", buf.buf);
-+
-+ printbuf_reset(&buf);
-+ bch2_journal_pins_to_text(&buf, j);
-+ bch_err(c, "Journal pins:\n%s", buf.buf);
-+ printbuf_exit(&buf);
-+
-+ bch2_fatal_error(c);
-+ dump_stack();
-+
-+ return stuck;
-+}
-+
-+/*
-+ * Final processing when the last reference of a journal buffer has been
-+ * dropped. Drop the pin list reference acquired at journal entry open and write
-+ * the buffer, if requested.
-+ */
-+void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+
-+ lockdep_assert_held(&j->lock);
-+
-+ if (__bch2_journal_pin_put(j, seq))
-+ bch2_journal_reclaim_fast(j);
-+ if (write)
-+ closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
-+}
-+
-+/*
-+ * Returns true if journal entry is now closed:
-+ *
-+ * We don't close a journal_buf until the next journal_buf is finished writing,
-+ * and can be opened again - this also initializes the next journal_buf:
-+ */
-+static void __journal_entry_close(struct journal *j, unsigned closed_val)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct journal_buf *buf = journal_cur_buf(j);
-+ union journal_res_state old, new;
-+ u64 v = atomic64_read(&j->reservations.counter);
-+ unsigned sectors;
-+
-+ BUG_ON(closed_val != JOURNAL_ENTRY_CLOSED_VAL &&
-+ closed_val != JOURNAL_ENTRY_ERROR_VAL);
-+
-+ lockdep_assert_held(&j->lock);
-+
-+ do {
-+ old.v = new.v = v;
-+ new.cur_entry_offset = closed_val;
-+
-+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL ||
-+ old.cur_entry_offset == new.cur_entry_offset)
-+ return;
-+ } while ((v = atomic64_cmpxchg(&j->reservations.counter,
-+ old.v, new.v)) != old.v);
-+
-+ if (!__journal_entry_is_open(old))
-+ return;
-+
-+ /* Close out old buffer: */
-+ buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
-+
-+ sectors = vstruct_blocks_plus(buf->data, c->block_bits,
-+ buf->u64s_reserved) << c->block_bits;
-+ BUG_ON(sectors > buf->sectors);
-+ buf->sectors = sectors;
-+
-+ /*
-+ * We have to set last_seq here, _before_ opening a new journal entry:
-+ *
-+ * A threads may replace an old pin with a new pin on their current
-+ * journal reservation - the expectation being that the journal will
-+ * contain either what the old pin protected or what the new pin
-+ * protects.
-+ *
-+ * After the old pin is dropped journal_last_seq() won't include the old
-+ * pin, so we can only write the updated last_seq on the entry that
-+ * contains whatever the new pin protects.
-+ *
-+ * Restated, we can _not_ update last_seq for a given entry if there
-+ * could be a newer entry open with reservations/pins that have been
-+ * taken against it.
-+ *
-+ * Hence, we want update/set last_seq on the current journal entry right
-+ * before we open a new one:
-+ */
-+ buf->last_seq = journal_last_seq(j);
-+ buf->data->last_seq = cpu_to_le64(buf->last_seq);
-+ BUG_ON(buf->last_seq > le64_to_cpu(buf->data->seq));
-+
-+ cancel_delayed_work(&j->write_work);
-+
-+ bch2_journal_space_available(j);
-+
-+ __bch2_journal_buf_put(j, old.idx, le64_to_cpu(buf->data->seq));
-+}
-+
-+void bch2_journal_halt(struct journal *j)
-+{
-+ spin_lock(&j->lock);
-+ __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL);
-+ if (!j->err_seq)
-+ j->err_seq = journal_cur_seq(j);
-+ journal_wake(j);
-+ spin_unlock(&j->lock);
-+}
-+
-+static bool journal_entry_want_write(struct journal *j)
-+{
-+ bool ret = !journal_entry_is_open(j) ||
-+ journal_cur_seq(j) == journal_last_unwritten_seq(j);
-+
-+ /* Don't close it yet if we already have a write in flight: */
-+ if (ret)
-+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
-+ else if (nr_unwritten_journal_entries(j)) {
-+ struct journal_buf *buf = journal_cur_buf(j);
-+
-+ if (!buf->flush_time) {
-+ buf->flush_time = local_clock() ?: 1;
-+ buf->expires = jiffies;
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+static bool journal_entry_close(struct journal *j)
-+{
-+ bool ret;
-+
-+ spin_lock(&j->lock);
-+ ret = journal_entry_want_write(j);
-+ spin_unlock(&j->lock);
-+
-+ return ret;
-+}
-+
-+/*
-+ * should _only_ called from journal_res_get() - when we actually want a
-+ * journal reservation - journal entry is open means journal is dirty:
-+ */
-+static int journal_entry_open(struct journal *j)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct journal_buf *buf = j->buf +
-+ ((journal_cur_seq(j) + 1) & JOURNAL_BUF_MASK);
-+ union journal_res_state old, new;
-+ int u64s;
-+ u64 v;
-+
-+ lockdep_assert_held(&j->lock);
-+ BUG_ON(journal_entry_is_open(j));
-+ BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
-+
-+ if (j->blocked)
-+ return JOURNAL_ERR_blocked;
-+
-+ if (j->cur_entry_error)
-+ return j->cur_entry_error;
-+
-+ if (bch2_journal_error(j))
-+ return JOURNAL_ERR_insufficient_devices; /* -EROFS */
-+
-+ if (!fifo_free(&j->pin))
-+ return JOURNAL_ERR_journal_pin_full;
-+
-+ if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
-+ return JOURNAL_ERR_max_in_flight;
-+
-+ BUG_ON(!j->cur_entry_sectors);
-+
-+ buf->expires =
-+ (journal_cur_seq(j) == j->flushed_seq_ondisk
-+ ? jiffies
-+ : j->last_flush_write) +
-+ msecs_to_jiffies(c->opts.journal_flush_delay);
-+
-+ buf->u64s_reserved = j->entry_u64s_reserved;
-+ buf->disk_sectors = j->cur_entry_sectors;
-+ buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9);
-+
-+ u64s = (int) (buf->sectors << 9) / sizeof(u64) -
-+ journal_entry_overhead(j);
-+ u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
-+
-+ if (u64s <= (ssize_t) j->early_journal_entries.nr)
-+ return JOURNAL_ERR_journal_full;
-+
-+ if (fifo_empty(&j->pin) && j->reclaim_thread)
-+ wake_up_process(j->reclaim_thread);
-+
-+ /*
-+ * The fifo_push() needs to happen at the same time as j->seq is
-+ * incremented for journal_last_seq() to be calculated correctly
-+ */
-+ atomic64_inc(&j->seq);
-+ journal_pin_list_init(fifo_push_ref(&j->pin), 1);
-+
-+ BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
-+
-+ bkey_extent_init(&buf->key);
-+ buf->noflush = false;
-+ buf->must_flush = false;
-+ buf->separate_flush = false;
-+ buf->flush_time = 0;
-+
-+ memset(buf->data, 0, sizeof(*buf->data));
-+ buf->data->seq = cpu_to_le64(journal_cur_seq(j));
-+ buf->data->u64s = 0;
-+
-+ if (j->early_journal_entries.nr) {
-+ memcpy(buf->data->_data, j->early_journal_entries.data,
-+ j->early_journal_entries.nr * sizeof(u64));
-+ le32_add_cpu(&buf->data->u64s, j->early_journal_entries.nr);
-+ }
-+
-+ /*
-+ * Must be set before marking the journal entry as open:
-+ */
-+ j->cur_entry_u64s = u64s;
-+
-+ v = atomic64_read(&j->reservations.counter);
-+ do {
-+ old.v = new.v = v;
-+
-+ BUG_ON(old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL);
-+
-+ new.idx++;
-+ BUG_ON(journal_state_count(new, new.idx));
-+ BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK));
-+
-+ journal_state_inc(&new);
-+
-+ /* Handle any already added entries */
-+ new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
-+ } while ((v = atomic64_cmpxchg(&j->reservations.counter,
-+ old.v, new.v)) != old.v);
-+
-+ if (j->res_get_blocked_start)
-+ bch2_time_stats_update(j->blocked_time,
-+ j->res_get_blocked_start);
-+ j->res_get_blocked_start = 0;
-+
-+ mod_delayed_work(c->io_complete_wq,
-+ &j->write_work,
-+ msecs_to_jiffies(c->opts.journal_flush_delay));
-+ journal_wake(j);
-+
-+ if (j->early_journal_entries.nr)
-+ darray_exit(&j->early_journal_entries);
-+ return 0;
-+}
-+
-+static bool journal_quiesced(struct journal *j)
-+{
-+ bool ret = atomic64_read(&j->seq) == j->seq_ondisk;
-+
-+ if (!ret)
-+ journal_entry_close(j);
-+ return ret;
-+}
-+
-+static void journal_quiesce(struct journal *j)
-+{
-+ wait_event(j->wait, journal_quiesced(j));
-+}
-+
-+static void journal_write_work(struct work_struct *work)
-+{
-+ struct journal *j = container_of(work, struct journal, write_work.work);
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ long delta;
-+
-+ spin_lock(&j->lock);
-+ if (!__journal_entry_is_open(j->reservations))
-+ goto unlock;
-+
-+ delta = journal_cur_buf(j)->expires - jiffies;
-+
-+ if (delta > 0)
-+ mod_delayed_work(c->io_complete_wq, &j->write_work, delta);
-+ else
-+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
-+unlock:
-+ spin_unlock(&j->lock);
-+}
-+
-+static int __journal_res_get(struct journal *j, struct journal_res *res,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct journal_buf *buf;
-+ bool can_discard;
-+ int ret;
-+retry:
-+ if (journal_res_get_fast(j, res, flags))
-+ return 0;
-+
-+ if (bch2_journal_error(j))
-+ return -BCH_ERR_erofs_journal_err;
-+
-+ spin_lock(&j->lock);
-+
-+ /* check once more in case somebody else shut things down... */
-+ if (bch2_journal_error(j)) {
-+ spin_unlock(&j->lock);
-+ return -BCH_ERR_erofs_journal_err;
-+ }
-+
-+ /*
-+ * Recheck after taking the lock, so we don't race with another thread
-+ * that just did journal_entry_open() and call journal_entry_close()
-+ * unnecessarily
-+ */
-+ if (journal_res_get_fast(j, res, flags)) {
-+ spin_unlock(&j->lock);
-+ return 0;
-+ }
-+
-+ if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
-+ /*
-+ * Don't want to close current journal entry, just need to
-+ * invoke reclaim:
-+ */
-+ ret = JOURNAL_ERR_journal_full;
-+ goto unlock;
-+ }
-+
-+ /*
-+ * If we couldn't get a reservation because the current buf filled up,
-+ * and we had room for a bigger entry on disk, signal that we want to
-+ * realloc the journal bufs:
-+ */
-+ buf = journal_cur_buf(j);
-+ if (journal_entry_is_open(j) &&
-+ buf->buf_size >> 9 < buf->disk_sectors &&
-+ buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
-+ j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
-+
-+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
-+ ret = journal_entry_open(j);
-+
-+ if (ret == JOURNAL_ERR_max_in_flight)
-+ trace_and_count(c, journal_entry_full, c);
-+unlock:
-+ if ((ret && ret != JOURNAL_ERR_insufficient_devices) &&
-+ !j->res_get_blocked_start) {
-+ j->res_get_blocked_start = local_clock() ?: 1;
-+ trace_and_count(c, journal_full, c);
-+ }
-+
-+ can_discard = j->can_discard;
-+ spin_unlock(&j->lock);
-+
-+ if (!ret)
-+ goto retry;
-+ if (journal_error_check_stuck(j, ret, flags))
-+ ret = -BCH_ERR_journal_res_get_blocked;
-+
-+ /*
-+ * Journal is full - can't rely on reclaim from work item due to
-+ * freezing:
-+ */
-+ if ((ret == JOURNAL_ERR_journal_full ||
-+ ret == JOURNAL_ERR_journal_pin_full) &&
-+ !(flags & JOURNAL_RES_GET_NONBLOCK)) {
-+ if (can_discard) {
-+ bch2_journal_do_discards(j);
-+ goto retry;
-+ }
-+
-+ if (mutex_trylock(&j->reclaim_lock)) {
-+ bch2_journal_reclaim(j);
-+ mutex_unlock(&j->reclaim_lock);
-+ }
-+ }
-+
-+ return ret == JOURNAL_ERR_insufficient_devices
-+ ? -BCH_ERR_erofs_journal_err
-+ : -BCH_ERR_journal_res_get_blocked;
-+}
-+
-+/*
-+ * Essentially the entry function to the journaling code. When bcachefs is doing
-+ * a btree insert, it calls this function to get the current journal write.
-+ * Journal write is the structure used set up journal writes. The calling
-+ * function will then add its keys to the structure, queuing them for the next
-+ * write.
-+ *
-+ * To ensure forward progress, the current task must not be holding any
-+ * btree node write locks.
-+ */
-+int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
-+ unsigned flags)
-+{
-+ int ret;
-+
-+ closure_wait_event(&j->async_wait,
-+ (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
-+ (flags & JOURNAL_RES_GET_NONBLOCK));
-+ return ret;
-+}
-+
-+/* journal_preres: */
-+
-+static bool journal_preres_available(struct journal *j,
-+ struct journal_preres *res,
-+ unsigned new_u64s,
-+ unsigned flags)
-+{
-+ bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
-+
-+ if (!ret && mutex_trylock(&j->reclaim_lock)) {
-+ bch2_journal_reclaim(j);
-+ mutex_unlock(&j->reclaim_lock);
-+ }
-+
-+ return ret;
-+}
-+
-+int __bch2_journal_preres_get(struct journal *j,
-+ struct journal_preres *res,
-+ unsigned new_u64s,
-+ unsigned flags)
-+{
-+ int ret;
-+
-+ closure_wait_event(&j->preres_wait,
-+ (ret = bch2_journal_error(j)) ||
-+ journal_preres_available(j, res, new_u64s, flags));
-+ return ret;
-+}
-+
-+/* journal_entry_res: */
-+
-+void bch2_journal_entry_res_resize(struct journal *j,
-+ struct journal_entry_res *res,
-+ unsigned new_u64s)
-+{
-+ union journal_res_state state;
-+ int d = new_u64s - res->u64s;
-+
-+ spin_lock(&j->lock);
-+
-+ j->entry_u64s_reserved += d;
-+ if (d <= 0)
-+ goto out;
-+
-+ j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
-+ smp_mb();
-+ state = READ_ONCE(j->reservations);
-+
-+ if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL &&
-+ state.cur_entry_offset > j->cur_entry_u64s) {
-+ j->cur_entry_u64s += d;
-+ /*
-+ * Not enough room in current journal entry, have to flush it:
-+ */
-+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
-+ } else {
-+ journal_cur_buf(j)->u64s_reserved += d;
-+ }
-+out:
-+ spin_unlock(&j->lock);
-+ res->u64s += d;
-+}
-+
-+/* journal flushing: */
-+
-+/**
-+ * bch2_journal_flush_seq_async - wait for a journal entry to be written
-+ * @j: journal object
-+ * @seq: seq to flush
-+ * @parent: closure object to wait with
-+ * Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed,
-+ * -EIO if @seq will never be flushed
-+ *
-+ * Like bch2_journal_wait_on_seq, except that it triggers a write immediately if
-+ * necessary
-+ */
-+int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
-+ struct closure *parent)
-+{
-+ struct journal_buf *buf;
-+ int ret = 0;
-+
-+ if (seq <= j->flushed_seq_ondisk)
-+ return 1;
-+
-+ spin_lock(&j->lock);
-+
-+ if (WARN_ONCE(seq > journal_cur_seq(j),
-+ "requested to flush journal seq %llu, but currently at %llu",
-+ seq, journal_cur_seq(j)))
-+ goto out;
-+
-+ /* Recheck under lock: */
-+ if (j->err_seq && seq >= j->err_seq) {
-+ ret = -EIO;
-+ goto out;
-+ }
-+
-+ if (seq <= j->flushed_seq_ondisk) {
-+ ret = 1;
-+ goto out;
-+ }
-+
-+ /* if seq was written, but not flushed - flush a newer one instead */
-+ seq = max(seq, journal_last_unwritten_seq(j));
-+
-+recheck_need_open:
-+ if (seq > journal_cur_seq(j)) {
-+ struct journal_res res = { 0 };
-+
-+ if (journal_entry_is_open(j))
-+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
-+
-+ spin_unlock(&j->lock);
-+
-+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
-+ if (ret)
-+ return ret;
-+
-+ seq = res.seq;
-+ buf = j->buf + (seq & JOURNAL_BUF_MASK);
-+ buf->must_flush = true;
-+
-+ if (!buf->flush_time) {
-+ buf->flush_time = local_clock() ?: 1;
-+ buf->expires = jiffies;
-+ }
-+
-+ if (parent && !closure_wait(&buf->wait, parent))
-+ BUG();
-+
-+ bch2_journal_res_put(j, &res);
-+
-+ spin_lock(&j->lock);
-+ goto want_write;
-+ }
-+
-+ /*
-+ * if write was kicked off without a flush, flush the next sequence
-+ * number instead
-+ */
-+ buf = journal_seq_to_buf(j, seq);
-+ if (buf->noflush) {
-+ seq++;
-+ goto recheck_need_open;
-+ }
-+
-+ buf->must_flush = true;
-+
-+ if (parent && !closure_wait(&buf->wait, parent))
-+ BUG();
-+want_write:
-+ if (seq == journal_cur_seq(j))
-+ journal_entry_want_write(j);
-+out:
-+ spin_unlock(&j->lock);
-+ return ret;
-+}
-+
-+int bch2_journal_flush_seq(struct journal *j, u64 seq)
-+{
-+ u64 start_time = local_clock();
-+ int ret, ret2;
-+
-+ /*
-+ * Don't update time_stats when @seq is already flushed:
-+ */
-+ if (seq <= j->flushed_seq_ondisk)
-+ return 0;
-+
-+ ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));
-+
-+ if (!ret)
-+ bch2_time_stats_update(j->flush_seq_time, start_time);
-+
-+ return ret ?: ret2 < 0 ? ret2 : 0;
-+}
-+
-+/*
-+ * bch2_journal_flush_async - if there is an open journal entry, or a journal
-+ * still being written, write it and wait for the write to complete
-+ */
-+void bch2_journal_flush_async(struct journal *j, struct closure *parent)
-+{
-+ bch2_journal_flush_seq_async(j, atomic64_read(&j->seq), parent);
-+}
-+
-+int bch2_journal_flush(struct journal *j)
-+{
-+ return bch2_journal_flush_seq(j, atomic64_read(&j->seq));
-+}
-+
-+/*
-+ * bch2_journal_noflush_seq - tell the journal not to issue any flushes before
-+ * @seq
-+ */
-+bool bch2_journal_noflush_seq(struct journal *j, u64 seq)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ u64 unwritten_seq;
-+ bool ret = false;
-+
-+ if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush)))
-+ return false;
-+
-+ if (seq <= c->journal.flushed_seq_ondisk)
-+ return false;
-+
-+ spin_lock(&j->lock);
-+ if (seq <= c->journal.flushed_seq_ondisk)
-+ goto out;
-+
-+ for (unwritten_seq = journal_last_unwritten_seq(j);
-+ unwritten_seq < seq;
-+ unwritten_seq++) {
-+ struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq);
-+
-+ /* journal write is already in flight, and was a flush write: */
-+ if (unwritten_seq == journal_last_unwritten_seq(j) && !buf->noflush)
-+ goto out;
-+
-+ buf->noflush = true;
-+ }
-+
-+ ret = true;
-+out:
-+ spin_unlock(&j->lock);
-+ return ret;
-+}
-+
-+int bch2_journal_meta(struct journal *j)
-+{
-+ struct journal_buf *buf;
-+ struct journal_res res;
-+ int ret;
-+
-+ memset(&res, 0, sizeof(res));
-+
-+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
-+ if (ret)
-+ return ret;
-+
-+ buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
-+ buf->must_flush = true;
-+
-+ if (!buf->flush_time) {
-+ buf->flush_time = local_clock() ?: 1;
-+ buf->expires = jiffies;
-+ }
-+
-+ bch2_journal_res_put(j, &res);
-+
-+ return bch2_journal_flush_seq(j, res.seq);
-+}
-+
-+/* block/unlock the journal: */
-+
-+void bch2_journal_unblock(struct journal *j)
-+{
-+ spin_lock(&j->lock);
-+ j->blocked--;
-+ spin_unlock(&j->lock);
-+
-+ journal_wake(j);
-+}
-+
-+void bch2_journal_block(struct journal *j)
-+{
-+ spin_lock(&j->lock);
-+ j->blocked++;
-+ spin_unlock(&j->lock);
-+
-+ journal_quiesce(j);
-+}
-+
-+/* allocate journal on a device: */
-+
-+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
-+ bool new_fs, struct closure *cl)
-+{
-+ struct bch_fs *c = ca->fs;
-+ struct journal_device *ja = &ca->journal;
-+ u64 *new_bucket_seq = NULL, *new_buckets = NULL;
-+ struct open_bucket **ob = NULL;
-+ long *bu = NULL;
-+ unsigned i, pos, nr_got = 0, nr_want = nr - ja->nr;
-+ int ret = 0;
-+
-+ BUG_ON(nr <= ja->nr);
-+
-+ bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
-+ ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL);
-+ new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL);
-+ new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL);
-+ if (!bu || !ob || !new_buckets || !new_bucket_seq) {
-+ ret = -BCH_ERR_ENOMEM_set_nr_journal_buckets;
-+ goto err_free;
-+ }
-+
-+ for (nr_got = 0; nr_got < nr_want; nr_got++) {
-+ if (new_fs) {
-+ bu[nr_got] = bch2_bucket_alloc_new_fs(ca);
-+ if (bu[nr_got] < 0) {
-+ ret = -BCH_ERR_ENOSPC_bucket_alloc;
-+ break;
-+ }
-+ } else {
-+ ob[nr_got] = bch2_bucket_alloc(c, ca, BCH_WATERMARK_normal, cl);
-+ ret = PTR_ERR_OR_ZERO(ob[nr_got]);
-+ if (ret)
-+ break;
-+
-+ ret = bch2_trans_run(c,
-+ bch2_trans_mark_metadata_bucket(trans, ca,
-+ ob[nr_got]->bucket, BCH_DATA_journal,
-+ ca->mi.bucket_size));
-+ if (ret) {
-+ bch2_open_bucket_put(c, ob[nr_got]);
-+ bch_err_msg(c, ret, "marking new journal buckets");
-+ break;
-+ }
-+
-+ bu[nr_got] = ob[nr_got]->bucket;
-+ }
-+ }
-+
-+ if (!nr_got)
-+ goto err_free;
-+
-+ /* Don't return an error if we successfully allocated some buckets: */
-+ ret = 0;
-+
-+ if (c) {
-+ bch2_journal_flush_all_pins(&c->journal);
-+ bch2_journal_block(&c->journal);
-+ mutex_lock(&c->sb_lock);
-+ }
-+
-+ memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
-+ memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
-+
-+ BUG_ON(ja->discard_idx > ja->nr);
-+
-+ pos = ja->discard_idx ?: ja->nr;
-+
-+ memmove(new_buckets + pos + nr_got,
-+ new_buckets + pos,
-+ sizeof(new_buckets[0]) * (ja->nr - pos));
-+ memmove(new_bucket_seq + pos + nr_got,
-+ new_bucket_seq + pos,
-+ sizeof(new_bucket_seq[0]) * (ja->nr - pos));
-+
-+ for (i = 0; i < nr_got; i++) {
-+ new_buckets[pos + i] = bu[i];
-+ new_bucket_seq[pos + i] = 0;
-+ }
-+
-+ nr = ja->nr + nr_got;
-+
-+ ret = bch2_journal_buckets_to_sb(c, ca, new_buckets, nr);
-+ if (ret)
-+ goto err_unblock;
-+
-+ if (!new_fs)
-+ bch2_write_super(c);
-+
-+ /* Commit: */
-+ if (c)
-+ spin_lock(&c->journal.lock);
-+
-+ swap(new_buckets, ja->buckets);
-+ swap(new_bucket_seq, ja->bucket_seq);
-+ ja->nr = nr;
-+
-+ if (pos <= ja->discard_idx)
-+ ja->discard_idx = (ja->discard_idx + nr_got) % ja->nr;
-+ if (pos <= ja->dirty_idx_ondisk)
-+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + nr_got) % ja->nr;
-+ if (pos <= ja->dirty_idx)
-+ ja->dirty_idx = (ja->dirty_idx + nr_got) % ja->nr;
-+ if (pos <= ja->cur_idx)
-+ ja->cur_idx = (ja->cur_idx + nr_got) % ja->nr;
-+
-+ if (c)
-+ spin_unlock(&c->journal.lock);
-+err_unblock:
-+ if (c) {
-+ bch2_journal_unblock(&c->journal);
-+ mutex_unlock(&c->sb_lock);
-+ }
-+
-+ if (ret && !new_fs)
-+ for (i = 0; i < nr_got; i++)
-+ bch2_trans_run(c,
-+ bch2_trans_mark_metadata_bucket(trans, ca,
-+ bu[i], BCH_DATA_free, 0));
-+err_free:
-+ if (!new_fs)
-+ for (i = 0; i < nr_got; i++)
-+ bch2_open_bucket_put(c, ob[i]);
-+
-+ kfree(new_bucket_seq);
-+ kfree(new_buckets);
-+ kfree(ob);
-+ kfree(bu);
-+ return ret;
-+}
-+
-+/*
-+ * Allocate more journal space at runtime - not currently making use if it, but
-+ * the code works:
-+ */
-+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
-+ unsigned nr)
-+{
-+ struct journal_device *ja = &ca->journal;
-+ struct closure cl;
-+ int ret = 0;
-+
-+ closure_init_stack(&cl);
-+
-+ down_write(&c->state_lock);
-+
-+ /* don't handle reducing nr of buckets yet: */
-+ if (nr < ja->nr)
-+ goto unlock;
-+
-+ while (ja->nr < nr) {
-+ struct disk_reservation disk_res = { 0, 0, 0 };
-+
-+ /*
-+ * note: journal buckets aren't really counted as _sectors_ used yet, so
-+ * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
-+ * when space used goes up without a reservation - but we do need the
-+ * reservation to ensure we'll actually be able to allocate:
-+ *
-+ * XXX: that's not right, disk reservations only ensure a
-+ * filesystem-wide allocation will succeed, this is a device
-+ * specific allocation - we can hang here:
-+ */
-+
-+ ret = bch2_disk_reservation_get(c, &disk_res,
-+ bucket_to_sector(ca, nr - ja->nr), 1, 0);
-+ if (ret)
-+ break;
-+
-+ ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
-+
-+ bch2_disk_reservation_put(c, &disk_res);
-+
-+ closure_sync(&cl);
-+
-+ if (ret && ret != -BCH_ERR_bucket_alloc_blocked)
-+ break;
-+ }
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+unlock:
-+ up_write(&c->state_lock);
-+ return ret;
-+}
-+
-+int bch2_dev_journal_alloc(struct bch_dev *ca)
-+{
-+ unsigned nr;
-+ int ret;
-+
-+ if (dynamic_fault("bcachefs:add:journal_alloc")) {
-+ ret = -BCH_ERR_ENOMEM_set_nr_journal_buckets;
-+ goto err;
-+ }
-+
-+ /* 1/128th of the device by default: */
-+ nr = ca->mi.nbuckets >> 7;
-+
-+ /*
-+ * clamp journal size to 8192 buckets or 8GB (in sectors), whichever
-+ * is smaller:
-+ */
-+ nr = clamp_t(unsigned, nr,
-+ BCH_JOURNAL_BUCKETS_MIN,
-+ min(1 << 13,
-+ (1 << 24) / ca->mi.bucket_size));
-+
-+ ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
-+err:
-+ if (ret)
-+ bch_err_fn(ca, ret);
-+ return ret;
-+}
-+
-+int bch2_fs_journal_alloc(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ for_each_online_member(ca, c, i) {
-+ if (ca->journal.nr)
-+ continue;
-+
-+ int ret = bch2_dev_journal_alloc(ca);
-+ if (ret) {
-+ percpu_ref_put(&ca->io_ref);
-+ return ret;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/* startup/shutdown: */
-+
-+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
-+{
-+ bool ret = false;
-+ u64 seq;
-+
-+ spin_lock(&j->lock);
-+ for (seq = journal_last_unwritten_seq(j);
-+ seq <= journal_cur_seq(j) && !ret;
-+ seq++) {
-+ struct journal_buf *buf = journal_seq_to_buf(j, seq);
-+
-+ if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx))
-+ ret = true;
-+ }
-+ spin_unlock(&j->lock);
-+
-+ return ret;
-+}
-+
-+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
-+{
-+ wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx));
-+}
-+
-+void bch2_fs_journal_stop(struct journal *j)
-+{
-+ bch2_journal_reclaim_stop(j);
-+ bch2_journal_flush_all_pins(j);
-+
-+ wait_event(j->wait, journal_entry_close(j));
-+
-+ /*
-+ * Always write a new journal entry, to make sure the clock hands are up
-+ * to date (and match the superblock)
-+ */
-+ bch2_journal_meta(j);
-+
-+ journal_quiesce(j);
-+
-+ BUG_ON(!bch2_journal_error(j) &&
-+ test_bit(JOURNAL_REPLAY_DONE, &j->flags) &&
-+ j->last_empty_seq != journal_cur_seq(j));
-+
-+ cancel_delayed_work_sync(&j->write_work);
-+}
-+
-+int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct journal_entry_pin_list *p;
-+ struct journal_replay *i, **_i;
-+ struct genradix_iter iter;
-+ bool had_entries = false;
-+ unsigned ptr;
-+ u64 last_seq = cur_seq, nr, seq;
-+
-+ genradix_for_each_reverse(&c->journal_entries, iter, _i) {
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ last_seq = le64_to_cpu(i->j.last_seq);
-+ break;
-+ }
-+
-+ nr = cur_seq - last_seq;
-+
-+ if (nr + 1 > j->pin.size) {
-+ free_fifo(&j->pin);
-+ init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
-+ if (!j->pin.data) {
-+ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
-+ return -BCH_ERR_ENOMEM_journal_pin_fifo;
-+ }
-+ }
-+
-+ j->replay_journal_seq = last_seq;
-+ j->replay_journal_seq_end = cur_seq;
-+ j->last_seq_ondisk = last_seq;
-+ j->flushed_seq_ondisk = cur_seq - 1;
-+ j->seq_ondisk = cur_seq - 1;
-+ j->pin.front = last_seq;
-+ j->pin.back = cur_seq;
-+ atomic64_set(&j->seq, cur_seq - 1);
-+
-+ fifo_for_each_entry_ptr(p, &j->pin, seq)
-+ journal_pin_list_init(p, 1);
-+
-+ genradix_for_each(&c->journal_entries, iter, _i) {
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ seq = le64_to_cpu(i->j.seq);
-+ BUG_ON(seq >= cur_seq);
-+
-+ if (seq < last_seq)
-+ continue;
-+
-+ if (journal_entry_empty(&i->j))
-+ j->last_empty_seq = le64_to_cpu(i->j.seq);
-+
-+ p = journal_seq_pin(j, seq);
-+
-+ p->devs.nr = 0;
-+ for (ptr = 0; ptr < i->nr_ptrs; ptr++)
-+ bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
-+
-+ had_entries = true;
-+ }
-+
-+ if (!had_entries)
-+ j->last_empty_seq = cur_seq;
-+
-+ spin_lock(&j->lock);
-+
-+ set_bit(JOURNAL_STARTED, &j->flags);
-+ j->last_flush_write = jiffies;
-+
-+ j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
-+ j->reservations.unwritten_idx++;
-+
-+ c->last_bucket_seq_cleanup = journal_cur_seq(j);
-+
-+ bch2_journal_space_available(j);
-+ spin_unlock(&j->lock);
-+
-+ return bch2_journal_reclaim_start(j);
-+}
-+
-+/* init/exit: */
-+
-+void bch2_dev_journal_exit(struct bch_dev *ca)
-+{
-+ kfree(ca->journal.bio);
-+ kfree(ca->journal.buckets);
-+ kfree(ca->journal.bucket_seq);
-+
-+ ca->journal.bio = NULL;
-+ ca->journal.buckets = NULL;
-+ ca->journal.bucket_seq = NULL;
-+}
-+
-+int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
-+{
-+ struct journal_device *ja = &ca->journal;
-+ struct bch_sb_field_journal *journal_buckets =
-+ bch2_sb_field_get(sb, journal);
-+ struct bch_sb_field_journal_v2 *journal_buckets_v2 =
-+ bch2_sb_field_get(sb, journal_v2);
-+ unsigned i, nr_bvecs;
-+
-+ ja->nr = 0;
-+
-+ if (journal_buckets_v2) {
-+ unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
-+
-+ for (i = 0; i < nr; i++)
-+ ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr);
-+ } else if (journal_buckets) {
-+ ja->nr = bch2_nr_journal_buckets(journal_buckets);
-+ }
-+
-+ ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
-+ if (!ja->bucket_seq)
-+ return -BCH_ERR_ENOMEM_dev_journal_init;
-+
-+ nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
-+
-+ ca->journal.bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
-+ if (!ca->journal.bio)
-+ return -BCH_ERR_ENOMEM_dev_journal_init;
-+
-+ bio_init(ca->journal.bio, NULL, ca->journal.bio->bi_inline_vecs, nr_bvecs, 0);
-+
-+ ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
-+ if (!ja->buckets)
-+ return -BCH_ERR_ENOMEM_dev_journal_init;
-+
-+ if (journal_buckets_v2) {
-+ unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
-+ unsigned j, dst = 0;
-+
-+ for (i = 0; i < nr; i++)
-+ for (j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++)
-+ ja->buckets[dst++] =
-+ le64_to_cpu(journal_buckets_v2->d[i].start) + j;
-+ } else if (journal_buckets) {
-+ for (i = 0; i < ja->nr; i++)
-+ ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
-+ }
-+
-+ return 0;
-+}
-+
-+void bch2_fs_journal_exit(struct journal *j)
-+{
-+ unsigned i;
-+
-+ darray_exit(&j->early_journal_entries);
-+
-+ for (i = 0; i < ARRAY_SIZE(j->buf); i++)
-+ kvpfree(j->buf[i].data, j->buf[i].buf_size);
-+ free_fifo(&j->pin);
-+}
-+
-+int bch2_fs_journal_init(struct journal *j)
-+{
-+ static struct lock_class_key res_key;
-+ unsigned i;
-+
-+ spin_lock_init(&j->lock);
-+ spin_lock_init(&j->err_lock);
-+ init_waitqueue_head(&j->wait);
-+ INIT_DELAYED_WORK(&j->write_work, journal_write_work);
-+ init_waitqueue_head(&j->reclaim_wait);
-+ init_waitqueue_head(&j->pin_flush_wait);
-+ mutex_init(&j->reclaim_lock);
-+ mutex_init(&j->discard_lock);
-+
-+ lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
-+
-+ atomic64_set(&j->reservations.counter,
-+ ((union journal_res_state)
-+ { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
-+
-+ if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
-+ return -BCH_ERR_ENOMEM_journal_pin_fifo;
-+
-+ for (i = 0; i < ARRAY_SIZE(j->buf); i++) {
-+ j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
-+ j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL);
-+ if (!j->buf[i].data)
-+ return -BCH_ERR_ENOMEM_journal_buf;
-+ }
-+
-+ j->pin.front = j->pin.back = 1;
-+ return 0;
-+}
-+
-+/* debug: */
-+
-+void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ union journal_res_state s;
-+ struct bch_dev *ca;
-+ unsigned long now = jiffies;
-+ u64 seq;
-+ unsigned i;
-+
-+ if (!out->nr_tabstops)
-+ printbuf_tabstop_push(out, 24);
-+ out->atomic++;
-+
-+ rcu_read_lock();
-+ s = READ_ONCE(j->reservations);
-+
-+ prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size);
-+ prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j));
-+ prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk);
-+ prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
-+ prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
-+ prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
-+ prt_printf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
-+ prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]);
-+ prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
-+ prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
-+ prt_printf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes);
-+ prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim);
-+ prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim);
-+ prt_printf(out, "reclaim kicked:\t\t%u\n", j->reclaim_kicked);
-+ prt_printf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now)
-+ ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
-+ prt_printf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors);
-+ prt_printf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]);
-+ prt_printf(out, "current entry:\t\t");
-+
-+ switch (s.cur_entry_offset) {
-+ case JOURNAL_ENTRY_ERROR_VAL:
-+ prt_printf(out, "error");
-+ break;
-+ case JOURNAL_ENTRY_CLOSED_VAL:
-+ prt_printf(out, "closed");
-+ break;
-+ default:
-+ prt_printf(out, "%u/%u", s.cur_entry_offset, j->cur_entry_u64s);
-+ break;
-+ }
-+
-+ prt_newline(out);
-+
-+ for (seq = journal_cur_seq(j);
-+ seq >= journal_last_unwritten_seq(j);
-+ --seq) {
-+ i = seq & JOURNAL_BUF_MASK;
-+
-+ prt_printf(out, "unwritten entry:");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", seq);
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+
-+ prt_printf(out, "refcount:");
-+ prt_tab(out);
-+ prt_printf(out, "%u", journal_state_count(s, i));
-+ prt_newline(out);
-+
-+ prt_printf(out, "sectors:");
-+ prt_tab(out);
-+ prt_printf(out, "%u", j->buf[i].sectors);
-+ prt_newline(out);
-+
-+ prt_printf(out, "expires");
-+ prt_tab(out);
-+ prt_printf(out, "%li jiffies", j->buf[i].expires - jiffies);
-+ prt_newline(out);
-+
-+ printbuf_indent_sub(out, 2);
-+ }
-+
-+ prt_printf(out,
-+ "replay done:\t\t%i\n",
-+ test_bit(JOURNAL_REPLAY_DONE, &j->flags));
-+
-+ prt_printf(out, "space:\n");
-+ prt_printf(out, "\tdiscarded\t%u:%u\n",
-+ j->space[journal_space_discarded].next_entry,
-+ j->space[journal_space_discarded].total);
-+ prt_printf(out, "\tclean ondisk\t%u:%u\n",
-+ j->space[journal_space_clean_ondisk].next_entry,
-+ j->space[journal_space_clean_ondisk].total);
-+ prt_printf(out, "\tclean\t\t%u:%u\n",
-+ j->space[journal_space_clean].next_entry,
-+ j->space[journal_space_clean].total);
-+ prt_printf(out, "\ttotal\t\t%u:%u\n",
-+ j->space[journal_space_total].next_entry,
-+ j->space[journal_space_total].total);
-+
-+ for_each_member_device_rcu(ca, c, i,
-+ &c->rw_devs[BCH_DATA_journal]) {
-+ struct journal_device *ja = &ca->journal;
-+
-+ if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d))
-+ continue;
-+
-+ if (!ja->nr)
-+ continue;
-+
-+ prt_printf(out, "dev %u:\n", i);
-+ prt_printf(out, "\tnr\t\t%u\n", ja->nr);
-+ prt_printf(out, "\tbucket size\t%u\n", ca->mi.bucket_size);
-+ prt_printf(out, "\tavailable\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free);
-+ prt_printf(out, "\tdiscard_idx\t%u\n", ja->discard_idx);
-+ prt_printf(out, "\tdirty_ondisk\t%u (seq %llu)\n", ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]);
-+ prt_printf(out, "\tdirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]);
-+ prt_printf(out, "\tcur_idx\t\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
-+ }
-+
-+ rcu_read_unlock();
-+
-+ --out->atomic;
-+}
-+
-+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
-+{
-+ spin_lock(&j->lock);
-+ __bch2_journal_debug_to_text(out, j);
-+ spin_unlock(&j->lock);
-+}
-+
-+bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
-+{
-+ struct journal_entry_pin_list *pin_list;
-+ struct journal_entry_pin *pin;
-+ unsigned i;
-+
-+ spin_lock(&j->lock);
-+ *seq = max(*seq, j->pin.front);
-+
-+ if (*seq >= j->pin.back) {
-+ spin_unlock(&j->lock);
-+ return true;
-+ }
-+
-+ out->atomic++;
-+
-+ pin_list = journal_seq_pin(j, *seq);
-+
-+ prt_printf(out, "%llu: count %u", *seq, atomic_read(&pin_list->count));
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+
-+ for (i = 0; i < ARRAY_SIZE(pin_list->list); i++)
-+ list_for_each_entry(pin, &pin_list->list[i], list) {
-+ prt_printf(out, "\t%px %ps", pin, pin->flush);
-+ prt_newline(out);
-+ }
-+
-+ if (!list_empty(&pin_list->flushed)) {
-+ prt_printf(out, "flushed:");
-+ prt_newline(out);
-+ }
-+
-+ list_for_each_entry(pin, &pin_list->flushed, list) {
-+ prt_printf(out, "\t%px %ps", pin, pin->flush);
-+ prt_newline(out);
-+ }
-+
-+ printbuf_indent_sub(out, 2);
-+
-+ --out->atomic;
-+ spin_unlock(&j->lock);
-+
-+ return false;
-+}
-+
-+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
-+{
-+ u64 seq = 0;
-+
-+ while (!bch2_journal_seq_pins_to_text(out, j, &seq))
-+ seq++;
-+}
-diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
-new file mode 100644
-index 000000000000..011711e99c8d
---- /dev/null
-+++ b/fs/bcachefs/journal.h
-@@ -0,0 +1,549 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_JOURNAL_H
-+#define _BCACHEFS_JOURNAL_H
-+
-+/*
-+ * THE JOURNAL:
-+ *
-+ * The primary purpose of the journal is to log updates (insertions) to the
-+ * b-tree, to avoid having to do synchronous updates to the b-tree on disk.
-+ *
-+ * Without the journal, the b-tree is always internally consistent on
-+ * disk - and in fact, in the earliest incarnations bcache didn't have a journal
-+ * but did handle unclean shutdowns by doing all index updates synchronously
-+ * (with coalescing).
-+ *
-+ * Updates to interior nodes still happen synchronously and without the journal
-+ * (for simplicity) - this may change eventually but updates to interior nodes
-+ * are rare enough it's not a huge priority.
-+ *
-+ * This means the journal is relatively separate from the b-tree; it consists of
-+ * just a list of keys and journal replay consists of just redoing those
-+ * insertions in same order that they appear in the journal.
-+ *
-+ * PERSISTENCE:
-+ *
-+ * For synchronous updates (where we're waiting on the index update to hit
-+ * disk), the journal entry will be written out immediately (or as soon as
-+ * possible, if the write for the previous journal entry was still in flight).
-+ *
-+ * Synchronous updates are specified by passing a closure (@flush_cl) to
-+ * bch2_btree_insert() or bch_btree_insert_node(), which then pass that parameter
-+ * down to the journalling code. That closure will wait on the journal write to
-+ * complete (via closure_wait()).
-+ *
-+ * If the index update wasn't synchronous, the journal entry will be
-+ * written out after 10 ms have elapsed, by default (the delay_ms field
-+ * in struct journal).
-+ *
-+ * JOURNAL ENTRIES:
-+ *
-+ * A journal entry is variable size (struct jset), it's got a fixed length
-+ * header and then a variable number of struct jset_entry entries.
-+ *
-+ * Journal entries are identified by monotonically increasing 64 bit sequence
-+ * numbers - jset->seq; other places in the code refer to this sequence number.
-+ *
-+ * A jset_entry entry contains one or more bkeys (which is what gets inserted
-+ * into the b-tree). We need a container to indicate which b-tree the key is
-+ * for; also, the roots of the various b-trees are stored in jset_entry entries
-+ * (one for each b-tree) - this lets us add new b-tree types without changing
-+ * the on disk format.
-+ *
-+ * We also keep some things in the journal header that are logically part of the
-+ * superblock - all the things that are frequently updated. This is for future
-+ * bcache on raw flash support; the superblock (which will become another
-+ * journal) can't be moved or wear leveled, so it contains just enough
-+ * information to find the main journal, and the superblock only has to be
-+ * rewritten when we want to move/wear level the main journal.
-+ *
-+ * JOURNAL LAYOUT ON DISK:
-+ *
-+ * The journal is written to a ringbuffer of buckets (which is kept in the
-+ * superblock); the individual buckets are not necessarily contiguous on disk
-+ * which means that journal entries are not allowed to span buckets, but also
-+ * that we can resize the journal at runtime if desired (unimplemented).
-+ *
-+ * The journal buckets exist in the same pool as all the other buckets that are
-+ * managed by the allocator and garbage collection - garbage collection marks
-+ * the journal buckets as metadata buckets.
-+ *
-+ * OPEN/DIRTY JOURNAL ENTRIES:
-+ *
-+ * Open/dirty journal entries are journal entries that contain b-tree updates
-+ * that have not yet been written out to the b-tree on disk. We have to track
-+ * which journal entries are dirty, and we also have to avoid wrapping around
-+ * the journal and overwriting old but still dirty journal entries with new
-+ * journal entries.
-+ *
-+ * On disk, this is represented with the "last_seq" field of struct jset;
-+ * last_seq is the first sequence number that journal replay has to replay.
-+ *
-+ * To avoid overwriting dirty journal entries on disk, we keep a mapping (in
-+ * journal_device->seq) of for each journal bucket, the highest sequence number
-+ * any journal entry it contains. Then, by comparing that against last_seq we
-+ * can determine whether that journal bucket contains dirty journal entries or
-+ * not.
-+ *
-+ * To track which journal entries are dirty, we maintain a fifo of refcounts
-+ * (where each entry corresponds to a specific sequence number) - when a ref
-+ * goes to 0, that journal entry is no longer dirty.
-+ *
-+ * Journalling of index updates is done at the same time as the b-tree itself is
-+ * being modified (see btree_insert_key()); when we add the key to the journal
-+ * the pending b-tree write takes a ref on the journal entry the key was added
-+ * to. If a pending b-tree write would need to take refs on multiple dirty
-+ * journal entries, it only keeps the ref on the oldest one (since a newer
-+ * journal entry will still be replayed if an older entry was dirty).
-+ *
-+ * JOURNAL FILLING UP:
-+ *
-+ * There are two ways the journal could fill up; either we could run out of
-+ * space to write to, or we could have too many open journal entries and run out
-+ * of room in the fifo of refcounts. Since those refcounts are decremented
-+ * without any locking we can't safely resize that fifo, so we handle it the
-+ * same way.
-+ *
-+ * If the journal fills up, we start flushing dirty btree nodes until we can
-+ * allocate space for a journal write again - preferentially flushing btree
-+ * nodes that are pinning the oldest journal entries first.
-+ */
-+
-+#include <linux/hash.h>
-+
-+#include "journal_types.h"
-+
-+struct bch_fs;
-+
-+static inline void journal_wake(struct journal *j)
-+{
-+ wake_up(&j->wait);
-+ closure_wake_up(&j->async_wait);
-+ closure_wake_up(&j->preres_wait);
-+}
-+
-+static inline struct journal_buf *journal_cur_buf(struct journal *j)
-+{
-+ return j->buf + j->reservations.idx;
-+}
-+
-+/* Sequence number of oldest dirty journal entry */
-+
-+static inline u64 journal_last_seq(struct journal *j)
-+{
-+ return j->pin.front;
-+}
-+
-+static inline u64 journal_cur_seq(struct journal *j)
-+{
-+ EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
-+
-+ return j->pin.back - 1;
-+}
-+
-+static inline u64 journal_last_unwritten_seq(struct journal *j)
-+{
-+ return j->seq_ondisk + 1;
-+}
-+
-+static inline int journal_state_count(union journal_res_state s, int idx)
-+{
-+ switch (idx) {
-+ case 0: return s.buf0_count;
-+ case 1: return s.buf1_count;
-+ case 2: return s.buf2_count;
-+ case 3: return s.buf3_count;
-+ }
-+ BUG();
-+}
-+
-+static inline void journal_state_inc(union journal_res_state *s)
-+{
-+ s->buf0_count += s->idx == 0;
-+ s->buf1_count += s->idx == 1;
-+ s->buf2_count += s->idx == 2;
-+ s->buf3_count += s->idx == 3;
-+}
-+
-+/*
-+ * Amount of space that will be taken up by some keys in the journal (i.e.
-+ * including the jset header)
-+ */
-+static inline unsigned jset_u64s(unsigned u64s)
-+{
-+ return u64s + sizeof(struct jset_entry) / sizeof(u64);
-+}
-+
-+static inline int journal_entry_overhead(struct journal *j)
-+{
-+ return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved;
-+}
-+
-+static inline struct jset_entry *
-+bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
-+{
-+ struct jset *jset = buf->data;
-+ struct jset_entry *entry = vstruct_idx(jset, le32_to_cpu(jset->u64s));
-+
-+ memset(entry, 0, sizeof(*entry));
-+ entry->u64s = cpu_to_le16(u64s);
-+
-+ le32_add_cpu(&jset->u64s, jset_u64s(u64s));
-+
-+ return entry;
-+}
-+
-+static inline struct jset_entry *
-+journal_res_entry(struct journal *j, struct journal_res *res)
-+{
-+ return vstruct_idx(j->buf[res->idx].data, res->offset);
-+}
-+
-+static inline unsigned journal_entry_init(struct jset_entry *entry, unsigned type,
-+ enum btree_id id, unsigned level,
-+ unsigned u64s)
-+{
-+ entry->u64s = cpu_to_le16(u64s);
-+ entry->btree_id = id;
-+ entry->level = level;
-+ entry->type = type;
-+ entry->pad[0] = 0;
-+ entry->pad[1] = 0;
-+ entry->pad[2] = 0;
-+ return jset_u64s(u64s);
-+}
-+
-+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
-+ enum btree_id id, unsigned level,
-+ const void *data, unsigned u64s)
-+{
-+ unsigned ret = journal_entry_init(entry, type, id, level, u64s);
-+
-+ memcpy_u64s_small(entry->_data, data, u64s);
-+ return ret;
-+}
-+
-+static inline struct jset_entry *
-+bch2_journal_add_entry(struct journal *j, struct journal_res *res,
-+ unsigned type, enum btree_id id,
-+ unsigned level, unsigned u64s)
-+{
-+ struct jset_entry *entry = journal_res_entry(j, res);
-+ unsigned actual = journal_entry_init(entry, type, id, level, u64s);
-+
-+ EBUG_ON(!res->ref);
-+ EBUG_ON(actual > res->u64s);
-+
-+ res->offset += actual;
-+ res->u64s -= actual;
-+ return entry;
-+}
-+
-+static inline bool journal_entry_empty(struct jset *j)
-+{
-+ struct jset_entry *i;
-+
-+ if (j->seq != j->last_seq)
-+ return false;
-+
-+ vstruct_for_each(j, i)
-+ if (i->type == BCH_JSET_ENTRY_btree_keys && i->u64s)
-+ return false;
-+ return true;
-+}
-+
-+/*
-+ * Drop reference on a buffer index and return true if the count has hit zero.
-+ */
-+static inline union journal_res_state journal_state_buf_put(struct journal *j, unsigned idx)
-+{
-+ union journal_res_state s;
-+
-+ s.v = atomic64_sub_return(((union journal_res_state) {
-+ .buf0_count = idx == 0,
-+ .buf1_count = idx == 1,
-+ .buf2_count = idx == 2,
-+ .buf3_count = idx == 3,
-+ }).v, &j->reservations.counter);
-+ return s;
-+}
-+
-+void bch2_journal_buf_put_final(struct journal *, u64, bool);
-+
-+static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
-+{
-+ union journal_res_state s;
-+
-+ s = journal_state_buf_put(j, idx);
-+ if (!journal_state_count(s, idx))
-+ bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
-+}
-+
-+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
-+{
-+ union journal_res_state s;
-+
-+ s = journal_state_buf_put(j, idx);
-+ if (!journal_state_count(s, idx)) {
-+ spin_lock(&j->lock);
-+ bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
-+ spin_unlock(&j->lock);
-+ }
-+}
-+
-+/*
-+ * This function releases the journal write structure so other threads can
-+ * then proceed to add their keys as well.
-+ */
-+static inline void bch2_journal_res_put(struct journal *j,
-+ struct journal_res *res)
-+{
-+ if (!res->ref)
-+ return;
-+
-+ lock_release(&j->res_map, _THIS_IP_);
-+
-+ while (res->u64s)
-+ bch2_journal_add_entry(j, res,
-+ BCH_JSET_ENTRY_btree_keys,
-+ 0, 0, 0);
-+
-+ bch2_journal_buf_put(j, res->idx, res->seq);
-+
-+ res->ref = 0;
-+}
-+
-+int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
-+ unsigned);
-+
-+/* First bits for BCH_WATERMARK: */
-+enum journal_res_flags {
-+ __JOURNAL_RES_GET_NONBLOCK = BCH_WATERMARK_BITS,
-+ __JOURNAL_RES_GET_CHECK,
-+};
-+
-+#define JOURNAL_RES_GET_NONBLOCK (1 << __JOURNAL_RES_GET_NONBLOCK)
-+#define JOURNAL_RES_GET_CHECK (1 << __JOURNAL_RES_GET_CHECK)
-+
-+static inline int journal_res_get_fast(struct journal *j,
-+ struct journal_res *res,
-+ unsigned flags)
-+{
-+ union journal_res_state old, new;
-+ u64 v = atomic64_read(&j->reservations.counter);
-+
-+ do {
-+ old.v = new.v = v;
-+
-+ /*
-+ * Check if there is still room in the current journal
-+ * entry:
-+ */
-+ if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
-+ return 0;
-+
-+ EBUG_ON(!journal_state_count(new, new.idx));
-+
-+ if ((flags & BCH_WATERMARK_MASK) < j->watermark)
-+ return 0;
-+
-+ new.cur_entry_offset += res->u64s;
-+ journal_state_inc(&new);
-+
-+ /*
-+ * If the refcount would overflow, we have to wait:
-+ * XXX - tracepoint this:
-+ */
-+ if (!journal_state_count(new, new.idx))
-+ return 0;
-+
-+ if (flags & JOURNAL_RES_GET_CHECK)
-+ return 1;
-+ } while ((v = atomic64_cmpxchg(&j->reservations.counter,
-+ old.v, new.v)) != old.v);
-+
-+ res->ref = true;
-+ res->idx = old.idx;
-+ res->offset = old.cur_entry_offset;
-+ res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
-+ return 1;
-+}
-+
-+static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
-+ unsigned u64s, unsigned flags)
-+{
-+ int ret;
-+
-+ EBUG_ON(res->ref);
-+ EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
-+
-+ res->u64s = u64s;
-+
-+ if (journal_res_get_fast(j, res, flags))
-+ goto out;
-+
-+ ret = bch2_journal_res_get_slowpath(j, res, flags);
-+ if (ret)
-+ return ret;
-+out:
-+ if (!(flags & JOURNAL_RES_GET_CHECK)) {
-+ lock_acquire_shared(&j->res_map, 0,
-+ (flags & JOURNAL_RES_GET_NONBLOCK) != 0,
-+ NULL, _THIS_IP_);
-+ EBUG_ON(!res->ref);
-+ }
-+ return 0;
-+}
-+
-+/* journal_preres: */
-+
-+static inline void journal_set_watermark(struct journal *j)
-+{
-+ union journal_preres_state s = READ_ONCE(j->prereserved);
-+ unsigned watermark = BCH_WATERMARK_stripe;
-+
-+ if (fifo_free(&j->pin) < j->pin.size / 4)
-+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
-+ if (fifo_free(&j->pin) < j->pin.size / 8)
-+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-+
-+ if (s.reserved > s.remaining)
-+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
-+ if (!s.remaining)
-+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-+
-+ if (watermark == j->watermark)
-+ return;
-+
-+ swap(watermark, j->watermark);
-+ if (watermark > j->watermark)
-+ journal_wake(j);
-+}
-+
-+static inline void bch2_journal_preres_put(struct journal *j,
-+ struct journal_preres *res)
-+{
-+ union journal_preres_state s = { .reserved = res->u64s };
-+
-+ if (!res->u64s)
-+ return;
-+
-+ s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
-+ res->u64s = 0;
-+
-+ if (unlikely(s.waiting)) {
-+ clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
-+ (unsigned long *) &j->prereserved.v);
-+ closure_wake_up(&j->preres_wait);
-+ }
-+
-+ if (s.reserved <= s.remaining && j->watermark)
-+ journal_set_watermark(j);
-+}
-+
-+int __bch2_journal_preres_get(struct journal *,
-+ struct journal_preres *, unsigned, unsigned);
-+
-+static inline int bch2_journal_preres_get_fast(struct journal *j,
-+ struct journal_preres *res,
-+ unsigned new_u64s,
-+ unsigned flags,
-+ bool set_waiting)
-+{
-+ int d = new_u64s - res->u64s;
-+ union journal_preres_state old, new;
-+ u64 v = atomic64_read(&j->prereserved.counter);
-+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-+ int ret;
-+
-+ do {
-+ old.v = new.v = v;
-+ ret = 0;
-+
-+ if (watermark == BCH_WATERMARK_reclaim ||
-+ new.reserved + d < new.remaining) {
-+ new.reserved += d;
-+ ret = 1;
-+ } else if (set_waiting && !new.waiting)
-+ new.waiting = true;
-+ else
-+ return 0;
-+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-+ old.v, new.v)) != old.v);
-+
-+ if (ret)
-+ res->u64s += d;
-+ return ret;
-+}
-+
-+static inline int bch2_journal_preres_get(struct journal *j,
-+ struct journal_preres *res,
-+ unsigned new_u64s,
-+ unsigned flags)
-+{
-+ if (new_u64s <= res->u64s)
-+ return 0;
-+
-+ if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
-+ return 0;
-+
-+ if (flags & JOURNAL_RES_GET_NONBLOCK)
-+ return -BCH_ERR_journal_preres_get_blocked;
-+
-+ return __bch2_journal_preres_get(j, res, new_u64s, flags);
-+}
-+
-+/* journal_entry_res: */
-+
-+void bch2_journal_entry_res_resize(struct journal *,
-+ struct journal_entry_res *,
-+ unsigned);
-+
-+int bch2_journal_flush_seq_async(struct journal *, u64, struct closure *);
-+void bch2_journal_flush_async(struct journal *, struct closure *);
-+
-+int bch2_journal_flush_seq(struct journal *, u64);
-+int bch2_journal_flush(struct journal *);
-+bool bch2_journal_noflush_seq(struct journal *, u64);
-+int bch2_journal_meta(struct journal *);
-+
-+void bch2_journal_halt(struct journal *);
-+
-+static inline int bch2_journal_error(struct journal *j)
-+{
-+ return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL
-+ ? -EIO : 0;
-+}
-+
-+struct bch_dev;
-+
-+static inline void bch2_journal_set_replay_done(struct journal *j)
-+{
-+ BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
-+ set_bit(JOURNAL_REPLAY_DONE, &j->flags);
-+}
-+
-+void bch2_journal_unblock(struct journal *);
-+void bch2_journal_block(struct journal *);
-+
-+void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
-+void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
-+void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
-+bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
-+
-+int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
-+ unsigned nr);
-+int bch2_dev_journal_alloc(struct bch_dev *);
-+int bch2_fs_journal_alloc(struct bch_fs *);
-+
-+void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
-+
-+void bch2_fs_journal_stop(struct journal *);
-+int bch2_fs_journal_start(struct journal *, u64);
-+
-+void bch2_dev_journal_exit(struct bch_dev *);
-+int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
-+void bch2_fs_journal_exit(struct journal *);
-+int bch2_fs_journal_init(struct journal *);
-+
-+#endif /* _BCACHEFS_JOURNAL_H */
-diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
-new file mode 100644
-index 000000000000..f4bc2cdbfdd7
---- /dev/null
-+++ b/fs/bcachefs/journal_io.c
-@@ -0,0 +1,1947 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "btree_io.h"
-+#include "btree_update_interior.h"
-+#include "buckets.h"
-+#include "checksum.h"
-+#include "disk_groups.h"
-+#include "error.h"
-+#include "journal.h"
-+#include "journal_io.h"
-+#include "journal_reclaim.h"
-+#include "journal_seq_blacklist.h"
-+#include "replicas.h"
-+#include "sb-clean.h"
-+#include "trace.h"
-+
-+static struct nonce journal_nonce(const struct jset *jset)
-+{
-+ return (struct nonce) {{
-+ [0] = 0,
-+ [1] = ((__le32 *) &jset->seq)[0],
-+ [2] = ((__le32 *) &jset->seq)[1],
-+ [3] = BCH_NONCE_JOURNAL,
-+ }};
-+}
-+
-+static bool jset_csum_good(struct bch_fs *c, struct jset *j)
-+{
-+ return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) &&
-+ !bch2_crc_cmp(j->csum,
-+ csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j));
-+}
-+
-+static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
-+{
-+ return (seq - c->journal_entries_base_seq) & (~0U >> 1);
-+}
-+
-+static void __journal_replay_free(struct bch_fs *c,
-+ struct journal_replay *i)
-+{
-+ struct journal_replay **p =
-+ genradix_ptr(&c->journal_entries,
-+ journal_entry_radix_idx(c, le64_to_cpu(i->j.seq)));
-+
-+ BUG_ON(*p != i);
-+ *p = NULL;
-+ kvpfree(i, offsetof(struct journal_replay, j) +
-+ vstruct_bytes(&i->j));
-+}
-+
-+static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
-+{
-+ i->ignore = true;
-+
-+ if (!c->opts.read_entire_journal)
-+ __journal_replay_free(c, i);
-+}
-+
-+struct journal_list {
-+ struct closure cl;
-+ u64 last_seq;
-+ struct mutex lock;
-+ int ret;
-+};
-+
-+#define JOURNAL_ENTRY_ADD_OK 0
-+#define JOURNAL_ENTRY_ADD_OUT_OF_RANGE 5
-+
-+/*
-+ * Given a journal entry we just read, add it to the list of journal entries to
-+ * be replayed:
-+ */
-+static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
-+ struct journal_ptr entry_ptr,
-+ struct journal_list *jlist, struct jset *j)
-+{
-+ struct genradix_iter iter;
-+ struct journal_replay **_i, *i, *dup;
-+ struct journal_ptr *ptr;
-+ size_t bytes = vstruct_bytes(j);
-+ u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0;
-+ int ret = JOURNAL_ENTRY_ADD_OK;
-+
-+ /* Is this entry older than the range we need? */
-+ if (!c->opts.read_entire_journal &&
-+ le64_to_cpu(j->seq) < jlist->last_seq)
-+ return JOURNAL_ENTRY_ADD_OUT_OF_RANGE;
-+
-+ /*
-+ * genradixes are indexed by a ulong, not a u64, so we can't index them
-+ * by sequence number directly: Assume instead that they will all fall
-+ * within the range of +-2billion of the filrst one we find.
-+ */
-+ if (!c->journal_entries_base_seq)
-+ c->journal_entries_base_seq = max_t(s64, 1, le64_to_cpu(j->seq) - S32_MAX);
-+
-+ /* Drop entries we don't need anymore */
-+ if (last_seq > jlist->last_seq && !c->opts.read_entire_journal) {
-+ genradix_for_each_from(&c->journal_entries, iter, _i,
-+ journal_entry_radix_idx(c, jlist->last_seq)) {
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ if (le64_to_cpu(i->j.seq) >= last_seq)
-+ break;
-+ journal_replay_free(c, i);
-+ }
-+ }
-+
-+ jlist->last_seq = max(jlist->last_seq, last_seq);
-+
-+ _i = genradix_ptr_alloc(&c->journal_entries,
-+ journal_entry_radix_idx(c, le64_to_cpu(j->seq)),
-+ GFP_KERNEL);
-+ if (!_i)
-+ return -BCH_ERR_ENOMEM_journal_entry_add;
-+
-+ /*
-+ * Duplicate journal entries? If so we want the one that didn't have a
-+ * checksum error:
-+ */
-+ dup = *_i;
-+ if (dup) {
-+ if (bytes == vstruct_bytes(&dup->j) &&
-+ !memcmp(j, &dup->j, bytes)) {
-+ i = dup;
-+ goto found;
-+ }
-+
-+ if (!entry_ptr.csum_good) {
-+ i = dup;
-+ goto found;
-+ }
-+
-+ if (!dup->csum_good)
-+ goto replace;
-+
-+ fsck_err(c, journal_entry_replicas_data_mismatch,
-+ "found duplicate but non identical journal entries (seq %llu)",
-+ le64_to_cpu(j->seq));
-+ i = dup;
-+ goto found;
-+ }
-+replace:
-+ i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
-+ if (!i)
-+ return -BCH_ERR_ENOMEM_journal_entry_add;
-+
-+ i->nr_ptrs = 0;
-+ i->csum_good = entry_ptr.csum_good;
-+ i->ignore = false;
-+ unsafe_memcpy(&i->j, j, bytes, "embedded variable length struct");
-+ i->ptrs[i->nr_ptrs++] = entry_ptr;
-+
-+ if (dup) {
-+ if (dup->nr_ptrs >= ARRAY_SIZE(dup->ptrs)) {
-+ bch_err(c, "found too many copies of journal entry %llu",
-+ le64_to_cpu(i->j.seq));
-+ dup->nr_ptrs = ARRAY_SIZE(dup->ptrs) - 1;
-+ }
-+
-+ /* The first ptr should represent the jset we kept: */
-+ memcpy(i->ptrs + i->nr_ptrs,
-+ dup->ptrs,
-+ sizeof(dup->ptrs[0]) * dup->nr_ptrs);
-+ i->nr_ptrs += dup->nr_ptrs;
-+ __journal_replay_free(c, dup);
-+ }
-+
-+ *_i = i;
-+ return 0;
-+found:
-+ for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
-+ if (ptr->dev == ca->dev_idx) {
-+ bch_err(c, "duplicate journal entry %llu on same device",
-+ le64_to_cpu(i->j.seq));
-+ goto out;
-+ }
-+ }
-+
-+ if (i->nr_ptrs >= ARRAY_SIZE(i->ptrs)) {
-+ bch_err(c, "found too many copies of journal entry %llu",
-+ le64_to_cpu(i->j.seq));
-+ goto out;
-+ }
-+
-+ i->ptrs[i->nr_ptrs++] = entry_ptr;
-+out:
-+fsck_err:
-+ return ret;
-+}
-+
-+/* this fills in a range with empty jset_entries: */
-+static void journal_entry_null_range(void *start, void *end)
-+{
-+ struct jset_entry *entry;
-+
-+ for (entry = start; entry != end; entry = vstruct_next(entry))
-+ memset(entry, 0, sizeof(*entry));
-+}
-+
-+#define JOURNAL_ENTRY_REREAD 5
-+#define JOURNAL_ENTRY_NONE 6
-+#define JOURNAL_ENTRY_BAD 7
-+
-+static void journal_entry_err_msg(struct printbuf *out,
-+ u32 version,
-+ struct jset *jset,
-+ struct jset_entry *entry)
-+{
-+ prt_str(out, "invalid journal entry, version=");
-+ bch2_version_to_text(out, version);
-+
-+ if (entry) {
-+ prt_str(out, " type=");
-+ prt_str(out, bch2_jset_entry_types[entry->type]);
-+ }
-+
-+ if (!jset) {
-+ prt_printf(out, " in superblock");
-+ } else {
-+
-+ prt_printf(out, " seq=%llu", le64_to_cpu(jset->seq));
-+
-+ if (entry)
-+ prt_printf(out, " offset=%zi/%u",
-+ (u64 *) entry - jset->_data,
-+ le32_to_cpu(jset->u64s));
-+ }
-+
-+ prt_str(out, ": ");
-+}
-+
-+#define journal_entry_err(c, version, jset, entry, _err, msg, ...) \
-+({ \
-+ struct printbuf _buf = PRINTBUF; \
-+ \
-+ journal_entry_err_msg(&_buf, version, jset, entry); \
-+ prt_printf(&_buf, msg, ##__VA_ARGS__); \
-+ \
-+ switch (flags & BKEY_INVALID_WRITE) { \
-+ case READ: \
-+ mustfix_fsck_err(c, _err, "%s", _buf.buf); \
-+ break; \
-+ case WRITE: \
-+ bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \
-+ bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\
-+ if (bch2_fs_inconsistent(c)) { \
-+ ret = -BCH_ERR_fsck_errors_not_fixed; \
-+ goto fsck_err; \
-+ } \
-+ break; \
-+ } \
-+ \
-+ printbuf_exit(&_buf); \
-+ true; \
-+})
-+
-+#define journal_entry_err_on(cond, ...) \
-+ ((cond) ? journal_entry_err(__VA_ARGS__) : false)
-+
-+#define FSCK_DELETED_KEY 5
-+
-+static int journal_validate_key(struct bch_fs *c,
-+ struct jset *jset,
-+ struct jset_entry *entry,
-+ unsigned level, enum btree_id btree_id,
-+ struct bkey_i *k,
-+ unsigned version, int big_endian,
-+ enum bkey_invalid_flags flags)
-+{
-+ int write = flags & BKEY_INVALID_WRITE;
-+ void *next = vstruct_next(entry);
-+ struct printbuf buf = PRINTBUF;
-+ int ret = 0;
-+
-+ if (journal_entry_err_on(!k->k.u64s,
-+ c, version, jset, entry,
-+ journal_entry_bkey_u64s_0,
-+ "k->u64s 0")) {
-+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
-+ journal_entry_null_range(vstruct_next(entry), next);
-+ return FSCK_DELETED_KEY;
-+ }
-+
-+ if (journal_entry_err_on((void *) bkey_next(k) >
-+ (void *) vstruct_next(entry),
-+ c, version, jset, entry,
-+ journal_entry_bkey_past_end,
-+ "extends past end of journal entry")) {
-+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
-+ journal_entry_null_range(vstruct_next(entry), next);
-+ return FSCK_DELETED_KEY;
-+ }
-+
-+ if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT,
-+ c, version, jset, entry,
-+ journal_entry_bkey_bad_format,
-+ "bad format %u", k->k.format)) {
-+ le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
-+ memmove(k, bkey_next(k), next - (void *) bkey_next(k));
-+ journal_entry_null_range(vstruct_next(entry), next);
-+ return FSCK_DELETED_KEY;
-+ }
-+
-+ if (!write)
-+ bch2_bkey_compat(level, btree_id, version, big_endian,
-+ write, NULL, bkey_to_packed(k));
-+
-+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(k),
-+ __btree_node_type(level, btree_id), write, &buf)) {
-+ printbuf_reset(&buf);
-+ journal_entry_err_msg(&buf, version, jset, entry);
-+ prt_newline(&buf);
-+ printbuf_indent_add(&buf, 2);
-+
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
-+ prt_newline(&buf);
-+ bch2_bkey_invalid(c, bkey_i_to_s_c(k),
-+ __btree_node_type(level, btree_id), write, &buf);
-+
-+ mustfix_fsck_err(c, journal_entry_bkey_invalid,
-+ "%s", buf.buf);
-+
-+ le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
-+ memmove(k, bkey_next(k), next - (void *) bkey_next(k));
-+ journal_entry_null_range(vstruct_next(entry), next);
-+
-+ printbuf_exit(&buf);
-+ return FSCK_DELETED_KEY;
-+ }
-+
-+ if (write)
-+ bch2_bkey_compat(level, btree_id, version, big_endian,
-+ write, NULL, bkey_to_packed(k));
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+static int journal_entry_btree_keys_validate(struct bch_fs *c,
-+ struct jset *jset,
-+ struct jset_entry *entry,
-+ unsigned version, int big_endian,
-+ enum bkey_invalid_flags flags)
-+{
-+ struct bkey_i *k = entry->start;
-+
-+ while (k != vstruct_last(entry)) {
-+ int ret = journal_validate_key(c, jset, entry,
-+ entry->level,
-+ entry->btree_id,
-+ k, version, big_endian,
-+ flags|BKEY_INVALID_JOURNAL);
-+ if (ret == FSCK_DELETED_KEY)
-+ continue;
-+
-+ k = bkey_next(k);
-+ }
-+
-+ return 0;
-+}
-+
-+static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct jset_entry *entry)
-+{
-+ struct bkey_i *k;
-+ bool first = true;
-+
-+ jset_entry_for_each_key(entry, k) {
-+ if (!first) {
-+ prt_newline(out);
-+ prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
-+ }
-+ prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level);
-+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
-+ first = false;
-+ }
-+}
-+
-+static int journal_entry_btree_root_validate(struct bch_fs *c,
-+ struct jset *jset,
-+ struct jset_entry *entry,
-+ unsigned version, int big_endian,
-+ enum bkey_invalid_flags flags)
-+{
-+ struct bkey_i *k = entry->start;
-+ int ret = 0;
-+
-+ if (journal_entry_err_on(!entry->u64s ||
-+ le16_to_cpu(entry->u64s) != k->k.u64s,
-+ c, version, jset, entry,
-+ journal_entry_btree_root_bad_size,
-+ "invalid btree root journal entry: wrong number of keys")) {
-+ void *next = vstruct_next(entry);
-+ /*
-+ * we don't want to null out this jset_entry,
-+ * just the contents, so that later we can tell
-+ * we were _supposed_ to have a btree root
-+ */
-+ entry->u64s = 0;
-+ journal_entry_null_range(vstruct_next(entry), next);
-+ return 0;
-+ }
-+
-+ return journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
-+ version, big_endian, flags);
-+fsck_err:
-+ return ret;
-+}
-+
-+static void journal_entry_btree_root_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct jset_entry *entry)
-+{
-+ journal_entry_btree_keys_to_text(out, c, entry);
-+}
-+
-+static int journal_entry_prio_ptrs_validate(struct bch_fs *c,
-+ struct jset *jset,
-+ struct jset_entry *entry,
-+ unsigned version, int big_endian,
-+ enum bkey_invalid_flags flags)
-+{
-+ /* obsolete, don't care: */
-+ return 0;
-+}
-+
-+static void journal_entry_prio_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct jset_entry *entry)
-+{
-+}
-+
-+static int journal_entry_blacklist_validate(struct bch_fs *c,
-+ struct jset *jset,
-+ struct jset_entry *entry,
-+ unsigned version, int big_endian,
-+ enum bkey_invalid_flags flags)
-+{
-+ int ret = 0;
-+
-+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1,
-+ c, version, jset, entry,
-+ journal_entry_blacklist_bad_size,
-+ "invalid journal seq blacklist entry: bad size")) {
-+ journal_entry_null_range(entry, vstruct_next(entry));
-+ }
-+fsck_err:
-+ return ret;
-+}
-+
-+/* Print the sequence number carried by a journal seq blacklist entry. */
-+static void journal_entry_blacklist_to_text(struct printbuf *out, struct bch_fs *c,
-+					    struct jset_entry *entry)
-+{
-+	struct jset_entry_blacklist *blacklist =
-+		container_of(entry, struct jset_entry_blacklist, entry);
-+	u64 seq = le64_to_cpu(blacklist->seq);
-+
-+	prt_printf(out, "seq=%llu", seq);
-+}
-+
-+/*
-+ * Validate a v2 (range) journal seq blacklist entry.
-+ *
-+ * Two checks are made, in order: the entry's u64s must be exactly 2, and
-+ * start must not be greater than end.  Whenever a check is reported as an
-+ * error the whole entry is passed to journal_entry_null_range(); the range
-+ * check is skipped once the size check has failed.
-+ */
-+static int journal_entry_blacklist_v2_validate(struct bch_fs *c,
-+					struct jset *jset,
-+					struct jset_entry *entry,
-+					unsigned version, int big_endian,
-+					enum bkey_invalid_flags flags)
-+{
-+	/* pointer arithmetic only; fields are not read until the size is known */
-+	struct jset_entry_blacklist_v2 *range =
-+		container_of(entry, struct jset_entry_blacklist_v2, entry);
-+	int ret = 0;
-+
-+	if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2,
-+				 c, version, jset, entry,
-+				 journal_entry_blacklist_v2_bad_size,
-+				 "invalid journal seq blacklist entry: bad size")) {
-+		journal_entry_null_range(entry, vstruct_next(entry));
-+		goto fsck_err;
-+	}
-+
-+	if (journal_entry_err_on(le64_to_cpu(range->start) >
-+				 le64_to_cpu(range->end),
-+				 c, version, jset, entry,
-+				 journal_entry_blacklist_v2_start_past_end,
-+				 "invalid journal seq blacklist entry: start > end"))
-+		journal_entry_null_range(entry, vstruct_next(entry));
-+fsck_err:
-+	return ret;
-+}
-+
-+/* Print the start and end sequence numbers of a v2 blacklist entry. */
-+static void journal_entry_blacklist_v2_to_text(struct printbuf *out, struct bch_fs *c,
-+					       struct jset_entry *entry)
-+{
-+	struct jset_entry_blacklist_v2 *range =
-+		container_of(entry, struct jset_entry_blacklist_v2, entry);
-+	u64 first = le64_to_cpu(range->start);
-+	u64 last = le64_to_cpu(range->end);
-+
-+	prt_printf(out, "start=%llu end=%llu", first, last);
-+}
-+
-+/*
-+ * Validate a usage journal entry: the entry's size in bytes (as computed
-+ * from jset_u64s() of its u64s field) must be at least
-+ * sizeof(struct jset_entry_usage).  If the check is reported as an error the
-+ * whole entry is passed to journal_entry_null_range().
-+ */
-+static int journal_entry_usage_validate(struct bch_fs *c,
-+				 struct jset *jset,
-+				 struct jset_entry *entry,
-+				 unsigned version, int big_endian,
-+				 enum bkey_invalid_flags flags)
-+{
-+	/* only used for sizeof() below */
-+	struct jset_entry_usage *u =
-+		container_of(entry, struct jset_entry_usage, entry);
-+	unsigned entry_bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
-+	int ret = 0;
-+
-+	if (journal_entry_err_on(entry_bytes < sizeof(*u),
-+				 c, version, jset, entry,
-+				 journal_entry_usage_bad_size,
-+				 "invalid journal entry usage: bad size"))
-+		journal_entry_null_range(entry, vstruct_next(entry));
-+fsck_err:
-+	return ret;
-+}
-+
-+/*
-+ * Print a usage entry as "type=<name> v=<value>"; the name is looked up in
-+ * bch2_fs_usage_types[] using the entry's btree_id field.
-+ */
-+static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c,
-+					struct jset_entry *entry)
-+{
-+	struct jset_entry_usage *usage =
-+		container_of(entry, struct jset_entry_usage, entry);
-+
-+	prt_printf(out, "type=%s v=%llu",
-+		   bch2_fs_usage_types[usage->entry.btree_id],
-+		   le64_to_cpu(usage->v));
-+}
-+
-+/*
-+ * Validate a data_usage journal entry.
-+ *
-+ * The entry must be at least sizeof(struct jset_entry_data_usage) bytes, and
-+ * at least that plus r.nr_devs bytes.  The fixed-size check comes first so
-+ * that r.nr_devs is only read from an entry big enough to contain it.  If
-+ * the check is reported as an error the whole entry is passed to
-+ * journal_entry_null_range().
-+ */
-+static int journal_entry_data_usage_validate(struct bch_fs *c,
-+				      struct jset *jset,
-+				      struct jset_entry *entry,
-+				      unsigned version, int big_endian,
-+				      enum bkey_invalid_flags flags)
-+{
-+	struct jset_entry_data_usage *u =
-+		container_of(entry, struct jset_entry_data_usage, entry);
-+	unsigned entry_bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
-+	int ret = 0;
-+
-+	if (journal_entry_err_on(entry_bytes < sizeof(*u) ||
-+				 entry_bytes < sizeof(*u) + u->r.nr_devs,
-+				 c, version, jset, entry,
-+				 journal_entry_data_usage_bad_size,
-+				 "invalid journal entry usage: bad size"))
-+		journal_entry_null_range(entry, vstruct_next(entry));
-+fsck_err:
-+	return ret;
-+}
-+
-+/*
-+ * Print a data_usage entry: the replicas entry as rendered by
-+ * bch2_replicas_entry_to_text(), followed by "=<value>".
-+ */
-+static void journal_entry_data_usage_to_text(struct printbuf *out, struct bch_fs *c,
-+					     struct jset_entry *entry)
-+{
-+	struct jset_entry_data_usage *usage =
-+		container_of(entry, struct jset_entry_data_usage, entry);
-+	u64 value = le64_to_cpu(usage->v);
-+
-+	bch2_replicas_entry_to_text(out, &usage->r);
-+	prt_printf(out, "=%llu", value);
-+}
-+
-+/*
-+ * Validate a clock journal entry.
-+ *
-+ * The entry must be exactly sizeof(struct jset_entry_clock) bytes and its rw
-+ * field must be 0 or 1.  Whenever a check is reported as an error the whole
-+ * entry is passed to journal_entry_null_range(); rw is not examined once the
-+ * size check has failed.
-+ */
-+static int journal_entry_clock_validate(struct bch_fs *c,
-+				 struct jset *jset,
-+				 struct jset_entry *entry,
-+				 unsigned version, int big_endian,
-+				 enum bkey_invalid_flags flags)
-+{
-+	struct jset_entry_clock *clock =
-+		container_of(entry, struct jset_entry_clock, entry);
-+	unsigned entry_bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
-+	int ret = 0;
-+
-+	if (journal_entry_err_on(entry_bytes != sizeof(*clock),
-+				 c, version, jset, entry,
-+				 journal_entry_clock_bad_size,
-+				 "bad size")) {
-+		journal_entry_null_range(entry, vstruct_next(entry));
-+		goto fsck_err;
-+	}
-+
-+	if (journal_entry_err_on(clock->rw > 1,
-+				 c, version, jset, entry,
-+				 journal_entry_clock_bad_rw,
-+				 "bad rw"))
-+		journal_entry_null_range(entry, vstruct_next(entry));
-+fsck_err:
-+	return ret;
-+}
-+
-+/* Print a clock entry as "write=<time>" when rw is non-zero, else "read=<time>". */
-+static void journal_entry_clock_to_text(struct printbuf *out, struct bch_fs *c,
-+					struct jset_entry *entry)
-+{
-+	struct jset_entry_clock *clock =
-+		container_of(entry, struct jset_entry_clock, entry);
-+	const char *which;
-+
-+	if (clock->rw)
-+		which = "write";
-+	else
-+		which = "read";
-+
-+	prt_printf(out, "%s=%llu", which, le64_to_cpu(clock->time));
-+}
-+
-+/*
-+ * Validate a dev_usage journal entry.
-+ *
-+ * Checked in order, stopping at the first check reported as an error:
-+ *  - the entry is at least sizeof(struct jset_entry_dev_usage) bytes
-+ *  - the device index it names passes bch2_dev_exists2()
-+ *  - the pad field is zero
-+ *
-+ * A failed check passes the whole entry to journal_entry_null_range().
-+ */
-+static int journal_entry_dev_usage_validate(struct bch_fs *c,
-+				     struct jset *jset,
-+				     struct jset_entry *entry,
-+				     unsigned version, int big_endian,
-+				     enum bkey_invalid_flags flags)
-+{
-+	struct jset_entry_dev_usage *u =
-+		container_of(entry, struct jset_entry_dev_usage, entry);
-+	unsigned entry_bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
-+	unsigned min_bytes = sizeof(*u);
-+	unsigned dev;
-+	int ret = 0;
-+
-+	if (journal_entry_err_on(entry_bytes < min_bytes,
-+				 c, version, jset, entry,
-+				 journal_entry_dev_usage_bad_size,
-+				 "bad size (%u < %u)",
-+				 entry_bytes, min_bytes))
-+		goto drop;
-+
-+	dev = le32_to_cpu(u->dev);
-+
-+	if (journal_entry_err_on(!bch2_dev_exists2(c, dev),
-+				 c, version, jset, entry,
-+				 journal_entry_dev_usage_bad_dev,
-+				 "bad dev"))
-+		goto drop;
-+
-+	if (journal_entry_err_on(u->pad,
-+				 c, version, jset, entry,
-+				 journal_entry_dev_usage_bad_pad,
-+				 "bad pad"))
-+		goto drop;
-+
-+	return ret;
-+drop:
-+	journal_entry_null_range(entry, vstruct_next(entry));
-+fsck_err:
-+	return ret;
-+}
-+
-+/*
-+ * Print a dev_usage entry: the device index, then buckets/sectors/fragmented
-+ * for each data type slot the entry carries, then buckets_ec.
-+ *
-+ * Slots at index BCH_DATA_NR or above have no name in bch2_data_types[] and
-+ * are printed as "(unknown data type N)".
-+ */
-+static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs *c,
-+					    struct jset_entry *entry)
-+{
-+	struct jset_entry_dev_usage *u =
-+		container_of(entry, struct jset_entry_dev_usage, entry);
-+	unsigned nr_types = jset_entry_dev_usage_nr_types(u);
-+	unsigned type;
-+
-+	prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
-+
-+	for (type = 0; type < nr_types; type++) {
-+		if (type >= BCH_DATA_NR)
-+			prt_printf(out, " (unknown data type %u)", type);
-+		else
-+			prt_printf(out, " %s", bch2_data_types[type]);
-+
-+		prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu",
-+			   le64_to_cpu(u->d[type].buckets),
-+			   le64_to_cpu(u->d[type].sectors),
-+			   le64_to_cpu(u->d[type].fragmented));
-+	}
-+
-+	prt_printf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec));
-+}
-+
-+static int journal_entry_log_validate(struct bch_fs *c,
-+ struct jset *jset,
-+ struct jset_entry *entry,
-+ unsigned version, int big_endian,
-+ enum bkey_invalid_flags flags)
-+{ /* no checks are performed on log entries: always returns 0 */
-+ return 0;
-+}
-+
-+/*
-+ * Print the payload of a log entry.  The payload is everything in the entry
-+ * from the d field onward; it is printed with an explicit length ("%.*s")
-+ * rather than relying on a terminating NUL.
-+ */
-+static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c,
-+				      struct jset_entry *entry)
-+{
-+	struct jset_entry_log *msg =
-+		container_of(entry, struct jset_entry_log, entry);
-+	unsigned len = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d);
-+
-+	prt_printf(out, "%.*s", len, msg->d);
-+}
-+
-+static int journal_entry_overwrite_validate(struct bch_fs *c,
-+ struct jset *jset,
-+ struct jset_entry *entry,
-+ unsigned version, int big_endian,
-+ enum bkey_invalid_flags flags)
-+{ /* overwrite entries are validated by the btree_keys validator */
-+ return journal_entry_btree_keys_validate(c, jset, entry,
-+ version, big_endian, READ); /* NOTE(review): READ is passed as the last argument and the incoming flags is unused - confirm this is intended */
-+}
-+
-+static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct jset_entry *entry)
-+{ /* an overwrite entry is printed by the btree_keys printer */
-+ journal_entry_btree_keys_to_text(out, c, entry);
-+}
-+
-+struct jset_entry_ops { /* pair of callbacks implemented once per journal entry type */
-+ int (*validate)(struct bch_fs *, struct jset *,
-+ struct jset_entry *, unsigned, int,
-+ enum bkey_invalid_flags); /* args: fs, jset, entry, version, big_endian, flags */
-+ void (*to_text)(struct printbuf *, struct bch_fs *, struct jset_entry *); /* prints the entry into the printbuf */
-+};
-+
-+static const struct jset_entry_ops bch2_jset_entry_ops[] = { /* indexed by BCH_JSET_ENTRY_<type> */
-+#define x(f, nr) \
-+ [BCH_JSET_ENTRY_##f] = (struct jset_entry_ops) { \
-+ .validate = journal_entry_##f##_validate, \
-+ .to_text = journal_entry_##f##_to_text, \
-+ },
-+ BCH_JSET_ENTRY_TYPES() /* expands x() once per type, wiring journal_entry_<type>_validate/_to_text */
-+#undef x
-+};
-+
-+/*
-+ * Validate a single journal entry by dispatching on entry->type through
-+ * bch2_jset_entry_ops[].
-+ *
-+ * Entries whose type is BCH_JSET_ENTRY_NR or above have no validator and are
-+ * accepted (0 is returned); otherwise the per-type validator's result is
-+ * returned.
-+ */
-+int bch2_journal_entry_validate(struct bch_fs *c,
-+				struct jset *jset,
-+				struct jset_entry *entry,
-+				unsigned version, int big_endian,
-+				enum bkey_invalid_flags flags)
-+{
-+	if (entry->type >= BCH_JSET_ENTRY_NR)
-+		return 0;
-+
-+	return bch2_jset_entry_ops[entry->type].validate(c, jset, entry,
-+						 version, big_endian, flags);
-+}
-+
-+/*
-+ * Print a journal entry: "<type name>: " followed by the per-type text, or
-+ * "(unknown type N)" when entry->type is BCH_JSET_ENTRY_NR or above.
-+ */
-+void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
-+				struct jset_entry *entry)
-+{
-+	if (entry->type >= BCH_JSET_ENTRY_NR) {
-+		prt_printf(out, "(unknown type %u)", entry->type);
-+		return;
-+	}
-+
-+	prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
-+	bch2_jset_entry_ops[entry->type].to_text(out, c, entry);
-+}
-+
-+/*
-+ * Walk every entry in @jset and validate it with
-+ * bch2_journal_entry_validate().
-+ *
-+ * If an entry is reported as extending past the end of the jset, the jset is
-+ * truncated at that entry (jset->u64s is rewritten) and the walk stops.  The
-+ * walk also stops at the first entry whose validation returns non-zero, and
-+ * that value is returned.
-+ */
-+static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
-+				 enum bkey_invalid_flags flags)
-+{
-+	unsigned version = le32_to_cpu(jset->version);
-+	struct jset_entry *entry;
-+	int ret = 0;
-+
-+	vstruct_for_each(jset, entry) {
-+		if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset),
-+					 c, version, jset, entry,
-+					 journal_entry_past_jset_end,
-+					 "journal entry extends past end of jset")) {
-+			/* new length: u64s from the start of the data up to this entry */
-+			jset->u64s = cpu_to_le32((u64 *) entry - jset->_data);
-+			goto fsck_err;
-+		}
-+
-+		ret = bch2_journal_entry_validate(c, jset, entry,
-+					version, JSET_BIG_ENDIAN(jset), flags);
-+		if (ret)
-+			goto fsck_err;
-+	}
-+fsck_err:
-+	return ret;
-+}
-+
-+/*
-+ * Validate the header of a jset and then all of its entries.
-+ *
-+ * @ca may be NULL, in which case c->name is used in messages.
-+ *
-+ * Returns:
-+ *  JOURNAL_ENTRY_NONE  if the magic number does not match this filesystem
-+ *  -EINVAL             if the version check is reported as an error
-+ *  JOURNAL_ENTRY_BAD   if last_seq > seq is reported as an error on a flush
-+ *                      entry (last_seq is clamped to seq first)
-+ *  otherwise the result of jset_validate_entries()
-+ */
-+static int jset_validate(struct bch_fs *c,
-+			 struct bch_dev *ca,
-+			 struct jset *jset, u64 sector,
-+			 enum bkey_invalid_flags flags)
-+{
-+	const char *name;
-+	unsigned version;
-+	u64 seq;
-+	int ret = 0;
-+
-+	if (le64_to_cpu(jset->magic) != jset_magic(c))
-+		return JOURNAL_ENTRY_NONE;
-+
-+	name	= ca ? ca->name : c->name;
-+	seq	= le64_to_cpu(jset->seq);
-+	version	= le32_to_cpu(jset->version);
-+
-+	if (journal_entry_err_on(!bch2_version_compatible(version),
-+			c, version, jset, NULL,
-+			jset_unsupported_version,
-+			"%s sector %llu seq %llu: incompatible journal entry version %u.%u",
-+			name, sector, seq,
-+			BCH_VERSION_MAJOR(version),
-+			BCH_VERSION_MINOR(version))) {
-+		/* don't try to continue: */
-+		return -EINVAL;
-+	}
-+
-+	/*
-+	 * Note: this assignment is overwritten by the result of
-+	 * jset_validate_entries() below unless we return early.
-+	 */
-+	if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)),
-+			c, version, jset, NULL,
-+			jset_unknown_csum,
-+			"%s sector %llu seq %llu: journal entry with unknown csum type %llu",
-+			name, sector, seq,
-+			JSET_CSUM_TYPE(jset)))
-+		ret = JOURNAL_ENTRY_BAD;
-+
-+	/* last_seq is ignored when JSET_NO_FLUSH is true */
-+	if (journal_entry_err_on(!JSET_NO_FLUSH(jset) &&
-+				 le64_to_cpu(jset->last_seq) > seq,
-+				 c, version, jset, NULL,
-+				 jset_last_seq_newer_than_seq,
-+				 "invalid journal entry: last_seq > seq (%llu > %llu)",
-+				 le64_to_cpu(jset->last_seq),
-+				 seq)) {
-+		jset->last_seq = jset->seq;
-+		return JOURNAL_ENTRY_BAD;
-+	}
-+
-+	ret = jset_validate_entries(c, jset, flags);
-+fsck_err:
-+	return ret;
-+}
-+
-+/*
-+ * Header-only checks done on a jset right after it was read, before its
-+ * entries are looked at.
-+ *
-+ * @bucket_sectors_left: sectors between this jset and the end of the bucket
-+ * @sectors_read:        sectors of this jset currently in the read buffer
-+ *
-+ * Returns:
-+ *  JOURNAL_ENTRY_NONE    if the magic number does not match this filesystem
-+ *  -EINVAL               if the version check is reported as an error
-+ *  JOURNAL_ENTRY_REREAD  if the jset is larger than what was read and more
-+ *                        of the bucket is still available to read
-+ *  otherwise ret (0 unless the error-reporting macro changed it); a jset
-+ *  reported as running past the end of the bucket has its u64s reduced by
-+ *  the excess.
-+ */
-+static int jset_validate_early(struct bch_fs *c,
-+			       struct bch_dev *ca,
-+			       struct jset *jset, u64 sector,
-+			       unsigned bucket_sectors_left,
-+			       unsigned sectors_read)
-+{
-+	size_t bytes = vstruct_bytes(jset);
-+	/* not referenced by name below; kept exactly as in the original declaration */
-+	enum bkey_invalid_flags flags = BKEY_INVALID_JOURNAL;
-+	const char *name;
-+	unsigned version;
-+	u64 seq;
-+	int ret = 0;
-+
-+	if (le64_to_cpu(jset->magic) != jset_magic(c))
-+		return JOURNAL_ENTRY_NONE;
-+
-+	name	= ca ? ca->name : c->name;
-+	seq	= le64_to_cpu(jset->seq);
-+	version	= le32_to_cpu(jset->version);
-+
-+	if (journal_entry_err_on(!bch2_version_compatible(version),
-+			c, version, jset, NULL,
-+			jset_unsupported_version,
-+			"%s sector %llu seq %llu: unknown journal entry version %u.%u",
-+			name, sector, seq,
-+			BCH_VERSION_MAJOR(version),
-+			BCH_VERSION_MINOR(version))) {
-+		/* don't try to continue: */
-+		return -EINVAL;
-+	}
-+
-+	if (bytes > (sectors_read << 9) &&
-+	    sectors_read < bucket_sectors_left)
-+		return JOURNAL_ENTRY_REREAD;
-+
-+	if (journal_entry_err_on(bytes > bucket_sectors_left << 9,
-+			c, version, jset, NULL,
-+			jset_past_bucket_end,
-+			"%s sector %llu seq %llu: journal entry too big (%zu bytes)",
-+			name, sector, seq, bytes))
-+		le32_add_cpu(&jset->u64s,
-+			     -((bytes - (bucket_sectors_left << 9)) / 8));
-+fsck_err:
-+	return ret;
-+}
-+
-+struct journal_read_buf { /* buffer that journal buckets are read into */
-+ void *data; /* allocated with kvpmalloc(), released with kvpfree() */
-+ size_t size; /* size of data in bytes */
-+};
-+
-+/*
-+ * Replace @b's buffer with a new one of at least @new_size bytes (rounded up
-+ * to a power of two).  The old contents are NOT copied over.
-+ *
-+ * Returns 0 on success, or -BCH_ERR_ENOMEM_journal_read_buf_realloc if
-+ * @new_size exceeds JOURNAL_ENTRY_SIZE_MAX or the allocation fails; on
-+ * failure @b is left untouched.
-+ */
-+static int journal_read_buf_realloc(struct journal_read_buf *b,
-+				    size_t new_size)
-+{
-+	void *fresh;
-+
-+	/* the bios are sized for this many pages, max: */
-+	if (new_size > JOURNAL_ENTRY_SIZE_MAX)
-+		return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
-+
-+	new_size = roundup_pow_of_two(new_size);
-+
-+	fresh = kvpmalloc(new_size, GFP_KERNEL);
-+	if (!fresh)
-+		return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
-+
-+	kvpfree(b->data, b->size);
-+
-+	b->size = new_size;
-+	b->data = fresh;
-+	return 0;
-+}
-+
-+/*
-+ * Read one journal bucket of @ca and pass every jset found in it to
-+ * journal_entry_add().
-+ *
-+ * @buf:    read buffer; may be reallocated (grown) when a jset does not fit
-+ * @jlist:  shared list state; jlist->lock is taken around journal_entry_add()
-+ * @bucket: index into ca->journal.buckets[]
-+ *
-+ * Returns 0 when the bucket was processed or when reading stopped for a
-+ * non-fatal reason (I/O error, no more entries, stale entries), or a
-+ * non-zero value from jset_validate_early(), journal_read_buf_realloc() or
-+ * journal_entry_add().  Returns -ENOMEM if the bio for the read cannot be
-+ * allocated.
-+ */
-+static int journal_read_bucket(struct bch_dev *ca,
-+			       struct journal_read_buf *buf,
-+			       struct journal_list *jlist,
-+			       unsigned bucket)
-+{
-+	struct bch_fs *c = ca->fs;
-+	struct journal_device *ja = &ca->journal;
-+	struct jset *j = NULL;
-+	unsigned sectors, sectors_read = 0;
-+	u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
-+	    end = offset + ca->mi.bucket_size;
-+	bool saw_bad = false, csum_good;
-+	int ret = 0;
-+
-+	pr_debug("reading %u", bucket);
-+
-+	while (offset < end) {
-+		if (!sectors_read) {
-+			struct bio *bio;
-+			unsigned nr_bvecs;
-+reread:
-+			sectors_read = min_t(unsigned,
-+				end - offset, buf->size >> 9);
-+			nr_bvecs = buf_pages(buf->data, sectors_read << 9);
-+
-+			bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
-+			/*
-+			 * bio_kmalloc() can fail; bio_init() below would
-+			 * dereference the NULL pointer.
-+			 */
-+			if (!bio)
-+				return -ENOMEM;
-+			bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ);
-+
-+			bio->bi_iter.bi_sector = offset;
-+			bch2_bio_map(bio, buf->data, sectors_read << 9);
-+
-+			ret = submit_bio_wait(bio);
-+			kfree(bio);
-+
-+			if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read,
-+					       "journal read error: sector %llu",
-+					       offset) ||
-+			    bch2_meta_read_fault("journal")) {
-+				/*
-+				 * We don't error out of the recovery process
-+				 * here, since the relevant journal entry may be
-+				 * found on a different device, and missing or
-+				 * no journal entries will be handled later
-+				 */
-+				return 0;
-+			}
-+
-+			/* a fresh read always starts at the beginning of the buffer */
-+			j = buf->data;
-+		}
-+
-+		ret = jset_validate_early(c, ca, j, offset,
-+				    end - offset, sectors_read);
-+		switch (ret) {
-+		case 0:
-+			sectors = vstruct_sectors(j, c->block_bits);
-+			break;
-+		case JOURNAL_ENTRY_REREAD:
-+			/* grow the buffer if the jset cannot fit, then read again */
-+			if (vstruct_bytes(j) > buf->size) {
-+				ret = journal_read_buf_realloc(buf,
-+							vstruct_bytes(j));
-+				if (ret)
-+					return ret;
-+			}
-+			goto reread;
-+		case JOURNAL_ENTRY_NONE:
-+			if (!saw_bad)
-+				return 0;
-+			/*
-+			 * On checksum error we don't really trust the size
-+			 * field of the journal entry we read, so try reading
-+			 * again at next block boundary:
-+			 */
-+			sectors = block_sectors(c);
-+			goto next_block;
-+		default:
-+			return ret;
-+		}
-+
-+		/*
-+		 * This happens sometimes if we don't have discards on -
-+		 * when we've partially overwritten a bucket with new
-+		 * journal entries. We don't need the rest of the
-+		 * bucket:
-+		 */
-+		if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
-+			return 0;
-+
-+		ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
-+
-+		csum_good = jset_csum_good(c, j);
-+		if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum,
-+				       "journal checksum error"))
-+			saw_bad = true;
-+
-+		ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
-+				   j->encrypted_start,
-+				   vstruct_end(j) - (void *) j->encrypted_start);
-+		bch2_fs_fatal_err_on(ret, c,
-+				"error decrypting journal entry: %i", ret);
-+
-+		mutex_lock(&jlist->lock);
-+		ret = journal_entry_add(c, ca, (struct journal_ptr) {
-+					.csum_good	= csum_good,
-+					.dev		= ca->dev_idx,
-+					.bucket		= bucket,
-+					.bucket_offset	= offset -
-+						bucket_to_sector(ca, ja->buckets[bucket]),
-+					.sector		= offset,
-+					}, jlist, j);
-+		mutex_unlock(&jlist->lock);
-+
-+		switch (ret) {
-+		case JOURNAL_ENTRY_ADD_OK:
-+			break;
-+		case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
-+			break;
-+		default:
-+			return ret;
-+		}
-+next_block:
-+		pr_debug("next");
-+		/* advance past this jset, both on disk and within the buffer */
-+		offset		+= sectors;
-+		sectors_read	-= sectors;
-+		j = ((void *) j) + (sectors << 9);
-+	}
-+
-+	return 0;
-+}
-+
-+static void bch2_journal_read_device(struct closure *cl)
-+{ /*
-+ * Closure function run once per device: reads every journal bucket of
-+ * the device via journal_read_bucket(), then works out where on the
-+ * device the journal currently ends (cur_idx / sectors_free). A
-+ * failure is reported through jlist->ret. On every path the read
-+ * buffer is freed, the ca->io_ref reference is dropped and the
-+ * closure returns.
-+ */
-+ struct journal_device *ja =
-+ container_of(cl, struct journal_device, read);
-+ struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
-+ struct bch_fs *c = ca->fs;
-+ struct journal_list *jlist =
-+ container_of(cl->parent, struct journal_list, cl);
-+ struct journal_replay *r, **_r;
-+ struct genradix_iter iter;
-+ struct journal_read_buf buf = { NULL, 0 };
-+ unsigned i;
-+ int ret = 0;
-+
-+ if (!ja->nr) /* device has no journal buckets */
-+ goto out;
-+
-+ ret = journal_read_buf_realloc(&buf, PAGE_SIZE);
-+ if (ret)
-+ goto err;
-+
-+ pr_debug("%u journal buckets", ja->nr);
-+
-+ for (i = 0; i < ja->nr; i++) {
-+ ret = journal_read_bucket(ca, &buf, jlist, i);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ja->sectors_free = ca->mi.bucket_size; /* default if no entry on this device is found below */
-+
-+ mutex_lock(&jlist->lock);
-+ genradix_for_each_reverse(&c->journal_entries, iter, _r) { /* first match in reverse iteration order wins */
-+ r = *_r;
-+
-+ if (!r)
-+ continue;
-+
-+ for (i = 0; i < r->nr_ptrs; i++) {
-+ if (r->ptrs[i].dev == ca->dev_idx) {
-+ unsigned wrote = bucket_remainder(ca, r->ptrs[i].sector) +
-+ vstruct_sectors(&r->j, c->block_bits);
-+
-+ ja->cur_idx = r->ptrs[i].bucket;
-+ ja->sectors_free = ca->mi.bucket_size - wrote;
-+ goto found;
-+ }
-+ }
-+ }
-+found:
-+ mutex_unlock(&jlist->lock);
-+
-+ if (ja->bucket_seq[ja->cur_idx] &&
-+ ja->sectors_free == ca->mi.bucket_size) { /* bucket has a recorded seq yet appears completely free: log it and treat the bucket as full */
-+ bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
-+ bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
-+ for (i = 0; i < 3; i++) { /* previous, current and next bucket, wrapping around */
-+ unsigned idx = (ja->cur_idx + ja->nr - 1 + i) % ja->nr;
-+
-+ bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
-+ }
-+ ja->sectors_free = 0;
-+ }
-+
-+ /*
-+ * Set dirty_idx to indicate the entire journal is full and needs to be
-+ * reclaimed - journal reclaim will immediately reclaim whatever isn't
-+ * pinned when it first runs:
-+ */
-+ ja->discard_idx = ja->dirty_idx_ondisk =
-+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
-+out:
-+ bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
-+ kvpfree(buf.data, buf.size);
-+ percpu_ref_put(&ca->io_ref);
-+ closure_return(cl);
-+ return;
-+err:
-+ mutex_lock(&jlist->lock); /* jlist is shared between the per-device closures */
-+ jlist->ret = ret;
-+ mutex_unlock(&jlist->lock);
-+ goto out;
-+}
-+
-+/*
-+ * Print every location a journal entry was found at, space separated, as
-+ * "dev:bucket:bucket_offset (sector N)".
-+ */
-+void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
-+			       struct journal_replay *j)
-+{
-+	unsigned idx;
-+
-+	for (idx = 0; idx < j->nr_ptrs; idx++) {
-+		struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[idx].dev);
-+		u64 offset;
-+
-+		/* NOTE(review): offset is computed but never printed - confirm whether it is still needed */
-+		div64_u64_rem(j->ptrs[idx].sector, ca->mi.bucket_size, &offset);
-+
-+		if (idx > 0)
-+			prt_printf(out, " ");
-+
-+		prt_printf(out, "%u:%u:%u (sector %llu)",
-+			   j->ptrs[idx].dev,
-+			   j->ptrs[idx].bucket,
-+			   j->ptrs[idx].bucket_offset,
-+			   j->ptrs[idx].sector);
-+	}
-+}
-+
-+int bch2_journal_read(struct bch_fs *c,
-+ u64 *last_seq,
-+ u64 *blacklist_seq,
-+ u64 *start_seq)
-+{ /*
-+ * Read the journal from every eligible member device and decide which
-+ * entries will be replayed.
-+ *
-+ * Outputs (all set to 0 first, once the device reads have succeeded):
-+ * *last_seq - last_seq of the newest usable flush entry
-+ * *blacklist_seq - seq of that flush entry + 1
-+ * *start_seq - seq of the newest non-ignored entry + 1
-+ *
-+ * Returns 0 on success (including "no entries found"), or the first
-+ * error from reading a device, validating an entry, marking replicas
-+ * or a fsck error.
-+ */
-+ struct journal_list jlist;
-+ struct journal_replay *i, **_i, *prev = NULL;
-+ struct genradix_iter radix_iter;
-+ struct bch_dev *ca;
-+ unsigned iter;
-+ struct printbuf buf = PRINTBUF;
-+ bool degraded = false, last_write_torn = false;
-+ u64 seq;
-+ int ret = 0;
-+
-+ closure_init_stack(&jlist.cl);
-+ mutex_init(&jlist.lock);
-+ jlist.last_seq = 0;
-+ jlist.ret = 0;
-+
-+ for_each_member_device(ca, c, iter) { /* kick off one bch2_journal_read_device() closure per usable device */
-+ if (!c->opts.fsck &&
-+ !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal)))
-+ continue;
-+
-+ if ((ca->mi.state == BCH_MEMBER_STATE_rw ||
-+ ca->mi.state == BCH_MEMBER_STATE_ro) &&
-+ percpu_ref_tryget(&ca->io_ref))
-+ closure_call(&ca->journal.read,
-+ bch2_journal_read_device,
-+ system_unbound_wq,
-+ &jlist.cl);
-+ else
-+ degraded = true; /* a device could not be read: skip the replicas check further down */
-+ }
-+
-+ closure_sync(&jlist.cl); /* wait for all per-device reads */
-+
-+ if (jlist.ret)
-+ return jlist.ret;
-+
-+ *last_seq = 0;
-+ *start_seq = 0;
-+ *blacklist_seq = 0;
-+
-+ /*
-+ * Find most recent flush entry, and ignore newer non flush entries -
-+ * those entries will be blacklisted:
-+ */
-+ genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
-+ enum bkey_invalid_flags flags = BKEY_INVALID_JOURNAL;
-+
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ if (!*start_seq) /* first entry seen in reverse order */
-+ *blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1;
-+
-+ if (JSET_NO_FLUSH(&i->j)) {
-+ i->ignore = true;
-+ continue;
-+ }
-+
-+ if (!last_write_torn && !i->csum_good) { /* only the first bad-checksum flush entry encountered is skipped this way */
-+ last_write_torn = true;
-+ i->ignore = true;
-+ continue;
-+ }
-+
-+ if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq),
-+ c, le32_to_cpu(i->j.version), &i->j, NULL,
-+ jset_last_seq_newer_than_seq,
-+ "invalid journal entry: last_seq > seq (%llu > %llu)",
-+ le64_to_cpu(i->j.last_seq),
-+ le64_to_cpu(i->j.seq)))
-+ i->j.last_seq = i->j.seq;
-+
-+ *last_seq = le64_to_cpu(i->j.last_seq);
-+ *blacklist_seq = le64_to_cpu(i->j.seq) + 1;
-+ break;
-+ }
-+
-+ if (!*start_seq) {
-+ bch_info(c, "journal read done, but no entries found");
-+ return 0;
-+ }
-+
-+ if (!*last_seq) {
-+ fsck_err(c, dirty_but_no_journal_entries_post_drop_nonflushes,
-+ "journal read done, but no entries found after dropping non-flushes");
-+ return 0;
-+ }
-+
-+ bch_info(c, "journal read done, replaying entries %llu-%llu",
-+ *last_seq, *blacklist_seq - 1);
-+
-+ if (*start_seq != *blacklist_seq)
-+ bch_info(c, "dropped unflushed entries %llu-%llu",
-+ *blacklist_seq, *start_seq - 1);
-+
-+ /* Drop blacklisted entries and entries older than last_seq: */
-+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ seq = le64_to_cpu(i->j.seq);
-+ if (seq < *last_seq) {
-+ journal_replay_free(c, i);
-+ continue;
-+ }
-+
-+ if (bch2_journal_seq_is_blacklisted(c, seq, true)) {
-+ fsck_err_on(!JSET_NO_FLUSH(&i->j), c,
-+ jset_seq_blacklisted,
-+ "found blacklisted journal entry %llu", seq);
-+ i->ignore = true;
-+ }
-+ }
-+
-+ /* Check for missing entries: */
-+ seq = *last_seq;
-+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ BUG_ON(seq > le64_to_cpu(i->j.seq));
-+
-+ while (seq < le64_to_cpu(i->j.seq)) { /* there is a gap between the expected seq and this entry */
-+ u64 missing_start, missing_end;
-+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-+
-+ while (seq < le64_to_cpu(i->j.seq) &&
-+ bch2_journal_seq_is_blacklisted(c, seq, false)) /* blacklisted seqs are not counted as missing */
-+ seq++;
-+
-+ if (seq == le64_to_cpu(i->j.seq))
-+ break;
-+
-+ missing_start = seq;
-+
-+ while (seq < le64_to_cpu(i->j.seq) &&
-+ !bch2_journal_seq_is_blacklisted(c, seq, false))
-+ seq++;
-+
-+ if (prev) {
-+ bch2_journal_ptrs_to_text(&buf1, c, prev);
-+ prt_printf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
-+ } else
-+ prt_printf(&buf1, "(none)");
-+ bch2_journal_ptrs_to_text(&buf2, c, i);
-+
-+ missing_end = seq - 1;
-+ fsck_err(c, journal_entries_missing,
-+ "journal entries %llu-%llu missing! (replaying %llu-%llu)\n"
-+ " prev at %s\n"
-+ " next at %s",
-+ missing_start, missing_end,
-+ *last_seq, *blacklist_seq - 1,
-+ buf1.buf, buf2.buf); /* NOTE(review): if fsck_err() jumps to the fsck_err label, buf1/buf2 look like they are never released - confirm against the macro */
-+
-+ printbuf_exit(&buf1);
-+ printbuf_exit(&buf2);
-+ }
-+
-+ prev = i;
-+ seq++;
-+ }
-+
-+ genradix_for_each(&c->journal_entries, radix_iter, _i) { /* final pass: validate each kept entry and check its replicas are marked */
-+ struct bch_replicas_padded replicas = {
-+ .e.data_type = BCH_DATA_journal,
-+ .e.nr_required = 1,
-+ };
-+ unsigned ptr;
-+
-+ i = *_i;
-+ if (!i || i->ignore)
-+ continue;
-+
-+ for (ptr = 0; ptr < i->nr_ptrs; ptr++) {
-+ ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
-+
-+ if (!i->ptrs[ptr].csum_good)
-+ bch_err_dev_offset(ca, i->ptrs[ptr].sector,
-+ "invalid journal checksum, seq %llu%s",
-+ le64_to_cpu(i->j.seq),
-+ i->csum_good ? " (had good copy on another device)" : "");
-+ }
-+
-+ ret = jset_validate(c,
-+ bch_dev_bkey_exists(c, i->ptrs[0].dev),
-+ &i->j,
-+ i->ptrs[0].sector,
-+ READ);
-+ if (ret)
-+ goto err;
-+
-+ for (ptr = 0; ptr < i->nr_ptrs; ptr++)
-+ replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev;
-+
-+ bch2_replicas_entry_sort(&replicas.e);
-+
-+ printbuf_reset(&buf);
-+ bch2_replicas_entry_to_text(&buf, &replicas.e);
-+
-+ if (!degraded &&
-+ !bch2_replicas_marked(c, &replicas.e) &&
-+ (le64_to_cpu(i->j.seq) == *last_seq ||
-+ fsck_err(c, journal_entry_replicas_not_marked,
-+ "superblock not marked as containing replicas for journal entry %llu\n %s",
-+ le64_to_cpu(i->j.seq), buf.buf))) { /* the entry at *last_seq is marked without raising a fsck error */
-+ ret = bch2_mark_replicas(c, &replicas.e);
-+ if (ret)
-+ goto err;
-+ }
-+ }
-+err:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/* journal write: */
-+
-+/*
-+ * Add journal write pointers to @w->key, one per suitable device taken in
-+ * the order given by @devs_sorted, until *@replicas reaches @replicas_want.
-+ *
-+ * A device is skipped if it has zero durability, is not in the rw state,
-+ * has no journal buckets, is already referenced by @w->key, or has fewer
-+ * than @sectors free in its current journal bucket.
-+ *
-+ * For each device used, its sectors_free is reduced by @sectors, its
-+ * bucket_seq for the current bucket is set to the seq of the entry being
-+ * written, and *@replicas grows by the device's durability.
-+ */
-+static void __journal_write_alloc(struct journal *j,
-+				  struct journal_buf *w,
-+				  struct dev_alloc_list *devs_sorted,
-+				  unsigned sectors,
-+				  unsigned *replicas,
-+				  unsigned replicas_want)
-+{
-+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+	unsigned idx;
-+
-+	if (*replicas >= replicas_want)
-+		return;
-+
-+	for (idx = 0;
-+	     idx < devs_sorted->nr && *replicas < replicas_want;
-+	     idx++) {
-+		struct bch_dev *ca = rcu_dereference(c->devs[devs_sorted->devs[idx]]);
-+		struct journal_device *ja;
-+		struct bch_extent_ptr ptr;
-+
-+		if (!ca)
-+			continue;
-+
-+		ja = &ca->journal;
-+
-+		/*
-+		 * Check that we can use this device, and aren't already using
-+		 * it:
-+		 */
-+		if (!ca->mi.durability ||
-+		    ca->mi.state != BCH_MEMBER_STATE_rw ||
-+		    !ja->nr ||
-+		    bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
-+		    sectors > ja->sectors_free)
-+			continue;
-+
-+		bch2_dev_stripe_increment(ca, &j->wp.stripe);
-+
-+		/* first unused sector of the device's current journal bucket */
-+		ptr = (struct bch_extent_ptr) {
-+			.offset = bucket_to_sector(ca,
-+					ja->buckets[ja->cur_idx]) +
-+				ca->mi.bucket_size -
-+				ja->sectors_free,
-+			.dev = ca->dev_idx,
-+		};
-+		bch2_bkey_append_ptr(&w->key, ptr);
-+
-+		ja->sectors_free -= sectors;
-+		ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
-+
-+		*replicas += ca->mi.durability;
-+	}
-+}
-+
-+/**
-+ * journal_write_alloc - decide where to write next journal entry
-+ *
-+ * @j: journal object
-+ * @w: journal buf (entry to be written)
-+ *
-+ * Pointers are appended to @w->key by __journal_write_alloc(). Up to three
-+ * attempts are made, stopping as soon as metadata_replicas is reached:
-+ * first with the current journal buckets of the target's devices, then
-+ * after advancing devices whose current bucket is too full to their next
-+ * bucket, and finally - if a target was set - the whole procedure again
-+ * with no target.
-+ *
-+ * Returns: 0 on success, or -EROFS on failure (fewer replicas than
-+ * metadata_replicas_required could be allocated)
-+ */
-+static int journal_write_alloc(struct journal *j, struct journal_buf *w)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct bch_devs_mask devs;
-+ struct journal_device *ja;
-+ struct bch_dev *ca;
-+ struct dev_alloc_list devs_sorted;
-+ unsigned sectors = vstruct_sectors(w->data, c->block_bits);
-+ unsigned target = c->opts.metadata_target ?:
-+ c->opts.foreground_target;
-+ unsigned i, replicas = 0, replicas_want =
-+ READ_ONCE(c->opts.metadata_replicas);
-+
-+ rcu_read_lock(); /* c->devs[] is accessed with rcu_dereference() below */
-+retry:
-+ devs = target_rw_devs(c, BCH_DATA_journal, target);
-+
-+ devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
-+
-+ __journal_write_alloc(j, w, &devs_sorted,
-+ sectors, &replicas, replicas_want);
-+
-+ if (replicas >= replicas_want)
-+ goto done;
-+
-+ for (i = 0; i < devs_sorted.nr; i++) { /* move devices with too little room on to their next journal bucket */
-+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
-+ if (!ca)
-+ continue;
-+
-+ ja = &ca->journal;
-+
-+ if (sectors > ja->sectors_free &&
-+ sectors <= ca->mi.bucket_size &&
-+ bch2_journal_dev_buckets_available(j, ja,
-+ journal_space_discarded)) {
-+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
-+ ja->sectors_free = ca->mi.bucket_size;
-+
-+ /*
-+ * ja->bucket_seq[ja->cur_idx] must always have
-+ * something sensible:
-+ */
-+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
-+ }
-+ }
-+
-+ __journal_write_alloc(j, w, &devs_sorted,
-+ sectors, &replicas, replicas_want);
-+
-+ if (replicas < replicas_want && target) {
-+ /* Retry from all devices: */
-+ target = 0;
-+ goto retry;
-+ }
-+done:
-+ rcu_read_unlock();
-+
-+ BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
-+
-+ return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
-+}
-+
-+/*
-+ * Grow @buf to j->buf_size_want bytes if it is currently smaller.
-+ *
-+ * Best effort: if the allocation fails the buffer is simply left at its
-+ * current size.  The existing contents are copied into the new buffer, the
-+ * new buffer is swapped in under j->lock, and the old one is freed after the
-+ * lock has been dropped.
-+ */
-+static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
-+{
-+	/* we aren't holding j->lock: */
-+	unsigned want = READ_ONCE(j->buf_size_want);
-+	void *spare;
-+
-+	if (want <= buf->buf_size)
-+		return;
-+
-+	spare = kvpmalloc(want, GFP_NOFS|__GFP_NOWARN);
-+	if (!spare)
-+		return;
-+
-+	memcpy(spare, buf->data, buf->buf_size);
-+
-+	spin_lock(&j->lock);
-+	swap(buf->data, spare);
-+	swap(buf->buf_size, want);
-+	spin_unlock(&j->lock);
-+
-+	/* after the swaps, spare/want describe the old buffer */
-+	kvpfree(spare, want);
-+}
-+
-+/*
-+ * Return the journal buffer for the last unwritten sequence number: the seq
-+ * from journal_last_unwritten_seq(), masked with JOURNAL_BUF_MASK, indexes
-+ * j->buf.
-+ */
-+static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
-+{
-+	return &j->buf[journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK];
-+}
-+
-+static void journal_write_done(struct closure *cl)
-+{ /*
-+ * Completion step of a journal write (continued to from
-+ * do_journal_write()): records which devices the entry reached,
-+ * updates the on-disk sequence bookkeeping under j->lock, advances
-+ * reservations.unwritten_idx, wakes waiters, and either starts the
-+ * next journal write or re-arms the delayed write work.
-+ */
-+ struct journal *j = container_of(cl, struct journal, io);
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct journal_buf *w = journal_last_unwritten_buf(j);
-+ struct bch_replicas_padded replicas;
-+ union journal_res_state old, new;
-+ u64 v, seq;
-+ int err = 0;
-+
-+ bch2_time_stats_update(!JSET_NO_FLUSH(w->data)
-+ ? j->flush_write_time
-+ : j->noflush_write_time, j->write_start_time);
-+
-+ if (!w->devs_written.nr) { /* no device is left in the written list */
-+ bch_err(c, "unable to write journal to sufficient devices");
-+ err = -EIO;
-+ } else {
-+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
-+ w->devs_written);
-+ if (bch2_mark_replicas(c, &replicas.e))
-+ err = -EIO;
-+ }
-+
-+ if (err)
-+ bch2_fatal_error(c);
-+
-+ spin_lock(&j->lock);
-+ seq = le64_to_cpu(w->data->seq);
-+
-+ if (seq >= j->pin.front)
-+ journal_seq_pin(j, seq)->devs = w->devs_written;
-+
-+ if (!err) {
-+ if (!JSET_NO_FLUSH(w->data)) { /* only flush writes advance the flushed/last seq on disk */
-+ j->flushed_seq_ondisk = seq;
-+ j->last_seq_ondisk = w->last_seq;
-+
-+ bch2_do_discards(c);
-+ closure_wake_up(&c->freelist_wait);
-+
-+ bch2_reset_alloc_cursors(c);
-+ }
-+ } else if (!j->err_seq || seq < j->err_seq) /* remember the lowest seq that failed */
-+ j->err_seq = seq;
-+
-+ j->seq_ondisk = seq;
-+
-+ /*
-+ * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
-+ * more buckets:
-+ *
-+ * Must come before signaling write completion, for
-+ * bch2_fs_journal_stop():
-+ */
-+ if (j->watermark != BCH_WATERMARK_stripe)
-+ journal_reclaim_kick(&c->journal);
-+
-+ /* also must come before signalling write completion: */
-+ closure_debug_destroy(cl);
-+
-+ v = atomic64_read(&j->reservations.counter);
-+ do { /* cmpxchg retry loop: increment unwritten_idx in the packed reservation state */
-+ old.v = new.v = v;
-+ BUG_ON(journal_state_count(new, new.unwritten_idx));
-+
-+ new.unwritten_idx++;
-+ } while ((v = atomic64_cmpxchg(&j->reservations.counter,
-+ old.v, new.v)) != old.v);
-+
-+ bch2_journal_space_available(j);
-+
-+ closure_wake_up(&w->wait);
-+ journal_wake(j);
-+
-+ if (!journal_state_count(new, new.unwritten_idx) &&
-+ journal_last_unwritten_seq(j) <= journal_cur_seq(j)) { /* next buffer has no outstanding reservations: write it now */
-+ spin_unlock(&j->lock);
-+ closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
-+ } else if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
-+ new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
-+ struct journal_buf *buf = journal_cur_buf(j);
-+ long delta = buf->expires - jiffies;
-+
-+ /*
-+ * We don't close a journal entry to write it while there's
-+ * previous entries still in flight - the current journal entry
-+ * might want to be written now:
-+ */
-+
-+ spin_unlock(&j->lock);
-+ mod_delayed_work(c->io_complete_wq, &j->write_work, max(0L, delta)); /* delay is clamped to 0 if the buffer has already expired */
-+ } else {
-+ spin_unlock(&j->lock);
-+ }
-+}
-+
-+/*
-+ * bio completion handler for a journal write to one device (the device is
-+ * carried in bio->bi_private).
-+ *
-+ * If the write failed, or a journal write fault is injected, the device is
-+ * removed from the buffer's devs_written list under j->err_lock.  In all
-+ * cases the reference on j->io and the device's io_ref are dropped.
-+ */
-+static void journal_write_endio(struct bio *bio)
-+{
-+	struct bch_dev *ca = bio->bi_private;
-+	struct journal *j = &ca->fs->journal;
-+	struct journal_buf *buf = journal_last_unwritten_buf(j);
-+	unsigned long flags;
-+	bool failed;
-+
-+	failed = bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
-+				    "error writing journal entry %llu: %s",
-+				    le64_to_cpu(buf->data->seq),
-+				    bch2_blk_status_to_str(bio->bi_status)) ||
-+		 bch2_meta_write_fault("journal");
-+
-+	if (failed) {
-+		spin_lock_irqsave(&j->err_lock, flags);
-+		bch2_dev_list_drop_dev(&buf->devs_written, ca->dev_idx);
-+		spin_unlock_irqrestore(&j->err_lock, flags);
-+	}
-+
-+	closure_put(&j->io);
-+	percpu_ref_put(&ca->io_ref);
-+}
-+
-+static void do_journal_write(struct closure *cl)
-+{ /*
-+ * Submit the last unwritten journal buffer to every device pointed to
-+ * by w->key, then continue at journal_write_done() on
-+ * c->io_complete_wq.
-+ */
-+ struct journal *j = container_of(cl, struct journal, io);
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct bch_dev *ca;
-+ struct journal_buf *w = journal_last_unwritten_buf(j);
-+ struct bch_extent_ptr *ptr;
-+ struct bio *bio;
-+ unsigned sectors = vstruct_sectors(w->data, c->block_bits);
-+
-+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
-+ ca = bch_dev_bkey_exists(c, ptr->dev);
-+ if (!percpu_ref_tryget(&ca->io_ref)) { /* reference is dropped again in journal_write_endio() */
-+ /* XXX: fix this */
-+ bch_err(c, "missing device for journal write\n");
-+ continue;
-+ }
-+
-+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
-+ sectors);
-+
-+ bio = ca->journal.bio; /* the device's journal bio is reset and reused for this write */
-+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
-+ bio->bi_iter.bi_sector = ptr->offset;
-+ bio->bi_end_io = journal_write_endio;
-+ bio->bi_private = ca;
-+
-+ BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector); /* the same sector must not be written twice in a row */
-+ ca->prev_journal_sector = bio->bi_iter.bi_sector;
-+
-+ if (!JSET_NO_FLUSH(w->data))
-+ bio->bi_opf |= REQ_FUA;
-+ if (!JSET_NO_FLUSH(w->data) && !w->separate_flush) /* with separate_flush set, no preflush is added to this bio */
-+ bio->bi_opf |= REQ_PREFLUSH;
-+
-+ bch2_bio_map(bio, w->data, sectors << 9);
-+
-+ trace_and_count(c, journal_write, bio);
-+ closure_bio_submit(bio, cl);
-+
-+ ca->journal.bucket_seq[ca->journal.cur_idx] =
-+ le64_to_cpu(w->data->seq);
-+ }
-+
-+ continue_at(cl, journal_write_done, c->io_complete_wq);
-+}
-+
-+static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct jset_entry *start, *end, *i, *next, *prev = NULL;
-+ struct jset *jset = w->data;
-+ unsigned sectors, bytes, u64s;
-+ bool validate_before_checksum = false;
-+ unsigned long btree_roots_have = 0;
-+ int ret;
-+
-+ /*
-+ * Simple compaction, dropping empty jset_entries (from journal
-+ * reservations that weren't fully used) and merging jset_entries that
-+ * can be.
-+ *
-+ * If we wanted to be really fancy here, we could sort all the keys in
-+ * the jset and drop keys that were overwritten - probably not worth it:
-+ */
-+ vstruct_for_each_safe(jset, i, next) {
-+ unsigned u64s = le16_to_cpu(i->u64s);
-+
-+ /* Empty entry: */
-+ if (!u64s)
-+ continue;
-+
-+ /*
-+ * New btree roots are set by journalling them; when the journal
-+ * entry gets written we have to propagate them to
-+ * c->btree_roots
-+ *
-+ * But, every journal entry we write has to contain all the
-+ * btree roots (at least for now); so after we copy btree roots
-+ * to c->btree_roots we have to get any missing btree roots and
-+ * add them to this journal entry:
-+ */
-+ if (i->type == BCH_JSET_ENTRY_btree_root) {
-+ bch2_journal_entry_to_btree_root(c, i);
-+ __set_bit(i->btree_id, &btree_roots_have);
-+ }
-+
-+ /* Can we merge with previous entry? */
-+ if (prev &&
-+ i->btree_id == prev->btree_id &&
-+ i->level == prev->level &&
-+ i->type == prev->type &&
-+ i->type == BCH_JSET_ENTRY_btree_keys &&
-+ le16_to_cpu(prev->u64s) + u64s <= U16_MAX) {
-+ memmove_u64s_down(vstruct_next(prev),
-+ i->_data,
-+ u64s);
-+ le16_add_cpu(&prev->u64s, u64s);
-+ continue;
-+ }
-+
-+ /* Couldn't merge, move i into new position (after prev): */
-+ prev = prev ? vstruct_next(prev) : jset->start;
-+ if (i != prev)
-+ memmove_u64s_down(prev, i, jset_u64s(u64s));
-+ }
-+
-+ prev = prev ? vstruct_next(prev) : jset->start;
-+ jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
-+
-+ start = end = vstruct_last(jset);
-+
-+ end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have);
-+
-+ bch2_journal_super_entries_add_common(c, &end,
-+ le64_to_cpu(jset->seq));
-+ u64s = (u64 *) end - (u64 *) start;
-+ BUG_ON(u64s > j->entry_u64s_reserved);
-+
-+ le32_add_cpu(&jset->u64s, u64s);
-+
-+ sectors = vstruct_sectors(jset, c->block_bits);
-+ bytes = vstruct_bytes(jset);
-+
-+ if (sectors > w->sectors) {
-+ bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
-+ vstruct_bytes(jset), w->sectors << 9,
-+ u64s, w->u64s_reserved, j->entry_u64s_reserved);
-+ return -EINVAL;
-+ }
-+
-+ jset->magic = cpu_to_le64(jset_magic(c));
-+ jset->version = cpu_to_le32(c->sb.version);
-+
-+ SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
-+ SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
-+
-+ if (!JSET_NO_FLUSH(jset) && journal_entry_empty(jset))
-+ j->last_empty_seq = le64_to_cpu(jset->seq);
-+
-+ if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
-+ validate_before_checksum = true;
-+
-+ if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current)
-+ validate_before_checksum = true;
-+
-+ if (validate_before_checksum &&
-+ (ret = jset_validate(c, NULL, jset, 0, WRITE)))
-+ return ret;
-+
-+ ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
-+ jset->encrypted_start,
-+ vstruct_end(jset) - (void *) jset->encrypted_start);
-+ if (bch2_fs_fatal_err_on(ret, c,
-+ "error decrypting journal entry: %i", ret))
-+ return ret;
-+
-+ jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
-+ journal_nonce(jset), jset);
-+
-+ if (!validate_before_checksum &&
-+ (ret = jset_validate(c, NULL, jset, 0, WRITE)))
-+ return ret;
-+
-+ memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
-+ return 0;
-+}
-+
-+static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *w)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ int error = bch2_journal_error(j);
-+
-+ /*
-+ * If the journal is in an error state - we did an emergency shutdown -
-+ * we prefer to continue doing journal writes. We just mark them as
-+ * noflush so they'll never be used, but they'll still be visible by the
-+ * list_journal tool - this helps in debugging.
-+ *
-+ * There's a caveat: the first journal write after marking the
-+ * superblock dirty must always be a flush write, because on startup
-+ * from a clean shutdown we didn't necessarily read the journal and the
-+ * new journal write might overwrite whatever was in the journal
-+ * previously - we can't leave the journal without any flush writes in
-+ * it.
-+ *
-+ * So if we're in an error state, and we're still starting up, we don't
-+ * write anything at all.
-+ */
-+ if (error && test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags))
-+ return -EIO;
-+
-+ if (error ||
-+ w->noflush ||
-+ (!w->must_flush &&
-+ (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
-+ test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
-+ w->noflush = true;
-+ SET_JSET_NO_FLUSH(w->data, true);
-+ w->data->last_seq = 0;
-+ w->last_seq = 0;
-+
-+ j->nr_noflush_writes++;
-+ } else {
-+ j->last_flush_write = jiffies;
-+ j->nr_flush_writes++;
-+ clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
-+ }
-+
-+ return 0;
-+}
-+
-+void bch2_journal_write(struct closure *cl)
-+{
-+ struct journal *j = container_of(cl, struct journal, io);
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct bch_dev *ca;
-+ struct journal_buf *w = journal_last_unwritten_buf(j);
-+ struct bch_replicas_padded replicas;
-+ struct bio *bio;
-+ struct printbuf journal_debug_buf = PRINTBUF;
-+ unsigned i, nr_rw_members = 0;
-+ int ret;
-+
-+ BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
-+
-+ j->write_start_time = local_clock();
-+
-+ spin_lock(&j->lock);
-+ ret = bch2_journal_write_pick_flush(j, w);
-+ spin_unlock(&j->lock);
-+ if (ret)
-+ goto err;
-+
-+ journal_buf_realloc(j, w);
-+
-+ ret = bch2_journal_write_prep(j, w);
-+ if (ret)
-+ goto err;
-+
-+ while (1) {
-+ spin_lock(&j->lock);
-+ ret = journal_write_alloc(j, w);
-+ if (!ret || !j->can_discard)
-+ break;
-+
-+ spin_unlock(&j->lock);
-+ bch2_journal_do_discards(j);
-+ }
-+
-+ if (ret) {
-+ __bch2_journal_debug_to_text(&journal_debug_buf, j);
-+ spin_unlock(&j->lock);
-+ bch_err(c, "Unable to allocate journal write:\n%s",
-+ journal_debug_buf.buf);
-+ printbuf_exit(&journal_debug_buf);
-+ goto err;
-+ }
-+
-+ /*
-+ * write is allocated, no longer need to account for it in
-+ * bch2_journal_space_available():
-+ */
-+ w->sectors = 0;
-+
-+ /*
-+ * journal entry has been compacted and allocated, recalculate space
-+ * available:
-+ */
-+ bch2_journal_space_available(j);
-+ spin_unlock(&j->lock);
-+
-+ w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
-+
-+ if (c->opts.nochanges)
-+ goto no_io;
-+
-+ for_each_rw_member(ca, c, i)
-+ nr_rw_members++;
-+
-+ if (nr_rw_members > 1)
-+ w->separate_flush = true;
-+
-+ /*
-+ * Mark journal replicas before we submit the write to guarantee
-+ * recovery will find the journal entries after a crash.
-+ */
-+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
-+ w->devs_written);
-+ ret = bch2_mark_replicas(c, &replicas.e);
-+ if (ret)
-+ goto err;
-+
-+ if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
-+ for_each_rw_member(ca, c, i) {
-+ percpu_ref_get(&ca->io_ref);
-+
-+ bio = ca->journal.bio;
-+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH);
-+ bio->bi_end_io = journal_write_endio;
-+ bio->bi_private = ca;
-+ closure_bio_submit(bio, cl);
-+ }
-+ }
-+
-+ continue_at(cl, do_journal_write, c->io_complete_wq);
-+ return;
-+no_io:
-+ continue_at(cl, journal_write_done, c->io_complete_wq);
-+ return;
-+err:
-+ bch2_fatal_error(c);
-+ continue_at(cl, journal_write_done, c->io_complete_wq);
-+}
-diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
-new file mode 100644
-index 000000000000..a88d097b13f1
---- /dev/null
-+++ b/fs/bcachefs/journal_io.h
-@@ -0,0 +1,65 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_JOURNAL_IO_H
-+#define _BCACHEFS_JOURNAL_IO_H
-+
-+/*
-+ * Only used for holding the journal entries we read in btree_journal_read()
-+ * during cache_registration
-+ */
-+struct journal_replay {
-+ struct journal_ptr {
-+ bool csum_good;
-+ u8 dev;
-+ u32 bucket;
-+ u32 bucket_offset;
-+ u64 sector;
-+ } ptrs[BCH_REPLICAS_MAX];
-+ unsigned nr_ptrs;
-+
-+ bool csum_good;
-+ bool ignore;
-+ /* must be last: */
-+ struct jset j;
-+};
-+
-+static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
-+ struct jset_entry *entry, unsigned type)
-+{
-+ while (entry < vstruct_last(jset)) {
-+ if (entry->type == type)
-+ return entry;
-+
-+ entry = vstruct_next(entry);
-+ }
-+
-+ return NULL;
-+}
-+
-+#define for_each_jset_entry_type(entry, jset, type) \
-+ for (entry = (jset)->start; \
-+ (entry = __jset_entry_type_next(jset, entry, type)); \
-+ entry = vstruct_next(entry))
-+
-+#define jset_entry_for_each_key(_e, _k) \
-+ for (_k = (_e)->start; \
-+ _k < vstruct_last(_e); \
-+ _k = bkey_next(_k))
-+
-+#define for_each_jset_key(k, entry, jset) \
-+ for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys)\
-+ jset_entry_for_each_key(entry, k)
-+
-+int bch2_journal_entry_validate(struct bch_fs *, struct jset *,
-+ struct jset_entry *, unsigned, int,
-+ enum bkey_invalid_flags);
-+void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
-+ struct jset_entry *);
-+
-+void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
-+ struct journal_replay *);
-+
-+int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
-+
-+void bch2_journal_write(struct closure *);
-+
-+#endif /* _BCACHEFS_JOURNAL_IO_H */
-diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
-new file mode 100644
-index 000000000000..9a584aaaa2eb
---- /dev/null
-+++ b/fs/bcachefs/journal_reclaim.c
-@@ -0,0 +1,876 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_key_cache.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "journal.h"
-+#include "journal_io.h"
-+#include "journal_reclaim.h"
-+#include "replicas.h"
-+#include "sb-members.h"
-+#include "trace.h"
-+
-+#include <linux/kthread.h>
-+#include <linux/sched/mm.h>
-+
-+/* Free space calculations: */
-+
-+static unsigned journal_space_from(struct journal_device *ja,
-+ enum journal_space_from from)
-+{
-+ switch (from) {
-+ case journal_space_discarded:
-+ return ja->discard_idx;
-+ case journal_space_clean_ondisk:
-+ return ja->dirty_idx_ondisk;
-+ case journal_space_clean:
-+ return ja->dirty_idx;
-+ default:
-+ BUG();
-+ }
-+}
-+
-+unsigned bch2_journal_dev_buckets_available(struct journal *j,
-+ struct journal_device *ja,
-+ enum journal_space_from from)
-+{
-+ unsigned available = (journal_space_from(ja, from) -
-+ ja->cur_idx - 1 + ja->nr) % ja->nr;
-+
-+ /*
-+ * Don't use the last bucket unless writing the new last_seq
-+ * will make another bucket available:
-+ */
-+ if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
-+ --available;
-+
-+ return available;
-+}
-+
-+static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
-+{
-+ union journal_preres_state old, new;
-+ u64 v = atomic64_read(&j->prereserved.counter);
-+
-+ do {
-+ old.v = new.v = v;
-+ new.remaining = u64s_remaining;
-+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-+ old.v, new.v)) != old.v);
-+}
-+
-+static struct journal_space
-+journal_dev_space_available(struct journal *j, struct bch_dev *ca,
-+ enum journal_space_from from)
-+{
-+ struct journal_device *ja = &ca->journal;
-+ unsigned sectors, buckets, unwritten;
-+ u64 seq;
-+
-+ if (from == journal_space_total)
-+ return (struct journal_space) {
-+ .next_entry = ca->mi.bucket_size,
-+ .total = ca->mi.bucket_size * ja->nr,
-+ };
-+
-+ buckets = bch2_journal_dev_buckets_available(j, ja, from);
-+ sectors = ja->sectors_free;
-+
-+ /*
-+ * Note that we don't allocate the space for a journal entry
-+ * until we write it out - thus, account for it here:
-+ */
-+ for (seq = journal_last_unwritten_seq(j);
-+ seq <= journal_cur_seq(j);
-+ seq++) {
-+ unwritten = j->buf[seq & JOURNAL_BUF_MASK].sectors;
-+
-+ if (!unwritten)
-+ continue;
-+
-+ /* entry won't fit on this device, skip: */
-+ if (unwritten > ca->mi.bucket_size)
-+ continue;
-+
-+ if (unwritten >= sectors) {
-+ if (!buckets) {
-+ sectors = 0;
-+ break;
-+ }
-+
-+ buckets--;
-+ sectors = ca->mi.bucket_size;
-+ }
-+
-+ sectors -= unwritten;
-+ }
-+
-+ if (sectors < ca->mi.bucket_size && buckets) {
-+ buckets--;
-+ sectors = ca->mi.bucket_size;
-+ }
-+
-+ return (struct journal_space) {
-+ .next_entry = sectors,
-+ .total = sectors + buckets * ca->mi.bucket_size,
-+ };
-+}
-+
-+static struct journal_space __journal_space_available(struct journal *j, unsigned nr_devs_want,
-+ enum journal_space_from from)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct bch_dev *ca;
-+ unsigned i, pos, nr_devs = 0;
-+ struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX];
-+
-+ BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space));
-+
-+ rcu_read_lock();
-+ for_each_member_device_rcu(ca, c, i,
-+ &c->rw_devs[BCH_DATA_journal]) {
-+ if (!ca->journal.nr)
-+ continue;
-+
-+ space = journal_dev_space_available(j, ca, from);
-+ if (!space.next_entry)
-+ continue;
-+
-+ for (pos = 0; pos < nr_devs; pos++)
-+ if (space.total > dev_space[pos].total)
-+ break;
-+
-+ array_insert_item(dev_space, nr_devs, pos, space);
-+ }
-+ rcu_read_unlock();
-+
-+ if (nr_devs < nr_devs_want)
-+ return (struct journal_space) { 0, 0 };
-+
-+ /*
-+ * We sorted largest to smallest, and we want the smallest out of the
-+ * @nr_devs_want largest devices:
-+ */
-+ return dev_space[nr_devs_want - 1];
-+}
-+
-+void bch2_journal_space_available(struct journal *j)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct bch_dev *ca;
-+ unsigned clean, clean_ondisk, total;
-+ s64 u64s_remaining = 0;
-+ unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
-+ j->buf[1].buf_size >> 9);
-+ unsigned i, nr_online = 0, nr_devs_want;
-+ bool can_discard = false;
-+ int ret = 0;
-+
-+ lockdep_assert_held(&j->lock);
-+
-+ rcu_read_lock();
-+ for_each_member_device_rcu(ca, c, i,
-+ &c->rw_devs[BCH_DATA_journal]) {
-+ struct journal_device *ja = &ca->journal;
-+
-+ if (!ja->nr)
-+ continue;
-+
-+ while (ja->dirty_idx != ja->cur_idx &&
-+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
-+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
-+
-+ while (ja->dirty_idx_ondisk != ja->dirty_idx &&
-+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
-+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
-+
-+ if (ja->discard_idx != ja->dirty_idx_ondisk)
-+ can_discard = true;
-+
-+ max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size);
-+ nr_online++;
-+ }
-+ rcu_read_unlock();
-+
-+ j->can_discard = can_discard;
-+
-+ if (nr_online < c->opts.metadata_replicas_required) {
-+ ret = JOURNAL_ERR_insufficient_devices;
-+ goto out;
-+ }
-+
-+ nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas);
-+
-+ for (i = 0; i < journal_space_nr; i++)
-+ j->space[i] = __journal_space_available(j, nr_devs_want, i);
-+
-+ clean_ondisk = j->space[journal_space_clean_ondisk].total;
-+ clean = j->space[journal_space_clean].total;
-+ total = j->space[journal_space_total].total;
-+
-+ if (!j->space[journal_space_discarded].next_entry)
-+ ret = JOURNAL_ERR_journal_full;
-+
-+ if ((j->space[journal_space_clean_ondisk].next_entry <
-+ j->space[journal_space_clean_ondisk].total) &&
-+ (clean - clean_ondisk <= total / 8) &&
-+ (clean_ondisk * 2 > clean))
-+ set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
-+ else
-+ clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
-+
-+ u64s_remaining = (u64) clean << 6;
-+ u64s_remaining -= (u64) total << 3;
-+ u64s_remaining = max(0LL, u64s_remaining);
-+ u64s_remaining /= 4;
-+ u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
-+out:
-+ j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
-+ j->cur_entry_error = ret;
-+ journal_set_remaining(j, u64s_remaining);
-+ journal_set_watermark(j);
-+
-+ if (!ret)
-+ journal_wake(j);
-+}
-+
-+/* Discards - last part of journal reclaim: */
-+
-+static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
-+{
-+ bool ret;
-+
-+ spin_lock(&j->lock);
-+ ret = ja->discard_idx != ja->dirty_idx_ondisk;
-+ spin_unlock(&j->lock);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Advance ja->discard_idx as long as it points to buckets that are no longer
-+ * dirty, issuing discards if necessary:
-+ */
-+void bch2_journal_do_discards(struct journal *j)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct bch_dev *ca;
-+ unsigned iter;
-+
-+ mutex_lock(&j->discard_lock);
-+
-+ for_each_rw_member(ca, c, iter) {
-+ struct journal_device *ja = &ca->journal;
-+
-+ while (should_discard_bucket(j, ja)) {
-+ if (!c->opts.nochanges &&
-+ ca->mi.discard &&
-+ bdev_max_discard_sectors(ca->disk_sb.bdev))
-+ blkdev_issue_discard(ca->disk_sb.bdev,
-+ bucket_to_sector(ca,
-+ ja->buckets[ja->discard_idx]),
-+ ca->mi.bucket_size, GFP_NOFS);
-+
-+ spin_lock(&j->lock);
-+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
-+
-+ bch2_journal_space_available(j);
-+ spin_unlock(&j->lock);
-+ }
-+ }
-+
-+ mutex_unlock(&j->discard_lock);
-+}
-+
-+/*
-+ * Journal entry pinning - machinery for holding a reference on a given journal
-+ * entry, holding it open to ensure it gets replayed during recovery:
-+ */
-+
-+void bch2_journal_reclaim_fast(struct journal *j)
-+{
-+ bool popped = false;
-+
-+ lockdep_assert_held(&j->lock);
-+
-+ /*
-+ * Unpin journal entries whose reference counts reached zero, meaning
-+ * all btree nodes got written out
-+ */
-+ while (!fifo_empty(&j->pin) &&
-+ !atomic_read(&fifo_peek_front(&j->pin).count)) {
-+ j->pin.front++;
-+ popped = true;
-+ }
-+
-+ if (popped)
-+ bch2_journal_space_available(j);
-+}
-+
-+bool __bch2_journal_pin_put(struct journal *j, u64 seq)
-+{
-+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
-+
-+ return atomic_dec_and_test(&pin_list->count);
-+}
-+
-+void bch2_journal_pin_put(struct journal *j, u64 seq)
-+{
-+ if (__bch2_journal_pin_put(j, seq)) {
-+ spin_lock(&j->lock);
-+ bch2_journal_reclaim_fast(j);
-+ spin_unlock(&j->lock);
-+ }
-+}
-+
-+static inline bool __journal_pin_drop(struct journal *j,
-+ struct journal_entry_pin *pin)
-+{
-+ struct journal_entry_pin_list *pin_list;
-+
-+ if (!journal_pin_active(pin))
-+ return false;
-+
-+ if (j->flush_in_progress == pin)
-+ j->flush_in_progress_dropped = true;
-+
-+ pin_list = journal_seq_pin(j, pin->seq);
-+ pin->seq = 0;
-+ list_del_init(&pin->list);
-+
-+ /*
-+ * Unpinning a journal entry may make journal_next_bucket() succeed, if
-+ * writing a new last_seq will now make another bucket available:
-+ */
-+ return atomic_dec_and_test(&pin_list->count) &&
-+ pin_list == &fifo_peek_front(&j->pin);
-+}
-+
-+void bch2_journal_pin_drop(struct journal *j,
-+ struct journal_entry_pin *pin)
-+{
-+ spin_lock(&j->lock);
-+ if (__journal_pin_drop(j, pin))
-+ bch2_journal_reclaim_fast(j);
-+ spin_unlock(&j->lock);
-+}
-+
-+static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
-+{
-+ if (fn == bch2_btree_node_flush0 ||
-+ fn == bch2_btree_node_flush1)
-+ return JOURNAL_PIN_btree;
-+ else if (fn == bch2_btree_key_cache_journal_flush)
-+ return JOURNAL_PIN_key_cache;
-+ else
-+ return JOURNAL_PIN_other;
-+}
-+
-+void bch2_journal_pin_set(struct journal *j, u64 seq,
-+ struct journal_entry_pin *pin,
-+ journal_pin_flush_fn flush_fn)
-+{
-+ struct journal_entry_pin_list *pin_list;
-+ bool reclaim;
-+
-+ spin_lock(&j->lock);
-+
-+ if (seq < journal_last_seq(j)) {
-+ /*
-+ * bch2_journal_pin_copy() raced with bch2_journal_pin_drop() on
-+ * the src pin - with the pin dropped, the entry to pin might no
-+ * longer exist, but that means there's no longer anything to
-+ * copy and we can bail out here:
-+ */
-+ spin_unlock(&j->lock);
-+ return;
-+ }
-+
-+ pin_list = journal_seq_pin(j, seq);
-+
-+ reclaim = __journal_pin_drop(j, pin);
-+
-+ atomic_inc(&pin_list->count);
-+ pin->seq = seq;
-+ pin->flush = flush_fn;
-+
-+ if (flush_fn)
-+ list_add(&pin->list, &pin_list->list[journal_pin_type(flush_fn)]);
-+ else
-+ list_add(&pin->list, &pin_list->flushed);
-+
-+ if (reclaim)
-+ bch2_journal_reclaim_fast(j);
-+ spin_unlock(&j->lock);
-+
-+ /*
-+ * If the journal is currently full, we might want to call flush_fn
-+ * immediately:
-+ */
-+ journal_wake(j);
-+}
-+
-+/**
-+ * bch2_journal_pin_flush: ensure journal pin callback is no longer running
-+ * @j: journal object
-+ * @pin: pin to flush
-+ */
-+void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
-+{
-+ BUG_ON(journal_pin_active(pin));
-+
-+ wait_event(j->pin_flush_wait, j->flush_in_progress != pin);
-+}
-+
-+/*
-+ * Journal reclaim: flush references to open journal entries to reclaim space in
-+ * the journal
-+ *
-+ * May be done by the journal code in the background as needed to free up space
-+ * for more journal entries, or as part of doing a clean shutdown, or to migrate
-+ * data off of a specific device:
-+ */
-+
-+static struct journal_entry_pin *
-+journal_get_next_pin(struct journal *j,
-+ u64 seq_to_flush,
-+ unsigned allowed_below_seq,
-+ unsigned allowed_above_seq,
-+ u64 *seq)
-+{
-+ struct journal_entry_pin_list *pin_list;
-+ struct journal_entry_pin *ret = NULL;
-+ unsigned i;
-+
-+ fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
-+ if (*seq > seq_to_flush && !allowed_above_seq)
-+ break;
-+
-+ for (i = 0; i < JOURNAL_PIN_NR; i++)
-+ if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) ||
-+ ((1U << i) & allowed_above_seq)) {
-+ ret = list_first_entry_or_null(&pin_list->list[i],
-+ struct journal_entry_pin, list);
-+ if (ret)
-+ return ret;
-+ }
-+ }
-+
-+ return NULL;
-+}
-+
-+/* returns the number of pins flushed (nonzero if we did work) */
-+static size_t journal_flush_pins(struct journal *j,
-+ u64 seq_to_flush,
-+ unsigned allowed_below_seq,
-+ unsigned allowed_above_seq,
-+ unsigned min_any,
-+ unsigned min_key_cache)
-+{
-+ struct journal_entry_pin *pin;
-+ size_t nr_flushed = 0;
-+ journal_pin_flush_fn flush_fn;
-+ u64 seq;
-+ int err;
-+
-+ lockdep_assert_held(&j->reclaim_lock);
-+
-+ while (1) {
-+ unsigned allowed_above = allowed_above_seq;
-+ unsigned allowed_below = allowed_below_seq;
-+
-+ if (min_any) {
-+ allowed_above |= ~0;
-+ allowed_below |= ~0;
-+ }
-+
-+ if (min_key_cache) {
-+ allowed_above |= 1U << JOURNAL_PIN_key_cache;
-+ allowed_below |= 1U << JOURNAL_PIN_key_cache;
-+ }
-+
-+ cond_resched();
-+
-+ j->last_flushed = jiffies;
-+
-+ spin_lock(&j->lock);
-+ pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq);
-+ if (pin) {
-+ BUG_ON(j->flush_in_progress);
-+ j->flush_in_progress = pin;
-+ j->flush_in_progress_dropped = false;
-+ flush_fn = pin->flush;
-+ }
-+ spin_unlock(&j->lock);
-+
-+ if (!pin)
-+ break;
-+
-+ if (min_key_cache && pin->flush == bch2_btree_key_cache_journal_flush)
-+ min_key_cache--;
-+
-+ if (min_any)
-+ min_any--;
-+
-+ err = flush_fn(j, pin, seq);
-+
-+ spin_lock(&j->lock);
-+ /* Pin might have been dropped or rearmed: */
-+ if (likely(!err && !j->flush_in_progress_dropped))
-+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
-+ j->flush_in_progress = NULL;
-+ j->flush_in_progress_dropped = false;
-+ spin_unlock(&j->lock);
-+
-+ wake_up(&j->pin_flush_wait);
-+
-+ if (err)
-+ break;
-+
-+ nr_flushed++;
-+ }
-+
-+ return nr_flushed;
-+}
-+
-+static u64 journal_seq_to_flush(struct journal *j)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct bch_dev *ca;
-+ u64 seq_to_flush = 0;
-+ unsigned iter;
-+
-+ spin_lock(&j->lock);
-+
-+ for_each_rw_member(ca, c, iter) {
-+ struct journal_device *ja = &ca->journal;
-+ unsigned nr_buckets, bucket_to_flush;
-+
-+ if (!ja->nr)
-+ continue;
-+
-+ /* Try to keep the journal at most half full: */
-+ nr_buckets = ja->nr / 2;
-+
-+ /* And include pre-reservations: */
-+ nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
-+ (ca->mi.bucket_size << 6) -
-+ journal_entry_overhead(j));
-+
-+ nr_buckets = min(nr_buckets, ja->nr);
-+
-+ bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
-+ seq_to_flush = max(seq_to_flush,
-+ ja->bucket_seq[bucket_to_flush]);
-+ }
-+
-+ /* Also flush if the pin fifo is more than half full */
-+ seq_to_flush = max_t(s64, seq_to_flush,
-+ (s64) journal_cur_seq(j) -
-+ (j->pin.size >> 1));
-+ spin_unlock(&j->lock);
-+
-+ return seq_to_flush;
-+}
-+
-+/**
-+ * __bch2_journal_reclaim - free up journal buckets
-+ * @j: journal object
-+ * @direct: direct or background reclaim?
-+ * @kicked: requested to run since we last ran?
-+ * Returns: 0 on success, or -EIO if the journal has been shut down
-+ *
-+ * Background journal reclaim writes out btree nodes. It should be run
-+ * early enough so that we never completely run out of journal buckets.
-+ *
-+ * High watermarks for triggering background reclaim:
-+ * - FIFO has fewer than 512 entries left
-+ * - fewer than 25% journal buckets free
-+ *
-+ * Background reclaim runs until low watermarks are reached:
-+ * - FIFO has more than 1024 entries left
-+ * - more than 50% journal buckets free
-+ *
-+ * As long as a reclaim can complete in the time it takes to fill up
-+ * 512 journal entries or 25% of all journal buckets, then
-+ * journal_next_bucket() should not stall.
-+ */
-+static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ bool kthread = (current->flags & PF_KTHREAD) != 0;
-+ u64 seq_to_flush;
-+ size_t min_nr, min_key_cache, nr_flushed;
-+ unsigned flags;
-+ int ret = 0;
-+
-+ /*
-+ * We can't invoke memory reclaim while holding the reclaim_lock -
-+ * journal reclaim is required to make progress for memory reclaim
-+ * (cleaning the caches), so we can't get stuck in memory reclaim while
-+ * we're holding the reclaim lock:
-+ */
-+ lockdep_assert_held(&j->reclaim_lock);
-+ flags = memalloc_noreclaim_save();
-+
-+ do {
-+ if (kthread && kthread_should_stop())
-+ break;
-+
-+ if (bch2_journal_error(j)) {
-+ ret = -EIO;
-+ break;
-+ }
-+
-+ bch2_journal_do_discards(j);
-+
-+ seq_to_flush = journal_seq_to_flush(j);
-+ min_nr = 0;
-+
-+ /*
-+ * If it's been longer than c->opts.journal_reclaim_delay (in ms) since
-+ * we last flushed, make sure to flush at least one journal pin:
-+ */
-+ if (time_after(jiffies, j->last_flushed +
-+ msecs_to_jiffies(c->opts.journal_reclaim_delay)))
-+ min_nr = 1;
-+
-+ if (j->prereserved.reserved * 4 > j->prereserved.remaining)
-+ min_nr = 1;
-+
-+ if (fifo_free(&j->pin) <= 32)
-+ min_nr = 1;
-+
-+ if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
-+ min_nr = 1;
-+
-+ min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
-+
-+ trace_and_count(c, journal_reclaim_start, c,
-+ direct, kicked,
-+ min_nr, min_key_cache,
-+ j->prereserved.reserved,
-+ j->prereserved.remaining,
-+ atomic_read(&c->btree_cache.dirty),
-+ c->btree_cache.used,
-+ atomic_long_read(&c->btree_key_cache.nr_dirty),
-+ atomic_long_read(&c->btree_key_cache.nr_keys));
-+
-+ nr_flushed = journal_flush_pins(j, seq_to_flush,
-+ ~0, 0,
-+ min_nr, min_key_cache);
-+
-+ if (direct)
-+ j->nr_direct_reclaim += nr_flushed;
-+ else
-+ j->nr_background_reclaim += nr_flushed;
-+ trace_and_count(c, journal_reclaim_finish, c, nr_flushed);
-+
-+ if (nr_flushed)
-+ wake_up(&j->reclaim_wait);
-+ } while ((min_nr || min_key_cache) && nr_flushed && !direct);
-+
-+ memalloc_noreclaim_restore(flags);
-+
-+ return ret;
-+}
-+
-+int bch2_journal_reclaim(struct journal *j)
-+{
-+ return __bch2_journal_reclaim(j, true, true);
-+}
-+
-+static int bch2_journal_reclaim_thread(void *arg)
-+{
-+ struct journal *j = arg;
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ unsigned long delay, now;
-+ bool journal_empty;
-+ int ret = 0;
-+
-+ set_freezable();
-+
-+ j->last_flushed = jiffies;
-+
-+ while (!ret && !kthread_should_stop()) {
-+ bool kicked = j->reclaim_kicked;
-+
-+ j->reclaim_kicked = false;
-+
-+ mutex_lock(&j->reclaim_lock);
-+ ret = __bch2_journal_reclaim(j, false, kicked);
-+ mutex_unlock(&j->reclaim_lock);
-+
-+ now = jiffies;
-+ delay = msecs_to_jiffies(c->opts.journal_reclaim_delay);
-+ j->next_reclaim = j->last_flushed + delay;
-+
-+ if (!time_in_range(j->next_reclaim, now, now + delay))
-+ j->next_reclaim = now + delay;
-+
-+ while (1) {
-+ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
-+ if (kthread_should_stop())
-+ break;
-+ if (j->reclaim_kicked)
-+ break;
-+
-+ spin_lock(&j->lock);
-+ journal_empty = fifo_empty(&j->pin);
-+ spin_unlock(&j->lock);
-+
-+ if (journal_empty)
-+ schedule();
-+ else if (time_after(j->next_reclaim, jiffies))
-+ schedule_timeout(j->next_reclaim - jiffies);
-+ else
-+ break;
-+ }
-+ __set_current_state(TASK_RUNNING);
-+ }
-+
-+ return 0;
-+}
-+
-+void bch2_journal_reclaim_stop(struct journal *j)
-+{
-+ struct task_struct *p = j->reclaim_thread;
-+
-+ j->reclaim_thread = NULL;
-+
-+ if (p) {
-+ kthread_stop(p);
-+ put_task_struct(p);
-+ }
-+}
-+
-+int bch2_journal_reclaim_start(struct journal *j)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct task_struct *p;
-+ int ret;
-+
-+ if (j->reclaim_thread)
-+ return 0;
-+
-+ p = kthread_create(bch2_journal_reclaim_thread, j,
-+ "bch-reclaim/%s", c->name);
-+ ret = PTR_ERR_OR_ZERO(p);
-+ if (ret) {
-+ bch_err_msg(c, ret, "creating journal reclaim thread");
-+ return ret;
-+ }
-+
-+ get_task_struct(p);
-+ j->reclaim_thread = p;
-+ wake_up_process(p);
-+ return 0;
-+}
-+
-+static int journal_flush_done(struct journal *j, u64 seq_to_flush,
-+ bool *did_work)
-+{
-+ int ret;
-+
-+ ret = bch2_journal_error(j);
-+ if (ret)
-+ return ret;
-+
-+ mutex_lock(&j->reclaim_lock);
-+
-+ if (journal_flush_pins(j, seq_to_flush,
-+ (1U << JOURNAL_PIN_key_cache)|
-+ (1U << JOURNAL_PIN_other), 0, 0, 0) ||
-+ journal_flush_pins(j, seq_to_flush,
-+ (1U << JOURNAL_PIN_btree), 0, 0, 0))
-+ *did_work = true;
-+
-+ spin_lock(&j->lock);
-+ /*
-+ * If journal replay hasn't completed, the unreplayed journal entries
-+ * hold refs on their corresponding sequence numbers
-+ */
-+ ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) ||
-+ journal_last_seq(j) > seq_to_flush ||
-+ !fifo_used(&j->pin);
-+
-+ spin_unlock(&j->lock);
-+ mutex_unlock(&j->reclaim_lock);
-+
-+ return ret;
-+}
-+
-+bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
-+{
-+ bool did_work = false;
-+
-+ if (!test_bit(JOURNAL_STARTED, &j->flags))
-+ return false;
-+
-+ closure_wait_event(&j->async_wait,
-+ journal_flush_done(j, seq_to_flush, &did_work));
-+
-+ return did_work;
-+}
-+
-+int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct journal_entry_pin_list *p;
-+ u64 iter, seq = 0;
-+ int ret = 0;
-+
-+ spin_lock(&j->lock);
-+ fifo_for_each_entry_ptr(p, &j->pin, iter)
-+ if (dev_idx >= 0
-+ ? bch2_dev_list_has_dev(p->devs, dev_idx)
-+ : p->devs.nr < c->opts.metadata_replicas)
-+ seq = iter;
-+ spin_unlock(&j->lock);
-+
-+ bch2_journal_flush_pins(j, seq);
-+
-+ ret = bch2_journal_error(j);
-+ if (ret)
-+ return ret;
-+
-+ mutex_lock(&c->replicas_gc_lock);
-+ bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
-+
-+ /*
-+ * Now that we've populated replicas_gc, write to the journal to mark
-+ * active journal devices. This handles the case where the journal might
-+ * be empty. Otherwise we could clear all journal replicas and
-+ * temporarily put the fs into an unrecoverable state. Journal recovery
-+ * expects to find devices marked for journal data on unclean mount.
-+ */
-+ ret = bch2_journal_meta(&c->journal);
-+ if (ret)
-+ goto err;
-+
-+ seq = 0;
-+ spin_lock(&j->lock);
-+ while (!ret) {
-+ struct bch_replicas_padded replicas;
-+
-+ seq = max(seq, journal_last_seq(j));
-+ if (seq >= j->pin.back)
-+ break;
-+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
-+ journal_seq_pin(j, seq)->devs);
-+ seq++;
-+
-+ spin_unlock(&j->lock);
-+ ret = bch2_mark_replicas(c, &replicas.e);
-+ spin_lock(&j->lock);
-+ }
-+ spin_unlock(&j->lock);
-+err:
-+ ret = bch2_replicas_gc_end(c, ret);
-+ mutex_unlock(&c->replicas_gc_lock);
-+
-+ return ret;
-+}
-diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h
-new file mode 100644
-index 000000000000..494d1a6eddb0
---- /dev/null
-+++ b/fs/bcachefs/journal_reclaim.h
-@@ -0,0 +1,87 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_JOURNAL_RECLAIM_H
-+#define _BCACHEFS_JOURNAL_RECLAIM_H
-+
-+#define JOURNAL_PIN (32 * 1024)
-+
-+static inline void journal_reclaim_kick(struct journal *j)
-+{
-+ struct task_struct *p = READ_ONCE(j->reclaim_thread);
-+
-+ j->reclaim_kicked = true;
-+ if (p)
-+ wake_up_process(p);
-+}
-+
-+unsigned bch2_journal_dev_buckets_available(struct journal *,
-+ struct journal_device *,
-+ enum journal_space_from);
-+void bch2_journal_space_available(struct journal *);
-+
-+static inline bool journal_pin_active(struct journal_entry_pin *pin)
-+{
-+ return pin->seq != 0;
-+}
-+
-+static inline struct journal_entry_pin_list *
-+journal_seq_pin(struct journal *j, u64 seq)
-+{
-+ EBUG_ON(seq < j->pin.front || seq >= j->pin.back);
-+
-+ return &j->pin.data[seq & j->pin.mask];
-+}
-+
-+void bch2_journal_reclaim_fast(struct journal *);
-+bool __bch2_journal_pin_put(struct journal *, u64);
-+void bch2_journal_pin_put(struct journal *, u64);
-+void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);
-+
-+void bch2_journal_pin_set(struct journal *, u64, struct journal_entry_pin *,
-+ journal_pin_flush_fn);
-+
-+static inline void bch2_journal_pin_add(struct journal *j, u64 seq,
-+ struct journal_entry_pin *pin,
-+ journal_pin_flush_fn flush_fn)
-+{
-+ if (unlikely(!journal_pin_active(pin) || pin->seq > seq))
-+ bch2_journal_pin_set(j, seq, pin, flush_fn);
-+}
-+
-+static inline void bch2_journal_pin_copy(struct journal *j,
-+ struct journal_entry_pin *dst,
-+ struct journal_entry_pin *src,
-+ journal_pin_flush_fn flush_fn)
-+{
-+ /* Guard against racing with journal_pin_drop(src): */
-+ u64 seq = READ_ONCE(src->seq);
-+
-+ if (seq)
-+ bch2_journal_pin_add(j, seq, dst, flush_fn);
-+}
-+
-+static inline void bch2_journal_pin_update(struct journal *j, u64 seq,
-+ struct journal_entry_pin *pin,
-+ journal_pin_flush_fn flush_fn)
-+{
-+ if (unlikely(!journal_pin_active(pin) || pin->seq < seq))
-+ bch2_journal_pin_set(j, seq, pin, flush_fn);
-+}
-+
-+void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);
-+
-+void bch2_journal_do_discards(struct journal *);
-+int bch2_journal_reclaim(struct journal *);
-+
-+void bch2_journal_reclaim_stop(struct journal *);
-+int bch2_journal_reclaim_start(struct journal *);
-+
-+bool bch2_journal_flush_pins(struct journal *, u64);
-+
-+static inline bool bch2_journal_flush_all_pins(struct journal *j)
-+{
-+ return bch2_journal_flush_pins(j, U64_MAX);
-+}
-+
-+int bch2_journal_flush_device_pins(struct journal *, int);
-+
-+#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */
-diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c
-new file mode 100644
-index 000000000000..ae4fb8c3a2bc
---- /dev/null
-+++ b/fs/bcachefs/journal_sb.c
-@@ -0,0 +1,219 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "journal_sb.h"
-+#include "darray.h"
-+
-+#include <linux/sort.h>
-+
-+/* BCH_SB_FIELD_journal: */
-+
-+static int u64_cmp(const void *_l, const void *_r)
-+{
-+ const u64 *l = _l;
-+ const u64 *r = _r;
-+
-+ return cmp_int(*l, *r);
-+}
-+
-+static int bch2_sb_journal_validate(struct bch_sb *sb,
-+ struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_journal *journal = field_to_type(f, journal);
-+ struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
-+ int ret = -BCH_ERR_invalid_sb_journal;
-+ unsigned nr;
-+ unsigned i;
-+ u64 *b;
-+
-+ nr = bch2_nr_journal_buckets(journal);
-+ if (!nr)
-+ return 0;
-+
-+ b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL);
-+ if (!b)
-+ return -BCH_ERR_ENOMEM_sb_journal_validate;
-+
-+ for (i = 0; i < nr; i++)
-+ b[i] = le64_to_cpu(journal->buckets[i]);
-+
-+ sort(b, nr, sizeof(u64), u64_cmp, NULL);
-+
-+ if (!b[0]) {
-+ prt_printf(err, "journal bucket at sector 0");
-+ goto err;
-+ }
-+
-+ if (b[0] < le16_to_cpu(m.first_bucket)) {
-+ prt_printf(err, "journal bucket %llu before first bucket %u",
-+ b[0], le16_to_cpu(m.first_bucket));
-+ goto err;
-+ }
-+
-+ if (b[nr - 1] >= le64_to_cpu(m.nbuckets)) {
-+ prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)",
-+ b[nr - 1], le64_to_cpu(m.nbuckets));
-+ goto err;
-+ }
-+
-+ for (i = 0; i + 1 < nr; i++)
-+ if (b[i] == b[i + 1]) {
-+ prt_printf(err, "duplicate journal buckets %llu", b[i]);
-+ goto err;
-+ }
-+
-+ ret = 0;
-+err:
-+ kfree(b);
-+ return ret;
-+}
-+
-+static void bch2_sb_journal_to_text(struct printbuf *out, struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_journal *journal = field_to_type(f, journal);
-+ unsigned i, nr = bch2_nr_journal_buckets(journal);
-+
-+ prt_printf(out, "Buckets: ");
-+ for (i = 0; i < nr; i++)
-+ prt_printf(out, " %llu", le64_to_cpu(journal->buckets[i]));
-+ prt_newline(out);
-+}
-+
-+const struct bch_sb_field_ops bch_sb_field_ops_journal = {
-+ .validate = bch2_sb_journal_validate,
-+ .to_text = bch2_sb_journal_to_text,
-+};
-+
-+struct u64_range {
-+ u64 start;
-+ u64 end;
-+};
-+
-+static int u64_range_cmp(const void *_l, const void *_r)
-+{
-+ const struct u64_range *l = _l;
-+ const struct u64_range *r = _r;
-+
-+ return cmp_int(l->start, r->start);
-+}
-+
-+static int bch2_sb_journal_v2_validate(struct bch_sb *sb,
-+ struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
-+ struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
-+ int ret = -BCH_ERR_invalid_sb_journal;
-+ unsigned nr;
-+ unsigned i;
-+ struct u64_range *b;
-+
-+ nr = bch2_sb_field_journal_v2_nr_entries(journal);
-+ if (!nr)
-+ return 0;
-+
-+ b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL);
-+ if (!b)
-+ return -BCH_ERR_ENOMEM_sb_journal_v2_validate;
-+
-+ for (i = 0; i < nr; i++) {
-+ b[i].start = le64_to_cpu(journal->d[i].start);
-+ b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr);
-+ }
-+
-+ sort(b, nr, sizeof(*b), u64_range_cmp, NULL);
-+
-+ if (!b[0].start) {
-+ prt_printf(err, "journal bucket at sector 0");
-+ goto err;
-+ }
-+
-+ if (b[0].start < le16_to_cpu(m.first_bucket)) {
-+ prt_printf(err, "journal bucket %llu before first bucket %u",
-+ b[0].start, le16_to_cpu(m.first_bucket));
-+ goto err;
-+ }
-+
-+ if (b[nr - 1].end > le64_to_cpu(m.nbuckets)) {
-+ prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)",
-+ b[nr - 1].end - 1, le64_to_cpu(m.nbuckets));
-+ goto err;
-+ }
-+
-+ for (i = 0; i + 1 < nr; i++) {
-+ if (b[i].end > b[i + 1].start) {
-+ prt_printf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu",
-+ b[i].start, b[i].end, b[i + 1].start, b[i + 1].end);
-+ goto err;
-+ }
-+ }
-+
-+ ret = 0;
-+err:
-+ kfree(b);
-+ return ret;
-+}
-+
-+static void bch2_sb_journal_v2_to_text(struct printbuf *out, struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
-+ unsigned i, nr = bch2_sb_field_journal_v2_nr_entries(journal);
-+
-+ prt_printf(out, "Buckets: ");
-+ for (i = 0; i < nr; i++)
-+ prt_printf(out, " %llu-%llu",
-+ le64_to_cpu(journal->d[i].start),
-+ le64_to_cpu(journal->d[i].start) + le64_to_cpu(journal->d[i].nr));
-+ prt_newline(out);
-+}
-+
-+const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = {
-+ .validate = bch2_sb_journal_v2_validate,
-+ .to_text = bch2_sb_journal_v2_to_text,
-+};
-+
-+int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
-+ u64 *buckets, unsigned nr)
-+{
-+ struct bch_sb_field_journal_v2 *j;
-+ unsigned i, dst = 0, nr_compacted = 1;
-+
-+ if (c)
-+ lockdep_assert_held(&c->sb_lock);
-+
-+ if (!nr) {
-+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
-+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal_v2);
-+ return 0;
-+ }
-+
-+ for (i = 0; i + 1 < nr; i++)
-+ if (buckets[i] + 1 != buckets[i + 1])
-+ nr_compacted++;
-+
-+ j = bch2_sb_field_resize(&ca->disk_sb, journal_v2,
-+ (sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64));
-+ if (!j)
-+ return -BCH_ERR_ENOSPC_sb_journal;
-+
-+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
-+
-+ j->d[dst].start = cpu_to_le64(buckets[0]);
-+ j->d[dst].nr = cpu_to_le64(1);
-+
-+ for (i = 1; i < nr; i++) {
-+ if (buckets[i] == buckets[i - 1] + 1) {
-+ le64_add_cpu(&j->d[dst].nr, 1);
-+ } else {
-+ dst++;
-+ j->d[dst].start = cpu_to_le64(buckets[i]);
-+ j->d[dst].nr = cpu_to_le64(1);
-+ }
-+ }
-+
-+ BUG_ON(dst + 1 != nr_compacted);
-+ return 0;
-+}
-diff --git a/fs/bcachefs/journal_sb.h b/fs/bcachefs/journal_sb.h
-new file mode 100644
-index 000000000000..ba40a7e8d90a
---- /dev/null
-+++ b/fs/bcachefs/journal_sb.h
-@@ -0,0 +1,24 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+
-+#include "super-io.h"
-+#include "vstructs.h"
-+
-+static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j)
-+{
-+ return j
-+ ? (__le64 *) vstruct_end(&j->field) - j->buckets
-+ : 0;
-+}
-+
-+static inline unsigned bch2_sb_field_journal_v2_nr_entries(struct bch_sb_field_journal_v2 *j)
-+{
-+ if (!j)
-+ return 0;
-+
-+ return (struct bch_sb_field_journal_v2_entry *) vstruct_end(&j->field) - &j->d[0];
-+}
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_journal;
-+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2;
-+
-+int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned);
-diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
-new file mode 100644
-index 000000000000..f9d9aa95bf3a
---- /dev/null
-+++ b/fs/bcachefs/journal_seq_blacklist.c
-@@ -0,0 +1,320 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_iter.h"
-+#include "eytzinger.h"
-+#include "journal_seq_blacklist.h"
-+#include "super-io.h"
-+
-+/*
-+ * journal_seq_blacklist machinery:
-+ *
-+ * To guarantee order of btree updates after a crash, we need to detect when a
-+ * btree node entry (bset) is newer than the newest journal entry that was
-+ * successfully written, and ignore it - effectively ignoring any btree updates
-+ * that didn't make it into the journal.
-+ *
-+ * If we didn't do this, we might have two btree nodes, a and b, both with
-+ * updates that weren't written to the journal yet: if b was updated after a,
-+ * but b was flushed and not a - oops; on recovery we'll find that the updates
-+ * to b happened, but not the updates to a that happened before it.
-+ *
-+ * Ignoring bsets that are newer than the newest journal entry is always safe,
-+ * because everything they contain will also have been journalled - and must
-+ * still be present in the journal on disk until a journal entry has been
-+ * written _after_ that bset was written.
-+ *
-+ * To accomplish this, bsets record the newest journal sequence number they
-+ * contain updates for; then, on startup, the btree code queries the journal
-+ * code to ask "Is this sequence number newer than the newest journal entry? If
-+ * so, ignore it."
-+ *
-+ * When this happens, we must blacklist that journal sequence number: the
-+ * journal must not write any entries with that sequence number, and it must
-+ * record that it was blacklisted so that a) on recovery we don't think we have
-+ * missing journal entries and b) so that the btree code continues to ignore
-+ * that bset, until that btree node is rewritten.
-+ */
-+
-+static unsigned sb_blacklist_u64s(unsigned nr)
-+{
-+ struct bch_sb_field_journal_seq_blacklist *bl;
-+
-+ return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
-+}
-+
-+static struct bch_sb_field_journal_seq_blacklist *
-+blacklist_entry_try_merge(struct bch_fs *c,
-+ struct bch_sb_field_journal_seq_blacklist *bl,
-+ unsigned i)
-+{
-+ unsigned nr = blacklist_nr_entries(bl);
-+
-+ if (le64_to_cpu(bl->start[i].end) >=
-+ le64_to_cpu(bl->start[i + 1].start)) {
-+ bl->start[i].end = bl->start[i + 1].end;
-+ --nr;
-+ memmove(&bl->start[i],
-+ &bl->start[i + 1],
-+ sizeof(bl->start[0]) * (nr - i));
-+
-+ bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
-+ sb_blacklist_u64s(nr));
-+ BUG_ON(!bl);
-+ }
-+
-+ return bl;
-+}
-+
-+static bool bl_entry_contig_or_overlaps(struct journal_seq_blacklist_entry *e,
-+ u64 start, u64 end)
-+{
-+ return !(end < le64_to_cpu(e->start) || le64_to_cpu(e->end) < start);
-+}
-+
-+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
-+{
-+ struct bch_sb_field_journal_seq_blacklist *bl;
-+ unsigned i, nr;
-+ int ret = 0;
-+
-+ mutex_lock(&c->sb_lock);
-+ bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
-+ nr = blacklist_nr_entries(bl);
-+
-+ for (i = 0; i < nr; i++) {
-+ struct journal_seq_blacklist_entry *e =
-+ bl->start + i;
-+
-+ if (bl_entry_contig_or_overlaps(e, start, end)) {
-+ e->start = cpu_to_le64(min(start, le64_to_cpu(e->start)));
-+ e->end = cpu_to_le64(max(end, le64_to_cpu(e->end)));
-+
-+ if (i + 1 < nr)
-+ bl = blacklist_entry_try_merge(c,
-+ bl, i);
-+ if (i)
-+ bl = blacklist_entry_try_merge(c,
-+ bl, i - 1);
-+ goto out_write_sb;
-+ }
-+ }
-+
-+ bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
-+ sb_blacklist_u64s(nr + 1));
-+ if (!bl) {
-+ ret = -BCH_ERR_ENOSPC_sb_journal_seq_blacklist;
-+ goto out;
-+ }
-+
-+ bl->start[nr].start = cpu_to_le64(start);
-+ bl->start[nr].end = cpu_to_le64(end);
-+out_write_sb:
-+ c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
-+
-+ ret = bch2_write_super(c);
-+out:
-+ mutex_unlock(&c->sb_lock);
-+
-+ return ret ?: bch2_blacklist_table_initialize(c);
-+}
-+
-+static int journal_seq_blacklist_table_cmp(const void *_l,
-+ const void *_r, size_t size)
-+{
-+ const struct journal_seq_blacklist_table_entry *l = _l;
-+ const struct journal_seq_blacklist_table_entry *r = _r;
-+
-+ return cmp_int(l->start, r->start);
-+}
-+
-+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
-+ bool dirty)
-+{
-+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
-+ struct journal_seq_blacklist_table_entry search = { .start = seq };
-+ int idx;
-+
-+ if (!t)
-+ return false;
-+
-+ idx = eytzinger0_find_le(t->entries, t->nr,
-+ sizeof(t->entries[0]),
-+ journal_seq_blacklist_table_cmp,
-+ &search);
-+ if (idx < 0)
-+ return false;
-+
-+ BUG_ON(t->entries[idx].start > seq);
-+
-+ if (seq >= t->entries[idx].end)
-+ return false;
-+
-+ if (dirty)
-+ t->entries[idx].dirty = true;
-+ return true;
-+}
-+
-+int bch2_blacklist_table_initialize(struct bch_fs *c)
-+{
-+ struct bch_sb_field_journal_seq_blacklist *bl =
-+ bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
-+ struct journal_seq_blacklist_table *t;
-+ unsigned i, nr = blacklist_nr_entries(bl);
-+
-+ if (!bl)
-+ return 0;
-+
-+ t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
-+ GFP_KERNEL);
-+ if (!t)
-+ return -BCH_ERR_ENOMEM_blacklist_table_init;
-+
-+ t->nr = nr;
-+
-+ for (i = 0; i < nr; i++) {
-+ t->entries[i].start = le64_to_cpu(bl->start[i].start);
-+ t->entries[i].end = le64_to_cpu(bl->start[i].end);
-+ }
-+
-+ eytzinger0_sort(t->entries,
-+ t->nr,
-+ sizeof(t->entries[0]),
-+ journal_seq_blacklist_table_cmp,
-+ NULL);
-+
-+ kfree(c->journal_seq_blacklist_table);
-+ c->journal_seq_blacklist_table = t;
-+ return 0;
-+}
-+
-+static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
-+ struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_journal_seq_blacklist *bl =
-+ field_to_type(f, journal_seq_blacklist);
-+ unsigned i, nr = blacklist_nr_entries(bl);
-+
-+ for (i = 0; i < nr; i++) {
-+ struct journal_seq_blacklist_entry *e = bl->start + i;
-+
-+ if (le64_to_cpu(e->start) >=
-+ le64_to_cpu(e->end)) {
-+ prt_printf(err, "entry %u start >= end (%llu >= %llu)",
-+ i, le64_to_cpu(e->start), le64_to_cpu(e->end));
-+ return -BCH_ERR_invalid_sb_journal_seq_blacklist;
-+ }
-+
-+ if (i + 1 < nr &&
-+ le64_to_cpu(e[0].end) >
-+ le64_to_cpu(e[1].start)) {
-+ prt_printf(err, "entry %u out of order with next entry (%llu > %llu)",
-+ i + 1, le64_to_cpu(e[0].end), le64_to_cpu(e[1].start));
-+ return -BCH_ERR_invalid_sb_journal_seq_blacklist;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
-+ struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_journal_seq_blacklist *bl =
-+ field_to_type(f, journal_seq_blacklist);
-+ struct journal_seq_blacklist_entry *i;
-+ unsigned nr = blacklist_nr_entries(bl);
-+
-+ for (i = bl->start; i < bl->start + nr; i++) {
-+ if (i != bl->start)
-+ prt_printf(out, " ");
-+
-+ prt_printf(out, "%llu-%llu",
-+ le64_to_cpu(i->start),
-+ le64_to_cpu(i->end));
-+ }
-+ prt_newline(out);
-+}
-+
-+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
-+ .validate = bch2_sb_journal_seq_blacklist_validate,
-+ .to_text = bch2_sb_journal_seq_blacklist_to_text
-+};
-+
-+void bch2_blacklist_entries_gc(struct work_struct *work)
-+{
-+ struct bch_fs *c = container_of(work, struct bch_fs,
-+ journal_seq_blacklist_gc_work);
-+ struct journal_seq_blacklist_table *t;
-+ struct bch_sb_field_journal_seq_blacklist *bl;
-+ struct journal_seq_blacklist_entry *src, *dst;
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ unsigned i, nr, new_nr;
-+ int ret;
-+
-+ for (i = 0; i < BTREE_ID_NR; i++) {
-+ struct btree_iter iter;
-+ struct btree *b;
-+
-+ bch2_trans_node_iter_init(trans, &iter, i, POS_MIN,
-+ 0, 0, BTREE_ITER_PREFETCH);
-+retry:
-+ bch2_trans_begin(trans);
-+
-+ b = bch2_btree_iter_peek_node(&iter);
-+
-+ while (!(ret = PTR_ERR_OR_ZERO(b)) &&
-+ b &&
-+ !test_bit(BCH_FS_STOPPING, &c->flags))
-+ b = bch2_btree_iter_next_node(&iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ }
-+
-+ bch2_trans_put(trans);
-+ if (ret)
-+ return;
-+
-+ mutex_lock(&c->sb_lock);
-+ bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
-+ if (!bl)
-+ goto out;
-+
-+ nr = blacklist_nr_entries(bl);
-+ dst = bl->start;
-+
-+ t = c->journal_seq_blacklist_table;
-+ BUG_ON(nr != t->nr);
-+
-+ for (src = bl->start, i = eytzinger0_first(t->nr);
-+ src < bl->start + nr;
-+ src++, i = eytzinger0_next(i, nr)) {
-+ BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
-+ BUG_ON(t->entries[i].end != le64_to_cpu(src->end));
-+
-+ if (t->entries[i].dirty)
-+ *dst++ = *src;
-+ }
-+
-+ new_nr = dst - bl->start;
-+
-+ bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
-+
-+ if (new_nr != nr) {
-+ bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
-+ new_nr ? sb_blacklist_u64s(new_nr) : 0);
-+ BUG_ON(new_nr && !bl);
-+
-+ if (!new_nr)
-+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3));
-+
-+ bch2_write_super(c);
-+ }
-+out:
-+ mutex_unlock(&c->sb_lock);
-+}
-diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h
-new file mode 100644
-index 000000000000..afb886ec8e25
---- /dev/null
-+++ b/fs/bcachefs/journal_seq_blacklist.h
-@@ -0,0 +1,22 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
-+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
-+
-+static inline unsigned
-+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
-+{
-+ return bl
-+ ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
-+ sizeof(struct journal_seq_blacklist_entry))
-+ : 0;
-+}
-+
-+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
-+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
-+int bch2_blacklist_table_initialize(struct bch_fs *);
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
-+
-+void bch2_blacklist_entries_gc(struct work_struct *);
-+
-+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
-diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
-new file mode 100644
-index 000000000000..42504e16acb6
---- /dev/null
-+++ b/fs/bcachefs/journal_types.h
-@@ -0,0 +1,345 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_JOURNAL_TYPES_H
-+#define _BCACHEFS_JOURNAL_TYPES_H
-+
-+#include <linux/cache.h>
-+#include <linux/workqueue.h>
-+
-+#include "alloc_types.h"
-+#include "super_types.h"
-+#include "fifo.h"
-+
-+#define JOURNAL_BUF_BITS 2
-+#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS)
-+#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)
-+
-+/*
-+ * We put JOURNAL_BUF_NR of these in struct journal; we used them for writes to
-+ * the journal that are being staged or in flight.
-+ */
-+struct journal_buf {
-+ struct jset *data;
-+
-+ __BKEY_PADDED(key, BCH_REPLICAS_MAX);
-+ struct bch_devs_list devs_written;
-+
-+ struct closure_waitlist wait;
-+ u64 last_seq; /* copy of data->last_seq */
-+ long expires;
-+ u64 flush_time;
-+
-+ unsigned buf_size; /* size in bytes of @data */
-+ unsigned sectors; /* maximum size for current entry */
-+ unsigned disk_sectors; /* maximum size entry could have been, if
-+ buf_size was bigger */
-+ unsigned u64s_reserved;
-+ bool noflush; /* write has already been kicked off, and was noflush */
-+ bool must_flush; /* something wants a flush */
-+ bool separate_flush;
-+};
-+
-+/*
-+ * Something that makes a journal entry dirty - i.e. a btree node that has to be
-+ * flushed:
-+ */
-+
-+enum journal_pin_type {
-+ JOURNAL_PIN_btree,
-+ JOURNAL_PIN_key_cache,
-+ JOURNAL_PIN_other,
-+ JOURNAL_PIN_NR,
-+};
-+
-+struct journal_entry_pin_list {
-+ struct list_head list[JOURNAL_PIN_NR];
-+ struct list_head flushed;
-+ atomic_t count;
-+ struct bch_devs_list devs;
-+};
-+
-+struct journal;
-+struct journal_entry_pin;
-+typedef int (*journal_pin_flush_fn)(struct journal *j,
-+ struct journal_entry_pin *, u64);
-+
-+struct journal_entry_pin {
-+ struct list_head list;
-+ journal_pin_flush_fn flush;
-+ u64 seq;
-+};
-+
-+struct journal_res {
-+ bool ref;
-+ u8 idx;
-+ u16 u64s;
-+ u32 offset;
-+ u64 seq;
-+};
-+
-+/*
-+ * For reserving space in the journal prior to getting a reservation on a
-+ * particular journal entry:
-+ */
-+struct journal_preres {
-+ unsigned u64s;
-+};
-+
-+union journal_res_state {
-+ struct {
-+ atomic64_t counter;
-+ };
-+
-+ struct {
-+ u64 v;
-+ };
-+
-+ struct {
-+ u64 cur_entry_offset:20,
-+ idx:2,
-+ unwritten_idx:2,
-+ buf0_count:10,
-+ buf1_count:10,
-+ buf2_count:10,
-+ buf3_count:10;
-+ };
-+};
-+
-+union journal_preres_state {
-+ struct {
-+ atomic64_t counter;
-+ };
-+
-+ struct {
-+ u64 v;
-+ };
-+
-+ struct {
-+ u64 waiting:1,
-+ reserved:31,
-+ remaining:32;
-+ };
-+};
-+
-+/* bytes: */
-+#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
-+#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
-+
-+/*
-+ * We stash some journal state as sentinal values in cur_entry_offset:
-+ * note - cur_entry_offset is in units of u64s
-+ */
-+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1)
-+
-+#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1)
-+#define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX)
-+
-+struct journal_space {
-+ /* Units of 512 bytes sectors: */
-+ unsigned next_entry; /* How big the next journal entry can be */
-+ unsigned total;
-+};
-+
-+enum journal_space_from {
-+ journal_space_discarded,
-+ journal_space_clean_ondisk,
-+ journal_space_clean,
-+ journal_space_total,
-+ journal_space_nr,
-+};
-+
-+enum journal_flags {
-+ JOURNAL_REPLAY_DONE,
-+ JOURNAL_STARTED,
-+ JOURNAL_MAY_SKIP_FLUSH,
-+ JOURNAL_NEED_FLUSH_WRITE,
-+};
-+
-+/* Reasons we may fail to get a journal reservation: */
-+#define JOURNAL_ERRORS() \
-+ x(ok) \
-+ x(blocked) \
-+ x(max_in_flight) \
-+ x(journal_full) \
-+ x(journal_pin_full) \
-+ x(journal_stuck) \
-+ x(insufficient_devices)
-+
-+enum journal_errors {
-+#define x(n) JOURNAL_ERR_##n,
-+ JOURNAL_ERRORS()
-+#undef x
-+};
-+
-+typedef DARRAY(u64) darray_u64;
-+
-+/* Embedded in struct bch_fs */
-+struct journal {
-+ /* Fastpath stuff up front: */
-+ struct {
-+
-+ union journal_res_state reservations;
-+ enum bch_watermark watermark;
-+
-+ union journal_preres_state prereserved;
-+
-+ } __aligned(SMP_CACHE_BYTES);
-+
-+ unsigned long flags;
-+
-+ /* Max size of current journal entry */
-+ unsigned cur_entry_u64s;
-+ unsigned cur_entry_sectors;
-+
-+ /* Reserved space in journal entry to be used just prior to write */
-+ unsigned entry_u64s_reserved;
-+
-+
-+ /*
-+ * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
-+ * insufficient devices:
-+ */
-+ enum journal_errors cur_entry_error;
-+
-+ unsigned buf_size_want;
-+ /*
-+ * We may queue up some things to be journalled (log messages) before
-+ * the journal has actually started - stash them here:
-+ */
-+ darray_u64 early_journal_entries;
-+
-+ /*
-+ * Two journal entries -- one is currently open for new entries, the
-+ * other is possibly being written out.
-+ */
-+ struct journal_buf buf[JOURNAL_BUF_NR];
-+
-+ spinlock_t lock;
-+
-+ /* if nonzero, we may not open a new journal entry: */
-+ unsigned blocked;
-+
-+ /* Used when waiting because the journal was full */
-+ wait_queue_head_t wait;
-+ struct closure_waitlist async_wait;
-+ struct closure_waitlist preres_wait;
-+
-+ struct closure io;
-+ struct delayed_work write_work;
-+
-+ /* Sequence number of most recent journal entry (last entry in @pin) */
-+ atomic64_t seq;
-+
-+ /* seq, last_seq from the most recent journal entry successfully written */
-+ u64 seq_ondisk;
-+ u64 flushed_seq_ondisk;
-+ u64 last_seq_ondisk;
-+ u64 err_seq;
-+ u64 last_empty_seq;
-+
-+ /*
-+ * FIFO of journal entries whose btree updates have not yet been
-+ * written out.
-+ *
-+ * Each entry is a reference count. The position in the FIFO is the
-+ * entry's sequence number relative to @seq.
-+ *
-+ * The journal entry itself holds a reference count, put when the
-+ * journal entry is written out. Each btree node modified by the journal
-+ * entry also holds a reference count, put when the btree node is
-+ * written.
-+ *
-+ * When a reference count reaches zero, the journal entry is no longer
-+ * needed. When all journal entries in the oldest journal bucket are no
-+ * longer needed, the bucket can be discarded and reused.
-+ */
-+ struct {
-+ u64 front, back, size, mask;
-+ struct journal_entry_pin_list *data;
-+ } pin;
-+
-+ struct journal_space space[journal_space_nr];
-+
-+ u64 replay_journal_seq;
-+ u64 replay_journal_seq_end;
-+
-+ struct write_point wp;
-+ spinlock_t err_lock;
-+
-+ struct mutex reclaim_lock;
-+ /*
-+ * Used for waiting until journal reclaim has freed up space in the
-+ * journal:
-+ */
-+ wait_queue_head_t reclaim_wait;
-+ struct task_struct *reclaim_thread;
-+ bool reclaim_kicked;
-+ unsigned long next_reclaim;
-+ u64 nr_direct_reclaim;
-+ u64 nr_background_reclaim;
-+
-+ unsigned long last_flushed;
-+ struct journal_entry_pin *flush_in_progress;
-+ bool flush_in_progress_dropped;
-+ wait_queue_head_t pin_flush_wait;
-+
-+ /* protects advancing ja->discard_idx: */
-+ struct mutex discard_lock;
-+ bool can_discard;
-+
-+ unsigned long last_flush_write;
-+
-+ u64 res_get_blocked_start;
-+ u64 write_start_time;
-+
-+ u64 nr_flush_writes;
-+ u64 nr_noflush_writes;
-+
-+ struct bch2_time_stats *flush_write_time;
-+ struct bch2_time_stats *noflush_write_time;
-+ struct bch2_time_stats *blocked_time;
-+ struct bch2_time_stats *flush_seq_time;
-+
-+#ifdef CONFIG_DEBUG_LOCK_ALLOC
-+ struct lockdep_map res_map;
-+#endif
-+} __aligned(SMP_CACHE_BYTES);
-+
-+/*
-+ * Embedded in struct bch_dev. First three fields refer to the array of journal
-+ * buckets, in bch_sb.
-+ */
-+struct journal_device {
-+ /*
-+ * For each journal bucket, contains the max sequence number of the
-+ * journal writes it contains - so we know when a bucket can be reused.
-+ */
-+ u64 *bucket_seq;
-+
-+ unsigned sectors_free;
-+
-+ /*
-+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
-+ */
-+ unsigned discard_idx; /* Next bucket to discard */
-+ unsigned dirty_idx_ondisk;
-+ unsigned dirty_idx;
-+ unsigned cur_idx; /* Journal bucket we're currently writing to */
-+ unsigned nr;
-+
-+ u64 *buckets;
-+
-+ /* Bio for journal reads/writes to this device */
-+ struct bio *bio;
-+
-+ /* for bch_journal_read_device */
-+ struct closure read;
-+};
-+
-+/*
-+ * journal_entry_res - reserve space in every journal entry:
-+ */
-+struct journal_entry_res {
-+ unsigned u64s;
-+};
-+
-+#endif /* _BCACHEFS_JOURNAL_TYPES_H */
-diff --git a/fs/bcachefs/keylist.c b/fs/bcachefs/keylist.c
-new file mode 100644
-index 000000000000..5699cd4873c8
---- /dev/null
-+++ b/fs/bcachefs/keylist.c
-@@ -0,0 +1,52 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey.h"
-+#include "keylist.h"
-+
-+int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
-+ size_t nr_inline_u64s, size_t new_u64s)
-+{
-+ size_t oldsize = bch2_keylist_u64s(l);
-+ size_t newsize = oldsize + new_u64s;
-+ u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p;
-+ u64 *new_keys;
-+
-+ newsize = roundup_pow_of_two(newsize);
-+
-+ if (newsize <= nr_inline_u64s ||
-+ (old_buf && roundup_pow_of_two(oldsize) == newsize))
-+ return 0;
-+
-+ new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOFS);
-+ if (!new_keys)
-+ return -ENOMEM;
-+
-+ if (!old_buf)
-+ memcpy_u64s(new_keys, inline_u64s, oldsize);
-+
-+ l->keys_p = new_keys;
-+ l->top_p = new_keys + oldsize;
-+
-+ return 0;
-+}
-+
-+void bch2_keylist_pop_front(struct keylist *l)
-+{
-+ l->top_p -= bch2_keylist_front(l)->k.u64s;
-+
-+ memmove_u64s_down(l->keys,
-+ bkey_next(l->keys),
-+ bch2_keylist_u64s(l));
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+void bch2_verify_keylist_sorted(struct keylist *l)
-+{
-+ struct bkey_i *k;
-+
-+ for_each_keylist_key(l, k)
-+ BUG_ON(bkey_next(k) != l->top &&
-+ bpos_ge(k->k.p, bkey_next(k)->k.p));
-+}
-+#endif
-diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h
-new file mode 100644
-index 000000000000..fe759c7031e0
---- /dev/null
-+++ b/fs/bcachefs/keylist.h
-@@ -0,0 +1,74 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_KEYLIST_H
-+#define _BCACHEFS_KEYLIST_H
-+
-+#include "keylist_types.h"
-+
-+int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t);
-+void bch2_keylist_pop_front(struct keylist *);
-+
-+static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys)
-+{
-+ l->top_p = l->keys_p = inline_keys;
-+}
-+
-+static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys)
-+{
-+ if (l->keys_p != inline_keys)
-+ kfree(l->keys_p);
-+}
-+
-+static inline void bch2_keylist_push(struct keylist *l)
-+{
-+ l->top = bkey_next(l->top);
-+}
-+
-+static inline void bch2_keylist_add(struct keylist *l, const struct bkey_i *k)
-+{
-+ bkey_copy(l->top, k);
-+ bch2_keylist_push(l);
-+}
-+
-+/* True when @l holds no keys, i.e. top has not moved away from keys. */
-+static inline bool bch2_keylist_empty(struct keylist *l)
-+{
-+	return l->keys == l->top;
-+}
-+
-+/* Number of u64 words in use: the distance from keys_p to top_p. */
-+static inline size_t bch2_keylist_u64s(struct keylist *l)
-+{
-+ return l->top_p - l->keys_p;
-+}
-+
-+/* Size in bytes of the keys currently stored in @l. */
-+static inline size_t bch2_keylist_bytes(struct keylist *l)
-+{
-+	size_t nr_u64s = bch2_keylist_u64s(l);
-+
-+	return nr_u64s * sizeof(u64);
-+}
-+
-+/* Return the first key slot of @l; does not check whether the list is empty. */
-+static inline struct bkey_i *bch2_keylist_front(struct keylist *l)
-+{
-+ return l->keys;
-+}
-+
-+/*
-+ * Iterate @_k over every key in @_keylist, from keys up to (not including)
-+ * top, stepping with bkey_next().
-+ */
-+#define for_each_keylist_key(_keylist, _k) \
-+ for (_k = (_keylist)->keys; \
-+ _k != (_keylist)->top; \
-+ _k = bkey_next(_k))
-+
-+/* Sum of the size field of every key in @keys. */
-+static inline u64 keylist_sectors(struct keylist *keys)
-+{
-+	u64 total = 0;
-+	struct bkey_i *cur;
-+
-+	for_each_keylist_key(keys, cur)
-+		total += cur->k.size;
-+
-+	return total;
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+void bch2_verify_keylist_sorted(struct keylist *);
-+#else
-+static inline void bch2_verify_keylist_sorted(struct keylist *l) {}
-+#endif
-+
-+#endif /* _BCACHEFS_KEYLIST_H */
-diff --git a/fs/bcachefs/keylist_types.h b/fs/bcachefs/keylist_types.h
-new file mode 100644
-index 000000000000..4b3ff7d8a875
---- /dev/null
-+++ b/fs/bcachefs/keylist_types.h
-@@ -0,0 +1,16 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_KEYLIST_TYPES_H
-+#define _BCACHEFS_KEYLIST_TYPES_H
-+
-+/*
-+ * A list of keys packed back to back in one buffer. Each pointer is
-+ * available both as a key pointer and as a u64 pointer for word arithmetic.
-+ */
-+struct keylist {
-+ /* start of the buffer / first key */
-+ union {
-+ struct bkey_i *keys;
-+ u64 *keys_p;
-+ };
-+ /* one past the last key; equal to keys when the list is empty */
-+ union {
-+ struct bkey_i *top;
-+ u64 *top_p;
-+ };
-+};
-+
-+#endif /* _BCACHEFS_KEYLIST_TYPES_H */
-diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c
-new file mode 100644
-index 000000000000..8640f7dee0de
---- /dev/null
-+++ b/fs/bcachefs/logged_ops.c
-@@ -0,0 +1,112 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "error.h"
-+#include "io_misc.h"
-+#include "logged_ops.h"
-+#include "super.h"
-+
-+/* Maps a logged-operation key type to the function that resumes it. */
-+struct bch_logged_op_fn {
-+ /* key type this entry handles; matched against the key in logged_op_fn() */
-+ u8 type;
-+ /* called by resume_logged_op() with a copy of the logged-op key */
-+ int (*resume)(struct btree_trans *, struct bkey_i *);
-+};
-+
-+/*
-+ * One entry per operation listed in BCH_LOGGED_OPS(), generated with the
-+ * x-macro: KEY_TYPE_logged_op_<n> paired with bch2_resume_logged_op_<n>.
-+ */
-+static const struct bch_logged_op_fn logged_op_fns[] = {
-+#define x(n) { \
-+ .type = KEY_TYPE_logged_op_##n, \
-+ .resume = bch2_resume_logged_op_##n, \
-+},
-+ BCH_LOGGED_OPS()
-+#undef x
-+};
-+
-+/* Look up the handler for key type @type; NULL if none is registered. */
-+static const struct bch_logged_op_fn *logged_op_fn(enum bch_bkey_type type)
-+{
-+	const struct bch_logged_op_fn *fn = logged_op_fns;
-+	const struct bch_logged_op_fn *end = logged_op_fns + ARRAY_SIZE(logged_op_fns);
-+
-+	for (; fn < end; fn++)
-+		if (fn->type == type)
-+			return fn;
-+
-+	return NULL;
-+}
-+
-+/*
-+ * Resume the logged operation described by @k, if a handler exists for its
-+ * key type. Keys without a handler are skipped and 0 is returned.
-+ *
-+ * NOTE(review): @iter is not used in this function.
-+ */
-+static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
-+ struct bkey_buf sk;
-+ u32 restart_count = trans->restart_count;
-+ int ret;
-+
-+ if (!fn)
-+ return 0;
-+
-+ /* the handler is given this copy (sk.k), not @k itself */
-+ bch2_bkey_buf_init(&sk);
-+ bch2_bkey_buf_reassemble(&sk, c, k);
-+
-+ /*
-+ * First nonzero result wins: the lazy-rw step run under drop_locks_do()
-+ * (the comma expression itself always yields 0), then the handler, then
-+ * trans_was_restarted() against the restart count sampled on entry.
-+ */
-+ ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?:
-+ fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
-+
-+ bch2_bkey_buf_exit(&sk, c);
-+ return ret;
-+}
-+
-+/*
-+ * Walk every key in the logged_ops btree, starting at POS_MIN, and call
-+ * resume_logged_op() on each. A nonzero result is logged with bch_err_fn()
-+ * and returned.
-+ */
-+int bch2_resume_logged_ops(struct bch_fs *c)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ /* `trans` is not declared here; presumably bch2_trans_run() provides it */
-+ ret = bch2_trans_run(c,
-+ for_each_btree_key2(trans, iter,
-+ BTREE_ID_logged_ops, POS_MIN, BTREE_ITER_PREFETCH, k,
-+ resume_logged_op(trans, &iter, k)));
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/*
-+ * Pick an empty slot in the logged_ops btree, store its position in @k and
-+ * queue @k as an update in @trans. The commit is done by the caller.
-+ */
-+static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
-+{
-+ struct btree_iter iter;
-+ int ret;
-+
-+ /* presumably POS_MAX is the upper bound of the slot search; confirm */
-+ ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX);
-+ if (ret)
-+ return ret;
-+
-+ /* the key lives wherever the iterator found the free slot */
-+ k->k.p = iter.pos;
-+
-+ ret = bch2_trans_update(trans, &iter, k, 0);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/*
-+ * Insert @k into the logged_ops btree and commit, via commit_do() with
-+ * BTREE_INSERT_NOFAIL. On return @k->k.p holds the slot that was chosen.
-+ */
-+int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
-+{
-+ return commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ __bch2_logged_op_start(trans, k));
-+}
-+
-+/*
-+ * Delete the logged-op key at @k's position now that the operation is done.
-+ * Failure to delete is reported as a fatal filesystem error.
-+ */
-+void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct printbuf buf = PRINTBUF;
-+	int ret;
-+
-+	ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+			bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0));
-+	if (!ret)
-+		return;
-+
-+	/*
-+	 * This needs to be a fatal error because we've left an unfinished
-+	 * operation in the logged ops btree.
-+	 *
-+	 * We should only ever see an error here if the filesystem has already
-+	 * been shut down, but make sure of that here:
-+	 */
-+	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
-+	bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s",
-+			    __func__, buf.buf, bch2_err_str(ret));
-+	printbuf_exit(&buf);
-+}
-diff --git a/fs/bcachefs/logged_ops.h b/fs/bcachefs/logged_ops.h
-new file mode 100644
-index 000000000000..4d1e786a27a8
---- /dev/null
-+++ b/fs/bcachefs/logged_ops.h
-@@ -0,0 +1,20 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_LOGGED_OPS_H
-+#define _BCACHEFS_LOGGED_OPS_H
-+
-+#include "bkey.h"
-+
-+/* x-macro list of logged operation names; users define x(n) before expanding. */
-+#define BCH_LOGGED_OPS() \
-+ x(truncate) \
-+ x(finsert)
-+
-+/*
-+ * Write @op into the logged_ops btree through
-+ * bch2_btree_insert_nonextent(), with no extra flags.
-+ */
-+static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i *op)
-+{
-+ return bch2_btree_insert_nonextent(trans, BTREE_ID_logged_ops, op, 0);
-+}
-+
-+int bch2_resume_logged_ops(struct bch_fs *);
-+int bch2_logged_op_start(struct btree_trans *, struct bkey_i *);
-+void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *);
-+
-+#endif /* _BCACHEFS_LOGGED_OPS_H */
-diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
-new file mode 100644
-index 000000000000..a5cc0ed195d6
---- /dev/null
-+++ b/fs/bcachefs/lru.c
-@@ -0,0 +1,164 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "btree_iter.h"
-+#include "btree_update.h"
-+#include "btree_write_buffer.h"
-+#include "error.h"
-+#include "lru.h"
-+#include "recovery.h"
-+
-+/* KEY_TYPE_lru is obsolete: */
-+/*
-+ * Key validation hook for LRU keys: flags lru_entry_at_time_0 when the time
-+ * field of the key's position is zero.
-+ *
-+ * NOTE(review): @flags is unused here; bkey_fsck_err_on() presumably sets
-+ * ret and jumps to the fsck_err label on failure -- confirm against the macro.
-+ */
-+int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err,
-+ lru_entry_at_time_0,
-+ "lru entry at time=0");
-+fsck_err:
-+ return ret;
-+}
-+
-+/* Print the idx field of LRU key value @k to @out. @c is unused. */
-+void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c,
-+		      struct bkey_s_c k)
-+{
-+	u64 idx = le64_to_cpu(bkey_s_c_to_lru(k).v->idx);
-+
-+	prt_printf(out, "idx %llu", idx);
-+}
-+
-+/*
-+ * Print LRU position @lru as "<lru id>:<time> -> <inode>:<offset>", where the
-+ * right-hand side is the bucket position decoded from lru.offset.
-+ */
-+void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru)
-+{
-+	struct bpos bucket = u64_to_bucket(lru.offset);
-+
-+	prt_printf(out, "%llu:%llu -> %llu:%llu",
-+		   lru_pos_id(lru),
-+		   lru_pos_time(lru),
-+		   bucket.inode,
-+		   bucket.offset);
-+}
-+
-+/*
-+ * Set or clear the LRU entry for (@lru_id, @dev_bucket, @time) in the lru
-+ * btree. A @time of zero is a no-op that returns 0.
-+ */
-+static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
-+			  u64 dev_bucket, u64 time, bool set)
-+{
-+	if (!time)
-+		return 0;
-+
-+	return bch2_btree_bit_mod(trans, BTREE_ID_lru,
-+				  lru_pos(lru_id, dev_bucket, time), set);
-+}
-+
-+/*
-+ * Clear the LRU entry for (@lru_id, @dev_bucket, @time).
-+ *
-+ * NOTE(review): KEY_TYPE_deleted is passed where __bch2_lru_set() takes a
-+ * bool; this clears the entry only if KEY_TYPE_deleted is 0 -- confirm.
-+ */
-+int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
-+{
-+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
-+}
-+
-+/*
-+ * Set the LRU entry for (@lru_id, @dev_bucket, @time).
-+ *
-+ * NOTE(review): KEY_TYPE_set is passed where __bch2_lru_set() takes a bool;
-+ * this sets the entry only if KEY_TYPE_set is nonzero -- confirm.
-+ */
-+int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
-+{
-+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
-+}
-+
-+/*
-+ * Move a bucket's LRU entry from @old_time to @new_time: delete the old
-+ * entry, then set the new one. Nothing is done when the times are equal.
-+ */
-+int bch2_lru_change(struct btree_trans *trans,
-+		    u16 lru_id, u64 dev_bucket,
-+		    u64 old_time, u64 new_time)
-+{
-+	int ret;
-+
-+	if (old_time == new_time)
-+		return 0;
-+
-+	ret = bch2_lru_del(trans, lru_id, dev_bucket, old_time);
-+	if (ret)
-+		return ret;
-+
-+	return bch2_lru_set(trans, lru_id, dev_bucket, new_time);
-+}
-+
-+/*
-+ * Names of the LRU types, generated from BCH_LRU_TYPES() and NULL-terminated.
-+ * Indexed by enum bch_lru_type in bch2_check_lru_key().
-+ */
-+static const char * const bch2_lru_types[] = {
-+#define x(n) #n,
-+ BCH_LRU_TYPES()
-+#undef x
-+ NULL
-+};
-+
-+/*
-+ * Check one LRU key against the alloc key of the bucket it points to, and
-+ * delete the LRU key when it refers to a nonexistent bucket or disagrees with
-+ * the alloc key.
-+ *
-+ * @last_flushed_pos remembers the last position for which the write buffer
-+ * was flushed, so each mismatching key triggers at most one flush and retry.
-+ */
-+static int bch2_check_lru_key(struct btree_trans *trans,
-+ struct btree_iter *lru_iter,
-+ struct bkey_s_c lru_k,
-+ struct bpos *last_flushed_pos)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_alloc_v4 a_convert;
-+ const struct bch_alloc_v4 *a;
-+ struct printbuf buf1 = PRINTBUF;
-+ struct printbuf buf2 = PRINTBUF;
-+ enum bch_lru_type type = lru_type(lru_k);
-+ struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
-+ u64 idx;
-+ int ret;
-+
-+ /* returns before iter is initialized, so the cleanup path is skipped */
-+ if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c,
-+ lru_entry_to_invalid_bucket,
-+ "lru key points to nonexistent device:bucket %llu:%llu",
-+ alloc_pos.inode, alloc_pos.offset))
-+ return bch2_btree_delete_at(trans, lru_iter, 0);
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ a = bch2_alloc_to_v4(k, &a_convert);
-+
-+ /*
-+ * idx is the time the alloc key says this LRU entry should carry.
-+ * NOTE(review): there is no default case, so idx would stay
-+ * uninitialized for any other type value.
-+ */
-+ switch (type) {
-+ case BCH_LRU_read:
-+ idx = alloc_lru_idx_read(*a);
-+ break;
-+ case BCH_LRU_fragmentation:
-+ idx = a->fragmentation_lru;
-+ break;
-+ }
-+
-+ if (lru_k.k->type != KEY_TYPE_set ||
-+ lru_pos_time(lru_k.k->p) != idx) {
-+ /*
-+ * First mismatch at this position: flush the write buffer and
-+ * return a transaction-restart error (or the flush error) so
-+ * the key is checked again before it is reported.
-+ */
-+ if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) {
-+ *last_flushed_pos = lru_k.k->p;
-+ ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-+ -BCH_ERR_transaction_restart_write_buffer_flush;
-+ goto out;
-+ }
-+
-+ /* with reconstruct_alloc set, delete without reporting */
-+ if (c->opts.reconstruct_alloc ||
-+ fsck_err(c, lru_entry_bad,
-+ "incorrect lru entry: lru %s time %llu\n"
-+ " %s\n"
-+ " for %s",
-+ bch2_lru_types[type],
-+ lru_pos_time(lru_k.k->p),
-+ (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
-+ (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
-+ ret = bch2_btree_delete_at(trans, lru_iter, 0);
-+ }
-+out:
-+err:
-+fsck_err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ printbuf_exit(&buf2);
-+ printbuf_exit(&buf1);
-+ return ret;
-+}
-+
-+/*
-+ * Run bch2_check_lru_key() over every key in the lru btree, committing after
-+ * each one. A nonzero result is logged with bch_err_fn() and returned.
-+ */
-+int bch2_check_lrus(struct bch_fs *c)
-+{
-+	struct bpos last_flushed_pos = POS_MIN;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	ret = bch2_trans_run(c,
-+		for_each_btree_key_commit(trans, iter,
-+				BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
-+				NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
-+			bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
-+	if (ret)
-+		bch_err_fn(c, ret);
-+
-+	return ret;
-+}
-diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
-new file mode 100644
-index 000000000000..429dca816df5
---- /dev/null
-+++ b/fs/bcachefs/lru.h
-@@ -0,0 +1,69 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_LRU_H
-+#define _BCACHEFS_LRU_H
-+
-+#define LRU_TIME_BITS 48
-+#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
-+
-+/* LRU id of @pos: the bits of pos.inode above the low LRU_TIME_BITS bits. */
-+static inline u64 lru_pos_id(struct bpos pos)
-+{
-+ return pos.inode >> LRU_TIME_BITS;
-+}
-+
-+/* Time field of @pos: the low LRU_TIME_BITS bits of pos.inode. */
-+static inline u64 lru_pos_time(struct bpos pos)
-+{
-+	return pos.inode & LRU_TIME_MAX;
-+}
-+
-+/*
-+ * Build the lru btree position for an entry: inode packs @lru_id above the
-+ * low LRU_TIME_BITS bits holding @time, and offset is @dev_bucket.
-+ * The EBUG_ON()s check that @time fits and that the fields round-trip.
-+ */
-+static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time)
-+{
-+ struct bpos pos = POS(((u64) lru_id << LRU_TIME_BITS)|time, dev_bucket);
-+
-+ EBUG_ON(time > LRU_TIME_MAX);
-+ EBUG_ON(lru_pos_id(pos) != lru_id);
-+ EBUG_ON(lru_pos_time(pos) != time);
-+ EBUG_ON(pos.offset != dev_bucket);
-+
-+ return pos;
-+}
-+
-+/* x-macro list of LRU type names; users define x(n) before expanding. */
-+#define BCH_LRU_TYPES() \
-+ x(read) \
-+ x(fragmentation)
-+
-+/* One BCH_LRU_<name> enumerator per entry of BCH_LRU_TYPES(), in list order. */
-+enum bch_lru_type {
-+#define x(n) BCH_LRU_##n,
-+ BCH_LRU_TYPES()
-+#undef x
-+};
-+
-+#define BCH_LRU_FRAGMENTATION_START ((1U << 16) - 1)
-+
-+/*
-+ * Classify LRU key @l by the lru id stored above the time bits of its inode
-+ * field: BCH_LRU_FRAGMENTATION_START selects the fragmentation LRU, every
-+ * other id is treated as a read LRU.
-+ */
-+static inline enum bch_lru_type lru_type(struct bkey_s_c l)
-+{
-+	/* Decode with lru_pos_id() so the shift stays tied to LRU_TIME_BITS. */
-+	u16 lru_id = lru_pos_id(l.k->p);
-+
-+	if (lru_id == BCH_LRU_FRAGMENTATION_START)
-+		return BCH_LRU_fragmentation;
-+	return BCH_LRU_read;
-+}
-+
-+int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+void bch2_lru_pos_to_text(struct printbuf *, struct bpos);
-+
-+/* bkey_ops for LRU keys: validation and printing hooks, minimum value size 8. */
-+#define bch2_bkey_ops_lru ((struct bkey_ops) { \
-+ .key_invalid = bch2_lru_invalid, \
-+ .val_to_text = bch2_lru_to_text, \
-+ .min_val_size = 8, \
-+})
-+
-+int bch2_lru_del(struct btree_trans *, u16, u64, u64);
-+int bch2_lru_set(struct btree_trans *, u16, u64, u64);
-+int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
-+
-+int bch2_check_lrus(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_LRU_H */
-diff --git a/fs/bcachefs/mean_and_variance.c b/fs/bcachefs/mean_and_variance.c
-new file mode 100644
-index 000000000000..1f0801e2e565
---- /dev/null
-+++ b/fs/bcachefs/mean_and_variance.c
-@@ -0,0 +1,159 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Functions for incremental mean and variance.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License version 2 as published by
-+ * the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-+ * more details.
-+ *
-+ * Copyright © 2022 Daniel B. Hill
-+ *
-+ * Author: Daniel B. Hill <daniel@gluo.nz>
-+ *
-+ * Description:
-+ *
-+ * This includes some incremental algorithms for mean and variance calculation
-+ *
-+ * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
-+ *
-+ * Create a struct and if it's the weighted variant set the w field (weight = 2^k).
-+ *
-+ * Use mean_and_variance[_weighted]_update() on the struct to update its state.
-+ *
-+ * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean and variance, some computation
-+ * is deferred to these functions for performance reasons.
-+ *
-+ * see lib/math/mean_and_variance_test.c for examples of usage.
-+ *
-+ * DO NOT access the mean and variance fields of the weighted variants directly.
-+ * DO NOT change the weight after calling update.
-+ */
-+
-+#include <linux/bug.h>
-+#include <linux/compiler.h>
-+#include <linux/export.h>
-+#include <linux/limits.h>
-+#include <linux/math.h>
-+#include <linux/math64.h>
-+#include <linux/module.h>
-+
-+#include "mean_and_variance.h"
-+
-+/*
-+ * Divide 128-bit @n by 64-bit @d in three steps: the upper 32 bits of the
-+ * high word, the lower 32 bits of the high word, then the low word. Each
-+ * step adds the previous remainder, shifted left by 32, to its dividend.
-+ *
-+ * NOTE(review): the `+ (rem << 32)` sums are plain u64 arithmetic and can
-+ * wrap for large remainders -- confirm the intended operand range.
-+ */
-+u128_u u128_div(u128_u n, u64 d)
-+{
-+ u128_u r;
-+ u64 rem;
-+ u64 hi = u128_hi(n);
-+ u64 lo = u128_lo(n);
-+ /* h: top 32 bits of hi, left in place; l: bottom 32 bits of hi, moved up */
-+ u64 h = hi & ((u64) U32_MAX << 32);
-+ u64 l = (hi & (u64) U32_MAX) << 32;
-+
-+ r = u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64);
-+ r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32));
-+ r = u128_add(r, u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem)));
-+ return r;
-+}
-+EXPORT_SYMBOL_GPL(u128_div);
-+
-+/**
-+ * mean_and_variance_get_mean() - get mean from @s
-+ * @s: accumulated samples
-+ *
-+ * Return: sum / n truncated toward zero, or 0 if no samples were recorded.
-+ */
-+s64 mean_and_variance_get_mean(struct mean_and_variance s)
-+{
-+	/*
-+	 * sum is signed: use the signed divide so a negative running sum is not
-+	 * reinterpreted as a huge unsigned value.
-+	 */
-+	return s.n ? div64_s64(s.sum, s.n) : 0;
-+}
-+EXPORT_SYMBOL_GPL(mean_and_variance_get_mean);
-+
-+/**
-+ * mean_and_variance_get_variance() - get variance from @s1
-+ * @s1: accumulated samples
-+ *
-+ * Computed as (sum of squares / n) - mean^2; see linked pdf equation 12.
-+ *
-+ * Return: the low 64 bits of the variance, or 0 if no samples were recorded.
-+ */
-+u64 mean_and_variance_get_variance(struct mean_and_variance s1)
-+{
-+	u128_u mean_of_squares, square_of_mean;
-+
-+	if (!s1.n)
-+		return 0;
-+
-+	mean_of_squares = u128_div(s1.sum_squares, s1.n);
-+	square_of_mean = u128_square(abs(mean_and_variance_get_mean(s1)));
-+
-+	return u128_lo(u128_sub(mean_of_squares, square_of_mean));
-+}
-+EXPORT_SYMBOL_GPL(mean_and_variance_get_variance);
-+
-+/**
-+ * mean_and_variance_get_stddev() - get standard deviation from @s
-+ * @s: accumulated samples
-+ *
-+ * Return: integer square root of mean_and_variance_get_variance(@s).
-+ */
-+u32 mean_and_variance_get_stddev(struct mean_and_variance s)
-+{
-+ return int_sqrt64(mean_and_variance_get_variance(s));
-+}
-+EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
-+
-+/**
-+ * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
-+ * @s: weighted mean/variance state to update
-+ * @x: new sample
-+ *
-+ * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
-+ * values are stored bitshifted for performance and added precision.
-+ *
-+ * The first call (s->init false) seeds the mean with the sample and sets the
-+ * variance to 0.
-+ */
-+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 x)
-+{
-+ // previous weighted variance.
-+ u8 w = s->weight;
-+ u64 var_w0 = s->variance;
-+ // new value weighted.
-+ s64 x_w = x << w;
-+ s64 diff_w = x_w - s->mean;
-+ s64 diff = fast_divpow2(diff_w, w);
-+ // new mean weighted.
-+ s64 u_w1 = s->mean + diff;
-+
-+ if (!s->init) {
-+ s->mean = x_w;
-+ s->variance = 0;
-+ } else {
-+ s->mean = u_w1;
-+ s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
-+ }
-+ s->init = true;
-+}
-+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
-+
-+/**
-+ * mean_and_variance_weighted_get_mean() - get mean from @s
-+ * @s: weighted state
-+ *
-+ * Return: the stored mean divided by 2^weight, rounded towards zero.
-+ */
-+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s)
-+{
-+ return fast_divpow2(s.mean, s.weight);
-+}
-+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
-+
-+/**
-+ * mean_and_variance_weighted_get_variance() -- get variance from @s
-+ * @s: weighted state
-+ *
-+ * Return: the stored variance shifted right by the weight.
-+ */
-+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s)
-+{
-+ // always positive don't need fast divpow2
-+ return s.variance >> s.weight;
-+}
-+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
-+
-+/**
-+ * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
-+ * @s: weighted state
-+ *
-+ * Return: integer square root of mean_and_variance_weighted_get_variance(@s).
-+ */
-+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s)
-+{
-+ return int_sqrt64(mean_and_variance_weighted_get_variance(s));
-+}
-+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev);
-+
-+MODULE_AUTHOR("Daniel B. Hill");
-+MODULE_LICENSE("GPL");
-diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h
-new file mode 100644
-index 000000000000..647505010b39
---- /dev/null
-+++ b/fs/bcachefs/mean_and_variance.h
-@@ -0,0 +1,198 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef MEAN_AND_VARIANCE_H_
-+#define MEAN_AND_VARIANCE_H_
-+
-+#include <linux/types.h>
-+#include <linux/limits.h>
-+#include <linux/math.h>
-+#include <linux/math64.h>
-+
-+#define SQRT_U64_MAX 4294967295ULL
-+
-+/*
-+ * u128_u: u128 user mode, because not all architectures support a real int128
-+ * type
-+ */
-+
-+#ifdef __SIZEOF_INT128__
-+
-+/* 128-bit unsigned value backed by the compiler's native __int128. */
-+typedef struct {
-+ unsigned __int128 v;
-+} __aligned(16) u128_u;
-+
-+/* Widen @a to 128 bits; the high half is zero. */
-+static inline u128_u u64_to_u128(u64 a)
-+{
-+	u128_u r = { .v = a };
-+
-+	return r;
-+}
-+
-+/* Low 64 bits of @a (the conversion to u64 truncates). */
-+static inline u64 u128_lo(u128_u a)
-+{
-+ return a.v;
-+}
-+
-+/* High 64 bits of @a. */
-+static inline u64 u128_hi(u128_u a)
-+{
-+ return a.v >> 64;
-+}
-+
-+/* @a + @b, wrapping modulo 2^128. */
-+static inline u128_u u128_add(u128_u a, u128_u b)
-+{
-+	return (u128_u) { .v = a.v + b.v };
-+}
-+
-+/* @a - @b, wrapping modulo 2^128. */
-+static inline u128_u u128_sub(u128_u a, u128_u b)
-+{
-+	return (u128_u) { .v = a.v - b.v };
-+}
-+
-+/* @a shifted left by @shift bits. */
-+static inline u128_u u128_shl(u128_u a, s8 shift)
-+{
-+	return (u128_u) { .v = a.v << shift };
-+}
-+
-+/* Full 128-bit square of the 64-bit value @a. */
-+static inline u128_u u128_square(u64 a)
-+{
-+	unsigned __int128 wide = a;
-+
-+	return (u128_u) { .v = wide * wide };
-+}
-+
-+#else
-+
-+/* Fallback 128-bit unsigned value built from two u64 halves (no native __int128). */
-+typedef struct {
-+ u64 hi, lo;
-+} __aligned(16) u128_u;
-+
-+/* conversions */
-+
-+/* Widen @a to 128 bits; the high half is zero. */
-+static inline u128_u u64_to_u128(u64 a)
-+{
-+	u128_u r = { .hi = 0, .lo = a };
-+
-+	return r;
-+}
-+
-+/* Low 64 bits of @a. */
-+static inline u64 u128_lo(u128_u a)
-+{
-+ return a.lo;
-+}
-+
-+/* High 64 bits of @a. */
-+static inline u64 u128_hi(u128_u a)
-+{
-+ return a.hi;
-+}
-+
-+/* arithmetic */
-+
-+/* @a + @b, wrapping modulo 2^128. */
-+static inline u128_u u128_add(u128_u a, u128_u b)
-+{
-+	u64 lo = a.lo + b.lo;
-+	/* the low word wrapped iff the sum ended up below an operand */
-+	u64 carry = lo < a.lo;
-+
-+	return (u128_u) { .hi = a.hi + b.hi + carry, .lo = lo };
-+}
-+
-+/* @a - @b, wrapping modulo 2^128. */
-+static inline u128_u u128_sub(u128_u a, u128_u b)
-+{
-+	u64 lo = a.lo - b.lo;
-+	/* the low word borrowed iff the difference ended up above a.lo */
-+	u64 borrow = lo > a.lo;
-+
-+	return (u128_u) { .hi = a.hi - b.hi - borrow, .lo = lo };
-+}
-+
-+/*
-+ * @i shifted left by @shift bits, for @shift in [0, 127].
-+ *
-+ * Each case is handled separately so that no 64-bit word is ever shifted by
-+ * 64 or more, which is undefined in C.
-+ */
-+static inline u128_u u128_shl(u128_u i, s8 shift)
-+{
-+	u128_u r;
-+
-+	/* shift == 0 would otherwise need i.lo >> 64 for the carried bits */
-+	if (!shift)
-+		return i;
-+
-+	if (shift < 64) {
-+		r.lo = i.lo << shift;
-+		r.hi = (i.hi << shift) | (i.lo >> (64 - shift));
-+	} else {
-+		/* the whole low word moves into the high word */
-+		r.hi = i.lo << (shift - 64);
-+		r.lo = 0;
-+	}
-+	return r;
-+}
-+
-+/*
-+ * Full 128-bit square of the 64-bit value @i, built from 32-bit halves:
-+ * (h*2^32 + l)^2 = h*h*2^64 + 2*h*l*2^32 + l*l.
-+ */
-+static inline u128_u u128_square(u64 i)
-+{
-+	u64 h = i >> 32;
-+	u64 l = i & U32_MAX;
-+	u128_u cross = u128_shl(u64_to_u128(h * l), 32);
-+	u128_u sum = u128_shl(u64_to_u128(h * h), 64);
-+
-+	/* the cross term appears twice in the expansion */
-+	sum = u128_add(sum, cross);
-+	sum = u128_add(sum, cross);
-+
-+	return u128_add(sum, u64_to_u128(l * l));
-+}
-+
-+#endif
-+
-+/* Assemble a 128-bit value from its @hi and @lo 64-bit halves. */
-+static inline u128_u u64s_to_u128(u64 hi, u64 lo)
-+{
-+	u128_u upper = u128_shl(u64_to_u128(hi), 64);
-+
-+	return u128_add(upper, u64_to_u128(lo));
-+}
-+
-+u128_u u128_div(u128_u n, u64 d);
-+
-+/* Running totals from which mean and variance are derived on demand. */
-+struct mean_and_variance {
-+ /* number of samples */
-+ s64 n;
-+ /* sum of the samples */
-+ s64 sum;
-+ /* sum of the squared samples, kept in 128 bits */
-+ u128_u sum_squares;
-+};
-+
-+/* exponentially weighted variant */
-+struct mean_and_variance_weighted {
-+ /* false until the first sample has been recorded */
-+ bool init;
-+ u8 weight; /* base 2 logarithm */
-+ /* stored shifted left by weight; read via the _get_ helpers */
-+ s64 mean;
-+ u64 variance;
-+};
-+
-+/**
-+ * fast_divpow2() - divide n by (1 << d), rounding towards zero
-+ * @n: numerator
-+ * @d: the power of 2 denominator; must be less than 63.
-+ *
-+ * An arithmetic right shift alone rounds towards negative infinity, so
-+ * negative numerators are biased by (2^d - 1) before shifting.
-+ */
-+static inline s64 fast_divpow2(s64 n, u8 d)
-+{
-+	/* Build the bias in 64 bits: (1 << d) as a plain int overflows for d >= 31. */
-+	s64 bias = n < 0 ? ((s64) 1 << d) - 1 : 0;
-+
-+	return (n + bias) >> d;
-+}
-+
-+/**
-+ * mean_and_variance_update() - update a mean_and_variance struct @s with a new sample @v
-+ * @s: the mean_and_variance to update.
-+ * @v: the new sample.
-+ *
-+ * Updates @s in place; nothing is returned.
-+ *
-+ * see linked pdf equation 12.
-+ */
-+static inline void
-+mean_and_variance_update(struct mean_and_variance *s, s64 v)
-+{
-+ s->n++;
-+ s->sum += v;
-+ s->sum_squares = u128_add(s->sum_squares, u128_square(abs(v)));
-+}
-+
-+s64 mean_and_variance_get_mean(struct mean_and_variance s);
-+u64 mean_and_variance_get_variance(struct mean_and_variance s1);
-+u32 mean_and_variance_get_stddev(struct mean_and_variance s);
-+
-+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 v);
-+
-+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s);
-+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s);
-+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s);
-+
-+#endif // MEAN_AND_VARIANCE_H_
-diff --git a/fs/bcachefs/mean_and_variance_test.c b/fs/bcachefs/mean_and_variance_test.c
-new file mode 100644
-index 000000000000..019583c3ca0e
---- /dev/null
-+++ b/fs/bcachefs/mean_and_variance_test.c
-@@ -0,0 +1,240 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include <kunit/test.h>
-+
-+#include "mean_and_variance.h"
-+
-+#define MAX_SQR (SQRT_U64_MAX*SQRT_U64_MAX)
-+
-+/* Unweighted sanity check: samples 2, 2, then 4, 4, checking mean, variance and n. */
-+static void mean_and_variance_basic_test(struct kunit *test)
-+{
-+ struct mean_and_variance s = {};
-+
-+ mean_and_variance_update(&s, 2);
-+ mean_and_variance_update(&s, 2);
-+
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(s), 2);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_get_variance(s), 0);
-+ KUNIT_EXPECT_EQ(test, s.n, 2);
-+
-+ mean_and_variance_update(&s, 4);
-+ mean_and_variance_update(&s, 4);
-+
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(s), 3);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_get_variance(s), 1);
-+ KUNIT_EXPECT_EQ(test, s.n, 4);
-+}
-+
-+/*
-+ * Test values computed using a spreadsheet from the pseudocode at the bottom:
-+ * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
-+ */
-+
-+/*
-+ * Weighted variant with weight 2: feed 10, 20, 30 and then, on a fresh
-+ * struct, -10, -20, -30, checking mean and variance after every sample.
-+ */
-+static void mean_and_variance_weighted_test(struct kunit *test)
-+{
-+ struct mean_and_variance_weighted s = { .weight = 2 };
-+
-+ mean_and_variance_weighted_update(&s, 10);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 10);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 0);
-+
-+ mean_and_variance_weighted_update(&s, 20);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 12);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 18);
-+
-+ mean_and_variance_weighted_update(&s, 30);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 16);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 72);
-+
-+ /* start over and mirror the same sequence with negative samples */
-+ s = (struct mean_and_variance_weighted) { .weight = 2 };
-+
-+ mean_and_variance_weighted_update(&s, -10);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -10);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 0);
-+
-+ mean_and_variance_weighted_update(&s, -20);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -12);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 18);
-+
-+ mean_and_variance_weighted_update(&s, -30);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -16);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 72);
-+}
-+
-+/*
-+ * Weighted variant with weight 8: feed 10, 20, ..., 100 and then, on a fresh
-+ * struct, the negated sequence, checking only the final mean and variance.
-+ */
-+static void mean_and_variance_weighted_advanced_test(struct kunit *test)
-+{
-+ struct mean_and_variance_weighted s = { .weight = 8 };
-+ s64 i;
-+
-+ for (i = 10; i <= 100; i += 10)
-+ mean_and_variance_weighted_update(&s, i);
-+
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 11);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 107);
-+
-+ s = (struct mean_and_variance_weighted) { .weight = 8 };
-+
-+ for (i = -10; i >= -100; i -= 10)
-+ mean_and_variance_weighted_update(&s, i);
-+
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -11);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 107);
-+}
-+
-+/*
-+ * Shared driver: feed @initial_value @initial_n times (expecting mean equal
-+ * to @initial_value and stddev 0 throughout), then feed the @n samples in
-+ * @data, comparing against the per-sample expectation arrays after each one.
-+ * @weight is the weight used for the weighted variant.
-+ */
-+static void do_mean_and_variance_test(struct kunit *test,
-+ s64 initial_value,
-+ s64 initial_n,
-+ s64 n,
-+ unsigned weight,
-+ s64 *data,
-+ s64 *mean,
-+ s64 *stddev,
-+ s64 *weighted_mean,
-+ s64 *weighted_stddev)
-+{
-+ struct mean_and_variance mv = {};
-+ struct mean_and_variance_weighted vw = { .weight = weight };
-+
-+ /* steady-state warm-up */
-+ for (unsigned i = 0; i < initial_n; i++) {
-+ mean_and_variance_update(&mv, initial_value);
-+ mean_and_variance_weighted_update(&vw, initial_value);
-+
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(mv), initial_value);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_get_stddev(mv), 0);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(vw), initial_value);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_stddev(vw),0);
-+ }
-+
-+ /* the samples under test */
-+ for (unsigned i = 0; i < n; i++) {
-+ mean_and_variance_update(&mv, data[i]);
-+ mean_and_variance_weighted_update(&vw, data[i]);
-+
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(mv), mean[i]);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_get_stddev(mv), stddev[i]);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(vw), weighted_mean[i]);
-+ KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_stddev(vw),weighted_stddev[i]);
-+ }
-+
-+ KUNIT_EXPECT_EQ(test, mv.n, initial_n + n);
-+}
-+
-+/*
-+ * Test behaviour with a single outlier, then back to steady state:
-+ * six warm-up samples of 10, then 100 followed by six more 10s, weight 2.
-+ */
-+static void mean_and_variance_test_1(struct kunit *test)
-+{
-+ s64 d[] = { 100, 10, 10, 10, 10, 10, 10 };
-+ s64 mean[] = { 22, 21, 20, 19, 18, 17, 16 };
-+ s64 stddev[] = { 32, 29, 28, 27, 26, 25, 24 };
-+ s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 };
-+ s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 };
-+
-+ do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-+ d, mean, stddev, weighted_mean, weighted_stddev);
-+}
-+
-+/*
-+ * Single outlier again.
-+ *
-+ * NOTE(review): the samples and the do_mean_and_variance_test() arguments are
-+ * identical to mean_and_variance_test_1, yet the expected unweighted mean and
-+ * stddev differ, so both tests cannot pass -- confirm the intended initial_n.
-+ */
-+static void mean_and_variance_test_2(struct kunit *test)
-+{
-+ s64 d[] = { 100, 10, 10, 10, 10, 10, 10 };
-+ s64 mean[] = { 10, 10, 10, 10, 10, 10, 10 };
-+ s64 stddev[] = { 9, 9, 9, 9, 9, 9, 9 };
-+ s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 };
-+ s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 };
-+
-+ do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-+ d, mean, stddev, weighted_mean, weighted_stddev);
-+}
-+
-+/*
-+ * Test behaviour where we switch from one steady state to another:
-+ * six warm-up samples of 10, then five samples of 100, weight 2.
-+ */
-+static void mean_and_variance_test_3(struct kunit *test)
-+{
-+ s64 d[] = { 100, 100, 100, 100, 100 };
-+ s64 mean[] = { 22, 32, 40, 46, 50 };
-+ s64 stddev[] = { 32, 39, 42, 44, 45 };
-+ s64 weighted_mean[] = { 32, 49, 61, 71, 78 };
-+ s64 weighted_stddev[] = { 38, 44, 44, 41, 38 };
-+
-+ do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-+ d, mean, stddev, weighted_mean, weighted_stddev);
-+}
-+
-+/*
-+ * Steady-state switch again.
-+ *
-+ * NOTE(review): the samples and the do_mean_and_variance_test() arguments are
-+ * identical to mean_and_variance_test_3, yet the expected unweighted mean and
-+ * stddev differ, so both tests cannot pass -- confirm the intended initial_n.
-+ */
-+static void mean_and_variance_test_4(struct kunit *test)
-+{
-+ s64 d[] = { 100, 100, 100, 100, 100 };
-+ s64 mean[] = { 10, 11, 12, 13, 14 };
-+ s64 stddev[] = { 9, 13, 15, 17, 19 };
-+ s64 weighted_mean[] = { 32, 49, 61, 71, 78 };
-+ s64 weighted_stddev[] = { 38, 44, 44, 41, 38 };
-+
-+ do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-+ d, mean, stddev, weighted_mean, weighted_stddev);
-+}
-+
-+/*
-+ * Compare fast_divpow2() against div_u64() for 0 <= i < 100 and every shift
-+ * from 0 to 31, for both i and -i (the latter via abs()).
-+ */
-+static void mean_and_variance_fast_divpow2(struct kunit *test)
-+{
-+	s64 i;
-+	u8 d;
-+
-+	for (i = 0; i < 100; i++) {
-+		d = 0;
-+		KUNIT_EXPECT_EQ(test, fast_divpow2(i, d), div_u64(i, 1LLU << d));
-+		KUNIT_EXPECT_EQ(test, abs(fast_divpow2(-i, d)), div_u64(i, 1LLU << d));
-+		for (d = 1; d < 32; d++) {
-+			/* 1U, not 1: a signed int cannot hold 1 << 31 */
-+			KUNIT_EXPECT_EQ_MSG(test, abs(fast_divpow2(i, d)),
-+					    div_u64(i, 1U << d), "%lld %u", i, d);
-+			KUNIT_EXPECT_EQ_MSG(test, abs(fast_divpow2(-i, d)),
-+					    div_u64(i, 1U << d), "%lld %u", -i, d);
-+		}
-+	}
-+}
-+
-+/* Exercise the u128 helpers (add, sub, shl, square, div) at 64-bit boundaries. */
-+static void mean_and_variance_u128_basic_test(struct kunit *test)
-+{
-+ u128_u a = u64s_to_u128(0, U64_MAX);
-+ u128_u a1 = u64s_to_u128(0, 1);
-+ u128_u b = u64s_to_u128(1, 0);
-+ u128_u c = u64s_to_u128(0, 1LLU << 63);
-+ u128_u c2 = u64s_to_u128(U64_MAX, U64_MAX);
-+
-+ /* carry out of the low word, in both operand orders */
-+ KUNIT_EXPECT_EQ(test, u128_hi(u128_add(a, a1)), 1);
-+ KUNIT_EXPECT_EQ(test, u128_lo(u128_add(a, a1)), 0);
-+ KUNIT_EXPECT_EQ(test, u128_hi(u128_add(a1, a)), 1);
-+ KUNIT_EXPECT_EQ(test, u128_lo(u128_add(a1, a)), 0);
-+
-+ /* borrow from the high word */
-+ KUNIT_EXPECT_EQ(test, u128_lo(u128_sub(b, a1)), U64_MAX);
-+ KUNIT_EXPECT_EQ(test, u128_hi(u128_sub(b, a1)), 0);
-+
-+ /* top bit of the low word shifts into the high word */
-+ KUNIT_EXPECT_EQ(test, u128_hi(u128_shl(c, 1)), 1);
-+ KUNIT_EXPECT_EQ(test, u128_lo(u128_shl(c, 1)), 0);
-+
-+ KUNIT_EXPECT_EQ(test, u128_hi(u128_square(U64_MAX)), U64_MAX - 1);
-+ KUNIT_EXPECT_EQ(test, u128_lo(u128_square(U64_MAX)), 1);
-+
-+ KUNIT_EXPECT_EQ(test, u128_lo(u128_div(b, 2)), 1LLU << 63);
-+
-+ KUNIT_EXPECT_EQ(test, u128_hi(u128_div(c2, 2)), U64_MAX >> 1);
-+ KUNIT_EXPECT_EQ(test, u128_lo(u128_div(c2, 2)), U64_MAX);
-+
-+ KUNIT_EXPECT_EQ(test, u128_hi(u128_div(u128_shl(u64_to_u128(U64_MAX), 32), 2)), U32_MAX >> 1);
-+ KUNIT_EXPECT_EQ(test, u128_lo(u128_div(u128_shl(u64_to_u128(U64_MAX), 32), 2)), U64_MAX << 31);
-+}
-+
-+/* KUnit case table for this file; the empty entry terminates the list. */
-+static struct kunit_case mean_and_variance_test_cases[] = {
-+ KUNIT_CASE(mean_and_variance_fast_divpow2),
-+ KUNIT_CASE(mean_and_variance_u128_basic_test),
-+ KUNIT_CASE(mean_and_variance_basic_test),
-+ KUNIT_CASE(mean_and_variance_weighted_test),
-+ KUNIT_CASE(mean_and_variance_weighted_advanced_test),
-+ KUNIT_CASE(mean_and_variance_test_1),
-+ KUNIT_CASE(mean_and_variance_test_2),
-+ KUNIT_CASE(mean_and_variance_test_3),
-+ KUNIT_CASE(mean_and_variance_test_4),
-+ {}
-+};
-+
-+/* KUnit suite wrapping the case table above; registered by kunit_test_suite(). */
-+static struct kunit_suite mean_and_variance_test_suite = {
-+ .name = "mean and variance tests",
-+ .test_cases = mean_and_variance_test_cases
-+};
-+
-+kunit_test_suite(mean_and_variance_test_suite);
-+
-+MODULE_AUTHOR("Daniel B. Hill");
-+MODULE_LICENSE("GPL");
-diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
-new file mode 100644
-index 000000000000..e3a51f6d6c9b
---- /dev/null
-+++ b/fs/bcachefs/migrate.c
-@@ -0,0 +1,179 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Code for moving data off a device.
-+ */
-+
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "buckets.h"
-+#include "errcode.h"
-+#include "extents.h"
-+#include "io_write.h"
-+#include "journal.h"
-+#include "keylist.h"
-+#include "migrate.h"
-+#include "move.h"
-+#include "replicas.h"
-+#include "super-io.h"
-+
-+/*
-+ * Drop device @dev_idx's pointers from key @k, then check what durability is
-+ * left. @metadata selects the metadata or data replica target and force flags.
-+ *
-+ * Returns -EINVAL when no durable copies remain and the "lost" force flag is
-+ * not in @flags, or when fewer than the configured replicas remain and the
-+ * "degraded" force flag is not in @flags; otherwise 0.
-+ */
-+static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
-+			 unsigned dev_idx, int flags, bool metadata)
-+{
-+	unsigned replicas, lost, degraded, nr_good;
-+
-+	if (metadata) {
-+		replicas = c->opts.metadata_replicas;
-+		lost	 = BCH_FORCE_IF_METADATA_LOST;
-+		degraded = BCH_FORCE_IF_METADATA_DEGRADED;
-+	} else {
-+		replicas = c->opts.data_replicas;
-+		lost	 = BCH_FORCE_IF_DATA_LOST;
-+		degraded = BCH_FORCE_IF_DATA_DEGRADED;
-+	}
-+
-+	bch2_bkey_drop_device(k, dev_idx);
-+	nr_good = bch2_bkey_durability(c, k.s_c);
-+
-+	if (!nr_good && !(flags & lost))
-+		return -EINVAL;
-+	if (nr_good < replicas && !(flags & degraded))
-+		return -EINVAL;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Per-key callback for bch2_dev_usrdata_drop(): if @k has a pointer to device
-+ * @dev_idx, make a mutable copy of it, drop that device's pointers and
-+ * normalize the result. Keys that do not reference the device are left alone.
-+ */
-+static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k,
-+ unsigned dev_idx,
-+ int flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_i *n;
-+ int ret;
-+
-+ if (!bch2_bkey_has_device_c(k, dev_idx))
-+ return 0;
-+
-+ n = bch2_bkey_make_mut(trans, iter, &k, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+ ret = PTR_ERR_OR_ZERO(n);
-+ if (ret)
-+ return ret;
-+
-+ /* false: apply the data (not metadata) replica target and force flags */
-+ ret = drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, false);
-+ if (ret)
-+ return ret;
-+
-+ /*
-+ * If the new extent no longer has any pointers, bch2_extent_normalize()
-+ * will do the appropriate thing with it (turning it into a
-+ * KEY_TYPE_error key, or just a discard if it was a cached extent)
-+ */
-+ bch2_extent_normalize(c, bkey_i_to_s(n));
-+
-+ /*
-+ * Since we're not inserting through an extent iterator
-+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
-+ * we aren't using the extent overwrite path to delete, we're
-+ * just using the normal key deletion path:
-+ */
-+ if (bkey_deleted(&n->k))
-+ n->k.size = 0;
-+ return 0;
-+}
-+
-+/*
-+ * Drop pointers to device @dev_idx from every key in every btree whose keys
-+ * can carry pointers, committing as it goes.
-+ *
-+ * Stops at the first btree that reports an error and returns that error;
-+ * returns 0 when all btrees were processed.
-+ */
-+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	enum btree_id btree;
-+	int err = 0;
-+
-+	for (btree = 0; btree < BTREE_ID_NR && !err; btree++) {
-+		/* Only btrees with pointer-bearing keys are of interest */
-+		if (btree_type_has_ptrs(btree))
-+			err = for_each_btree_key_commit(trans, iter, btree, POS_MIN,
-+					BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
-+					NULL, NULL, BTREE_INSERT_NOFAIL,
-+				bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags));
-+	}
-+
-+	bch2_trans_put(trans);
-+
-+	return err;
-+}
-+
-+/*
-+ * Walk the nodes of every btree and, for each node whose key points at device
-+ * @dev_idx, rewrite the node key with those pointers dropped.
-+ *
-+ * BCH_FORCE_IF_METADATA_LOST is not supported and is rejected with -EINVAL
-+ * before any work is done. Returns 0 on success or the first error hit; a
-+ * transaction restart is never returned to the caller.
-+ */
-+static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
-+{
-+	struct btree_trans *trans;
-+	struct btree_iter iter;
-+	struct btree *b;
-+	struct bkey_buf k;
-+	unsigned id;
-+	int ret;
-+
-+	/* don't handle this yet: */
-+	if (flags & BCH_FORCE_IF_METADATA_LOST)
-+		return -EINVAL;
-+
-+	trans = bch2_trans_get(c);
-+	bch2_bkey_buf_init(&k);
-+
-+	for (id = 0; id < BTREE_ID_NR; id++) {
-+		bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
-+					  BTREE_ITER_PREFETCH);
-+retry:
-+		ret = 0;
-+		while (bch2_trans_begin(trans),
-+		       (b = bch2_btree_iter_peek_node(&iter)) &&
-+		       !(ret = PTR_ERR_OR_ZERO(b))) {
-+			if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
-+				goto next;
-+
-+			/* Work on a private copy of the node key */
-+			bch2_bkey_buf_copy(&k, c, &b->key);
-+
-+			ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
-+					    dev_idx, flags, true);
-+			if (ret) {
-+				bch_err(c, "Cannot drop device without losing data");
-+				break;
-+			}
-+
-+			ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
-+			if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
-+				/* Retry the same node without advancing */
-+				ret = 0;
-+				continue;
-+			}
-+
-+			if (ret) {
-+				bch_err_msg(c, ret, "updating btree node key");
-+				break;
-+			}
-+next:
-+			bch2_btree_iter_next_node(&iter);
-+		}
-+		/* A restart surfaced by peek_node: start this btree's walk again */
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			goto retry;
-+
-+		bch2_trans_iter_exit(trans, &iter);
-+
-+		if (ret)
-+			goto err;
-+	}
-+
-+	bch2_btree_interior_updates_flush(c);
-+	ret = 0;
-+err:
-+	bch2_bkey_buf_exit(&k, c);
-+	bch2_trans_put(trans);
-+
-+	BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
-+
-+	return ret;
-+}
-+
-+/*
-+ * Drop all references to device @dev_idx: first from user data keys, then,
-+ * only if that succeeded, from btree node keys. Returns the first error.
-+ */
-+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
-+{
-+	int ret = bch2_dev_usrdata_drop(c, dev_idx, flags);
-+
-+	if (ret)
-+		return ret;
-+
-+	return bch2_dev_metadata_drop(c, dev_idx, flags);
-+}
-diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h
-new file mode 100644
-index 000000000000..027efaa0d575
---- /dev/null
-+++ b/fs/bcachefs/migrate.h
-@@ -0,0 +1,7 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_MIGRATE_H
-+#define _BCACHEFS_MIGRATE_H
-+
-+int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
-+
-+#endif /* _BCACHEFS_MIGRATE_H */
-diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
-new file mode 100644
-index 000000000000..ab749bf2fcbc
---- /dev/null
-+++ b/fs/bcachefs/move.c
-@@ -0,0 +1,1198 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "backpointers.h"
-+#include "bkey_buf.h"
-+#include "btree_gc.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "btree_write_buffer.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "inode.h"
-+#include "io_read.h"
-+#include "io_write.h"
-+#include "journal_reclaim.h"
-+#include "keylist.h"
-+#include "move.h"
-+#include "replicas.h"
-+#include "snapshot.h"
-+#include "super-io.h"
-+#include "trace.h"
-+
-+#include <linux/ioprio.h>
-+#include <linux/kthread.h>
-+
-+/*
-+ * Emit the move_extent tracepoint with a textual rendering of @k. The text is
-+ * only built when the tracepoint is enabled.
-+ */
-+static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
-+{
-+	if (!trace_move_extent_enabled())
-+		return;
-+
-+	struct printbuf text = PRINTBUF;
-+
-+	bch2_bkey_val_to_text(&text, c, k);
-+	trace_move_extent(c, text.buf);
-+	printbuf_exit(&text);
-+}
-+
-+/*
-+ * Emit the move_extent_read tracepoint with a textual rendering of @k. The
-+ * text is only built when the tracepoint is enabled.
-+ */
-+static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
-+{
-+	if (!trace_move_extent_read_enabled())
-+		return;
-+
-+	struct printbuf text = PRINTBUF;
-+
-+	bch2_bkey_val_to_text(&text, c, k);
-+	trace_move_extent_read(c, text.buf);
-+	printbuf_exit(&text);
-+}
-+
-+/*
-+ * Emit the move_extent_alloc_mem_fail tracepoint with a textual rendering of
-+ * @k. The text is only built when the tracepoint is enabled.
-+ */
-+static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
-+{
-+	if (!trace_move_extent_alloc_mem_fail_enabled())
-+		return;
-+
-+	struct printbuf text = PRINTBUF;
-+
-+	bch2_bkey_val_to_text(&text, c, k);
-+	trace_move_extent_alloc_mem_fail(c, text.buf);
-+	printbuf_exit(&text);
-+}
-+
-+/*
-+ * State for one extent being moved: a read, followed by a data update that
-+ * writes the data back out.
-+ */
-+struct moving_io {
-+	/* entry on moving_context.reads while the read is outstanding */
-+	struct list_head		read_list;
-+	/* entry on moving_context.ios; removed in move_free() */
-+	struct list_head		io_list;
-+	/* optional bucket being evacuated; its ->count tracks IOs in flight */
-+	struct move_bucket_in_flight	*b;
-+	struct closure			cl;
-+	/* set by move_read_endio() once the read has finished */
-+	bool				read_completed;
-+
-+	/* amounts added to / subtracted from the context's in-flight counters */
-+	unsigned			read_sectors;
-+	unsigned			write_sectors;
-+
-+	struct bch_read_bio		rbio;
-+
-+	struct data_update		write;
-+	/*
-+	 * Must be last since it is variable size: declared as a flexible
-+	 * array member rather than a zero-length array, and sized at
-+	 * allocation time in bch2_move_extent().
-+	 */
-+	struct bio_vec			bi_inline_vecs[];
-+};
-+
-+/*
-+ * Tear down and free a moving_io: drop its reference on the in-flight bucket
-+ * (if any), release the data update, unlink it from the context's io list and
-+ * wake anyone waiting on the context.
-+ */
-+static void move_free(struct moving_io *io)
-+{
-+	struct moving_context *owner = io->write.ctxt;
-+	struct move_bucket_in_flight *bucket = io->b;
-+
-+	if (bucket)
-+		atomic_dec(&bucket->count);
-+
-+	bch2_data_update_exit(&io->write);
-+
-+	/* The io list is protected by the context lock */
-+	mutex_lock(&owner->lock);
-+	list_del(&io->io_list);
-+	wake_up(&owner->wait);
-+	mutex_unlock(&owner->lock);
-+
-+	kfree(io);
-+}
-+
-+/*
-+ * Write completion callback: record a write error on the context, undo the
-+ * in-flight write accounting, free the io and drop the closure ref taken in
-+ * move_write().
-+ */
-+static void move_write_done(struct bch_write_op *op)
-+{
-+	struct moving_io *io = container_of(op, struct moving_io, write.op);
-+	/* Saved up front: @io is freed before the closure ref is dropped */
-+	struct moving_context *owner = io->write.ctxt;
-+
-+	if (op->error)
-+		owner->write_error = true;
-+
-+	atomic_sub(io->write_sectors, &owner->write_sectors);
-+	atomic_dec(&owner->write_ios);
-+	move_free(io);
-+	closure_put(&owner->cl);
-+}
-+
-+/*
-+ * Kick off the write half of a move once its read has completed. If the read
-+ * failed or found a hole there is nothing to write and the io is just freed.
-+ */
-+static void move_write(struct moving_io *io)
-+{
-+	struct moving_context *owner = io->write.ctxt;
-+	bool nothing_to_write = io->rbio.bio.bi_status || io->rbio.hole;
-+
-+	if (unlikely(nothing_to_write)) {
-+		move_free(io);
-+		return;
-+	}
-+
-+	/* Ref and counters are dropped again in move_write_done() */
-+	closure_get(&owner->cl);
-+	atomic_add(io->write_sectors, &owner->write_sectors);
-+	atomic_inc(&owner->write_ios);
-+
-+	bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
-+}
-+
-+/*
-+ * Return the io at the head of the context's read list if its read has
-+ * completed, or NULL if the list is empty or the head is still reading.
-+ */
-+struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
-+{
-+	struct moving_io *head =
-+		list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);
-+
-+	if (!head || !head->read_completed)
-+		return NULL;
-+
-+	return head;
-+}
-+
-+/*
-+ * Read completion callback: undo the in-flight read accounting, flag the io
-+ * as ready for writing, wake the context and drop the closure ref taken when
-+ * the read was submitted.
-+ */
-+static void move_read_endio(struct bio *bio)
-+{
-+	struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
-+	struct moving_context *owner = io->write.ctxt;
-+
-+	atomic_sub(io->read_sectors, &owner->read_sectors);
-+	atomic_dec(&owner->read_ios);
-+	io->read_completed = true;
-+
-+	wake_up(&owner->wait);
-+	closure_put(&owner->cl);
-+}
-+
-+/*
-+ * Issue writes for every io at the head of the read list whose read has
-+ * completed, unlocking the transaction before each one.
-+ */
-+void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
-+{
-+	for (;;) {
-+		struct moving_io *ready = bch2_moving_ctxt_next_pending_write(ctxt);
-+
-+		if (!ready)
-+			break;
-+
-+		bch2_trans_unlock_long(ctxt->trans);
-+		list_del(&ready->read_list);
-+		move_write(ready);
-+	}
-+}
-+
-+/*
-+ * Wait until the number of write sectors in flight either reaches zero or
-+ * differs from the value sampled on entry.
-+ */
-+void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
-+{
-+	unsigned in_flight_at_entry = atomic_read(&ctxt->write_sectors);
-+
-+	move_ctxt_wait_event(ctxt,
-+		!atomic_read(&ctxt->write_sectors) ||
-+		atomic_read(&ctxt->write_sectors) != in_flight_at_entry);
-+}
-+
-+/*
-+ * Shut down a moving context: wait for the read list to drain and for the
-+ * context's closure to be released, unregister the context from the
-+ * filesystem's list, release its transaction and zero the structure.
-+ */
-+void bch2_moving_ctxt_exit(struct moving_context *ctxt)
-+{
-+	struct bch_fs *fs = ctxt->trans->c;
-+
-+	move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
-+	closure_sync(&ctxt->cl);
-+
-+	/* All in-flight accounting must be back at zero by now */
-+	EBUG_ON(atomic_read(&ctxt->write_sectors));
-+	EBUG_ON(atomic_read(&ctxt->write_ios));
-+	EBUG_ON(atomic_read(&ctxt->read_sectors));
-+	EBUG_ON(atomic_read(&ctxt->read_ios));
-+
-+	mutex_lock(&fs->moving_context_lock);
-+	list_del(&ctxt->list);
-+	mutex_unlock(&fs->moving_context_lock);
-+
-+	bch2_trans_put(ctxt->trans);
-+	memset(ctxt, 0, sizeof(*ctxt));
-+}
-+
-+/*
-+ * Initialise a moving context: zero it, acquire a btree transaction, record
-+ * the caller-supplied settings and register the context on the filesystem's
-+ * list of moving contexts. Undone by bch2_moving_ctxt_exit().
-+ */
-+void bch2_moving_ctxt_init(struct moving_context *ctxt,
-+			   struct bch_fs *c,
-+			   struct bch_ratelimit *rate,
-+			   struct bch_move_stats *stats,
-+			   struct write_point_specifier wp,
-+			   bool wait_on_copygc)
-+{
-+	memset(ctxt, 0, sizeof(*ctxt));
-+
-+	/* Settings handed in by the caller */
-+	ctxt->rate		= rate;
-+	ctxt->stats		= stats;
-+	ctxt->wp		= wp;
-+	ctxt->wait_on_copygc	= wait_on_copygc;
-+
-+	/* Return address of this call, kept in the context */
-+	ctxt->fn		= (void *) _RET_IP_;
-+	ctxt->trans		= bch2_trans_get(c);
-+
-+	closure_init_stack(&ctxt->cl);
-+
-+	mutex_init(&ctxt->lock);
-+	INIT_LIST_HEAD(&ctxt->reads);
-+	INIT_LIST_HEAD(&ctxt->ios);
-+	init_waitqueue_head(&ctxt->wait);
-+
-+	mutex_lock(&c->moving_context_lock);
-+	list_add(&ctxt->list, &c->moving_context_list);
-+	mutex_unlock(&c->moving_context_lock);
-+}
-+
-+/* Finish a stats run: all this does is emit the move_data tracepoint. */
-+void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c)
-+{
-+	trace_move_data(c, stats);
-+}
-+
-+/*
-+ * Reset @stats to zero, default its data type to user data and store @name,
-+ * truncated to fit stats->name.
-+ */
-+void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
-+{
-+	memset(stats, 0, sizeof(*stats));
-+
-+	scnprintf(stats->name, sizeof(stats->name), "%s", name);
-+	stats->data_type = BCH_DATA_user;
-+}
-+
-+/*
-+ * Drop the pointers selected by data_opts.kill_ptrs (a bitmask of pointer
-+ * indices) from a copy of @k and commit the result; no data is read or
-+ * written.
-+ *
-+ * Returns 0 on success or the error from allocating the copy, relocking,
-+ * queueing the update or committing.
-+ */
-+static int bch2_extent_drop_ptrs(struct btree_trans *trans,
-+				 struct btree_iter *iter,
-+				 struct bkey_s_c k,
-+				 struct data_update_opts data_opts)
-+{
-+	struct bkey_i *new_key;
-+	int ret;
-+
-+	new_key = bch2_bkey_make_mut_noupdate(trans, k);
-+	ret = PTR_ERR_OR_ZERO(new_key);
-+	if (ret)
-+		return ret;
-+
-+	/* Highest index first, so lower indices remain valid after each drop */
-+	while (data_opts.kill_ptrs) {
-+		unsigned victim = __fls(data_opts.kill_ptrs);
-+		unsigned idx = 0;
-+		struct bch_extent_ptr *ptr;
-+
-+		bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr, idx++ == victim);
-+		data_opts.kill_ptrs &= ~(1U << victim);
-+	}
-+
-+	/*
-+	 * An extent left without pointers is handled by
-+	 * bch2_extent_normalize(): it becomes a KEY_TYPE_error key, or simply
-+	 * a discard if it was only a cached extent.
-+	 */
-+	bch2_extent_normalize(trans->c, bkey_i_to_s(new_key));
-+
-+	/*
-+	 * BTREE_ITER_ALL_SNAPSHOTS iterators are not extent iterators, so the
-+	 * deletion goes through the normal key deletion path rather than the
-+	 * extent overwrite path; a deleted key must therefore have zero size.
-+	 */
-+	if (bkey_deleted(&new_key->k))
-+		new_key->k.size = 0;
-+
-+	ret = bch2_trans_relock(trans);
-+	if (ret)
-+		return ret;
-+
-+	ret = bch2_trans_update(trans, iter, new_key, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+	if (ret)
-+		return ret;
-+
-+	return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
-+}
-+
-+/*
-+ * Begin moving a single extent @k according to @data_opts.
-+ *
-+ * If, after normalising the options, nothing needs rewriting and no extra
-+ * replicas are wanted, the extent is at most trimmed of the pointers in
-+ * kill_ptrs and no IO is issued. Otherwise a moving_io is allocated, set up
-+ * and the read is submitted; the write is issued later, once the read has
-+ * completed, via bch2_moving_ctxt_do_pending_writes().
-+ *
-+ * Returns 0 when the move was started (or nothing had to be done), -ENOMEM
-+ * on allocation failure, or the error from bch2_data_update_init() /
-+ * bch2_extent_drop_ptrs().
-+ */
-+int bch2_move_extent(struct moving_context *ctxt,
-+		     struct move_bucket_in_flight *bucket_in_flight,
-+		     struct btree_iter *iter,
-+		     struct bkey_s_c k,
-+		     struct bch_io_opts io_opts,
-+		     struct data_update_opts data_opts)
-+{
-+	struct btree_trans *trans = ctxt->trans;
-+	struct bch_fs *c = trans->c;
-+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+	const union bch_extent_entry *entry;
-+	struct extent_ptr_decoded p;
-+	struct moving_io *io;
-+	struct bio *rbio, *wbio;
-+	unsigned nr_sectors = k.k->size, nr_pages;
-+	int ret = -ENOMEM;
-+
-+	if (ctxt->stats)
-+		ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
-+	trace_move_extent2(c, k);
-+
-+	bch2_data_update_opts_normalize(k, &data_opts);
-+
-+	/* No data to move: at most some pointers to drop */
-+	if (!data_opts.rewrite_ptrs &&
-+	    !data_opts.extra_replicas) {
-+		if (!data_opts.kill_ptrs)
-+			return 0;
-+		return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
-+	}
-+
-+	/*
-+	 * Unlock before memory allocations & taking nocow locks in
-+	 * bch2_data_update_init():
-+	 */
-+	bch2_trans_unlock(trans);
-+
-+	/* The write path might have to decompress, so size for the largest */
-+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-+		nr_sectors = max_t(unsigned, nr_sectors, p.crc.uncompressed_size);
-+
-+	nr_pages = DIV_ROUND_UP(nr_sectors, PAGE_SECTORS);
-+	io = kzalloc(sizeof(struct moving_io) +
-+		     sizeof(struct bio_vec) * nr_pages, GFP_KERNEL);
-+	if (!io)
-+		goto err;
-+
-+	rbio = &io->rbio.bio;
-+	wbio = &io->write.op.wbio.bio;
-+
-+	INIT_LIST_HEAD(&io->io_list);
-+	io->write.ctxt		= ctxt;
-+	io->read_sectors	= k.k->size;
-+	io->write_sectors	= k.k->size;
-+
-+	bio_init(wbio, NULL, io->bi_inline_vecs, nr_pages, 0);
-+	bio_set_prio(wbio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
-+
-+	if (bch2_bio_alloc_pages(wbio, nr_sectors << 9, GFP_KERNEL))
-+		goto err_free;
-+
-+	/* The read bio is initialised over the same inline vecs */
-+	io->rbio.c		= c;
-+	io->rbio.opts		= io_opts;
-+	bio_init(rbio, NULL, io->bi_inline_vecs, nr_pages, 0);
-+	rbio->bi_vcnt		= nr_pages;
-+	bio_set_prio(rbio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
-+	rbio->bi_iter.bi_size	= nr_sectors << 9;
-+
-+	rbio->bi_opf		= REQ_OP_READ;
-+	rbio->bi_iter.bi_sector	= bkey_start_offset(k.k);
-+	rbio->bi_end_io		= move_read_endio;
-+
-+	ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
-+				    io_opts, data_opts, iter->btree_id, k);
-+	if (ret == -BCH_ERR_unwritten_extent_update) {
-+		/* Unwritten extents are updated in place, no IO needed */
-+		bch2_update_unwritten_extent(trans, &io->write);
-+		move_free(io);
-+		return 0;
-+	}
-+	if (ret)
-+		goto err_free_pages;
-+
-+	io->write.op.end_io = move_write_done;
-+
-+	if (ctxt->rate)
-+		bch2_ratelimit_increment(ctxt->rate, k.k->size);
-+
-+	if (ctxt->stats) {
-+		atomic64_inc(&ctxt->stats->keys_moved);
-+		atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
-+	}
-+
-+	if (bucket_in_flight) {
-+		io->b = bucket_in_flight;
-+		atomic_inc(&io->b->count);
-+	}
-+
-+	this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
-+	this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
-+	trace_move_extent_read2(c, k);
-+
-+	mutex_lock(&ctxt->lock);
-+	atomic_add(io->read_sectors, &ctxt->read_sectors);
-+	atomic_inc(&ctxt->read_ios);
-+
-+	list_add_tail(&io->read_list, &ctxt->reads);
-+	list_add_tail(&io->io_list, &ctxt->ios);
-+	mutex_unlock(&ctxt->lock);
-+
-+	/*
-+	 * Ref dropped by move_read_endio() - guards against use after free of
-+	 * ctxt when doing the wakeup
-+	 */
-+	closure_get(&ctxt->cl);
-+	bch2_read_extent(trans, &io->rbio,
-+			 bkey_start_pos(k.k),
-+			 iter->btree_id, k, 0,
-+			 BCH_READ_NODECODE|
-+			 BCH_READ_LAST_FRAGMENT);
-+	return 0;
-+
-+err_free_pages:
-+	bio_free_pages(wbio);
-+err_free:
-+	kfree(io);
-+err:
-+	this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
-+	trace_move_extent_alloc_mem_fail2(c, k);
-+	return ret;
-+}
-+
-+/*
-+ * Look up the IO options that apply to @extent_k.
-+ *
-+ * @io_opts caches one entry per snapshot version of the inode last looked
-+ * at; the cache is rebuilt from the inodes btree whenever the extent belongs
-+ * to a different inode number than the cached one.
-+ *
-+ * Returns the options of the first cached entry for which
-+ * bch2_snapshot_is_ancestor(c, extent snapshot, entry snapshot) holds, the
-+ * filesystem-wide options when there is none (or the extent has no snapshot
-+ * ID), or an ERR_PTR on failure or transaction restart.
-+ */
-+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
-+			  struct per_snapshot_io_opts *io_opts,
-+			  struct bkey_s_c extent_k)
-+{
-+	struct bch_fs *c = trans->c;
-+	const struct bpos *pos = &extent_k.k->p;
-+	u32 restart_count = trans->restart_count;
-+	int ret = 0;
-+
-+	if (io_opts->cur_inum != pos->inode) {
-+		struct btree_iter iter;
-+		struct bkey_s_c k;
-+
-+		/* Different inode: throw the cache away and repopulate it */
-+		io_opts->d.nr = 0;
-+
-+		for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, pos->inode),
-+				   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+			if (k.k->p.offset != pos->inode)
-+				break;
-+
-+			if (!bkey_is_inode(k.k))
-+				continue;
-+
-+			struct bch_inode_unpacked inode;
-+			BUG_ON(bch2_inode_unpack(k, &inode));
-+
-+			struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
-+			bch2_inode_opts_get(&e.io_opts, c, &inode);
-+
-+			ret = darray_push(&io_opts->d, e);
-+			if (ret)
-+				break;
-+		}
-+		bch2_trans_iter_exit(trans, &iter);
-+		io_opts->cur_inum = pos->inode;
-+	}
-+
-+	if (!ret)
-+		ret = trans_was_restarted(trans, restart_count);
-+	if (ret)
-+		return ERR_PTR(ret);
-+
-+	if (pos->snapshot) {
-+		struct snapshot_io_opts_entry *i;
-+
-+		darray_for_each(io_opts->d, i)
-+			if (bch2_snapshot_is_ancestor(c, pos->snapshot, i->snapshot))
-+				return &i->io_opts;
-+	}
-+
-+	return &io_opts->fs_io_opts;
-+}
-+
-+/*
-+ * Fill *@io_opts with the IO options for a single extent, without caching.
-+ *
-+ * Extents with inode number 0 (reflink btree?) and extents whose inode cannot
-+ * be found get the filesystem-wide options. Returns 0, or a transaction
-+ * restart error from the inode lookup; other lookup errors are not
-+ * propagated.
-+ */
-+int bch2_move_get_io_opts_one(struct btree_trans *trans,
-+			      struct bch_io_opts *io_opts,
-+			      struct bkey_s_c extent_k)
-+{
-+	struct bch_fs *fs = trans->c;
-+	struct btree_iter iter;
-+	struct bkey_s_c inode_k;
-+	int ret;
-+
-+	/* reflink btree? */
-+	if (!extent_k.k->p.inode) {
-+		*io_opts = bch2_opts_to_inode_opts(fs->opts);
-+		return 0;
-+	}
-+
-+	inode_k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
-+			SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
-+			BTREE_ITER_CACHED);
-+	ret = bkey_err(inode_k);
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		return ret;
-+
-+	if (ret || !bkey_is_inode(inode_k.k)) {
-+		/* No usable inode: fall back to the filesystem defaults */
-+		*io_opts = bch2_opts_to_inode_opts(fs->opts);
-+	} else {
-+		struct bch_inode_unpacked inode;
-+
-+		bch2_inode_unpack(inode_k, &inode);
-+		bch2_inode_opts_get(io_opts, fs, &inode);
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return 0;
-+}
-+
-+/*
-+ * Throttle a move operation. Called once per loop iteration by the movers.
-+ *
-+ * In order, this:
-+ *  - waits for copygc to go idle, if the context asked for that;
-+ *  - sleeps for as long as the context's rate limiter (if any) demands;
-+ *  - drains pending reads and freezes if the task is being frozen;
-+ *  - waits until the in-flight read/write sectors and IO counts are below
-+ *    the move_bytes_in_flight / move_ios_in_flight options.
-+ *
-+ * Returns 1 if the caller is a kthread that has been asked to stop, and 0
-+ * otherwise.
-+ */
-+int bch2_move_ratelimit(struct moving_context *ctxt)
-+{
-+	struct bch_fs *c = ctxt->trans->c;
-+	/* kthread_should_stop() may only be called from a kthread */
-+	bool is_kthread = current->flags & PF_KTHREAD;
-+	u64 delay;
-+
-+	/*
-+	 * Wait only while copygc IS running: entering this branch when it is
-+	 * not running would make the wait condition below true immediately,
-+	 * so no waiting would ever happen.
-+	 */
-+	if (ctxt->wait_on_copygc && c->copygc_running) {
-+		bch2_trans_unlock_long(ctxt->trans);
-+		wait_event_killable(c->copygc_running_wq,
-+				    !c->copygc_running ||
-+				    (is_kthread && kthread_should_stop()));
-+	}
-+
-+	do {
-+		delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
-+
-+		if (delay) {
-+			/* Long sleeps use the "long" unlock variant */
-+			if (delay > HZ / 10)
-+				bch2_trans_unlock_long(ctxt->trans);
-+			else
-+				bch2_trans_unlock(ctxt->trans);
-+			set_current_state(TASK_INTERRUPTIBLE);
-+		}
-+
-+		if (is_kthread && kthread_should_stop()) {
-+			__set_current_state(TASK_RUNNING);
-+			return 1;
-+		}
-+
-+		if (delay)
-+			schedule_timeout(delay);
-+
-+		if (unlikely(freezing(current))) {
-+			move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
-+			try_to_freeze();
-+		}
-+	} while (delay);
-+
-+	/*
-+	 * XXX: these limits really ought to be per device, SSDs and hard drives
-+	 * will want different limits
-+	 */
-+	move_ctxt_wait_event(ctxt,
-+		atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 &&
-+		atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 &&
-+		atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight &&
-+		atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Walk the extents of one btree from @start up to (not including) @end and
-+ * start a move for every direct-data extent that @pred selects.
-+ *
-+ * The walk stops early when bch2_move_ratelimit() returns nonzero. Transaction
-+ * restarts and -ENOMEM from bch2_move_extent() cause the same key to be
-+ * retried; other errors from bch2_move_extent() are currently ignored.
-+ */
-+static int bch2_move_data_btree(struct moving_context *ctxt,
-+				struct bpos start,
-+				struct bpos end,
-+				move_pred_fn pred, void *arg,
-+				enum btree_id btree_id)
-+{
-+	struct btree_trans *trans = ctxt->trans;
-+	struct bch_fs *c = trans->c;
-+	struct per_snapshot_io_opts snapshot_io_opts;
-+	struct data_update_opts data_opts;
-+	struct bch_io_opts *io_opts;
-+	struct btree_iter iter;
-+	struct bkey_buf key_copy;
-+	struct bkey_s_c k;
-+	int ret = 0, move_ret;
-+
-+	per_snapshot_io_opts_init(&snapshot_io_opts, c);
-+	bch2_bkey_buf_init(&key_copy);
-+
-+	if (ctxt->stats) {
-+		ctxt->stats->data_type = BCH_DATA_user;
-+		ctxt->stats->pos = BBPOS(btree_id, start);
-+	}
-+
-+	bch2_trans_iter_init(trans, &iter, btree_id, start,
-+			     BTREE_ITER_PREFETCH|
-+			     BTREE_ITER_ALL_SNAPSHOTS);
-+
-+	if (ctxt->rate)
-+		bch2_ratelimit_reset(ctxt->rate);
-+
-+	while (!bch2_move_ratelimit(ctxt)) {
-+		bch2_trans_begin(trans);
-+
-+		k = bch2_btree_iter_peek(&iter);
-+		if (!k.k)
-+			break;
-+
-+		ret = bkey_err(k);
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			continue;
-+		if (ret)
-+			break;
-+
-+		if (bkey_ge(bkey_start_pos(k.k), end))
-+			break;
-+
-+		if (ctxt->stats)
-+			ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
-+
-+		if (!bkey_extent_is_direct_data(k.k))
-+			goto next_nondata;
-+
-+		io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
-+		ret = PTR_ERR_OR_ZERO(io_opts);
-+		if (ret)
-+			continue;
-+
-+		memset(&data_opts, 0, sizeof(data_opts));
-+		if (!pred(c, arg, k, io_opts, &data_opts))
-+			goto next;
-+
-+		/*
-+		 * The iterator gets unlocked by __bch2_read_extent - keep a
-+		 * private copy of @k and work from that:
-+		 */
-+		bch2_bkey_buf_reassemble(&key_copy, c, k);
-+		k = bkey_i_to_s_c(key_copy.k);
-+
-+		move_ret = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts);
-+		if (bch2_err_matches(move_ret, BCH_ERR_transaction_restart))
-+			continue;
-+
-+		if (move_ret == -ENOMEM) {
-+			/* memory allocation failure, wait for some IO to finish */
-+			bch2_move_ctxt_wait_for_io(ctxt);
-+			continue;
-+		}
-+
-+		/* XXX signal failure: any other error falls through to next */
-+next:
-+		if (ctxt->stats)
-+			atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
-+next_nondata:
-+		bch2_btree_iter_advance(&iter);
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	bch2_bkey_buf_exit(&key_copy, c);
-+	per_snapshot_io_opts_exit(&snapshot_io_opts);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Run bch2_move_data_btree() over every btree in [@start.btree, @end.btree]
-+ * (clamped to the btrees that are alive) whose keys can carry pointers and
-+ * which has a root node. The first and last btree are restricted to
-+ * @start.pos / @end.pos; btrees in between are walked in full.
-+ *
-+ * ctxt->stats is optional here, as it is in the rest of the move path.
-+ *
-+ * Returns 0, or the first error returned for a btree.
-+ */
-+int __bch2_move_data(struct moving_context *ctxt,
-+		     struct bbpos start,
-+		     struct bbpos end,
-+		     move_pred_fn pred, void *arg)
-+{
-+	struct bch_fs *c = ctxt->trans->c;
-+	enum btree_id id;
-+	int ret = 0;
-+
-+	for (id = start.btree;
-+	     id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
-+	     id++) {
-+		/* Contexts may be created without stats; don't deref NULL */
-+		if (ctxt->stats)
-+			ctxt->stats->pos = BBPOS(id, POS_MIN);
-+
-+		if (!btree_type_has_ptrs(id) ||
-+		    !bch2_btree_id_root(c, id)->b)
-+			continue;
-+
-+		ret = bch2_move_data_btree(ctxt,
-+				       id == start.btree ? start.pos : POS_MIN,
-+				       id == end.btree   ? end.pos   : POS_MAX,
-+				       pred, arg, id);
-+		if (ret)
-+			break;
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * Convenience wrapper: set up a moving context on the stack, run
-+ * __bch2_move_data() over [@start, @end] with it, and tear it down again.
-+ * Returns whatever __bch2_move_data() returned.
-+ */
-+int bch2_move_data(struct bch_fs *c,
-+		   struct bbpos start,
-+		   struct bbpos end,
-+		   struct bch_ratelimit *rate,
-+		   struct bch_move_stats *stats,
-+		   struct write_point_specifier wp,
-+		   bool wait_on_copygc,
-+		   move_pred_fn pred, void *arg)
-+{
-+	struct moving_context mover;
-+	int err;
-+
-+	bch2_moving_ctxt_init(&mover, c, rate, stats, wp, wait_on_copygc);
-+	err = __bch2_move_data(&mover, start, end, pred, arg);
-+	bch2_moving_ctxt_exit(&mover);
-+
-+	return err;
-+}
-+
-+/*
-+ * Move everything that still lives in @bucket (generation @gen) elsewhere,
-+ * by walking the bucket's backpointers.
-+ *
-+ * Backpointers at level 0 refer to extents, which are moved with
-+ * bch2_move_extent() (extents whose pointer into this device is cached are
-+ * skipped); backpointers at higher levels refer to btree nodes, which are
-+ * rewritten with bch2_btree_node_rewrite().
-+ *
-+ * Returns 0 when the walk reached the end of the backpointers, the nonzero
-+ * value from bch2_move_ratelimit() if it stopped the walk, or the first
-+ * error. The evacuate_bucket tracepoint is not emitted on the error path.
-+ */
-+int __bch2_evacuate_bucket(struct moving_context *ctxt,
-+			   struct move_bucket_in_flight *bucket_in_flight,
-+			   struct bpos bucket, int gen,
-+			   struct data_update_opts _data_opts)
-+{
-+	struct btree_trans *trans = ctxt->trans;
-+	struct bch_fs *c = trans->c;
-+	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
-+	struct data_update_opts data_opts;
-+	struct btree_iter iter;
-+	struct bkey_buf key_copy;
-+	struct bkey_s_c k;
-+	struct bch_backpointer bp;
-+	struct bpos bp_pos = POS_MIN;
-+	struct bch_alloc_v4 a_convert;
-+	const struct bch_alloc_v4 *a;
-+	unsigned dirty_sectors, bucket_size;
-+	u64 fragmentation;
-+	int ret = 0;
-+
-+	trace_bucket_evacuate(c, &bucket);
-+
-+	bch2_bkey_buf_init(&key_copy);
-+
-+	/*
-+	 * We're not run in a context that handles transaction restarts:
-+	 */
-+	bch2_trans_begin(trans);
-+
-+	/* Sample the bucket's alloc info; it is only used for the tracepoint */
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
-+			     bucket, BTREE_ITER_CACHED);
-+	ret = lockrestart_do(trans,
-+			bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	if (ret) {
-+		bch_err_msg(c, ret, "looking up alloc key");
-+		goto err;
-+	}
-+
-+	a = bch2_alloc_to_v4(k, &a_convert);
-+	dirty_sectors	= a->dirty_sectors;
-+	bucket_size	= bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
-+	fragmentation	= a->fragmentation_lru;
-+
-+	ret = bch2_btree_write_buffer_flush(trans);
-+	if (ret) {
-+		bch_err_msg(c, ret, "flushing btree write buffer");
-+		goto err;
-+	}
-+
-+	while (!(ret = bch2_move_ratelimit(ctxt))) {
-+		bch2_trans_begin(trans);
-+
-+		ret = bch2_get_next_backpointer(trans, bucket, gen,
-+						&bp_pos, &bp,
-+						BTREE_ITER_CACHED);
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			continue;
-+		if (ret)
-+			goto err;
-+		if (bkey_eq(bp_pos, POS_MAX))
-+			break;
-+
-+		if (bp.level) {
-+			/* Backpointer to a btree node: rewrite the node */
-+			struct btree *b;
-+
-+			b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp);
-+			ret = PTR_ERR_OR_ZERO(b);
-+			if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node ||
-+			    bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+				continue;
-+			if (ret)
-+				goto err;
-+			if (!b)
-+				goto next;
-+
-+			ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
-+			bch2_trans_iter_exit(trans, &iter);
-+
-+			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+				continue;
-+			if (ret)
-+				goto err;
-+
-+			if (ctxt->rate)
-+				bch2_ratelimit_increment(ctxt->rate,
-+							 c->opts.btree_node_size >> 9);
-+			if (ctxt->stats) {
-+				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
-+				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
-+			}
-+		} else {
-+			/* Backpointer to an extent: move its data */
-+			const struct bch_extent_ptr *ptr;
-+			unsigned ptr_idx = 0;
-+
-+			k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
-+			ret = bkey_err(k);
-+			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+				continue;
-+			if (ret)
-+				goto err;
-+			if (!k.k)
-+				goto next;
-+
-+			bch2_bkey_buf_reassemble(&key_copy, c, k);
-+			k = bkey_i_to_s_c(key_copy.k);
-+
-+			ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
-+			if (ret) {
-+				bch2_trans_iter_exit(trans, &iter);
-+				continue;
-+			}
-+
-+			data_opts = _data_opts;
-+			data_opts.target	= io_opts.background_target;
-+			data_opts.rewrite_ptrs	= 0;
-+
-+			/* Mark the pointers that live on this bucket's device */
-+			bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
-+				if (ptr->dev == bucket.inode) {
-+					data_opts.rewrite_ptrs |= 1U << ptr_idx;
-+					if (ptr->cached) {
-+						bch2_trans_iter_exit(trans, &iter);
-+						goto next;
-+					}
-+				}
-+				ptr_idx++;
-+			}
-+
-+			ret = bch2_move_extent(ctxt, bucket_in_flight,
-+					       &iter, k, io_opts, data_opts);
-+			bch2_trans_iter_exit(trans, &iter);
-+
-+			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+				continue;
-+			if (ret == -ENOMEM) {
-+				/* memory allocation failure, wait for some IO to finish */
-+				bch2_move_ctxt_wait_for_io(ctxt);
-+				continue;
-+			}
-+			if (ret)
-+				goto err;
-+
-+			if (ctxt->stats)
-+				atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
-+		}
-+next:
-+		bp_pos = bpos_nosnap_successor(bp_pos);
-+	}
-+
-+	trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);
-+err:
-+	bch2_bkey_buf_exit(&key_copy, c);
-+	return ret;
-+}
-+
-+/*
-+ * Convenience wrapper: set up a moving context on the stack, evacuate
-+ * @bucket with it (without in-flight bucket tracking) and tear it down
-+ * again. Returns whatever __bch2_evacuate_bucket() returned.
-+ */
-+int bch2_evacuate_bucket(struct bch_fs *c,
-+			 struct bpos bucket, int gen,
-+			 struct data_update_opts data_opts,
-+			 struct bch_ratelimit *rate,
-+			 struct bch_move_stats *stats,
-+			 struct write_point_specifier wp,
-+			 bool wait_on_copygc)
-+{
-+	struct moving_context mover;
-+	int err;
-+
-+	bch2_moving_ctxt_init(&mover, c, rate, stats, wp, wait_on_copygc);
-+	err = __bch2_evacuate_bucket(&mover, NULL, bucket, gen, data_opts);
-+	bch2_moving_ctxt_exit(&mover);
-+
-+	return err;
-+}
-+
-+/*
-+ * Predicate used by bch2_move_btree(): returns true when btree node @b
-+ * should be rewritten.
-+ */
-+typedef bool (*move_btree_pred)(struct bch_fs *c, void *arg,
-+				struct btree *b,
-+				struct bch_io_opts *io_opts,
-+				struct data_update_opts *data_opts);
-+
-+/*
-+ * Walk btree nodes from (@start_btree_id, ...) through
-+ * (@end_btree_id, @end_pos) and rewrite every node that @pred selects.
-+ *
-+ * The walk of each btree starts at POS_MIN. When running in a kthread the
-+ * walk stops as soon as the thread is asked to stop. Returns 0 or the first
-+ * non-restart error.
-+ */
-+static int bch2_move_btree(struct bch_fs *c,
-+			   enum btree_id start_btree_id, struct bpos start_pos,
-+			   enum btree_id end_btree_id,   struct bpos end_pos,
-+			   move_btree_pred pred, void *arg,
-+			   struct bch_move_stats *stats)
-+{
-+	bool is_kthread = (current->flags & PF_KTHREAD) != 0;
-+	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
-+	struct data_update_opts data_opts;
-+	struct moving_context ctxt;
-+	struct btree_trans *trans;
-+	struct btree_iter iter;
-+	struct btree *b;
-+	enum btree_id btree;
-+	int ret = 0;
-+
-+	bch2_moving_ctxt_init(&ctxt, c, NULL, stats,
-+			      writepoint_ptr(&c->btree_write_point),
-+			      true);
-+	trans = ctxt.trans;
-+
-+	stats->data_type = BCH_DATA_btree;
-+
-+	for (btree = start_btree_id;
-+	     btree <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1);
-+	     btree++) {
-+		stats->pos = BBPOS(btree, POS_MIN);
-+
-+		/* Skip btrees that have no root node */
-+		if (!bch2_btree_id_root(c, btree)->b)
-+			continue;
-+
-+		bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0,
-+					  BTREE_ITER_PREFETCH);
-+retry:
-+		ret = 0;
-+		while (bch2_trans_begin(trans),
-+		       (b = bch2_btree_iter_peek_node(&iter)) &&
-+		       !(ret = PTR_ERR_OR_ZERO(b))) {
-+			if (is_kthread && kthread_should_stop())
-+				break;
-+
-+			/* Past the end of the requested range? */
-+			if ((cmp_int(btree, end_btree_id) ?:
-+			     bpos_cmp(b->key.k.p, end_pos)) > 0)
-+				break;
-+
-+			stats->pos = BBPOS(iter.btree_id, iter.pos);
-+
-+			if (!pred(c, arg, b, &io_opts, &data_opts))
-+				goto next;
-+
-+			/* ret is known to be 0 on entry to the loop body */
-+			ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
-+			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+				continue;
-+			if (ret)
-+				break;
-+next:
-+			bch2_btree_iter_next_node(&iter);
-+		}
-+		/* A restart surfaced by peek_node: start this btree's walk again */
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			goto retry;
-+
-+		bch2_trans_iter_exit(trans, &iter);
-+
-+		if (is_kthread && kthread_should_stop())
-+			break;
-+	}
-+
-+	bch_err_fn(c, ret);
-+	bch2_moving_ctxt_exit(&ctxt);
-+	bch2_btree_interior_updates_flush(c);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Move predicate for rereplicate: select keys that still have some
-+ * durability but less than the wanted replica count, and ask for as many
-+ * extra replicas as are missing.
-+ *
-+ * The wanted count is metadata_replicas for btree pointers and the inode's
-+ * data_replicas otherwise. Keys with zero durability are not selected.
-+ */
-+static bool rereplicate_pred(struct bch_fs *c, void *arg,
-+			     struct bkey_s_c k,
-+			     struct bch_io_opts *io_opts,
-+			     struct data_update_opts *data_opts)
-+{
-+	unsigned have = bch2_bkey_durability(c, k);
-+	unsigned want;
-+
-+	if (bkey_is_btree_ptr(k.k))
-+		want = c->opts.metadata_replicas;
-+	else
-+		want = io_opts->data_replicas;
-+
-+	if (!have || have >= want)
-+		return false;
-+
-+	data_opts->target		= 0;
-+	data_opts->extra_replicas	= want - have;
-+	data_opts->btree_insert_flags	= 0;
-+	return true;
-+}
-+
-+/*
-+ * Move predicate for migrate: select keys with at least one pointer to the
-+ * device named in the ioctl argument (@arg is a struct bch_ioctl_data), and
-+ * mark exactly those pointers for rewriting.
-+ */
-+static bool migrate_pred(struct bch_fs *c, void *arg,
-+			 struct bkey_s_c k,
-+			 struct bch_io_opts *io_opts,
-+			 struct data_update_opts *data_opts)
-+{
-+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+	const struct bch_extent_ptr *ptr;
-+	struct bch_ioctl_data *op = arg;
-+	unsigned ptr_idx = 0;
-+
-+	data_opts->rewrite_ptrs		= 0;
-+	data_opts->target		= 0;
-+	data_opts->extra_replicas	= 0;
-+	data_opts->btree_insert_flags	= 0;
-+
-+	/* Bit N of rewrite_ptrs corresponds to the Nth pointer in the key */
-+	bkey_for_each_ptr(ptrs, ptr) {
-+		if (ptr->dev == op->migrate.dev)
-+			data_opts->rewrite_ptrs |= 1U << ptr_idx;
-+		ptr_idx++;
-+	}
-+
-+	return data_opts->rewrite_ptrs != 0;
-+}
-+
-+/* Btree-node flavour of rereplicate_pred(): applies it to the node's key. */
-+static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
-+				   struct btree *b,
-+				   struct bch_io_opts *io_opts,
-+				   struct data_update_opts *data_opts)
-+{
-+	struct bkey_s_c node_key = bkey_i_to_s_c(&b->key);
-+
-+	return rereplicate_pred(c, arg, node_key, io_opts, data_opts);
-+}
-+
-+/* Btree-node flavour of migrate_pred(): applies it to the node's key. */
-+static bool migrate_btree_pred(struct bch_fs *c, void *arg,
-+			       struct btree *b,
-+			       struct bch_io_opts *io_opts,
-+			       struct data_update_opts *data_opts)
-+{
-+	struct bkey_s_c node_key = bkey_i_to_s_c(&b->key);
-+
-+	return migrate_pred(c, arg, node_key, io_opts, data_opts);
-+}
-+
-+/*
-+ * Check whether packed key format @f can describe field values that do not
-+ * fit the current unpacked format (bch2_bkey_format_current).
-+ *
-+ * Returns true if, for any field, the packed field is wider than the
-+ * unpacked one, is exactly as wide but has a nonzero offset, or if
-+ * offset + largest packed value wraps once truncated to the unpacked width.
-+ */
-+static bool bformat_needs_redo(struct bkey_format *f)
-+{
-+	unsigned i;
-+
-+	for (i = 0; i < f->nr_fields; i++) {
-+		unsigned f_bits = f->bits_per_field[i];
-+		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
-+		/* Two-step shift so that a 64 bit field doesn't shift by 64 */
-+		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
-+		u64 field_offset = le64_to_cpu(f->field_offset[i]);
-+
-+		if (f_bits > unpacked_bits)
-+			return true;
-+
-+		if ((f_bits == unpacked_bits) && field_offset)
-+			return true;
-+
-+		/*
-+		 * Largest value the packed field can hold. f_bits is known to
-+		 * be <= unpacked_bits here; build the mask the same two-step
-+		 * way, since 1ULL << 64 is undefined for a full-width field,
-+		 * and special-case zero-width fields.
-+		 */
-+		u64 f_mask = f_bits
-+			? ~((~0ULL << 1) << (f_bits - 1))
-+			: 0;
-+
-+		if (((field_offset + f_mask) & unpacked_mask) < field_offset)
-+			return true;
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Btree predicate for the rewrite_old_nodes job: select nodes whose on-disk
-+ * version differs from the superblock version, that are flagged as needing a
-+ * rewrite, or whose key format fails bformat_needs_redo().
-+ */
-+static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
-+				   struct btree *b,
-+				   struct bch_io_opts *io_opts,
-+				   struct data_update_opts *data_opts)
-+{
-+	/* Up to date in every respect: leave the node alone */
-+	if (b->version_ondisk == c->sb.version &&
-+	    !btree_node_need_rewrite(b) &&
-+	    !bformat_needs_redo(&b->format))
-+		return false;
-+
-+	data_opts->target		= 0;
-+	data_opts->extra_replicas	= 0;
-+	data_opts->btree_insert_flags	= 0;
-+	return true;
-+}
-+
-+/*
-+ * Rewrite every btree node selected by rewrite_old_nodes_pred(). On success
-+ * the superblock is updated: the extents_above_btree_updates_done and
-+ * bformat_overflow_done compat bits are set and version_min is raised to the
-+ * current version. Returns 0 or the error from the scan.
-+ */
-+int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
-+{
-+	int ret;
-+
-+	ret = bch2_move_btree(c,
-+			      0,		POS_MIN,
-+			      BTREE_ID_NR,	SPOS_MAX,
-+			      rewrite_old_nodes_pred, c, stats);
-+	if (ret)
-+		goto out;
-+
-+	mutex_lock(&c->sb_lock);
-+	c->disk_sb.sb->compat[0] |=
-+		cpu_to_le64((1ULL << BCH_COMPAT_extents_above_btree_updates_done) |
-+			    (1ULL << BCH_COMPAT_bformat_overflow_done));
-+	c->disk_sb.sb->version_min = c->disk_sb.sb->version;
-+	bch2_write_super(c);
-+	mutex_unlock(&c->sb_lock);
-+out:
-+	bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+/*
-+ * Run the data job described by @op, recording progress in @stats.
-+ *
-+ * REREPLICATE and MIGRATE both flush journal pins, process btree nodes, then
-+ * user data, running replicas gc after each of the two passes; every step is
-+ * run even if an earlier one failed, and the error of the last failing step
-+ * is returned. REWRITE_OLD_NODES runs bch2_scan_old_btree_nodes().
-+ *
-+ * Returns -EINVAL for an unknown op or an out-of-range migrate device.
-+ */
-+int bch2_data_job(struct bch_fs *c,
-+		  struct bch_move_stats *stats,
-+		  struct bch_ioctl_data op)
-+{
-+	struct bbpos start	= { op.start_btree, op.start_pos };
-+	struct bbpos end	= { op.end_btree, op.end_pos };
-+	int ret = 0;
-+
-+	switch (op.op) {
-+	case BCH_DATA_OP_REREPLICATE:
-+		bch2_move_stats_init(stats, "rereplicate");
-+		stats->data_type = BCH_DATA_journal;
-+		ret = bch2_journal_flush_device_pins(&c->journal, -1);
-+
-+		/* Pass 1: btree nodes */
-+		ret = bch2_move_btree(c,
-+				      op.start_btree,	op.start_pos,
-+				      op.end_btree,	op.end_pos,
-+				      rereplicate_btree_pred, c, stats) ?: ret;
-+		ret = bch2_replicas_gc2(c) ?: ret;
-+
-+		/* Pass 2: user data */
-+		ret = bch2_move_data(c, start, end,
-+				     NULL,
-+				     stats,
-+				     writepoint_hashed((unsigned long) current),
-+				     true,
-+				     rereplicate_pred, c) ?: ret;
-+		ret = bch2_replicas_gc2(c) ?: ret;
-+
-+		bch2_move_stats_exit(stats, c);
-+		break;
-+	case BCH_DATA_OP_MIGRATE:
-+		if (op.migrate.dev >= c->sb.nr_devices)
-+			return -EINVAL;
-+
-+		bch2_move_stats_init(stats, "migrate");
-+		stats->data_type = BCH_DATA_journal;
-+		ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
-+
-+		/* Pass 1: btree nodes */
-+		ret = bch2_move_btree(c,
-+				      op.start_btree,	op.start_pos,
-+				      op.end_btree,	op.end_pos,
-+				      migrate_btree_pred, &op, stats) ?: ret;
-+		ret = bch2_replicas_gc2(c) ?: ret;
-+
-+		/* Pass 2: user data */
-+		ret = bch2_move_data(c, start, end,
-+				     NULL,
-+				     stats,
-+				     writepoint_hashed((unsigned long) current),
-+				     true,
-+				     migrate_pred, &op) ?: ret;
-+		ret = bch2_replicas_gc2(c) ?: ret;
-+
-+		bch2_move_stats_exit(stats, c);
-+		break;
-+	case BCH_DATA_OP_REWRITE_OLD_NODES:
-+		bch2_move_stats_init(stats, "rewrite_old_nodes");
-+		ret = bch2_scan_old_btree_nodes(c, stats);
-+		bch2_move_stats_exit(stats, c);
-+		break;
-+	default:
-+		ret = -EINVAL;
-+	}
-+
-+	return ret;
-+}
-+
-+void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
-+{
-+ prt_printf(out, "%s: data type=%s pos=",
-+ stats->name,
-+ bch2_data_types[stats->data_type]);
-+ bch2_bbpos_to_text(out, stats->pos);
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+
-+ prt_str(out, "keys moved: ");
-+ prt_u64(out, atomic64_read(&stats->keys_moved));
-+ prt_newline(out);
-+
-+ prt_str(out, "keys raced: ");
-+ prt_u64(out, atomic64_read(&stats->keys_raced));
-+ prt_newline(out);
-+
-+ prt_str(out, "bytes seen: ");
-+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9);
-+ prt_newline(out);
-+
-+ prt_str(out, "bytes moved: ");
-+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9);
-+ prt_newline(out);
-+
-+ prt_str(out, "bytes raced: ");
-+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
-+ prt_newline(out);
-+
-+ printbuf_indent_sub(out, 2);
-+}
-+
-+static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
-+{
-+ struct moving_io *io;
-+
-+ bch2_move_stats_to_text(out, ctxt->stats);
-+ printbuf_indent_add(out, 2);
-+
-+ prt_printf(out, "reads: ios %u/%u sectors %u/%u",
-+ atomic_read(&ctxt->read_ios),
-+ c->opts.move_ios_in_flight,
-+ atomic_read(&ctxt->read_sectors),
-+ c->opts.move_bytes_in_flight >> 9);
-+ prt_newline(out);
-+
-+ prt_printf(out, "writes: ios %u/%u sectors %u/%u",
-+ atomic_read(&ctxt->write_ios),
-+ c->opts.move_ios_in_flight,
-+ atomic_read(&ctxt->write_sectors),
-+ c->opts.move_bytes_in_flight >> 9);
-+ prt_newline(out);
-+
-+ printbuf_indent_add(out, 2);
-+
-+ mutex_lock(&ctxt->lock);
-+ list_for_each_entry(io, &ctxt->ios, io_list)
-+ bch2_write_op_to_text(out, &io->write.op);
-+ mutex_unlock(&ctxt->lock);
-+
-+ printbuf_indent_sub(out, 4);
-+}
-+
-+void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+ struct moving_context *ctxt;
-+
-+ mutex_lock(&c->moving_context_lock);
-+ list_for_each_entry(ctxt, &c->moving_context_list, list)
-+ bch2_moving_ctxt_to_text(out, c, ctxt);
-+ mutex_unlock(&c->moving_context_lock);
-+}
-+
-+void bch2_fs_move_init(struct bch_fs *c)
-+{
-+ INIT_LIST_HEAD(&c->moving_context_list);
-+ mutex_init(&c->moving_context_lock);
-+}
-diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
-new file mode 100644
-index 000000000000..07cf9d42643b
---- /dev/null
-+++ b/fs/bcachefs/move.h
-@@ -0,0 +1,139 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_MOVE_H
-+#define _BCACHEFS_MOVE_H
-+
-+#include "bbpos.h"
-+#include "bcachefs_ioctl.h"
-+#include "btree_iter.h"
-+#include "buckets.h"
-+#include "data_update.h"
-+#include "move_types.h"
-+
-+struct bch_read_bio;
-+
-+struct moving_context {
-+ struct btree_trans *trans;
-+ struct list_head list;
-+ void *fn;
-+
-+ struct bch_ratelimit *rate;
-+ struct bch_move_stats *stats;
-+ struct write_point_specifier wp;
-+ bool wait_on_copygc;
-+ bool write_error;
-+
-+ /* For waiting on outstanding reads and writes: */
-+ struct closure cl;
-+
-+ struct mutex lock;
-+ struct list_head reads;
-+ struct list_head ios;
-+
-+ /* in flight sectors: */
-+ atomic_t read_sectors;
-+ atomic_t write_sectors;
-+ atomic_t read_ios;
-+ atomic_t write_ios;
-+
-+ wait_queue_head_t wait;
-+};
-+
-+#define move_ctxt_wait_event(_ctxt, _cond) \
-+do { \
-+ bool cond_finished = false; \
-+ bch2_moving_ctxt_do_pending_writes(_ctxt); \
-+ \
-+ if (_cond) \
-+ break; \
-+ bch2_trans_unlock_long((_ctxt)->trans); \
-+ __wait_event((_ctxt)->wait, \
-+ bch2_moving_ctxt_next_pending_write(_ctxt) || \
-+ (cond_finished = (_cond))); \
-+ if (cond_finished) \
-+ break; \
-+} while (1)
-+
-+typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
-+ struct bch_io_opts *, struct data_update_opts *);
-+
-+void bch2_moving_ctxt_exit(struct moving_context *);
-+void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
-+ struct bch_ratelimit *, struct bch_move_stats *,
-+ struct write_point_specifier, bool);
-+struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
-+void bch2_moving_ctxt_do_pending_writes(struct moving_context *);
-+void bch2_move_ctxt_wait_for_io(struct moving_context *);
-+int bch2_move_ratelimit(struct moving_context *);
-+
-+/* Inodes in different snapshots may have different IO options: */
-+struct snapshot_io_opts_entry {
-+ u32 snapshot;
-+ struct bch_io_opts io_opts;
-+};
-+
-+struct per_snapshot_io_opts {
-+ u64 cur_inum;
-+ struct bch_io_opts fs_io_opts;
-+ DARRAY(struct snapshot_io_opts_entry) d;
-+};
-+
-+static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
-+{
-+ memset(io_opts, 0, sizeof(*io_opts));
-+ io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts);
-+}
-+
-+static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
-+{
-+ darray_exit(&io_opts->d);
-+}
-+
-+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
-+ struct per_snapshot_io_opts *, struct bkey_s_c);
-+int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c);
-+
-+int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
-+
-+int bch2_move_extent(struct moving_context *,
-+ struct move_bucket_in_flight *,
-+ struct btree_iter *,
-+ struct bkey_s_c,
-+ struct bch_io_opts,
-+ struct data_update_opts);
-+
-+int __bch2_move_data(struct moving_context *,
-+ struct bbpos,
-+ struct bbpos,
-+ move_pred_fn, void *);
-+int bch2_move_data(struct bch_fs *,
-+ struct bbpos start,
-+ struct bbpos end,
-+ struct bch_ratelimit *,
-+ struct bch_move_stats *,
-+ struct write_point_specifier,
-+ bool,
-+ move_pred_fn, void *);
-+
-+int __bch2_evacuate_bucket(struct moving_context *,
-+ struct move_bucket_in_flight *,
-+ struct bpos, int,
-+ struct data_update_opts);
-+int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int,
-+ struct data_update_opts,
-+ struct bch_ratelimit *,
-+ struct bch_move_stats *,
-+ struct write_point_specifier,
-+ bool);
-+int bch2_data_job(struct bch_fs *,
-+ struct bch_move_stats *,
-+ struct bch_ioctl_data);
-+
-+void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *);
-+void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *);
-+void bch2_move_stats_init(struct bch_move_stats *, char *);
-+
-+void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *);
-+
-+void bch2_fs_move_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_MOVE_H */
-diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h
-new file mode 100644
-index 000000000000..e22841ef31e4
---- /dev/null
-+++ b/fs/bcachefs/move_types.h
-@@ -0,0 +1,36 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_MOVE_TYPES_H
-+#define _BCACHEFS_MOVE_TYPES_H
-+
-+#include "bbpos_types.h"
-+
-+struct bch_move_stats {
-+ enum bch_data_type data_type;
-+ struct bbpos pos;
-+ char name[32];
-+
-+ atomic64_t keys_moved;
-+ atomic64_t keys_raced;
-+ atomic64_t sectors_seen;
-+ atomic64_t sectors_moved;
-+ atomic64_t sectors_raced;
-+};
-+
-+struct move_bucket_key {
-+ struct bpos bucket;
-+ u8 gen;
-+};
-+
-+struct move_bucket {
-+ struct move_bucket_key k;
-+ unsigned sectors;
-+};
-+
-+struct move_bucket_in_flight {
-+ struct move_bucket_in_flight *next;
-+ struct rhash_head hash;
-+ struct move_bucket bucket;
-+ atomic_t count;
-+};
-+
-+#endif /* _BCACHEFS_MOVE_TYPES_H */
-diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
-new file mode 100644
-index 000000000000..0a0576326c5b
---- /dev/null
-+++ b/fs/bcachefs/movinggc.c
-@@ -0,0 +1,431 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Moving/copying garbage collector
-+ *
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "btree_iter.h"
-+#include "btree_update.h"
-+#include "btree_write_buffer.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "lru.h"
-+#include "move.h"
-+#include "movinggc.h"
-+#include "trace.h"
-+
-+#include <linux/freezer.h>
-+#include <linux/kthread.h>
-+#include <linux/math64.h>
-+#include <linux/sched/task.h>
-+#include <linux/wait.h>
-+
-+struct buckets_in_flight {
-+ struct rhashtable table;
-+ struct move_bucket_in_flight *first;
-+ struct move_bucket_in_flight *last;
-+ size_t nr;
-+ size_t sectors;
-+};
-+
-+static const struct rhashtable_params bch_move_bucket_params = {
-+ .head_offset = offsetof(struct move_bucket_in_flight, hash),
-+ .key_offset = offsetof(struct move_bucket_in_flight, bucket.k),
-+ .key_len = sizeof(struct move_bucket_key),
-+};
-+
-+static struct move_bucket_in_flight *
-+move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b)
-+{
-+ struct move_bucket_in_flight *new = kzalloc(sizeof(*new), GFP_KERNEL);
-+ int ret;
-+
-+ if (!new)
-+ return ERR_PTR(-ENOMEM);
-+
-+ new->bucket = b;
-+
-+ ret = rhashtable_lookup_insert_fast(&list->table, &new->hash,
-+ bch_move_bucket_params);
-+ if (ret) {
-+ kfree(new);
-+ return ERR_PTR(ret);
-+ }
-+
-+ if (!list->first)
-+ list->first = new;
-+ else
-+ list->last->next = new;
-+
-+ list->last = new;
-+ list->nr++;
-+ list->sectors += b.sectors;
-+ return new;
-+}
-+
-+static int bch2_bucket_is_movable(struct btree_trans *trans,
-+ struct move_bucket *b, u64 time)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_alloc_v4 _a;
-+ const struct bch_alloc_v4 *a;
-+ int ret;
-+
-+ if (bch2_bucket_is_open(trans->c,
-+ b->k.bucket.inode,
-+ b->k.bucket.offset))
-+ return 0;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
-+ b->k.bucket, BTREE_ITER_CACHED);
-+ ret = bkey_err(k);
-+ if (ret)
-+ return ret;
-+
-+ a = bch2_alloc_to_v4(k, &_a);
-+ b->k.gen = a->gen;
-+ b->sectors = a->dirty_sectors;
-+
-+ ret = data_type_movable(a->data_type) &&
-+ a->fragmentation_lru &&
-+ a->fragmentation_lru <= time;
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static void move_buckets_wait(struct moving_context *ctxt,
-+ struct buckets_in_flight *list,
-+ bool flush)
-+{
-+ struct move_bucket_in_flight *i;
-+ int ret;
-+
-+ while ((i = list->first)) {
-+ if (flush)
-+ move_ctxt_wait_event(ctxt, !atomic_read(&i->count));
-+
-+ if (atomic_read(&i->count))
-+ break;
-+
-+ list->first = i->next;
-+ if (!list->first)
-+ list->last = NULL;
-+
-+ list->nr--;
-+ list->sectors -= i->bucket.sectors;
-+
-+ ret = rhashtable_remove_fast(&list->table, &i->hash,
-+ bch_move_bucket_params);
-+ BUG_ON(ret);
-+ kfree(i);
-+ }
-+
-+ bch2_trans_unlock_long(ctxt->trans);
-+}
-+
-+static bool bucket_in_flight(struct buckets_in_flight *list,
-+ struct move_bucket_key k)
-+{
-+ return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params);
-+}
-+
-+typedef DARRAY(struct move_bucket) move_buckets;
-+
-+static int bch2_copygc_get_buckets(struct moving_context *ctxt,
-+ struct buckets_in_flight *buckets_in_flight,
-+ move_buckets *buckets)
-+{
-+ struct btree_trans *trans = ctxt->trans;
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
-+ size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
-+ int ret;
-+
-+ move_buckets_wait(ctxt, buckets_in_flight, false);
-+
-+ ret = bch2_btree_write_buffer_flush(trans);
-+ if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()",
-+ __func__, bch2_err_str(ret)))
-+ return ret;
-+
-+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
-+ lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
-+ lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
-+ 0, k, ({
-+ struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
-+ int ret2 = 0;
-+
-+ saw++;
-+
-+ if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)))
-+ not_movable++;
-+ else if (bucket_in_flight(buckets_in_flight, b.k))
-+ in_flight++;
-+ else {
-+ ret2 = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
-+ if (ret2 >= 0)
-+ sectors += b.sectors;
-+ }
-+ ret2;
-+ }));
-+
-+ pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
-+ buckets_in_flight->nr, buckets_in_flight->sectors,
-+ saw, in_flight, not_movable, buckets->nr, sectors, nr_to_get, ret);
-+
-+ return ret < 0 ? ret : 0;
-+}
-+
-+noinline
-+static int bch2_copygc(struct moving_context *ctxt,
-+ struct buckets_in_flight *buckets_in_flight,
-+ bool *did_work)
-+{
-+ struct btree_trans *trans = ctxt->trans;
-+ struct bch_fs *c = trans->c;
-+ struct data_update_opts data_opts = {
-+ .btree_insert_flags = BCH_WATERMARK_copygc,
-+ };
-+ move_buckets buckets = { 0 };
-+ struct move_bucket_in_flight *f;
-+ struct move_bucket *i;
-+ u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
-+ int ret = 0;
-+
-+ ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets);
-+ if (ret)
-+ goto err;
-+
-+ darray_for_each(buckets, i) {
-+ if (unlikely(freezing(current)))
-+ break;
-+
-+ f = move_bucket_in_flight_add(buckets_in_flight, *i);
-+ ret = PTR_ERR_OR_ZERO(f);
-+ if (ret == -EEXIST) { /* rare race: copygc_get_buckets returned same bucket more than once */
-+ ret = 0;
-+ continue;
-+ }
-+ if (ret == -ENOMEM) { /* flush IO, continue later */
-+ ret = 0;
-+ break;
-+ }
-+
-+ ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket,
-+ f->bucket.k.gen, data_opts);
-+ if (ret)
-+ goto err;
-+
-+ *did_work = true;
-+ }
-+err:
-+ darray_exit(&buckets);
-+
-+ /* no entries in LRU btree found, or got to end: */
-+ if (bch2_err_matches(ret, ENOENT))
-+ ret = 0;
-+
-+ if (ret < 0 && !bch2_err_matches(ret, EROFS))
-+ bch_err_msg(c, ret, "from bch2_move_data()");
-+
-+ moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
-+ trace_and_count(c, copygc, c, moved, 0, 0, 0);
-+ return ret;
-+}
-+
-+/*
-+ * Copygc runs when the amount of fragmented data is above some arbitrary
-+ * threshold:
-+ *
-+ * The threshold at the limit - when the device is full - is the amount of space
-+ * we reserved in bch2_recalc_capacity; we can't have more than that amount of
-+ * disk space stranded due to fragmentation and store everything we have
-+ * promised to store.
-+ *
-+ * But we don't want to be running copygc unnecessarily when the device still
-+ * has plenty of free space - rather, we want copygc to smoothly run every so
-+ * often and continually reduce the amount of fragmented space as the device
-+ * fills up. So, we increase the threshold by half the current free space.
-+ */
-+unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned dev_idx;
-+ s64 wait = S64_MAX, fragmented_allowed, fragmented;
-+ unsigned i;
-+
-+ for_each_rw_member(ca, c, dev_idx) {
-+ struct bch_dev_usage usage = bch2_dev_usage_read(ca);
-+
-+ fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) *
-+ ca->mi.bucket_size) >> 1);
-+ fragmented = 0;
-+
-+ for (i = 0; i < BCH_DATA_NR; i++)
-+ if (data_type_movable(i))
-+ fragmented += usage.d[i].fragmented;
-+
-+ wait = min(wait, max(0LL, fragmented_allowed - fragmented));
-+ }
-+
-+ return wait;
-+}
-+
-+void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+ prt_printf(out, "Currently waiting for: ");
-+ prt_human_readable_u64(out, max(0LL, c->copygc_wait -
-+ atomic64_read(&c->io_clock[WRITE].now)) << 9);
-+ prt_newline(out);
-+
-+ prt_printf(out, "Currently waiting since: ");
-+ prt_human_readable_u64(out, max(0LL,
-+ atomic64_read(&c->io_clock[WRITE].now) -
-+ c->copygc_wait_at) << 9);
-+ prt_newline(out);
-+
-+ prt_printf(out, "Currently calculated wait: ");
-+ prt_human_readable_u64(out, bch2_copygc_wait_amount(c));
-+ prt_newline(out);
-+}
-+
-+static int bch2_copygc_thread(void *arg)
-+{
-+ struct bch_fs *c = arg;
-+ struct moving_context ctxt;
-+ struct bch_move_stats move_stats;
-+ struct io_clock *clock = &c->io_clock[WRITE];
-+ struct buckets_in_flight *buckets;
-+ u64 last, wait;
-+ int ret = 0;
-+
-+ buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL);
-+ if (!buckets)
-+ return -ENOMEM;
-+ ret = rhashtable_init(&buckets->table, &bch_move_bucket_params);
-+ if (ret) {
-+ kfree(buckets);
-+ bch_err_msg(c, ret, "allocating copygc buckets in flight");
-+ return ret;
-+ }
-+
-+ set_freezable();
-+
-+ bch2_move_stats_init(&move_stats, "copygc");
-+ bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
-+ writepoint_ptr(&c->copygc_write_point),
-+ false);
-+
-+ while (!ret && !kthread_should_stop()) {
-+ bool did_work = false;
-+
-+ bch2_trans_unlock_long(ctxt.trans);
-+ cond_resched();
-+
-+ if (!c->copy_gc_enabled) {
-+ move_buckets_wait(&ctxt, buckets, true);
-+ kthread_wait_freezable(c->copy_gc_enabled);
-+ }
-+
-+ if (unlikely(freezing(current))) {
-+ move_buckets_wait(&ctxt, buckets, true);
-+ __refrigerator(false);
-+ continue;
-+ }
-+
-+ last = atomic64_read(&clock->now);
-+ wait = bch2_copygc_wait_amount(c);
-+
-+ if (wait > clock->max_slop) {
-+ c->copygc_wait_at = last;
-+ c->copygc_wait = last + wait;
-+ move_buckets_wait(&ctxt, buckets, true);
-+ trace_and_count(c, copygc_wait, c, wait, last + wait);
-+ bch2_kthread_io_clock_wait(clock, last + wait,
-+ MAX_SCHEDULE_TIMEOUT);
-+ continue;
-+ }
-+
-+ c->copygc_wait = 0;
-+
-+ c->copygc_running = true;
-+ ret = bch2_copygc(&ctxt, buckets, &did_work);
-+ c->copygc_running = false;
-+
-+ wake_up(&c->copygc_running_wq);
-+
-+ if (!wait && !did_work) {
-+ u64 min_member_capacity = bch2_min_rw_member_capacity(c);
-+
-+ if (min_member_capacity == U64_MAX)
-+ min_member_capacity = 128 * 2048;
-+
-+ bch2_trans_unlock_long(ctxt.trans);
-+ bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6),
-+ MAX_SCHEDULE_TIMEOUT);
-+ }
-+ }
-+
-+ move_buckets_wait(&ctxt, buckets, true);
-+
-+ rhashtable_destroy(&buckets->table);
-+ kfree(buckets);
-+ bch2_moving_ctxt_exit(&ctxt);
-+ bch2_move_stats_exit(&move_stats, c);
-+
-+ return 0;
-+}
-+
-+void bch2_copygc_stop(struct bch_fs *c)
-+{
-+ if (c->copygc_thread) {
-+ kthread_stop(c->copygc_thread);
-+ put_task_struct(c->copygc_thread);
-+ }
-+ c->copygc_thread = NULL;
-+}
-+
-+int bch2_copygc_start(struct bch_fs *c)
-+{
-+ struct task_struct *t;
-+ int ret;
-+
-+ if (c->copygc_thread)
-+ return 0;
-+
-+ if (c->opts.nochanges)
-+ return 0;
-+
-+ if (bch2_fs_init_fault("copygc_start"))
-+ return -ENOMEM;
-+
-+ t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
-+ ret = PTR_ERR_OR_ZERO(t);
-+ if (ret) {
-+ bch_err_msg(c, ret, "creating copygc thread");
-+ return ret;
-+ }
-+
-+ get_task_struct(t);
-+
-+ c->copygc_thread = t;
-+ wake_up_process(c->copygc_thread);
-+
-+ return 0;
-+}
-+
-+void bch2_fs_copygc_init(struct bch_fs *c)
-+{
-+ init_waitqueue_head(&c->copygc_running_wq);
-+ c->copygc_running = false;
-+}
-diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h
-new file mode 100644
-index 000000000000..ea181fef5bc9
---- /dev/null
-+++ b/fs/bcachefs/movinggc.h
-@@ -0,0 +1,12 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_MOVINGGC_H
-+#define _BCACHEFS_MOVINGGC_H
-+
-+unsigned long bch2_copygc_wait_amount(struct bch_fs *);
-+void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
-+
-+void bch2_copygc_stop(struct bch_fs *);
-+int bch2_copygc_start(struct bch_fs *);
-+void bch2_fs_copygc_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_MOVINGGC_H */
-diff --git a/fs/bcachefs/nocow_locking.c b/fs/bcachefs/nocow_locking.c
-new file mode 100644
-index 000000000000..3c21981a4a1c
---- /dev/null
-+++ b/fs/bcachefs/nocow_locking.c
-@@ -0,0 +1,144 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_methods.h"
-+#include "nocow_locking.h"
-+#include "util.h"
-+
-+#include <linux/closure.h>
-+
-+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket)
-+{
-+ u64 dev_bucket = bucket_to_u64(bucket);
-+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
-+ unsigned i;
-+
-+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
-+ if (l->b[i] == dev_bucket && atomic_read(&l->l[i]))
-+ return true;
-+ return false;
-+}
-+
-+#define sign(v) (v < 0 ? -1 : v > 0 ? 1 : 0)
-+
-+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags)
-+{
-+ u64 dev_bucket = bucket_to_u64(bucket);
-+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
-+ int lock_val = flags ? 1 : -1;
-+ unsigned i;
-+
-+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
-+ if (l->b[i] == dev_bucket) {
-+ int v = atomic_sub_return(lock_val, &l->l[i]);
-+
-+ BUG_ON(v && sign(v) != lock_val);
-+ if (!v)
-+ closure_wake_up(&l->wait);
-+ return;
-+ }
-+
-+ BUG();
-+}
-+
-+bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
-+ u64 dev_bucket, int flags)
-+{
-+ int v, lock_val = flags ? 1 : -1;
-+ unsigned i;
-+
-+ spin_lock(&l->lock);
-+
-+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
-+ if (l->b[i] == dev_bucket)
-+ goto got_entry;
-+
-+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
-+ if (!atomic_read(&l->l[i])) {
-+ l->b[i] = dev_bucket;
-+ goto take_lock;
-+ }
-+fail:
-+ spin_unlock(&l->lock);
-+ return false;
-+got_entry:
-+ v = atomic_read(&l->l[i]);
-+ if (lock_val > 0 ? v < 0 : v > 0)
-+ goto fail;
-+take_lock:
-+ v = atomic_read(&l->l[i]);
-+ /* Overflow? */
-+ if (v && sign(v + lock_val) != sign(v))
-+ goto fail;
-+
-+ atomic_add(lock_val, &l->l[i]);
-+ spin_unlock(&l->lock);
-+ return true;
-+}
-+
-+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
-+ struct nocow_lock_bucket *l,
-+ u64 dev_bucket, int flags)
-+{
-+ if (!__bch2_bucket_nocow_trylock(l, dev_bucket, flags)) {
-+ struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
-+ u64 start_time = local_clock();
-+
-+ __closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags));
-+ bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
-+ }
-+}
-+
-+void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t)
-+
-+{
-+ unsigned i, nr_zero = 0;
-+ struct nocow_lock_bucket *l;
-+
-+ for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) {
-+ unsigned v = 0;
-+
-+ for (i = 0; i < ARRAY_SIZE(l->l); i++)
-+ v |= atomic_read(&l->l[i]);
-+
-+ if (!v) {
-+ nr_zero++;
-+ continue;
-+ }
-+
-+ if (nr_zero)
-+ prt_printf(out, "(%u empty entries)\n", nr_zero);
-+ nr_zero = 0;
-+
-+ for (i = 0; i < ARRAY_SIZE(l->l); i++) {
-+ int v = atomic_read(&l->l[i]);
-+ if (v) {
-+ bch2_bpos_to_text(out, u64_to_bucket(l->b[i]));
-+ prt_printf(out, ": %s %u ", v < 0 ? "copy" : "update", abs(v));
-+ }
-+ }
-+ prt_newline(out);
-+ }
-+
-+ if (nr_zero)
-+ prt_printf(out, "(%u empty entries)\n", nr_zero);
-+}
-+
-+void bch2_fs_nocow_locking_exit(struct bch_fs *c)
-+{
-+ struct bucket_nocow_lock_table *t = &c->nocow_locks;
-+
-+ for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++)
-+ for (unsigned j = 0; j < ARRAY_SIZE(l->l); j++)
-+ BUG_ON(atomic_read(&l->l[j]));
-+}
-+
-+int bch2_fs_nocow_locking_init(struct bch_fs *c)
-+{
-+ struct bucket_nocow_lock_table *t = &c->nocow_locks;
-+
-+ for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++)
-+ spin_lock_init(&l->lock);
-+
-+ return 0;
-+}
-diff --git a/fs/bcachefs/nocow_locking.h b/fs/bcachefs/nocow_locking.h
-new file mode 100644
-index 000000000000..f9d6a426a960
---- /dev/null
-+++ b/fs/bcachefs/nocow_locking.h
-@@ -0,0 +1,50 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_NOCOW_LOCKING_H
-+#define _BCACHEFS_NOCOW_LOCKING_H
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "nocow_locking_types.h"
-+
-+#include <linux/hash.h>
-+
-+static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
-+ u64 dev_bucket)
-+{
-+ unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS);
-+
-+ return t->l + (h & (BUCKET_NOCOW_LOCKS - 1));
-+}
-+
-+#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0)
-+
-+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos);
-+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int);
-+bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *, u64, int);
-+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *,
-+ struct nocow_lock_bucket *, u64, int);
-+
-+static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
-+ struct bpos bucket, int flags)
-+{
-+ u64 dev_bucket = bucket_to_u64(bucket);
-+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
-+
-+ __bch2_bucket_nocow_lock(t, l, dev_bucket, flags);
-+}
-+
-+static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t,
-+ struct bpos bucket, int flags)
-+{
-+ u64 dev_bucket = bucket_to_u64(bucket);
-+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
-+
-+ return __bch2_bucket_nocow_trylock(l, dev_bucket, flags);
-+}
-+
-+void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);
-+
-+void bch2_fs_nocow_locking_exit(struct bch_fs *);
-+int bch2_fs_nocow_locking_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_NOCOW_LOCKING_H */
-diff --git a/fs/bcachefs/nocow_locking_types.h b/fs/bcachefs/nocow_locking_types.h
-new file mode 100644
-index 000000000000..bd12bf677924
---- /dev/null
-+++ b/fs/bcachefs/nocow_locking_types.h
-@@ -0,0 +1,20 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H
-+#define _BCACHEFS_NOCOW_LOCKING_TYPES_H
-+
-+#define BUCKET_NOCOW_LOCKS_BITS 10
-+#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS)
-+
-+struct nocow_lock_bucket {
-+ struct closure_waitlist wait;
-+ spinlock_t lock;
-+ u64 b[4];
-+ atomic_t l[4];
-+} __aligned(SMP_CACHE_BYTES);
-+
-+struct bucket_nocow_lock_table {
-+ struct nocow_lock_bucket l[BUCKET_NOCOW_LOCKS];
-+};
-+
-+#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */
-+
-diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
-new file mode 100644
-index 000000000000..8dd4046cca41
---- /dev/null
-+++ b/fs/bcachefs/opts.c
-@@ -0,0 +1,602 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include <linux/kernel.h>
-+
-+#include "bcachefs.h"
-+#include "compress.h"
-+#include "disk_groups.h"
-+#include "error.h"
-+#include "opts.h"
-+#include "super-io.h"
-+#include "util.h"
-+
-+#define x(t, n, ...) [n] = #t,
-+
-+const char * const bch2_error_actions[] = {
-+ BCH_ERROR_ACTIONS()
-+ NULL
-+};
-+
-+const char * const bch2_fsck_fix_opts[] = {
-+ BCH_FIX_ERRORS_OPTS()
-+ NULL
-+};
-+
-+const char * const bch2_version_upgrade_opts[] = {
-+ BCH_VERSION_UPGRADE_OPTS()
-+ NULL
-+};
-+
-+const char * const bch2_sb_features[] = {
-+ BCH_SB_FEATURES()
-+ NULL
-+};
-+
-+const char * const bch2_sb_compat[] = {
-+ BCH_SB_COMPAT()
-+ NULL
-+};
-+
-+const char * const __bch2_btree_ids[] = {
-+ BCH_BTREE_IDS()
-+ NULL
-+};
-+
-+const char * const bch2_csum_types[] = {
-+ BCH_CSUM_TYPES()
-+ NULL
-+};
-+
-+const char * const bch2_csum_opts[] = {
-+ BCH_CSUM_OPTS()
-+ NULL
-+};
-+
-+const char * const bch2_compression_types[] = {
-+ BCH_COMPRESSION_TYPES()
-+ NULL
-+};
-+
-+const char * const bch2_compression_opts[] = {
-+ BCH_COMPRESSION_OPTS()
-+ NULL
-+};
-+
-+const char * const bch2_str_hash_types[] = {
-+ BCH_STR_HASH_TYPES()
-+ NULL
-+};
-+
-+const char * const bch2_str_hash_opts[] = {
-+ BCH_STR_HASH_OPTS()
-+ NULL
-+};
-+
-+const char * const bch2_data_types[] = {
-+ BCH_DATA_TYPES()
-+ NULL
-+};
-+
-+const char * const bch2_member_states[] = {
-+ BCH_MEMBER_STATES()
-+ NULL
-+};
-+
-+const char * const bch2_jset_entry_types[] = {
-+ BCH_JSET_ENTRY_TYPES()
-+ NULL
-+};
-+
-+const char * const bch2_fs_usage_types[] = {
-+ BCH_FS_USAGE_TYPES()
-+ NULL
-+};
-+
-+#undef x
-+
-+static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
-+ struct printbuf *err)
-+{
-+ if (!val) {
-+ *res = FSCK_FIX_yes;
-+ } else {
-+ int ret = match_string(bch2_fsck_fix_opts, -1, val);
-+
-+ if (ret < 0 && err)
-+ prt_str(err, "fix_errors: invalid selection");
-+ if (ret < 0)
-+ return ret;
-+ *res = ret;
-+ }
-+
-+ return 0;
-+}
-+
-+static void bch2_opt_fix_errors_to_text(struct printbuf *out,
-+ struct bch_fs *c,
-+ struct bch_sb *sb,
-+ u64 v)
-+{
-+ prt_str(out, bch2_fsck_fix_opts[v]);
-+}
-+
-+#define bch2_opt_fix_errors (struct bch_opt_fn) { \
-+ .parse = bch2_opt_fix_errors_parse, \
-+ .to_text = bch2_opt_fix_errors_to_text, \
-+}
-+
-+const char * const bch2_d_types[BCH_DT_MAX] = {
-+ [DT_UNKNOWN] = "unknown",
-+ [DT_FIFO] = "fifo",
-+ [DT_CHR] = "chr",
-+ [DT_DIR] = "dir",
-+ [DT_BLK] = "blk",
-+ [DT_REG] = "reg",
-+ [DT_LNK] = "lnk",
-+ [DT_SOCK] = "sock",
-+ [DT_WHT] = "whiteout",
-+ [DT_SUBVOL] = "subvol",
-+};
-+
-+u64 BCH2_NO_SB_OPT(const struct bch_sb *sb)
-+{
-+ BUG();
-+}
-+
-+void SET_BCH2_NO_SB_OPT(struct bch_sb *sb, u64 v)
-+{
-+ BUG();
-+}
-+
-+void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src)
-+{
-+#define x(_name, ...) \
-+ if (opt_defined(src, _name)) \
-+ opt_set(*dst, _name, src._name);
-+
-+ BCH_OPTS()
-+#undef x
-+}
-+
-+bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id)
-+{
-+ switch (id) {
-+#define x(_name, ...) \
-+ case Opt_##_name: \
-+ return opt_defined(*opts, _name);
-+ BCH_OPTS()
-+#undef x
-+ default:
-+ BUG();
-+ }
-+}
-+
-+u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id)
-+{
-+ switch (id) {
-+#define x(_name, ...) \
-+ case Opt_##_name: \
-+ return opts->_name;
-+ BCH_OPTS()
-+#undef x
-+ default:
-+ BUG();
-+ }
-+}
-+
-+void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v)
-+{
-+ switch (id) {
-+#define x(_name, ...) \
-+ case Opt_##_name: \
-+ opt_set(*opts, _name, v); \
-+ break;
-+ BCH_OPTS()
-+#undef x
-+ default:
-+ BUG();
-+ }
-+}
-+
-+const struct bch_option bch2_opt_table[] = {
-+#define OPT_BOOL() .type = BCH_OPT_BOOL, .min = 0, .max = 2
-+#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \
-+ .min = _min, .max = _max
-+#define OPT_STR(_choices) .type = BCH_OPT_STR, \
-+ .min = 0, .max = ARRAY_SIZE(_choices), \
-+ .choices = _choices
-+#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
-+
-+#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
-+ [Opt_##_name] = { \
-+ .attr = { \
-+ .name = #_name, \
-+ .mode = (_flags) & OPT_RUNTIME ? 0644 : 0444, \
-+ }, \
-+ .flags = _flags, \
-+ .hint = _hint, \
-+ .help = _help, \
-+ .get_sb = _sb_opt, \
-+ .set_sb = SET_##_sb_opt, \
-+ _type \
-+ },
-+
-+ BCH_OPTS()
-+#undef x
-+};
-+
-+int bch2_opt_lookup(const char *name)
-+{
-+ const struct bch_option *i;
-+
-+ for (i = bch2_opt_table;
-+ i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table);
-+ i++)
-+ if (!strcmp(name, i->attr.name))
-+ return i - bch2_opt_table;
-+
-+ return -1;
-+}
-+
-+struct synonym {
-+ const char *s1, *s2;
-+};
-+
-+static const struct synonym bch_opt_synonyms[] = {
-+ { "quota", "usrquota" },
-+};
-+
-+static int bch2_mount_opt_lookup(const char *name)
-+{
-+ const struct synonym *i;
-+
-+ for (i = bch_opt_synonyms;
-+ i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms);
-+ i++)
-+ if (!strcmp(name, i->s1))
-+ name = i->s2;
-+
-+ return bch2_opt_lookup(name);
-+}
-+
-+/*
-+ * Check that @v is an acceptable value for @opt.
-+ *
-+ * Checks, in order: lower bound (inclusive), upper bound (exclusive, skipped
-+ * when opt->max is 0), 512-byte alignment for OPT_SB_FIELD_SECTORS options,
-+ * power-of-two for OPT_MUST_BE_POW_2 options, then the option's own
-+ * fn.validate hook if it has one.
-+ *
-+ * If @err is non-NULL a description of the first failed check is appended.
-+ * Returns 0 if the value is acceptable, otherwise a negative error code.
-+ */
-+int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err)
-+{
-+	const char *optname = opt->attr.name;
-+
-+	if (v < opt->min) {
-+		if (err)
-+			prt_printf(err, "%s: too small (min %llu)",
-+				   optname, opt->min);
-+		return -BCH_ERR_ERANGE_option_too_small;
-+	}
-+
-+	if (opt->max && v >= opt->max) {
-+		if (err)
-+			prt_printf(err, "%s: too big (max %llu)",
-+				   optname, opt->max);
-+		return -BCH_ERR_ERANGE_option_too_big;
-+	}
-+
-+	if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) {
-+		if (err)
-+			prt_printf(err, "%s: not a multiple of 512",
-+				   optname);
-+		return -EINVAL;
-+	}
-+
-+	if ((opt->flags & OPT_MUST_BE_POW_2) && !is_power_of_2(v)) {
-+		if (err)
-+			prt_printf(err, "%s: must be a power of two",
-+				   optname);
-+		return -EINVAL;
-+	}
-+
-+	return opt->fn.validate
-+		? opt->fn.validate(v, err)
-+		: 0;
-+}
-+
-+/*
-+ * Parse the textual value @val of option @opt into *@res, then check it with
-+ * bch2_opt_validate().
-+ *
-+ * @c:   filesystem, passed through to opt->fn.parse() for BCH_OPT_FN options
-+ * @opt: option being parsed
-+ * @val: value string; NULL is accepted only for bool options, where it
-+ *       means 1
-+ * @res: receives the parsed value
-+ * @err: optional; when non-NULL a description of the failure is appended
-+ *
-+ * Returns 0 on success, otherwise a negative error code.
-+ */
-+int bch2_opt_parse(struct bch_fs *c,
-+		   const struct bch_option *opt,
-+		   const char *val, u64 *res,
-+		   struct printbuf *err)
-+{
-+	ssize_t ret;
-+
-+	switch (opt->type) {
-+	case BCH_OPT_BOOL:
-+		if (val) {
-+			ret = kstrtou64(val, 10, res);
-+		} else {
-+			ret = 0;
-+			*res = 1;
-+		}
-+
-+		if (ret < 0 || (*res != 0 && *res != 1)) {
-+			if (err)
-+				prt_printf(err, "%s: must be bool", opt->attr.name);
-+			/*
-+			 * A well-formed number other than 0/1 leaves ret == 0;
-+			 * that must not be reported as success.
-+			 */
-+			return ret < 0 ? ret : -EINVAL;
-+		}
-+		break;
-+	case BCH_OPT_UINT:
-+		if (!val) {
-+			/* @err is optional here just like on every other path */
-+			if (err)
-+				prt_printf(err, "%s: required value",
-+					   opt->attr.name);
-+			return -EINVAL;
-+		}
-+
-+		ret = opt->flags & OPT_HUMAN_READABLE
-+			? bch2_strtou64_h(val, res)
-+			: kstrtou64(val, 10, res);
-+		if (ret < 0) {
-+			if (err)
-+				prt_printf(err, "%s: must be a number",
-+					   opt->attr.name);
-+			return ret;
-+		}
-+		break;
-+	case BCH_OPT_STR:
-+		if (!val) {
-+			if (err)
-+				prt_printf(err, "%s: required value",
-+					   opt->attr.name);
-+			return -EINVAL;
-+		}
-+
-+		/* n == -1: opt->choices is treated as a NULL terminated array */
-+		ret = match_string(opt->choices, -1, val);
-+		if (ret < 0) {
-+			if (err)
-+				prt_printf(err, "%s: invalid selection",
-+					   opt->attr.name);
-+			return ret;
-+		}
-+
-+		*res = ret;
-+		break;
-+	case BCH_OPT_FN:
-+		ret = opt->fn.parse(c, val, res, err);
-+		if (ret < 0) {
-+			if (err)
-+				prt_printf(err, "%s: parse error",
-+					   opt->attr.name);
-+			return ret;
-+		}
-+		break;
-+	default:
-+		/* Unknown type: *res was never written, so don't validate it */
-+		return -EINVAL;
-+	}
-+
-+	return bch2_opt_validate(opt, *res, err);
-+}
-+
-+/*
-+ * Print the value @v of option @opt to @out.
-+ *
-+ * @c, @sb: passed through to opt->fn.to_text() for BCH_OPT_FN options
-+ * @flags:  OPT_SHOW_MOUNT_STYLE prints "name=value" (bools print "name" or
-+ *          "noname"); OPT_SHOW_FULL_LIST prints string options via
-+ *          prt_string_option() instead of just the selected choice
-+ */
-+void bch2_opt_to_text(struct printbuf *out,
-+		      struct bch_fs *c, struct bch_sb *sb,
-+		      const struct bch_option *opt, u64 v,
-+		      unsigned flags)
-+{
-+	if (flags & OPT_SHOW_MOUNT_STYLE) {
-+		if (opt->type == BCH_OPT_BOOL) {
-+			prt_printf(out, "%s%s",
-+				   v ? "" : "no",
-+				   opt->attr.name);
-+			return;
-+		}
-+
-+		prt_printf(out, "%s=", opt->attr.name);
-+	}
-+
-+	switch (opt->type) {
-+	case BCH_OPT_BOOL:
-+	case BCH_OPT_UINT:
-+		if (opt->flags & OPT_HUMAN_READABLE)
-+			prt_human_readable_u64(out, v);
-+		else
-+			prt_printf(out, "%lli", v);
-+		break;
-+	case BCH_OPT_STR:
-+		if (flags & OPT_SHOW_FULL_LIST)
-+			prt_string_option(out, opt->choices, v);
-+		else if (v < opt->max && opt->choices[v])
-+			prt_str(out, opt->choices[v]);
-+		else
-+			/*
-+			 * Out of range index, or the NULL terminator slot that
-+			 * ARRAY_SIZE() counts in opt->max: never dereference it.
-+			 */
-+			prt_printf(out, "(invalid option %lli)", v);
-+		break;
-+	case BCH_OPT_FN:
-+		opt->fn.to_text(out, c, sb, v);
-+		break;
-+	default:
-+		BUG();
-+	}
-+}
-+
-+/*
-+ * Per-option hook run with the value @v that option @id is being set to.
-+ *
-+ * Compression options are passed to bch2_check_set_has_compressed_data() and
-+ * its result is returned; a non-zero erasure_code value calls
-+ * bch2_check_set_feature() for BCH_FEATURE_ec. Every other option returns 0.
-+ */
-+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
-+{
-+	if (id == Opt_compression ||
-+	    id == Opt_background_compression)
-+		return bch2_check_set_has_compressed_data(c, v);
-+
-+	if (id == Opt_erasure_code && v)
-+		bch2_check_set_feature(c, BCH_FEATURE_ec);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Run bch2_opt_check_may_set() for every option using the value currently
-+ * held in c->opts. Stops at, and returns, the first non-zero result;
-+ * returns 0 if all checks pass.
-+ */
-+int bch2_opts_check_may_set(struct bch_fs *c)
-+{
-+	unsigned id = 0;
-+	int ret = 0;
-+
-+	while (id < bch2_opts_nr && !ret) {
-+		ret = bch2_opt_check_may_set(c, id,
-+				bch2_opt_get_by_id(&c->opts, id));
-+		id++;
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * Parse a comma separated mount option string into @opts.
-+ *
-+ * @c:       filesystem, passed through to bch2_opt_parse()
-+ * @opts:    receives every option that parsed successfully
-+ * @options: "name[=value],..." list; NULL means no options. The string is
-+ *           duplicated before being tokenized, so it is not modified.
-+ *
-+ * "noNAME" without a value is accepted as NAME=0. Names that match no
-+ * option are silently ignored.
-+ *
-+ * Returns 0 on success, -ENOMEM if the option string cannot be duplicated,
-+ * or -EINVAL for an option that may not be used at mount time or whose
-+ * value fails to parse (the reason is logged with pr_err()).
-+ */
-+int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
-+			  char *options)
-+{
-+	char *copied_opts, *copied_opts_start;
-+	char *opt, *name, *val;
-+	int ret, id;
-+	struct printbuf err = PRINTBUF;
-+	u64 v;
-+
-+	if (!options)
-+		return 0;
-+
-+	/*
-+	 * sys_fsconfig() is now occasionally providing us with option lists
-+	 * starting with a comma - weird.
-+	 */
-+	if (*options == ',')
-+		options++;
-+
-+	copied_opts = kstrdup(options, GFP_KERNEL);
-+	if (!copied_opts)
-+		return -ENOMEM;
-+	/* strsep() advances copied_opts; keep the start for kfree() */
-+	copied_opts_start = copied_opts;
-+
-+	while ((opt = strsep(&copied_opts, ",")) != NULL) {
-+		name = strsep(&opt, "=");
-+		val = opt;
-+
-+		id = bch2_mount_opt_lookup(name);
-+
-+		/* Check for the form "noopt", negation of a boolean opt: */
-+		if (id < 0 &&
-+		    !val &&
-+		    !strncmp("no", name, 2)) {
-+			id = bch2_mount_opt_lookup(name + 2);
-+			val = "0";
-+		}
-+
-+		/* Unknown options are ignored: */
-+		if (id < 0)
-+			continue;
-+
-+		if (!(bch2_opt_table[id].flags & OPT_MOUNT))
-+			goto bad_opt;
-+
-+		if (id == Opt_acl &&
-+		    !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL))
-+			goto bad_opt;
-+
-+		if ((id == Opt_usrquota ||
-+		     id == Opt_grpquota) &&
-+		    !IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
-+			goto bad_opt;
-+
-+		ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
-+		if (ret < 0)
-+			goto bad_val;
-+
-+		bch2_opt_set_by_id(opts, id, v);
-+	}
-+
-+	ret = 0;
-+	goto out;
-+
-+bad_opt:
-+	pr_err("Bad mount option %s", name);
-+	/* A bare -1 is -EPERM, which misreports the failure */
-+	ret = -EINVAL;
-+	goto out;
-+bad_val:
-+	pr_err("Invalid mount option %s", err.buf);
-+	/*
-+	 * Report a standard errno instead of forwarding the value from
-+	 * bch2_opt_parse(), which can be a bcachefs-private -BCH_ERR_* code.
-+	 */
-+	ret = -EINVAL;
-+out:
-+	kfree(copied_opts_start);
-+	printbuf_exit(&err);
-+	return ret;
-+}
-+
-+/*
-+ * Read option @id from superblock @sb and convert it from its stored form:
-+ * OPT_SB_FIELD_ILOG2 fields hold a log2 and are expanded first, then
-+ * OPT_SB_FIELD_SECTORS fields are converted from 512-byte sectors to bytes.
-+ */
-+u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id)
-+{
-+	const struct bch_option *opt = &bch2_opt_table[id];
-+	u64 stored = opt->get_sb(sb);
-+	u64 v = stored;
-+
-+	if (opt->flags & OPT_SB_FIELD_ILOG2)
-+		v = 1ULL << stored;
-+
-+	if (opt->flags & OPT_SB_FIELD_SECTORS)
-+		v <<= 9;
-+
-+	return v;
-+}
-+
-+/*
-+ * Load into @opts every option that has a superblock field, using the value
-+ * stored in @sb. Options whose getter is BCH2_NO_SB_OPT are left untouched.
-+ * Always returns 0.
-+ */
-+int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb)
-+{
-+	unsigned id;
-+
-+	for (id = 0; id < bch2_opts_nr; id++) {
-+		if (bch2_opt_table[id].get_sb != BCH2_NO_SB_OPT)
-+			bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id));
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Store @v in the superblock field backing @opt, converting it to the stored
-+ * form first: bytes to 512-byte sectors for OPT_SB_FIELD_SECTORS, then
-+ * ilog2() for OPT_SB_FIELD_ILOG2. Does nothing for options without a
-+ * superblock field.
-+ */
-+void __bch2_opt_set_sb(struct bch_sb *sb, const struct bch_option *opt, u64 v)
-+{
-+	u64 stored = v;
-+
-+	if (opt->set_sb == SET_BCH2_NO_SB_OPT)
-+		return;
-+
-+	if (opt->flags & OPT_SB_FIELD_SECTORS)
-+		stored >>= 9;
-+
-+	if (opt->flags & OPT_SB_FIELD_ILOG2)
-+		stored = ilog2(stored);
-+
-+	opt->set_sb(sb, stored);
-+}
-+
-+/*
-+ * Update @opt in the filesystem's superblock and write the superblock out,
-+ * all under c->sb_lock. Does nothing for options without a superblock field.
-+ */
-+void bch2_opt_set_sb(struct bch_fs *c, const struct bch_option *opt, u64 v)
-+{
-+	if (opt->set_sb != SET_BCH2_NO_SB_OPT) {
-+		mutex_lock(&c->sb_lock);
-+		__bch2_opt_set_sb(c->disk_sb.sb, opt, v);
-+		bch2_write_super(c);
-+		mutex_unlock(&c->sb_lock);
-+	}
-+}
-+
-+/* io opts: */
-+
-+/*
-+ * Build a struct bch_io_opts by copying, for every option listed in
-+ * BCH_INODE_OPTS(), the member of the same name from @src.
-+ */
-+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src)
-+{
-+ return (struct bch_io_opts) {
-+#define x(_name, _bits) ._name = src._name,
-+ BCH_INODE_OPTS()
-+#undef x
-+ };
-+}
-+
-+/*
-+ * Returns true if option @id is one of the options listed in
-+ * BCH_INODE_OPTS().
-+ */
-+bool bch2_opt_is_inode_opt(enum bch_opt_id id)
-+{
-+	static const enum bch_opt_id inode_opt_list[] = {
-+#define x(_name, _bits) Opt_##_name,
-+	BCH_INODE_OPTS()
-+#undef x
-+	};
-+	bool found = false;
-+	unsigned i;
-+
-+	for (i = 0; i < ARRAY_SIZE(inode_opt_list) && !found; i++)
-+		found = inode_opt_list[i] == id;
-+
-+	return found;
-+}
-diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
-new file mode 100644
-index 000000000000..8526f177450a
---- /dev/null
-+++ b/fs/bcachefs/opts.h
-@@ -0,0 +1,564 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_OPTS_H
-+#define _BCACHEFS_OPTS_H
-+
-+#include <linux/bug.h>
-+#include <linux/log2.h>
-+#include <linux/string.h>
-+#include <linux/sysfs.h>
-+#include "bcachefs_format.h"
-+
-+struct bch_fs;
-+
-+extern const char * const bch2_error_actions[];
-+extern const char * const bch2_fsck_fix_opts[];
-+extern const char * const bch2_version_upgrade_opts[];
-+extern const char * const bch2_sb_features[];
-+extern const char * const bch2_sb_compat[];
-+extern const char * const __bch2_btree_ids[];
-+extern const char * const bch2_csum_types[];
-+extern const char * const bch2_csum_opts[];
-+extern const char * const bch2_compression_types[];
-+extern const char * const bch2_compression_opts[];
-+extern const char * const bch2_str_hash_types[];
-+extern const char * const bch2_str_hash_opts[];
-+extern const char * const bch2_data_types[];
-+extern const char * const bch2_member_states[];
-+extern const char * const bch2_jset_entry_types[];
-+extern const char * const bch2_fs_usage_types[];
-+extern const char * const bch2_d_types[];
-+
-+/*
-+ * Name of directory entry type @d_type, or "(bad d_type)" when the value is
-+ * out of range or has no name in bch2_d_types.
-+ */
-+static inline const char *bch2_d_type_str(unsigned d_type)
-+{
-+	const char *str = NULL;
-+
-+	if (d_type < BCH_DT_MAX)
-+		str = bch2_d_types[d_type];
-+
-+	return str ? str : "(bad d_type)";
-+}
-+
-+/*
-+ * Mount options; we also store defaults in the superblock.
-+ *
-+ * Also exposed via sysfs: if an option is writeable, and it's also stored in
-+ * the superblock, changing it via sysfs (currently? might change this) also
-+ * updates the superblock.
-+ *
-+ * We store options as signed integers, where -1 means undefined. This means we
-+ * can pass the mount options to bch2_fs_alloc() as a whole struct, and then only
-+ * apply the options from that struct that are defined.
-+ */
-+
-+/* dummy option, for options that aren't stored in the superblock */
-+u64 BCH2_NO_SB_OPT(const struct bch_sb *);
-+void SET_BCH2_NO_SB_OPT(struct bch_sb *, u64);
-+
-+/*
-+ * Option flags: what the option applies to and when it can be set, plus
-+ * modifiers for how its value is parsed, validated and stored:
-+ */
-+enum opt_flags {
-+ OPT_FS = (1 << 0), /* Filesystem option */
-+ OPT_DEVICE = (1 << 1), /* Device option */
-+ OPT_INODE = (1 << 2), /* Inode option */
-+ OPT_FORMAT = (1 << 3), /* May be specified at format time */
-+ OPT_MOUNT = (1 << 4), /* May be specified at mount time */
-+ OPT_RUNTIME = (1 << 5), /* May be specified at runtime */
-+ OPT_HUMAN_READABLE = (1 << 6), /* Parsed with bch2_strtou64_h(), printed in human readable units */
-+ OPT_MUST_BE_POW_2 = (1 << 7), /* Must be power of 2 */
-+ OPT_SB_FIELD_SECTORS = (1 << 8),/* Superblock field is >> 9 of actual value */
-+ OPT_SB_FIELD_ILOG2 = (1 << 9), /* Superblock field is ilog2 of actual value */
-+};
-+
-+/* How an option's value is parsed and printed */
-+enum opt_type {
-+ BCH_OPT_BOOL, /* 0 or 1 */
-+ BCH_OPT_UINT, /* unsigned integer */
-+ BCH_OPT_STR, /* index into the option's choices array */
-+ BCH_OPT_FN, /* handled by the option's struct bch_opt_fn callbacks */
-+};
-+
-+/* Callbacks for BCH_OPT_FN options */
-+struct bch_opt_fn {
-+ /* Called by bch2_opt_parse(); a negative return is treated as a parse error */
-+ int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *);
-+ /* Called by bch2_opt_to_text() to print the value */
-+ void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
-+ /* Optional; if set, called last by bch2_opt_validate() */
-+ int (*validate)(u64, struct printbuf *);
-+};
-+
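-+/**
-+ * x(name, in mem type, flags, type, sb_opt, default, hint, help)
-+ *
-+ * @name - name of mount option, sysfs attribute, and struct bch_opts
-+ * member
-+ *
-+ * @in mem type - integer type of the struct bch_opts member
-+ *
-+ * @flags - enum opt_flags: when opt may be set, and how its value is
-+ * parsed, validated and stored
-+ *
-+ * @type - one of OPT_BOOL, OPT_UINT, OPT_STR, OPT_FN
-+ *
-+ * @sb_opt - name of corresponding superblock option, or BCH2_NO_SB_OPT
-+ *
-+ * @default - value of the option in bch2_opts_default
-+ *
-+ * @hint, @help - strings stored in the option table's hint and help fields
-+ */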
-+
-+/*
-+ * XXX: add fields for
-+ * - default value
-+ * - helptext
-+ */
-+
-+#ifdef __KERNEL__
-+#define RATELIMIT_ERRORS_DEFAULT true
-+#else
-+#define RATELIMIT_ERRORS_DEFAULT false
-+#endif
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+#define BCACHEFS_VERBOSE_DEFAULT true
-+#else
-+#define BCACHEFS_VERBOSE_DEFAULT false
-+#endif
-+
-+/* x(name, number) list of the fix_errors choices */
-+#define BCH_FIX_ERRORS_OPTS() \
-+ x(exit, 0) \
-+ x(yes, 1) \
-+ x(no, 2) \
-+ x(ask, 3)
-+
-+/*
-+ * FSCK_FIX_<name> constants. The expansion below ignores the number, so the
-+ * values are assigned in list order.
-+ */
-+enum fsck_err_opts {
-+#define x(t, n) FSCK_FIX_##t,
-+ BCH_FIX_ERRORS_OPTS()
-+#undef x
-+};
-+
-+/*
-+ * Master list of options. Each entry is
-+ *
-+ *	x(name, in-memory type, flags, type helper, sb_opt, default, hint, help)
-+ *
-+ * and is expanded several times with different definitions of x() to
-+ * generate struct bch_opts, bch2_opts_default, enum bch_opt_id and
-+ * bch2_opt_table.
-+ *
-+ * hint and help are per-option text: gc_reserve_bytes takes a size, not a
-+ * percentage, and bucket describes the bucket size rather than repeating
-+ * the fs_size text.
-+ */
-+#define BCH_OPTS() \
-+	x(block_size, u16, \
-+	  OPT_FS|OPT_FORMAT| \
-+	  OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS, \
-+	  OPT_UINT(512, 1U << 16), \
-+	  BCH_SB_BLOCK_SIZE, 8, \
-+	  "size", NULL) \
-+	x(btree_node_size, u32, \
-+	  OPT_FS|OPT_FORMAT| \
-+	  OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS, \
-+	  OPT_UINT(512, 1U << 20), \
-+	  BCH_SB_BTREE_NODE_SIZE, 512, \
-+	  "size", "Btree node size, default 256k") \
-+	x(errors, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_STR(bch2_error_actions), \
-+	  BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \
-+	  NULL, "Action to take on filesystem error") \
-+	x(metadata_replicas, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_UINT(1, BCH_REPLICAS_MAX), \
-+	  BCH_SB_META_REPLICAS_WANT, 1, \
-+	  "#", "Number of metadata replicas") \
-+	x(data_replicas, u8, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_UINT(1, BCH_REPLICAS_MAX), \
-+	  BCH_SB_DATA_REPLICAS_WANT, 1, \
-+	  "#", "Number of data replicas") \
-+	x(metadata_replicas_required, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT, \
-+	  OPT_UINT(1, BCH_REPLICAS_MAX), \
-+	  BCH_SB_META_REPLICAS_REQ, 1, \
-+	  "#", NULL) \
-+	x(data_replicas_required, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT, \
-+	  OPT_UINT(1, BCH_REPLICAS_MAX), \
-+	  BCH_SB_DATA_REPLICAS_REQ, 1, \
-+	  "#", NULL) \
-+	x(encoded_extent_max, u32, \
-+	  OPT_FS|OPT_FORMAT| \
-+	  OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS|OPT_SB_FIELD_ILOG2,\
-+	  OPT_UINT(4096, 2U << 20), \
-+	  BCH_SB_ENCODED_EXTENT_MAX_BITS, 64 << 10, \
-+	  "size", "Maximum size of checksummed/compressed extents")\
-+	x(metadata_checksum, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_STR(bch2_csum_opts), \
-+	  BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \
-+	  NULL, NULL) \
-+	x(data_checksum, u8, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_STR(bch2_csum_opts), \
-+	  BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \
-+	  NULL, NULL) \
-+	x(compression, u8, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_FN(bch2_opt_compression), \
-+	  BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \
-+	  NULL, NULL) \
-+	x(background_compression, u8, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_FN(bch2_opt_compression), \
-+	  BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \
-+	  NULL, NULL) \
-+	x(str_hash, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_STR(bch2_str_hash_opts), \
-+	  BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_siphash, \
-+	  NULL, "Hash function for directory entries and xattrs")\
-+	x(metadata_target, u16, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_FN(bch2_opt_target), \
-+	  BCH_SB_METADATA_TARGET, 0, \
-+	  "(target)", "Device or label for metadata writes") \
-+	x(foreground_target, u16, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_FN(bch2_opt_target), \
-+	  BCH_SB_FOREGROUND_TARGET, 0, \
-+	  "(target)", "Device or label for foreground writes") \
-+	x(background_target, u16, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_FN(bch2_opt_target), \
-+	  BCH_SB_BACKGROUND_TARGET, 0, \
-+	  "(target)", "Device or label to move data to in the background")\
-+	x(promote_target, u16, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_FN(bch2_opt_target), \
-+	  BCH_SB_PROMOTE_TARGET, 0, \
-+	  "(target)", "Device or label to promote data to on read") \
-+	x(erasure_code, u16, \
-+	  OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_ERASURE_CODE, false, \
-+	  NULL, "Enable erasure coding (DO NOT USE YET)") \
-+	x(inodes_32bit, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_INODE_32BIT, true, \
-+	  NULL, "Constrain inode numbers to 32 bits") \
-+	x(shard_inode_numbers, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_SHARD_INUMS, true, \
-+	  NULL, "Shard new inode numbers by CPU id") \
-+	x(inodes_use_key_cache, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_INODES_USE_KEY_CACHE, true, \
-+	  NULL, "Use the btree key cache for the inodes btree") \
-+	x(btree_node_mem_ptr_optimization, u8, \
-+	  OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, true, \
-+	  NULL, "Stash pointer to in memory btree node in btree ptr")\
-+	x(btree_write_buffer_size, u32, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_UINT(16, (1U << 20) - 1), \
-+	  BCH2_NO_SB_OPT, 1U << 13, \
-+	  NULL, "Number of btree write buffer entries") \
-+	x(gc_reserve_percent, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_UINT(5, 21), \
-+	  BCH_SB_GC_RESERVE, 8, \
-+	  "%", "Percentage of disk space to reserve for copygc")\
-+	x(gc_reserve_bytes, u64, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME| \
-+	  OPT_HUMAN_READABLE|OPT_SB_FIELD_SECTORS, \
-+	  OPT_UINT(0, U64_MAX), \
-+	  BCH_SB_GC_RESERVE_BYTES, 0, \
-+	  "size", "Amount of disk space to reserve for copygc\n" \
-+	  "Takes precedence over gc_reserve_percent if set")\
-+	x(root_reserve_percent, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT, \
-+	  OPT_UINT(0, 100), \
-+	  BCH_SB_ROOT_RESERVE, 0, \
-+	  "%", "Percentage of disk space to reserve for superuser")\
-+	x(wide_macs, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_128_BIT_MACS, false, \
-+	  NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\
-+	x(inline_data, u8, \
-+	  OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, true, \
-+	  NULL, "Enable inline data extents") \
-+	x(acl, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_POSIX_ACL, true, \
-+	  NULL, "Enable POSIX acls") \
-+	x(usrquota, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_USRQUOTA, false, \
-+	  NULL, "Enable user quotas") \
-+	x(grpquota, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_GRPQUOTA, false, \
-+	  NULL, "Enable group quotas") \
-+	x(prjquota, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_PRJQUOTA, false, \
-+	  NULL, "Enable project quotas") \
-+	x(degraded, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Allow mounting in degraded mode") \
-+	x(very_degraded, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Allow mounting in when data will be missing") \
-+	x(discard, u8, \
-+	  OPT_FS|OPT_MOUNT|OPT_DEVICE, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, true, \
-+	  NULL, "Enable discard/TRIM support") \
-+	x(verbose, u8, \
-+	  OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, BCACHEFS_VERBOSE_DEFAULT, \
-+	  NULL, "Extra debugging information during mount/recovery")\
-+	x(journal_flush_delay, u32, \
-+	  OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_UINT(1, U32_MAX), \
-+	  BCH_SB_JOURNAL_FLUSH_DELAY, 1000, \
-+	  NULL, "Delay in milliseconds before automatic journal commits")\
-+	x(journal_flush_disabled, u8, \
-+	  OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_JOURNAL_FLUSH_DISABLED,false, \
-+	  NULL, "Disable journal flush on sync/fsync\n" \
-+	  "If enabled, writes can be lost, but only since the\n"\
-+	  "last journal write (default 1 second)") \
-+	x(journal_reclaim_delay, u32, \
-+	  OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_UINT(0, U32_MAX), \
-+	  BCH_SB_JOURNAL_RECLAIM_DELAY, 100, \
-+	  NULL, "Delay in milliseconds before automatic journal reclaim")\
-+	x(move_bytes_in_flight, u32, \
-+	  OPT_HUMAN_READABLE|OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_UINT(1024, U32_MAX), \
-+	  BCH2_NO_SB_OPT, 1U << 20, \
-+	  NULL, "Maximum Amount of IO to keep in flight by the move path")\
-+	x(move_ios_in_flight, u32, \
-+	  OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_UINT(1, 1024), \
-+	  BCH2_NO_SB_OPT, 32, \
-+	  NULL, "Maximum number of IOs to keep in flight by the move path")\
-+	x(fsck, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Run fsck on mount") \
-+	x(fix_errors, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_FN(bch2_opt_fix_errors), \
-+	  BCH2_NO_SB_OPT, FSCK_FIX_exit, \
-+	  NULL, "Fix errors during fsck without asking") \
-+	x(ratelimit_errors, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, RATELIMIT_ERRORS_DEFAULT, \
-+	  NULL, "Ratelimit error messages during fsck") \
-+	x(nochanges, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Super read only mode - no writes at all will be issued,\n"\
-+	  "even if we have to replay the journal") \
-+	x(norecovery, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Don't replay the journal") \
-+	x(keep_journal, u8, \
-+	  0, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Don't free journal entries/keys after startup")\
-+	x(read_entire_journal, u8, \
-+	  0, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Read all journal entries, not just dirty ones")\
-+	x(read_journal_only, u8, \
-+	  0, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Only read the journal, skip the rest of recovery")\
-+	x(journal_transaction_names, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \
-+	  NULL, "Log transaction function names in journal") \
-+	x(noexcl, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Don't open device in exclusive mode") \
-+	x(direct_io, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, true, \
-+	  NULL, "Use O_DIRECT (userspace only)") \
-+	x(sb, u64, \
-+	  OPT_MOUNT, \
-+	  OPT_UINT(0, S64_MAX), \
-+	  BCH2_NO_SB_OPT, BCH_SB_SECTOR, \
-+	  "offset", "Sector offset of superblock") \
-+	x(read_only, u8, \
-+	  OPT_FS, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, NULL) \
-+	x(nostart, u8, \
-+	  0, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Don\'t start filesystem, only open devices") \
-+	x(reconstruct_alloc, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Reconstruct alloc btree") \
-+	x(version_upgrade, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_STR(bch2_version_upgrade_opts), \
-+	  BCH_SB_VERSION_UPGRADE, BCH_VERSION_UPGRADE_compatible, \
-+	  NULL, "Set superblock to latest version,\n" \
-+	  "allowing any new features to be used") \
-+	x(buckets_nouse, u8, \
-+	  0, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Allocate the buckets_nouse bitmap") \
-+	x(project, u8, \
-+	  OPT_INODE, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, NULL) \
-+	x(nocow, u8, \
-+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \
-+	  OPT_BOOL(), \
-+	  BCH_SB_NOCOW, false, \
-+	  NULL, "Nocow mode: Writes will be done in place when possible.\n"\
-+	  "Snapshots and reflink will still caused writes to be COW\n"\
-+	  "Implicitly disables data checksumming, compression and encryption")\
-+	x(nocow_enabled, u8, \
-+	  OPT_FS|OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, true, \
-+	  NULL, "Enable nocow mode: enables runtime locking in\n"\
-+	  "data move path needed if nocow will ever be in use\n")\
-+	x(no_data_io, u8, \
-+	  OPT_MOUNT, \
-+	  OPT_BOOL(), \
-+	  BCH2_NO_SB_OPT, false, \
-+	  NULL, "Skip submit_bio() for data reads and writes, " \
-+	  "for performance testing purposes") \
-+	x(fs_size, u64, \
-+	  OPT_DEVICE, \
-+	  OPT_UINT(0, S64_MAX), \
-+	  BCH2_NO_SB_OPT, 0, \
-+	  "size", "Size of filesystem on device") \
-+	x(bucket, u32, \
-+	  OPT_DEVICE, \
-+	  OPT_UINT(0, S64_MAX), \
-+	  BCH2_NO_SB_OPT, 0, \
-+	  "size", "Bucket size") \
-+	x(durability, u8, \
-+	  OPT_DEVICE, \
-+	  OPT_UINT(0, BCH_REPLICAS_MAX), \
-+	  BCH2_NO_SB_OPT, 1, \
-+	  "n", "Data written to this device will be considered\n"\
-+	  "to have already been replicated n times")
-+
-+/*
-+ * A set of option values, generated from BCH_OPTS(): first one
-+ * <name>_defined bit per option, then one <name> member per option holding
-+ * the value in that option's in-memory type.
-+ */
-+struct bch_opts {
-+#define x(_name, _bits, ...) unsigned _name##_defined:1;
-+ BCH_OPTS()
-+#undef x
-+
-+#define x(_name, _bits, ...) _bits _name;
-+ BCH_OPTS()
-+#undef x
-+};
-+
-+/* Every option marked defined and set to the default listed in BCH_OPTS() */
-+static const __maybe_unused struct bch_opts bch2_opts_default = {
-+#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \
-+ ._name##_defined = true, \
-+ ._name = _default, \
-+
-+ BCH_OPTS()
-+#undef x
-+};
-+
-+/* True if option _name has been set in _opts (a struct bch_opts, not a pointer) */
-+#define opt_defined(_opts, _name) ((_opts)._name##_defined)
-+
-+/* Value of _name in _opts, falling back to bch2_opts_default when not defined */
-+#define opt_get(_opts, _name) \
-+ (opt_defined(_opts, _name) ? (_opts)._name : bch2_opts_default._name)
-+
-+/* Set _name in _opts to _v and mark it defined */
-+#define opt_set(_opts, _name, _v) \
-+do { \
-+ (_opts)._name##_defined = true; \
-+ (_opts)._name = _v; \
-+} while (0)
-+
-+/* Returns a zero-initialized struct bch_opts: no option is marked defined */
-+static inline struct bch_opts bch2_opts_empty(void)
-+{
-+	struct bch_opts empty = { 0 };
-+
-+	return empty;
-+}
-+
-+void bch2_opts_apply(struct bch_opts *, struct bch_opts);
-+
-+/*
-+ * Opt_<name> id for every option in BCH_OPTS(), in list order;
-+ * bch2_opts_nr is the number of options.
-+ */
-+enum bch_opt_id {
-+#define x(_name, ...) Opt_##_name,
-+ BCH_OPTS()
-+#undef x
-+ bch2_opts_nr
-+};
-+
-+struct bch_fs;
-+struct printbuf;
-+
-+/* Description of a single option; one entry of bch2_opt_table */
-+struct bch_option {
-+ struct attribute attr; /* name and mode */
-+ /* Superblock accessors; BCH2_NO_SB_OPT/SET_BCH2_NO_SB_OPT if not stored there */
-+ u64 (*get_sb)(const struct bch_sb *);
-+ void (*set_sb)(struct bch_sb *, u64);
-+ enum opt_type type;
-+ enum opt_flags flags;
-+ /* Valid range: min is inclusive, max is exclusive; max == 0 means no upper bound */
-+ u64 min, max;
-+
-+ /* BCH_OPT_STR only: NULL terminated list of choices */
-+ const char * const *choices;
-+
-+ /* BCH_OPT_FN callbacks; fn.validate, if set, is used for every option type */
-+ struct bch_opt_fn fn;
-+
-+ const char *hint;
-+ const char *help;
-+
-+};
-+
-+extern const struct bch_option bch2_opt_table[];
-+
-+bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id);
-+u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id);
-+void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
-+
-+u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id);
-+int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *);
-+void __bch2_opt_set_sb(struct bch_sb *, const struct bch_option *, u64);
-+void bch2_opt_set_sb(struct bch_fs *, const struct bch_option *, u64);
-+
-+int bch2_opt_lookup(const char *);
-+int bch2_opt_validate(const struct bch_option *, u64, struct printbuf *);
-+int bch2_opt_parse(struct bch_fs *, const struct bch_option *,
-+ const char *, u64 *, struct printbuf *);
-+
-+#define OPT_SHOW_FULL_LIST (1 << 0)
-+#define OPT_SHOW_MOUNT_STYLE (1 << 1)
-+
-+void bch2_opt_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *,
-+ const struct bch_option *, u64, unsigned);
-+
-+int bch2_opt_check_may_set(struct bch_fs *, int, u64);
-+int bch2_opts_check_may_set(struct bch_fs *);
-+int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, char *);
-+
-+/* inode opts: */
-+
-+/*
-+ * Options listed in BCH_INODE_OPTS(): one u<_bits> member per option.
-+ * bch2_opts_to_inode_opts() fills it from a struct bch_opts.
-+ */
-+struct bch_io_opts {
-+#define x(_name, _bits) u##_bits _name;
-+ BCH_INODE_OPTS()
-+#undef x
-+};
-+
-+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts);
-+bool bch2_opt_is_inode_opt(enum bch_opt_id);
-+
-+#endif /* _BCACHEFS_OPTS_H */
-diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c
-new file mode 100644
-index 000000000000..5e653eb81d54
---- /dev/null
-+++ b/fs/bcachefs/printbuf.c
-@@ -0,0 +1,425 @@
-+// SPDX-License-Identifier: LGPL-2.1+
-+/* Copyright (C) 2022 Kent Overstreet */
-+
-+#include <linux/err.h>
-+#include <linux/export.h>
-+#include <linux/kernel.h>
-+#include <linux/slab.h>
-+#include <linux/string_helpers.h>
-+
-+#include "printbuf.h"
-+
-+/* Number of characters written since the most recent newline */
-+static inline unsigned printbuf_linelen(struct printbuf *buf)
-+{
-+	unsigned line_start = buf->last_newline;
-+
-+	return buf->pos - line_start;
-+}
-+
-+/*
-+ * Ensure @out has room for @extra more characters plus a terminating nul,
-+ * growing the buffer with krealloc() when needed.
-+ *
-+ * Buffers that are not heap allocated are never resized; 0 is returned for
-+ * them unconditionally.
-+ *
-+ * Returns 0 on success. On allocation failure sets out->allocation_failure
-+ * and returns -ENOMEM, leaving the existing buffer in place.
-+ */
-+int bch2_printbuf_make_room(struct printbuf *out, unsigned extra)
-+{
-+	unsigned grown_size;
-+	char *grown;
-+
-+	if (!out->heap_allocated)
-+		return 0;
-+
-+	/* Reserved space for terminating nul: */
-+	extra += 1;
-+
-+	if (out->pos + extra < out->size)
-+		return 0;
-+
-+	grown_size = roundup_pow_of_two(out->size + extra);
-+
-+	/*
-+	 * Note: output buffer must be freeable with kfree(), it's not required
-+	 * that the user use printbuf_exit().
-+	 */
-+	grown = krealloc(out->buf, grown_size,
-+			 out->atomic ? GFP_NOWAIT : GFP_KERNEL);
-+	if (!grown) {
-+		out->allocation_failure = true;
-+		return -ENOMEM;
-+	}
-+
-+	out->buf = grown;
-+	out->size = grown_size;
-+	return 0;
-+}
-+
-+/*
-+ * Append formatted output to @out, va_list flavour.
-+ *
-+ * Formats into the space currently available; if the result did not fit,
-+ * tries to grow the buffer and formats again. If the buffer cannot be grown
-+ * the output is truncated to the space available.
-+ *
-+ * @args is not consumed: each attempt formats from its own copy.
-+ */
-+void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
-+{
-+	int len;
-+
-+	do {
-+		va_list args2;
-+
-+		va_copy(args2, args);
-+		len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2);
-+		/* Every va_copy() must be paired with a va_end() */
-+		va_end(args2);
-+	} while (len + 1 >= printbuf_remaining(out) &&
-+		 !bch2_printbuf_make_room(out, len + 1));
-+
-+	/* Only advance over what actually fit, leaving room for the nul */
-+	len = min_t(size_t, len,
-+		    printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0);
-+	out->pos += len;
-+}
-+
-+/*
-+ * Append formatted output to @out, printf style. Growing the buffer and
-+ * truncating when it cannot grow are handled by bch2_prt_vprintf().
-+ */
-+void bch2_prt_printf(struct printbuf *out, const char *fmt, ...)
-+{
-+	va_list args;
-+
-+	va_start(args, fmt);
-+	bch2_prt_vprintf(out, fmt, args);
-+	va_end(args);
-+}
-+
-+/**
-+ * bch2_printbuf_str() - get a printbuf's contents as a C string
-+ * @buf: printbuf to read
-+ *
-+ * Returns: Printbuf contents, as a nul terminated C string; the empty string
-+ * if nothing has been written yet
-+ */
-+const char *bch2_printbuf_str(const struct printbuf *buf)
-+{
-+	/*
-+	 * Nothing written yet: a buffer might not have been allocated at all,
-+	 * so hand back a static empty string instead.
-+	 */
-+	if (!buf->pos)
-+		return "";
-+
-+	/* Once written to, the buffer is guaranteed to be nul terminated */
-+	return buf->buf;
-+}
-+
-+/**
-+ * bch2_printbuf_exit() - release a printbuf
-+ * @buf: printbuf to exit
-+ *
-+ * Frees the buffer if it is heap allocated and replaces the pointer with a
-+ * poison value to catch accidental reuse. Buffers that are not heap
-+ * allocated are left alone.
-+ */
-+void bch2_printbuf_exit(struct printbuf *buf)
-+{
-+	if (!buf->heap_allocated)
-+		return;
-+
-+	kfree(buf->buf);
-+	buf->buf = ERR_PTR(-EINTR); /* poison value */
-+}
-+
-+/*
-+ * Remove all tabstops from @buf. Only the tabstop count is reset;
-+ * has_indent_or_tabstops is left unchanged.
-+ */
-+void bch2_printbuf_tabstops_reset(struct printbuf *buf)
-+{
-+ buf->nr_tabstops = 0;
-+}
-+
-+/* Remove the most recently pushed tabstop from @buf, if there is one */
-+void bch2_printbuf_tabstop_pop(struct printbuf *buf)
-+{
-+	if (!buf->nr_tabstops)
-+		return;
-+
-+	buf->nr_tabstops--;
-+}
-+
-+/*
-+ * bch2_printbuf_tabstop_push() - add a tabstop, n spaces from the previous tabstop
-+ *
-+ * @buf: printbuf to control
-+ * @spaces: number of spaces from previous tabstop
-+ *
-+ * Returns 0 on success, or -EINVAL (after a warning) when all tabstop slots
-+ * are already in use.
-+ *
-+ * In the future this function may allocate memory if setting more than
-+ * PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start
-+ * of line.
-+ */
-+int bch2_printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
-+{
-+	unsigned base = 0;
-+
-+	/* New tabstops are relative to the last one pushed, if any */
-+	if (buf->nr_tabstops)
-+		base = buf->_tabstops[buf->nr_tabstops - 1];
-+
-+	if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops)))
-+		return -EINVAL;
-+
-+	buf->_tabstops[buf->nr_tabstops] = base + spaces;
-+	buf->nr_tabstops++;
-+	buf->has_indent_or_tabstops = true;
-+	return 0;
-+}
-+
-+/**
-+ * bch2_printbuf_indent_add() - increase the current indent level
-+ *
-+ * @buf: printbuf to control
-+ * @spaces: number of spaces to add to the current indent level
-+ *
-+ * The added spaces are also emitted immediately at the current output
-+ * position. If adding @spaces would overflow the indent level, a one-time
-+ * warning is raised and nothing is added.
-+ */
-+void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces)
-+{
-+	unsigned added = spaces;
-+
-+	if (WARN_ON_ONCE(buf->indent + spaces < buf->indent))
-+		added = 0;
-+
-+	buf->indent += added;
-+	prt_chars(buf, ' ', added);
-+
-+	buf->has_indent_or_tabstops = true;
-+}
-+
-+/**
-+ * bch2_printbuf_indent_sub() - decrease the current indent level
-+ *
-+ * @buf: printbuf to control
-+ * @spaces: number of spaces to subtract from the current indent level
-+ *
-+ * If the output position is exactly at the end of the current line's indent,
-+ * the already emitted spaces are taken back as well. Subtracting more than
-+ * the current indent raises a one-time warning and is clamped to the current
-+ * indent.
-+ */
-+void bch2_printbuf_indent_sub(struct printbuf *buf, unsigned spaces)
-+{
-+	bool only_indent_on_line;
-+
-+	if (WARN_ON_ONCE(spaces > buf->indent))
-+		spaces = buf->indent;
-+
-+	only_indent_on_line = buf->last_newline + buf->indent == buf->pos;
-+	if (only_indent_on_line) {
-+		buf->pos -= spaces;
-+		printbuf_nul_terminate(buf);
-+	}
-+	buf->indent -= spaces;
-+
-+	if (buf->indent || buf->nr_tabstops)
-+		return;
-+
-+	buf->has_indent_or_tabstops = false;
-+}
-+
-+/*
-+ * Emit a newline followed by the current indent, and restart field and
-+ * tabstop tracking for the new line.
-+ */
-+void bch2_prt_newline(struct printbuf *buf)
-+{
-+	unsigned col = 0;
-+
-+	bch2_printbuf_make_room(buf, 1 + buf->indent);
-+
-+	__prt_char(buf, '\n');
-+
-+	buf->last_newline = buf->pos;
-+
-+	while (col < buf->indent) {
-+		__prt_char(buf, ' ');
-+		col++;
-+	}
-+
-+	printbuf_nul_terminate(buf);
-+
-+	buf->last_field = buf->pos;
-+	buf->cur_tabstop = 0;
-+}
-+
-+/*
-+ * Position of the current tabstop in spaces from start of line, or 0 when
-+ * there are no tabstops left on this line:
-+ */
-+static inline unsigned cur_tabstop(struct printbuf *buf)
-+{
-+	if (buf->cur_tabstop >= buf->nr_tabstops)
-+		return 0;
-+
-+	return buf->_tabstops[buf->cur_tabstop];
-+}
-+
-+/*
-+ * Pad with spaces up to the current tabstop (nothing is emitted if the line
-+ * is already past it), then move on to the next tabstop.
-+ */
-+static void __prt_tab(struct printbuf *out)
-+{
-+	int spaces = (int) (cur_tabstop(out) - printbuf_linelen(out));
-+
-+	if (spaces < 0)
-+		spaces = 0;
-+
-+	prt_chars(out, ' ', spaces);
-+
-+	out->last_field = out->pos;
-+	out->cur_tabstop++;
-+}
-+
-+/**
-+ * bch2_prt_tab() - Advance printbuf to the next tabstop
-+ * @out: printbuf to control
-+ *
-+ * Pads the output with spaces up to the next tabstop. Warns and does nothing
-+ * if there is no tabstop to advance to.
-+ */
-+void bch2_prt_tab(struct printbuf *out)
-+{
-+	if (!WARN_ON(!cur_tabstop(out)))
-+		__prt_tab(out);
-+}
-+
-+/*
-+ * Right justify the text written since the last field boundary against the
-+ * current tabstop, then move on to the next tabstop.
-+ */
-+static void __prt_tab_rjust(struct printbuf *buf)
-+{
-+ /* Length of the text to shift right */
-+ unsigned move = buf->pos - buf->last_field;
-+ /* Spaces needed to bring the end of that text up to the tabstop */
-+ int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf);
-+
-+ if (pad > 0) {
-+ bch2_printbuf_make_room(buf, pad);
-+
-+ /*
-+ * The return value above is not checked, so every copy below is
-+ * clamped to buf->size.
-+ */
-+ if (buf->last_field + pad < buf->size)
-+ memmove(buf->buf + buf->last_field + pad,
-+ buf->buf + buf->last_field,
-+ min(move, buf->size - 1 - buf->last_field - pad));
-+
-+ /* Fill the gap opened up in front of the shifted text */
-+ if (buf->last_field < buf->size)
-+ memset(buf->buf + buf->last_field, ' ',
-+ min((unsigned) pad, buf->size - buf->last_field));
-+
-+ buf->pos += pad;
-+ printbuf_nul_terminate(buf);
-+ }
-+
-+ buf->last_field = buf->pos;
-+ buf->cur_tabstop++;
-+}
-+
-+/**
-+ * bch2_prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
-+ * previous output
-+ *
-+ * @buf: printbuf to control
-+ *
-+ * Spaces are inserted immediately after the previous tabstop, so that text
-+ * output since then ends at the next tabstop. Warns and does nothing if
-+ * there is no tabstop to advance to.
-+ */
-+void bch2_prt_tab_rjust(struct printbuf *buf)
-+{
-+	if (!WARN_ON(!cur_tabstop(buf)))
-+		__prt_tab_rjust(buf);
-+}
-+
-+/**
-+ * bch2_prt_bytes_indented() - Print an array of chars, handling embedded control characters
-+ *
-+ * @out: output printbuf
-+ * @str: string to print
-+ * @count: number of bytes to print
-+ *
-+ * The following contol characters are handled as so:
-+ * \n: prt_newline newline that obeys current indent level
-+ * \t: prt_tab advance to next tabstop
-+ * \r: prt_tab_rjust advance to next tabstop, with right justification
-+ */
-+void bch2_prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
-+{
-+ const char *unprinted_start = str;
-+ const char *end = str + count;
-+
-+ if (!out->has_indent_or_tabstops || out->suppress_indent_tabstop_handling) {
-+ prt_bytes(out, str, count);
-+ return;
-+ }
-+
-+ while (str != end) {
-+ switch (*str) {
-+ case '\n':
-+ prt_bytes(out, unprinted_start, str - unprinted_start);
-+ unprinted_start = str + 1;
-+ bch2_prt_newline(out);
-+ break;
-+ case '\t':
-+ if (likely(cur_tabstop(out))) {
-+ prt_bytes(out, unprinted_start, str - unprinted_start);
-+ unprinted_start = str + 1;
-+ __prt_tab(out);
-+ }
-+ break;
-+ case '\r':
-+ if (likely(cur_tabstop(out))) {
-+ prt_bytes(out, unprinted_start, str - unprinted_start);
-+ unprinted_start = str + 1;
-+ __prt_tab_rjust(out);
-+ }
-+ break;
-+ }
-+
-+ str++;
-+ }
-+
-+ prt_bytes(out, unprinted_start, str - unprinted_start);
-+}
-+
-+/**
-+ * bch2_prt_human_readable_u64() - Print out a u64 in human readable units
-+ * @out: output printbuf
-+ * @v: integer to print
-+ *
-+ * Units of 2^10 (default) or 10^3 are controlled via @out->si_units
-+ */
-+void bch2_prt_human_readable_u64(struct printbuf *out, u64 v)
-+{
-+ bch2_printbuf_make_room(out, 10);
-+ out->pos += string_get_size(v, 1, !out->si_units,
-+ out->buf + out->pos,
-+ printbuf_remaining_size(out));
-+}
-+
-+/**
-+ * bch2_prt_human_readable_s64() - Print out a s64 in human readable units
-+ * @out: output printbuf
-+ * @v: integer to print
-+ *
-+ * Units of 2^10 (default) or 10^3 are controlled via @out->si_units
-+ */
-+void bch2_prt_human_readable_s64(struct printbuf *out, s64 v)
-+{
-+ if (v < 0)
-+ prt_char(out, '-');
-+ bch2_prt_human_readable_u64(out, abs(v));
-+}
-+
-+/**
-+ * bch2_prt_units_u64() - Print out a u64 according to printbuf unit options
-+ * @out: output printbuf
-+ * @v: integer to print
-+ *
-+ * Units are either raw (default), or human reabable units (controlled via
-+ * @buf->human_readable_units)
-+ */
-+void bch2_prt_units_u64(struct printbuf *out, u64 v)
-+{
-+ if (out->human_readable_units)
-+ bch2_prt_human_readable_u64(out, v);
-+ else
-+ bch2_prt_printf(out, "%llu", v);
-+}
-+
-+/**
-+ * bch2_prt_units_s64() - Print out a s64 according to printbuf unit options
-+ * @out: output printbuf
-+ * @v: integer to print
-+ *
-+ * Units are either raw (default), or human reabable units (controlled via
-+ * @buf->human_readable_units)
-+ */
-+void bch2_prt_units_s64(struct printbuf *out, s64 v)
-+{
-+ if (v < 0)
-+ prt_char(out, '-');
-+ bch2_prt_units_u64(out, abs(v));
-+}
-+
-+void bch2_prt_string_option(struct printbuf *out,
-+ const char * const list[],
-+ size_t selected)
-+{
-+ size_t i;
-+
-+ for (i = 0; list[i]; i++)
-+ bch2_prt_printf(out, i == selected ? "[%s] " : "%s ", list[i]);
-+}
-+
-+void bch2_prt_bitflags(struct printbuf *out,
-+ const char * const list[], u64 flags)
-+{
-+ unsigned bit, nr = 0;
-+ bool first = true;
-+
-+ while (list[nr])
-+ nr++;
-+
-+ while (flags && (bit = __ffs64(flags)) < nr) {
-+ if (!first)
-+ bch2_prt_printf(out, ",");
-+ first = false;
-+ bch2_prt_printf(out, "%s", list[bit]);
-+ flags ^= BIT_ULL(bit);
-+ }
-+}
-diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h
-new file mode 100644
-index 000000000000..2191423d9f22
---- /dev/null
-+++ b/fs/bcachefs/printbuf.h
-@@ -0,0 +1,284 @@
-+/* SPDX-License-Identifier: LGPL-2.1+ */
-+/* Copyright (C) 2022 Kent Overstreet */
-+
-+#ifndef _BCACHEFS_PRINTBUF_H
-+#define _BCACHEFS_PRINTBUF_H
-+
-+/*
-+ * Printbufs: Simple strings for printing to, with optional heap allocation
-+ *
-+ * This code has provisions for use in userspace, to aid in making other code
-+ * portable between kernelspace and userspace.
-+ *
-+ * Basic example:
-+ * struct printbuf buf = PRINTBUF;
-+ *
-+ * prt_printf(&buf, "foo=");
-+ * foo_to_text(&buf, foo);
-+ * printk("%s", buf.buf);
-+ * printbuf_exit(&buf);
-+ *
-+ * Or
-+ * struct printbuf buf = PRINTBUF_EXTERN(char_buf, char_buf_size)
-+ *
-+ * We can now write pretty printers instead of writing code that dumps
-+ * everything to the kernel log buffer, and then those pretty-printers can be
-+ * used by other code that outputs to kernel log, sysfs, debugfs, etc.
-+ *
-+ * Memory allocation: Outputing to a printbuf may allocate memory. This
-+ * allocation is done with GFP_KERNEL, by default: use the newer
-+ * memalloc_*_(save|restore) functions as needed.
-+ *
-+ * Since no equivalent yet exists for GFP_ATOMIC/GFP_NOWAIT, memory allocations
-+ * will be done with GFP_NOWAIT if printbuf->atomic is nonzero.
-+ *
-+ * It's allowed to grab the output buffer and free it later with kfree() instead
-+ * of using printbuf_exit(), if the user just needs a heap allocated string at
-+ * the end.
-+ *
-+ * Memory allocation failures: We don't return errors directly, because on
-+ * memory allocation failure we usually don't want to bail out and unwind - we
-+ * want to print what we've got, on a best-effort basis. But code that does want
-+ * to return -ENOMEM may check printbuf.allocation_failure.
-+ *
-+ * Indenting, tabstops:
-+ *
-+ * To aid is writing multi-line pretty printers spread across multiple
-+ * functions, printbufs track the current indent level.
-+ *
-+ * printbuf_indent_push() and printbuf_indent_pop() increase and decrease the current indent
-+ * level, respectively.
-+ *
-+ * To use tabstops, set printbuf->tabstops[]; they are in units of spaces, from
-+ * start of line. Once set, prt_tab() will output spaces up to the next tabstop.
-+ * prt_tab_rjust() will also advance the current line of text up to the next
-+ * tabstop, but it does so by shifting text since the previous tabstop up to the
-+ * next tabstop - right justifying it.
-+ *
-+ * Make sure you use prt_newline() instead of \n in the format string for indent
-+ * level and tabstops to work corretly.
-+ *
-+ * Output units: printbuf->units exists to tell pretty-printers how to output
-+ * numbers: a raw value (e.g. directly from a superblock field), as bytes, or as
-+ * human readable bytes. prt_units() obeys it.
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/string.h>
-+
-+enum printbuf_si {
-+ PRINTBUF_UNITS_2, /* use binary powers of 2^10 */
-+ PRINTBUF_UNITS_10, /* use powers of 10^3 (standard SI) */
-+};
-+
-+#define PRINTBUF_INLINE_TABSTOPS 6
-+
-+struct printbuf {
-+ char *buf;
-+ unsigned size;
-+ unsigned pos;
-+ unsigned last_newline;
-+ unsigned last_field;
-+ unsigned indent;
-+ /*
-+ * If nonzero, allocations will be done with GFP_ATOMIC:
-+ */
-+ u8 atomic;
-+ bool allocation_failure:1;
-+ bool heap_allocated:1;
-+ enum printbuf_si si_units:1;
-+ bool human_readable_units:1;
-+ bool has_indent_or_tabstops:1;
-+ bool suppress_indent_tabstop_handling:1;
-+ u8 nr_tabstops;
-+
-+ /*
-+ * Do not modify directly: use printbuf_tabstop_add(),
-+ * printbuf_tabstop_get()
-+ */
-+ u8 cur_tabstop;
-+ u8 _tabstops[PRINTBUF_INLINE_TABSTOPS];
-+};
-+
-+int bch2_printbuf_make_room(struct printbuf *, unsigned);
-+__printf(2, 3) void bch2_prt_printf(struct printbuf *out, const char *fmt, ...);
-+__printf(2, 0) void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list);
-+const char *bch2_printbuf_str(const struct printbuf *);
-+void bch2_printbuf_exit(struct printbuf *);
-+
-+void bch2_printbuf_tabstops_reset(struct printbuf *);
-+void bch2_printbuf_tabstop_pop(struct printbuf *);
-+int bch2_printbuf_tabstop_push(struct printbuf *, unsigned);
-+
-+void bch2_printbuf_indent_add(struct printbuf *, unsigned);
-+void bch2_printbuf_indent_sub(struct printbuf *, unsigned);
-+
-+void bch2_prt_newline(struct printbuf *);
-+void bch2_prt_tab(struct printbuf *);
-+void bch2_prt_tab_rjust(struct printbuf *);
-+
-+void bch2_prt_bytes_indented(struct printbuf *, const char *, unsigned);
-+void bch2_prt_human_readable_u64(struct printbuf *, u64);
-+void bch2_prt_human_readable_s64(struct printbuf *, s64);
-+void bch2_prt_units_u64(struct printbuf *, u64);
-+void bch2_prt_units_s64(struct printbuf *, s64);
-+void bch2_prt_string_option(struct printbuf *, const char * const[], size_t);
-+void bch2_prt_bitflags(struct printbuf *, const char * const[], u64);
-+
-+/* Initializer for a heap allocated printbuf: */
-+#define PRINTBUF ((struct printbuf) { .heap_allocated = true })
-+
-+/* Initializer a printbuf that points to an external buffer: */
-+#define PRINTBUF_EXTERN(_buf, _size) \
-+((struct printbuf) { \
-+ .buf = _buf, \
-+ .size = _size, \
-+})
-+
-+/*
-+ * Returns size remaining of output buffer:
-+ */
-+static inline unsigned printbuf_remaining_size(struct printbuf *out)
-+{
-+ return out->pos < out->size ? out->size - out->pos : 0;
-+}
-+
-+/*
-+ * Returns number of characters we can print to the output buffer - i.e.
-+ * excluding the terminating nul:
-+ */
-+static inline unsigned printbuf_remaining(struct printbuf *out)
-+{
-+ return out->pos < out->size ? out->size - out->pos - 1 : 0;
-+}
-+
-+static inline unsigned printbuf_written(struct printbuf *out)
-+{
-+ return out->size ? min(out->pos, out->size - 1) : 0;
-+}
-+
-+/*
-+ * Returns true if output was truncated:
-+ */
-+static inline bool printbuf_overflowed(struct printbuf *out)
-+{
-+ return out->pos >= out->size;
-+}
-+
-+static inline void printbuf_nul_terminate(struct printbuf *out)
-+{
-+ bch2_printbuf_make_room(out, 1);
-+
-+ if (out->pos < out->size)
-+ out->buf[out->pos] = 0;
-+ else if (out->size)
-+ out->buf[out->size - 1] = 0;
-+}
-+
-+/* Doesn't call bch2_printbuf_make_room(), doesn't nul terminate: */
-+static inline void __prt_char_reserved(struct printbuf *out, char c)
-+{
-+ if (printbuf_remaining(out))
-+ out->buf[out->pos] = c;
-+ out->pos++;
-+}
-+
-+/* Doesn't nul terminate: */
-+static inline void __prt_char(struct printbuf *out, char c)
-+{
-+ bch2_printbuf_make_room(out, 1);
-+ __prt_char_reserved(out, c);
-+}
-+
-+static inline void prt_char(struct printbuf *out, char c)
-+{
-+ __prt_char(out, c);
-+ printbuf_nul_terminate(out);
-+}
-+
-+static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n)
-+{
-+ unsigned i, can_print = min(n, printbuf_remaining(out));
-+
-+ for (i = 0; i < can_print; i++)
-+ out->buf[out->pos++] = c;
-+ out->pos += n - can_print;
-+}
-+
-+static inline void prt_chars(struct printbuf *out, char c, unsigned n)
-+{
-+ bch2_printbuf_make_room(out, n);
-+ __prt_chars_reserved(out, c, n);
-+ printbuf_nul_terminate(out);
-+}
-+
-+static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n)
-+{
-+ unsigned i, can_print;
-+
-+ bch2_printbuf_make_room(out, n);
-+
-+ can_print = min(n, printbuf_remaining(out));
-+
-+ for (i = 0; i < can_print; i++)
-+ out->buf[out->pos++] = ((char *) b)[i];
-+ out->pos += n - can_print;
-+
-+ printbuf_nul_terminate(out);
-+}
-+
-+static inline void prt_str(struct printbuf *out, const char *str)
-+{
-+ prt_bytes(out, str, strlen(str));
-+}
-+
-+static inline void prt_str_indented(struct printbuf *out, const char *str)
-+{
-+ bch2_prt_bytes_indented(out, str, strlen(str));
-+}
-+
-+static inline void prt_hex_byte(struct printbuf *out, u8 byte)
-+{
-+ bch2_printbuf_make_room(out, 2);
-+ __prt_char_reserved(out, hex_asc_hi(byte));
-+ __prt_char_reserved(out, hex_asc_lo(byte));
-+ printbuf_nul_terminate(out);
-+}
-+
-+static inline void prt_hex_byte_upper(struct printbuf *out, u8 byte)
-+{
-+ bch2_printbuf_make_room(out, 2);
-+ __prt_char_reserved(out, hex_asc_upper_hi(byte));
-+ __prt_char_reserved(out, hex_asc_upper_lo(byte));
-+ printbuf_nul_terminate(out);
-+}
-+
-+/**
-+ * printbuf_reset - re-use a printbuf without freeing and re-initializing it:
-+ */
-+static inline void printbuf_reset(struct printbuf *buf)
-+{
-+ buf->pos = 0;
-+ buf->allocation_failure = 0;
-+ buf->indent = 0;
-+ buf->nr_tabstops = 0;
-+ buf->cur_tabstop = 0;
-+}
-+
-+/**
-+ * printbuf_atomic_inc - mark as entering an atomic section
-+ */
-+static inline void printbuf_atomic_inc(struct printbuf *buf)
-+{
-+ buf->atomic++;
-+}
-+
-+/**
-+ * printbuf_atomic_inc - mark as leaving an atomic section
-+ */
-+static inline void printbuf_atomic_dec(struct printbuf *buf)
-+{
-+ buf->atomic--;
-+}
-+
-+#endif /* _BCACHEFS_PRINTBUF_H */
-diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
-new file mode 100644
-index 000000000000..a54647c36b85
---- /dev/null
-+++ b/fs/bcachefs/quota.c
-@@ -0,0 +1,979 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "btree_update.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "inode.h"
-+#include "quota.h"
-+#include "snapshot.h"
-+#include "super-io.h"
-+
-+static const char * const bch2_quota_types[] = {
-+ "user",
-+ "group",
-+ "project",
-+};
-+
-+static const char * const bch2_quota_counters[] = {
-+ "space",
-+ "inodes",
-+};
-+
-+static int bch2_sb_quota_validate(struct bch_sb *sb, struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_quota *q = field_to_type(f, quota);
-+
-+ if (vstruct_bytes(&q->field) < sizeof(*q)) {
-+ prt_printf(err, "wrong size (got %zu should be %zu)",
-+ vstruct_bytes(&q->field), sizeof(*q));
-+ return -BCH_ERR_invalid_sb_quota;
-+ }
-+
-+ return 0;
-+}
-+
-+static void bch2_sb_quota_to_text(struct printbuf *out, struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_quota *q = field_to_type(f, quota);
-+ unsigned qtyp, counter;
-+
-+ for (qtyp = 0; qtyp < ARRAY_SIZE(q->q); qtyp++) {
-+ prt_printf(out, "%s: flags %llx",
-+ bch2_quota_types[qtyp],
-+ le64_to_cpu(q->q[qtyp].flags));
-+
-+ for (counter = 0; counter < Q_COUNTERS; counter++)
-+ prt_printf(out, " %s timelimit %u warnlimit %u",
-+ bch2_quota_counters[counter],
-+ le32_to_cpu(q->q[qtyp].c[counter].timelimit),
-+ le32_to_cpu(q->q[qtyp].c[counter].warnlimit));
-+
-+ prt_newline(out);
-+ }
-+}
-+
-+const struct bch_sb_field_ops bch_sb_field_ops_quota = {
-+ .validate = bch2_sb_quota_validate,
-+ .to_text = bch2_sb_quota_to_text,
-+};
-+
-+int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err,
-+ quota_type_invalid,
-+ "invalid quota type (%llu >= %u)",
-+ k.k->p.inode, QTYP_NR);
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_s_c_quota dq = bkey_s_c_to_quota(k);
-+ unsigned i;
-+
-+ for (i = 0; i < Q_COUNTERS; i++)
-+ prt_printf(out, "%s hardlimit %llu softlimit %llu",
-+ bch2_quota_counters[i],
-+ le64_to_cpu(dq.v->c[i].hardlimit),
-+ le64_to_cpu(dq.v->c[i].softlimit));
-+}
-+
-+#ifdef CONFIG_BCACHEFS_QUOTA
-+
-+#include <linux/cred.h>
-+#include <linux/fs.h>
-+#include <linux/quota.h>
-+
-+static void qc_info_to_text(struct printbuf *out, struct qc_info *i)
-+{
-+ printbuf_tabstops_reset(out);
-+ printbuf_tabstop_push(out, 20);
-+
-+ prt_str(out, "i_fieldmask");
-+ prt_tab(out);
-+ prt_printf(out, "%x", i->i_fieldmask);
-+ prt_newline(out);
-+
-+ prt_str(out, "i_flags");
-+ prt_tab(out);
-+ prt_printf(out, "%u", i->i_flags);
-+ prt_newline(out);
-+
-+ prt_str(out, "i_spc_timelimit");
-+ prt_tab(out);
-+ prt_printf(out, "%u", i->i_spc_timelimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "i_ino_timelimit");
-+ prt_tab(out);
-+ prt_printf(out, "%u", i->i_ino_timelimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "i_rt_spc_timelimit");
-+ prt_tab(out);
-+ prt_printf(out, "%u", i->i_rt_spc_timelimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "i_spc_warnlimit");
-+ prt_tab(out);
-+ prt_printf(out, "%u", i->i_spc_warnlimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "i_ino_warnlimit");
-+ prt_tab(out);
-+ prt_printf(out, "%u", i->i_ino_warnlimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "i_rt_spc_warnlimit");
-+ prt_tab(out);
-+ prt_printf(out, "%u", i->i_rt_spc_warnlimit);
-+ prt_newline(out);
-+}
-+
-+static void qc_dqblk_to_text(struct printbuf *out, struct qc_dqblk *q)
-+{
-+ printbuf_tabstops_reset(out);
-+ printbuf_tabstop_push(out, 20);
-+
-+ prt_str(out, "d_fieldmask");
-+ prt_tab(out);
-+ prt_printf(out, "%x", q->d_fieldmask);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_spc_hardlimit");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", q->d_spc_hardlimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_spc_softlimit");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", q->d_spc_softlimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_ino_hardlimit");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", q->d_ino_hardlimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_ino_softlimit");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", q->d_ino_softlimit);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_space");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", q->d_space);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_ino_count");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", q->d_ino_count);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_ino_timer");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", q->d_ino_timer);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_spc_timer");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", q->d_spc_timer);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_ino_warns");
-+ prt_tab(out);
-+ prt_printf(out, "%i", q->d_ino_warns);
-+ prt_newline(out);
-+
-+ prt_str(out, "d_spc_warns");
-+ prt_tab(out);
-+ prt_printf(out, "%i", q->d_spc_warns);
-+ prt_newline(out);
-+}
-+
-+static inline unsigned __next_qtype(unsigned i, unsigned qtypes)
-+{
-+ qtypes >>= i;
-+ return qtypes ? i + __ffs(qtypes) : QTYP_NR;
-+}
-+
-+#define for_each_set_qtype(_c, _i, _q, _qtypes) \
-+ for (_i = 0; \
-+ (_i = __next_qtype(_i, _qtypes), \
-+ _q = &(_c)->quotas[_i], \
-+ _i < QTYP_NR); \
-+ _i++)
-+
-+static bool ignore_hardlimit(struct bch_memquota_type *q)
-+{
-+ if (capable(CAP_SYS_RESOURCE))
-+ return true;
-+#if 0
-+ struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
-+
-+ return capable(CAP_SYS_RESOURCE) &&
-+ (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
-+ !(info->dqi_flags & DQF_ROOT_SQUASH));
-+#endif
-+ return false;
-+}
-+
-+enum quota_msg {
-+ SOFTWARN, /* Softlimit reached */
-+ SOFTLONGWARN, /* Grace time expired */
-+ HARDWARN, /* Hardlimit reached */
-+
-+ HARDBELOW, /* Usage got below inode hardlimit */
-+ SOFTBELOW, /* Usage got below inode softlimit */
-+};
-+
-+static int quota_nl[][Q_COUNTERS] = {
-+ [HARDWARN][Q_SPC] = QUOTA_NL_BHARDWARN,
-+ [SOFTLONGWARN][Q_SPC] = QUOTA_NL_BSOFTLONGWARN,
-+ [SOFTWARN][Q_SPC] = QUOTA_NL_BSOFTWARN,
-+ [HARDBELOW][Q_SPC] = QUOTA_NL_BHARDBELOW,
-+ [SOFTBELOW][Q_SPC] = QUOTA_NL_BSOFTBELOW,
-+
-+ [HARDWARN][Q_INO] = QUOTA_NL_IHARDWARN,
-+ [SOFTLONGWARN][Q_INO] = QUOTA_NL_ISOFTLONGWARN,
-+ [SOFTWARN][Q_INO] = QUOTA_NL_ISOFTWARN,
-+ [HARDBELOW][Q_INO] = QUOTA_NL_IHARDBELOW,
-+ [SOFTBELOW][Q_INO] = QUOTA_NL_ISOFTBELOW,
-+};
-+
-+struct quota_msgs {
-+ u8 nr;
-+ struct {
-+ u8 qtype;
-+ u8 msg;
-+ } m[QTYP_NR * Q_COUNTERS];
-+};
-+
-+static void prepare_msg(unsigned qtype,
-+ enum quota_counters counter,
-+ struct quota_msgs *msgs,
-+ enum quota_msg msg_type)
-+{
-+ BUG_ON(msgs->nr >= ARRAY_SIZE(msgs->m));
-+
-+ msgs->m[msgs->nr].qtype = qtype;
-+ msgs->m[msgs->nr].msg = quota_nl[msg_type][counter];
-+ msgs->nr++;
-+}
-+
-+static void prepare_warning(struct memquota_counter *qc,
-+ unsigned qtype,
-+ enum quota_counters counter,
-+ struct quota_msgs *msgs,
-+ enum quota_msg msg_type)
-+{
-+ if (qc->warning_issued & (1 << msg_type))
-+ return;
-+
-+ prepare_msg(qtype, counter, msgs, msg_type);
-+}
-+
-+static void flush_warnings(struct bch_qid qid,
-+ struct super_block *sb,
-+ struct quota_msgs *msgs)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < msgs->nr; i++)
-+ quota_send_warning(make_kqid(&init_user_ns, msgs->m[i].qtype, qid.q[i]),
-+ sb->s_dev, msgs->m[i].msg);
-+}
-+
-+static int bch2_quota_check_limit(struct bch_fs *c,
-+ unsigned qtype,
-+ struct bch_memquota *mq,
-+ struct quota_msgs *msgs,
-+ enum quota_counters counter,
-+ s64 v,
-+ enum quota_acct_mode mode)
-+{
-+ struct bch_memquota_type *q = &c->quotas[qtype];
-+ struct memquota_counter *qc = &mq->c[counter];
-+ u64 n = qc->v + v;
-+
-+ BUG_ON((s64) n < 0);
-+
-+ if (mode == KEY_TYPE_QUOTA_NOCHECK)
-+ return 0;
-+
-+ if (v <= 0) {
-+ if (n < qc->hardlimit &&
-+ (qc->warning_issued & (1 << HARDWARN))) {
-+ qc->warning_issued &= ~(1 << HARDWARN);
-+ prepare_msg(qtype, counter, msgs, HARDBELOW);
-+ }
-+
-+ if (n < qc->softlimit &&
-+ (qc->warning_issued & (1 << SOFTWARN))) {
-+ qc->warning_issued &= ~(1 << SOFTWARN);
-+ prepare_msg(qtype, counter, msgs, SOFTBELOW);
-+ }
-+
-+ qc->warning_issued = 0;
-+ return 0;
-+ }
-+
-+ if (qc->hardlimit &&
-+ qc->hardlimit < n &&
-+ !ignore_hardlimit(q)) {
-+ prepare_warning(qc, qtype, counter, msgs, HARDWARN);
-+ return -EDQUOT;
-+ }
-+
-+ if (qc->softlimit &&
-+ qc->softlimit < n) {
-+ if (qc->timer == 0) {
-+ qc->timer = ktime_get_real_seconds() + q->limits[counter].timelimit;
-+ prepare_warning(qc, qtype, counter, msgs, SOFTWARN);
-+ } else if (ktime_get_real_seconds() >= qc->timer &&
-+ !ignore_hardlimit(q)) {
-+ prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN);
-+ return -EDQUOT;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid,
-+ enum quota_counters counter, s64 v,
-+ enum quota_acct_mode mode)
-+{
-+ unsigned qtypes = enabled_qtypes(c);
-+ struct bch_memquota_type *q;
-+ struct bch_memquota *mq[QTYP_NR];
-+ struct quota_msgs msgs;
-+ unsigned i;
-+ int ret = 0;
-+
-+ memset(&msgs, 0, sizeof(msgs));
-+
-+ for_each_set_qtype(c, i, q, qtypes) {
-+ mq[i] = genradix_ptr_alloc(&q->table, qid.q[i], GFP_KERNEL);
-+ if (!mq[i])
-+ return -ENOMEM;
-+ }
-+
-+ for_each_set_qtype(c, i, q, qtypes)
-+ mutex_lock_nested(&q->lock, i);
-+
-+ for_each_set_qtype(c, i, q, qtypes) {
-+ ret = bch2_quota_check_limit(c, i, mq[i], &msgs, counter, v, mode);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ for_each_set_qtype(c, i, q, qtypes)
-+ mq[i]->c[counter].v += v;
-+err:
-+ for_each_set_qtype(c, i, q, qtypes)
-+ mutex_unlock(&q->lock);
-+
-+ flush_warnings(qid, c->vfs_sb, &msgs);
-+
-+ return ret;
-+}
-+
-+static void __bch2_quota_transfer(struct bch_memquota *src_q,
-+ struct bch_memquota *dst_q,
-+ enum quota_counters counter, s64 v)
-+{
-+ BUG_ON(v > src_q->c[counter].v);
-+ BUG_ON(v + dst_q->c[counter].v < v);
-+
-+ src_q->c[counter].v -= v;
-+ dst_q->c[counter].v += v;
-+}
-+
-+int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
-+ struct bch_qid dst,
-+ struct bch_qid src, u64 space,
-+ enum quota_acct_mode mode)
-+{
-+ struct bch_memquota_type *q;
-+ struct bch_memquota *src_q[3], *dst_q[3];
-+ struct quota_msgs msgs;
-+ unsigned i;
-+ int ret = 0;
-+
-+ qtypes &= enabled_qtypes(c);
-+
-+ memset(&msgs, 0, sizeof(msgs));
-+
-+ for_each_set_qtype(c, i, q, qtypes) {
-+ src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_KERNEL);
-+ dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_KERNEL);
-+ if (!src_q[i] || !dst_q[i])
-+ return -ENOMEM;
-+ }
-+
-+ for_each_set_qtype(c, i, q, qtypes)
-+ mutex_lock_nested(&q->lock, i);
-+
-+ for_each_set_qtype(c, i, q, qtypes) {
-+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC,
-+ dst_q[i]->c[Q_SPC].v + space,
-+ mode);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO,
-+ dst_q[i]->c[Q_INO].v + 1,
-+ mode);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ for_each_set_qtype(c, i, q, qtypes) {
-+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space);
-+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1);
-+ }
-+
-+err:
-+ for_each_set_qtype(c, i, q, qtypes)
-+ mutex_unlock(&q->lock);
-+
-+ flush_warnings(dst, c->vfs_sb, &msgs);
-+
-+ return ret;
-+}
-+
-+static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k,
-+ struct qc_dqblk *qdq)
-+{
-+ struct bkey_s_c_quota dq;
-+ struct bch_memquota_type *q;
-+ struct bch_memquota *mq;
-+ unsigned i;
-+
-+ BUG_ON(k.k->p.inode >= QTYP_NR);
-+
-+ if (!((1U << k.k->p.inode) & enabled_qtypes(c)))
-+ return 0;
-+
-+ switch (k.k->type) {
-+ case KEY_TYPE_quota:
-+ dq = bkey_s_c_to_quota(k);
-+ q = &c->quotas[k.k->p.inode];
-+
-+ mutex_lock(&q->lock);
-+ mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL);
-+ if (!mq) {
-+ mutex_unlock(&q->lock);
-+ return -ENOMEM;
-+ }
-+
-+ for (i = 0; i < Q_COUNTERS; i++) {
-+ mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit);
-+ mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit);
-+ }
-+
-+ if (qdq && qdq->d_fieldmask & QC_SPC_TIMER)
-+ mq->c[Q_SPC].timer = qdq->d_spc_timer;
-+ if (qdq && qdq->d_fieldmask & QC_SPC_WARNS)
-+ mq->c[Q_SPC].warns = qdq->d_spc_warns;
-+ if (qdq && qdq->d_fieldmask & QC_INO_TIMER)
-+ mq->c[Q_INO].timer = qdq->d_ino_timer;
-+ if (qdq && qdq->d_fieldmask & QC_INO_WARNS)
-+ mq->c[Q_INO].warns = qdq->d_ino_warns;
-+
-+ mutex_unlock(&q->lock);
-+ }
-+
-+ return 0;
-+}
-+
-+void bch2_fs_quota_exit(struct bch_fs *c)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++)
-+ genradix_free(&c->quotas[i].table);
-+}
-+
-+void bch2_fs_quota_init(struct bch_fs *c)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++)
-+ mutex_init(&c->quotas[i].lock);
-+}
-+
-+static struct bch_sb_field_quota *bch2_sb_get_or_create_quota(struct bch_sb_handle *sb)
-+{
-+ struct bch_sb_field_quota *sb_quota = bch2_sb_field_get(sb->sb, quota);
-+
-+ if (sb_quota)
-+ return sb_quota;
-+
-+ sb_quota = bch2_sb_field_resize(sb, quota, sizeof(*sb_quota) / sizeof(u64));
-+ if (sb_quota) {
-+ unsigned qtype, qc;
-+
-+ for (qtype = 0; qtype < QTYP_NR; qtype++)
-+ for (qc = 0; qc < Q_COUNTERS; qc++)
-+ sb_quota->q[qtype].c[qc].timelimit =
-+ cpu_to_le32(7 * 24 * 60 * 60);
-+ }
-+
-+ return sb_quota;
-+}
-+
-+static void bch2_sb_quota_read(struct bch_fs *c)
-+{
-+ struct bch_sb_field_quota *sb_quota;
-+ unsigned i, j;
-+
-+ sb_quota = bch2_sb_field_get(c->disk_sb.sb, quota);
-+ if (!sb_quota)
-+ return;
-+
-+ for (i = 0; i < QTYP_NR; i++) {
-+ struct bch_memquota_type *q = &c->quotas[i];
-+
-+ for (j = 0; j < Q_COUNTERS; j++) {
-+ q->limits[j].timelimit =
-+ le32_to_cpu(sb_quota->q[i].c[j].timelimit);
-+ q->limits[j].warnlimit =
-+ le32_to_cpu(sb_quota->q[i].c[j].warnlimit);
-+ }
-+ }
-+}
-+
-+static int bch2_fs_quota_read_inode(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_inode_unpacked u;
-+ struct bch_snapshot_tree s_t;
-+ int ret;
-+
-+ ret = bch2_snapshot_tree_lookup(trans,
-+ bch2_snapshot_tree(c, k.k->p.snapshot), &s_t);
-+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
-+ "%s: snapshot tree %u not found", __func__,
-+ snapshot_t(c, k.k->p.snapshot)->tree);
-+ if (ret)
-+ return ret;
-+
-+ if (!s_t.master_subvol)
-+ goto advance;
-+
-+ ret = bch2_inode_find_by_inum_nowarn_trans(trans,
-+ (subvol_inum) {
-+ le32_to_cpu(s_t.master_subvol),
-+ k.k->p.offset,
-+ }, &u);
-+ /*
-+ * Inode might be deleted in this snapshot - the easiest way to handle
-+ * that is to just skip it here:
-+ */
-+ if (bch2_err_matches(ret, ENOENT))
-+ goto advance;
-+
-+ if (ret)
-+ return ret;
-+
-+ bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors,
-+ KEY_TYPE_QUOTA_NOCHECK);
-+ bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
-+ KEY_TYPE_QUOTA_NOCHECK);
-+advance:
-+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
-+ return 0;
-+}
-+
-+int bch2_fs_quota_read(struct bch_fs *c)
-+{
-+ struct bch_sb_field_quota *sb_quota;
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ mutex_lock(&c->sb_lock);
-+ sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
-+ if (!sb_quota) {
-+ mutex_unlock(&c->sb_lock);
-+ return -BCH_ERR_ENOSPC_sb_quota;
-+ }
-+
-+ bch2_sb_quota_read(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ trans = bch2_trans_get(c);
-+
-+ ret = for_each_btree_key2(trans, iter, BTREE_ID_quotas,
-+ POS_MIN, BTREE_ITER_PREFETCH, k,
-+ __bch2_quota_set(c, k, NULL)) ?:
-+ for_each_btree_key2(trans, iter, BTREE_ID_inodes,
-+ POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
-+ bch2_fs_quota_read_inode(trans, &iter, k));
-+
-+ bch2_trans_put(trans);
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/* Enable/disable/delete quotas for an entire filesystem: */
-+
-+static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+ struct bch_sb_field_quota *sb_quota;
-+ int ret = 0;
-+
-+ if (sb->s_flags & SB_RDONLY)
-+ return -EROFS;
-+
-+ /* Accounting must be enabled at mount time: */
-+ if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT))
-+ return -EINVAL;
-+
-+ /* Can't enable enforcement without accounting: */
-+ if ((uflags & FS_QUOTA_UDQ_ENFD) && !c->opts.usrquota)
-+ return -EINVAL;
-+
-+ if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota)
-+ return -EINVAL;
-+
-+ if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota)
-+ return -EINVAL;
-+
-+ mutex_lock(&c->sb_lock);
-+ sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
-+ if (!sb_quota) {
-+ ret = -BCH_ERR_ENOSPC_sb_quota;
-+ goto unlock;
-+ }
-+
-+ if (uflags & FS_QUOTA_UDQ_ENFD)
-+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true);
-+
-+ if (uflags & FS_QUOTA_GDQ_ENFD)
-+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, true);
-+
-+ if (uflags & FS_QUOTA_PDQ_ENFD)
-+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true);
-+
-+ bch2_write_super(c);
-+unlock:
-+ mutex_unlock(&c->sb_lock);
-+
-+ return bch2_err_class(ret);
-+}
-+
-+static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+
-+ if (sb->s_flags & SB_RDONLY)
-+ return -EROFS;
-+
-+ mutex_lock(&c->sb_lock);
-+ if (uflags & FS_QUOTA_UDQ_ENFD)
-+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false);
-+
-+ if (uflags & FS_QUOTA_GDQ_ENFD)
-+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, false);
-+
-+ if (uflags & FS_QUOTA_PDQ_ENFD)
-+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false);
-+
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ return 0;
-+}
-+
-+static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+ int ret;
-+
-+ if (sb->s_flags & SB_RDONLY)
-+ return -EROFS;
-+
-+ if (uflags & FS_USER_QUOTA) {
-+ if (c->opts.usrquota)
-+ return -EINVAL;
-+
-+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
-+ POS(QTYP_USR, 0),
-+ POS(QTYP_USR, U64_MAX),
-+ 0, NULL);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (uflags & FS_GROUP_QUOTA) {
-+ if (c->opts.grpquota)
-+ return -EINVAL;
-+
-+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
-+ POS(QTYP_GRP, 0),
-+ POS(QTYP_GRP, U64_MAX),
-+ 0, NULL);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (uflags & FS_PROJ_QUOTA) {
-+ if (c->opts.prjquota)
-+ return -EINVAL;
-+
-+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
-+ POS(QTYP_PRJ, 0),
-+ POS(QTYP_PRJ, U64_MAX),
-+ 0, NULL);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Return quota status information, such as enforcements, quota file inode
-+ * numbers etc.
-+ */
-+static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+ unsigned qtypes = enabled_qtypes(c);
-+ unsigned i;
-+
-+ memset(state, 0, sizeof(*state));
-+
-+ for (i = 0; i < QTYP_NR; i++) {
-+ state->s_state[i].flags |= QCI_SYSFILE;
-+
-+ if (!(qtypes & (1 << i)))
-+ continue;
-+
-+ state->s_state[i].flags |= QCI_ACCT_ENABLED;
-+
-+ state->s_state[i].spc_timelimit = c->quotas[i].limits[Q_SPC].timelimit;
-+ state->s_state[i].spc_warnlimit = c->quotas[i].limits[Q_SPC].warnlimit;
-+
-+ state->s_state[i].ino_timelimit = c->quotas[i].limits[Q_INO].timelimit;
-+ state->s_state[i].ino_warnlimit = c->quotas[i].limits[Q_INO].warnlimit;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Adjust quota timers & warnings
-+ */
-+static int bch2_quota_set_info(struct super_block *sb, int type,
-+ struct qc_info *info)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+ struct bch_sb_field_quota *sb_quota;
-+ int ret = 0;
-+
-+ if (0) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ qc_info_to_text(&buf, info);
-+ pr_info("setting:\n%s", buf.buf);
-+ printbuf_exit(&buf);
-+ }
-+
-+ if (sb->s_flags & SB_RDONLY)
-+ return -EROFS;
-+
-+ if (type >= QTYP_NR)
-+ return -EINVAL;
-+
-+ if (!((1 << type) & enabled_qtypes(c)))
-+ return -ESRCH;
-+
-+ if (info->i_fieldmask &
-+ ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS))
-+ return -EINVAL;
-+
-+ mutex_lock(&c->sb_lock);
-+ sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
-+ if (!sb_quota) {
-+ ret = -BCH_ERR_ENOSPC_sb_quota;
-+ goto unlock;
-+ }
-+
-+ if (info->i_fieldmask & QC_SPC_TIMER)
-+ sb_quota->q[type].c[Q_SPC].timelimit =
-+ cpu_to_le32(info->i_spc_timelimit);
-+
-+ if (info->i_fieldmask & QC_SPC_WARNS)
-+ sb_quota->q[type].c[Q_SPC].warnlimit =
-+ cpu_to_le32(info->i_spc_warnlimit);
-+
-+ if (info->i_fieldmask & QC_INO_TIMER)
-+ sb_quota->q[type].c[Q_INO].timelimit =
-+ cpu_to_le32(info->i_ino_timelimit);
-+
-+ if (info->i_fieldmask & QC_INO_WARNS)
-+ sb_quota->q[type].c[Q_INO].warnlimit =
-+ cpu_to_le32(info->i_ino_warnlimit);
-+
-+ bch2_sb_quota_read(c);
-+
-+ bch2_write_super(c);
-+unlock:
-+ mutex_unlock(&c->sb_lock);
-+
-+ return bch2_err_class(ret);
-+}
-+
-+/* Get/set individual quotas: */
-+
-+static void __bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src)
-+{
-+ dst->d_space = src->c[Q_SPC].v << 9;
-+ dst->d_spc_hardlimit = src->c[Q_SPC].hardlimit << 9;
-+ dst->d_spc_softlimit = src->c[Q_SPC].softlimit << 9;
-+ dst->d_spc_timer = src->c[Q_SPC].timer;
-+ dst->d_spc_warns = src->c[Q_SPC].warns;
-+
-+ dst->d_ino_count = src->c[Q_INO].v;
-+ dst->d_ino_hardlimit = src->c[Q_INO].hardlimit;
-+ dst->d_ino_softlimit = src->c[Q_INO].softlimit;
-+ dst->d_ino_timer = src->c[Q_INO].timer;
-+ dst->d_ino_warns = src->c[Q_INO].warns;
-+}
-+
-+static int bch2_get_quota(struct super_block *sb, struct kqid kqid,
-+ struct qc_dqblk *qdq)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+ struct bch_memquota_type *q = &c->quotas[kqid.type];
-+ qid_t qid = from_kqid(&init_user_ns, kqid);
-+ struct bch_memquota *mq;
-+
-+ memset(qdq, 0, sizeof(*qdq));
-+
-+ mutex_lock(&q->lock);
-+ mq = genradix_ptr(&q->table, qid);
-+ if (mq)
-+ __bch2_quota_get(qdq, mq);
-+ mutex_unlock(&q->lock);
-+
-+ return 0;
-+}
-+
-+static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
-+ struct qc_dqblk *qdq)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+ struct bch_memquota_type *q = &c->quotas[kqid->type];
-+ qid_t qid = from_kqid(&init_user_ns, *kqid);
-+ struct genradix_iter iter;
-+ struct bch_memquota *mq;
-+ int ret = 0;
-+
-+ mutex_lock(&q->lock);
-+
-+ genradix_for_each_from(&q->table, iter, mq, qid)
-+ if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { /* skip entries that are all zeroes */
-+ __bch2_quota_get(qdq, mq);
-+ *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); /* NOTE(review): qid above is mapped via init_user_ns, here via current_user_ns() - confirm intended */
-+ goto found;
-+ }
-+
-+ ret = -ENOENT; /* iteration ended without finding a non-zero entry */
-+found:
-+ mutex_unlock(&q->lock);
-+ return bch2_err_class(ret);
-+}
-+
-+static int bch2_set_quota_trans(struct btree_trans *trans,
-+ struct bkey_i_quota *new_quota,
-+ struct qc_dqblk *qdq)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p,
-+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-+ ret = bkey_err(k);
-+ if (unlikely(ret))
-+ return ret;
-+
-+ if (k.k->type == KEY_TYPE_quota)
-+ new_quota->v = *bkey_s_c_to_quota(k).v;
-+
-+ if (qdq->d_fieldmask & QC_SPC_SOFT)
-+ new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
-+ if (qdq->d_fieldmask & QC_SPC_HARD)
-+ new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
-+
-+ if (qdq->d_fieldmask & QC_INO_SOFT)
-+ new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
-+ if (qdq->d_fieldmask & QC_INO_HARD)
-+ new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
-+
-+ ret = bch2_trans_update(trans, &iter, &new_quota->k_i, 0);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static int bch2_set_quota(struct super_block *sb, struct kqid qid,
-+ struct qc_dqblk *qdq)
-+{
-+ struct bch_fs *c = sb->s_fs_info;
-+ struct bkey_i_quota new_quota;
-+ int ret;
-+
-+ if (0) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ qc_dqblk_to_text(&buf, qdq);
-+ pr_info("setting:\n%s", buf.buf);
-+ printbuf_exit(&buf);
-+ }
-+
-+ if (sb->s_flags & SB_RDONLY)
-+ return -EROFS;
-+
-+ bkey_quota_init(&new_quota.k_i);
-+ new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
-+
-+ ret = bch2_trans_do(c, NULL, NULL, 0,
-+ bch2_set_quota_trans(trans, &new_quota, qdq)) ?:
-+ __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq);
-+
-+ return bch2_err_class(ret);
-+}
-+
-+const struct quotactl_ops bch2_quotactl_operations = {
-+ .quota_enable = bch2_quota_enable,
-+ .quota_disable = bch2_quota_disable,
-+ .rm_xquota = bch2_quota_remove,
-+
-+ .get_state = bch2_quota_get_state,
-+ .set_info = bch2_quota_set_info,
-+
-+ .get_dqblk = bch2_get_quota,
-+ .get_nextdqblk = bch2_get_next_quota,
-+ .set_dqblk = bch2_set_quota,
-+};
-+
-+#endif /* CONFIG_BCACHEFS_QUOTA */
-diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h
-new file mode 100644
-index 000000000000..884f601f41c4
---- /dev/null
-+++ b/fs/bcachefs/quota.h
-@@ -0,0 +1,74 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_QUOTA_H
-+#define _BCACHEFS_QUOTA_H
-+
-+#include "inode.h"
-+#include "quota_types.h"
-+
-+enum bkey_invalid_flags;
-+extern const struct bch_sb_field_ops bch_sb_field_ops_quota;
-+
-+int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_quota ((struct bkey_ops) { \
-+ .key_invalid = bch2_quota_invalid, \
-+ .val_to_text = bch2_quota_to_text, \
-+ .min_val_size = 32, \
-+})
-+
-+static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
-+{
-+ return (struct bch_qid) {
-+ .q[QTYP_USR] = u->bi_uid,
-+ .q[QTYP_GRP] = u->bi_gid,
-+ .q[QTYP_PRJ] = u->bi_project ? u->bi_project - 1 : 0,
-+ };
-+}
-+
-+static inline unsigned enabled_qtypes(struct bch_fs *c)
-+{
-+ return ((c->opts.usrquota << QTYP_USR)|
-+ (c->opts.grpquota << QTYP_GRP)|
-+ (c->opts.prjquota << QTYP_PRJ));
-+}
-+
-+#ifdef CONFIG_BCACHEFS_QUOTA
-+
-+int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters,
-+ s64, enum quota_acct_mode);
-+
-+int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid,
-+ struct bch_qid, u64, enum quota_acct_mode);
-+
-+void bch2_fs_quota_exit(struct bch_fs *);
-+void bch2_fs_quota_init(struct bch_fs *);
-+int bch2_fs_quota_read(struct bch_fs *);
-+
-+extern const struct quotactl_ops bch2_quotactl_operations;
-+
-+#else
-+
-+static inline int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid,
-+ enum quota_counters counter, s64 v,
-+ enum quota_acct_mode mode)
-+{
-+ return 0;
-+}
-+
-+static inline int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
-+ struct bch_qid dst,
-+ struct bch_qid src, u64 space,
-+ enum quota_acct_mode mode)
-+{
-+ return 0;
-+}
-+
-+static inline void bch2_fs_quota_exit(struct bch_fs *c) {}
-+static inline void bch2_fs_quota_init(struct bch_fs *c) {}
-+static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; }
-+
-+#endif
-+
-+#endif /* _BCACHEFS_QUOTA_H */
-diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h
-new file mode 100644
-index 000000000000..6a136083d389
---- /dev/null
-+++ b/fs/bcachefs/quota_types.h
-@@ -0,0 +1,43 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_QUOTA_TYPES_H
-+#define _BCACHEFS_QUOTA_TYPES_H
-+
-+#include <linux/generic-radix-tree.h>
-+
-+struct bch_qid {
-+ u32 q[QTYP_NR];
-+};
-+
-+enum quota_acct_mode {
-+ KEY_TYPE_QUOTA_PREALLOC,
-+ KEY_TYPE_QUOTA_WARN,
-+ KEY_TYPE_QUOTA_NOCHECK,
-+};
-+
-+struct memquota_counter {
-+ u64 v;
-+ u64 hardlimit;
-+ u64 softlimit;
-+ s64 timer;
-+ int warns;
-+ int warning_issued;
-+};
-+
-+struct bch_memquota {
-+ struct memquota_counter c[Q_COUNTERS];
-+};
-+
-+typedef GENRADIX(struct bch_memquota) bch_memquota_table;
-+
-+struct quota_limit {
-+ u32 timelimit;
-+ u32 warnlimit;
-+};
-+
-+struct bch_memquota_type {
-+ struct quota_limit limits[Q_COUNTERS];
-+ bch_memquota_table table;
-+ struct mutex lock;
-+};
-+
-+#endif /* _BCACHEFS_QUOTA_TYPES_H */
-diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
-new file mode 100644
-index 000000000000..3319190b8d9c
---- /dev/null
-+++ b/fs/bcachefs/rebalance.c
-@@ -0,0 +1,464 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "btree_iter.h"
-+#include "btree_update.h"
-+#include "btree_write_buffer.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "compress.h"
-+#include "disk_groups.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "inode.h"
-+#include "move.h"
-+#include "rebalance.h"
-+#include "subvolume.h"
-+#include "super-io.h"
-+#include "trace.h"
-+
-+#include <linux/freezer.h>
-+#include <linux/kthread.h>
-+#include <linux/sched/cputime.h>
-+
-+#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
-+
-+static const char * const bch2_rebalance_state_strs[] = {
-+#define x(t) #t,
-+ BCH_REBALANCE_STATES()
-+ NULL
-+#undef x
-+};
-+
-+static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_i_cookie *cookie;
-+ u64 v;
-+ int ret;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
-+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
-+ BTREE_ITER_INTENT);
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ v = k.k->type == KEY_TYPE_cookie
-+ ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
-+ : 0;
-+
-+ cookie = bch2_trans_kmalloc(trans, sizeof(*cookie));
-+ ret = PTR_ERR_OR_ZERO(cookie);
-+ if (ret)
-+ goto err;
-+
-+ bkey_cookie_init(&cookie->k_i);
-+ cookie->k.p = iter.pos;
-+ cookie->v.cookie = cpu_to_le64(v + 1);
-+
-+ ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
-+{
-+ int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
-+ __bch2_set_rebalance_needs_scan(trans, inum));
-+ rebalance_wakeup(c);
-+ return ret;
-+}
-+
-+int bch2_set_fs_needs_rebalance(struct bch_fs *c)
-+{
-+ return bch2_set_rebalance_needs_scan(c, 0);
-+}
-+
-+static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ u64 v;
-+ int ret;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
-+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
-+ BTREE_ITER_INTENT);
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ v = k.k->type == KEY_TYPE_cookie
-+ ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
-+ : 0;
-+
-+ if (v == cookie)
-+ ret = bch2_btree_delete_at(trans, &iter, 0);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans,
-+ struct btree_iter *work_iter)
-+{
-+ return !kthread_should_stop()
-+ ? bch2_btree_iter_peek(work_iter)
-+ : bkey_s_c_null;
-+}
-+
-+static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0);
-+ int ret = PTR_ERR_OR_ZERO(n);
-+ if (ret)
-+ return ret;
-+
-+ extent_entry_drop(bkey_i_to_s(n),
-+ (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n)));
-+ return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
-+}
-+
-+static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
-+ struct bpos work_pos,
-+ struct btree_iter *extent_iter,
-+ struct data_update_opts *data_opts)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_s_c k;
-+
-+ bch2_trans_iter_exit(trans, extent_iter);
-+ bch2_trans_iter_init(trans, extent_iter,
-+ work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink,
-+ work_pos,
-+ BTREE_ITER_ALL_SNAPSHOTS);
-+ k = bch2_btree_iter_peek_slot(extent_iter);
-+ if (bkey_err(k))
-+ return k;
-+
-+ const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL;
-+ if (!r) {
-+ /* raced due to btree write buffer, nothing to do */
-+ return bkey_s_c_null;
-+ }
-+
-+ memset(data_opts, 0, sizeof(*data_opts));
-+
-+ data_opts->rewrite_ptrs =
-+ bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression);
-+ data_opts->target = r->target;
-+
-+ if (!data_opts->rewrite_ptrs) {
-+ /*
-+ * device we would want to write to offline? devices in target
-+ * changed?
-+ *
-+ * We'll now need a full scan before this extent is picked up
-+ * again:
-+ */
-+ int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k);
-+ if (ret)
-+ return bkey_s_c_err(ret);
-+ return bkey_s_c_null;
-+ }
-+
-+ return k;
-+}
-+
-+noinline_for_stack
-+static int do_rebalance_extent(struct moving_context *ctxt,
-+ struct bpos work_pos,
-+ struct btree_iter *extent_iter)
-+{
-+ struct btree_trans *trans = ctxt->trans;
-+ struct bch_fs *c = trans->c;
-+ struct bch_fs_rebalance *r = &trans->c->rebalance;
-+ struct data_update_opts data_opts;
-+ struct bch_io_opts io_opts;
-+ struct bkey_s_c k;
-+ struct bkey_buf sk;
-+ int ret;
-+
-+ ctxt->stats = &r->work_stats;
-+ r->state = BCH_REBALANCE_working;
-+
-+ bch2_bkey_buf_init(&sk);
-+
-+ ret = bkey_err(k = next_rebalance_extent(trans, work_pos,
-+ extent_iter, &data_opts));
-+ if (ret || !k.k)
-+ goto out;
-+
-+ ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
-+ if (ret)
-+ goto out;
-+
-+ atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
-+
-+ /*
-+ * The iterator gets unlocked by __bch2_read_extent - need to
-+ * save a copy of @k elsewhere:
-+ */
-+ bch2_bkey_buf_reassemble(&sk, c, k);
-+ k = bkey_i_to_s_c(sk.k);
-+
-+ ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts);
-+ if (ret) {
-+ if (bch2_err_matches(ret, ENOMEM)) {
-+ /* memory allocation failure, wait for some IO to finish */
-+ bch2_move_ctxt_wait_for_io(ctxt);
-+ ret = -BCH_ERR_transaction_restart_nested;
-+ }
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto out;
-+
-+ /* skip it and continue, XXX signal failure */
-+ ret = 0;
-+ }
-+out:
-+ bch2_bkey_buf_exit(&sk, c);
-+ return ret;
-+}
-+
-+static bool rebalance_pred(struct bch_fs *c, void *arg,
-+ struct bkey_s_c k,
-+ struct bch_io_opts *io_opts,
-+ struct data_update_opts *data_opts)
-+{
-+ unsigned target, compression;
-+
-+ if (k.k->p.inode) {
-+ target = io_opts->background_target;
-+ compression = io_opts->background_compression ?: io_opts->compression;
-+ } else {
-+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
-+
-+ target = r ? r->target : io_opts->background_target;
-+ compression = r ? r->compression :
-+ (io_opts->background_compression ?: io_opts->compression);
-+ }
-+
-+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression);
-+ data_opts->target = target;
-+ return data_opts->rewrite_ptrs != 0;
-+}
-+
-+static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
-+{
-+ struct btree_trans *trans = ctxt->trans;
-+ struct bch_fs_rebalance *r = &trans->c->rebalance;
-+ int ret;
-+
-+ bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
-+ ctxt->stats = &r->scan_stats;
-+
-+ if (!inum) {
-+ r->scan_start = BBPOS_MIN;
-+ r->scan_end = BBPOS_MAX;
-+ } else {
-+ r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0));
-+ r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX));
-+ }
-+
-+ r->state = BCH_REBALANCE_scanning;
-+
-+ ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?:
-+ commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+ bch2_clear_rebalance_needs_scan(trans, inum, cookie));
-+
-+ bch2_move_stats_exit(&r->scan_stats, trans->c);
-+ return ret;
-+}
-+
-+static void rebalance_wait(struct bch_fs *c)
-+{
-+ struct bch_fs_rebalance *r = &c->rebalance;
-+ struct io_clock *clock = &c->io_clock[WRITE];
-+ u64 now = atomic64_read(&clock->now);
-+ u64 min_member_capacity = bch2_min_rw_member_capacity(c);
-+
-+ if (min_member_capacity == U64_MAX)
-+ min_member_capacity = 128 * 2048;
-+
-+ r->wait_iotime_end = now + (min_member_capacity >> 6);
-+
-+ if (r->state != BCH_REBALANCE_waiting) {
-+ r->wait_iotime_start = now;
-+ r->wait_wallclock_start = ktime_get_real_ns();
-+ r->state = BCH_REBALANCE_waiting;
-+ }
-+
-+ bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
-+}
-+
-+static int do_rebalance(struct moving_context *ctxt)
-+{
-+ struct btree_trans *trans = ctxt->trans;
-+ struct bch_fs *c = trans->c;
-+ struct bch_fs_rebalance *r = &c->rebalance;
-+ struct btree_iter rebalance_work_iter, extent_iter = { NULL };
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ bch2_move_stats_init(&r->work_stats, "rebalance_work");
-+ bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
-+
-+ bch2_trans_iter_init(trans, &rebalance_work_iter,
-+ BTREE_ID_rebalance_work, POS_MIN,
-+ BTREE_ITER_ALL_SNAPSHOTS);
-+
-+ while (!bch2_move_ratelimit(ctxt) &&
-+ !kthread_wait_freezable(r->enabled)) {
-+ bch2_trans_begin(trans);
-+
-+ ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter));
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ continue;
-+ if (ret || !k.k)
-+ break;
-+
-+ ret = k.k->type == KEY_TYPE_cookie
-+ ? do_rebalance_scan(ctxt, k.k->p.inode,
-+ le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie))
-+ : do_rebalance_extent(ctxt, k.k->p, &extent_iter);
-+
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ continue;
-+ if (ret)
-+ break;
-+
-+ bch2_btree_iter_advance(&rebalance_work_iter);
-+ }
-+
-+ bch2_trans_iter_exit(trans, &extent_iter);
-+ bch2_trans_iter_exit(trans, &rebalance_work_iter);
-+ bch2_move_stats_exit(&r->scan_stats, c);
-+
-+ if (!ret &&
-+ !kthread_should_stop() &&
-+ !atomic64_read(&r->work_stats.sectors_seen) &&
-+ !atomic64_read(&r->scan_stats.sectors_seen)) {
-+ bch2_trans_unlock_long(trans);
-+ rebalance_wait(c);
-+ }
-+
-+ if (!bch2_err_matches(ret, EROFS))
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+static int bch2_rebalance_thread(void *arg)
-+{
-+ struct bch_fs *c = arg;
-+ struct bch_fs_rebalance *r = &c->rebalance;
-+ struct moving_context ctxt;
-+ int ret;
-+
-+ set_freezable();
-+
-+ bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
-+ writepoint_ptr(&c->rebalance_write_point),
-+ true);
-+
-+ while (!kthread_should_stop() &&
-+ !(ret = do_rebalance(&ctxt)))
-+ ; /* keep calling do_rebalance() until it returns an error or the thread is asked to stop */
-+
-+ bch2_moving_ctxt_exit(&ctxt);
-+
-+ return 0; /* the do_rebalance() error left in ret is not returned */
-+}
-+
-+void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+ struct bch_fs_rebalance *r = &c->rebalance;
-+
-+ prt_str(out, bch2_rebalance_state_strs[r->state]);
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+
-+ switch (r->state) {
-+ case BCH_REBALANCE_waiting: {
-+ u64 now = atomic64_read(&c->io_clock[WRITE].now);
-+
-+ prt_str(out, "io wait duration: ");
-+ bch2_prt_human_readable_s64(out, r->wait_iotime_end - r->wait_iotime_start);
-+ prt_newline(out);
-+
-+ prt_str(out, "io wait remaining: ");
-+ bch2_prt_human_readable_s64(out, r->wait_iotime_end - now);
-+ prt_newline(out);
-+
-+ prt_str(out, "duration waited: ");
-+ bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start);
-+ prt_newline(out);
-+ break;
-+ }
-+ case BCH_REBALANCE_working:
-+ bch2_move_stats_to_text(out, &r->work_stats);
-+ break;
-+ case BCH_REBALANCE_scanning:
-+ bch2_move_stats_to_text(out, &r->scan_stats);
-+ break;
-+ }
-+ prt_newline(out);
-+ printbuf_indent_sub(out, 2);
-+}
-+
-+void bch2_rebalance_stop(struct bch_fs *c)
-+{
-+ struct task_struct *p;
-+
-+ c->rebalance.pd.rate.rate = UINT_MAX;
-+ bch2_ratelimit_reset(&c->rebalance.pd.rate);
-+
-+ p = rcu_dereference_protected(c->rebalance.thread, 1);
-+ c->rebalance.thread = NULL;
-+
-+ if (p) {
-+ /* for synchronizing with rebalance_wakeup() */
-+ synchronize_rcu();
-+
-+ kthread_stop(p);
-+ put_task_struct(p); /* pairs with get_task_struct() in bch2_rebalance_start() */
-+ }
-+}
-+
-+int bch2_rebalance_start(struct bch_fs *c)
-+{
-+ struct task_struct *p;
-+ int ret;
-+
-+ if (c->rebalance.thread)
-+ return 0;
-+
-+ if (c->opts.nochanges)
-+ return 0;
-+
-+ p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
-+ ret = PTR_ERR_OR_ZERO(p);
-+ if (ret) {
-+ bch_err_msg(c, ret, "creating rebalance thread");
-+ return ret;
-+ }
-+
-+ get_task_struct(p);
-+ rcu_assign_pointer(c->rebalance.thread, p);
-+ wake_up_process(p);
-+ return 0;
-+}
-+
-+void bch2_fs_rebalance_init(struct bch_fs *c)
-+{
-+ bch2_pd_controller_init(&c->rebalance.pd);
-+}
-diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
-new file mode 100644
-index 000000000000..28a52638f16c
---- /dev/null
-+++ b/fs/bcachefs/rebalance.h
-@@ -0,0 +1,27 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_REBALANCE_H
-+#define _BCACHEFS_REBALANCE_H
-+
-+#include "rebalance_types.h"
-+
-+int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
-+int bch2_set_fs_needs_rebalance(struct bch_fs *);
-+
-+static inline void rebalance_wakeup(struct bch_fs *c)
-+{
-+ struct task_struct *p;
-+
-+ rcu_read_lock();
-+ p = rcu_dereference(c->rebalance.thread);
-+ if (p)
-+ wake_up_process(p);
-+ rcu_read_unlock();
-+}
-+
-+void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *);
-+
-+void bch2_rebalance_stop(struct bch_fs *);
-+int bch2_rebalance_start(struct bch_fs *);
-+void bch2_fs_rebalance_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_REBALANCE_H */
-diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h
-new file mode 100644
-index 000000000000..0fffb536c1d0
---- /dev/null
-+++ b/fs/bcachefs/rebalance_types.h
-@@ -0,0 +1,37 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_REBALANCE_TYPES_H
-+#define _BCACHEFS_REBALANCE_TYPES_H
-+
-+#include "bbpos_types.h"
-+#include "move_types.h"
-+
-+#define BCH_REBALANCE_STATES() \
-+ x(waiting) \
-+ x(working) \
-+ x(scanning)
-+
-+enum bch_rebalance_states {
-+#define x(t) BCH_REBALANCE_##t,
-+ BCH_REBALANCE_STATES()
-+#undef x
-+};
-+
-+struct bch_fs_rebalance {
-+ struct task_struct __rcu *thread;
-+ struct bch_pd_controller pd;
-+
-+ enum bch_rebalance_states state;
-+ u64 wait_iotime_start;
-+ u64 wait_iotime_end;
-+ u64 wait_wallclock_start;
-+
-+ struct bch_move_stats work_stats;
-+
-+ struct bbpos scan_start;
-+ struct bbpos scan_end;
-+ struct bch_move_stats scan_stats;
-+
-+ unsigned enabled:1;
-+};
-+
-+#endif /* _BCACHEFS_REBALANCE_TYPES_H */
-diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
-new file mode 100644
-index 000000000000..9c30500ce920
---- /dev/null
-+++ b/fs/bcachefs/recovery.c
-@@ -0,0 +1,1057 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "backpointers.h"
-+#include "bkey_buf.h"
-+#include "alloc_background.h"
-+#include "btree_gc.h"
-+#include "btree_journal_iter.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "btree_io.h"
-+#include "buckets.h"
-+#include "dirent.h"
-+#include "ec.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "fs-common.h"
-+#include "fsck.h"
-+#include "journal_io.h"
-+#include "journal_reclaim.h"
-+#include "journal_seq_blacklist.h"
-+#include "lru.h"
-+#include "logged_ops.h"
-+#include "move.h"
-+#include "quota.h"
-+#include "rebalance.h"
-+#include "recovery.h"
-+#include "replicas.h"
-+#include "sb-clean.h"
-+#include "snapshot.h"
-+#include "subvolume.h"
-+#include "super-io.h"
-+
-+#include <linux/sort.h>
-+#include <linux/stat.h>
-+
-+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-+
-+static bool btree_id_is_alloc(enum btree_id id)
-+{
-+ switch (id) {
-+ case BTREE_ID_alloc:
-+ case BTREE_ID_backpointers:
-+ case BTREE_ID_need_discard:
-+ case BTREE_ID_freespace:
-+ case BTREE_ID_bucket_gens:
-+ return true;
-+ default:
-+ return false;
-+ }
-+}
-+
-+/* for -o reconstruct_alloc: */
-+static void drop_alloc_keys(struct journal_keys *keys)
-+{
-+ size_t src, dst;
-+
-+ for (src = 0, dst = 0; src < keys->nr; src++)
-+ if (!btree_id_is_alloc(keys->d[src].btree_id))
-+ keys->d[dst++] = keys->d[src];
-+
-+ keys->nr = dst;
-+}
-+
-+/*
-+ * Btree node pointers have a field to stash a pointer to the in-memory btree
-+ * node; we need to zero out this field when reading in btree nodes, or when
-+ * reading in keys from the journal:
-+ */
-+static void zero_out_btree_mem_ptr(struct journal_keys *keys)
-+{
-+ struct journal_key *i;
-+
-+ for (i = keys->d; i < keys->d + keys->nr; i++)
-+ if (i->k->k.type == KEY_TYPE_btree_ptr_v2)
-+ bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0;
-+}
-+
-+/* journal replay: */
-+
-+static void replay_now_at(struct journal *j, u64 seq)
-+{
-+ BUG_ON(seq < j->replay_journal_seq);
-+
-+ seq = min(seq, j->replay_journal_seq_end);
-+
-+ while (j->replay_journal_seq < seq)
-+ bch2_journal_pin_put(j, j->replay_journal_seq++);
-+}
-+
-+static int bch2_journal_replay_key(struct btree_trans *trans,
-+ struct journal_key *k)
-+{
-+ struct btree_iter iter;
-+ unsigned iter_flags =
-+ BTREE_ITER_INTENT|
-+ BTREE_ITER_NOT_EXTENTS;
-+ unsigned update_flags = BTREE_TRIGGER_NORUN;
-+ int ret;
-+
-+ /*
-+ * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to
-+ * keep the key cache coherent with the underlying btree. Nothing
-+ * besides the allocator is doing updates yet so we don't need key cache
-+ * coherency for non-alloc btrees, and key cache fills for snapshots
-+ * btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available until
-+ * the snapshots recovery pass runs.
-+ */
-+ if (!k->level && k->btree_id == BTREE_ID_alloc)
-+ iter_flags |= BTREE_ITER_CACHED;
-+ else
-+ update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM;
-+
-+ bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
-+ BTREE_MAX_DEPTH, k->level,
-+ iter_flags);
-+ ret = bch2_btree_iter_traverse(&iter);
-+ if (ret)
-+ goto out;
-+
-+ /* Must be checked with btree locked: */
-+ if (k->overwritten)
-+ goto out;
-+
-+ ret = bch2_trans_update(trans, &iter, k->k, update_flags);
-+out:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static int journal_sort_seq_cmp(const void *_l, const void *_r)
-+{
-+ const struct journal_key *l = *((const struct journal_key **)_l);
-+ const struct journal_key *r = *((const struct journal_key **)_r);
-+
-+ return cmp_int(l->journal_seq, r->journal_seq);
-+}
-+
-+static int bch2_journal_replay(struct bch_fs *c)
-+{
-+ struct journal_keys *keys = &c->journal_keys;
-+ struct journal_key **keys_sorted, *k;
-+ struct journal *j = &c->journal;
-+ u64 start_seq = c->journal_replay_seq_start;
-+ u64 end_seq = c->journal_replay_seq_start; /* NOTE(review): same source as start_seq, so the log message below prints one value twice - confirm intended */
-+ size_t i;
-+ int ret;
-+
-+ move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
-+ keys->gap = keys->nr;
-+
-+ keys_sorted = kvmalloc_array(keys->nr, sizeof(*keys_sorted), GFP_KERNEL);
-+ if (!keys_sorted)
-+ return -BCH_ERR_ENOMEM_journal_replay;
-+
-+ for (i = 0; i < keys->nr; i++)
-+ keys_sorted[i] = &keys->d[i];
-+
-+ sort(keys_sorted, keys->nr,
-+ sizeof(keys_sorted[0]),
-+ journal_sort_seq_cmp, NULL); /* order the pointer array by journal_seq */
-+
-+ if (keys->nr) {
-+ ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
-+ keys->nr, start_seq, end_seq);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ for (i = 0; i < keys->nr; i++) {
-+ k = keys_sorted[i];
-+
-+ cond_resched();
-+
-+ replay_now_at(j, k->journal_seq);
-+
-+ ret = bch2_trans_do(c, NULL, NULL,
-+ BTREE_INSERT_LAZY_RW|
-+ BTREE_INSERT_NOFAIL|
-+ (!k->allocated
-+ ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim
-+ : 0),
-+ bch2_journal_replay_key(trans, k));
-+ if (ret) {
-+ bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s",
-+ bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret));
-+ goto err;
-+ }
-+ }
-+
-+ replay_now_at(j, j->replay_journal_seq_end);
-+ j->replay_journal_seq = 0;
-+
-+ bch2_journal_set_replay_done(j);
-+ bch2_journal_flush_all_pins(j);
-+ ret = bch2_journal_error(j);
-+
-+ if (keys->nr && !ret)
-+ bch2_journal_log_msg(c, "journal replay finished");
-+err:
-+ kvfree(keys_sorted); /* reached on both the success and the error path */
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/* journal replay early: */
-+
-+static int journal_replay_entry_early(struct bch_fs *c,
-+ struct jset_entry *entry)
-+{
-+ int ret = 0;
-+
-+ switch (entry->type) {
-+ case BCH_JSET_ENTRY_btree_root: {
-+ struct btree_root *r;
-+
-+ while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) {
-+ ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL });
-+ if (ret)
-+ return ret;
-+ }
-+
-+ r = bch2_btree_id_root(c, entry->btree_id);
-+
-+ if (entry->u64s) {
-+ r->level = entry->level;
-+ bkey_copy(&r->key, (struct bkey_i *) entry->start);
-+ r->error = 0;
-+ } else {
-+ r->error = -EIO;
-+ }
-+ r->alive = true;
-+ break;
-+ }
-+ case BCH_JSET_ENTRY_usage: {
-+ struct jset_entry_usage *u =
-+ container_of(entry, struct jset_entry_usage, entry);
-+
-+ switch (entry->btree_id) {
-+ case BCH_FS_USAGE_reserved:
-+ if (entry->level < BCH_REPLICAS_MAX)
-+ c->usage_base->persistent_reserved[entry->level] =
-+ le64_to_cpu(u->v);
-+ break;
-+ case BCH_FS_USAGE_inodes:
-+ c->usage_base->nr_inodes = le64_to_cpu(u->v);
-+ break;
-+ case BCH_FS_USAGE_key_version:
-+ atomic64_set(&c->key_version,
-+ le64_to_cpu(u->v));
-+ break;
-+ }
-+
-+ break;
-+ }
-+ case BCH_JSET_ENTRY_data_usage: {
-+ struct jset_entry_data_usage *u =
-+ container_of(entry, struct jset_entry_data_usage, entry);
-+
-+ ret = bch2_replicas_set_usage(c, &u->r,
-+ le64_to_cpu(u->v));
-+ break;
-+ }
-+ case BCH_JSET_ENTRY_dev_usage: {
-+ struct jset_entry_dev_usage *u =
-+ container_of(entry, struct jset_entry_dev_usage, entry);
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev));
-+ unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
-+
-+ ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec);
-+
-+ for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
-+ ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets);
-+ ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors);
-+ ca->usage_base->d[i].fragmented = le64_to_cpu(u->d[i].fragmented);
-+ }
-+
-+ break;
-+ }
-+ case BCH_JSET_ENTRY_blacklist: {
-+ struct jset_entry_blacklist *bl_entry =
-+ container_of(entry, struct jset_entry_blacklist, entry);
-+
-+ ret = bch2_journal_seq_blacklist_add(c,
-+ le64_to_cpu(bl_entry->seq),
-+ le64_to_cpu(bl_entry->seq) + 1);
-+ break;
-+ }
-+ case BCH_JSET_ENTRY_blacklist_v2: {
-+ struct jset_entry_blacklist_v2 *bl_entry =
-+ container_of(entry, struct jset_entry_blacklist_v2, entry);
-+
-+ ret = bch2_journal_seq_blacklist_add(c,
-+ le64_to_cpu(bl_entry->start),
-+ le64_to_cpu(bl_entry->end) + 1);
-+ break;
-+ }
-+ case BCH_JSET_ENTRY_clock: {
-+ struct jset_entry_clock *clock =
-+ container_of(entry, struct jset_entry_clock, entry);
-+
-+ atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time));
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+/*
-+ * Run journal_replay_entry_early() on every journal entry, stopping at the
-+ * first error.  When @clean is non-NULL the entries come from the superblock
-+ * clean section; otherwise they come from each journal_replay in
-+ * c->journal_entries that is present and not flagged ->ignore.
-+ *
-+ * On success, calls bch2_fs_usage_initialize() and returns 0.
-+ */
-+static int journal_replay_early(struct bch_fs *c,
-+				struct bch_sb_field_clean *clean)
-+{
-+	struct jset_entry *entry;
-+	int ret;
-+
-+	if (!clean) {
-+		struct genradix_iter iter;
-+		struct journal_replay **slot;
-+
-+		genradix_for_each(&c->journal_entries, iter, slot) {
-+			struct journal_replay *jr = *slot;
-+
-+			if (!jr || jr->ignore)
-+				continue;
-+
-+			vstruct_for_each(&jr->j, entry) {
-+				ret = journal_replay_entry_early(c, entry);
-+				if (ret)
-+					return ret;
-+			}
-+		}
-+	} else {
-+		entry = clean->start;
-+		while (entry != vstruct_end(&clean->field)) {
-+			ret = journal_replay_entry_early(c, entry);
-+			if (ret)
-+				return ret;
-+			entry = vstruct_next(entry);
-+		}
-+	}
-+
-+	bch2_fs_usage_initialize(c);
-+
-+	return 0;
-+}
-+
-+/* sb clean section: */
-+
-+/*
-+ * Read the root node of every btree whose root is marked alive, then allocate
-+ * a fresh root for any btree ID below BTREE_ID_NR that still has no node.
-+ *
-+ * Alloc-info btrees are skipped entirely when opts.reconstruct_alloc is set.
-+ * A root flagged with ->error, or one that bch2_btree_root_read() fails on, is
-+ * reported through the fsck_err machinery; for alloc btrees this also clears
-+ * BCH_COMPAT_alloc_info in c->sb.compat.  A read error is reset to 0 after
-+ * being reported.
-+ *
-+ * NOTE(review): __fsck_err()/fsck_err() are macros defined elsewhere; they
-+ * presumably assign ret and jump to the fsck_err label when the error cannot
-+ * be ignored - confirm against their definition.
-+ */
-+static int read_btree_roots(struct bch_fs *c)
-+{
-+ unsigned i;
-+ int ret = 0;
-+
-+ for (i = 0; i < btree_id_nr_alive(c); i++) {
-+ struct btree_root *r = bch2_btree_id_root(c, i);
-+
-+ if (!r->alive)
-+ continue;
-+
-+ /* Alloc info is going to be rebuilt: don't read it, mark it untrusted */
-+ if (btree_id_is_alloc(i) &&
-+ c->opts.reconstruct_alloc) {
-+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
-+ continue;
-+ }
-+
-+ if (r->error) {
-+ __fsck_err(c,
-+ btree_id_is_alloc(i)
-+ ? FSCK_CAN_IGNORE : 0,
-+ btree_root_bkey_invalid,
-+ "invalid btree root %s",
-+ bch2_btree_id_str(i));
-+ /*
-+ * NOTE(review): this tests for BTREE_ID_alloc only, while the
-+ * two neighbouring checks use btree_id_is_alloc(i) - confirm
-+ * the narrower test is intended.
-+ */
-+ if (i == BTREE_ID_alloc)
-+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
-+ }
-+
-+ ret = bch2_btree_root_read(c, i, &r->key, r->level);
-+ if (ret) {
-+ fsck_err(c,
-+ btree_root_read_error,
-+ "error reading btree root %s",
-+ bch2_btree_id_str(i));
-+ if (btree_id_is_alloc(i))
-+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
-+ /* Reported above; carry on with the remaining roots */
-+ ret = 0;
-+ }
-+ }
-+
-+ /* Any btree still without a root node gets an empty level-0 root */
-+ for (i = 0; i < BTREE_ID_NR; i++) {
-+ struct btree_root *r = bch2_btree_id_root(c, i);
-+
-+ if (!r->b) {
-+ r->alive = false;
-+ r->level = 0;
-+ bch2_btree_root_alloc(c, i);
-+ }
-+ }
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Insert the three keys that describe the root subvolume: snapshot tree 1,
-+ * the root snapshot at offset U32_MAX, and subvolume BCACHEFS_ROOT_SUBVOL
-+ * pointing at inode BCACHEFS_ROOT_INO.
-+ *
-+ * Inserts are attempted in that order and stop at the first failure, which is
-+ * logged and returned.  Returns 0 when all three succeed.
-+ */
-+static int bch2_initialize_subvolumes(struct bch_fs *c)
-+{
-+	struct bkey_i_snapshot_tree tree_key;
-+	struct bkey_i_snapshot snapshot_key;
-+	struct bkey_i_subvolume subvol_key;
-+	int ret;
-+
-+	/* snapshot tree */
-+	bkey_snapshot_tree_init(&tree_key.k_i);
-+	tree_key.k.p.offset		= 1;
-+	tree_key.v.master_subvol	= cpu_to_le32(1);
-+	tree_key.v.root_snapshot	= cpu_to_le32(U32_MAX);
-+
-+	/* root snapshot */
-+	bkey_snapshot_init(&snapshot_key.k_i);
-+	snapshot_key.k.p.offset		= U32_MAX;
-+	snapshot_key.v.flags		= 0;
-+	snapshot_key.v.parent		= 0;
-+	snapshot_key.v.subvol		= cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
-+	snapshot_key.v.tree		= cpu_to_le32(1);
-+	SET_BCH_SNAPSHOT_SUBVOL(&snapshot_key.v, true);
-+
-+	/* root subvolume */
-+	bkey_subvolume_init(&subvol_key.k_i);
-+	subvol_key.k.p.offset		= BCACHEFS_ROOT_SUBVOL;
-+	subvol_key.v.flags		= 0;
-+	subvol_key.v.snapshot		= cpu_to_le32(U32_MAX);
-+	subvol_key.v.inode		= cpu_to_le64(BCACHEFS_ROOT_INO);
-+
-+	ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &tree_key.k_i, NULL, 0);
-+	if (!ret)
-+		ret = bch2_btree_insert(c, BTREE_ID_snapshots, &snapshot_key.k_i, NULL, 0);
-+	if (!ret)
-+		ret = bch2_btree_insert(c, BTREE_ID_subvolumes, &subvol_key.k_i, NULL, 0);
-+
-+	if (ret)
-+		bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+/*
-+ * Transaction body: look up the root inode (BCACHEFS_ROOT_INO, snapshot
-+ * U32_MAX), set its bi_subvol to BCACHEFS_ROOT_SUBVOL and write it back.
-+ *
-+ * Returns the lookup error as-is, -BCH_ERR_ENOENT_inode when the key found is
-+ * not an inode, or the result of bch2_inode_write().
-+ */
-+static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
-+{
-+	struct bch_inode_unpacked root;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
-+			       SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
-+	ret = bkey_err(k);
-+	if (ret)
-+		return ret;
-+
-+	if (bkey_is_inode(k.k)) {
-+		ret = bch2_inode_unpack(k, &root);
-+		BUG_ON(ret);
-+
-+		root.bi_subvol = BCACHEFS_ROOT_SUBVOL;
-+
-+		ret = bch2_inode_write(trans, &iter, &root);
-+	} else {
-+		bch_err(trans->c, "root inode not found");
-+		ret = -BCH_ERR_ENOENT_inode;
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/*
-+ * Recovery pass: set bi_subvol on the root inode by running
-+ * __bch2_fs_upgrade_for_subvolumes() under bch2_trans_do() with
-+ * BTREE_INSERT_LAZY_RW.  Any error is logged before being returned.
-+ */
-+noinline_for_stack
-+static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
-+{
-+	int ret;
-+
-+	ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
-+			    __bch2_fs_upgrade_for_subvolumes(trans));
-+	if (!ret)
-+		return 0;
-+
-+	bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+/*
-+ * Names of the recovery passes, generated from BCH_RECOVERY_PASSES() by
-+ * stringifying the first x() argument; the array is NULL terminated.
-+ */
-+const char * const bch2_recovery_passes[] = {
-+#define x(_fn, _when) #_fn,
-+ BCH_RECOVERY_PASSES()
-+#undef x
-+ NULL
-+};
-+
-+/*
-+ * Recovery pass "check_allocations": calls bch2_gc() with its second argument
-+ * true and c->opts.norecovery as the third, returning bch2_gc()'s result.
-+ */
-+static int bch2_check_allocations(struct bch_fs *c)
-+{
-+	int ret = bch2_gc(c, true, c->opts.norecovery);
-+
-+	return ret;
-+}
-+
-+/* Recovery pass "set_may_go_rw": sets BCH_FS_MAY_GO_RW in c->flags; always returns 0. */
-+static int bch2_set_may_go_rw(struct bch_fs *c)
-+{
-+ set_bit(BCH_FS_MAY_GO_RW, &c->flags);
-+ return 0;
-+}
-+
-+/* One recovery pass: the function implementing it and its PASS_* flags. */
-+struct recovery_pass_fn {
-+ int (*fn)(struct bch_fs *);
-+ unsigned when;
-+};
-+
-+/*
-+ * Table of recovery passes generated from BCH_RECOVERY_PASSES(): entry
-+ * x(name, when) becomes { bch2_<name>, when }.  It is indexed by pass number
-+ * elsewhere in this file (recovery_pass_fns + pass).
-+ */
-+static struct recovery_pass_fn recovery_pass_fns[] = {
-+#define x(_fn, _when) { .fn = bch2_##_fn, .when = _when },
-+ BCH_RECOVERY_PASSES()
-+#undef x
-+};
-+
-+/*
-+ * Decide whether the on-disk metadata version should be upgraded on this
-+ * mount.  If it should, log the decision, schedule the recovery passes the
-+ * upgrade requires and record the new version with bch2_sb_upgrade().
-+ *
-+ * The starting version is c->sb.version_upgrade_complete when that is
-+ * non-zero, otherwise c->sb.version.  Nothing is done unless the chosen target
-+ * version is newer than the starting version.
-+ */
-+static void check_version_upgrade(struct bch_fs *c)
-+{
-+ unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
-+ unsigned latest_version = bcachefs_metadata_version_current;
-+ unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
-+ unsigned new_version = 0;
-+ u64 recovery_passes;
-+
-+ if (old_version < bcachefs_metadata_required_upgrade_below) {
-+ /* Too old: an upgrade target is picked whatever opts.version_upgrade says */
-+ if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
-+ latest_compatible < bcachefs_metadata_required_upgrade_below)
-+ new_version = latest_version;
-+ else
-+ new_version = latest_compatible;
-+ } else {
-+ /* Otherwise the mount option selects the target */
-+ switch (c->opts.version_upgrade) {
-+ case BCH_VERSION_UPGRADE_compatible:
-+ new_version = latest_compatible;
-+ break;
-+ case BCH_VERSION_UPGRADE_incompatible:
-+ new_version = latest_version;
-+ break;
-+ case BCH_VERSION_UPGRADE_none:
-+ new_version = old_version;
-+ break;
-+ }
-+ }
-+
-+ if (new_version > old_version) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ if (old_version < bcachefs_metadata_required_upgrade_below)
-+ prt_str(&buf, "Version upgrade required:\n");
-+
-+ /* version_upgrade_complete differs from sb.version: an earlier upgrade did not finish */
-+ if (old_version != c->sb.version) {
-+ prt_str(&buf, "Version upgrade from ");
-+ bch2_version_to_text(&buf, c->sb.version_upgrade_complete);
-+ prt_str(&buf, " to ");
-+ bch2_version_to_text(&buf, c->sb.version);
-+ prt_str(&buf, " incomplete\n");
-+ }
-+
-+ prt_printf(&buf, "Doing %s version upgrade from ",
-+ BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)
-+ ? "incompatible" : "compatible");
-+ bch2_version_to_text(&buf, old_version);
-+ prt_str(&buf, " to ");
-+ bch2_version_to_text(&buf, new_version);
-+ prt_newline(&buf);
-+
-+ recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
-+ if (recovery_passes) {
-+ if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK)
-+ prt_str(&buf, "fsck required");
-+ else {
-+ prt_str(&buf, "running recovery passes: ");
-+ prt_bitflags(&buf, bch2_recovery_passes, recovery_passes);
-+ }
-+
-+ /* Force the required passes on and set fix_errors to FSCK_FIX_yes */
-+ c->recovery_passes_explicit |= recovery_passes;
-+ c->opts.fix_errors = FSCK_FIX_yes;
-+ }
-+
-+ bch_info(c, "%s", buf.buf);
-+
-+ mutex_lock(&c->sb_lock);
-+ bch2_sb_upgrade(c, new_version);
-+ mutex_unlock(&c->sb_lock);
-+
-+ printbuf_exit(&buf);
-+ }
-+}
-+
-+/*
-+ * Return a bitmask with bit N set for every entry N of recovery_pass_fns[]
-+ * whose ->when flags include PASS_FSCK.
-+ */
-+u64 bch2_fsck_recovery_passes(void)
-+{
-+	u64 mask = 0;
-+	unsigned pass = 0;
-+
-+	while (pass < ARRAY_SIZE(recovery_pass_fns)) {
-+		if (recovery_pass_fns[pass].when & PASS_FSCK)
-+			mask |= BIT_ULL(pass);
-+		pass++;
-+	}
-+
-+	return mask;
-+}
-+
-+/*
-+ * Decide whether recovery pass @pass should run on this mount.
-+ *
-+ * Checked in order:
-+ *  - with opts.norecovery, nothing after BCH_RECOVERY_PASS_snapshots_read runs;
-+ *  - a pass whose bit is set in c->recovery_passes_explicit runs;
-+ *  - PASS_FSCK passes run when opts.fsck is set;
-+ *  - PASS_UNCLEAN passes run when the superblock is not marked clean;
-+ *  - PASS_ALWAYS passes run.
-+ * Anything else is skipped.
-+ */
-+static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-+{
-+	/*
-+	 * Look the flags up by @pass, the same index the explicit-pass check
-+	 * below uses, instead of relying on the caller having stored @pass in
-+	 * c->curr_recovery_pass first.
-+	 */
-+	struct recovery_pass_fn *p = recovery_pass_fns + pass;
-+
-+	if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
-+		return false;
-+	if (c->recovery_passes_explicit & BIT_ULL(pass))
-+		return true;
-+	if ((p->when & PASS_FSCK) && c->opts.fsck)
-+		return true;
-+	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
-+		return true;
-+	if (p->when & PASS_ALWAYS)
-+		return true;
-+	return false;
-+}
-+
-+/*
-+ * Record @pass as the current recovery pass and run it if
-+ * should_run_recovery_pass() says so.
-+ *
-+ * Unless the pass is PASS_SILENT, "<name>..." is printed before the pass and
-+ * " done" is appended after it succeeds.  On success the pass's bit is set in
-+ * c->recovery_passes_complete.  Returns the pass's error, or 0 when the pass
-+ * succeeded or was skipped.
-+ */
-+static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-+{
-+	struct recovery_pass_fn *p;
-+	bool verbose;
-+	int ret;
-+
-+	c->curr_recovery_pass = pass;
-+
-+	if (!should_run_recovery_pass(c, pass))
-+		return 0;
-+
-+	p = recovery_pass_fns + pass;
-+	verbose = !(p->when & PASS_SILENT);
-+
-+	if (verbose)
-+		printk(KERN_INFO bch2_log_msg(c, "%s..."),
-+		       bch2_recovery_passes[pass]);
-+
-+	ret = p->fn(c);
-+	if (ret)
-+		return ret;
-+
-+	if (verbose)
-+		printk(KERN_CONT " done\n");
-+
-+	c->recovery_passes_complete |= BIT_ULL(pass);
-+	return 0;
-+}
-+
-+/*
-+ * Run recovery passes starting from c->curr_recovery_pass until the end of
-+ * recovery_pass_fns[].
-+ *
-+ * A pass returning an error that matches BCH_ERR_restart_recovery causes the
-+ * loop to re-read c->curr_recovery_pass without advancing it; any other error
-+ * stops the loop and is returned.
-+ */
-+static int bch2_run_recovery_passes(struct bch_fs *c)
-+{
-+	int ret = 0;
-+
-+	for (;;) {
-+		if (c->curr_recovery_pass >= ARRAY_SIZE(recovery_pass_fns))
-+			break;
-+
-+		ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
-+
-+		if (bch2_err_matches(ret, BCH_ERR_restart_recovery))
-+			continue;
-+		if (ret)
-+			break;
-+
-+		c->curr_recovery_pass++;
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * Recover an existing filesystem: check required feature bits, read the
-+ * journal (or fall back to the superblock clean section), blacklist journal
-+ * sequence numbers, start the journal, read the btree roots, run the recovery
-+ * passes and finally update the superblock.
-+ *
-+ * Returns 0 on success.  Failures jump to the err/fsck_err labels, which call
-+ * bch2_fs_emergency_read_only() and then share the cleanup code at "out".
-+ */
-+int bch2_fs_recovery(struct bch_fs *c)
-+{
-+ struct bch_sb_field_clean *clean = NULL;
-+ struct jset *last_journal_entry = NULL;
-+ u64 last_seq = 0, blacklist_seq, journal_seq;
-+ bool write_sb = false;
-+ int ret = 0;
-+
-+ if (c->sb.clean) {
-+ clean = bch2_read_superblock_clean(c);
-+ ret = PTR_ERR_OR_ZERO(clean);
-+ if (ret)
-+ goto err;
-+
-+ bch_info(c, "recovering from clean shutdown, journal seq %llu",
-+ le64_to_cpu(clean->journal_seq));
-+ } else {
-+ bch_info(c, "recovering from unclean shutdown");
-+ }
-+
-+ if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) {
-+ bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ if (!c->sb.clean &&
-+ !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) {
-+ bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ /* Skipped only when nochanges and norecovery are both set and fsck is not */
-+ if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery))
-+ check_version_upgrade(c);
-+
-+ if (c->opts.fsck && c->opts.norecovery) {
-+ bch_err(c, "cannot select both norecovery and fsck");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ ret = bch2_blacklist_table_initialize(c);
-+ if (ret) {
-+ bch_err(c, "error initializing blacklist table");
-+ goto err;
-+ }
-+
-+ /* The journal is read unless the fs is clean and neither fsck nor keep_journal asks for it */
-+ if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
-+ struct genradix_iter iter;
-+ struct journal_replay **i;
-+
-+ bch_verbose(c, "starting journal read");
-+ ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * note: cmd_list_journal needs the blacklist table fully up to date so
-+ * it can asterisk ignored journal entries:
-+ */
-+ if (c->opts.read_journal_only)
-+ goto out;
-+
-+ /* Newest journal entry that is present and not ignored */
-+ genradix_for_each_reverse(&c->journal_entries, iter, i)
-+ if (*i && !(*i)->ignore) {
-+ last_journal_entry = &(*i)->j;
-+ break;
-+ }
-+
-+ if (mustfix_fsck_err_on(c->sb.clean &&
-+ last_journal_entry &&
-+ !journal_entry_empty(last_journal_entry), c,
-+ clean_but_journal_not_empty,
-+ "filesystem marked clean but journal not empty")) {
-+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
-+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-+ c->sb.clean = false;
-+ }
-+
-+ if (!last_journal_entry) {
-+ fsck_err_on(!c->sb.clean, c,
-+ dirty_but_no_journal_entries,
-+ "no journal entries found");
-+ if (clean)
-+ goto use_clean;
-+
-+ /* No clean section either: fall back to the newest entry, even an ignored one */
-+ genradix_for_each_reverse(&c->journal_entries, iter, i)
-+ if (*i) {
-+ last_journal_entry = &(*i)->j;
-+ (*i)->ignore = false;
-+ /*
-+ * This was probably a NO_FLUSH entry,
-+ * so last_seq was garbage - but we know
-+ * we're only using a single journal
-+ * entry, set it here:
-+ */
-+ (*i)->j.last_seq = (*i)->j.seq;
-+ break;
-+ }
-+ }
-+
-+ ret = bch2_journal_keys_sort(c);
-+ if (ret)
-+ goto err;
-+
-+ if (c->sb.clean && last_journal_entry) {
-+ ret = bch2_verify_superblock_clean(c, &clean,
-+ last_journal_entry);
-+ if (ret)
-+ goto err;
-+ }
-+ } else {
-+use_clean:
-+ if (!clean) {
-+ bch_err(c, "no superblock clean section found");
-+ ret = -BCH_ERR_fsck_repair_impossible;
-+ goto err;
-+
-+ }
-+ /* Without journal entries, sequence numbers resume after the clean section's */
-+ blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1;
-+ }
-+
-+ c->journal_replay_seq_start = last_seq;
-+ c->journal_replay_seq_end = blacklist_seq - 1;
-+
-+ if (c->opts.reconstruct_alloc) {
-+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
-+ drop_alloc_keys(&c->journal_keys);
-+ }
-+
-+ zero_out_btree_mem_ptr(&c->journal_keys);
-+
-+ ret = journal_replay_early(c, clean);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * After an unclean shutdown, skip the next few journal sequence
-+ * numbers as they may have been referenced by btree writes that
-+ * happened before their corresponding journal writes - those btree
-+ * writes need to be ignored, by skipping and blacklisting the next few
-+ * journal sequence numbers:
-+ */
-+ if (!c->sb.clean)
-+ journal_seq += 8;
-+
-+ if (blacklist_seq != journal_seq) {
-+ ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu",
-+ blacklist_seq, journal_seq) ?:
-+ bch2_journal_seq_blacklist_add(c,
-+ blacklist_seq, journal_seq);
-+ if (ret) {
-+ bch_err(c, "error creating new journal seq blacklist entry");
-+ goto err;
-+ }
-+ }
-+
-+ ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
-+ journal_seq, last_seq, blacklist_seq - 1) ?:
-+ bch2_fs_journal_start(&c->journal, journal_seq);
-+ if (ret)
-+ goto err;
-+
-+ /* NOTE(review): the return value of this log call is not checked */
-+ if (c->opts.reconstruct_alloc)
-+ bch2_journal_log_msg(c, "dropping alloc info");
-+
-+ /*
-+ * Skip past versions that might have possibly been used (as nonces),
-+ * but hadn't had their pointers written:
-+ */
-+ if (c->sb.encryption_type && !c->sb.clean)
-+ atomic64_add(1 << 16, &c->key_version);
-+
-+ ret = read_btree_roots(c);
-+ if (ret)
-+ goto err;
-+
-+ /* With fsck, check_topology is forced on debug builds or when the sb records topology errors */
-+ if (c->opts.fsck &&
-+ (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
-+ BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)))
-+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
-+
-+ ret = bch2_run_recovery_passes(c);
-+ if (ret)
-+ goto err;
-+
-+ /* If we fixed errors, verify that fs is actually clean now: */
-+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
-+ test_bit(BCH_FS_ERRORS_FIXED, &c->flags) &&
-+ !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags) &&
-+ !test_bit(BCH_FS_ERROR, &c->flags)) {
-+ bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
-+ clear_bit(BCH_FS_ERRORS_FIXED, &c->flags);
-+
-+ /* Rewind so the passes from check_alloc_info onwards run again */
-+ c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
-+
-+ ret = bch2_run_recovery_passes(c);
-+ if (ret)
-+ goto err;
-+
-+ if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags) ||
-+ test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) {
-+ bch_err(c, "Second fsck run was not clean");
-+ set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
-+ }
-+
-+ set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
-+ }
-+
-+ if (enabled_qtypes(c)) {
-+ bch_verbose(c, "reading quotas");
-+ ret = bch2_fs_quota_read(c);
-+ if (ret)
-+ goto err;
-+ bch_verbose(c, "quotas done");
-+ }
-+
-+ /* Superblock updates; written out once, only if something changed */
-+ mutex_lock(&c->sb_lock);
-+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) {
-+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version);
-+ write_sb = true;
-+ }
-+
-+ if (!test_bit(BCH_FS_ERROR, &c->flags)) {
-+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
-+ write_sb = true;
-+ }
-+
-+ if (c->opts.fsck &&
-+ !test_bit(BCH_FS_ERROR, &c->flags) &&
-+ !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) {
-+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
-+ SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0);
-+ write_sb = true;
-+ }
-+
-+ if (write_sb)
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) ||
-+ c->sb.version_min < bcachefs_metadata_version_btree_ptr_sectors_written) {
-+ struct bch_move_stats stats;
-+
-+ bch2_move_stats_init(&stats, "recovery");
-+
-+ bch_info(c, "scanning for old btree nodes");
-+ ret = bch2_fs_read_write(c) ?:
-+ bch2_scan_old_btree_nodes(c, &stats);
-+ if (ret)
-+ goto err;
-+ bch_info(c, "scanning for old btree nodes done");
-+ }
-+
-+ /* Queue the blacklist GC work once the table holds more than 128 entries */
-+ if (c->journal_seq_blacklist_table &&
-+ c->journal_seq_blacklist_table->nr > 128)
-+ queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
-+
-+ ret = 0;
-+out:
-+ /* Common exit path, reached on success and (via err below) on failure */
-+ set_bit(BCH_FS_FSCK_DONE, &c->flags);
-+ bch2_flush_fsck_errs(c);
-+
-+ if (!c->opts.keep_journal &&
-+ test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) {
-+ bch2_journal_keys_free(&c->journal_keys);
-+ bch2_journal_entries_free(c);
-+ }
-+ kfree(clean);
-+
-+ /* NOTE(review): the return value of bch2_fs_read_write_early() is not checked here */
-+ if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
-+ bch2_fs_read_write_early(c);
-+ bch2_delete_dead_snapshots_async(c);
-+ }
-+
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+err:
-+fsck_err:
-+ bch2_fs_emergency_read_only(c);
-+ goto out;
-+}
-+
-+/*
-+ * Initialize a brand-new filesystem: set up the superblock compat/version
-+ * fields, allocate btree roots and the journal, go read-write, mark the
-+ * superblocks, initialize freespace and the root subvolume, create the root
-+ * directory and lost+found, flush the first journal entry and finally mark
-+ * the superblock initialized.
-+ *
-+ * Returns 0 on success; on failure the error is logged against the
-+ * filesystem and returned.
-+ */
-+int bch2_fs_initialize(struct bch_fs *c)
-+{
-+ struct bch_inode_unpacked root_inode, lostfound_inode;
-+ struct bkey_inode_buf packed_inode;
-+ struct qstr lostfound = QSTR("lost+found");
-+ struct bch_dev *ca;
-+ unsigned i;
-+ int ret;
-+
-+ bch_notice(c, "initializing new filesystem");
-+
-+ mutex_lock(&c->sb_lock);
-+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
-+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
-+
-+ bch2_sb_maybe_downgrade(c);
-+
-+ if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
-+ bch2_sb_upgrade(c, bcachefs_metadata_version_current);
-+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
-+ bch2_write_super(c);
-+ }
-+ mutex_unlock(&c->sb_lock);
-+
-+ /* No recovery passes run for a new filesystem: set the pass counter past the last one */
-+ c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
-+ set_bit(BCH_FS_MAY_GO_RW, &c->flags);
-+ set_bit(BCH_FS_FSCK_DONE, &c->flags);
-+
-+ for (i = 0; i < BTREE_ID_NR; i++)
-+ bch2_btree_root_alloc(c, i);
-+
-+ for_each_member_device(ca, c, i)
-+ bch2_dev_usage_init(ca);
-+
-+ ret = bch2_fs_journal_alloc(c);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * journal_res_get() will crash if called before this has
-+ * set up the journal.pin FIFO and journal.cur pointer:
-+ */
-+ bch2_fs_journal_start(&c->journal, 1);
-+ bch2_journal_set_replay_done(&c->journal);
-+
-+ ret = bch2_fs_read_write_early(c);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * Write out the superblock and journal buckets, now that we can do
-+ * btree updates
-+ */
-+ bch_verbose(c, "marking superblocks");
-+ ret = bch2_trans_mark_dev_sbs(c);
-+ bch_err_msg(c, ret, "marking superblocks");
-+ if (ret)
-+ goto err;
-+
-+ for_each_online_member(ca, c, i)
-+ ca->new_fs_bucket_idx = 0;
-+
-+ ret = bch2_fs_freespace_init(c);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_initialize_subvolumes(c);
-+ if (ret)
-+ goto err;
-+
-+ bch_verbose(c, "reading snapshots table");
-+ ret = bch2_snapshots_read(c);
-+ if (ret)
-+ goto err;
-+ bch_verbose(c, "reading snapshots done");
-+
-+ /* Root directory inode, inserted directly at snapshot U32_MAX */
-+ bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL);
-+ root_inode.bi_inum = BCACHEFS_ROOT_INO;
-+ root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
-+ bch2_inode_pack(&packed_inode, &root_inode);
-+ packed_inode.inode.k.p.snapshot = U32_MAX;
-+
-+ ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0);
-+ if (ret) {
-+ bch_err_msg(c, ret, "creating root directory");
-+ goto err;
-+ }
-+
-+ bch2_inode_init_early(c, &lostfound_inode);
-+
-+ ret = bch2_trans_do(c, NULL, NULL, 0,
-+ bch2_create_trans(trans,
-+ BCACHEFS_ROOT_SUBVOL_INUM,
-+ &root_inode, &lostfound_inode,
-+ &lostfound,
-+ 0, 0, S_IFDIR|0700, 0,
-+ NULL, NULL, (subvol_inum) { 0 }, 0));
-+ if (ret) {
-+ bch_err_msg(c, ret, "creating lost+found");
-+ goto err;
-+ }
-+
-+ if (enabled_qtypes(c)) {
-+ ret = bch2_fs_quota_read(c);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = bch2_journal_flush(&c->journal);
-+ if (ret) {
-+ bch_err_msg(c, ret, "writing first journal entry");
-+ goto err;
-+ }
-+
-+ mutex_lock(&c->sb_lock);
-+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
-+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-+
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ return 0;
-+err:
-+ /*
-+ * Report against the filesystem: "ca" is only the cursor of the device
-+ * loops above and is not guaranteed to reference a device here.
-+ */
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h
-new file mode 100644
-index 000000000000..852d30567da9
---- /dev/null
-+++ b/fs/bcachefs/recovery.h
-@@ -0,0 +1,33 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_RECOVERY_H
-+#define _BCACHEFS_RECOVERY_H
-+
-+extern const char * const bch2_recovery_passes[];
-+
-+/*
-+ * For when we need to rewind recovery passes and run a pass we skipped.
-+ *
-+ * Marks @pass in c->recovery_passes_explicit.  If recovery has not reached
-+ * @pass yet, returns 0 and the pass will run when its turn comes.  Otherwise
-+ * c->curr_recovery_pass is rewound to @pass and -BCH_ERR_restart_recovery is
-+ * returned.
-+ *
-+ * NOTE(review): "(1ULL << pass) >> 1" keeps only the bit for pass - 1 in
-+ * recovery_passes_complete (and no bit at all for pass 0); confirm whether a
-+ * mask of every pass below @pass was intended.
-+ */
-+static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
-+						  enum bch_recovery_pass pass)
-+{
-+	bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
-+		 bch2_recovery_passes[pass], pass,
-+		 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
-+
-+	c->recovery_passes_explicit |= BIT_ULL(pass);
-+
-+	if (c->curr_recovery_pass < pass)
-+		return 0;
-+
-+	c->curr_recovery_pass = pass;
-+	c->recovery_passes_complete &= (1ULL << pass) >> 1;
-+	return -BCH_ERR_restart_recovery;
-+}
-+
-+u64 bch2_fsck_recovery_passes(void);
-+
-+int bch2_fs_recovery(struct bch_fs *);
-+int bch2_fs_initialize(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_RECOVERY_H */
-diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h
-new file mode 100644
-index 000000000000..515e3d62c2ac
---- /dev/null
-+++ b/fs/bcachefs/recovery_types.h
-@@ -0,0 +1,53 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_RECOVERY_TYPES_H
-+#define _BCACHEFS_RECOVERY_TYPES_H
-+
-+/*
-+ * Flags for the "when" field of a recovery pass, as used in recovery.c:
-+ *  PASS_SILENT  - do not print the "<pass>... done" progress message
-+ *  PASS_FSCK    - run when the fsck option is set
-+ *  PASS_UNCLEAN - run when the superblock is not marked clean
-+ *  PASS_ALWAYS  - always run
-+ */
-+#define PASS_SILENT BIT(0)
-+#define PASS_FSCK BIT(1)
-+#define PASS_UNCLEAN BIT(2)
-+#define PASS_ALWAYS BIT(3)
-+
-+/*
-+ * X-macro list of recovery passes: x(name, when).  The position of an entry
-+ * in this list is its enum bch_recovery_pass value, so the list order is the
-+ * pass numbering.  A "when" of 0 sets none of the PASS_* flags.
-+ */
-+#define BCH_RECOVERY_PASSES() \
-+ x(alloc_read, PASS_ALWAYS) \
-+ x(stripes_read, PASS_ALWAYS) \
-+ x(initialize_subvolumes, 0) \
-+ x(snapshots_read, PASS_ALWAYS) \
-+ x(check_topology, 0) \
-+ x(check_allocations, PASS_FSCK) \
-+ x(trans_mark_dev_sbs, PASS_ALWAYS|PASS_SILENT) \
-+ x(fs_journal_alloc, PASS_ALWAYS|PASS_SILENT) \
-+ x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \
-+ x(journal_replay, PASS_ALWAYS) \
-+ x(check_alloc_info, PASS_FSCK) \
-+ x(check_lrus, PASS_FSCK) \
-+ x(check_btree_backpointers, PASS_FSCK) \
-+ x(check_backpointers_to_extents,PASS_FSCK) \
-+ x(check_extents_to_backpointers,PASS_FSCK) \
-+ x(check_alloc_to_lru_refs, PASS_FSCK) \
-+ x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \
-+ x(bucket_gens_init, 0) \
-+ x(check_snapshot_trees, PASS_FSCK) \
-+ x(check_snapshots, PASS_FSCK) \
-+ x(check_subvols, PASS_FSCK) \
-+ x(delete_dead_snapshots, PASS_FSCK) \
-+ x(fs_upgrade_for_subvolumes, 0) \
-+ x(resume_logged_ops, PASS_ALWAYS) \
-+ x(check_inodes, PASS_FSCK) \
-+ x(check_extents, PASS_FSCK) \
-+ x(check_indirect_extents, PASS_FSCK) \
-+ x(check_dirents, PASS_FSCK) \
-+ x(check_xattrs, PASS_FSCK) \
-+ x(check_root, PASS_FSCK) \
-+ x(check_directory_structure, PASS_FSCK) \
-+ x(check_nlinks, PASS_FSCK) \
-+ x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \
-+ x(fix_reflink_p, 0) \
-+ x(set_fs_needs_rebalance, 0) \
-+
-+/* One BCH_RECOVERY_PASS_<name> enumerator per BCH_RECOVERY_PASSES() entry, in list order. */
-+enum bch_recovery_pass {
-+#define x(n, when) BCH_RECOVERY_PASS_##n,
-+ BCH_RECOVERY_PASSES()
-+#undef x
-+};
-+
-+#endif /* _BCACHEFS_RECOVERY_TYPES_H */
-diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
-new file mode 100644
-index 000000000000..6e1bfe9feb59
---- /dev/null
-+++ b/fs/bcachefs/reflink.c
-@@ -0,0 +1,406 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "extents.h"
-+#include "inode.h"
-+#include "io_misc.h"
-+#include "io_write.h"
-+#include "rebalance.h"
-+#include "reflink.h"
-+#include "subvolume.h"
-+#include "super-io.h"
-+
-+#include <linux/sched/signal.h>
-+
-+/*
-+ * Map a key type to its indirect counterpart: extent -> reflink_v,
-+ * inline_data -> indirect_inline_data.  Returns 0 for any other type.
-+ */
-+static inline unsigned bkey_type_to_indirect(const struct bkey *k)
-+{
-+	if (k->type == KEY_TYPE_extent)
-+		return KEY_TYPE_reflink_v;
-+	if (k->type == KEY_TYPE_inline_data)
-+		return KEY_TYPE_indirect_inline_data;
-+	return 0;
-+}
-+
-+/* reflink pointers */
-+
-+/*
-+ * Validate a reflink_p key.  From metadata version reflink_p_fix onwards,
-+ * idx must not be smaller than front_pad; a violation is described in @err
-+ * and -EINVAL is returned.  Returns 0 otherwise.
-+ */
-+int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k,
-+			   enum bkey_invalid_flags flags,
-+			   struct printbuf *err)
-+{
-+	struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-+	u64 idx = le64_to_cpu(p.v->idx);
-+	u32 front_pad = le32_to_cpu(p.v->front_pad);
-+
-+	if (c->sb.version < bcachefs_metadata_version_reflink_p_fix ||
-+	    idx >= front_pad)
-+		return 0;
-+
-+	prt_printf(err, "idx < front_pad (%llu < %u)", idx, front_pad);
-+	return -EINVAL;
-+}
-+
-+/* Print a reflink_p key's idx, front_pad and back_pad (CPU byte order) to @out. */
-+void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
-+			    struct bkey_s_c k)
-+{
-+	struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-+	u64 idx = le64_to_cpu(p.v->idx);
-+	u32 front_pad = le32_to_cpu(p.v->front_pad);
-+	u32 back_pad = le32_to_cpu(p.v->back_pad);
-+
-+	prt_printf(out, "idx %llu front_pad %u back_pad %u",
-+		   idx, front_pad, back_pad);
-+}
-+
-+/*
-+ * Merge hook for reflink_p keys.  Currently always returns false: the
-+ * unconditional return below makes the merge logic after it unreachable.
-+ * That logic would extend @_l over @_r when @_r's idx directly follows the
-+ * range covered by @_l.
-+ */
-+bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
-+{
-+ struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l);
-+ struct bkey_s_c_reflink_p r = bkey_s_c_to_reflink_p(_r);
-+
-+ /*
-+ * Disabled for now, the triggers code needs to be reworked for merging
-+ * of reflink pointers to work:
-+ */
-+ return false;
-+
-+ /* Unreachable while merging is disabled */
-+ if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
-+ return false;
-+
-+ bch2_key_resize(l.k, l.k->size + r.k->size);
-+ return true;
-+}
-+
-+/* indirect extents */
-+
-+/* Validate a reflink_v key: defers entirely to bch2_bkey_ptrs_invalid(). */
-+int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k,
-+			   enum bkey_invalid_flags flags,
-+			   struct printbuf *err)
-+{
-+	int ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
-+
-+	return ret;
-+}
-+
-+/* Print a reflink_v key to @out: its refcount, then its pointers. */
-+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
-+			    struct bkey_s_c k)
-+{
-+	struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
-+	u64 refcount = le64_to_cpu(r.v->refcount);
-+
-+	prt_printf(out, "refcount: %llu ", refcount);
-+
-+	bch2_bkey_ptrs_to_text(out, c, k);
-+}
-+
-+/*
-+ * Compiled out by #if 0: merge hook for reflink_v keys, which would merge two
-+ * keys only when their refcounts are equal and bch2_extent_merge() succeeds.
-+ */
-+#if 0
-+Currently disabled, needs to be debugged:
-+
-+bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
-+{
-+ struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l);
-+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r);
-+
-+ return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r);
-+}
-+#endif
-+
-+/*
-+ * If @new is being inserted (BTREE_TRIGGER_INSERT set in *@flags) with a
-+ * refcount of zero, turn it into an empty KEY_TYPE_deleted key and clear
-+ * BTREE_TRIGGER_INSERT from *@flags.  Otherwise leave both untouched.
-+ */
-+static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags)
-+{
-+	if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) {
-+		new->k.type = KEY_TYPE_deleted;
-+		new->k.size = 0;
-+		set_bkey_val_u64s(&new->k, 0);
-+		*flags &= ~BTREE_TRIGGER_INSERT;
-+	}
-+}
-+
-+/*
-+ * Transactional trigger for reflink_v keys: first lets
-+ * check_indirect_extent_deleting() convert a zero-refcount insert into a
-+ * deletion, then hands off to bch2_trans_mark_extent() with the possibly
-+ * updated flags.
-+ */
-+int bch2_trans_mark_reflink_v(struct btree_trans *trans,
-+			      enum btree_id btree_id, unsigned level,
-+			      struct bkey_s_c old, struct bkey_i *new,
-+			      unsigned flags)
-+{
-+	int ret;
-+
-+	check_indirect_extent_deleting(new, &flags);
-+
-+	ret = bch2_trans_mark_extent(trans, btree_id, level, old, new, flags);
-+	return ret;
-+}
-+
-+/* indirect inline data */
-+
-+/* Validation hook for indirect_inline_data keys: performs no checks and always returns 0. */
-+int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ return 0;
-+}
-+
-+/*
-+ * Print an indirect_inline_data key to @out: refcount, data length, and a
-+ * hex dump of at most the first 32 bytes of data.
-+ */
-+void bch2_indirect_inline_data_to_text(struct printbuf *out,
-+				       struct bch_fs *c, struct bkey_s_c k)
-+{
-+	struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k);
-+	unsigned datalen = bkey_inline_data_bytes(k.k);
-+	u64 refcount = le64_to_cpu(d.v->refcount);
-+
-+	prt_printf(out, "refcount %llu datalen %u: %*phN",
-+		   refcount, datalen,
-+		   min(datalen, 32U), d.v->data);
-+}
-+
-+/*
-+ * Transactional trigger for indirect_inline_data keys: the only work is
-+ * check_indirect_extent_deleting(), which may turn @new into a deleted key.
-+ * Always returns 0.
-+ */
-+int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans,
-+ enum btree_id btree_id, unsigned level,
-+ struct bkey_s_c old, struct bkey_i *new,
-+ unsigned flags)
-+{
-+ check_indirect_extent_deleting(new, &flags);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Convert the extent in @orig into an indirect extent: its value is copied
-+ * into a new key in the reflink btree (prefixed by a refcount initialised to
-+ * 0), and @orig itself is rewritten in place as a reflink_p key whose idx is
-+ * the start offset of the new indirect key.  Both keys are queued as updates
-+ * in @trans.
-+ *
-+ * Returns 0 on success or the first error encountered; the reflink iterator
-+ * is released on every path.
-+ */
-+static int bch2_make_extent_indirect(struct btree_trans *trans,
-+ struct btree_iter *extent_iter,
-+ struct bkey_i *orig)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter reflink_iter = { NULL };
-+ struct bkey_s_c k;
-+ struct bkey_i *r_v;
-+ struct bkey_i_reflink_p *r_p;
-+ __le64 *refcount;
-+ int ret;
-+
-+ if (orig->k.type == KEY_TYPE_inline_data)
-+ bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data);
-+
-+ /* Peek backwards from POS_MAX; the new key is placed at the iterator's resulting position */
-+ bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX,
-+ BTREE_ITER_INTENT);
-+ k = bch2_btree_iter_peek_prev(&reflink_iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ /* Room for the original key plus the leading __le64 refcount */
-+ r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k));
-+ ret = PTR_ERR_OR_ZERO(r_v);
-+ if (ret)
-+ goto err;
-+
-+ bkey_init(&r_v->k);
-+ r_v->k.type = bkey_type_to_indirect(&orig->k);
-+ r_v->k.p = reflink_iter.pos;
-+ bch2_key_resize(&r_v->k, orig->k.size);
-+ r_v->k.version = orig->k.version;
-+
-+ set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
-+
-+ /* Value layout: refcount first, then a copy of the original value */
-+ refcount = bkey_refcount(r_v);
-+ *refcount = 0;
-+ memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
-+
-+ ret = bch2_trans_update(trans, &reflink_iter, r_v, 0);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * orig is in a bkey_buf which statically allocates 5 64s for the val,
-+ * so we know it will be big enough:
-+ */
-+ orig->k.type = KEY_TYPE_reflink_p;
-+ r_p = bkey_i_to_reflink_p(orig);
-+ set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
-+
-+ /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */
-+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
-+ __underlying_memset(&r_p->v, 0, sizeof(r_p->v));
-+#else
-+ memset(&r_p->v, 0, sizeof(r_p->v));
-+#endif
-+
-+ r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
-+
-+ ret = bch2_trans_update(trans, extent_iter, &r_p->k_i,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+err:
-+ bch2_trans_iter_exit(trans, &reflink_iter);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Continue iterating @iter up to @end and return the first key that is not an
-+ * unwritten extent and for which bkey_extent_is_data() is true.
-+ *
-+ * If no such key is found, the iterator is clamped to @end when it has
-+ * reached or passed it, and the result is an error key when iteration failed
-+ * or bkey_s_c_null otherwise.
-+ */
-+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
-+{
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) {
-+		if (!bkey_extent_is_unwritten(k) &&
-+		    bkey_extent_is_data(k.k))
-+			return k;
-+	}
-+
-+	if (bkey_ge(iter->pos, end))
-+		bch2_btree_iter_set_pos(iter, end);
-+
-+	if (ret)
-+		return bkey_s_c_err(ret);
-+	return bkey_s_c_null;
-+}
-+
-+/*
-+ * Reflink @remap_sectors sectors from (@src_inum, @src_offset) to
-+ * (@dst_inum, @dst_offset).  Source extents that are not already reflink
-+ * pointers are first converted to indirect extents; holes in the source are
-+ * punched in the destination.  Afterwards the destination inode's size is
-+ * raised to @new_i_size (capped at the end of what was actually remapped)
-+ * if it is currently smaller.
-+ *
-+ * Returns the number of sectors remapped when that is non-zero, otherwise
-+ * the error from the remap loop, otherwise the error from the inode update.
-+ * Returns -BCH_ERR_erofs_no_writes if no write reference could be taken.
-+ */
-+s64 bch2_remap_range(struct bch_fs *c,
-+ subvol_inum dst_inum, u64 dst_offset,
-+ subvol_inum src_inum, u64 src_offset,
-+ u64 remap_sectors,
-+ u64 new_i_size, s64 *i_sectors_delta)
-+{
-+ struct btree_trans *trans;
-+ struct btree_iter dst_iter, src_iter;
-+ struct bkey_s_c src_k;
-+ struct bkey_buf new_dst, new_src;
-+ struct bpos dst_start = POS(dst_inum.inum, dst_offset);
-+ struct bpos src_start = POS(src_inum.inum, src_offset);
-+ struct bpos dst_end = dst_start, src_end = src_start;
-+ struct bch_io_opts opts;
-+ struct bpos src_want;
-+ u64 dst_done = 0;
-+ u32 dst_snapshot, src_snapshot;
-+ int ret = 0, ret2 = 0;
-+
-+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink))
-+ return -BCH_ERR_erofs_no_writes;
-+
-+ bch2_check_set_feature(c, BCH_FEATURE_reflink);
-+
-+ dst_end.offset += remap_sectors;
-+ src_end.offset += remap_sectors;
-+
-+ bch2_bkey_buf_init(&new_dst);
-+ bch2_bkey_buf_init(&new_src);
-+ trans = bch2_trans_get(c);
-+
-+ ret = bch2_inum_opts_get(trans, src_inum, &opts);
-+ if (ret)
-+ goto err;
-+
-+ bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start,
-+ BTREE_ITER_INTENT);
-+ bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start,
-+ BTREE_ITER_INTENT);
-+
-+ /* Loop until the destination range is filled; transaction restarts retry the iteration */
-+ while ((ret == 0 ||
-+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) &&
-+ bkey_lt(dst_iter.pos, dst_end)) {
-+ struct disk_reservation disk_res = { 0 };
-+
-+ bch2_trans_begin(trans);
-+
-+ if (fatal_signal_pending(current)) {
-+ ret = -EINTR;
-+ break;
-+ }
-+
-+ ret = bch2_subvolume_get_snapshot(trans, src_inum.subvol,
-+ &src_snapshot);
-+ if (ret)
-+ continue;
-+
-+ bch2_btree_iter_set_snapshot(&src_iter, src_snapshot);
-+
-+ ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol,
-+ &dst_snapshot);
-+ if (ret)
-+ continue;
-+
-+ bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot);
-+
-+ /* Position the source at the offset matching the destination's progress */
-+ dst_done = dst_iter.pos.offset - dst_start.offset;
-+ src_want = POS(src_start.inode, src_start.offset + dst_done);
-+ bch2_btree_iter_set_pos(&src_iter, src_want);
-+
-+ src_k = get_next_src(&src_iter, src_end);
-+ ret = bkey_err(src_k);
-+ if (ret)
-+ continue;
-+
-+ /* Source iterator moved past where we wanted: punch the gap in the destination */
-+ if (bkey_lt(src_want, src_iter.pos)) {
-+ ret = bch2_fpunch_at(trans, &dst_iter, dst_inum,
-+ min(dst_end.offset,
-+ dst_iter.pos.offset +
-+ src_iter.pos.offset - src_want.offset),
-+ i_sectors_delta);
-+ continue;
-+ }
-+
-+ /* Not yet a reflink pointer: make the source extent indirect first */
-+ if (src_k.k->type != KEY_TYPE_reflink_p) {
-+ bch2_btree_iter_set_pos_to_extent_start(&src_iter);
-+
-+ bch2_bkey_buf_reassemble(&new_src, c, src_k);
-+ src_k = bkey_i_to_s_c(new_src.k);
-+
-+ ret = bch2_make_extent_indirect(trans, &src_iter,
-+ new_src.k);
-+ if (ret)
-+ continue;
-+
-+ BUG_ON(src_k.k->type != KEY_TYPE_reflink_p);
-+ }
-+
-+ if (src_k.k->type == KEY_TYPE_reflink_p) {
-+ struct bkey_s_c_reflink_p src_p =
-+ bkey_s_c_to_reflink_p(src_k);
-+ struct bkey_i_reflink_p *dst_p =
-+ bkey_reflink_p_init(new_dst.k);
-+
-+ /* Destination idx = source idx advanced by how far src_want is into the source key */
-+ u64 offset = le64_to_cpu(src_p.v->idx) +
-+ (src_want.offset -
-+ bkey_start_offset(src_k.k));
-+
-+ dst_p->v.idx = cpu_to_le64(offset);
-+ } else {
-+ BUG();
-+ }
-+
-+ /* Size the new key to whichever ends first: the source key or the destination range */
-+ new_dst.k->k.p = dst_iter.pos;
-+ bch2_key_resize(&new_dst.k->k,
-+ min(src_k.k->p.offset - src_want.offset,
-+ dst_end.offset - dst_iter.pos.offset));
-+
-+ ret = bch2_bkey_set_needs_rebalance(c, new_dst.k,
-+ opts.background_target,
-+ opts.background_compression) ?:
-+ bch2_extent_update(trans, dst_inum, &dst_iter,
-+ new_dst.k, &disk_res,
-+ new_i_size, i_sectors_delta,
-+ true);
-+ bch2_disk_reservation_put(c, &disk_res);
-+ }
-+ bch2_trans_iter_exit(trans, &dst_iter);
-+ bch2_trans_iter_exit(trans, &src_iter);
-+
-+ BUG_ON(!ret && !bkey_eq(dst_iter.pos, dst_end));
-+ BUG_ON(bkey_gt(dst_iter.pos, dst_end));
-+
-+ /* dst_iter.pos.offset is in sectors; << 9 turns it into a byte cap on new_i_size */
-+ dst_done = dst_iter.pos.offset - dst_start.offset;
-+ new_i_size = min(dst_iter.pos.offset << 9, new_i_size);
-+
-+ /* Grow the destination inode's size if needed, retrying on transaction restart */
-+ do {
-+ struct bch_inode_unpacked inode_u;
-+ struct btree_iter inode_iter = { NULL };
-+
-+ bch2_trans_begin(trans);
-+
-+ ret2 = bch2_inode_peek(trans, &inode_iter, &inode_u,
-+ dst_inum, BTREE_ITER_INTENT);
-+
-+ if (!ret2 &&
-+ inode_u.bi_size < new_i_size) {
-+ inode_u.bi_size = new_i_size;
-+ ret2 = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
-+ bch2_trans_commit(trans, NULL, NULL,
-+ BTREE_INSERT_NOFAIL);
-+ }
-+
-+ bch2_trans_iter_exit(trans, &inode_iter);
-+ } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
-+err:
-+ bch2_trans_put(trans);
-+ bch2_bkey_buf_exit(&new_src, c);
-+ bch2_bkey_buf_exit(&new_dst, c);
-+
-+ bch2_write_ref_put(c, BCH_WRITE_REF_reflink);
-+
-+ return dst_done ?: ret ?: ret2;
-+}
-diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h
-new file mode 100644
-index 000000000000..8ccf3f9c4939
---- /dev/null
-+++ b/fs/bcachefs/reflink.h
-@@ -0,0 +1,81 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_REFLINK_H
-+#define _BCACHEFS_REFLINK_H
-+
-+enum bkey_invalid_flags;
-+
-+int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
-+ struct bkey_s_c);
-+bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \
-+ .key_invalid = bch2_reflink_p_invalid, \
-+ .val_to_text = bch2_reflink_p_to_text, \
-+ .key_merge = bch2_reflink_p_merge, \
-+ .trans_trigger = bch2_trans_mark_reflink_p, \
-+ .atomic_trigger = bch2_mark_reflink_p, \
-+ .min_val_size = 16, \
-+})
-+
-+int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
-+ struct bkey_s_c);
-+int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_i *, unsigned);
-+
-+#define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \
-+ .key_invalid = bch2_reflink_v_invalid, \
-+ .val_to_text = bch2_reflink_v_to_text, \
-+ .swab = bch2_ptr_swab, \
-+ .trans_trigger = bch2_trans_mark_reflink_v, \
-+ .atomic_trigger = bch2_mark_extent, \
-+ .min_val_size = 8, \
-+})
-+
-+int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_indirect_inline_data_to_text(struct printbuf *,
-+ struct bch_fs *, struct bkey_s_c);
-+int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
-+ enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_i *,
-+ unsigned);
-+
-+#define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \
-+ .key_invalid = bch2_indirect_inline_data_invalid, \
-+ .val_to_text = bch2_indirect_inline_data_to_text, \
-+ .trans_trigger = bch2_trans_mark_indirect_inline_data, \
-+ .min_val_size = 8, \
-+})
-+
-+static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
-+{
-+ switch (k.k->type) {
-+ case KEY_TYPE_reflink_v:
-+ return &bkey_s_c_to_reflink_v(k).v->refcount;
-+ case KEY_TYPE_indirect_inline_data:
-+ return &bkey_s_c_to_indirect_inline_data(k).v->refcount;
-+ default:
-+ return NULL;
-+ }
-+}
-+
-+static inline __le64 *bkey_refcount(struct bkey_i *k)
-+{
-+ switch (k->k.type) {
-+ case KEY_TYPE_reflink_v:
-+ return &bkey_i_to_reflink_v(k)->v.refcount;
-+ case KEY_TYPE_indirect_inline_data:
-+ return &bkey_i_to_indirect_inline_data(k)->v.refcount;
-+ default:
-+ return NULL;
-+ }
-+}
-+
-+s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64,
-+ subvol_inum, u64, u64, u64, s64 *);
-+
-+#endif /* _BCACHEFS_REFLINK_H */
-diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
-new file mode 100644
-index 000000000000..1c3ae13bfced
---- /dev/null
-+++ b/fs/bcachefs/replicas.c
-@@ -0,0 +1,1050 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "buckets.h"
-+#include "journal.h"
-+#include "replicas.h"
-+#include "super-io.h"
-+
-+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
-+ struct bch_replicas_cpu *);
-+
-+/* Replicas tracking - in memory: */
-+
-+static void verify_replicas_entry(struct bch_replicas_entry *e)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+ unsigned i;
-+
-+ BUG_ON(e->data_type >= BCH_DATA_NR);
-+ BUG_ON(!e->nr_devs);
-+ BUG_ON(e->nr_required > 1 &&
-+ e->nr_required >= e->nr_devs);
-+
-+ for (i = 0; i + 1 < e->nr_devs; i++)
-+ BUG_ON(e->devs[i] >= e->devs[i + 1]);
-+#endif
-+}
-+
-+void bch2_replicas_entry_sort(struct bch_replicas_entry *e)
-+{
-+ bubble_sort(e->devs, e->nr_devs, u8_cmp);
-+}
-+
-+static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
-+{
-+ eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
-+}
-+
-+static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
-+ struct bch_replicas_entry_v0 *e)
-+{
-+ unsigned i;
-+
-+ if (e->data_type < BCH_DATA_NR)
-+ prt_printf(out, "%s", bch2_data_types[e->data_type]);
-+ else
-+ prt_printf(out, "(invalid data type %u)", e->data_type);
-+
-+ prt_printf(out, ": %u [", e->nr_devs);
-+ for (i = 0; i < e->nr_devs; i++)
-+ prt_printf(out, i ? " %u" : "%u", e->devs[i]);
-+ prt_printf(out, "]");
-+}
-+
-+void bch2_replicas_entry_to_text(struct printbuf *out,
-+ struct bch_replicas_entry *e)
-+{
-+ unsigned i;
-+
-+ if (e->data_type < BCH_DATA_NR)
-+ prt_printf(out, "%s", bch2_data_types[e->data_type]);
-+ else
-+ prt_printf(out, "(invalid data type %u)", e->data_type);
-+
-+ prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
-+ for (i = 0; i < e->nr_devs; i++)
-+ prt_printf(out, i ? " %u" : "%u", e->devs[i]);
-+ prt_printf(out, "]");
-+}
-+
-+void bch2_cpu_replicas_to_text(struct printbuf *out,
-+ struct bch_replicas_cpu *r)
-+{
-+ struct bch_replicas_entry *e;
-+ bool first = true;
-+
-+ for_each_cpu_replicas_entry(r, e) {
-+ if (!first)
-+ prt_printf(out, " ");
-+ first = false;
-+
-+ bch2_replicas_entry_to_text(out, e);
-+ }
-+}
-+
-+static void extent_to_replicas(struct bkey_s_c k,
-+ struct bch_replicas_entry *r)
-+{
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+
-+ r->nr_required = 1;
-+
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ if (p.ptr.cached)
-+ continue;
-+
-+ if (!p.has_ec)
-+ r->devs[r->nr_devs++] = p.ptr.dev;
-+ else
-+ r->nr_required = 0;
-+ }
-+}
-+
-+static void stripe_to_replicas(struct bkey_s_c k,
-+ struct bch_replicas_entry *r)
-+{
-+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-+ const struct bch_extent_ptr *ptr;
-+
-+ r->nr_required = s.v->nr_blocks - s.v->nr_redundant;
-+
-+ for (ptr = s.v->ptrs;
-+ ptr < s.v->ptrs + s.v->nr_blocks;
-+ ptr++)
-+ r->devs[r->nr_devs++] = ptr->dev;
-+}
-+
-+void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
-+ struct bkey_s_c k)
-+{
-+ e->nr_devs = 0;
-+
-+ switch (k.k->type) {
-+ case KEY_TYPE_btree_ptr:
-+ case KEY_TYPE_btree_ptr_v2:
-+ e->data_type = BCH_DATA_btree;
-+ extent_to_replicas(k, e);
-+ break;
-+ case KEY_TYPE_extent:
-+ case KEY_TYPE_reflink_v:
-+ e->data_type = BCH_DATA_user;
-+ extent_to_replicas(k, e);
-+ break;
-+ case KEY_TYPE_stripe:
-+ e->data_type = BCH_DATA_parity;
-+ stripe_to_replicas(k, e);
-+ break;
-+ }
-+
-+ bch2_replicas_entry_sort(e);
-+}
-+
-+void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
-+ enum bch_data_type data_type,
-+ struct bch_devs_list devs)
-+{
-+ unsigned i;
-+
-+ BUG_ON(!data_type ||
-+ data_type == BCH_DATA_sb ||
-+ data_type >= BCH_DATA_NR);
-+
-+ e->data_type = data_type;
-+ e->nr_devs = 0;
-+ e->nr_required = 1;
-+
-+ for (i = 0; i < devs.nr; i++)
-+ e->devs[e->nr_devs++] = devs.devs[i];
-+
-+ bch2_replicas_entry_sort(e);
-+}
-+
-+static struct bch_replicas_cpu
-+cpu_replicas_add_entry(struct bch_replicas_cpu *old,
-+ struct bch_replicas_entry *new_entry)
-+{
-+ unsigned i;
-+ struct bch_replicas_cpu new = {
-+ .nr = old->nr + 1,
-+ .entry_size = max_t(unsigned, old->entry_size,
-+ replicas_entry_bytes(new_entry)),
-+ };
-+
-+ BUG_ON(!new_entry->data_type);
-+ verify_replicas_entry(new_entry);
-+
-+ new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
-+ if (!new.entries)
-+ return new;
-+
-+ for (i = 0; i < old->nr; i++)
-+ memcpy(cpu_replicas_entry(&new, i),
-+ cpu_replicas_entry(old, i),
-+ old->entry_size);
-+
-+ memcpy(cpu_replicas_entry(&new, old->nr),
-+ new_entry,
-+ replicas_entry_bytes(new_entry));
-+
-+ bch2_cpu_replicas_sort(&new);
-+ return new;
-+}
-+
-+static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
-+ struct bch_replicas_entry *search)
-+{
-+ int idx, entry_size = replicas_entry_bytes(search);
-+
-+ if (unlikely(entry_size > r->entry_size))
-+ return -1;
-+
-+ verify_replicas_entry(search);
-+
-+#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
-+ idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
-+ entry_cmp, search);
-+#undef entry_cmp
-+
-+ return idx < r->nr ? idx : -1;
-+}
-+
-+int bch2_replicas_entry_idx(struct bch_fs *c,
-+ struct bch_replicas_entry *search)
-+{
-+ bch2_replicas_entry_sort(search);
-+
-+ return __replicas_entry_idx(&c->replicas, search);
-+}
-+
-+static bool __replicas_has_entry(struct bch_replicas_cpu *r,
-+ struct bch_replicas_entry *search)
-+{
-+ return __replicas_entry_idx(r, search) >= 0;
-+}
-+
-+bool bch2_replicas_marked(struct bch_fs *c,
-+ struct bch_replicas_entry *search)
-+{
-+ bool marked;
-+
-+ if (!search->nr_devs)
-+ return true;
-+
-+ verify_replicas_entry(search);
-+
-+ percpu_down_read(&c->mark_lock);
-+ marked = __replicas_has_entry(&c->replicas, search) &&
-+ (likely((!c->replicas_gc.entries)) ||
-+ __replicas_has_entry(&c->replicas_gc, search));
-+ percpu_up_read(&c->mark_lock);
-+
-+ return marked;
-+}
-+
-+static void __replicas_table_update(struct bch_fs_usage *dst,
-+ struct bch_replicas_cpu *dst_r,
-+ struct bch_fs_usage *src,
-+ struct bch_replicas_cpu *src_r)
-+{
-+ int src_idx, dst_idx;
-+
-+ *dst = *src;
-+
-+ for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
-+ if (!src->replicas[src_idx])
-+ continue;
-+
-+ dst_idx = __replicas_entry_idx(dst_r,
-+ cpu_replicas_entry(src_r, src_idx));
-+ BUG_ON(dst_idx < 0);
-+
-+ dst->replicas[dst_idx] = src->replicas[src_idx];
-+ }
-+}
-+
-+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
-+ struct bch_replicas_cpu *dst_r,
-+ struct bch_fs_usage __percpu *src_p,
-+ struct bch_replicas_cpu *src_r)
-+{
-+ unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
-+ struct bch_fs_usage *dst, *src = (void *)
-+ bch2_acc_percpu_u64s((u64 __percpu *) src_p, src_nr);
-+
-+ preempt_disable();
-+ dst = this_cpu_ptr(dst_p);
-+ preempt_enable();
-+
-+ __replicas_table_update(dst, dst_r, src, src_r);
-+}
-+
-+/*
-+ * Resize filesystem accounting:
-+ */
-+static int replicas_table_update(struct bch_fs *c,
-+ struct bch_replicas_cpu *new_r)
-+{
-+ struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
-+ struct bch_fs_usage_online *new_scratch = NULL;
-+ struct bch_fs_usage __percpu *new_gc = NULL;
-+ struct bch_fs_usage *new_base = NULL;
-+ unsigned i, bytes = sizeof(struct bch_fs_usage) +
-+ sizeof(u64) * new_r->nr;
-+ unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
-+ sizeof(u64) * new_r->nr;
-+ int ret = 0;
-+
-+ memset(new_usage, 0, sizeof(new_usage));
-+
-+ for (i = 0; i < ARRAY_SIZE(new_usage); i++)
-+ if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
-+ sizeof(u64), GFP_KERNEL)))
-+ goto err;
-+
-+ if (!(new_base = kzalloc(bytes, GFP_KERNEL)) ||
-+ !(new_scratch = kmalloc(scratch_bytes, GFP_KERNEL)) ||
-+ (c->usage_gc &&
-+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL))))
-+ goto err;
-+
-+ for (i = 0; i < ARRAY_SIZE(new_usage); i++)
-+ if (c->usage[i])
-+ __replicas_table_update_pcpu(new_usage[i], new_r,
-+ c->usage[i], &c->replicas);
-+ if (c->usage_base)
-+ __replicas_table_update(new_base, new_r,
-+ c->usage_base, &c->replicas);
-+ if (c->usage_gc)
-+ __replicas_table_update_pcpu(new_gc, new_r,
-+ c->usage_gc, &c->replicas);
-+
-+ for (i = 0; i < ARRAY_SIZE(new_usage); i++)
-+ swap(c->usage[i], new_usage[i]);
-+ swap(c->usage_base, new_base);
-+ swap(c->usage_scratch, new_scratch);
-+ swap(c->usage_gc, new_gc);
-+ swap(c->replicas, *new_r);
-+out:
-+ free_percpu(new_gc);
-+ kfree(new_scratch);
-+ for (i = 0; i < ARRAY_SIZE(new_usage); i++)
-+ free_percpu(new_usage[i]);
-+ kfree(new_base);
-+ return ret;
-+err:
-+ bch_err(c, "error updating replicas table: memory allocation failure");
-+ ret = -BCH_ERR_ENOMEM_replicas_table;
-+ goto out;
-+}
-+
-+static unsigned reserve_journal_replicas(struct bch_fs *c,
-+ struct bch_replicas_cpu *r)
-+{
-+ struct bch_replicas_entry *e;
-+ unsigned journal_res_u64s = 0;
-+
-+ /* nr_inodes: */
-+ journal_res_u64s +=
-+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
-+
-+ /* key_version: */
-+ journal_res_u64s +=
-+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
-+
-+ /* persistent_reserved: */
-+ journal_res_u64s +=
-+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
-+ BCH_REPLICAS_MAX;
-+
-+ for_each_cpu_replicas_entry(r, e)
-+ journal_res_u64s +=
-+ DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
-+ e->nr_devs, sizeof(u64));
-+ return journal_res_u64s;
-+}
-+
-+noinline
-+static int bch2_mark_replicas_slowpath(struct bch_fs *c,
-+ struct bch_replicas_entry *new_entry)
-+{
-+ struct bch_replicas_cpu new_r, new_gc;
-+ int ret = 0;
-+
-+ verify_replicas_entry(new_entry);
-+
-+ memset(&new_r, 0, sizeof(new_r));
-+ memset(&new_gc, 0, sizeof(new_gc));
-+
-+ mutex_lock(&c->sb_lock);
-+
-+ if (c->replicas_gc.entries &&
-+ !__replicas_has_entry(&c->replicas_gc, new_entry)) {
-+ new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
-+ if (!new_gc.entries) {
-+ ret = -BCH_ERR_ENOMEM_cpu_replicas;
-+ goto err;
-+ }
-+ }
-+
-+ if (!__replicas_has_entry(&c->replicas, new_entry)) {
-+ new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
-+ if (!new_r.entries) {
-+ ret = -BCH_ERR_ENOMEM_cpu_replicas;
-+ goto err;
-+ }
-+
-+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
-+ if (ret)
-+ goto err;
-+
-+ bch2_journal_entry_res_resize(&c->journal,
-+ &c->replicas_journal_res,
-+ reserve_journal_replicas(c, &new_r));
-+ }
-+
-+ if (!new_r.entries &&
-+ !new_gc.entries)
-+ goto out;
-+
-+ /* allocations done, now commit: */
-+
-+ if (new_r.entries)
-+ bch2_write_super(c);
-+
-+ /* don't update in memory replicas until changes are persistent */
-+ percpu_down_write(&c->mark_lock);
-+ if (new_r.entries)
-+ ret = replicas_table_update(c, &new_r);
-+ if (new_gc.entries)
-+ swap(new_gc, c->replicas_gc);
-+ percpu_up_write(&c->mark_lock);
-+out:
-+ mutex_unlock(&c->sb_lock);
-+
-+ kfree(new_r.entries);
-+ kfree(new_gc.entries);
-+
-+ return ret;
-+err:
-+ bch_err_msg(c, ret, "adding replicas entry");
-+ goto out;
-+}
-+
-+int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r)
-+{
-+ return likely(bch2_replicas_marked(c, r))
-+ ? 0 : bch2_mark_replicas_slowpath(c, r);
-+}
-+
-+/* replicas delta list: */
-+
-+int bch2_replicas_delta_list_mark(struct bch_fs *c,
-+ struct replicas_delta_list *r)
-+{
-+ struct replicas_delta *d = r->d;
-+ struct replicas_delta *top = (void *) r->d + r->used;
-+ int ret = 0;
-+
-+ for (d = r->d; !ret && d != top; d = replicas_delta_next(d))
-+ ret = bch2_mark_replicas(c, &d->r);
-+ return ret;
-+}
-+
-+/*
-+ * Old replicas_gc mechanism: only used for journal replicas entries now, should
-+ * die at some point:
-+ */
-+
-+int bch2_replicas_gc_end(struct bch_fs *c, int ret)
-+{
-+ lockdep_assert_held(&c->replicas_gc_lock);
-+
-+ mutex_lock(&c->sb_lock);
-+ percpu_down_write(&c->mark_lock);
-+
-+ ret = ret ?:
-+ bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?:
-+ replicas_table_update(c, &c->replicas_gc);
-+
-+ kfree(c->replicas_gc.entries);
-+ c->replicas_gc.entries = NULL;
-+
-+ percpu_up_write(&c->mark_lock);
-+
-+ if (!ret)
-+ bch2_write_super(c);
-+
-+ mutex_unlock(&c->sb_lock);
-+
-+ return ret;
-+}
-+
-+int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
-+{
-+ struct bch_replicas_entry *e;
-+ unsigned i = 0;
-+
-+ lockdep_assert_held(&c->replicas_gc_lock);
-+
-+ mutex_lock(&c->sb_lock);
-+ BUG_ON(c->replicas_gc.entries);
-+
-+ c->replicas_gc.nr = 0;
-+ c->replicas_gc.entry_size = 0;
-+
-+ for_each_cpu_replicas_entry(&c->replicas, e)
-+ if (!((1 << e->data_type) & typemask)) {
-+ c->replicas_gc.nr++;
-+ c->replicas_gc.entry_size =
-+ max_t(unsigned, c->replicas_gc.entry_size,
-+ replicas_entry_bytes(e));
-+ }
-+
-+ c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
-+ c->replicas_gc.entry_size,
-+ GFP_KERNEL);
-+ if (!c->replicas_gc.entries) {
-+ mutex_unlock(&c->sb_lock);
-+ bch_err(c, "error allocating c->replicas_gc");
-+ return -BCH_ERR_ENOMEM_replicas_gc;
-+ }
-+
-+ for_each_cpu_replicas_entry(&c->replicas, e)
-+ if (!((1 << e->data_type) & typemask))
-+ memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
-+ e, c->replicas_gc.entry_size);
-+
-+ bch2_cpu_replicas_sort(&c->replicas_gc);
-+ mutex_unlock(&c->sb_lock);
-+
-+ return 0;
-+}
-+
-+/*
-+ * New much simpler mechanism for clearing out unneeded replicas entries - drop
-+ * replicas entries that have 0 sectors used.
-+ *
-+ * However, we don't track sector counts for journal usage, so this doesn't drop
-+ * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
-+ * is retained for that.
-+ */
-+int bch2_replicas_gc2(struct bch_fs *c)
-+{
-+ struct bch_replicas_cpu new = { 0 };
-+ unsigned i, nr;
-+ int ret = 0;
-+
-+ bch2_journal_meta(&c->journal);
-+retry:
-+ nr = READ_ONCE(c->replicas.nr);
-+ new.entry_size = READ_ONCE(c->replicas.entry_size);
-+ new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
-+ if (!new.entries) {
-+ bch_err(c, "error allocating c->replicas_gc");
-+ return -BCH_ERR_ENOMEM_replicas_gc;
-+ }
-+
-+ mutex_lock(&c->sb_lock);
-+ percpu_down_write(&c->mark_lock);
-+
-+ if (nr != c->replicas.nr ||
-+ new.entry_size != c->replicas.entry_size) {
-+ percpu_up_write(&c->mark_lock);
-+ mutex_unlock(&c->sb_lock);
-+ kfree(new.entries);
-+ goto retry;
-+ }
-+
-+ for (i = 0; i < c->replicas.nr; i++) {
-+ struct bch_replicas_entry *e =
-+ cpu_replicas_entry(&c->replicas, i);
-+
-+ if (e->data_type == BCH_DATA_journal ||
-+ c->usage_base->replicas[i] ||
-+ percpu_u64_get(&c->usage[0]->replicas[i]) ||
-+ percpu_u64_get(&c->usage[1]->replicas[i]) ||
-+ percpu_u64_get(&c->usage[2]->replicas[i]) ||
-+ percpu_u64_get(&c->usage[3]->replicas[i]))
-+ memcpy(cpu_replicas_entry(&new, new.nr++),
-+ e, new.entry_size);
-+ }
-+
-+ bch2_cpu_replicas_sort(&new);
-+
-+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new) ?:
-+ replicas_table_update(c, &new);
-+
-+ kfree(new.entries);
-+
-+ percpu_up_write(&c->mark_lock);
-+
-+ if (!ret)
-+ bch2_write_super(c);
-+
-+ mutex_unlock(&c->sb_lock);
-+
-+ return ret;
-+}
-+
-+int bch2_replicas_set_usage(struct bch_fs *c,
-+ struct bch_replicas_entry *r,
-+ u64 sectors)
-+{
-+ int ret, idx = bch2_replicas_entry_idx(c, r);
-+
-+ if (idx < 0) {
-+ struct bch_replicas_cpu n;
-+
-+ n = cpu_replicas_add_entry(&c->replicas, r);
-+ if (!n.entries)
-+ return -BCH_ERR_ENOMEM_cpu_replicas;
-+
-+ ret = replicas_table_update(c, &n);
-+ if (ret)
-+ return ret;
-+
-+ kfree(n.entries);
-+
-+ idx = bch2_replicas_entry_idx(c, r);
-+ BUG_ON(ret < 0);
-+ }
-+
-+ c->usage_base->replicas[idx] = sectors;
-+
-+ return 0;
-+}
-+
-+/* Replicas tracking - superblock: */
-+
-+static int
-+__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
-+ struct bch_replicas_cpu *cpu_r)
-+{
-+ struct bch_replicas_entry *e, *dst;
-+ unsigned nr = 0, entry_size = 0, idx = 0;
-+
-+ for_each_replicas_entry(sb_r, e) {
-+ entry_size = max_t(unsigned, entry_size,
-+ replicas_entry_bytes(e));
-+ nr++;
-+ }
-+
-+ cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
-+ if (!cpu_r->entries)
-+ return -BCH_ERR_ENOMEM_cpu_replicas;
-+
-+ cpu_r->nr = nr;
-+ cpu_r->entry_size = entry_size;
-+
-+ for_each_replicas_entry(sb_r, e) {
-+ dst = cpu_replicas_entry(cpu_r, idx++);
-+ memcpy(dst, e, replicas_entry_bytes(e));
-+ bch2_replicas_entry_sort(dst);
-+ }
-+
-+ return 0;
-+}
-+
-+static int
-+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
-+ struct bch_replicas_cpu *cpu_r)
-+{
-+ struct bch_replicas_entry_v0 *e;
-+ unsigned nr = 0, entry_size = 0, idx = 0;
-+
-+ for_each_replicas_entry(sb_r, e) {
-+ entry_size = max_t(unsigned, entry_size,
-+ replicas_entry_bytes(e));
-+ nr++;
-+ }
-+
-+ entry_size += sizeof(struct bch_replicas_entry) -
-+ sizeof(struct bch_replicas_entry_v0);
-+
-+ cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
-+ if (!cpu_r->entries)
-+ return -BCH_ERR_ENOMEM_cpu_replicas;
-+
-+ cpu_r->nr = nr;
-+ cpu_r->entry_size = entry_size;
-+
-+ for_each_replicas_entry(sb_r, e) {
-+ struct bch_replicas_entry *dst =
-+ cpu_replicas_entry(cpu_r, idx++);
-+
-+ dst->data_type = e->data_type;
-+ dst->nr_devs = e->nr_devs;
-+ dst->nr_required = 1;
-+ memcpy(dst->devs, e->devs, e->nr_devs);
-+ bch2_replicas_entry_sort(dst);
-+ }
-+
-+ return 0;
-+}
-+
-+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
-+{
-+ struct bch_sb_field_replicas *sb_v1;
-+ struct bch_sb_field_replicas_v0 *sb_v0;
-+ struct bch_replicas_cpu new_r = { 0, 0, NULL };
-+ int ret = 0;
-+
-+ if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
-+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
-+ else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
-+ ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
-+ if (ret)
-+ return ret;
-+
-+ bch2_cpu_replicas_sort(&new_r);
-+
-+ percpu_down_write(&c->mark_lock);
-+
-+ ret = replicas_table_update(c, &new_r);
-+ percpu_up_write(&c->mark_lock);
-+
-+ kfree(new_r.entries);
-+
-+ return 0;
-+}
-+
-+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
-+ struct bch_replicas_cpu *r)
-+{
-+ struct bch_sb_field_replicas_v0 *sb_r;
-+ struct bch_replicas_entry_v0 *dst;
-+ struct bch_replicas_entry *src;
-+ size_t bytes;
-+
-+ bytes = sizeof(struct bch_sb_field_replicas);
-+
-+ for_each_cpu_replicas_entry(r, src)
-+ bytes += replicas_entry_bytes(src) - 1;
-+
-+ sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
-+ DIV_ROUND_UP(bytes, sizeof(u64)));
-+ if (!sb_r)
-+ return -BCH_ERR_ENOSPC_sb_replicas;
-+
-+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
-+ sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);
-+
-+ memset(&sb_r->entries, 0,
-+ vstruct_end(&sb_r->field) -
-+ (void *) &sb_r->entries);
-+
-+ dst = sb_r->entries;
-+ for_each_cpu_replicas_entry(r, src) {
-+ dst->data_type = src->data_type;
-+ dst->nr_devs = src->nr_devs;
-+ memcpy(dst->devs, src->devs, src->nr_devs);
-+
-+ dst = replicas_entry_next(dst);
-+
-+ BUG_ON((void *) dst > vstruct_end(&sb_r->field));
-+ }
-+
-+ return 0;
-+}
-+
-+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
-+ struct bch_replicas_cpu *r)
-+{
-+ struct bch_sb_field_replicas *sb_r;
-+ struct bch_replicas_entry *dst, *src;
-+ bool need_v1 = false;
-+ size_t bytes;
-+
-+ bytes = sizeof(struct bch_sb_field_replicas);
-+
-+ for_each_cpu_replicas_entry(r, src) {
-+ bytes += replicas_entry_bytes(src);
-+ if (src->nr_required != 1)
-+ need_v1 = true;
-+ }
-+
-+ if (!need_v1)
-+ return bch2_cpu_replicas_to_sb_replicas_v0(c, r);
-+
-+ sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
-+ DIV_ROUND_UP(bytes, sizeof(u64)));
-+ if (!sb_r)
-+ return -BCH_ERR_ENOSPC_sb_replicas;
-+
-+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
-+ sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);
-+
-+ memset(&sb_r->entries, 0,
-+ vstruct_end(&sb_r->field) -
-+ (void *) &sb_r->entries);
-+
-+ dst = sb_r->entries;
-+ for_each_cpu_replicas_entry(r, src) {
-+ memcpy(dst, src, replicas_entry_bytes(src));
-+
-+ dst = replicas_entry_next(dst);
-+
-+ BUG_ON((void *) dst > vstruct_end(&sb_r->field));
-+ }
-+
-+ return 0;
-+}
-+
-+static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
-+ struct bch_sb *sb,
-+ struct printbuf *err)
-+{
-+ unsigned i, j;
-+
-+ sort_cmp_size(cpu_r->entries,
-+ cpu_r->nr,
-+ cpu_r->entry_size,
-+ memcmp, NULL);
-+
-+ for (i = 0; i < cpu_r->nr; i++) {
-+ struct bch_replicas_entry *e =
-+ cpu_replicas_entry(cpu_r, i);
-+
-+ if (e->data_type >= BCH_DATA_NR) {
-+ prt_printf(err, "invalid data type in entry ");
-+ bch2_replicas_entry_to_text(err, e);
-+ return -BCH_ERR_invalid_sb_replicas;
-+ }
-+
-+ if (!e->nr_devs) {
-+ prt_printf(err, "no devices in entry ");
-+ bch2_replicas_entry_to_text(err, e);
-+ return -BCH_ERR_invalid_sb_replicas;
-+ }
-+
-+ if (e->nr_required > 1 &&
-+ e->nr_required >= e->nr_devs) {
-+ prt_printf(err, "bad nr_required in entry ");
-+ bch2_replicas_entry_to_text(err, e);
-+ return -BCH_ERR_invalid_sb_replicas;
-+ }
-+
-+ for (j = 0; j < e->nr_devs; j++)
-+ if (!bch2_dev_exists(sb, e->devs[j])) {
-+ prt_printf(err, "invalid device %u in entry ", e->devs[j]);
-+ bch2_replicas_entry_to_text(err, e);
-+ return -BCH_ERR_invalid_sb_replicas;
-+ }
-+
-+ if (i + 1 < cpu_r->nr) {
-+ struct bch_replicas_entry *n =
-+ cpu_replicas_entry(cpu_r, i + 1);
-+
-+ BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);
-+
-+ if (!memcmp(e, n, cpu_r->entry_size)) {
-+ prt_printf(err, "duplicate replicas entry ");
-+ bch2_replicas_entry_to_text(err, e);
-+ return -BCH_ERR_invalid_sb_replicas;
-+ }
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
-+ struct bch_replicas_cpu cpu_r;
-+ int ret;
-+
-+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
-+ if (ret)
-+ return ret;
-+
-+ ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
-+ kfree(cpu_r.entries);
-+ return ret;
-+}
-+
-+static void bch2_sb_replicas_to_text(struct printbuf *out,
-+ struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_replicas *r = field_to_type(f, replicas);
-+ struct bch_replicas_entry *e;
-+ bool first = true;
-+
-+ for_each_replicas_entry(r, e) {
-+ if (!first)
-+ prt_printf(out, " ");
-+ first = false;
-+
-+ bch2_replicas_entry_to_text(out, e);
-+ }
-+ prt_newline(out);
-+}
-+
-+const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
-+ .validate = bch2_sb_replicas_validate,
-+ .to_text = bch2_sb_replicas_to_text,
-+};
-+
-+static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
-+ struct bch_replicas_cpu cpu_r;
-+ int ret;
-+
-+ ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
-+ if (ret)
-+ return ret;
-+
-+ ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
-+ kfree(cpu_r.entries);
-+ return ret;
-+}
-+
-+static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
-+ struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
-+ struct bch_replicas_entry_v0 *e;
-+ bool first = true;
-+
-+ for_each_replicas_entry(sb_r, e) {
-+ if (!first)
-+ prt_printf(out, " ");
-+ first = false;
-+
-+ bch2_replicas_entry_v0_to_text(out, e);
-+ }
-+ prt_newline(out);
-+}
-+
-+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
-+ .validate = bch2_sb_replicas_v0_validate,
-+ .to_text = bch2_sb_replicas_v0_to_text,
-+};
-+
-+/* Query replicas: */
-+
-+bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
-+ unsigned flags, bool print)
-+{
-+ struct bch_replicas_entry *e;
-+ bool ret = true;
-+
-+ percpu_down_read(&c->mark_lock);
-+ for_each_cpu_replicas_entry(&c->replicas, e) {
-+ unsigned i, nr_online = 0, nr_failed = 0, dflags = 0;
-+ bool metadata = e->data_type < BCH_DATA_user;
-+
-+ if (e->data_type == BCH_DATA_cached)
-+ continue;
-+
-+ for (i = 0; i < e->nr_devs; i++) {
-+ struct bch_dev *ca = bch_dev_bkey_exists(c, e->devs[i]);
-+
-+ nr_online += test_bit(e->devs[i], devs.d);
-+ nr_failed += ca->mi.state == BCH_MEMBER_STATE_failed;
-+ }
-+
-+ if (nr_failed == e->nr_devs)
-+ continue;
-+
-+ if (nr_online < e->nr_required)
-+ dflags |= metadata
-+ ? BCH_FORCE_IF_METADATA_LOST
-+ : BCH_FORCE_IF_DATA_LOST;
-+
-+ if (nr_online < e->nr_devs)
-+ dflags |= metadata
-+ ? BCH_FORCE_IF_METADATA_DEGRADED
-+ : BCH_FORCE_IF_DATA_DEGRADED;
-+
-+ if (dflags & ~flags) {
-+ if (print) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ bch2_replicas_entry_to_text(&buf, e);
-+ bch_err(c, "insufficient devices online (%u) for replicas entry %s",
-+ nr_online, buf.buf);
-+ printbuf_exit(&buf);
-+ }
-+ ret = false;
-+ break;
-+ }
-+
-+ }
-+ percpu_up_read(&c->mark_lock);
-+
-+ return ret;
-+}
-+
-+unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
-+{
-+ struct bch_sb_field_replicas *replicas;
-+ struct bch_sb_field_replicas_v0 *replicas_v0;
-+ unsigned i, data_has = 0;
-+
-+ replicas = bch2_sb_field_get(sb, replicas);
-+ replicas_v0 = bch2_sb_field_get(sb, replicas_v0);
-+
-+ if (replicas) {
-+ struct bch_replicas_entry *r;
-+
-+ for_each_replicas_entry(replicas, r)
-+ for (i = 0; i < r->nr_devs; i++)
-+ if (r->devs[i] == dev)
-+ data_has |= 1 << r->data_type;
-+ } else if (replicas_v0) {
-+ struct bch_replicas_entry_v0 *r;
-+
-+ for_each_replicas_entry_v0(replicas_v0, r)
-+ for (i = 0; i < r->nr_devs; i++)
-+ if (r->devs[i] == dev)
-+ data_has |= 1 << r->data_type;
-+ }
-+
-+
-+ return data_has;
-+}
-+
-+unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ unsigned ret;
-+
-+ mutex_lock(&c->sb_lock);
-+ ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
-+ mutex_unlock(&c->sb_lock);
-+
-+ return ret;
-+}
-+
-+void bch2_fs_replicas_exit(struct bch_fs *c)
-+{
-+ unsigned i;
-+
-+ kfree(c->usage_scratch);
-+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
-+ free_percpu(c->usage[i]);
-+ kfree(c->usage_base);
-+ kfree(c->replicas.entries);
-+ kfree(c->replicas_gc.entries);
-+
-+ mempool_exit(&c->replicas_delta_pool);
-+}
-+
-+int bch2_fs_replicas_init(struct bch_fs *c)
-+{
-+ bch2_journal_entry_res_resize(&c->journal,
-+ &c->replicas_journal_res,
-+ reserve_journal_replicas(c, &c->replicas));
-+
-+ return mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1,
-+ REPLICAS_DELTA_LIST_MAX) ?:
-+ replicas_table_update(c, &c->replicas);
-+}
-diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
-new file mode 100644
-index 000000000000..4887675a86f0
---- /dev/null
-+++ b/fs/bcachefs/replicas.h
-@@ -0,0 +1,91 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_REPLICAS_H
-+#define _BCACHEFS_REPLICAS_H
-+
-+#include "bkey.h"
-+#include "eytzinger.h"
-+#include "replicas_types.h"
-+
-+void bch2_replicas_entry_sort(struct bch_replicas_entry *);
-+void bch2_replicas_entry_to_text(struct printbuf *,
-+ struct bch_replicas_entry *);
-+void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
-+
-+static inline struct bch_replicas_entry *
-+cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
-+{
-+ return (void *) r->entries + r->entry_size * i;
-+}
-+
-+int bch2_replicas_entry_idx(struct bch_fs *,
-+ struct bch_replicas_entry *);
-+
-+void bch2_devlist_to_replicas(struct bch_replicas_entry *,
-+ enum bch_data_type,
-+ struct bch_devs_list);
-+bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *);
-+int bch2_mark_replicas(struct bch_fs *,
-+ struct bch_replicas_entry *);
-+
-+/*
-+ * Step from @d to the delta that follows it: skip the fixed-size delta field
-+ * and then the variable-length replicas entry stored after it.
-+ */
-+static inline struct replicas_delta *
-+replicas_delta_next(struct replicas_delta *d)
-+{
-+	void *next = d;
-+
-+	next += sizeof(d->delta);
-+	next += replicas_entry_bytes(&d->r);
-+
-+	return next;
-+}
-+
-+int bch2_replicas_delta_list_mark(struct bch_fs *, struct replicas_delta_list *);
-+
-+void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
-+
-+/* Fill @e in as a BCH_DATA_cached entry that names the single device @dev. */
-+static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
-+					      unsigned dev)
-+{
-+	e->devs[0]	= dev;
-+	e->nr_devs	= 1;
-+	e->nr_required	= 1;
-+	e->data_type	= BCH_DATA_cached;
-+}
-+
-+bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
-+ unsigned, bool);
-+
-+unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned);
-+unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
-+
-+int bch2_replicas_gc_end(struct bch_fs *, int);
-+int bch2_replicas_gc_start(struct bch_fs *, unsigned);
-+int bch2_replicas_gc2(struct bch_fs *);
-+
-+int bch2_replicas_set_usage(struct bch_fs *,
-+ struct bch_replicas_entry *,
-+ u64);
-+
-+/*
-+ * Iterate @_i over the (_r)->nr entries of the table @_r, advancing by
-+ * (_r)->entry_size bytes per step.
-+ */
-+#define for_each_cpu_replicas_entry(_r, _i) \
-+ for (_i = (_r)->entries; \
-+ (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
-+ _i = (void *) (_i) + (_r)->entry_size)
-+
-+/* iterate over superblock replicas - used by userspace tools: */
-+
-+/* Entry following @_i: @_i advanced by replicas_entry_bytes(_i) bytes. */
-+#define replicas_entry_next(_i) \
-+ ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i)))
-+
-+/*
-+ * Walk the variable-length entries of @_r, stopping at the end of the field
-+ * or at the first entry whose data_type is zero.
-+ */
-+#define for_each_replicas_entry(_r, _i) \
-+ for (_i = (_r)->entries; \
-+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
-+ (_i) = replicas_entry_next(_i))
-+
-+/*
-+ * Same loop body as for_each_replicas_entry(); the expansion is identical.
-+ * NOTE(review): presumably kept under its own name for use with the v0
-+ * replicas field - confirm against callers.
-+ */
-+#define for_each_replicas_entry_v0(_r, _i) \
-+ for (_i = (_r)->entries; \
-+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
-+ (_i) = replicas_entry_next(_i))
-+
-+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
-+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
-+
-+void bch2_fs_replicas_exit(struct bch_fs *);
-+int bch2_fs_replicas_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_REPLICAS_H */
-diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h
-new file mode 100644
-index 000000000000..5cfff489bbc3
---- /dev/null
-+++ b/fs/bcachefs/replicas_types.h
-@@ -0,0 +1,27 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_REPLICAS_TYPES_H
-+#define _BCACHEFS_REPLICAS_TYPES_H
-+
-+/* Table of replicas entries stored at a fixed stride. */
-+struct bch_replicas_cpu {
-+ /* number of entries in @entries */
-+ unsigned nr;
-+ /* distance in bytes from one entry to the next */
-+ unsigned entry_size;
-+ /* nr * entry_size bytes of entries; index with cpu_replicas_entry() */
-+ struct bch_replicas_entry *entries;
-+};
-+
-+/*
-+ * A signed 64-bit delta followed immediately (the struct is packed) by the
-+ * replicas entry it belongs to.
-+ */
-+struct replicas_delta {
-+ s64 delta;
-+ struct bch_replicas_entry r;
-+} __packed;
-+
-+/*
-+ * Header plus a trailing, variable-length run of struct replicas_delta.
-+ *
-+ * memset_start/memset_end are empty marker members bracketing nr_inodes and
-+ * persistent_reserved[].
-+ * NOTE(review): presumably so that range can be cleared in one memset() -
-+ * confirm against users.
-+ */
-+struct replicas_delta_list {
-+	unsigned		size;
-+	unsigned		used;
-+
-+	struct			{} memset_start;
-+	u64			nr_inodes;
-+	u64			persistent_reserved[BCH_REPLICAS_MAX];
-+	struct			{} memset_end;
-+	/* flexible array member rather than the deprecated zero-length [0] form */
-+	struct replicas_delta	d[];
-+};
-+
-+#endif /* _BCACHEFS_REPLICAS_TYPES_H */
-diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c
-new file mode 100644
-index 000000000000..e151ada1c8bd
---- /dev/null
-+++ b/fs/bcachefs/sb-clean.c
-@@ -0,0 +1,398 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_update_interior.h"
-+#include "buckets.h"
-+#include "error.h"
-+#include "journal_io.h"
-+#include "replicas.h"
-+#include "sb-clean.h"
-+#include "super-io.h"
-+
-+/*
-+ * BCH_SB_FIELD_clean:
-+ *
-+ * Btree roots, and a few other things, are recovered from the journal after an
-+ * unclean shutdown - but after a clean shutdown, to avoid having to read the
-+ * journal, we can store them in the superblock.
-+ *
-+ * bch_sb_field_clean simply contains a list of journal entries, stored exactly
-+ * as they would be in the journal:
-+ */
-+
-+/*
-+ * Run bch2_journal_entry_validate() over every journal entry stored in
-+ * @clean, using the superblock's version and endianness.
-+ *
-+ * Returns 0 if every entry validates, otherwise the first nonzero result.
-+ */
-+int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *clean,
-+				int write)
-+{
-+	struct jset_entry *cur = clean->start;
-+
-+	while (cur < (struct jset_entry *) vstruct_end(&clean->field)) {
-+		int err = bch2_journal_entry_validate(c, NULL, cur,
-+					le16_to_cpu(c->disk_sb.sb->version),
-+					BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
-+					write);
-+		if (err)
-+			return err;
-+
-+		cur = vstruct_next(cur);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Find the btree_root journal entry for btree @id, searching @clean if it is
-+ * non-NULL and the jset @j otherwise.
-+ *
-+ * Returns the root key and stores the entry's level in *@level, NULL if no
-+ * matching entry exists, or ERR_PTR(-EINVAL) if the matching entry has
-+ * u64s == 0.
-+ */
-+static struct bkey_i *btree_root_find(struct bch_fs *c,
-+				      struct bch_sb_field_clean *clean,
-+				      struct jset *j,
-+				      enum btree_id id, unsigned *level)
-+{
-+	struct jset_entry *cur, *last;
-+
-+	if (clean) {
-+		cur = clean->start;
-+		last = vstruct_end(&clean->field);
-+	} else {
-+		cur = j->start;
-+		last = vstruct_last(j);
-+	}
-+
-+	for (; cur < last; cur = vstruct_next(cur)) {
-+		if (cur->type != BCH_JSET_ENTRY_btree_root ||
-+		    cur->btree_id != id)
-+			continue;
-+
-+		if (!cur->u64s)
-+			return ERR_PTR(-EINVAL);
-+
-+		*level = cur->level;
-+		return cur->start;
-+	}
-+
-+	return NULL;
-+}
-+
-+/*
-+ * Cross-check the superblock clean section *@cleanp against journal set @j.
-+ *
-+ * If the journal sequence numbers differ and mustfix_fsck_err_on() evaluates
-+ * true, the clean section is freed, *@cleanp is set to NULL and 0 is
-+ * returned.  Otherwise the root of every btree id is looked up in both
-+ * places and any difference (presence, validity, size, contents or level) is
-+ * reported through mustfix_fsck_err_on().
-+ *
-+ * Returns ret, which starts at 0; the fsck_err label is the exit path
-+ * (presumably jumped to by the fsck error macros after setting ret).
-+ */
-+int bch2_verify_superblock_clean(struct bch_fs *c,
-+				 struct bch_sb_field_clean **cleanp,
-+				 struct jset *j)
-+{
-+	unsigned i;
-+	struct bch_sb_field_clean *clean = *cleanp;
-+	struct printbuf buf1 = PRINTBUF;
-+	struct printbuf buf2 = PRINTBUF;
-+	int ret = 0;
-+
-+	if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
-+			sb_clean_journal_seq_mismatch,
-+			"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
-+			le64_to_cpu(clean->journal_seq),
-+			le64_to_cpu(j->seq))) {
-+		kfree(clean);
-+		*cleanp = NULL;
-+		return 0;
-+	}
-+
-+	for (i = 0; i < BTREE_ID_NR; i++) {
-+		struct bkey_i *k1, *k2;
-+		unsigned l1 = 0, l2 = 0;
-+
-+		k1 = btree_root_find(c, clean, NULL, i, &l1);
-+		k2 = btree_root_find(c, NULL, j, i, &l2);
-+
-+		if (!k1 && !k2)
-+			continue;
-+
-+		printbuf_reset(&buf1);
-+		printbuf_reset(&buf2);
-+
-+		/*
-+		 * btree_root_find() returns ERR_PTR(-EINVAL) for a root entry
-+		 * with no key; an error pointer is non-NULL, so it has to be
-+		 * filtered out before the key is handed to the printer.
-+		 */
-+		if (IS_ERR(k1))
-+			prt_printf(&buf1, "(invalid)");
-+		else if (k1)
-+			bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(k1));
-+		else
-+			prt_printf(&buf1, "(none)");
-+
-+		if (IS_ERR(k2))
-+			prt_printf(&buf2, "(invalid)");
-+		else if (k2)
-+			bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(k2));
-+		else
-+			prt_printf(&buf2, "(none)");
-+
-+		/* the IS_ERR() tests short-circuit before k1/k2 are dereferenced */
-+		mustfix_fsck_err_on(!k1 || !k2 ||
-+				    IS_ERR(k1) ||
-+				    IS_ERR(k2) ||
-+				    k1->k.u64s != k2->k.u64s ||
-+				    memcmp(k1, k2, bkey_bytes(&k1->k)) ||
-+				    l1 != l2, c,
-+			sb_clean_btree_root_mismatch,
-+			"superblock btree root %u doesn't match journal after clean shutdown\n"
-+			"sb: l=%u %s\n"
-+			"journal: l=%u %s\n", i,
-+			l1, buf1.buf,
-+			l2, buf2.buf);
-+	}
-+fsck_err:
-+	printbuf_exit(&buf2);
-+	printbuf_exit(&buf1);
-+	return ret;
-+}
-+
-+/*
-+ * Return a validated private copy of the superblock clean section.
-+ *
-+ * Runs under c->sb_lock.  If the section is missing and fsck_err_on()
-+ * evaluates true, the superblock is marked not clean and NULL is returned.
-+ *
-+ * Returns a kmemdup()'d copy on success, NULL as described above, or an
-+ * ERR_PTR() on allocation or validation failure.
-+ */
-+struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c)
-+{
-+	struct bch_sb_field_clean *clean, *sb_clean;
-+	int ret;
-+
-+	mutex_lock(&c->sb_lock);
-+	sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean);
-+
-+	if (fsck_err_on(!sb_clean, c,
-+			sb_clean_missing,
-+			"superblock marked clean but clean section not present")) {
-+		SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-+		c->sb.clean = false;
-+		mutex_unlock(&c->sb_lock);
-+		return NULL;
-+	}
-+
-+	clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
-+			GFP_KERNEL);
-+	if (!clean) {
-+		mutex_unlock(&c->sb_lock);
-+		return ERR_PTR(-BCH_ERR_ENOMEM_read_superblock_clean);
-+	}
-+
-+	ret = bch2_sb_clean_validate_late(c, clean, READ);
-+	if (ret) {
-+		/* the copy never reaches the caller on this path: free it */
-+		kfree(clean);
-+		mutex_unlock(&c->sb_lock);
-+		return ERR_PTR(ret);
-+	}
-+
-+	mutex_unlock(&c->sb_lock);
-+
-+	return clean;
-+fsck_err:
-+	mutex_unlock(&c->sb_lock);
-+	return ERR_PTR(ret);
-+}
-+
-+/*
-+ * Carve a zeroed journal entry of @size bytes (rounded up to whole u64s) out
-+ * of the buffer at *@end, advance *@end past it and return the new entry.
-+ */
-+static struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size)
-+{
-+	unsigned nr_u64s = DIV_ROUND_UP(size, sizeof(u64));
-+	struct jset_entry *ent = *end;
-+
-+	memset(ent, 0, nr_u64s * sizeof(u64));
-+
-+	/* u64s excludes the shared header fields, hence the "- 1" */
-+	ent->u64s = cpu_to_le16(nr_u64s - 1);
-+
-+	*end = vstruct_next(*end);
-+	return ent;
-+}
-+
-+/*
-+ * Append usage, data_usage, dev_usage and clock journal entries at *@end,
-+ * advancing *@end past each one via jset_entry_init().
-+ *
-+ * @journal_seq: when zero, every c->usage[] slot is folded into the base with
-+ * bch2_fs_usage_acc_to_base(); otherwise only slot
-+ * (journal_seq & JOURNAL_BUF_MASK) is.
-+ *
-+ * No bounds are checked here.
-+ * NOTE(review): the caller is presumably expected to have reserved enough
-+ * room at *@end - confirm against callers.
-+ */
-+void bch2_journal_super_entries_add_common(struct bch_fs *c,
-+ struct jset_entry **end,
-+ u64 journal_seq)
-+{
-+ struct bch_dev *ca;
-+ unsigned i, dev;
-+
-+ /* c->mark_lock is held for read while the usage counters are copied */
-+ percpu_down_read(&c->mark_lock);
-+
-+ if (!journal_seq) {
-+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
-+ bch2_fs_usage_acc_to_base(c, i);
-+ } else {
-+ bch2_fs_usage_acc_to_base(c, journal_seq & JOURNAL_BUF_MASK);
-+ }
-+
-+ /* usage entry: inode count */
-+ {
-+ struct jset_entry_usage *u =
-+ container_of(jset_entry_init(end, sizeof(*u)),
-+ struct jset_entry_usage, entry);
-+
-+ u->entry.type = BCH_JSET_ENTRY_usage;
-+ u->entry.btree_id = BCH_FS_USAGE_inodes;
-+ u->v = cpu_to_le64(c->usage_base->nr_inodes);
-+ }
-+
-+ /* usage entry: current key version */
-+ {
-+ struct jset_entry_usage *u =
-+ container_of(jset_entry_init(end, sizeof(*u)),
-+ struct jset_entry_usage, entry);
-+
-+ u->entry.type = BCH_JSET_ENTRY_usage;
-+ u->entry.btree_id = BCH_FS_USAGE_key_version;
-+ u->v = cpu_to_le64(atomic64_read(&c->key_version));
-+ }
-+
-+ /* one usage entry per persistent_reserved[] slot; the slot index goes in level */
-+ for (i = 0; i < BCH_REPLICAS_MAX; i++) {
-+ struct jset_entry_usage *u =
-+ container_of(jset_entry_init(end, sizeof(*u)),
-+ struct jset_entry_usage, entry);
-+
-+ u->entry.type = BCH_JSET_ENTRY_usage;
-+ u->entry.btree_id = BCH_FS_USAGE_reserved;
-+ u->entry.level = i;
-+ u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]);
-+ }
-+
-+ /* one data_usage entry per replicas entry, sized for its nr_devs */
-+ for (i = 0; i < c->replicas.nr; i++) {
-+ struct bch_replicas_entry *e =
-+ cpu_replicas_entry(&c->replicas, i);
-+ struct jset_entry_data_usage *u =
-+ container_of(jset_entry_init(end, sizeof(*u) + e->nr_devs),
-+ struct jset_entry_data_usage, entry);
-+
-+ u->entry.type = BCH_JSET_ENTRY_data_usage;
-+ u->v = cpu_to_le64(c->usage_base->replicas[i]);
-+ unsafe_memcpy(&u->r, e, replicas_entry_bytes(e),
-+ "embedded variable length struct");
-+ }
-+
-+ /* one dev_usage entry per member device, with BCH_DATA_NR per-type slots */
-+ for_each_member_device(ca, c, dev) {
-+ unsigned b = sizeof(struct jset_entry_dev_usage) +
-+ sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR;
-+ struct jset_entry_dev_usage *u =
-+ container_of(jset_entry_init(end, b),
-+ struct jset_entry_dev_usage, entry);
-+
-+ u->entry.type = BCH_JSET_ENTRY_dev_usage;
-+ u->dev = cpu_to_le32(dev);
-+ u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec);
-+
-+ for (i = 0; i < BCH_DATA_NR; i++) {
-+ u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets);
-+ u->d[i].sectors = cpu_to_le64(ca->usage_base->d[i].sectors);
-+ u->d[i].fragmented = cpu_to_le64(ca->usage_base->d[i].fragmented);
-+ }
-+ }
-+
-+ percpu_up_read(&c->mark_lock);
-+
-+ /* one clock entry for each of c->io_clock[0] and c->io_clock[1] */
-+ for (i = 0; i < 2; i++) {
-+ struct jset_entry_clock *clock =
-+ container_of(jset_entry_init(end, sizeof(*clock)),
-+ struct jset_entry_clock, entry);
-+
-+ clock->entry.type = BCH_JSET_ENTRY_clock;
-+ clock->rw = i;
-+ clock->time = cpu_to_le64(atomic64_read(&c->io_clock[i].now));
-+ }
-+}
-+
-+/*
-+ * Superblock-field validate hook for the clean section: the field must be at
-+ * least as large as struct bch_sb_field_clean.
-+ */
-+static int bch2_sb_clean_validate(struct bch_sb *sb,
-+				  struct bch_sb_field *f,
-+				  struct printbuf *err)
-+{
-+	struct bch_sb_field_clean *clean = field_to_type(f, clean);
-+
-+	if (vstruct_bytes(&clean->field) >= sizeof(*clean))
-+		return 0;
-+
-+	prt_printf(err, "wrong size (got %zu should be %zu)",
-+		   vstruct_bytes(&clean->field), sizeof(*clean));
-+	return -BCH_ERR_invalid_sb_clean;
-+}
-+
-+/*
-+ * Print the clean section to @out: flags, journal_seq, then every journal
-+ * entry it holds (btree_keys entries with u64s == 0 are skipped).
-+ */
-+static void bch2_sb_clean_to_text(struct printbuf *out, struct bch_sb *sb,
-+				  struct bch_sb_field *f)
-+{
-+	struct bch_sb_field_clean *clean = field_to_type(f, clean);
-+	struct jset_entry *entry;
-+
-+	prt_printf(out, "flags: %x", le32_to_cpu(clean->flags));
-+	prt_newline(out);
-+	prt_printf(out, "journal_seq: %llu", le64_to_cpu(clean->journal_seq));
-+	prt_newline(out);
-+
-+	/*
-+	 * Bound the walk with '<' rather than '!=', as
-+	 * bch2_sb_clean_validate_late() does: an entry whose u64s steps past
-+	 * the end of the field must end the loop instead of walking on into
-+	 * whatever follows the field.
-+	 */
-+	for (entry = clean->start;
-+	     entry < (struct jset_entry *) vstruct_end(&clean->field);
-+	     entry = vstruct_next(entry)) {
-+		if (entry->type == BCH_JSET_ENTRY_btree_keys &&
-+		    !entry->u64s)
-+			continue;
-+
-+		bch2_journal_entry_to_text(out, NULL, entry);
-+		prt_newline(out);
-+	}
-+}
-+
-+/* validate / to_text callbacks for the superblock clean field */
-+const struct bch_sb_field_ops bch_sb_field_ops_clean = {
-+ .validate = bch2_sb_clean_validate,
-+ .to_text = bch2_sb_clean_to_text,
-+};
-+
-+/*
-+ * Clear the superblock's clean flag, apply bch2_sb_maybe_downgrade(), set the
-+ * always-on feature bits and write the superblock out, all under c->sb_lock.
-+ *
-+ * Returns the result of bch2_write_super().
-+ */
-+int bch2_fs_mark_dirty(struct bch_fs *c)
-+{
-+	int err;
-+
-+	/*
-+	 * The superblock is written unconditionally, to verify it hasn't
-+	 * changed before we go rw:
-+	 */
-+	mutex_lock(&c->sb_lock);
-+
-+	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-+	bch2_sb_maybe_downgrade(c);
-+	c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
-+	err = bch2_write_super(c);
-+
-+	mutex_unlock(&c->sb_lock);
-+
-+	return err;
-+}
-+
-+/*
-+ * Mark the superblock clean and fill its clean section with the current
-+ * journal sequence number, the common usage/clock entries and the btree
-+ * roots, then write the superblock.  Does nothing if the superblock is
-+ * already marked clean.  Runs entirely under c->sb_lock.
-+ *
-+ * Failures (resize or validation) are logged with bch_err() and the
-+ * superblock is not written; nothing is returned to the caller.
-+ */
-+void bch2_fs_mark_clean(struct bch_fs *c)
-+{
-+ struct bch_sb_field_clean *sb_clean;
-+ struct jset_entry *entry;
-+ unsigned u64s;
-+ int ret;
-+
-+ mutex_lock(&c->sb_lock);
-+ if (BCH_SB_CLEAN(c->disk_sb.sb))
-+ goto out;
-+
-+ SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
-+
-+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
-+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_metadata);
-+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_extents_above_btree_updates));
-+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_btree_updates_journalled));
-+
-+ /* room for the fixed header plus the journal's reserved entry space */
-+ u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
-+
-+ sb_clean = bch2_sb_field_resize(&c->disk_sb, clean, u64s);
-+ if (!sb_clean) {
-+ bch_err(c, "error resizing superblock while setting filesystem clean");
-+ goto out;
-+ }
-+
-+ sb_clean->flags = 0;
-+ sb_clean->journal_seq = cpu_to_le64(atomic64_read(&c->journal.seq));
-+
-+ /* Trying to catch outstanding bug: */
-+ BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);
-+
-+ entry = sb_clean->start;
-+ bch2_journal_super_entries_add_common(c, &entry, 0);
-+ entry = bch2_btree_roots_to_journal_entries(c, entry, 0);
-+ /* the entries written above must not have overrun the resized field */
-+ BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
-+
-+ /* zero whatever is left of the field after the last entry */
-+ memset(entry, 0,
-+ vstruct_end(&sb_clean->field) - (void *) entry);
-+
-+ /*
-+ * this should be in the write path, and we should be validating every
-+ * superblock section:
-+ */
-+ ret = bch2_sb_clean_validate_late(c, sb_clean, WRITE);
-+ if (ret) {
-+ bch_err(c, "error writing marking filesystem clean: validate error");
-+ goto out;
-+ }
-+
-+ bch2_write_super(c);
-+out:
-+ mutex_unlock(&c->sb_lock);
-+}
-diff --git a/fs/bcachefs/sb-clean.h b/fs/bcachefs/sb-clean.h
-new file mode 100644
-index 000000000000..71caef281239
---- /dev/null
-+++ b/fs/bcachefs/sb-clean.h
-@@ -0,0 +1,16 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SB_CLEAN_H
-+#define _BCACHEFS_SB_CLEAN_H
-+
-+int bch2_sb_clean_validate_late(struct bch_fs *, struct bch_sb_field_clean *, int);
-+int bch2_verify_superblock_clean(struct bch_fs *, struct bch_sb_field_clean **,
-+ struct jset *);
-+struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *);
-+void bch2_journal_super_entries_add_common(struct bch_fs *, struct jset_entry **, u64);
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_clean;
-+
-+int bch2_fs_mark_dirty(struct bch_fs *);
-+void bch2_fs_mark_clean(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_SB_CLEAN_H */
-diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
-new file mode 100644
-index 000000000000..f0930ab7f036
---- /dev/null
-+++ b/fs/bcachefs/sb-errors.c
-@@ -0,0 +1,172 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "sb-errors.h"
-+#include "super-io.h"
-+
-+/*
-+ * Name of each superblock error id, indexed by id; generated from the
-+ * BCH_SB_ERRS() table and followed by a NULL entry.
-+ */
-+static const char * const bch2_sb_error_strs[] = {
-+#define x(t, n, ...) [n] = #t,
-+	BCH_SB_ERRS()
-+#undef x
-+	NULL
-+};
-+
-+/* Print the name of error @id, or "(unknown error N)" for ids past the table. */
-+static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id)
-+{
-+	if (id >= BCH_SB_ERR_MAX) {
-+		prt_printf(out, "(unknown error %u)", id);
-+		return;
-+	}
-+
-+	prt_str(out, bch2_sb_error_strs[id]);
-+}
-+
-+/* Number of entries held by the errors field @e; 0 when @e is NULL. */
-+static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e)
-+{
-+	if (!e)
-+		return 0;
-+
-+	return (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0]);
-+}
-+
-+/* Size, in u64s, of an errors field holding @nr entries. */
-+static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)
-+{
-+	size_t bytes = sizeof(struct bch_sb_field_errors) +
-+		       sizeof(struct bch_sb_field_error_entry) * nr;
-+
-+	return bytes / sizeof(u64);
-+}
-+
-+/*
-+ * Superblock-field validate hook for the errors section: every entry must
-+ * have a nonzero count, and entry ids must be strictly increasing.
-+ */
-+static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f,
-+				   struct printbuf *err)
-+{
-+	struct bch_sb_field_errors *e = field_to_type(f, errors);
-+	unsigned nr = bch2_sb_field_errors_nr_entries(e);
-+	unsigned idx;
-+
-+	for (idx = 0; idx < nr; idx++) {
-+		if (!BCH_SB_ERROR_ENTRY_NR(&e->entries[idx])) {
-+			prt_printf(err, "entry with count 0 (id ");
-+			bch2_sb_error_id_to_text(err, BCH_SB_ERROR_ENTRY_ID(&e->entries[idx]));
-+			prt_printf(err, ")");
-+			return -BCH_ERR_invalid_sb_errors;
-+		}
-+
-+		/* the last entry has no successor to compare against */
-+		if (idx + 1 >= nr)
-+			continue;
-+
-+		if (BCH_SB_ERROR_ENTRY_ID(&e->entries[idx]) >=
-+		    BCH_SB_ERROR_ENTRY_ID(&e->entries[idx + 1])) {
-+			prt_printf(err, "entries out of order");
-+			return -BCH_ERR_invalid_sb_errors;
-+		}
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Print one line per entry of the errors section: error name, count and the
-+ * time of the last occurrence, separated by tabs.
-+ */
-+static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
-+				   struct bch_sb_field *f)
-+{
-+	struct bch_sb_field_errors *e = field_to_type(f, errors);
-+	unsigned nr = bch2_sb_field_errors_nr_entries(e);
-+	unsigned idx = 0;
-+
-+	if (out->nr_tabstops < 2)
-+		printbuf_tabstop_push(out, 16);
-+
-+	while (idx < nr) {
-+		bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[idx]));
-+		prt_tab(out);
-+		prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[idx]));
-+		prt_tab(out);
-+		bch2_prt_datetime(out, le64_to_cpu(e->entries[idx].last_error_time));
-+		prt_newline(out);
-+		idx++;
-+	}
-+}
-+
-+/* validate / to_text callbacks for the superblock errors field */
-+const struct bch_sb_field_ops bch_sb_field_ops_errors = {
-+ .validate = bch2_sb_errors_validate,
-+ .to_text = bch2_sb_errors_to_text,
-+};
-+
-+/*
-+ * Record one occurrence of @err in c->fsck_error_counts, which is kept sorted
-+ * by id: bump the existing entry if there is one, otherwise insert a new
-+ * entry with a count of 1.  If room for a new entry cannot be made the
-+ * occurrence is silently dropped.
-+ */
-+void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
-+{
-+	bch_sb_errors_cpu *e = &c->fsck_error_counts;
-+	struct bch_sb_error_entry_cpu n = {
-+		.id = err,
-+		.nr = 1,
-+		.last_error_time = ktime_get_real_seconds()
-+	};
-+	unsigned pos;
-+
-+	mutex_lock(&c->fsck_error_counts_lock);
-+
-+	/* stop at the first entry whose id is not below @err */
-+	for (pos = 0; pos < e->nr && e->data[pos].id < err; pos++)
-+		;
-+
-+	if (pos < e->nr && e->data[pos].id == err) {
-+		e->data[pos].nr++;
-+		e->data[pos].last_error_time = n.last_error_time;
-+	} else if (!darray_make_room(e, 1)) {
-+		darray_insert_item(e, pos, n);
-+	}
-+
-+	mutex_unlock(&c->fsck_error_counts_lock);
-+}
-+
-+/*
-+ * Copy c->fsck_error_counts into the superblock errors field, resizing the
-+ * field to fit.  Does nothing if the resize fails.
-+ */
-+void bch2_sb_errors_from_cpu(struct bch_fs *c)
-+{
-+	bch_sb_errors_cpu *src = &c->fsck_error_counts;
-+	struct bch_sb_field_errors *dst;
-+	unsigned idx;
-+
-+	dst = bch2_sb_field_resize(&c->disk_sb, errors,
-+				   bch2_sb_field_errors_u64s(src->nr));
-+	if (!dst)
-+		return;
-+
-+	for (idx = 0; idx < src->nr; idx++) {
-+		SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[idx], src->data[idx].id);
-+		SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[idx], src->data[idx].nr);
-+		dst->entries[idx].last_error_time = cpu_to_le64(src->data[idx].last_error_time);
-+	}
-+}
-+
-+/*
-+ * Load the superblock errors field into c->fsck_error_counts.
-+ *
-+ * Returns 0 on success or when the field has no entries, otherwise the
-+ * result of darray_make_room().
-+ */
-+static int bch2_sb_errors_to_cpu(struct bch_fs *c)
-+{
-+	struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors);
-+	bch_sb_errors_cpu *dst = &c->fsck_error_counts;
-+	unsigned idx, nr = bch2_sb_field_errors_nr_entries(src);
-+	int ret;
-+
-+	if (!nr)
-+		return 0;
-+
-+	mutex_lock(&c->fsck_error_counts_lock);
-+
-+	ret = darray_make_room(dst, nr);
-+	if (!ret) {
-+		dst->nr = nr;
-+
-+		for (idx = 0; idx < nr; idx++) {
-+			dst->data[idx].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[idx]);
-+			dst->data[idx].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[idx]);
-+			dst->data[idx].last_error_time = le64_to_cpu(src->entries[idx].last_error_time);
-+		}
-+	}
-+
-+	mutex_unlock(&c->fsck_error_counts_lock);
-+
-+	return ret;
-+}
-+
-+/* Tear down c->fsck_error_counts with darray_exit(). */
-+void bch2_fs_sb_errors_exit(struct bch_fs *c)
-+{
-+ darray_exit(&c->fsck_error_counts);
-+}
-+
-+/* Initialise the error-count lock and the (empty) error-count array. */
-+void bch2_fs_sb_errors_init_early(struct bch_fs *c)
-+{
-+ mutex_init(&c->fsck_error_counts_lock);
-+ darray_init(&c->fsck_error_counts);
-+}
-+
-+/* Load the error counts from the superblock; returns bch2_sb_errors_to_cpu()'s result. */
-+int bch2_fs_sb_errors_init(struct bch_fs *c)
-+{
-+ return bch2_sb_errors_to_cpu(c);
-+}
-diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h
-new file mode 100644
-index 000000000000..5a09a53966be
---- /dev/null
-+++ b/fs/bcachefs/sb-errors.h
-@@ -0,0 +1,270 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SB_ERRORS_H
-+#define _BCACHEFS_SB_ERRORS_H
-+
-+#include "sb-errors_types.h"
-+
-+#define BCH_SB_ERRS() \
-+ x(clean_but_journal_not_empty, 0) \
-+ x(dirty_but_no_journal_entries, 1) \
-+ x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
-+ x(sb_clean_journal_seq_mismatch, 3) \
-+ x(sb_clean_btree_root_mismatch, 4) \
-+ x(sb_clean_missing, 5) \
-+ x(jset_unsupported_version, 6) \
-+ x(jset_unknown_csum, 7) \
-+ x(jset_last_seq_newer_than_seq, 8) \
-+ x(jset_past_bucket_end, 9) \
-+ x(jset_seq_blacklisted, 10) \
-+ x(journal_entries_missing, 11) \
-+ x(journal_entry_replicas_not_marked, 12) \
-+ x(journal_entry_past_jset_end, 13) \
-+ x(journal_entry_replicas_data_mismatch, 14) \
-+ x(journal_entry_bkey_u64s_0, 15) \
-+ x(journal_entry_bkey_past_end, 16) \
-+ x(journal_entry_bkey_bad_format, 17) \
-+ x(journal_entry_bkey_invalid, 18) \
-+ x(journal_entry_btree_root_bad_size, 19) \
-+ x(journal_entry_blacklist_bad_size, 20) \
-+ x(journal_entry_blacklist_v2_bad_size, 21) \
-+ x(journal_entry_blacklist_v2_start_past_end, 22) \
-+ x(journal_entry_usage_bad_size, 23) \
-+ x(journal_entry_data_usage_bad_size, 24) \
-+ x(journal_entry_clock_bad_size, 25) \
-+ x(journal_entry_clock_bad_rw, 26) \
-+ x(journal_entry_dev_usage_bad_size, 27) \
-+ x(journal_entry_dev_usage_bad_dev, 28) \
-+ x(journal_entry_dev_usage_bad_pad, 29) \
-+ x(btree_node_unreadable, 30) \
-+ x(btree_node_fault_injected, 31) \
-+ x(btree_node_bad_magic, 32) \
-+ x(btree_node_bad_seq, 33) \
-+ x(btree_node_unsupported_version, 34) \
-+ x(btree_node_bset_older_than_sb_min, 35) \
-+ x(btree_node_bset_newer_than_sb, 36) \
-+ x(btree_node_data_missing, 37) \
-+ x(btree_node_bset_after_end, 38) \
-+ x(btree_node_replicas_sectors_written_mismatch, 39) \
-+ x(btree_node_replicas_data_mismatch, 40) \
-+ x(bset_unknown_csum, 41) \
-+ x(bset_bad_csum, 42) \
-+ x(bset_past_end_of_btree_node, 43) \
-+ x(bset_wrong_sector_offset, 44) \
-+ x(bset_empty, 45) \
-+ x(bset_bad_seq, 46) \
-+ x(bset_blacklisted_journal_seq, 47) \
-+ x(first_bset_blacklisted_journal_seq, 48) \
-+ x(btree_node_bad_btree, 49) \
-+ x(btree_node_bad_level, 50) \
-+ x(btree_node_bad_min_key, 51) \
-+ x(btree_node_bad_max_key, 52) \
-+ x(btree_node_bad_format, 53) \
-+ x(btree_node_bkey_past_bset_end, 54) \
-+ x(btree_node_bkey_bad_format, 55) \
-+ x(btree_node_bad_bkey, 56) \
-+ x(btree_node_bkey_out_of_order, 57) \
-+ x(btree_root_bkey_invalid, 58) \
-+ x(btree_root_read_error, 59) \
-+ x(btree_root_bad_min_key, 60) \
-+ x(btree_root_bad_max_key, 61) \
-+ x(btree_node_read_error, 62) \
-+ x(btree_node_topology_bad_min_key, 63) \
-+ x(btree_node_topology_bad_max_key, 64) \
-+ x(btree_node_topology_overwritten_by_prev_node, 65) \
-+ x(btree_node_topology_overwritten_by_next_node, 66) \
-+ x(btree_node_topology_interior_node_empty, 67) \
-+ x(fs_usage_hidden_wrong, 68) \
-+ x(fs_usage_btree_wrong, 69) \
-+ x(fs_usage_data_wrong, 70) \
-+ x(fs_usage_cached_wrong, 71) \
-+ x(fs_usage_reserved_wrong, 72) \
-+ x(fs_usage_persistent_reserved_wrong, 73) \
-+ x(fs_usage_nr_inodes_wrong, 74) \
-+ x(fs_usage_replicas_wrong, 75) \
-+ x(dev_usage_buckets_wrong, 76) \
-+ x(dev_usage_sectors_wrong, 77) \
-+ x(dev_usage_fragmented_wrong, 78) \
-+ x(dev_usage_buckets_ec_wrong, 79) \
-+ x(bkey_version_in_future, 80) \
-+ x(bkey_u64s_too_small, 81) \
-+ x(bkey_invalid_type_for_btree, 82) \
-+ x(bkey_extent_size_zero, 83) \
-+ x(bkey_extent_size_greater_than_offset, 84) \
-+ x(bkey_size_nonzero, 85) \
-+ x(bkey_snapshot_nonzero, 86) \
-+ x(bkey_snapshot_zero, 87) \
-+ x(bkey_at_pos_max, 88) \
-+ x(bkey_before_start_of_btree_node, 89) \
-+ x(bkey_after_end_of_btree_node, 90) \
-+ x(bkey_val_size_nonzero, 91) \
-+ x(bkey_val_size_too_small, 92) \
-+ x(alloc_v1_val_size_bad, 93) \
-+ x(alloc_v2_unpack_error, 94) \
-+ x(alloc_v3_unpack_error, 95) \
-+ x(alloc_v4_val_size_bad, 96) \
-+ x(alloc_v4_backpointers_start_bad, 97) \
-+ x(alloc_key_data_type_bad, 98) \
-+ x(alloc_key_empty_but_have_data, 99) \
-+ x(alloc_key_dirty_sectors_0, 100) \
-+ x(alloc_key_data_type_inconsistency, 101) \
-+ x(alloc_key_to_missing_dev_bucket, 102) \
-+ x(alloc_key_cached_inconsistency, 103) \
-+ x(alloc_key_cached_but_read_time_zero, 104) \
-+ x(alloc_key_to_missing_lru_entry, 105) \
-+ x(alloc_key_data_type_wrong, 106) \
-+ x(alloc_key_gen_wrong, 107) \
-+ x(alloc_key_dirty_sectors_wrong, 108) \
-+ x(alloc_key_cached_sectors_wrong, 109) \
-+ x(alloc_key_stripe_wrong, 110) \
-+ x(alloc_key_stripe_redundancy_wrong, 111) \
-+ x(bucket_sector_count_overflow, 112) \
-+ x(bucket_metadata_type_mismatch, 113) \
-+ x(need_discard_key_wrong, 114) \
-+ x(freespace_key_wrong, 115) \
-+ x(freespace_hole_missing, 116) \
-+ x(bucket_gens_val_size_bad, 117) \
-+ x(bucket_gens_key_wrong, 118) \
-+ x(bucket_gens_hole_wrong, 119) \
-+ x(bucket_gens_to_invalid_dev, 120) \
-+ x(bucket_gens_to_invalid_buckets, 121) \
-+ x(bucket_gens_nonzero_for_invalid_buckets, 122) \
-+ x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
-+ x(need_discard_freespace_key_bad, 124) \
-+ x(backpointer_pos_wrong, 125) \
-+ x(backpointer_to_missing_device, 126) \
-+ x(backpointer_to_missing_alloc, 127) \
-+ x(backpointer_to_missing_ptr, 128) \
-+ x(lru_entry_at_time_0, 129) \
-+ x(lru_entry_to_invalid_bucket, 130) \
-+ x(lru_entry_bad, 131) \
-+ x(btree_ptr_val_too_big, 132) \
-+ x(btree_ptr_v2_val_too_big, 133) \
-+ x(btree_ptr_has_non_ptr, 134) \
-+ x(extent_ptrs_invalid_entry, 135) \
-+ x(extent_ptrs_no_ptrs, 136) \
-+ x(extent_ptrs_too_many_ptrs, 137) \
-+ x(extent_ptrs_redundant_crc, 138) \
-+ x(extent_ptrs_redundant_stripe, 139) \
-+ x(extent_ptrs_unwritten, 140) \
-+ x(extent_ptrs_written_and_unwritten, 141) \
-+ x(ptr_to_invalid_device, 142) \
-+ x(ptr_to_duplicate_device, 143) \
-+ x(ptr_after_last_bucket, 144) \
-+ x(ptr_before_first_bucket, 145) \
-+ x(ptr_spans_multiple_buckets, 146) \
-+ x(ptr_to_missing_backpointer, 147) \
-+ x(ptr_to_missing_alloc_key, 148) \
-+ x(ptr_to_missing_replicas_entry, 149) \
-+ x(ptr_to_missing_stripe, 150) \
-+ x(ptr_to_incorrect_stripe, 151) \
-+ x(ptr_gen_newer_than_bucket_gen, 152) \
-+ x(ptr_too_stale, 153) \
-+ x(stale_dirty_ptr, 154) \
-+ x(ptr_bucket_data_type_mismatch, 155) \
-+ x(ptr_cached_and_erasure_coded, 156) \
-+ x(ptr_crc_uncompressed_size_too_small, 157) \
-+ x(ptr_crc_csum_type_unknown, 158) \
-+ x(ptr_crc_compression_type_unknown, 159) \
-+ x(ptr_crc_redundant, 160) \
-+ x(ptr_crc_uncompressed_size_too_big, 161) \
-+ x(ptr_crc_nonce_mismatch, 162) \
-+ x(ptr_stripe_redundant, 163) \
-+ x(reservation_key_nr_replicas_invalid, 164) \
-+ x(reflink_v_refcount_wrong, 165) \
-+ x(reflink_p_to_missing_reflink_v, 166) \
-+ x(stripe_pos_bad, 167) \
-+ x(stripe_val_size_bad, 168) \
-+ x(stripe_sector_count_wrong, 169) \
-+ x(snapshot_tree_pos_bad, 170) \
-+ x(snapshot_tree_to_missing_snapshot, 171) \
-+ x(snapshot_tree_to_missing_subvol, 172) \
-+ x(snapshot_tree_to_wrong_subvol, 173) \
-+ x(snapshot_tree_to_snapshot_subvol, 174) \
-+ x(snapshot_pos_bad, 175) \
-+ x(snapshot_parent_bad, 176) \
-+ x(snapshot_children_not_normalized, 177) \
-+ x(snapshot_child_duplicate, 178) \
-+ x(snapshot_child_bad, 179) \
-+ x(snapshot_skiplist_not_normalized, 180) \
-+ x(snapshot_skiplist_bad, 181) \
-+ x(snapshot_should_not_have_subvol, 182) \
-+ x(snapshot_to_bad_snapshot_tree, 183) \
-+ x(snapshot_bad_depth, 184) \
-+ x(snapshot_bad_skiplist, 185) \
-+ x(subvol_pos_bad, 186) \
-+ x(subvol_not_master_and_not_snapshot, 187) \
-+ x(subvol_to_missing_root, 188) \
-+ x(subvol_root_wrong_bi_subvol, 189) \
-+ x(bkey_in_missing_snapshot, 190) \
-+ x(inode_pos_inode_nonzero, 191) \
-+ x(inode_pos_blockdev_range, 192) \
-+ x(inode_unpack_error, 193) \
-+ x(inode_str_hash_invalid, 194) \
-+ x(inode_v3_fields_start_bad, 195) \
-+ x(inode_snapshot_mismatch, 196) \
-+ x(inode_unlinked_but_clean, 197) \
-+ x(inode_unlinked_but_nlink_nonzero, 198) \
-+ x(inode_checksum_type_invalid, 199) \
-+ x(inode_compression_type_invalid, 200) \
-+ x(inode_subvol_root_but_not_dir, 201) \
-+ x(inode_i_size_dirty_but_clean, 202) \
-+ x(inode_i_sectors_dirty_but_clean, 203) \
-+ x(inode_i_sectors_wrong, 204) \
-+ x(inode_dir_wrong_nlink, 205) \
-+ x(inode_dir_multiple_links, 206) \
-+ x(inode_multiple_links_but_nlink_0, 207) \
-+ x(inode_wrong_backpointer, 208) \
-+ x(inode_wrong_nlink, 209) \
-+ x(inode_unreachable, 210) \
-+ x(deleted_inode_but_clean, 211) \
-+ x(deleted_inode_missing, 212) \
-+ x(deleted_inode_is_dir, 213) \
-+ x(deleted_inode_not_unlinked, 214) \
-+ x(extent_overlapping, 215) \
-+ x(extent_in_missing_inode, 216) \
-+ x(extent_in_non_reg_inode, 217) \
-+ x(extent_past_end_of_inode, 218) \
-+ x(dirent_empty_name, 219) \
-+ x(dirent_val_too_big, 220) \
-+ x(dirent_name_too_long, 221) \
-+ x(dirent_name_embedded_nul, 222) \
-+ x(dirent_name_dot_or_dotdot, 223) \
-+ x(dirent_name_has_slash, 224) \
-+ x(dirent_d_type_wrong, 225) \
-+ x(dirent_d_parent_subvol_wrong, 226) \
-+ x(dirent_in_missing_dir_inode, 227) \
-+ x(dirent_in_non_dir_inode, 228) \
-+ x(dirent_to_missing_inode, 229) \
-+ x(dirent_to_missing_subvol, 230) \
-+ x(dirent_to_itself, 231) \
-+ x(quota_type_invalid, 232) \
-+ x(xattr_val_size_too_small, 233) \
-+ x(xattr_val_size_too_big, 234) \
-+ x(xattr_invalid_type, 235) \
-+ x(xattr_name_invalid_chars, 236) \
-+ x(xattr_in_missing_inode, 237) \
-+ x(root_subvol_missing, 238) \
-+ x(root_dir_missing, 239) \
-+ x(root_inode_not_dir, 240) \
-+ x(dir_loop, 241) \
-+ x(hash_table_key_duplicate, 242) \
-+ x(hash_table_key_wrong_offset, 243)
-+
-+/*
-+ * One BCH_FSCK_ERR_<name> enumerator per BCH_SB_ERRS() row, valued with the
-+ * row's number; BCH_SB_ERR_MAX is the enumerator after the last row.
-+ */
-+enum bch_sb_error_id {
-+#define x(t, n) BCH_FSCK_ERR_##t = n,
-+ BCH_SB_ERRS()
-+#undef x
-+ BCH_SB_ERR_MAX
-+};
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
-+
-+void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id);
-+
-+void bch2_sb_errors_from_cpu(struct bch_fs *);
-+
-+void bch2_fs_sb_errors_exit(struct bch_fs *);
-+void bch2_fs_sb_errors_init_early(struct bch_fs *);
-+int bch2_fs_sb_errors_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_SB_ERRORS_H */
-diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h
-new file mode 100644
-index 000000000000..b1c099843a39
---- /dev/null
-+++ b/fs/bcachefs/sb-errors_types.h
-@@ -0,0 +1,16 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SB_ERRORS_TYPES_H
-+#define _BCACHEFS_SB_ERRORS_TYPES_H
-+
-+#include "darray.h"
-+
-+/* In-memory counter for one error id. */
-+struct bch_sb_error_entry_cpu {
-+ /* id: the error id (16 bits); nr: occurrence count (48 bits) */
-+ u64 id:16,
-+ nr:48;
-+ /* time of the most recent occurrence, as set by bch2_sb_error_count() */
-+ u64 last_error_time;
-+};
-+
-+typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu;
-+
-+#endif /* _BCACHEFS_SB_ERRORS_TYPES_H */
-+
-diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
-new file mode 100644
-index 000000000000..bed0f857fe5b
---- /dev/null
-+++ b/fs/bcachefs/sb-members.c
-@@ -0,0 +1,420 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "disk_groups.h"
-+#include "opts.h"
-+#include "replicas.h"
-+#include "sb-members.h"
-+#include "super-io.h"
-+
-+/*
-+ * String tables generated from the BCH_IOPS_MEASUREMENTS() and
-+ * BCH_MEMBER_ERROR_TYPES() x-macro lists: each row's name is stored at the
-+ * row's index, and each table ends with a NULL entry.
-+ */
-+#define x(t, n, ...) [n] = #t,
-+static const char * const bch2_iops_measurements[] = {
-+ BCH_IOPS_MEASUREMENTS()
-+ NULL
-+};
-+
-+char * const bch2_member_error_strs[] = {
-+ BCH_MEMBER_ERROR_TYPES()
-+ NULL
-+};
-+#undef x
-+
-+/* Code for bch_sb_field_members_v1 and bch_sb_field_members_v2: */
-+
-+/* Pointer to member @i inside @sb's members_v2 field. */
-+struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
-+{
-+ return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
-+}
-+
-+/*
-+ * Copy of member @i from the v2 field @mi.  At most sizeof(struct bch_member)
-+ * bytes are copied; anything the stored entry doesn't cover is left zeroed.
-+ */
-+static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
-+{
-+	struct bch_member *src = __bch2_members_v2_get_mut(mi, i);
-+	struct bch_member m;
-+	size_t bytes = min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(m));
-+
-+	memset(&m, 0, sizeof(m));
-+	memcpy(&m, src, bytes);
-+
-+	return m;
-+}
-+
-+/* Pointer to member @i in the v1 field @mi; entries are BCH_MEMBER_V1_BYTES apart. */
-+static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
-+{
-+	void *base = mi->_members;
-+
-+	return base + (i * BCH_MEMBER_V1_BYTES);
-+}
-+
-+/*
-+ * Copy of member @i from the v1 field @mi.  At most sizeof(struct bch_member)
-+ * bytes are copied; anything the v1 entry doesn't cover is left zeroed.
-+ */
-+static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
-+{
-+	struct bch_member *src = members_v1_get_mut(mi, i);
-+	struct bch_member m;
-+	size_t bytes = min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(m));
-+
-+	memset(&m, 0, sizeof(m));
-+	memcpy(&m, src, bytes);
-+
-+	return m;
-+}
-+
-+/*
-+ * Copy of member @i from @sb, read from the members_v2 field when present
-+ * and from members_v1 otherwise.
-+ */
-+struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
-+{
-+	struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
-+
-+	if (!mi2)
-+		return members_v1_get(bch2_sb_field_get(sb, members_v1), i);
-+
-+	return members_v2_get(mi2, i);
-+}
-+
-+/*
-+ * If the members_v2 entries are smaller than struct bch_member, grow the
-+ * field and widen every entry in place to sizeof(struct bch_member),
-+ * zero-filling the added tail of each.
-+ *
-+ * Returns 0, or -BCH_ERR_ENOSPC_sb_members_v2 if the field can't be resized.
-+ *
-+ * NOTE(review): mi is dereferenced without a NULL check, so this relies on
-+ * the members_v2 field already existing - confirm against callers.
-+ */
-+static int sb_members_v2_resize_entries(struct bch_fs *c)
-+{
-+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
-+
-+ if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
-+ unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
-+ c->disk_sb.sb->nr_devices), 8);
-+
-+ mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
-+ if (!mi)
-+ return -BCH_ERR_ENOSPC_sb_members_v2;
-+
-+ /*
-+ * Walk from the last entry down: each entry moves to an equal or
-+ * higher offset, so going backwards never overwrites an entry that
-+ * has not been moved yet.
-+ */
-+ for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
-+ void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
-+ memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
-+ memset(dst + le16_to_cpu(mi->member_bytes),
-+ 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
-+ }
-+ /* only now record the new stride; the loop above used the old one */
-+ mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
-+ }
-+ return 0;
-+}
-+/* Ensure a members_v2 section exists (seeding it from members_v1 when missing), then widen its entries to sizeof(struct bch_member). Returns 0 or a negative BCH_ERR code. */
-+int bch2_sb_members_v2_init(struct bch_fs *c)
-+{
-+ struct bch_sb_field_members_v1 *mi1;
-+ struct bch_sb_field_members_v2 *mi2;
-+
-+ if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) {
-+ mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2,
-+ DIV_ROUND_UP(sizeof(*mi2) + sizeof(struct bch_member) * c->sb.nr_devices, sizeof(u64)));
-+ if (!mi2) /* the resize can fail; sb_members_v2_resize_entries() reports the same error for it */
-+ return -BCH_ERR_ENOSPC_sb_members_v2;
-+ mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1);
-+ memcpy(&mi2->_members[0], &mi1->_members[0],
-+ BCH_MEMBER_V1_BYTES * c->sb.nr_devices);
-+ memset(&mi2->pad[0], 0, sizeof(mi2->pad));
-+ mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES); /* entries still have the v1 layout until widened below */
-+ }
-+
-+ return sb_members_v2_resize_entries(c);
-+}
-+/* Resize members_v1 to hold nr_devices entries and fill it with the first BCH_MEMBER_V1_BYTES of each v2 entry; returns 0 or -BCH_ERR_ENOSPC_sb_members. */
-+int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
-+{
-+ struct bch_sb_field_members_v1 *mi1;
-+ struct bch_sb_field_members_v2 *mi2;
-+
-+ mi1 = bch2_sb_field_resize(disk_sb, members_v1,
-+ DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
-+ disk_sb->sb->nr_devices, sizeof(u64)));
-+ if (!mi1)
-+ return -BCH_ERR_ENOSPC_sb_members;
-+
-+ mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); /* looked up after the resize — presumably because resizing can move fields; confirm */
-+
-+ for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
-+ memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
-+
-+ return 0;
-+}
-+/* Sanity-check member entry @m (device index @i) against @sb; on failure writes a message to @err and returns -BCH_ERR_invalid_sb_members, otherwise returns 0. */
-+static int validate_member(struct printbuf *err,
-+ struct bch_member m,
-+ struct bch_sb *sb,
-+ int i)
-+{
-+ if (le64_to_cpu(m.nbuckets) > LONG_MAX) {
-+ prt_printf(err, "device %u: too many buckets (got %llu, max %lu)",
-+ i, le64_to_cpu(m.nbuckets), LONG_MAX);
-+ return -BCH_ERR_invalid_sb_members;
-+ }
-+
-+ if (le64_to_cpu(m.nbuckets) < /* compare by addition: subtracting first_bucket would wrap when it exceeds nbuckets and let the entry pass */
-+ le16_to_cpu(m.first_bucket) + BCH_MIN_NR_NBUCKETS) {
-+ prt_printf(err, "device %u: not enough buckets (got %llu, min %u)",
-+ i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS);
-+ return -BCH_ERR_invalid_sb_members;
-+ }
-+
-+ if (le16_to_cpu(m.bucket_size) <
-+ le16_to_cpu(sb->block_size)) {
-+ prt_printf(err, "device %u: bucket size %u smaller than block size %u",
-+ i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size));
-+ return -BCH_ERR_invalid_sb_members;
-+ }
-+
-+ if (le16_to_cpu(m.bucket_size) <
-+ BCH_SB_BTREE_NODE_SIZE(sb)) {
-+ prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu",
-+ i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
-+ return -BCH_ERR_invalid_sb_members;
-+ }
-+
-+ return 0;
-+}
-+/* Print a human-readable description of member @m (device index @i) to @out; prints nothing when the member's UUID is all zero. */
-+static void member_to_text(struct printbuf *out,
-+ struct bch_member m,
-+ struct bch_sb_field_disk_groups *gi,
-+ struct bch_sb *sb,
-+ int i)
-+{
-+ unsigned data_have = bch2_sb_dev_has_data(sb, i); /* evaluated before the existence check below */
-+ u64 bucket_size = le16_to_cpu(m.bucket_size);
-+ u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
-+
-+ if (!bch2_member_exists(&m))
-+ return;
-+
-+ prt_printf(out, "Device:");
-+ prt_tab(out);
-+ prt_printf(out, "%u", i);
-+ prt_newline(out);
-+
-+ printbuf_indent_add(out, 2);
-+
-+ prt_printf(out, "Label:");
-+ prt_tab(out);
-+ if (BCH_MEMBER_GROUP(&m)) {
-+ unsigned idx = BCH_MEMBER_GROUP(&m) - 1; /* stored value is index + 1; 0 means no label */
-+
-+ if (idx < disk_groups_nr(gi))
-+ prt_printf(out, "%s (%u)",
-+ gi->entries[idx].label, idx);
-+ else
-+ prt_printf(out, "(bad disk labels section)");
-+ } else {
-+ prt_printf(out, "(none)");
-+ }
-+ prt_newline(out);
-+
-+ prt_printf(out, "UUID:");
-+ prt_tab(out);
-+ pr_uuid(out, m.uuid.b);
-+ prt_newline(out);
-+
-+ prt_printf(out, "Size:");
-+ prt_tab(out);
-+ prt_units_u64(out, device_size << 9); /* << 9: presumably converts 512-byte sectors to bytes — confirm units of bucket_size */
-+ prt_newline(out);
-+
-+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { /* this i shadows the device-index parameter */
-+ prt_printf(out, "%s errors:", bch2_member_error_strs[i]);
-+ prt_tab(out);
-+ prt_u64(out, le64_to_cpu(m.errors[i]));
-+ prt_newline(out);
-+ }
-+
-+ for (unsigned i = 0; i < BCH_IOPS_NR; i++) { /* shadows the parameter as well */
-+ prt_printf(out, "%s iops:", bch2_iops_measurements[i]);
-+ prt_tab(out);
-+ prt_printf(out, "%u", le32_to_cpu(m.iops[i]));
-+ prt_newline(out);
-+ }
-+
-+ prt_printf(out, "Bucket size:");
-+ prt_tab(out);
-+ prt_units_u64(out, bucket_size << 9);
-+ prt_newline(out);
-+
-+ prt_printf(out, "First bucket:");
-+ prt_tab(out);
-+ prt_printf(out, "%u", le16_to_cpu(m.first_bucket));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Buckets:");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", le64_to_cpu(m.nbuckets));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Last mount:");
-+ prt_tab(out);
-+ if (m.last_mount)
-+ bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
-+ else
-+ prt_printf(out, "(never)");
-+ prt_newline(out);
-+
-+ prt_printf(out, "State:");
-+ prt_tab(out);
-+ prt_printf(out, "%s",
-+ BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR /* guard the table lookup against out-of-range state values */
-+ ? bch2_member_states[BCH_MEMBER_STATE(&m)]
-+ : "unknown");
-+ prt_newline(out);
-+
-+ prt_printf(out, "Data allowed:");
-+ prt_tab(out);
-+ if (BCH_MEMBER_DATA_ALLOWED(&m))
-+ prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
-+ else
-+ prt_printf(out, "(none)");
-+ prt_newline(out);
-+
-+ prt_printf(out, "Has data:");
-+ prt_tab(out);
-+ if (data_have)
-+ prt_bitflags(out, bch2_data_types, data_have);
-+ else
-+ prt_printf(out, "(none)");
-+ prt_newline(out);
-+
-+ prt_printf(out, "Discard:");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Freespace initialized:");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
-+ prt_newline(out);
-+
-+ printbuf_indent_sub(out, 2); /* undo the indent added after the "Device:" line */
-+}
-+/* Validate a members_v1 section: it must be large enough for sb->nr_devices entries, and every entry must pass validate_member(). */
-+static int bch2_sb_members_v1_validate(struct bch_sb *sb,
-+ struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
-+ unsigned i;
-+
-+ if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { /* the address one past the last entry must not exceed the section end */
-+ prt_printf(err, "too many devices for section size");
-+ return -BCH_ERR_invalid_sb_members;
-+ }
-+
-+ for (i = 0; i < sb->nr_devices; i++) {
-+ struct bch_member m = members_v1_get(mi, i);
-+
-+ int ret = validate_member(err, m, sb, i);
-+ if (ret) /* stop at the first invalid entry */
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+/* Print every entry of a members_v1 section via member_to_text(), passing along the superblock's disk_groups field for label lookup. */
-+static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
-+ struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
-+ unsigned i;
-+
-+ for (i = 0; i < sb->nr_devices; i++)
-+ member_to_text(out, members_v1_get(mi, i), gi, sb, i);
-+}
-+/* Superblock field operations table for the members_v1 section. */
-+const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
-+ .validate = bch2_sb_members_v1_validate,
-+ .to_text = bch2_sb_members_v1_to_text,
-+};
-+/* Print every entry of a members_v2 section via member_to_text(), passing along the superblock's disk_groups field for label lookup. */
-+static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
-+ struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
-+ unsigned i;
-+
-+ for (i = 0; i < sb->nr_devices; i++)
-+ member_to_text(out, members_v2_get(mi, i), gi, sb, i);
-+}
-+/* Validate a members_v2 section: nr_devices entries at the stored stride must fit in the section, and every entry must pass validate_member(). */
-+static int bch2_sb_members_v2_validate(struct bch_sb *sb,
-+ struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
-+ size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - /* bytes from section start to one past the last entry */
-+ (void *) mi;
-+
-+ if (mi_bytes > vstruct_bytes(&mi->field)) {
-+ prt_printf(err, "section too small (%zu > %zu)",
-+ mi_bytes, vstruct_bytes(&mi->field));
-+ return -BCH_ERR_invalid_sb_members;
-+ }
-+
-+ for (unsigned i = 0; i < sb->nr_devices; i++) {
-+ int ret = validate_member(err, members_v2_get(mi, i), sb, i);
-+ if (ret) /* stop at the first invalid entry */
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+/* Superblock field operations table for the members_v2 section. */
-+const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
-+ .validate = bch2_sb_members_v2_validate,
-+ .to_text = bch2_sb_members_v2_to_text,
-+};
-+/* Copy each member device's in-memory error counters (ca->errors[]) into its members_v2 entry as little-endian values. */
-+void bch2_sb_members_from_cpu(struct bch_fs *c)
-+{
-+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
-+ struct bch_dev *ca;
-+ unsigned i, e;
-+
-+ rcu_read_lock(); /* the _rcu iterator below dereferences c->devs[] */
-+ for_each_member_device_rcu(ca, c, i, NULL) {
-+ struct bch_member *m = __bch2_members_v2_get_mut(mi, i);
-+
-+ for (e = 0; e < BCH_MEMBER_ERROR_NR; e++)
-+ m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
-+ }
-+ rcu_read_unlock();
-+}
-+/* Print two tables of @ca's IO error counters to @out: current totals, then totals minus the baseline recorded at the last reset. */
-+void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
-+{
-+ struct bch_fs *c = ca->fs;
-+ struct bch_member m;
-+
-+ mutex_lock(&ca->fs->sb_lock);
-+ m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); /* by-value copy, so the lock can be dropped before printing */
-+ mutex_unlock(&ca->fs->sb_lock);
-+
-+ printbuf_tabstop_push(out, 12);
-+
-+ prt_str(out, "IO errors since filesystem creation");
-+ prt_newline(out);
-+
-+ printbuf_indent_add(out, 2);
-+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
-+ prt_printf(out, "%s:", bch2_member_error_strs[i]);
-+ prt_tab(out);
-+ prt_u64(out, atomic64_read(&ca->errors[i])); /* live in-memory counter, not the superblock copy */
-+ prt_newline(out);
-+ }
-+ printbuf_indent_sub(out, 2);
-+
-+ prt_str(out, "IO errors since ");
-+ bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); /* elapsed seconds since reset, scaled by NSEC_PER_SEC */
-+ prt_str(out, " ago");
-+ prt_newline(out);
-+
-+ printbuf_indent_add(out, 2);
-+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
-+ prt_printf(out, "%s:", bch2_member_error_strs[i]);
-+ prt_tab(out);
-+ prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); /* delta against the baseline saved by bch2_dev_errors_reset() */
-+ prt_newline(out);
-+ }
-+ printbuf_indent_sub(out, 2);
-+}
-+/* Record @ca's current error counters as the new "at reset" baseline, stamp the reset time, and write the superblock; all under c->sb_lock. */
-+void bch2_dev_errors_reset(struct bch_dev *ca)
-+{
-+ struct bch_fs *c = ca->fs;
-+ struct bch_member *m;
-+
-+ mutex_lock(&c->sb_lock);
-+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-+ for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
-+ m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
-+ m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); /* stored little-endian: bch2_dev_io_errors_to_text() reads it back with le64_to_cpu() */
-+
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+}
-diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
-new file mode 100644
-index 000000000000..03613e3eb8e3
---- /dev/null
-+++ b/fs/bcachefs/sb-members.h
-@@ -0,0 +1,227 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SB_MEMBERS_H
-+#define _BCACHEFS_SB_MEMBERS_H
-+
-+extern char * const bch2_member_error_strs[];
-+/* Return a pointer to entry @i of a members_v2 section; the stride is the section's own little-endian member_bytes value. No bounds check is done here. */
-+static inline struct bch_member *
-+__bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i)
-+{
-+ return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes));
-+}
-+
-+int bch2_sb_members_v2_init(struct bch_fs *c);
-+int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
-+struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i);
-+struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
-+/* A device counts as online while its io_ref percpu refcount is non-zero. */
-+static inline bool bch2_dev_is_online(struct bch_dev *ca)
-+{
-+ return !percpu_ref_is_zero(&ca->io_ref);
-+}
-+/* Readable means online and not in the BCH_MEMBER_STATE_failed state. */
-+static inline bool bch2_dev_is_readable(struct bch_dev *ca)
-+{
-+ return bch2_dev_is_online(ca) &&
-+ ca->mi.state != BCH_MEMBER_STATE_failed;
-+}
-+/* Try to take a reference on ca->io_ref for IO direction @rw; returns true with the ref held, or false with no ref held. */
-+static inline bool bch2_dev_get_ioref(struct bch_dev *ca, int rw)
-+{
-+ if (!percpu_ref_tryget(&ca->io_ref))
-+ return false;
-+
-+ if (ca->mi.state == BCH_MEMBER_STATE_rw || /* rw devices accept any IO; ro devices accept reads only */
-+ (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ))
-+ return true;
-+
-+ percpu_ref_put(&ca->io_ref); /* state does not permit this IO: give the ref back */
-+ return false;
-+}
-+/* Count the bits set in @devs, over BCH_SB_MEMBERS_MAX bit positions. */
-+static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs)
-+{
-+ return bitmap_weight(devs->d, BCH_SB_MEMBERS_MAX);
-+}
-+/* Linear search: true if @dev appears among the first devs.nr entries of the list (passed by value). */
-+static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs,
-+ unsigned dev)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < devs.nr; i++)
-+ if (devs.devs[i] == dev)
-+ return true;
-+
-+ return false;
-+}
-+/* Remove the first entry equal to @dev from the list, if there is one; later duplicates are left in place. */
-+static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs,
-+ unsigned dev)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < devs->nr; i++)
-+ if (devs->devs[i] == dev) {
-+ array_remove_item(devs->devs, devs->nr, i);
-+ return;
-+ }
-+}
-+/* Append @dev to the list unless it is already present; BUG()s if the fixed-size array is already full. */
-+static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs,
-+ unsigned dev)
-+{
-+ if (!bch2_dev_list_has_dev(*devs, dev)) {
-+ BUG_ON(devs->nr >= ARRAY_SIZE(devs->devs));
-+ devs->devs[devs->nr++] = dev;
-+ }
-+}
-+/* Build a one-element device list containing only @dev. */
-+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev)
-+{
-+ return (struct bch_devs_list) { .nr = 1, .devs[0] = dev };
-+}
-+/* Advance *iter to the next index below c->sb.nr_devices (restricted to bits set in @mask when it is non-NULL) whose c->devs[] slot is non-NULL; returns that device, or NULL once the range is exhausted. */
-+static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter,
-+ const struct bch_devs_mask *mask)
-+{
-+ struct bch_dev *ca = NULL;
-+
-+ while ((*iter = mask
-+ ? find_next_bit(mask->d, c->sb.nr_devices, *iter)
-+ : *iter) < c->sb.nr_devices &&
-+ !(ca = rcu_dereference_check(c->devs[*iter],
-+ lockdep_is_held(&c->state_lock))))
-+ (*iter)++; /* empty slot: keep scanning */
-+
-+ return ca;
-+}
-+/* Iterate member devices without taking references; __bch2_next_dev() uses rcu_dereference_check(), so callers presumably hold rcu_read_lock() or c->state_lock. */
-+#define for_each_member_device_rcu(ca, c, iter, mask) \
-+ for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter), mask)); (iter)++)
-+/* Find the next member device at or after *iter and return it with a reference taken on ca->ref; returns NULL when there are no more. */
-+static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, unsigned *iter)
-+{
-+ struct bch_dev *ca;
-+
-+ rcu_read_lock();
-+ if ((ca = __bch2_next_dev(c, iter, NULL)))
-+ percpu_ref_get(&ca->ref); /* taken inside the RCU section; dropped by the iterator macro or by the caller */
-+ rcu_read_unlock();
-+
-+ return ca;
-+}
-+
-+/*
-+ * Iterate member devices with ca->ref held during each loop body. If you break early, you must drop your ref on the current device
-+ */
-+#define for_each_member_device(ca, c, iter) \
-+ for ((iter) = 0; \
-+ (ca = bch2_get_next_dev(c, &(iter))); \
-+ percpu_ref_put(&ca->ref), (iter)++)
-+/* Find the next device at or after *iter whose state bit is set in @state_mask and whose io_ref can be taken; returns it with io_ref held, or NULL when there are no more. */
-+static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
-+ unsigned *iter,
-+ int state_mask)
-+{
-+ struct bch_dev *ca;
-+
-+ rcu_read_lock();
-+ while ((ca = __bch2_next_dev(c, iter, NULL)) &&
-+ (!((1 << ca->mi.state) & state_mask) ||
-+ !percpu_ref_tryget(&ca->io_ref)))
-+ (*iter)++; /* wrong state, or the io_ref could not be taken: skip this device */
-+ rcu_read_unlock();
-+
-+ return ca;
-+}
-+/* Iterate devices whose state bit is set in state_mask, with ca->io_ref held during each loop body; the ref is only dropped by the loop increment, so drop it yourself if you break early. */
-+#define __for_each_online_member(ca, c, iter, state_mask) \
-+ for ((iter) = 0; \
-+ (ca = bch2_get_next_online_dev(c, &(iter), state_mask)); \
-+ percpu_ref_put(&ca->io_ref), (iter)++)
-+
-+#define for_each_online_member(ca, c, iter) \
-+ __for_each_online_member(ca, c, iter, ~0)
-+
-+#define for_each_rw_member(ca, c, iter) \
-+ __for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_rw)
-+
-+#define for_each_readable_member(ca, c, iter) \
-+ __for_each_online_member(ca, c, iter, \
-+ (1 << BCH_MEMBER_STATE_rw)|(1 << BCH_MEMBER_STATE_ro))
-+
-+/*
-+ * If a key exists that references a device, the device won't be going away and
-+ * we can omit rcu_read_lock():
-+ */
-+static inline struct bch_dev *bch_dev_bkey_exists(const struct bch_fs *c, unsigned idx)
-+{
-+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); /* debug-build check that idx is in range and the slot is populated */
-+
-+ return rcu_dereference_check(c->devs[idx], 1); /* constant-true condition: no lock is asserted, per the comment above */
-+}
-+/* Return c->devs[idx] for callers holding c->sb_lock or c->state_lock (the lockdep condition passed to rcu_dereference_protected()). */
-+static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx)
-+{
-+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]);
-+
-+ return rcu_dereference_protected(c->devs[idx],
-+ lockdep_is_held(&c->sb_lock) ||
-+ lockdep_is_held(&c->state_lock));
-+}
-+
-+/* XXX kill, move to struct bch_fs -- returns, by value, a bitmask with the dev_idx bit set for every online member */
-+static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
-+{
-+ struct bch_devs_mask devs;
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ memset(&devs, 0, sizeof(devs));
-+ for_each_online_member(ca, c, i)
-+ __set_bit(ca->dev_idx, devs.d);
-+ return devs;
-+}
-+
-+extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1;
-+extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
-+/* A member slot is considered in use when its UUID is not all zero bytes. */
-+static inline bool bch2_member_exists(struct bch_member *m)
-+{
-+ return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
-+}
-+/* True if @dev is below sb->nr_devices and that member slot has a non-zero UUID. */
-+static inline bool bch2_dev_exists(struct bch_sb *sb, unsigned dev)
-+{
-+ if (dev < sb->nr_devices) {
-+ struct bch_member m = bch2_sb_member_get(sb, dev);
-+ return bch2_member_exists(&m);
-+ }
-+ return false;
-+}
-+/* Convert an on-disk member entry to the native-endian struct bch_member_cpu form. */
-+static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
-+{
-+ return (struct bch_member_cpu) {
-+ .nbuckets = le64_to_cpu(mi->nbuckets),
-+ .first_bucket = le16_to_cpu(mi->first_bucket),
-+ .bucket_size = le16_to_cpu(mi->bucket_size),
-+ .group = BCH_MEMBER_GROUP(mi),
-+ .state = BCH_MEMBER_STATE(mi),
-+ .discard = BCH_MEMBER_DISCARD(mi),
-+ .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi),
-+ .durability = BCH_MEMBER_DURABILITY(mi) /* stored biased by one; a stored 0 yields durability 1 */
-+ ? BCH_MEMBER_DURABILITY(mi) - 1
-+ : 1,
-+ .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
-+ .valid = bch2_member_exists(mi),
-+ };
-+}
-+
-+void bch2_sb_members_from_cpu(struct bch_fs *);
-+
-+void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
-+void bch2_dev_errors_reset(struct bch_dev *);
-+
-+#endif /* _BCACHEFS_SB_MEMBERS_H */
-diff --git a/fs/bcachefs/seqmutex.h b/fs/bcachefs/seqmutex.h
-new file mode 100644
-index 000000000000..c1860d8163fb
---- /dev/null
-+++ b/fs/bcachefs/seqmutex.h
-@@ -0,0 +1,48 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SEQMUTEX_H
-+#define _BCACHEFS_SEQMUTEX_H
-+
-+#include <linux/mutex.h>
-+/* A mutex paired with a counter that seqmutex_unlock() increments on every release; seqmutex_seq()/seqmutex_relock() use it to detect intervening unlocks. */
-+struct seqmutex {
-+ struct mutex lock;
-+ u32 seq;
-+};
-+
-+#define seqmutex_init(_lock) mutex_init(&(_lock)->lock) /* initializes only the mutex; seq is not touched */
-+/* Non-blocking acquire; returns whatever mutex_trylock() returns for the inner mutex. */
-+static inline bool seqmutex_trylock(struct seqmutex *lock)
-+{
-+ return mutex_trylock(&lock->lock);
-+}
-+/* Blocking acquire of the inner mutex; seq is not modified. */
-+static inline void seqmutex_lock(struct seqmutex *lock)
-+{
-+ mutex_lock(&lock->lock);
-+}
-+/* Release the lock, incrementing seq first while the mutex is still held. */
-+static inline void seqmutex_unlock(struct seqmutex *lock)
-+{
-+ lock->seq++;
-+ mutex_unlock(&lock->lock);
-+}
-+/* Return the current sequence number, for a later seqmutex_relock(); no locking is done here. */
-+static inline u32 seqmutex_seq(struct seqmutex *lock)
-+{
-+ return lock->seq;
-+}
-+/* Try to retake the lock without blocking; succeeds (returns true with the mutex held) only if seq still equals @seq. */
-+static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
-+{
-+ if (lock->seq != seq || !mutex_trylock(&lock->lock)) /* unlocked early-out, then a non-blocking acquire */
-+ return false;
-+
-+ if (lock->seq != seq) { /* re-check under the mutex: seq may have changed between the first check and the trylock */
-+ mutex_unlock(&lock->lock);
-+ return false;
-+ }
-+
-+ return true;
-+}
-+
-+#endif /* _BCACHEFS_SEQMUTEX_H */
-diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c
-new file mode 100644
-index 000000000000..dc1a27cc31cd
---- /dev/null
-+++ b/fs/bcachefs/siphash.c
-@@ -0,0 +1,173 @@
-+// SPDX-License-Identifier: BSD-3-Clause
-+/* $OpenBSD: siphash.c,v 1.3 2015/02/20 11:51:03 tedu Exp $ */
-+
-+/*-
-+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
-+ * All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. The name of the author may not be used to endorse or promote
-+ * products derived from this software without specific prior written
-+ * permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ */
-+
-+/*
-+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d
-+ * are the number of compression rounds and the number of finalization rounds.
-+ * A compression round is identical to a finalization round and this round
-+ * function is called SipRound. Given a 128-bit key k and a (possibly empty)
-+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m).
-+ *
-+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18,
-+ * by Jean-Philippe Aumasson and Daniel J. Bernstein,
-+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa
-+ * https://131002.net/siphash/siphash.pdf
-+ * https://131002.net/siphash/
-+ */
-+
-+#include <asm/byteorder.h>
-+#include <asm/unaligned.h>
-+#include <linux/bitops.h>
-+#include <linux/string.h>
-+
-+#include "siphash.h"
-+/* Apply @rounds iterations of the SipRound add/rotate/xor permutation to the four 64-bit state words ctx->v[0..3]. */
-+static void SipHash_Rounds(SIPHASH_CTX *ctx, int rounds)
-+{
-+ while (rounds--) {
-+ ctx->v[0] += ctx->v[1];
-+ ctx->v[2] += ctx->v[3];
-+ ctx->v[1] = rol64(ctx->v[1], 13);
-+ ctx->v[3] = rol64(ctx->v[3], 16);
-+
-+ ctx->v[1] ^= ctx->v[0];
-+ ctx->v[3] ^= ctx->v[2];
-+ ctx->v[0] = rol64(ctx->v[0], 32);
-+
-+ ctx->v[2] += ctx->v[1];
-+ ctx->v[0] += ctx->v[3];
-+ ctx->v[1] = rol64(ctx->v[1], 17);
-+ ctx->v[3] = rol64(ctx->v[3], 21);
-+
-+ ctx->v[1] ^= ctx->v[2];
-+ ctx->v[3] ^= ctx->v[0];
-+ ctx->v[2] = rol64(ctx->v[2], 32);
-+ }
-+}
-+/* Absorb one 8-byte message block read from @ptr (little-endian, any alignment): xor into v[3], run @rounds rounds, xor into v[0]. */
-+static void SipHash_CRounds(SIPHASH_CTX *ctx, const void *ptr, int rounds)
-+{
-+ u64 m = get_unaligned_le64(ptr);
-+
-+ ctx->v[3] ^= m;
-+ SipHash_Rounds(ctx, rounds);
-+ ctx->v[0] ^= m;
-+}
-+/* Initialize @ctx from the 128-bit @key (two little-endian 64-bit halves) and reset the block buffer and byte count. */
-+void SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key)
-+{
-+ u64 k0, k1;
-+
-+ k0 = le64_to_cpu(key->k0);
-+ k1 = le64_to_cpu(key->k1);
-+
-+ ctx->v[0] = 0x736f6d6570736575ULL ^ k0; /* the four constants are the ASCII bytes "somepseu", "dorandom", "lygenera", "tedbytes" */
-+ ctx->v[1] = 0x646f72616e646f6dULL ^ k1;
-+ ctx->v[2] = 0x6c7967656e657261ULL ^ k0;
-+ ctx->v[3] = 0x7465646279746573ULL ^ k1;
-+
-+ memset(ctx->buf, 0, sizeof(ctx->buf));
-+ ctx->bytes = 0;
-+}
-+/* Feed @len bytes from @src into the hash, running @rc rounds per full 8-byte block and buffering any remainder in ctx->buf; @rf is not used by this function. */
-+void SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf,
-+ const void *src, size_t len)
-+{
-+ const u8 *ptr = src;
-+ size_t left, used;
-+
-+ if (len == 0)
-+ return;
-+
-+ used = ctx->bytes % sizeof(ctx->buf); /* bytes already buffered from earlier calls */
-+ ctx->bytes += len;
-+
-+ if (used > 0) {
-+ left = sizeof(ctx->buf) - used;
-+
-+ if (len >= left) { /* enough input to complete the buffered block: fill it and absorb it */
-+ memcpy(&ctx->buf[used], ptr, left);
-+ SipHash_CRounds(ctx, ctx->buf, rc);
-+ len -= left;
-+ ptr += left;
-+ } else { /* still short of a full block: just append */
-+ memcpy(&ctx->buf[used], ptr, len);
-+ return;
-+ }
-+ }
-+
-+ while (len >= sizeof(ctx->buf)) { /* whole blocks are absorbed straight from the input, without copying */
-+ SipHash_CRounds(ctx, ptr, rc);
-+ len -= sizeof(ctx->buf);
-+ ptr += sizeof(ctx->buf);
-+ }
-+
-+ if (len > 0)
-+ memcpy(&ctx->buf[used], ptr, len); /* NOTE(review): 'used' is still the pre-call fill level, even when the buffered block was completed and absorbed above, so the tail lands at buf[used] rather than buf[0] and used + len can exceed sizeof(ctx->buf). Hash output for multi-call updates depends on this — confirm compatibility impact before changing. */
-+}
-+/* Finish the hash and store the 64-bit result at @dst in little-endian byte order; @dst need not be 8-byte aligned. SipHash_End() wipes @ctx. */
-+void SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf)
-+{
-+ u64 r;
-+
-+ r = SipHash_End(ctx, rc, rf);
-+
-+ put_unaligned_le64(r, dst); /* @dst is a void * of unknown alignment; store counterpart of the get_unaligned_le64() load in SipHash_CRounds() */
-+}
-+/* Pad and absorb the final block, run @rf finalization rounds, and return the 64-bit hash; @ctx is zeroed before returning. */
-+u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
-+{
-+ u64 r;
-+ size_t left, used;
-+
-+ used = ctx->bytes % sizeof(ctx->buf);
-+ left = sizeof(ctx->buf) - used;
-+ memset(&ctx->buf[used], 0, left - 1); /* zero-pad up to, but not including, buf[7] */
-+ ctx->buf[7] = ctx->bytes; /* last byte of the final block: total length, truncated to 8 bits by the u8 store */
-+
-+ SipHash_CRounds(ctx, ctx->buf, rc);
-+ ctx->v[2] ^= 0xff;
-+ SipHash_Rounds(ctx, rf);
-+
-+ r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]);
-+ memset(ctx, 0, sizeof(*ctx)); /* wipe the key-derived state; the context cannot be reused without SipHash_Init() */
-+ return r;
-+}
-+/* One-shot helper: hash @len bytes at @src under @key with @rc compression and @rf finalization rounds, using a context on the stack. */
-+u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)
-+{
-+ SIPHASH_CTX ctx;
-+
-+ SipHash_Init(&ctx, key);
-+ SipHash_Update(&ctx, rc, rf, src, len);
-+ return SipHash_End(&ctx, rc, rf);
-+}
-diff --git a/fs/bcachefs/siphash.h b/fs/bcachefs/siphash.h
-new file mode 100644
-index 000000000000..3dfaf34a43b2
---- /dev/null
-+++ b/fs/bcachefs/siphash.h
-@@ -0,0 +1,87 @@
-+/* SPDX-License-Identifier: BSD-3-Clause */
-+/* $OpenBSD: siphash.h,v 1.5 2015/02/20 11:51:03 tedu Exp $ */
-+/*-
-+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
-+ * All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. The name of the author may not be used to endorse or promote
-+ * products derived from this software without specific prior written
-+ * permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * $FreeBSD$
-+ */
-+
-+/*
-+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions)
-+ * optimized for speed on short messages returning a 64bit hash/digest value.
-+ *
-+ * The number of rounds is selected by the macro family used:
-+ * SipHash24_*() for the fast and reasonably strong version
-+ * SipHash48_*() for the strong version (half as fast)
-+ *
-+ * SIPHASH_CTX ctx;
-+ * SIPHASH_KEY key = { ... };
-+ * SipHash24_Init(&ctx, &key);
-+ * SipHash24_Update(&ctx, pointer_to_string, length_of_string);
-+ * SipHash24_Final(output, &ctx);
-+ */
-+
-+#ifndef _SIPHASH_H_
-+#define _SIPHASH_H_
-+
-+#include <linux/types.h>
-+
-+#define SIPHASH_BLOCK_LENGTH 8
-+#define SIPHASH_KEY_LENGTH 16
-+#define SIPHASH_DIGEST_LENGTH 8
-+/* Hashing state: four 64-bit state words, one partially filled message block, and the running input length. */
-+typedef struct _SIPHASH_CTX {
-+ u64 v[4];
-+ u8 buf[SIPHASH_BLOCK_LENGTH];
-+ u32 bytes; /* total bytes fed so far; 32 bits wide, so it wraps for very long inputs */
-+} SIPHASH_CTX;
-+/* 128-bit key, held as two little-endian 64-bit halves. */
-+typedef struct {
-+ __le64 k0;
-+ __le64 k1;
-+} SIPHASH_KEY;
-+
-+void SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *);
-+void SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t);
-+u64 SipHash_End(SIPHASH_CTX *, int, int);
-+void SipHash_Final(void *, SIPHASH_CTX *, int, int);
-+u64 SipHash(const SIPHASH_KEY *, int, int, const void *, size_t);
-+/* SipHash-2-4 wrappers: pass rc = 2 and rf = 4 to the generic functions (Init takes no round counts). */
-+#define SipHash24_Init(_c, _k) SipHash_Init((_c), (_k))
-+#define SipHash24_Update(_c, _p, _l) SipHash_Update((_c), 2, 4, (_p), (_l))
-+#define SipHash24_End(_d) SipHash_End((_d), 2, 4)
-+#define SipHash24_Final(_d, _c) SipHash_Final((_d), (_c), 2, 4)
-+#define SipHash24(_k, _p, _l) SipHash((_k), 2, 4, (_p), (_l))
-+/* SipHash-4-8 wrappers: pass rc = 4 and rf = 8 to the generic functions. */
-+#define SipHash48_Init(_c, _k) SipHash_Init((_c), (_k))
-+#define SipHash48_Update(_c, _p, _l) SipHash_Update((_c), 4, 8, (_p), (_l))
-+#define SipHash48_End(_d) SipHash_End((_d), 4, 8)
-+#define SipHash48_Final(_d, _c) SipHash_Final((_d), (_c), 4, 8)
-+#define SipHash48(_k, _p, _l) SipHash((_k), 4, 8, (_p), (_l))
-+
-+#endif /* _SIPHASH_H_ */
-diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
-new file mode 100644
-index 000000000000..b775cf0fb7cb
---- /dev/null
-+++ b/fs/bcachefs/six.c
-@@ -0,0 +1,917 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include <linux/export.h>
-+#include <linux/log2.h>
-+#include <linux/percpu.h>
-+#include <linux/preempt.h>
-+#include <linux/rcupdate.h>
-+#include <linux/sched.h>
-+#include <linux/sched/clock.h>
-+#include <linux/sched/rt.h>
-+#include <linux/sched/task.h>
-+#include <linux/slab.h>
-+
-+#include <trace/events/lock.h>
-+
-+#include "six.h"
-+/* EBUG_ON(): BUG_ON() when DEBUG is defined, otherwise an empty statement that does not evaluate cond. */
-+#ifdef DEBUG
-+#define EBUG_ON(cond) BUG_ON(cond)
-+#else
-+#define EBUG_ON(cond) do {} while (0)
-+#endif
-+/* Thin wrappers over the lockdep hooks lock_acquire()/lock_release() with the fixed arguments this file uses. */
-+#define six_acquire(l, t, r, ip) lock_acquire(l, 0, t, r, 1, NULL, ip)
-+#define six_release(l, ip) lock_release(l, ip)
-+
-+static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
-+/* Layout of the 32-bit lock->state word: bits 0-25 read count, bit 26 intent held, bit 27 write held, bits (28 + lock type) "waiting" flags for read and write, bit 31 nospin. */
-+#define SIX_LOCK_HELD_read_OFFSET 0
-+#define SIX_LOCK_HELD_read ~(~0U << 26)
-+#define SIX_LOCK_HELD_intent (1U << 26)
-+#define SIX_LOCK_HELD_write (1U << 27)
-+#define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read))
-+#define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write))
-+#define SIX_LOCK_NOSPIN (1U << 31)
-+/* Per-lock-type constants describing how that type interacts with the lock->state word; see the l[] table. */
-+struct six_lock_vals {
-+ /* Value we add to the lock in order to take the lock: */
-+ u32 lock_val;
-+
-+ /* If the lock has this value (used as a mask), taking the lock fails: */
-+ u32 lock_fail;
-+
-+ /* Mask that indicates lock is held for this type: */
-+ u32 held_mask;
-+
-+ /* Waitlist we wakeup when releasing the lock: */
-+ enum six_lock_type unlock_wakeup;
-+};
-+/* Lock-type table indexed by enum six_lock_type. */
-+static const struct six_lock_vals l[] = {
-+ [SIX_LOCK_read] = {
-+ .lock_val = 1U << SIX_LOCK_HELD_read_OFFSET, /* taking a read lock adds one to the read count */
-+ .lock_fail = SIX_LOCK_HELD_write, /* read fails while write is held */
-+ .held_mask = SIX_LOCK_HELD_read,
-+ .unlock_wakeup = SIX_LOCK_write,
-+ },
-+ [SIX_LOCK_intent] = {
-+ .lock_val = SIX_LOCK_HELD_intent,
-+ .lock_fail = SIX_LOCK_HELD_intent, /* intent fails only while intent is already held */
-+ .held_mask = SIX_LOCK_HELD_intent,
-+ .unlock_wakeup = SIX_LOCK_intent,
-+ },
-+ [SIX_LOCK_write] = {
-+ .lock_val = SIX_LOCK_HELD_write,
-+ .lock_fail = SIX_LOCK_HELD_read, /* write fails while the read count is non-zero */
-+ .held_mask = SIX_LOCK_HELD_write,
-+ .unlock_wakeup = SIX_LOCK_read,
-+ },
-+};
-+/* Set the bits of @mask in lock->state, skipping the atomic read-modify-write when they are all set already. */
-+static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
-+{
-+ if ((atomic_read(&lock->state) & mask) != mask)
-+ atomic_or(mask, &lock->state);
-+}
-+/* Clear the bits of @mask in lock->state, skipping the atomic read-modify-write when none of them is set. */
-+static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
-+{
-+ if (atomic_read(&lock->state) & mask)
-+ atomic_and(~mask, &lock->state);
-+}
-+/* Record @owner in lock->owner after an intent lock is taken; does nothing for other lock types. @old is the state value observed before the lock was taken. */
-+static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
-+ u32 old, struct task_struct *owner)
-+{
-+ if (type != SIX_LOCK_intent)
-+ return;
-+
-+ if (!(old & SIX_LOCK_HELD_intent)) { /* intent was not held before: this acquisition establishes the owner */
-+ EBUG_ON(lock->owner);
-+ lock->owner = owner;
-+ } else { /* intent was already held: debug builds assert the holder is the current task */
-+ EBUG_ON(lock->owner != current);
-+ }
-+}
-+/* Sum the per-CPU reader counters of @lock over all possible CPUs. */
-+static inline unsigned pcpu_read_count(struct six_lock *lock)
-+{
-+ unsigned read_count = 0;
-+ int cpu;
-+
-+ for_each_possible_cpu(cpu)
-+ read_count += *per_cpu_ptr(lock->readers, cpu);
-+ return read_count;
-+}
-+
-+/*
-+ * __do_six_trylock() - main trylock routine
-+ *
-+ * Returns 1 on success, 0 on failure
-+ *
-+ * In percpu reader mode, a failed trylock may cause a spurious trylock failure
-+ * for another thread taking the competing lock type, and we may have to do a
-+ * wakeup: when a wakeup is required, we return -1 - wakeup_type.
-+ */
-+static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
-+ struct task_struct *task, bool try)
-+{
-+ int ret;
-+ u32 old;
-+
-+ EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
-+ EBUG_ON(type == SIX_LOCK_write &&
-+ (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
-+
-+ /*
-+ * Percpu reader mode:
-+ *
-+ * The basic idea behind this algorithm is that you can implement a lock
-+ * between two threads without any atomics, just memory barriers:
-+ *
-+ * For two threads you'll need two variables, one variable for "thread a
-+ * has the lock" and another for "thread b has the lock".
-+ *
-+ * To take the lock, a thread sets its variable indicating that it holds
-+ * the lock, then issues a full memory barrier, then reads from the
-+ * other thread's variable to check if the other thread thinks it has
-+ * the lock. If we raced, we backoff and retry/sleep.
-+ *
-+ * Failure to take the lock may cause a spurious trylock failure in
-+ * another thread, because we temporarily set the lock to indicate that
-+ * we held it. This would be a problem for a thread in six_lock(), when
-+ * they are calling trylock after adding themself to the waitlist and
-+ * prior to sleeping.
-+ *
-+ * Therefore, if we fail to get the lock, and there were waiters of the
-+ * type we conflict with, we will have to issue a wakeup.
-+ *
-+ * Since we may be called under wait_lock (and by the wakeup code
-+ * itself), we return that the wakeup has to be done instead of doing it
-+ * here.
-+ */
-+ if (type == SIX_LOCK_read && lock->readers) {
-+ preempt_disable();
-+ this_cpu_inc(*lock->readers); /* signal that we own lock */
-+
-+ smp_mb();
-+
-+ old = atomic_read(&lock->state);
-+ ret = !(old & l[type].lock_fail);
-+
-+ this_cpu_sub(*lock->readers, !ret); /* take our increment back if the lock turned out to be write-held */
-+ preempt_enable();
-+
-+ if (!ret && (old & SIX_LOCK_WAITING_write))
-+ ret = -1 - SIX_LOCK_write; /* failed, and a write waiter may have seen our transient count: ask the caller to wake it */
-+ } else if (type == SIX_LOCK_write && lock->readers) {
-+ if (try) {
-+ atomic_add(SIX_LOCK_HELD_write, &lock->state);
-+ smp_mb__after_atomic();
-+ }
-+
-+ ret = !pcpu_read_count(lock); /* write succeeds only when the per-CPU reader counts sum to zero */
-+
-+ if (try && !ret) {
-+ old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state); /* back out the write bit set above */
-+ if (old & SIX_LOCK_WAITING_read)
-+ ret = -1 - SIX_LOCK_read;
-+ }
-+ } else {
-+ old = atomic_read(&lock->state);
-+ do {
-+ ret = !(old & l[type].lock_fail);
-+ if (!ret || (type == SIX_LOCK_write && !try)) { /* on failure, or for write with try == false, leave the state word unchanged */
-+ smp_mb();
-+ break;
-+ }
-+ } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
-+
-+ EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
-+ }
-+
-+ if (ret > 0)
-+ six_set_owner(lock, type, old, task);
-+
-+ EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
-+ (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
-+
-+ return ret;
-+}
-+
-+/*
-+ * __six_lock_wakeup() - hand the lock to waiters of @lock_type and wake them
-+ *
-+ * Walks lock->wait_list under lock->wait_lock. For each waiter that wants
-+ * @lock_type, the lock is taken on the waiter's behalf with
-+ * __do_six_trylock(); on success the waiter is unlinked, flagged through
-+ * w->lock_acquired and woken. For read locks every matching waiter is tried;
-+ * for the other lock types only the first matching waiter is.
-+ *
-+ * The waiting bit for @lock_type is cleared only when the list was walked to
-+ * its end. If a trylock reports that waiters of another type need a wakeup
-+ * (negative return), the walk is repeated for that type after wait_lock has
-+ * been dropped.
-+ */
-+static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
-+{
-+ struct six_lock_waiter *w, *next;
-+ struct task_struct *task;
-+ bool saw_one;
-+ int ret;
-+again:
-+ ret = 0;
-+ saw_one = false;
-+ raw_spin_lock(&lock->wait_lock);
-+
-+ list_for_each_entry_safe(w, next, &lock->wait_list, list) {
-+ if (w->lock_want != lock_type)
-+ continue;
-+
-+ /* only read locks are handed to more than one waiter per pass */
-+ if (saw_one && lock_type != SIX_LOCK_read)
-+ goto unlock;
-+ saw_one = true;
-+
-+ ret = __do_six_trylock(lock, lock_type, w->task, false);
-+ if (ret <= 0)
-+ goto unlock;
-+
-+ /*
-+ * Similar to percpu_rwsem_wake_function(), we need to guard
-+ * against the wakee noticing w->lock_acquired, returning, and
-+ * then exiting before we do the wakeup:
-+ */
-+ task = get_task_struct(w->task);
-+ __list_del(w->list.prev, w->list.next);
-+ /*
-+ * The release barrier here ensures the ordering of the
-+ * __list_del before setting w->lock_acquired; @w is on the
-+ * stack of the thread doing the waiting and will be reused
-+ * after it sees w->lock_acquired with no other locking:
-+ * pairs with smp_load_acquire() in six_lock_slowpath()
-+ */
-+ smp_store_release(&w->lock_acquired, true);
-+ wake_up_process(task);
-+ put_task_struct(task);
-+ }
-+
-+ six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
-+unlock:
-+ raw_spin_unlock(&lock->wait_lock);
-+
-+ /* decode "-1 - wakeup_type" from __do_six_trylock() and go again */
-+ if (ret < 0) {
-+ lock_type = -ret - 1;
-+ goto again;
-+ }
-+}
-+
-+/*
-+ * Wake waiters of @lock_type, but only when @state says there is a point:
-+ * write waiters are not woken while @state still shows read holders, and
-+ * nothing is done unless the waiting bit for @lock_type is set in @state.
-+ */
-+__always_inline
-+static void six_lock_wakeup(struct six_lock *lock, u32 state,
-+ enum six_lock_type lock_type)
-+{
-+ u32 waiting_bit = SIX_LOCK_WAITING_read << lock_type;
-+ bool blocked_by_readers = lock_type == SIX_LOCK_write &&
-+ (state & SIX_LOCK_HELD_read);
-+
-+ if (!blocked_by_readers && (state & waiting_bit))
-+ __six_lock_wakeup(lock, lock_type);
-+}
-+
-+/*
-+ * Trylock on behalf of current, performing any wakeup that
-+ * __do_six_trylock() asks for. Returns true only if the lock was taken.
-+ */
-+__always_inline
-+static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
-+{
-+ int status = __do_six_trylock(lock, type, current, try);
-+
-+ /* a negative status encodes (-1 - type) for the waiters to wake */
-+ if (status < 0) {
-+ __six_lock_wakeup(lock, -status - 1);
-+ return false;
-+ }
-+
-+ return status != 0;
-+}
-+
-+/**
-+ * six_trylock_ip - attempt to take a six lock without blocking
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
-+ *
-+ * The acquisition is reported through six_acquire() for read and intent
-+ * locks only, not for write locks.
-+ *
-+ * Return: true on success, false on failure.
-+ */
-+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
-+{
-+ bool locked = do_six_trylock(lock, type, true);
-+
-+ if (locked && type != SIX_LOCK_write)
-+ six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
-+
-+ return locked;
-+}
-+EXPORT_SYMBOL_GPL(six_trylock_ip);
-+
-+/**
-+ * six_relock_ip - attempt to re-take a lock that was held previously
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @seq: lock sequence number obtained from six_lock_seq() while lock was
-+ * held previously
-+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
-+ *
-+ * The sequence number is compared both before and after the trylock; if it
-+ * no longer matches once the lock is held, the lock is dropped again.
-+ *
-+ * Return: true on success, false on failure.
-+ */
-+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
-+ unsigned seq, unsigned long ip)
-+{
-+ /* cheap check first: no point locking if the sequence already moved */
-+ if (six_lock_seq(lock) != seq)
-+ return false;
-+
-+ if (!six_trylock_ip(lock, type, ip))
-+ return false;
-+
-+ /* re-check now that we hold the lock */
-+ if (six_lock_seq(lock) == seq)
-+ return true;
-+
-+ six_unlock_ip(lock, type, ip);
-+ return false;
-+}
-+EXPORT_SYMBOL_GPL(six_relock_ip);
-+
-+#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
-+
-+/*
-+ * Decide whether optimistic spinning is worth starting: not when we need to
-+ * reschedule, and otherwise only if there is no owner or the owner is
-+ * currently on a cpu.
-+ */
-+static inline bool six_can_spin_on_owner(struct six_lock *lock)
-+{
-+ struct task_struct *holder;
-+ bool can_spin = false;
-+
-+ if (!need_resched()) {
-+ rcu_read_lock();
-+ holder = READ_ONCE(lock->owner);
-+ can_spin = !holder || owner_on_cpu(holder);
-+ rcu_read_unlock();
-+ }
-+
-+ return can_spin;
-+}
-+
-+/*
-+ * Spin for as long as @owner is still lock->owner.
-+ *
-+ * Returns true once lock->owner has changed, false if spinning should stop:
-+ * @owner is no longer on a cpu, we need to reschedule, or @end_time has
-+ * passed - in that last case SIX_LOCK_NOSPIN is also set on @lock.
-+ */
-+static inline bool six_spin_on_owner(struct six_lock *lock,
-+ struct task_struct *owner,
-+ u64 end_time)
-+{
-+ bool ret = true;
-+ unsigned loop = 0;
-+
-+ rcu_read_lock();
-+ while (lock->owner == owner) {
-+ /*
-+ * Ensure we emit the owner->on_cpu, dereference _after_
-+ * checking lock->owner still matches owner. If that fails,
-+ * owner might point to freed memory. If it still matches,
-+ * the rcu_read_lock() ensures the memory stays valid.
-+ */
-+ barrier();
-+
-+ if (!owner_on_cpu(owner) || need_resched()) {
-+ ret = false;
-+ break;
-+ }
-+
-+ /* the clock is only sampled on every 16th iteration */
-+ if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
-+ six_set_bitmask(lock, SIX_LOCK_NOSPIN);
-+ ret = false;
-+ break;
-+ }
-+
-+ cpu_relax();
-+ }
-+ rcu_read_unlock();
-+
-+ return ret;
-+}
-+
-+/*
-+ * Try to acquire @lock by spinning instead of sleeping.
-+ *
-+ * Never spins for SIX_LOCK_write. lock->osq is taken with osq_lock() before
-+ * spinning, and spinning on an owner gives up once 10 microseconds have
-+ * passed since the osq lock was obtained.
-+ *
-+ * Returns true if the lock was acquired, false if the caller should fall
-+ * back to the waitlist.
-+ */
-+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
-+{
-+ struct task_struct *task = current;
-+ u64 end_time;
-+
-+ if (type == SIX_LOCK_write)
-+ return false;
-+
-+ preempt_disable();
-+ if (!six_can_spin_on_owner(lock))
-+ goto fail;
-+
-+ if (!osq_lock(&lock->osq))
-+ goto fail;
-+
-+ end_time = sched_clock() + 10 * NSEC_PER_USEC;
-+
-+ while (1) {
-+ struct task_struct *owner;
-+
-+ /*
-+ * If there's an owner, wait for it to either
-+ * release the lock or go to sleep.
-+ */
-+ owner = READ_ONCE(lock->owner);
-+ if (owner && !six_spin_on_owner(lock, owner, end_time))
-+ break;
-+
-+ if (do_six_trylock(lock, type, false)) {
-+ osq_unlock(&lock->osq);
-+ preempt_enable();
-+ return true;
-+ }
-+
-+ /*
-+ * When there's no owner, we might have preempted between the
-+ * owner acquiring the lock and setting the owner field. If
-+ * we're an RT task that will live-lock because we won't let
-+ * the owner complete.
-+ */
-+ if (!owner && (need_resched() || rt_task(task)))
-+ break;
-+
-+ /*
-+ * The cpu_relax() call is a compiler barrier which forces
-+ * everything in this loop to be re-loaded. We don't need
-+ * memory barriers as we'll eventually observe the right
-+ * values at the cost of a few extra spins.
-+ */
-+ cpu_relax();
-+ }
-+
-+ osq_unlock(&lock->osq);
-+fail:
-+ preempt_enable();
-+
-+ /*
-+ * If we fell out of the spin path because of need_resched(),
-+ * reschedule now, before we try-lock again. This avoids getting
-+ * scheduled out right after we obtained the lock.
-+ */
-+ if (need_resched())
-+ schedule();
-+
-+ return false;
-+}
-+
-+#else /* CONFIG_SIX_LOCK_SPIN_ON_OWNER */
-+
-+/* Optimistic spinning is compiled out: never acquires, callers must sleep. */
-+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
-+{
-+ return false;
-+}
-+
-+#endif
-+
-+/*
-+ * six_lock_slowpath() - blocking path of six_lock_ip_waiter()
-+ *
-+ * For SIX_LOCK_write, SIX_LOCK_HELD_write is set up front and cleared again
-+ * at 'out' if we return an error. Otherwise: try optimistic spinning, then
-+ * retry the trylock under wait_lock, queue @wait on the waitlist if that
-+ * failed, and sleep until a waker sets wait->lock_acquired or
-+ * @should_sleep_fn returns nonzero.
-+ *
-+ * Returns 0 with the lock held, or the nonzero value from @should_sleep_fn
-+ * with the lock not held.
-+ */
-+noinline
-+static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
-+ struct six_lock_waiter *wait,
-+ six_lock_should_sleep_fn should_sleep_fn, void *p,
-+ unsigned long ip)
-+{
-+ int ret = 0;
-+
-+ if (type == SIX_LOCK_write) {
-+ EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
-+ atomic_add(SIX_LOCK_HELD_write, &lock->state);
-+ smp_mb__after_atomic();
-+ }
-+
-+ trace_contention_begin(lock, 0);
-+ lock_contended(&lock->dep_map, ip);
-+
-+ if (six_optimistic_spin(lock, type))
-+ goto out;
-+
-+ wait->task = current;
-+ wait->lock_want = type;
-+ wait->lock_acquired = false;
-+
-+ raw_spin_lock(&lock->wait_lock);
-+ six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
-+ /*
-+ * Retry taking the lock after taking waitlist lock, in case we raced
-+ * with an unlock:
-+ */
-+ ret = __do_six_trylock(lock, type, current, false);
-+ if (ret <= 0) {
-+ wait->start_time = local_clock();
-+
-+ /* keep start_time strictly increasing along the waitlist */
-+ if (!list_empty(&lock->wait_list)) {
-+ struct six_lock_waiter *last =
-+ list_last_entry(&lock->wait_list,
-+ struct six_lock_waiter, list);
-+
-+ if (time_before_eq64(wait->start_time, last->start_time))
-+ wait->start_time = last->start_time + 1;
-+ }
-+
-+ list_add_tail(&wait->list, &lock->wait_list);
-+ }
-+ raw_spin_unlock(&lock->wait_lock);
-+
-+ if (unlikely(ret > 0)) {
-+ ret = 0;
-+ goto out;
-+ }
-+
-+ /* the failed trylock asked for a wakeup of the other lock type */
-+ if (unlikely(ret < 0)) {
-+ __six_lock_wakeup(lock, -ret - 1);
-+ ret = 0;
-+ }
-+
-+ while (1) {
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+
-+ /*
-+ * Ensures that writes to the waitlist entry happen after we see
-+ * wait->lock_acquired: pairs with the smp_store_release in
-+ * __six_lock_wakeup
-+ */
-+ if (smp_load_acquire(&wait->lock_acquired))
-+ break;
-+
-+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
-+ if (unlikely(ret)) {
-+ bool acquired;
-+
-+ /*
-+ * If should_sleep_fn() returns an error, we are
-+ * required to return that error even if we already
-+ * acquired the lock - should_sleep_fn() might have
-+ * modified external state (e.g. when the deadlock cycle
-+ * detector in bcachefs issued a transaction restart)
-+ */
-+ raw_spin_lock(&lock->wait_lock);
-+ acquired = wait->lock_acquired;
-+ if (!acquired)
-+ list_del(&wait->list);
-+ raw_spin_unlock(&lock->wait_lock);
-+
-+ if (unlikely(acquired))
-+ do_six_unlock_type(lock, type);
-+ break;
-+ }
-+
-+ schedule();
-+ }
-+
-+ __set_current_state(TASK_RUNNING);
-+out:
-+ /* failed write lock: undo the SIX_LOCK_HELD_write set on entry */
-+ if (ret && type == SIX_LOCK_write) {
-+ six_clear_bitmask(lock, SIX_LOCK_HELD_write);
-+ six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
-+ }
-+ trace_contention_end(lock, 0);
-+
-+ return ret;
-+}
-+
-+/**
-+ * six_lock_ip_waiter - take a lock, with full waitlist interface
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @wait: wait object that is queued on @lock's waitlist if we have to block
-+ * @should_sleep_fn: optional callback, run once we are on the waitlist and
-+ * right before each schedule(); a nonzero return aborts
-+ * @p: opaque argument for @should_sleep_fn
-+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
-+ *
-+ * Most general six_lock() variant; it exposes what an upper layer needs in
-+ * order to do full cycle detection for deadlock avoidance.
-+ *
-+ * Callers are expected to track their held locks themselves and to embed
-+ * @wait in the structure doing that tracking, which must be accessible in a
-+ * thread-safe way. @should_sleep_fn is the place to run the cycle detector:
-+ * walk each lock's waiters, and for each waiter recursively walk the locks it
-+ * holds.
-+ *
-+ * If we must block, @wait is on @lock's waitlist before @should_sleep_fn is
-+ * called, and it stays there until the lock has been acquired or we abort.
-+ *
-+ * @wait.start_time is monotonically increasing along any given waitlist, so
-+ * it may be used as a loop cursor.
-+ *
-+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
-+ */
-+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
-+ struct six_lock_waiter *wait,
-+ six_lock_should_sleep_fn should_sleep_fn, void *p,
-+ unsigned long ip)
-+{
-+ bool track_lockdep = type != SIX_LOCK_write;
-+ int ret = 0;
-+
-+ wait->start_time = 0;
-+
-+ if (track_lockdep)
-+ six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
-+
-+ /* fast path first; only enter the slowpath when the trylock fails */
-+ if (!do_six_trylock(lock, type, true))
-+ ret = six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
-+
-+ if (!ret)
-+ lock_acquired(&lock->dep_map, ip);
-+ else if (track_lockdep)
-+ six_release(&lock->dep_map, ip);
-+
-+ return ret;
-+}
-+EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
-+
-+/*
-+ * do_six_unlock_type() - drop one hold of @type, then wake waiters if needed
-+ *
-+ * Clears lock->owner when dropping an intent lock. In percpu reader mode a
-+ * read unlock only decrements this cpu's reader count; otherwise
-+ * l[type].lock_val is subtracted from lock->state. For intent and write
-+ * unlocks, SIX_LOCK_NOSPIN is subtracted along with it if it was set.
-+ */
-+__always_inline
-+static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
-+{
-+ u32 state;
-+
-+ if (type == SIX_LOCK_intent)
-+ lock->owner = NULL;
-+
-+ if (type == SIX_LOCK_read &&
-+ lock->readers) {
-+ smp_mb(); /* unlock barrier */
-+ this_cpu_dec(*lock->readers);
-+ smp_mb(); /* between unlocking and checking for waiters */
-+ state = atomic_read(&lock->state);
-+ } else {
-+ u32 v = l[type].lock_val;
-+
-+ if (type != SIX_LOCK_read)
-+ v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
-+
-+ EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
-+ state = atomic_sub_return_release(v, &lock->state);
-+ }
-+
-+ /* which waiters to wake is looked up per unlocked type in l[] */
-+ six_lock_wakeup(lock, state, l[type].unlock_wakeup);
-+}
-+
-+/**
-+ * six_unlock_ip - drop a six lock
-+ * @lock: lock to unlock
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
-+ *
-+ * When a lock is held multiple times (because six_lock_increment() was
-+ * used), this decrements the 'lock held' counter by one.
-+ *
-+ * For example:
-+ * six_lock_read(&foo->lock); read count 1
-+ * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2
-+ * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 1
-+ * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 0
-+ *
-+ * Dropping a write lock also increments the lock's sequence number.
-+ */
-+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
-+{
-+ bool is_write = type == SIX_LOCK_write;
-+ bool is_intent = type == SIX_LOCK_intent;
-+
-+ EBUG_ON(is_write &&
-+ !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
-+ EBUG_ON((is_write || is_intent) && lock->owner != current);
-+
-+ if (is_write)
-+ lock->seq++;
-+ else
-+ six_release(&lock->dep_map, ip);
-+
-+ /* a recursively held intent lock only loses one level of recursion */
-+ if (is_intent && lock->intent_lock_recurse) {
-+ lock->intent_lock_recurse--;
-+ return;
-+ }
-+
-+ do_six_unlock_type(lock, type);
-+}
-+EXPORT_SYMBOL_GPL(six_unlock_ip);
-+
-+/**
-+ * six_lock_downgrade - convert an intent lock to a read lock
-+ * @lock: lock to downgrade
-+ *
-+ * @lock will have read count incremented and intent count decremented
-+ */
-+void six_lock_downgrade(struct six_lock *lock)
-+{
-+ /* take the read hold first, then drop intent */
-+ six_lock_increment(lock, SIX_LOCK_read);
-+ six_unlock_intent(lock);
-+}
-+EXPORT_SYMBOL_GPL(six_lock_downgrade);
-+
-+/**
-+ * six_lock_tryupgrade - attempt to convert read lock to an intent lock
-+ * @lock: lock to upgrade
-+ *
-+ * On success, @lock will have intent count incremented and read count
-+ * decremented
-+ *
-+ * Fails, leaving @lock unchanged, if an intent lock is already held on @lock.
-+ *
-+ * Return: true on success, false on failure
-+ */
-+bool six_lock_tryupgrade(struct six_lock *lock)
-+{
-+ u32 old = atomic_read(&lock->state), new;
-+
-+ do {
-+ new = old;
-+
-+ if (new & SIX_LOCK_HELD_intent)
-+ return false;
-+
-+ /* without percpu readers the read count is part of state */
-+ if (!lock->readers) {
-+ EBUG_ON(!(new & SIX_LOCK_HELD_read));
-+ new -= l[SIX_LOCK_read].lock_val;
-+ }
-+
-+ new |= SIX_LOCK_HELD_intent;
-+ } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
-+
-+ /* percpu mode: the read count is dropped from this cpu's counter */
-+ if (lock->readers)
-+ this_cpu_dec(*lock->readers);
-+
-+ six_set_owner(lock, SIX_LOCK_intent, old, current);
-+
-+ return true;
-+}
-+EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
-+
-+/**
-+ * six_trylock_convert - attempt to convert a held lock from one type to another
-+ * @lock: lock to convert
-+ * @from: SIX_LOCK_read or SIX_LOCK_intent
-+ * @to: SIX_LOCK_read or SIX_LOCK_intent
-+ *
-+ * Converting to the type already held is a no-op that succeeds. Converting
-+ * to read goes through six_lock_downgrade() and always succeeds; converting
-+ * to intent goes through six_lock_tryupgrade() and may fail.
-+ *
-+ * Return: true on success, false on failure
-+ */
-+bool six_trylock_convert(struct six_lock *lock,
-+ enum six_lock_type from,
-+ enum six_lock_type to)
-+{
-+ EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
-+
-+ if (to == from)
-+ return true;
-+
-+ if (to != SIX_LOCK_read)
-+ return six_lock_tryupgrade(lock);
-+
-+ six_lock_downgrade(lock);
-+ return true;
-+}
-+EXPORT_SYMBOL_GPL(six_trylock_convert);
-+
-+/**
-+ * six_lock_increment - increase held lock count on a lock that is already held
-+ * @lock: lock to increment
-+ * @type: SIX_LOCK_read or SIX_LOCK_intent
-+ *
-+ * @lock must already be held, with a lock type that is greater than or equal to
-+ * @type
-+ *
-+ * Each increment needs a matching six_unlock_type() call before @lock is
-+ * fully unlocked. Passing SIX_LOCK_write is a BUG().
-+ */
-+void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
-+{
-+ six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
-+
-+ /* XXX: assert already locked, and that we don't overflow: */
-+
-+ if (type == SIX_LOCK_read) {
-+ if (!lock->readers) {
-+ EBUG_ON(!(atomic_read(&lock->state) &
-+ (SIX_LOCK_HELD_read|
-+ SIX_LOCK_HELD_intent)));
-+ atomic_add(l[type].lock_val, &lock->state);
-+ } else {
-+ this_cpu_inc(*lock->readers);
-+ }
-+ } else if (type == SIX_LOCK_intent) {
-+ EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
-+ lock->intent_lock_recurse++;
-+ } else if (type == SIX_LOCK_write) {
-+ BUG();
-+ }
-+}
-+EXPORT_SYMBOL_GPL(six_lock_increment);
-+
-+/**
-+ * six_lock_wakeup_all - wake up all waiters on @lock
-+ * @lock: lock to wake up waiters for
-+ *
-+ * Waking up waiters will cause them to re-run should_sleep_fn, which may then
-+ * abort the lock operation.
-+ *
-+ * This function is never needed in a bug-free program; it's only useful in
-+ * debug code, e.g. to determine if a cycle detector is at fault.
-+ */
-+void six_lock_wakeup_all(struct six_lock *lock)
-+{
-+ u32 state = atomic_read(&lock->state);
-+ struct six_lock_waiter *waiter;
-+ unsigned t;
-+
-+ /* regular wakeups first, in read, intent, write order */
-+ for (t = SIX_LOCK_read; t <= SIX_LOCK_write; t++)
-+ six_lock_wakeup(lock, state, t);
-+
-+ /* then kick every task that is still queued */
-+ raw_spin_lock(&lock->wait_lock);
-+ list_for_each_entry(waiter, &lock->wait_list, list)
-+ wake_up_process(waiter->task);
-+ raw_spin_unlock(&lock->wait_lock);
-+}
-+EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
-+
-+/**
-+ * six_lock_counts - return held lock counts, for each lock type
-+ * @lock: lock to return counters for
-+ *
-+ * The read count comes from the percpu counters when @lock has them, and
-+ * from the lock state otherwise. The intent count includes recursive holds.
-+ *
-+ * Return: the number of times a lock is held for read, intent and write.
-+ */
-+struct six_lock_count six_lock_counts(struct six_lock *lock)
-+{
-+ struct six_lock_count ret;
-+ u32 intent_held, write_held;
-+
-+ if (lock->readers)
-+ ret.n[SIX_LOCK_read] = pcpu_read_count(lock);
-+ else
-+ ret.n[SIX_LOCK_read] = atomic_read(&lock->state) & SIX_LOCK_HELD_read;
-+
-+ intent_held = atomic_read(&lock->state) & SIX_LOCK_HELD_intent;
-+ ret.n[SIX_LOCK_intent] = (intent_held ? 1 : 0) + lock->intent_lock_recurse;
-+
-+ write_held = atomic_read(&lock->state) & SIX_LOCK_HELD_write;
-+ ret.n[SIX_LOCK_write] = write_held ? 1 : 0;
-+
-+ return ret;
-+}
-+EXPORT_SYMBOL_GPL(six_lock_counts);
-+
-+/**
-+ * six_lock_readers_add - directly manipulate reader count of a lock
-+ * @lock: lock to add/subtract readers for
-+ * @nr: reader count to add/subtract
-+ *
-+ * An upper layer that implements lock reentrancy may end up holding both read
-+ * and intent locks on the same lock.
-+ *
-+ * Taking a write lock in that situation would self-deadlock on our own read
-+ * locks: six locks do no per-thread tracking of held locks, so they cannot
-+ * tell the current thread's read locks from those of other threads.
-+ *
-+ * The upper layer that is tracking held locks may however, if trylock() has
-+ * failed, count up its own read locks, subtract them, take the write lock,
-+ * and then re-add them.
-+ *
-+ * As in any other situation when taking a write lock, @lock must be held for
-+ * intent one (or more) times, so @lock will never be left unlocked.
-+ */
-+void six_lock_readers_add(struct six_lock *lock, int nr)
-+{
-+ if (!lock->readers) {
-+ EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
-+ /* reader count starts at bit 0 */
-+ atomic_add(nr, &lock->state);
-+ return;
-+ }
-+
-+ this_cpu_add(*lock->readers, nr);
-+}
-+EXPORT_SYMBOL_GPL(six_lock_readers_add);
-+
-+/**
-+ * six_lock_exit - release resources held by a lock prior to freeing
-+ * @lock: lock to exit
-+ *
-+ * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
-+ * required to free the percpu read counts.
-+ *
-+ * Warns if @lock still appears to be held for read.
-+ */
-+void six_lock_exit(struct six_lock *lock)
-+{
-+ WARN_ON(lock->readers && pcpu_read_count(lock));
-+ WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
-+
-+ free_percpu(lock->readers);
-+ lock->readers = NULL;
-+}
-+EXPORT_SYMBOL_GPL(six_lock_exit);
-+
-+/*
-+ * __six_lock_init() - initialize @lock; see also the six_lock_init() macro
-+ * @lock: lock to initialize
-+ * @name: lock name passed to lockdep_init_map()
-+ * @key: lock class key passed to lockdep_init_map()
-+ * @flags: SIX_LOCK_INIT_PCPU requests percpu reader counts (kernel builds only)
-+ *
-+ * Only state, wait_lock and wait_list (plus dep_map and, on request, readers)
-+ * are initialized here.
-+ * NOTE(review): the remaining fields are presumably expected to be zeroed by
-+ * the caller - confirm against callers.
-+ */
-+void __six_lock_init(struct six_lock *lock, const char *name,
-+ struct lock_class_key *key, enum six_lock_init_flags flags)
-+{
-+ atomic_set(&lock->state, 0);
-+ raw_spin_lock_init(&lock->wait_lock);
-+ INIT_LIST_HEAD(&lock->wait_list);
-+#ifdef CONFIG_DEBUG_LOCK_ALLOC
-+ debug_check_no_locks_freed((void *) lock, sizeof(*lock));
-+ lockdep_init_map(&lock->dep_map, name, key, 0);
-+#endif
-+
-+ /*
-+ * Don't assume that we have real percpu variables available in
-+ * userspace:
-+ */
-+#ifdef __KERNEL__
-+ if (flags & SIX_LOCK_INIT_PCPU) {
-+ /*
-+ * We don't return an error here on memory allocation failure
-+ * since percpu is an optimization, and locks will work with the
-+ * same semantics in non-percpu mode: callers can check for
-+ * failure if they wish by checking lock->readers, but generally
-+ * will not want to treat it as an error.
-+ */
-+ lock->readers = alloc_percpu(unsigned);
-+ }
-+#endif
-+}
-+EXPORT_SYMBOL_GPL(__six_lock_init);
-diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h
-new file mode 100644
-index 000000000000..4c268b0b8316
---- /dev/null
-+++ b/fs/bcachefs/six.h
-@@ -0,0 +1,393 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+
-+#ifndef _LINUX_SIX_H
-+#define _LINUX_SIX_H
-+
-+/**
-+ * DOC: SIX locks overview
-+ *
-+ * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
-+ * but with an additional state: read/shared, intent, exclusive/write
-+ *
-+ * The purpose of the intent state is to allow for greater concurrency on tree
-+ * structures without deadlocking. In general, a read can't be upgraded to a
-+ * write lock without deadlocking, so an operation that updates multiple nodes
-+ * will have to take write locks for the full duration of the operation.
-+ *
-+ * But by adding an intent state, which is exclusive with other intent locks but
-+ * not with readers, we can take intent locks at the start of the operation,
-+ * and then take write locks only for the actual update to each individual
-+ * node, without deadlocking.
-+ *
-+ * Example usage:
-+ * six_lock_read(&foo->lock);
-+ * six_unlock_read(&foo->lock);
-+ *
-+ * An intent lock must be held before taking a write lock:
-+ * six_lock_intent(&foo->lock);
-+ * six_lock_write(&foo->lock);
-+ * six_unlock_write(&foo->lock);
-+ * six_unlock_intent(&foo->lock);
-+ *
-+ * Other operations:
-+ * six_trylock_read()
-+ * six_trylock_intent()
-+ * six_trylock_write()
-+ *
-+ * six_lock_downgrade() convert from intent to read
-+ * six_lock_tryupgrade() attempt to convert from read to intent, may fail
-+ *
-+ * There are also interfaces that take the lock type as an enum:
-+ *
-+ * six_lock_type(&foo->lock, SIX_LOCK_read);
-+ * six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
-+ * six_lock_type(&foo->lock, SIX_LOCK_write);
-+ * six_unlock_type(&foo->lock, SIX_LOCK_write);
-+ * six_unlock_type(&foo->lock, SIX_LOCK_intent);
-+ *
-+ * Lock sequence numbers - unlock(), relock():
-+ *
-+ * Locks embed sequence numbers, which are incremented on write lock/unlock.
-+ * This allows locks to be dropped and then retaken iff the state they protect
-+ * hasn't changed; this makes it much easier to avoid holding locks while e.g.
-+ * doing IO or allocating memory.
-+ *
-+ * Example usage:
-+ * six_lock_read(&foo->lock);
-+ * u32 seq = six_lock_seq(&foo->lock);
-+ * six_unlock_read(&foo->lock);
-+ *
-+ * some_operation_that_may_block();
-+ *
-+ * if (six_relock_read(&foo->lock, seq)) { ... }
-+ *
-+ * If the relock operation succeeds, it is as if the lock was never unlocked.
-+ *
-+ * Reentrancy:
-+ *
-+ * Six locks are not by themselves reentrant, but have counters for both the
-+ * read and intent states that can be used to provide reentrancy by an upper
-+ * layer that tracks held locks. If a lock is known to already be held in the
-+ * read or intent state, six_lock_increment() can be used to bump the "lock
-+ * held in this state" counter, increasing the number of unlock calls that
-+ * will be required to fully unlock it.
-+ *
-+ * Example usage:
-+ * six_lock_read(&foo->lock);
-+ * six_lock_increment(&foo->lock, SIX_LOCK_read);
-+ * six_unlock_read(&foo->lock);
-+ * six_unlock_read(&foo->lock);
-+ * foo->lock is now fully unlocked.
-+ *
-+ * Since the intent state supersedes read, it's legal to increment the read
-+ * counter when holding an intent lock, but not the reverse.
-+ *
-+ * A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
-+ * is not legal.
-+ *
-+ * should_sleep_fn:
-+ *
-+ * There is a six_lock() variant that takes a function pointer that is called
-+ * immediately prior to schedule() when blocking, and may return an error to
-+ * abort.
-+ *
-+ * One possible use for this feature is when objects being locked are part of
-+ * a cache and may be reused, and lock ordering is based on a property of the
-+ * object that will change when the object is reused - i.e. logical key order.
-+ *
-+ * If looking up an object in the cache may race with object reuse, and lock
-+ * ordering is required to prevent deadlock, object reuse may change the
-+ * correct lock order for that object and cause a deadlock. should_sleep_fn
-+ * can be used to check if the object is still the object we want and avoid
-+ * this deadlock.
-+ *
-+ * Wait list entry interface:
-+ *
-+ * There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
-+ * wait list entry. By embedding six_lock_waiter into another object, and by
-+ * traversing lock waitlists, it is then possible for an upper layer to
-+ * implement full cycle detection for deadlock avoidance.
-+ *
-+ * should_sleep_fn should be used for invoking the cycle detector, walking the
-+ * graph of held locks to check for a deadlock. The upper layer must track
-+ * held locks for each thread, and each thread's held locks must be reachable
-+ * from its six_lock_waiter object.
-+ *
-+ * six_lock_waiter() will add the wait object to the waitlist before re-trying
-+ * taking the lock, and before calling should_sleep_fn, and the wait object will
-+ * not be removed from the waitlist until either the lock has been successfully
-+ * acquired, or we aborted because should_sleep_fn returned an error.
-+ *
-+ * Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
-+ * have timestamps in strictly ascending order - this is so the timestamp can
-+ * be used as a cursor for lock graph traverse.
-+ */
-+
-+#include <linux/lockdep.h>
-+#include <linux/sched.h>
-+#include <linux/types.h>
-+
-+#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
-+#include <linux/osq_lock.h>
-+#endif
-+
-+/*
-+ * Lock types. The numeric values matter: they are used as shift counts
-+ * (SIX_LOCK_WAITING_read << type) and as array indices (l[type], n[type]).
-+ */
-+enum six_lock_type {
-+ SIX_LOCK_read,
-+ SIX_LOCK_intent,
-+ SIX_LOCK_write,
-+};
-+
-+struct six_lock {
-+ atomic_t state; /* held/waiting/nospin bits; read count starts at bit 0 */
-+ u32 seq; /* see six_lock_seq(); bumped by six_unlock_ip() on write unlock */
-+ unsigned intent_lock_recurse; /* extra intent holds from six_lock_increment() */
-+ struct task_struct *owner; /* cleared when the intent lock is dropped */
-+ unsigned __percpu *readers; /* percpu read counts; NULL when not in percpu mode */
-+#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
-+ struct optimistic_spin_queue osq; /* taken while optimistically spinning */
-+#endif
-+ raw_spinlock_t wait_lock; /* protects wait_list */
-+ struct list_head wait_list; /* queued struct six_lock_waiter entries */
-+#ifdef CONFIG_DEBUG_LOCK_ALLOC
-+ struct lockdep_map dep_map;
-+#endif
-+};
-+
-+struct six_lock_waiter {
-+ struct list_head list; /* entry on six_lock.wait_list */
-+ struct task_struct *task; /* the waiting task; target of wake_up_process() */
-+ enum six_lock_type lock_want; /* lock type this waiter is blocked on */
-+ bool lock_acquired; /* set by the waker once the lock was taken for us */
-+ u64 start_time; /* queue time; strictly increasing along a waitlist */
-+};
-+
-+/*
-+ * Callback invoked while blocking on a lock, just before schedule(). It is
-+ * passed the lock and the caller's opaque pointer; a nonzero return aborts
-+ * the lock attempt and is returned to the caller.
-+ */
-+typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);
-+
-+void six_lock_exit(struct six_lock *lock);
-+
-+/* Flags for six_lock_init() / __six_lock_init() */
-+enum six_lock_init_flags {
-+ SIX_LOCK_INIT_PCPU = 1U << 0, /* request percpu reader counts */
-+};
-+
-+void __six_lock_init(struct six_lock *lock, const char *name,
-+ struct lock_class_key *key, enum six_lock_init_flags flags);
-+
-+/**
-+ * six_lock_init - initialize a six lock
-+ * @lock: lock to initialize
-+ * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU
-+ *
-+ * Each expansion declares its own static lock_class_key, and the name passed
-+ * on to __six_lock_init() is the stringified @lock expression.
-+ */
-+#define six_lock_init(lock, flags) \
-+do { \
-+ static struct lock_class_key __key; \
-+ \
-+ __six_lock_init((lock), #lock, &__key, flags); \
-+} while (0)
-+
-+/**
-+ * six_lock_seq - obtain current lock sequence number
-+ * @lock: six_lock to obtain sequence number for
-+ *
-+ * @lock should be held for read or intent, and not write
-+ *
-+ * By saving the lock sequence number, we can unlock @lock and then (typically
-+ * after some blocking operation) attempt to relock it: the relock will succeed
-+ * if the sequence number hasn't changed, meaning no write locks have been taken
-+ * and state corresponding to what @lock protects is still valid.
-+ *
-+ * Return: the current value of @lock's sequence counter.
-+ */
-+static inline u32 six_lock_seq(const struct six_lock *lock)
-+{
-+ return lock->seq;
-+}
-+
-+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
-+
-+/**
-+ * six_trylock_type - attempt to take a six lock without blocking
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ *
-+ * Convenience wrapper around six_trylock_ip() that passes _THIS_IP_ as the
-+ * ip.
-+ *
-+ * Return: true on success, false on failure.
-+ */
-+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
-+{
-+ return six_trylock_ip(lock, type, _THIS_IP_);
-+}
-+
-+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
-+ struct six_lock_waiter *wait,
-+ six_lock_should_sleep_fn should_sleep_fn, void *p,
-+ unsigned long ip);
-+
-+/**
-+ * six_lock_waiter - take a lock, with full waitlist interface
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @wait: pointer to wait object, which will be added to lock's waitlist
-+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
-+ * to scheduling
-+ * @p: passed through to @should_sleep_fn
-+ *
-+ * This is a convenience wrapper around six_lock_ip_waiter() that passes
-+ * _THIS_IP_ as the ip; see that function for full documentation.
-+ *
-+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
-+ */
-+static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
-+ struct six_lock_waiter *wait,
-+ six_lock_should_sleep_fn should_sleep_fn, void *p)
-+{
-+ return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
-+}
-+
-+/**
-+ * six_lock_ip - take a six lock
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
-+ * to scheduling
-+ * @p: passed through to @should_sleep_fn
-+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
-+ *
-+ * Calls six_lock_ip_waiter() with a wait object on this function's stack.
-+ *
-+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
-+ */
-+static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
-+ six_lock_should_sleep_fn should_sleep_fn, void *p,
-+ unsigned long ip)
-+{
-+ struct six_lock_waiter wait;
-+
-+ return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
-+}
-+
-+/**
-+ * six_lock_type - take a six lock
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
-+ * to scheduling
-+ * @p: passed through to @should_sleep_fn
-+ *
-+ * Calls six_lock_ip_waiter() with a wait object on this function's stack and
-+ * _THIS_IP_ as the ip.
-+ *
-+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
-+ */
-+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
-+ six_lock_should_sleep_fn should_sleep_fn, void *p)
-+{
-+ struct six_lock_waiter wait;
-+
-+ return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
-+}
-+
-+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
-+ unsigned seq, unsigned long ip);
-+
-+/**
-+ * six_relock_type - attempt to re-take a lock that was held previously
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @seq: lock sequence number obtained from six_lock_seq() while lock was
-+ * held previously
-+ *
-+ * Convenience wrapper around six_relock_ip() that passes _THIS_IP_ as the ip.
-+ *
-+ * Return: true on success, false on failure.
-+ */
-+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
-+ unsigned seq)
-+{
-+ return six_relock_ip(lock, type, seq, _THIS_IP_);
-+}
-+
-+/* Out-of-line unlock primitive; six_unlock_type() below forwards to it. */
-+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
-+
-+/**
-+ * six_unlock_type - drop a six lock
-+ * @lock: lock to unlock
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ *
-+ * When a lock is held multiple times (because six_lock_increment() was used),
-+ * this decrements the 'lock held' counter by one.
-+ *
-+ * For example:
-+ * six_lock_read(&foo->lock); read count 1
-+ * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2
-+ * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 1
-+ * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 0
-+ */
-+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
-+{
-+ six_unlock_ip(lock, type, _THIS_IP_);
-+}
-+
-+/*
-+ * __SIX_LOCK(type) - generate the per-lock-type convenience wrappers.
-+ *
-+ * For a given @type (read, intent or write) this defines static inline
-+ * six_trylock_ip_<type>(), six_trylock_<type>(), six_lock_ip_waiter_<type>(),
-+ * six_lock_ip_<type>(), six_relock_ip_<type>(), six_relock_<type>(),
-+ * six_lock_<type>(), six_unlock_ip_<type>() and six_unlock_<type>(). Each one
-+ * forwards to the generic function with SIX_LOCK_<type> filled in; the
-+ * variants without "_ip" in their name pass _THIS_IP_.
-+ *
-+ * The macro is instantiated for all three lock types below and then
-+ * undefined.
-+ */
-+#define __SIX_LOCK(type) \
-+static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
-+{ \
-+ return six_trylock_ip(lock, SIX_LOCK_##type, ip); \
-+} \
-+ \
-+static inline bool six_trylock_##type(struct six_lock *lock) \
-+{ \
-+ return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \
-+} \
-+ \
-+static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \
-+ struct six_lock_waiter *wait, \
-+ six_lock_should_sleep_fn should_sleep_fn, void *p,\
-+ unsigned long ip) \
-+{ \
-+ return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
-+} \
-+ \
-+static inline int six_lock_ip_##type(struct six_lock *lock, \
-+ six_lock_should_sleep_fn should_sleep_fn, void *p, \
-+ unsigned long ip) \
-+{ \
-+ return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
-+} \
-+ \
-+static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
-+{ \
-+ return six_relock_ip(lock, SIX_LOCK_##type, seq, ip); \
-+} \
-+ \
-+static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \
-+{ \
-+ return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_); \
-+} \
-+ \
-+static inline int six_lock_##type(struct six_lock *lock, \
-+ six_lock_should_sleep_fn fn, void *p)\
-+{ \
-+ return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \
-+} \
-+ \
-+static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip) \
-+{ \
-+ six_unlock_ip(lock, SIX_LOCK_##type, ip); \
-+} \
-+ \
-+static inline void six_unlock_##type(struct six_lock *lock) \
-+{ \
-+ six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \
-+}
-+
-+__SIX_LOCK(read)
-+__SIX_LOCK(intent)
-+__SIX_LOCK(write)
-+#undef __SIX_LOCK
-+
-+/*
-+ * Out-of-line six lock operations; only their prototypes are visible in this
-+ * header.
-+ */
-+void six_lock_downgrade(struct six_lock *);
-+bool six_lock_tryupgrade(struct six_lock *);
-+bool six_trylock_convert(struct six_lock *, enum six_lock_type,
-+ enum six_lock_type);
-+
-+void six_lock_increment(struct six_lock *, enum six_lock_type);
-+
-+void six_lock_wakeup_all(struct six_lock *);
-+
-+/*
-+ * Result type of six_lock_counts().
-+ * NOTE(review): n[3] presumably holds one count per lock type (read, intent,
-+ * write) - confirm against the implementation.
-+ */
-+struct six_lock_count {
-+ unsigned n[3];
-+};
-+
-+struct six_lock_count six_lock_counts(struct six_lock *);
-+void six_lock_readers_add(struct six_lock *, int);
-+
-+#endif /* _LINUX_SIX_H */
-diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
-new file mode 100644
-index 000000000000..e9af77b384c7
---- /dev/null
-+++ b/fs/bcachefs/snapshot.c
-@@ -0,0 +1,1713 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "btree_key_cache.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "fs.h"
-+#include "snapshot.h"
-+
-+#include <linux/random.h>
-+
-+/*
-+ * Snapshot trees:
-+ *
-+ * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they
-+ * exist to provide a stable identifier for the whole lifetime of a snapshot
-+ * tree.
-+ */
-+
-+/*
-+ * Print a snapshot_tree key's master subvolume and root snapshot ID (both
-+ * converted from little endian) into @out. @c is not used.
-+ */
-+void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
-+				struct bkey_s_c k)
-+{
-+	struct bkey_s_c_snapshot_tree tree = bkey_s_c_to_snapshot_tree(k);
-+	u32 master_subvol = le32_to_cpu(tree.v->master_subvol);
-+	u32 root_snapshot = le32_to_cpu(tree.v->root_snapshot);
-+
-+	prt_printf(out, "subvol %u root snapshot %u",
-+		   master_subvol, root_snapshot);
-+}
-+
-+/*
-+ * Validate a snapshot_tree key: its position must lie within
-+ * POS(0, 1)..POS(0, U32_MAX). @flags is not used here.
-+ *
-+ * Returns 0 when the check passes; on failure the value is presumably set by
-+ * bkey_fsck_err_on(), which also uses the fsck_err label - confirm against the
-+ * macro definition.
-+ */
-+int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
-+ bkey_lt(k.k->p, POS(0, 1)), c, err,
-+ snapshot_tree_pos_bad,
-+ "bad pos");
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Read the snapshot_tree value at POS(0, @id) into @s.
-+ *
-+ * Any error matching ENOENT is reported as -BCH_ERR_ENOENT_snapshot_tree;
-+ * every other result of the lookup is returned unchanged.
-+ */
-+int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
-+			      struct bch_snapshot_tree *s)
-+{
-+	int ret;
-+
-+	ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
-+				      BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
-+	if (!bch2_err_matches(ret, ENOENT))
-+		return ret;
-+
-+	return -BCH_ERR_ENOENT_snapshot_tree;
-+}
-+
-+/*
-+ * Find an empty slot in BTREE_ID_snapshot_trees (searching up to
-+ * POS(0, U32_MAX)) and allocate a new snapshot_tree key there.
-+ *
-+ * Returns the new key or an ERR_PTR(); -BCH_ERR_ENOSPC_btree_slot from the slot
-+ * search is translated to -BCH_ERR_ENOSPC_snapshot_tree.
-+ */
-+struct bkey_i_snapshot_tree *
-+__bch2_snapshot_tree_create(struct btree_trans *trans)
-+{
-+	struct bkey_i_snapshot_tree *new_tree;
-+	struct btree_iter iter;
-+	int ret;
-+
-+	ret = bch2_bkey_get_empty_slot(trans, &iter,
-+			BTREE_ID_snapshot_trees, POS(0, U32_MAX));
-+	if (ret == -BCH_ERR_ENOSPC_btree_slot)
-+		return ERR_PTR(-BCH_ERR_ENOSPC_snapshot_tree);
-+	if (ret)
-+		return ERR_PTR(ret);
-+
-+	new_tree = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
-+	ret = PTR_ERR_OR_ZERO(new_tree);
-+
-+	/* The iterator is released whether or not the allocation succeeded. */
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	if (ret)
-+		return ERR_PTR(ret);
-+	return new_tree;
-+}
-+
-+/*
-+ * Allocate a snapshot_tree key, fill in its master subvolume and root
-+ * snapshot, and report the key's offset through @tree_id.
-+ *
-+ * Returns 0 on success or the error from __bch2_snapshot_tree_create().
-+ */
-+static int bch2_snapshot_tree_create(struct btree_trans *trans,
-+				     u32 root_id, u32 subvol_id, u32 *tree_id)
-+{
-+	struct bkey_i_snapshot_tree *tree = __bch2_snapshot_tree_create(trans);
-+	int ret = PTR_ERR_OR_ZERO(tree);
-+
-+	if (ret)
-+		return ret;
-+
-+	tree->v.master_subvol = cpu_to_le32(subvol_id);
-+	tree->v.root_snapshot = cpu_to_le32(root_id);
-+	*tree_id = tree->k.p.offset;
-+
-+	return 0;
-+}
-+
-+/* Snapshot nodes: */
-+
-+/*
-+ * Ancestor test that only follows ->parent pointers in the in-memory snapshot
-+ * table: walk up from @id while the current ID is nonzero and below
-+ * @ancestor, then report whether the walk stopped exactly on @ancestor.
-+ */
-+static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
-+{
-+	struct snapshot_table *table;
-+	u32 cur;
-+
-+	rcu_read_lock();
-+	table = rcu_dereference(c->snapshots);
-+
-+	for (cur = id; cur && cur < ancestor; cur = __snapshot_t(table, cur)->parent)
-+		;
-+
-+	rcu_read_unlock();
-+
-+	return cur == ancestor;
-+}
-+
-+/*
-+ * Pick the next hop when walking from @id towards @ancestor: the
-+ * highest-indexed skip[] entry that is <= @ancestor, falling back to the
-+ * parent when none of the three entries qualifies.
-+ */
-+static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
-+{
-+	const struct snapshot_t *node = __snapshot_t(t, id);
-+	unsigned i;
-+
-+	/* Try skip[2], then skip[1], then skip[0]. */
-+	for (i = 3; i-- > 0;)
-+		if (node->skip[i] <= ancestor)
-+			return node->skip[i];
-+
-+	return node->parent;
-+}
-+
-+/*
-+ * Test whether @ancestor is @id itself or one of its ancestors, using the
-+ * in-memory snapshot table under rcu_read_lock().
-+ *
-+ * NOTE(review): "ancestor - IS_ANCESTOR_BITMAP" presumably wraps around when
-+ * ancestor < IS_ANCESTOR_BITMAP (u32 arithmetic) - confirm that such small IDs
-+ * cannot reach this function.
-+ */
-+bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
-+{
-+ struct snapshot_table *t;
-+ bool ret;
-+
-+ /* Debug assertion on the recovery pass reached so far. */
-+ EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
-+
-+ rcu_read_lock();
-+ t = rcu_dereference(c->snapshots);
-+
-+ /* Hop upwards until @id is within IS_ANCESTOR_BITMAP of @ancestor. */
-+ while (id && id < ancestor - IS_ANCESTOR_BITMAP)
-+ id = get_ancestor_below(t, id, ancestor);
-+
-+ if (id && id < ancestor) {
-+ /* Bit (ancestor - id - 1) of id's bitmap answers the question. */
-+ ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
-+
-+ /* Debug cross-check against the parent-pointer walk. */
-+ EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor));
-+ } else {
-+ ret = id == ancestor;
-+ }
-+
-+ rcu_read_unlock();
-+
-+ return ret;
-+}
-+
-+/*
-+ * Slow path of snapshot_t_mut(): allocate a zeroed table large enough to hold
-+ * the entry for @id (at least 16 entries, rounded up to a power of two), copy
-+ * the existing entries over, publish the new table and free the old one via
-+ * kvfree_rcu_mightsleep().
-+ *
-+ * Returns the entry for @id in the new table, or NULL if allocation fails.
-+ */
-+static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
-+{
-+	size_t idx = U32_MAX - id;
-+	size_t new_size = max(16UL, roundup_pow_of_two(idx + 1));
-+	struct snapshot_table *old_table, *new_table;
-+
-+	new_table = kvzalloc(struct_size(new_table, s, new_size), GFP_KERNEL);
-+	if (!new_table)
-+		return NULL;
-+
-+	old_table = rcu_dereference_protected(c->snapshots, true);
-+	if (old_table)
-+		memcpy(new_table->s, old_table->s,
-+		       sizeof(new_table->s[0]) * c->snapshot_table_size);
-+
-+	rcu_assign_pointer(c->snapshots, new_table);
-+	c->snapshot_table_size = new_size;
-+	kvfree_rcu_mightsleep(old_table);
-+
-+	/* c->snapshots now points at new_table. */
-+	return &new_table->s[idx];
-+}
-+
-+/*
-+ * Return a writable pointer to the in-memory entry for snapshot @id, growing
-+ * the table through __snapshot_t_mut() when the index is out of range. The
-+ * caller must hold c->snapshot_table_lock. May return NULL if growing fails.
-+ */
-+static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
-+{
-+	size_t idx = U32_MAX - id;
-+
-+	lockdep_assert_held(&c->snapshot_table_lock);
-+
-+	if (unlikely(idx >= c->snapshot_table_size))
-+		return __snapshot_t_mut(c, id);
-+
-+	return &rcu_dereference_protected(c->snapshots, true)->s[idx];
-+}
-+
-+/*
-+ * Print the fields of a snapshot key into @out. The depth and skiplist fields
-+ * are only printed when the key's value extends past the offset of ->depth.
-+ * @c is not used.
-+ */
-+void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
-+			   struct bkey_s_c k)
-+{
-+	struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
-+
-+	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
-+		   BCH_SNAPSHOT_SUBVOL(snap.v),
-+		   BCH_SNAPSHOT_DELETED(snap.v),
-+		   le32_to_cpu(snap.v->parent),
-+		   le32_to_cpu(snap.v->children[0]),
-+		   le32_to_cpu(snap.v->children[1]),
-+		   le32_to_cpu(snap.v->subvol),
-+		   le32_to_cpu(snap.v->tree));
-+
-+	/* Shorter values carry no depth/skiplist fields. */
-+	if (bkey_val_bytes(k.k) <= offsetof(struct bch_snapshot, depth))
-+		return;
-+
-+	prt_printf(out, " depth %u skiplist %u %u %u",
-+		   le32_to_cpu(snap.v->depth),
-+		   le32_to_cpu(snap.v->skip[0]),
-+		   le32_to_cpu(snap.v->skip[1]),
-+		   le32_to_cpu(snap.v->skip[2]));
-+}
-+
-+/*
-+ * Validate a snapshot key. Checks, in order: the key position, the parent
-+ * pointer, the ordering and uniqueness of the child pointers, each child ID,
-+ * and (when the value is long enough to contain it) the skiplist.
-+ * @flags is not used here.
-+ *
-+ * Returns 0 when every check passes; on failure the value is presumably set by
-+ * bkey_fsck_err_on(), which also uses the fsck_err label - confirm against the
-+ * macro definition.
-+ */
-+int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_snapshot s;
-+ u32 i, id;
-+ int ret = 0;
-+
-+ /* Position must lie within POS(0, 1)..POS(0, U32_MAX). */
-+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
-+ bkey_lt(k.k->p, POS(0, 1)), c, err,
-+ snapshot_pos_bad,
-+ "bad pos");
-+
-+ s = bkey_s_c_to_snapshot(k);
-+
-+ /* A nonzero parent ID must be greater than this node's ID. */
-+ id = le32_to_cpu(s.v->parent);
-+ bkey_fsck_err_on(id && id <= k.k->p.offset, c, err,
-+ snapshot_parent_bad,
-+ "bad parent node (%u <= %llu)",
-+ id, k.k->p.offset);
-+
-+ /* Normalized form: children[0] >= children[1]. */
-+ bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err,
-+ snapshot_children_not_normalized,
-+ "children not normalized");
-+
-+ bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err,
-+ snapshot_child_duplicate,
-+ "duplicate child nodes");
-+
-+ /* Every child ID must be smaller than this node's ID. */
-+ for (i = 0; i < 2; i++) {
-+ id = le32_to_cpu(s.v->children[i]);
-+
-+ bkey_fsck_err_on(id >= k.k->p.offset, c, err,
-+ snapshot_child_bad,
-+ "bad child node (%u >= %llu)",
-+ id, k.k->p.offset);
-+ }
-+
-+ /* Skiplist checks only apply when the value extends past ->skip's offset. */
-+ if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
-+ /* Entries must be in ascending order. */
-+ bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
-+ le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err,
-+ snapshot_skiplist_not_normalized,
-+ "skiplist not normalized");
-+
-+ /* A nonzero entry may not be smaller than the parent ID. */
-+ for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
-+ id = le32_to_cpu(s.v->skip[i]);
-+
-+ bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err,
-+ snapshot_skiplist_bad,
-+ "bad skiplist node %u", id);
-+ }
-+ }
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Fill in the is_ancestor bitmap of snapshot @id: walk up the parent chain
-+ * with bch2_snapshot_parent_early() and set bit (ancestor - id - 1) for each
-+ * ancestor, stopping at the first one whose bit index would not fit in
-+ * IS_ANCESTOR_BITMAP.
-+ */
-+static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
-+{
-+	struct snapshot_t *t = snapshot_t_mut(c, id);
-+	u32 ancestor = id;
-+
-+	for (;;) {
-+		u32 bit;
-+
-+		ancestor = bch2_snapshot_parent_early(c, ancestor);
-+		if (!ancestor)
-+			break;
-+
-+		bit = ancestor - id - 1;
-+		if (!(bit < IS_ANCESTOR_BITMAP))
-+			break;
-+
-+		__set_bit(bit, t->is_ancestor);
-+	}
-+}
-+
-+/*
-+ * Locked wrapper: runs __set_is_ancestor_bitmap() for @id with
-+ * c->snapshot_table_lock held.
-+ */
-+static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
-+{
-+ mutex_lock(&c->snapshot_table_lock);
-+ __set_is_ancestor_bitmap(c, id);
-+ mutex_unlock(&c->snapshot_table_lock);
-+}
-+
-+/*
-+ * Update the in-memory snapshot table entry for the ID at new.k->p.offset.
-+ *
-+ * If @new is a KEY_TYPE_snapshot key its fields are copied (converted from
-+ * little endian) into the entry and the is_ancestor bitmap is recomputed; for
-+ * any other key type the entry is zeroed. @btree, @level, @old and @flags are
-+ * not used here.
-+ *
-+ * Returns 0, or -BCH_ERR_ENOMEM_mark_snapshot if the table entry could not be
-+ * obtained.
-+ */
-+int bch2_mark_snapshot(struct btree_trans *trans,
-+ enum btree_id btree, unsigned level,
-+ struct bkey_s_c old, struct bkey_s_c new,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct snapshot_t *t;
-+ u32 id = new.k->p.offset;
-+ int ret = 0;
-+
-+ mutex_lock(&c->snapshot_table_lock);
-+
-+ /* May grow the table; NULL means the allocation failed. */
-+ t = snapshot_t_mut(c, id);
-+ if (!t) {
-+ ret = -BCH_ERR_ENOMEM_mark_snapshot;
-+ goto err;
-+ }
-+
-+ if (new.k->type == KEY_TYPE_snapshot) {
-+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
-+
-+ t->parent = le32_to_cpu(s.v->parent);
-+ t->children[0] = le32_to_cpu(s.v->children[0]);
-+ t->children[1] = le32_to_cpu(s.v->children[1]);
-+ /* The subvol field is only taken when the SUBVOL flag is set. */
-+ t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
-+ t->tree = le32_to_cpu(s.v->tree);
-+
-+ /* Values too short to contain depth/skip[] get zeroes instead. */
-+ if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
-+ t->depth = le32_to_cpu(s.v->depth);
-+ t->skip[0] = le32_to_cpu(s.v->skip[0]);
-+ t->skip[1] = le32_to_cpu(s.v->skip[1]);
-+ t->skip[2] = le32_to_cpu(s.v->skip[2]);
-+ } else {
-+ t->depth = 0;
-+ t->skip[0] = 0;
-+ t->skip[1] = 0;
-+ t->skip[2] = 0;
-+ }
-+
-+ /* snapshot_table_lock is already held, so use the unlocked variant. */
-+ __set_is_ancestor_bitmap(c, id);
-+
-+ /*
-+ * Deleted node: flag the filesystem, and start the asynchronous
-+ * cleanup once recovery is past the delete_dead_snapshots pass.
-+ */
-+ if (BCH_SNAPSHOT_DELETED(s.v)) {
-+ set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
-+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
-+ bch2_delete_dead_snapshots_async(c);
-+ }
-+ } else {
-+ memset(t, 0, sizeof(*t));
-+ }
-+err:
-+ mutex_unlock(&c->snapshot_table_lock);
-+ return ret;
-+}
-+
-+/*
-+ * Read the snapshot value at POS(0, @id) in BTREE_ID_snapshots into @s, using
-+ * BTREE_ITER_WITH_UPDATES. Returns whatever bch2_bkey_get_val_typed() returns.
-+ */
-+int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
-+ struct bch_snapshot *s)
-+{
-+ return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
-+ BTREE_ITER_WITH_UPDATES, snapshot, s);
-+}
-+
-+/*
-+ * Report whether snapshot @id is live.
-+ *
-+ * Returns 0 for @id == 0 or for a snapshot with the DELETED flag set, 1 for a
-+ * snapshot without it, or the lookup's error code. A lookup error matching
-+ * ENOENT is also logged.
-+ */
-+static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
-+{
-+	struct bch_snapshot snap;
-+	int ret;
-+
-+	if (id == 0)
-+		return 0;
-+
-+	ret = bch2_snapshot_lookup(trans, id, &snap);
-+	if (bch2_err_matches(ret, ENOENT))
-+		bch_err(trans->c, "snapshot node %u not found", id);
-+
-+	return ret ? ret : !BCH_SNAPSHOT_DELETED(&snap);
-+}
-+
-+/*
-+ * If @k is a snapshot with just one live child, it's part of a linear chain,
-+ * which we consider to be an equivalence class: and then after snapshot
-+ * deletion cleanup, there should only be a single key at a given position in
-+ * this equivalence class.
-+ *
-+ * This sets the equivalence class of @k to be the child's equivalence class, if
-+ * it's part of such a linear chain: this correctly sets equivalence classes on
-+ * startup if we run leaf to root (i.e. in natural key order).
-+ *
-+ * Keys that are not KEY_TYPE_snapshot are ignored.
-+ *
-+ * Returns 0 on success, a negative error from bch2_snapshot_live(), or
-+ * -BCH_ERR_ENOMEM_mark_snapshot if a snapshot table entry cannot be obtained.
-+ */
-+static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
-+{
-+	struct bch_fs *c = trans->c;
-+	unsigned i, nr_live = 0, live_idx = 0;
-+	struct bkey_s_c_snapshot snap;
-+	struct snapshot_t *t;
-+	u32 id = k.k->p.offset, child[2];
-+	u32 equiv = id;
-+	int ret = 0;
-+
-+	if (k.k->type != KEY_TYPE_snapshot)
-+		return 0;
-+
-+	snap = bkey_s_c_to_snapshot(k);
-+
-+	child[0] = le32_to_cpu(snap.v->children[0]);
-+	child[1] = le32_to_cpu(snap.v->children[1]);
-+
-+	for (i = 0; i < 2; i++) {
-+		ret = bch2_snapshot_live(trans, child[i]);
-+		if (ret < 0)
-+			return ret;
-+
-+		if (ret)
-+			live_idx = i;
-+		nr_live += ret;
-+	}
-+	ret = 0;
-+
-+	mutex_lock(&c->snapshot_table_lock);
-+
-+	/*
-+	 * Read the child's class before looking up our own entry:
-+	 * snapshot_t_mut() may reallocate the table, which would invalidate a
-+	 * pointer obtained by an earlier call. Doing both lookups inside one
-+	 * assignment expression leaves their order unspecified.
-+	 */
-+	if (nr_live == 1) {
-+		t = snapshot_t_mut(c, child[live_idx]);
-+		if (!t) {
-+			ret = -BCH_ERR_ENOMEM_mark_snapshot;
-+			goto unlock;
-+		}
-+		equiv = t->equiv;
-+	}
-+
-+	t = snapshot_t_mut(c, id);
-+	if (!t) {
-+		ret = -BCH_ERR_ENOMEM_mark_snapshot;
-+		goto unlock;
-+	}
-+	t->equiv = equiv;
-+unlock:
-+	mutex_unlock(&c->snapshot_table_lock);
-+
-+	return ret;
-+}
-+
-+/* fsck: */
-+
-+/* Return child pointer number @child of snapshot @id from the in-memory table. */
-+static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
-+{
-+	const struct snapshot_t *node = snapshot_t(c, id);
-+
-+	return node->children[child];
-+}
-+
-+/* "Left" child of @id: entry 0 of its children[] array. */
-+static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
-+{
-+ return bch2_snapshot_child(c, id, 0);
-+}
-+
-+/* "Right" child of @id: entry 1 of its children[] array. */
-+static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
-+{
-+ return bch2_snapshot_child(c, id, 1);
-+}
-+
-+/*
-+ * Step to the next node when traversing a snapshot tree: descend to the left
-+ * child if there is one; otherwise climb towards the root until a parent has
-+ * a right child different from the node we came from. Returns 0 when there is
-+ * nothing left to visit.
-+ */
-+static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
-+{
-+	u32 next = bch2_snapshot_left_child(c, id);
-+	u32 parent;
-+
-+	if (next)
-+		return next;
-+
-+	for (parent = bch2_snapshot_parent(c, id);
-+	     parent;
-+	     id = parent, parent = bch2_snapshot_parent(c, id)) {
-+		next = bch2_snapshot_right_child(c, parent);
-+		if (next && next != id)
-+			return next;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Traverse the tree rooted at @snapshot_root with bch2_snapshot_tree_next()
-+ * and return the smallest nonzero subvolume ID found on any node, or 0 if no
-+ * node has one.
-+ */
-+static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
-+{
-+	u32 oldest = 0;
-+	u32 id;
-+
-+	for (id = snapshot_root; id; id = bch2_snapshot_tree_next(c, id)) {
-+		u32 s = snapshot_t(c, id)->subvol;
-+
-+		if (!s)
-+			continue;
-+		if (!oldest || s < oldest)
-+			oldest = s;
-+	}
-+
-+	return oldest;
-+}
-+
-+/*
-+ * Pick the master subvolume for the snapshot tree rooted at @snapshot_root
-+ * and store its ID in *@subvol_id.
-+ *
-+ * The subvolumes btree is scanned for the first subvolume whose snapshot has
-+ * @snapshot_root as an ancestor and which does not have the SNAP flag set. If
-+ * none is found, the oldest subvolume in the tree is chosen instead and its
-+ * SNAP flag is cleared.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
-+					    u32 snapshot_root, u32 *subvol_id)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	struct bkey_s_c_subvolume s;
-+	bool found = false;
-+	int ret;
-+
-+	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
-+				     0, k, ret) {
-+		if (k.k->type != KEY_TYPE_subvolume)
-+			continue;
-+
-+		s = bkey_s_c_to_subvolume(k);
-+		if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
-+			continue;
-+		if (!BCH_SUBVOLUME_SNAP(s.v)) {
-+			*subvol_id = s.k->p.offset;
-+			found = true;
-+			break;
-+		}
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	if (!ret && !found) {
-+		struct bkey_i_subvolume *u;
-+
-+		*subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
-+
-+		u = bch2_bkey_get_mut_typed(trans, &iter,
-+					    BTREE_ID_subvolumes, POS(0, *subvol_id),
-+					    0, subvolume);
-+		ret = PTR_ERR_OR_ZERO(u);
-+		if (ret)
-+			return ret;
-+
-+		SET_BCH_SUBVOLUME_SNAP(&u->v, false);
-+
-+		/*
-+		 * Release the iterator set up by bch2_bkey_get_mut_typed(),
-+		 * as the other get_mut users in this file do on success.
-+		 */
-+		bch2_trans_iter_exit(trans, &iter);
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * fsck one snapshot_tree key.
-+ *
-+ * The key is deleted if its root snapshot cannot be found, is not the root of
-+ * its tree, or does not point back at this key. Otherwise, if the master
-+ * subvolume is missing, does not belong to this tree, or is a snapshot
-+ * subvolume, a new master subvolume is chosen with
-+ * bch2_snapshot_tree_master_subvol() and written back.
-+ *
-+ * Keys that are not KEY_TYPE_snapshot_tree are ignored.
-+ */
-+static int check_snapshot_tree(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_s_c_snapshot_tree st;
-+ struct bch_snapshot s;
-+ struct bch_subvolume subvol;
-+ struct printbuf buf = PRINTBUF;
-+ u32 root_id;
-+ int ret;
-+
-+ if (k.k->type != KEY_TYPE_snapshot_tree)
-+ return 0;
-+
-+ st = bkey_s_c_to_snapshot_tree(k);
-+ root_id = le32_to_cpu(st.v->root_snapshot);
-+
-+ /* ENOENT is handled by the fsck check below; other errors abort. */
-+ ret = bch2_snapshot_lookup(trans, root_id, &s);
-+ if (ret && !bch2_err_matches(ret, ENOENT))
-+ goto err;
-+
-+ /* "ret ||" comes first, so @s is only inspected when the lookup succeeded. */
-+ if (fsck_err_on(ret ||
-+ root_id != bch2_snapshot_root(c, root_id) ||
-+ st.k->p.offset != le32_to_cpu(s.tree),
-+ c, snapshot_tree_to_missing_snapshot,
-+ "snapshot tree points to missing/incorrect snapshot:\n %s",
-+ (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
-+ ret = bch2_btree_delete_at(trans, iter, 0);
-+ goto err;
-+ }
-+
-+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
-+ false, 0, &subvol);
-+ if (ret && !bch2_err_matches(ret, ENOENT))
-+ goto err;
-+
-+ /*
-+ * NOTE(review): if the subvolume is missing and the first fsck_err_on()
-+ * evaluates to false, the following checks presumably read an
-+ * uninitialized @subvol - confirm fsck_err_on() semantics.
-+ */
-+ if (fsck_err_on(ret,
-+ c, snapshot_tree_to_missing_subvol,
-+ "snapshot tree points to missing subvolume:\n %s",
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
-+ fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
-+ le32_to_cpu(subvol.snapshot),
-+ root_id),
-+ c, snapshot_tree_to_wrong_subvol,
-+ "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
-+ fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
-+ c, snapshot_tree_to_snapshot_subvol,
-+ "snapshot tree points to snapshot subvolume:\n %s",
-+ (printbuf_reset(&buf),
-+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
-+ struct bkey_i_snapshot_tree *u;
-+ u32 subvol_id;
-+
-+ ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
-+ if (ret)
-+ goto err;
-+
-+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
-+ ret = PTR_ERR_OR_ZERO(u);
-+ if (ret)
-+ goto err;
-+
-+ u->v.master_subvol = cpu_to_le32(subvol_id);
-+ /* @st is not read again after this assignment. */
-+ st = snapshot_tree_i_to_s_c(u);
-+ }
-+err:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * For each snapshot_tree, make sure it points to the root of a snapshot tree
-+ * and that snapshot entry points back to it, or delete it.
-+ *
-+ * And, make sure it points to a subvolume within that snapshot tree, or correct
-+ * it to point to the oldest subvolume within that snapshot tree.
-+ *
-+ * Runs check_snapshot_tree() on every key in BTREE_ID_snapshot_trees, starting
-+ * at POS_MIN. Returns 0 or the first error, which is also logged.
-+ */
-+int bch2_check_snapshot_trees(struct bch_fs *c)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ ret = bch2_trans_run(c,
-+ for_each_btree_key_commit(trans, iter,
-+ BTREE_ID_snapshot_trees, POS_MIN,
-+ BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+ check_snapshot_tree(trans, &iter, k)));
-+
-+ if (ret)
-+ bch_err(c, "error %i checking snapshot trees", ret);
-+ return ret;
-+}
-+
-+/*
-+ * Check a snapshot node's snapshot_tree pointer: look up the tree @tree_id and
-+ * test whether its root snapshot is @snap_id or an ancestor of it.
-+ *
-+ * Returns 1 if so, 0 if not or if the tree does not exist, or a negative
-+ * error from the lookup.
-+ */
-+static int snapshot_tree_ptr_good(struct btree_trans *trans,
-+				  u32 snap_id, u32 tree_id)
-+{
-+	struct bch_snapshot_tree tree;
-+	u32 root_id;
-+	int ret;
-+
-+	ret = bch2_snapshot_tree_lookup(trans, tree_id, &tree);
-+	if (bch2_err_matches(ret, ENOENT))
-+		return 0;
-+	if (ret)
-+		return ret;
-+
-+	root_id = le32_to_cpu(tree.root_snapshot);
-+
-+	return bch2_snapshot_is_ancestor_early(trans->c, snap_id, root_id);
-+}
-+
-+/*
-+ * Choose a skiplist entry for a node whose parent is @id.
-+ *
-+ * Returns 0 for @id == 0 and @id itself when @id has no parent; otherwise the
-+ * result of bch2_snapshot_nth_parent() for a random n below @id's depth.
-+ */
-+u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id)
-+{
-+	const struct snapshot_t *node;
-+	u32 pick = id;
-+
-+	if (id) {
-+		rcu_read_lock();
-+		node = snapshot_t(c, id);
-+		if (node->parent)
-+			pick = bch2_snapshot_nth_parent(c, id,
-+					get_random_u32_below(node->depth));
-+		rcu_read_unlock();
-+	}
-+
-+	return pick;
-+}
-+
-+/*
-+ * Check the three skiplist entries of snapshot @id (value @s).
-+ *
-+ * For a node without a parent every entry must be zero; otherwise every entry
-+ * must pass bch2_snapshot_is_ancestor_early() against @id. Returns true when
-+ * all entries are acceptable, false otherwise.
-+ */
-+static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s)
-+{
-+	unsigned i;
-+
-+	for (i = 0; i < 3; i++) {
-+		if (s.parent) {
-+			if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i])))
-+				return false;
-+		} else if (s.skip[i]) {
-+			return false;
-+		}
-+	}
-+
-+	return true;
-+}
-+
-+/*
-+ * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
-+ * its snapshot_tree pointer is correct (allocate new one if necessary), then
-+ * update this node's pointer to root node's pointer:
-+ *
-+ * @iter and @k refer to the snapshot key being repaired; *@s is updated to the
-+ * new value of that key whenever it is rewritten.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+static int snapshot_tree_ptr_repair(struct btree_trans *trans,
-+				    struct btree_iter *iter,
-+				    struct bkey_s_c k,
-+				    struct bch_snapshot *s)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter root_iter;
-+	struct bch_snapshot_tree s_t;
-+	struct bkey_s_c_snapshot root;
-+	struct bkey_i_snapshot *u;
-+	u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
-+	int ret;
-+
-+	root = bch2_bkey_get_iter_typed(trans, &root_iter,
-+					BTREE_ID_snapshots, POS(0, root_id),
-+					BTREE_ITER_WITH_UPDATES, snapshot);
-+	ret = bkey_err(root);
-+	if (ret)
-+		goto err;
-+
-+	tree_id = le32_to_cpu(root.v->tree);
-+
-+	/*
-+	 * ENOENT is handled below by allocating a new tree. Any other error
-+	 * must still go through err: so that root_iter is released.
-+	 */
-+	ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
-+	if (ret && !bch2_err_matches(ret, ENOENT))
-+		goto err;
-+
-+	/* Tree missing, or not pointing back at this root: create a new one. */
-+	if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
-+		u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
-+		ret = PTR_ERR_OR_ZERO(u) ?:
-+			bch2_snapshot_tree_create(trans, root_id,
-+				bch2_snapshot_tree_oldest_subvol(c, root_id),
-+				&tree_id);
-+		if (ret)
-+			goto err;
-+
-+		u->v.tree = cpu_to_le32(tree_id);
-+		if (k.k->p.offset == root_id)
-+			*s = u->v;
-+	}
-+
-+	/* A non-root node additionally gets its own tree pointer rewritten. */
-+	if (k.k->p.offset != root_id) {
-+		u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
-+		ret = PTR_ERR_OR_ZERO(u);
-+		if (ret)
-+			goto err;
-+
-+		u->v.tree = cpu_to_le32(tree_id);
-+		*s = u->v;
-+	}
-+err:
-+	bch2_trans_iter_exit(trans, &root_iter);
-+	return ret;
-+}
-+
-+/*
-+ * fsck one snapshot key.
-+ *
-+ * Parent/child pointer mismatches and a missing or mismatched subvolume are
-+ * reported and returned as errors without repair. A stray subvol field, a bad
-+ * snapshot_tree pointer, a wrong depth and a bad skiplist are repaired by
-+ * rewriting the key through @iter.
-+ *
-+ * Keys that are not KEY_TYPE_snapshot are ignored.
-+ */
-+static int check_snapshot(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bch_snapshot s;
-+ struct bch_subvolume subvol;
-+ struct bch_snapshot v;
-+ struct bkey_i_snapshot *u;
-+ u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
-+ u32 real_depth;
-+ struct printbuf buf = PRINTBUF;
-+ bool should_have_subvol;
-+ u32 i, id;
-+ int ret = 0;
-+
-+ if (k.k->type != KEY_TYPE_snapshot)
-+ return 0;
-+
-+ /*
-+ * Work on a zero-padded copy so fields missing from a short value read
-+ * as 0.
-+ * NOTE(review): assumes bkey_val_bytes(k.k) <= sizeof(s) - confirm.
-+ */
-+ memset(&s, 0, sizeof(s));
-+ memcpy(&s, k.v, bkey_val_bytes(k.k));
-+
-+ /* The parent must exist and list this node as one of its children. */
-+ id = le32_to_cpu(s.parent);
-+ if (id) {
-+ ret = bch2_snapshot_lookup(trans, id, &v);
-+ if (bch2_err_matches(ret, ENOENT))
-+ bch_err(c, "snapshot with nonexistent parent:\n %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ if (ret)
-+ goto err;
-+
-+ if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
-+ le32_to_cpu(v.children[1]) != k.k->p.offset) {
-+ bch_err(c, "snapshot parent %u missing pointer to child %llu",
-+ id, k.k->p.offset);
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+ }
-+
-+ /* Each child must exist and point back at this node as its parent. */
-+ for (i = 0; i < 2 && s.children[i]; i++) {
-+ id = le32_to_cpu(s.children[i]);
-+
-+ ret = bch2_snapshot_lookup(trans, id, &v);
-+ if (bch2_err_matches(ret, ENOENT))
-+ bch_err(c, "snapshot node %llu has nonexistent child %u",
-+ k.k->p.offset, id);
-+ if (ret)
-+ goto err;
-+
-+ if (le32_to_cpu(v.parent) != k.k->p.offset) {
-+ bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
-+ id, le32_to_cpu(v.parent), k.k->p.offset);
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+ }
-+
-+ should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
-+ !BCH_SNAPSHOT_DELETED(&s);
-+
-+ if (should_have_subvol) {
-+ id = le32_to_cpu(s.subvol);
-+ /*
-+ * NOTE(review): check_snapshot_tree() passes (false, 0) for the
-+ * third and fourth arguments while this call passes (0, false);
-+ * both convert to the same values - confirm the intended order.
-+ */
-+ ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
-+ if (bch2_err_matches(ret, ENOENT))
-+ bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+ if (ret)
-+ goto err;
-+
-+ if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
-+ bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
-+ k.k->p.offset);
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+ } else {
-+ /* No subvolume expected: clear a stray subvol field. */
-+ if (fsck_err_on(s.subvol,
-+ c, snapshot_should_not_have_subvol,
-+ "snapshot should not point to subvol:\n %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
-+ ret = PTR_ERR_OR_ZERO(u);
-+ if (ret)
-+ goto err;
-+
-+ u->v.subvol = 0;
-+ s = u->v;
-+ }
-+ }
-+
-+ /* < 0: error, 0: tree pointer bad, > 0: tree pointer good. */
-+ ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
-+ if (ret < 0)
-+ goto err;
-+
-+ if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree,
-+ "snapshot points to missing/incorrect tree:\n %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-+ ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
-+ if (ret)
-+ goto err;
-+ }
-+ ret = 0;
-+
-+ real_depth = bch2_snapshot_depth(c, parent_id);
-+
-+ /*
-+ * Before the snapshot_skiplists version upgrade has completed, a wrong
-+ * depth is fixed without raising an fsck error.
-+ */
-+ if (le32_to_cpu(s.depth) != real_depth &&
-+ (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
-+ fsck_err(c, snapshot_bad_depth,
-+ "snapshot with incorrect depth field, should be %u:\n %s",
-+ real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
-+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
-+ ret = PTR_ERR_OR_ZERO(u);
-+ if (ret)
-+ goto err;
-+
-+ u->v.depth = cpu_to_le32(real_depth);
-+ s = u->v;
-+ }
-+
-+ ret = snapshot_skiplist_good(trans, k.k->p.offset, s);
-+ if (ret < 0)
-+ goto err;
-+
-+ /* Same upgrade rule as for depth: regenerate a bad skiplist. */
-+ if (!ret &&
-+ (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
-+ fsck_err(c, snapshot_bad_skiplist,
-+ "snapshot with bad skiplist field:\n %s",
-+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
-+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
-+ ret = PTR_ERR_OR_ZERO(u);
-+ if (ret)
-+ goto err;
-+
-+ for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
-+ u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id));
-+
-+ bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
-+ s = u->v;
-+ }
-+ ret = 0;
-+err:
-+fsck_err:
-+ printbuf_exit(&buf);
-+ return ret;
-+}
-+
-+/*
-+ * Run check_snapshot() on every key in BTREE_ID_snapshots, iterating in
-+ * reverse from POS_MAX. Returns 0 or the first error, which is also logged.
-+ */
-+int bch2_check_snapshots(struct bch_fs *c)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ /*
-+ * We iterate backwards as checking/fixing the depth field requires that
-+ * the parent's depth already be correct:
-+ */
-+ ret = bch2_trans_run(c,
-+ for_each_btree_key_reverse_commit(trans, iter,
-+ BTREE_ID_snapshots, POS_MAX,
-+ BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+ check_snapshot(trans, &iter, k)));
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/*
-+ * Mark a snapshot as deleted, for future cleanup.
-+ *
-+ * Sets the DELETED flag on snapshot @id, clears its SUBVOL flag and zeroes its
-+ * subvol field. A snapshot that is already marked deleted is left untouched.
-+ *
-+ * Returns 0 on success or the error from fetching the key; an error matching
-+ * ENOENT is additionally reported as a filesystem inconsistency.
-+ */
-+int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
-+{
-+	struct btree_iter iter;
-+	struct bkey_i_snapshot *snap;
-+	int ret;
-+
-+	snap = bch2_bkey_get_mut_typed(trans, &iter,
-+				       BTREE_ID_snapshots, POS(0, id),
-+				       0, snapshot);
-+	ret = PTR_ERR_OR_ZERO(snap);
-+	if (unlikely(ret)) {
-+		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
-+					trans->c, "missing snapshot %u", id);
-+		return ret;
-+	}
-+
-+	if (!BCH_SNAPSHOT_DELETED(&snap->v)) {
-+		SET_BCH_SNAPSHOT_DELETED(&snap->v, true);
-+		SET_BCH_SNAPSHOT_SUBVOL(&snap->v, false);
-+		snap->v.subvol = 0;
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/* Put the child pointers in normalized order: children[0] >= children[1]. */
-+static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
-+{
-+	if (le32_to_cpu(s->children[0]) >= le32_to_cpu(s->children[1]))
-+		return;
-+
-+	swap(s->children[0], s->children[1]);
-+}
-+
-+/*
-+ * Delete snapshot node @id, which must have at most one child.
-+ *
-+ * The node is unlinked from the tree: its parent's child pointer is
-+ * redirected to the node's only child (or 0) and the child's parent pointer
-+ * is redirected to the node's parent. If the node was the root of its tree,
-+ * the snapshot_tree entry is pointed at the new root, or deleted when no
-+ * child remains. Finally the node's own key is deleted.
-+ *
-+ * Returns 0 on success or a negative error code. Missing keys are also
-+ * reported as filesystem inconsistencies.
-+ */
-+static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
-+	struct btree_iter c_iter = (struct btree_iter) { NULL };
-+	struct btree_iter tree_iter = (struct btree_iter) { NULL };
-+	struct bkey_s_c_snapshot s;
-+	u32 parent_id, child_id;
-+	unsigned i;
-+	int ret = 0;
-+
-+	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
-+				     BTREE_ITER_INTENT, snapshot);
-+	ret = bkey_err(s);
-+	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
-+				"missing snapshot %u", id);
-+
-+	if (ret)
-+		goto err;
-+
-+	/* Only nodes with at most one child can be spliced out. */
-+	BUG_ON(s.v->children[1]);
-+
-+	parent_id = le32_to_cpu(s.v->parent);
-+	child_id = le32_to_cpu(s.v->children[0]);
-+
-+	if (parent_id) {
-+		struct bkey_i_snapshot *parent;
-+
-+		parent = bch2_bkey_get_mut_typed(trans, &p_iter,
-+						 BTREE_ID_snapshots, POS(0, parent_id),
-+						 0, snapshot);
-+		ret = PTR_ERR_OR_ZERO(parent);
-+		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
-+					"missing snapshot %u", parent_id);
-+		if (unlikely(ret))
-+			goto err;
-+
-+		/* find entry in parent->children for node being deleted */
-+		for (i = 0; i < 2; i++)
-+			if (le32_to_cpu(parent->v.children[i]) == id)
-+				break;
-+
-+		if (bch2_fs_inconsistent_on(i == 2, c,
-+					"snapshot %u missing child pointer to %u",
-+					parent_id, id))
-+			goto err;
-+
-+		/* child_id is a CPU-order value going into an on-disk le32 field. */
-+		parent->v.children[i] = cpu_to_le32(child_id);
-+
-+		normalize_snapshot_child_pointers(&parent->v);
-+	}
-+
-+	if (child_id) {
-+		struct bkey_i_snapshot *child;
-+
-+		child = bch2_bkey_get_mut_typed(trans, &c_iter,
-+						BTREE_ID_snapshots, POS(0, child_id),
-+						0, snapshot);
-+		ret = PTR_ERR_OR_ZERO(child);
-+		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
-+					"missing snapshot %u", child_id);
-+		if (unlikely(ret))
-+			goto err;
-+
-+		child->v.parent = cpu_to_le32(parent_id);
-+
-+		/* A node without a parent has an all-zero skiplist. */
-+		if (!child->v.parent) {
-+			child->v.skip[0] = 0;
-+			child->v.skip[1] = 0;
-+			child->v.skip[2] = 0;
-+		}
-+	}
-+
-+	if (!parent_id) {
-+		/*
-+		 * We're deleting the root of a snapshot tree: update the
-+		 * snapshot_tree entry to point to the new root, or delete it if
-+		 * this is the last snapshot ID in this tree:
-+		 */
-+		struct bkey_i_snapshot_tree *s_t;
-+
-+		BUG_ON(s.v->children[1]);
-+
-+		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
-+				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
-+				0, snapshot_tree);
-+		ret = PTR_ERR_OR_ZERO(s_t);
-+		if (ret)
-+			goto err;
-+
-+		if (s.v->children[0]) {
-+			/* Both fields are little endian: copy without conversion. */
-+			s_t->v.root_snapshot = s.v->children[0];
-+		} else {
-+			s_t->k.type = KEY_TYPE_deleted;
-+			set_bkey_val_u64s(&s_t->k, 0);
-+		}
-+	}
-+
-+	ret = bch2_btree_delete_at(trans, &iter, 0);
-+err:
-+	bch2_trans_iter_exit(trans, &tree_iter);
-+	bch2_trans_iter_exit(trans, &p_iter);
-+	bch2_trans_iter_exit(trans, &c_iter);
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/*
-+ * Allocate @nr_snapids new snapshot keys with parent @parent and tree @tree.
-+ *
-+ * Key i gets subvolume snapshot_subvols[i]; its ID is returned in
-+ * new_snapids[i]. Each new key is also entered into the in-memory snapshot
-+ * table through bch2_mark_snapshot(), with its equivalence class set to
-+ * itself.
-+ *
-+ * Returns 0 on success, -BCH_ERR_ENOSPC_snapshot_create when no free slot is
-+ * left, or another negative error code.
-+ */
-+static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
-+ u32 *new_snapids,
-+ u32 *snapshot_subvols,
-+ unsigned nr_snapids)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct bkey_i_snapshot *n;
-+ struct bkey_s_c k;
-+ unsigned i, j;
-+ u32 depth = bch2_snapshot_depth(c, parent);
-+ int ret;
-+
-+ /* Position the iterator on the first existing key, starting from POS_MIN. */
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
-+ POS_MIN, BTREE_ITER_INTENT);
-+ k = bch2_btree_iter_peek(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ for (i = 0; i < nr_snapids; i++) {
-+ /* Step back one slot per new ID. */
-+ k = bch2_btree_iter_prev_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ /* No slot, or offset 0 reached: no snapshot IDs left. */
-+ if (!k.k || !k.k->p.offset) {
-+ ret = -BCH_ERR_ENOSPC_snapshot_create;
-+ goto err;
-+ }
-+
-+ n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
-+ ret = PTR_ERR_OR_ZERO(n);
-+ if (ret)
-+ goto err;
-+
-+ n->v.flags = 0;
-+ n->v.parent = cpu_to_le32(parent);
-+ n->v.subvol = cpu_to_le32(snapshot_subvols[i]);
-+ n->v.tree = cpu_to_le32(tree);
-+ n->v.depth = cpu_to_le32(depth);
-+
-+ /* Skiplist entries are picked via @parent, then sorted. */
-+ for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
-+ n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));
-+
-+ bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
-+ SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
-+
-+ /* Enter the new key into the in-memory table right away. */
-+ ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
-+ bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
-+ if (ret)
-+ goto err;
-+
-+ new_snapids[i] = iter.pos.offset;
-+
-+ /* A new node starts out as its own equivalence class. */
-+ mutex_lock(&c->snapshot_table_lock);
-+ snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
-+ mutex_unlock(&c->snapshot_table_lock);
-+ }
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/*
-+ * Create new snapshot IDs as children of the existing snapshot ID @parent.
-+ *
-+ * Fails with -EINVAL if @parent already has children. On success @parent is
-+ * pointed at new_snapids[0] and new_snapids[1], and its subvolume is cleared.
-+ */
-+static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
-+					       u32 *new_snapids,
-+					       u32 *snapshot_subvols,
-+					       unsigned nr_snapids)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter iter;
-+	struct bkey_i_snapshot *n_parent =
-+		bch2_bkey_get_mut_typed(trans, &iter,
-+					BTREE_ID_snapshots, POS(0, parent),
-+					0, snapshot);
-+	int ret = PTR_ERR_OR_ZERO(n_parent);
-+
-+	if (unlikely(ret)) {
-+		if (bch2_err_matches(ret, ENOENT))
-+			bch_err(c, "snapshot %u not found", parent);
-+		return ret;
-+	}
-+
-+	if (n_parent->v.children[0] || n_parent->v.children[1]) {
-+		bch_err(c, "Trying to add child snapshot nodes to parent that already has children");
-+		ret = -EINVAL;
-+	} else {
-+		ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
-+				     new_snapids, snapshot_subvols, nr_snapids);
-+		if (!ret) {
-+			n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
-+			n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
-+			n_parent->v.subvol = 0;
-+			SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
-+		}
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/*
-+ * Create a snapshot node that is the root of a new tree: allocate the
-+ * snapshot_tree key, create the snapshot IDs with parent 0 inside it, then
-+ * record the first subvolume and first new ID as the tree's master/root.
-+ */
-+static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
-+					  u32 *new_snapids,
-+					  u32 *snapshot_subvols,
-+					  unsigned nr_snapids)
-+{
-+	struct bkey_i_snapshot_tree *n_tree = __bch2_snapshot_tree_create(trans);
-+	int ret = PTR_ERR_OR_ZERO(n_tree);
-+
-+	if (ret)
-+		return ret;
-+
-+	ret = create_snapids(trans, 0, n_tree->k.p.offset,
-+			     new_snapids, snapshot_subvols, nr_snapids);
-+	if (ret)
-+		return ret;
-+
-+	n_tree->v.master_subvol	= cpu_to_le32(snapshot_subvols[0]);
-+	n_tree->v.root_snapshot	= cpu_to_le32(new_snapids[0]);
-+	return 0;
-+}
-+
-+/*
-+ * Create snapshot nodes: with @parent == 0 exactly one ID is created as the
-+ * root of a new tree, otherwise exactly two IDs are created as children of
-+ * @parent. Any other combination of @parent and @nr_snapids is a BUG.
-+ */
-+int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
-+			      u32 *new_snapids,
-+			      u32 *snapshot_subvols,
-+			      unsigned nr_snapids)
-+{
-+	BUG_ON((parent == 0) != (nr_snapids == 1));
-+	BUG_ON((parent != 0) != (nr_snapids == 2));
-+
-+	if (!parent)
-+		return bch2_snapshot_node_create_tree(trans,
-+				new_snapids, snapshot_subvols, nr_snapids);
-+
-+	return bch2_snapshot_node_create_children(trans, parent,
-+				new_snapids, snapshot_subvols, nr_snapids);
-+}
-+
-+/*
-+ * If we have an unlinked inode in an internal snapshot node, and the inode
-+ * really has been deleted in all child snapshots, how does this get cleaned up?
-+ *
-+ * first there is the problem of how keys that have been overwritten in all
-+ * child snapshots get deleted (unimplemented?), but inodes may perhaps be
-+ * special?
-+ *
-+ * also: unlinked inode in internal snapshot appears to not be getting deleted
-+ * correctly if inode doesn't exist in leaf snapshots
-+ *
-+ * solution:
-+ *
-+ * for a key in an interior snapshot node that needs work to be done that
-+ * requires it to be mutated: iterate over all descendent leaf nodes and copy
-+ * that key to snapshot leaf nodes, where we can mutate it
-+ */
-+
-+/*
-+ * Per-key callback: delete @k if its snapshot is in @deleted, or if a key
-+ * with the same equivalence class was already seen at this position.
-+ * Otherwise remember the class in @equiv_seen. @equiv_seen is reset whenever
-+ * the key position differs from @last_pos.
-+ */
-+static int snapshot_delete_key(struct btree_trans *trans,
-+			       struct btree_iter *iter,
-+			       struct bkey_s_c k,
-+			       snapshot_id_list *deleted,
-+			       snapshot_id_list *equiv_seen,
-+			       struct bpos *last_pos)
-+{
-+	struct bch_fs *c = trans->c;
-+	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
-+	bool redundant;
-+
-+	if (!bkey_eq(k.k->p, *last_pos))
-+		equiv_seen->nr = 0;
-+	*last_pos = k.k->p;
-+
-+	redundant = snapshot_list_has_id(deleted, k.k->p.snapshot) ||
-+		    snapshot_list_has_id(equiv_seen, equiv);
-+	if (!redundant)
-+		return snapshot_list_add(c, equiv_seen, equiv);
-+
-+	return bch2_btree_delete_at(trans, iter,
-+				    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+}
-+
-+/*
-+ * Per-key callback: if @k is not already in its equivalence-class snapshot,
-+ * re-insert a copy there and delete the original.
-+ */
-+static int move_key_to_correct_snapshot(struct btree_trans *trans,
-+					struct btree_iter *iter,
-+					struct bkey_s_c k)
-+{
-+	struct bch_fs *c = trans->c;
-+	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
-+	struct btree_iter new_iter;
-+	struct bkey_i *new;
-+	int ret;
-+
-+	/*
-+	 * When we have a linear chain of snapshot nodes, we consider
-+	 * those to form an equivalence class: we're going to collapse
-+	 * them all down to a single node, and keep the leaf-most node -
-+	 * which has the same id as the equivalence class id.
-+	 *
-+	 * If there are multiple keys in different snapshots at the same
-+	 * position, we're only going to keep the one in the newest
-+	 * snapshot - the rest have been overwritten and are redundant,
-+	 * and for the key we're going to keep we need to move it to the
-+	 * equivalence class ID if it's not there already.
-+	 */
-+	if (equiv == k.k->p.snapshot)
-+		return 0;
-+
-+	new = bch2_bkey_make_mut_noupdate(trans, k);
-+	ret = PTR_ERR_OR_ZERO(new);
-+	if (ret)
-+		return ret;
-+
-+	new->k.p.snapshot = equiv;
-+
-+	bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
-+			     BTREE_ITER_ALL_SNAPSHOTS|
-+			     BTREE_ITER_CACHED|
-+			     BTREE_ITER_INTENT);
-+
-+	/* Insert at the new position first, then drop the old key */
-+	ret = bch2_btree_iter_traverse(&new_iter);
-+	if (!ret)
-+		ret = bch2_trans_update(trans, &new_iter, new,
-+					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+	if (!ret)
-+		ret = bch2_btree_delete_at(trans, iter,
-+					   BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+
-+	bch2_trans_iter_exit(trans, &new_iter);
-+	return ret;
-+}
-+
-+/*
-+ * Returns 1 if snapshot key @k should be marked deleted: it is a snapshot
-+ * key, is not already deleted, has no subvolume, and neither child is live.
-+ * Returns 0 if not, or a negative error from bch2_snapshot_live().
-+ */
-+static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
-+{
-+	struct bkey_s_c_snapshot snap;
-+	int live;
-+
-+	if (k.k->type != KEY_TYPE_snapshot)
-+		return 0;
-+
-+	snap = bkey_s_c_to_snapshot(k);
-+	if (BCH_SNAPSHOT_DELETED(snap.v) ||
-+	    BCH_SNAPSHOT_SUBVOL(snap.v))
-+		return 0;
-+
-+	/* The second child is only checked when the first is not live */
-+	live = bch2_snapshot_live(trans, le32_to_cpu(snap.v->children[0]));
-+	if (!live)
-+		live = bch2_snapshot_live(trans, le32_to_cpu(snap.v->children[1]));
-+
-+	if (live < 0)
-+		return live;
-+	return !live;
-+}
-+
-+/*
-+ * For a given snapshot, if it doesn't have a subvolume that points to it, and
-+ * it doesn't have child snapshot nodes - it's now redundant and we can mark it
-+ * as deleted.
-+ *
-+ * Returns 0 when nothing needs doing, a negative error code on failure, or
-+ * the result of marking the node deleted.
-+ */
-+static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
-+{
-+	int needs_delete = bch2_snapshot_needs_delete(trans, k);
-+
-+	if (needs_delete <= 0)
-+		return needs_delete;
-+
-+	return bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
-+}
-+
-+/*
-+ * Like bch2_snapshot_nth_parent(), but IDs present in @skip do not count:
-+ * first move @id up past any skipped IDs, then take @n further steps, each
-+ * step landing on the nearest ancestor that is not in @skip.
-+ */
-+static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
-+						snapshot_id_list *skip)
-+{
-+	rcu_read_lock();
-+
-+	while (snapshot_list_has_id(skip, id))
-+		id = __bch2_snapshot_parent(c, id);
-+
-+	for (; n; n--) {
-+		id = __bch2_snapshot_parent(c, id);
-+		while (snapshot_list_has_id(skip, id))
-+			id = __bch2_snapshot_parent(c, id);
-+	}
-+
-+	rcu_read_unlock();
-+
-+	return id;
-+}
-+
-+/*
-+ * Per-key callback: for a snapshot node that survives deletion but has
-+ * ancestors in @deleted, reduce its depth by the number of deleted ancestors
-+ * and replace any skiplist entries that point at deleted nodes.
-+ */
-+static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
-+				struct btree_iter *iter, struct bkey_s_c k,
-+				snapshot_id_list *deleted)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct bkey_i_snapshot *s;
-+	u32 nr_deleted_ancestors = 0;
-+	u32 depth;
-+	u32 *i;
-+	int ret;
-+
-+	if (k.k->type != KEY_TYPE_snapshot ||
-+	    snapshot_list_has_id(deleted, k.k->p.offset))
-+		return 0;
-+
-+	s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
-+	ret = PTR_ERR_OR_ZERO(s);
-+	if (ret)
-+		return ret;
-+
-+	darray_for_each(*deleted, i)
-+		if (bch2_snapshot_is_ancestor(c, s->k.p.offset, *i))
-+			nr_deleted_ancestors++;
-+
-+	if (!nr_deleted_ancestors)
-+		return 0;
-+
-+	le32_add_cpu(&s->v.depth, -nr_deleted_ancestors);
-+	depth = le32_to_cpu(s->v.depth);
-+
-+	if (!depth) {
-+		s->v.skip[0] = 0;
-+		s->v.skip[1] = 0;
-+		s->v.skip[2] = 0;
-+	} else {
-+		u32 parent = bch2_snapshot_parent(c, s->k.p.offset);
-+
-+		for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
-+			u32 id = le32_to_cpu(s->v.skip[j]);
-+			u32 n;
-+
-+			if (!snapshot_list_has_id(deleted, id))
-+				continue;
-+
-+			/* Pick a random surviving ancestor as the replacement */
-+			n = depth > 1
-+				? get_random_u32_below(depth - 1)
-+				: 0;
-+			id = bch2_snapshot_nth_parent_skip(c, parent, n, deleted);
-+			s->v.skip[j] = cpu_to_le32(id);
-+		}
-+
-+		bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32);
-+	}
-+
-+	return bch2_trans_update(trans, iter, &s->k_i, 0);
-+}
-+
-+/*
-+ * Delete snapshot nodes that are no longer needed, along with the keys that
-+ * live in them.
-+ *
-+ * Does nothing unless BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS was set (the flag is
-+ * cleared here). The passes are, in order:
-+ *  1. mark redundant snapshot nodes as deleted;
-+ *  2. recompute equivalence classes;
-+ *  3. collect the IDs of nodes marked deleted;
-+ *  4. for every btree that has snapshots (except deleted_inodes), delete keys
-+ *     in dead snapshots and move surviving keys to their equivalence class;
-+ *  5. under snapshot_create_lock: collect interior nodes whose equivalence
-+ *     class is a different ID, fix up depth/skiplists of their descendants,
-+ *     then delete both sets of nodes.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+int bch2_delete_dead_snapshots(struct bch_fs *c)
-+{
-+	struct btree_trans *trans;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	struct bkey_s_c_snapshot snap;
-+	snapshot_id_list deleted = { 0 };
-+	snapshot_id_list deleted_interior = { 0 };
-+	u32 *i, id;
-+	int ret = 0;
-+
-+	if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags))
-+		return 0;
-+
-+	if (!test_bit(BCH_FS_STARTED, &c->flags)) {
-+		ret = bch2_fs_read_write_early(c);
-+		if (ret) {
-+			bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
-+			return ret;
-+		}
-+	}
-+
-+	trans = bch2_trans_get(c);
-+
-+	/*
-+	 * For every snapshot node: If we have no live children and it's not
-+	 * pointed to by a subvolume, delete it:
-+	 */
-+	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
-+			POS_MIN, 0, k,
-+			NULL, NULL, 0,
-+		bch2_delete_redundant_snapshot(trans, k));
-+	if (ret) {
-+		bch_err_msg(c, ret, "deleting redundant snapshots");
-+		goto err;
-+	}
-+
-+	ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
-+				  POS_MIN, 0, k,
-+		bch2_snapshot_set_equiv(trans, k));
-+	if (ret) {
-+		bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
-+		goto err;
-+	}
-+
-+	/* Collect the IDs of all nodes currently flagged as deleted */
-+	for_each_btree_key(trans, iter, BTREE_ID_snapshots,
-+			   POS_MIN, 0, k, ret) {
-+		if (k.k->type != KEY_TYPE_snapshot)
-+			continue;
-+
-+		snap = bkey_s_c_to_snapshot(k);
-+		if (BCH_SNAPSHOT_DELETED(snap.v)) {
-+			ret = snapshot_list_add(c, &deleted, k.k->p.offset);
-+			if (ret)
-+				break;
-+		}
-+	}
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	if (ret) {
-+		bch_err_msg(c, ret, "walking snapshots");
-+		goto err;
-+	}
-+
-+	for (id = 0; id < BTREE_ID_NR; id++) {
-+		struct bpos last_pos = POS_MIN;
-+		snapshot_id_list equiv_seen = { 0 };
-+		struct disk_reservation res = { 0 };
-+
-+		if (!btree_type_has_snapshots(id))
-+			continue;
-+
-+		/*
-+		 * deleted inodes btree is maintained by a trigger on the inodes
-+		 * btree - no work for us to do here, and it's not safe to scan
-+		 * it because we'll see out of date keys due to the btree write
-+		 * buffer:
-+		 */
-+		if (id == BTREE_ID_deleted_inodes)
-+			continue;
-+
-+		ret = for_each_btree_key_commit(trans, iter,
-+				id, POS_MIN,
-+				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
-+				&res, NULL, BTREE_INSERT_NOFAIL,
-+			snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
-+		      for_each_btree_key_commit(trans, iter,
-+				id, POS_MIN,
-+				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
-+				&res, NULL, BTREE_INSERT_NOFAIL,
-+			move_key_to_correct_snapshot(trans, &iter, k));
-+
-+		bch2_disk_reservation_put(c, &res);
-+		darray_exit(&equiv_seen);
-+
-+		if (ret) {
-+			bch_err_msg(c, ret, "deleting keys from dying snapshots");
-+			goto err;
-+		}
-+	}
-+
-+	bch2_trans_unlock(trans);
-+	down_write(&c->snapshot_create_lock);
-+
-+	/* Collect nodes whose equivalence class is some other ID */
-+	for_each_btree_key(trans, iter, BTREE_ID_snapshots,
-+			   POS_MIN, 0, k, ret) {
-+		u32 snapshot	= k.k->p.offset;
-+		u32 equiv	= bch2_snapshot_equiv(c, snapshot);
-+
-+		if (equiv != snapshot) {
-+			/*
-+			 * Don't ignore allocation failure: a node missing
-+			 * from this list would never be deleted below.
-+			 */
-+			ret = snapshot_list_add(c, &deleted_interior, snapshot);
-+			if (ret)
-+				break;
-+		}
-+	}
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	if (ret)
-+		goto err_create_lock;
-+
-+	/*
-+	 * Fixing children of deleted snapshots can't be done completely
-+	 * atomically, if we crash between here and when we delete the interior
-+	 * nodes some depth fields will be off:
-+	 */
-+	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
-+				  BTREE_ITER_INTENT, k,
-+				  NULL, NULL, BTREE_INSERT_NOFAIL,
-+		bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
-+	if (ret)
-+		goto err_create_lock;
-+
-+	darray_for_each(deleted, i) {
-+		ret = commit_do(trans, NULL, NULL, 0,
-+			bch2_snapshot_node_delete(trans, *i));
-+		if (ret) {
-+			bch_err_msg(c, ret, "deleting snapshot %u", *i);
-+			goto err_create_lock;
-+		}
-+	}
-+
-+	darray_for_each(deleted_interior, i) {
-+		ret = commit_do(trans, NULL, NULL, 0,
-+			bch2_snapshot_node_delete(trans, *i));
-+		if (ret) {
-+			bch_err_msg(c, ret, "deleting snapshot %u", *i);
-+			goto err_create_lock;
-+		}
-+	}
-+err_create_lock:
-+	up_write(&c->snapshot_create_lock);
-+err:
-+	darray_exit(&deleted_interior);
-+	darray_exit(&deleted);
-+	bch2_trans_put(trans);
-+	if (ret)
-+		bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+/*
-+ * Workqueue entry point: run dead snapshot deletion, then release the write
-+ * ref taken by bch2_delete_dead_snapshots_async().
-+ */
-+void bch2_delete_dead_snapshots_work(struct work_struct *work)
-+{
-+	struct bch_fs *c;
-+
-+	c = container_of(work, struct bch_fs, snapshot_delete_work);
-+
-+	bch2_delete_dead_snapshots(c);
-+	bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
-+}
-+
-+/*
-+ * Queue dead snapshot deletion on the write_ref workqueue. A write ref is
-+ * taken for the work item; if the ref can't be taken nothing is queued, and
-+ * if queue_work() reports the work was not queued the ref is dropped again.
-+ */
-+void bch2_delete_dead_snapshots_async(struct bch_fs *c)
-+{
-+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots))
-+		return;
-+
-+	if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work))
-+		bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
-+}
-+
-+/*
-+ * Walk backwards from @pos over keys at the same position in other snapshots.
-+ * Returns 1 if one is found for which
-+ * bch2_snapshot_is_ancestor(c, <its snapshot>, pos.snapshot) holds, 0 if the
-+ * walk ends without finding one, or a negative error code.
-+ */
-+int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
-+				       enum btree_id id,
-+				       struct bpos pos)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	bch2_trans_iter_init(trans, &iter, id, pos,
-+			     BTREE_ITER_NOT_EXTENTS|
-+			     BTREE_ITER_ALL_SNAPSHOTS);
-+	for (;;) {
-+		k = bch2_btree_iter_prev(&iter);
-+		ret = bkey_err(k);
-+
-+		/* Stop on error, end of btree, or once we leave @pos */
-+		if (ret || !k.k || !bkey_eq(pos, k.k->p))
-+			break;
-+
-+		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
-+			ret = 1;
-+			break;
-+		}
-+	}
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	return ret;
-+}
-+
-+/* Return children[1] of @id if it is nonzero, otherwise children[0]. */
-+static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id)
-+{
-+	const struct snapshot_t *s = snapshot_t(c, id);
-+	u32 child = s->children[1];
-+
-+	if (!child)
-+		child = s->children[0];
-+	return child;
-+}
-+
-+/* Follow bch2_snapshot_smallest_child() from @id until a node has no child. */
-+static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id)
-+{
-+	for (;;) {
-+		u32 child = bch2_snapshot_smallest_child(c, id);
-+
-+		if (!child)
-+			return id;
-+		id = child;
-+	}
-+}
-+
-+/*
-+ * Copy @interior_k into snapshot @leaf_id, unless the key visible at that
-+ * position in @leaf_id already comes from a different snapshot than
-+ * @interior_k. The first position written is recorded in @new_min_pos
-+ * (only while it still equals POS_MIN).
-+ */
-+static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
-+					       enum btree_id btree,
-+					       struct bkey_s_c interior_k,
-+					       u32 leaf_id, struct bpos *new_min_pos)
-+{
-+	struct btree_iter iter;
-+	struct bpos leaf_pos = interior_k.k->p;
-+	struct bkey_s_c k;
-+	struct bkey_i *copy;
-+	int ret;
-+
-+	leaf_pos.snapshot = leaf_id;
-+
-+	bch2_trans_iter_init(trans, &iter, btree, leaf_pos, BTREE_ITER_INTENT);
-+	k = bch2_btree_iter_peek_slot(&iter);
-+	ret = bkey_err(k);
-+
-+	/* error, or key already overwritten in this snapshot? */
-+	if (ret || k.k->p.snapshot != interior_k.k->p.snapshot)
-+		goto out;
-+
-+	if (bpos_eq(*new_min_pos, POS_MIN)) {
-+		*new_min_pos = k.k->p;
-+		new_min_pos->snapshot = leaf_id;
-+	}
-+
-+	copy = bch2_bkey_make_mut_noupdate(trans, interior_k);
-+	ret = PTR_ERR_OR_ZERO(copy);
-+	if (ret)
-+		goto out;
-+
-+	copy->k.p.snapshot = leaf_id;
-+	ret = bch2_trans_update(trans, &iter, copy, 0);
-+out:
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/*
-+ * Copy key @k to each snapshot ID in the range scanned below that passes
-+ * bch2_snapshot_is_ancestor(c, id, <k's snapshot>) and is a leaf, committing
-+ * after each copy.
-+ *
-+ * @k is first copied into a local buffer. Transaction restarts are retried
-+ * here, and reported to the caller at the end via trans_was_restarted().
-+ */
-+int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
-+					  enum btree_id btree,
-+					  struct bkey_s_c k,
-+					  struct bpos *new_min_pos)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct bkey_buf sk;
-+	u32 restart_count = trans->restart_count;
-+	int ret = 0;
-+
-+	bch2_bkey_buf_init(&sk);
-+	bch2_bkey_buf_reassemble(&sk, c, k);
-+	k = bkey_i_to_s_c(sk.k);
-+
-+	*new_min_pos = POS_MIN;
-+
-+	for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot);
-+	     id < k.k->p.snapshot;
-+	     id++) {
-+		bool restarted;
-+
-+		if (!(bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) &&
-+		      bch2_snapshot_is_leaf(c, id)))
-+			continue;
-+
-+		do {
-+			ret = btree_trans_too_many_iters(trans) ?:
-+				bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?:
-+				bch2_trans_commit(trans, NULL, NULL, 0);
-+
-+			restarted = ret && bch2_err_matches(ret, BCH_ERR_transaction_restart);
-+			if (restarted)
-+				bch2_trans_begin(trans);
-+		} while (restarted);
-+
-+		if (ret)
-+			break;
-+	}
-+
-+	bch2_bkey_buf_exit(&sk, c);
-+
-+	return ret ?: trans_was_restarted(trans, restart_count);
-+}
-+
-+/*
-+ * Per-key callback used while reading snapshots: set
-+ * BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS if @k is a snapshot that is flagged
-+ * deleted, is not its own equivalence class, or needs deleting.
-+ * Returns 0, or a negative error from bch2_snapshot_needs_delete().
-+ */
-+static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct bkey_s_c_snapshot snap;
-+
-+	if (k.k->type != KEY_TYPE_snapshot)
-+		return 0;
-+
-+	snap = bkey_s_c_to_snapshot(k);
-+
-+	/* The (more expensive) needs_delete check only runs if the cheap ones pass */
-+	if (!BCH_SNAPSHOT_DELETED(snap.v) &&
-+	    bch2_snapshot_equiv(c, k.k->p.offset) == k.k->p.offset) {
-+		int ret = bch2_snapshot_needs_delete(trans, k);
-+
-+		if (ret <= 0)
-+			return ret;
-+	}
-+
-+	set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
-+	return 0;
-+}
-+
-+int bch2_snapshots_read(struct bch_fs *c)
-+{ /*
-+ * Walks the snapshots btree twice inside one bch2_trans_run():
-+ * the first pass calls bch2_mark_snapshot(), bch2_snapshot_set_equiv()
-+ * and bch2_check_snapshot_needs_deletion() on every key (stopping at
-+ * the first nonzero result); the second pass calls
-+ * set_is_ancestor_bitmap() for every key's offset.
-+ * Any error is logged with bch_err_fn() and returned.
-+ */
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ ret = bch2_trans_run(c,
-+ for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
-+ POS_MIN, 0, k,
-+ bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
-+ bch2_snapshot_set_equiv(trans, k) ?:
-+ bch2_check_snapshot_needs_deletion(trans, k)) ?:
-+ for_each_btree_key2(trans, iter, BTREE_ID_snapshots,
-+ POS_MIN, 0, k,
-+ (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/* Free the in-memory snapshot table. */
-+void bch2_fs_snapshots_exit(struct bch_fs *c)
-+{
-+	struct snapshot_table *table = rcu_dereference_protected(c->snapshots, true);
-+
-+	kfree(table);
-+}
-diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
-new file mode 100644
-index 000000000000..f09a22f44239
---- /dev/null
-+++ b/fs/bcachefs/snapshot.h
-@@ -0,0 +1,268 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SNAPSHOT_H
-+#define _BCACHEFS_SNAPSHOT_H
-+
-+enum bkey_invalid_flags;
-+
-+void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+
-+#define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { /* bkey_ops initializer for snapshot_tree keys */ \
-+ .key_invalid = bch2_snapshot_tree_invalid, \
-+ .val_to_text = bch2_snapshot_tree_to_text, \
-+ .min_val_size = 8, \
-+})
-+
-+struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *);
-+
-+int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *);
-+
-+void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
-+ struct bkey_s_c, struct bkey_s_c, unsigned);
-+
-+#define bch2_bkey_ops_snapshot ((struct bkey_ops) { /* bkey_ops initializer for snapshot keys */ \
-+ .key_invalid = bch2_snapshot_invalid, \
-+ .val_to_text = bch2_snapshot_to_text, \
-+ .atomic_trigger = bch2_mark_snapshot, /* also called directly from snapshot.c */ \
-+ .min_val_size = 24, \
-+})
-+
-+/* Return the table entry for snapshot @id; entries are stored at U32_MAX - id. */
-+static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
-+{
-+	u32 idx = U32_MAX - id;
-+
-+	return &t->s[idx];
-+}
-+
-+/* Look up @id in the filesystem's RCU-dereferenced snapshot table. */
-+static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
-+{
-+	struct snapshot_table *table = rcu_dereference(c->snapshots);
-+
-+	return __snapshot_t(table, id);
-+}
-+
-+/* Return the snapshot tree ID recorded for snapshot @id. */
-+static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
-+{
-+	u32 tree;
-+
-+	rcu_read_lock();
-+	tree = snapshot_t(c, id)->tree;
-+	rcu_read_unlock();
-+
-+	return tree;
-+}
-+
-+/* Parent of @id, without the debug depth check; takes no RCU lock itself. */
-+static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
-+{
-+	const struct snapshot_t *s = snapshot_t(c, id);
-+
-+	return s->parent;
-+}
-+
-+/* RCU-locked wrapper around __bch2_snapshot_parent_early(). */
-+static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
-+{
-+	u32 parent;
-+
-+	rcu_read_lock();
-+	parent = __bch2_snapshot_parent_early(c, id);
-+	rcu_read_unlock();
-+
-+	return parent;
-+}
-+
-+/*
-+ * Parent of @id; takes no RCU lock itself. With CONFIG_BCACHEFS_DEBUG this
-+ * panics if the parent's depth is not exactly one less than @id's depth.
-+ */
-+static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
-+{
-+	const struct snapshot_t *s = snapshot_t(c, id);
-+	u32 parent = s->parent;
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+	if (parent &&
-+	    s->depth != snapshot_t(c, parent)->depth + 1)
-+		panic("id %u depth=%u parent %u depth=%u\n",
-+		      id, s->depth,
-+		      parent, snapshot_t(c, parent)->depth);
-+#endif
-+	return parent;
-+}
-+
-+/* RCU-locked wrapper around __bch2_snapshot_parent(). */
-+static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
-+{
-+	u32 parent;
-+
-+	rcu_read_lock();
-+	parent = __bch2_snapshot_parent(c, id);
-+	rcu_read_unlock();
-+
-+	return parent;
-+}
-+
-+/* Walk @n parent links up from @id and return the ID reached. */
-+static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
-+{
-+	rcu_read_lock();
-+	for (; n; n--)
-+		id = __bch2_snapshot_parent(c, id);
-+	rcu_read_unlock();
-+
-+	return id;
-+}
-+
-+u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
-+
-+/* Follow parent links from @id until a node with parent 0 is reached. */
-+static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
-+{
-+	rcu_read_lock();
-+	for (;;) {
-+		u32 parent = __bch2_snapshot_parent(c, id);
-+
-+		if (!parent)
-+			break;
-+		id = parent;
-+	}
-+	rcu_read_unlock();
-+
-+	return id;
-+}
-+
-+/* Equivalence-class ID of @id; takes no RCU lock itself. */
-+static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
-+{
-+	const struct snapshot_t *s = snapshot_t(c, id);
-+
-+	return s->equiv;
-+}
-+
-+/* RCU-locked wrapper around __bch2_snapshot_equiv(). */
-+static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
-+{
-+	u32 equiv;
-+
-+	rcu_read_lock();
-+	equiv = __bch2_snapshot_equiv(c, id);
-+	rcu_read_unlock();
-+
-+	return equiv;
-+}
-+
-+/* True if @id is the representative of its own equivalence class. */
-+static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
-+{
-+	u32 equiv = bch2_snapshot_equiv(c, id);
-+
-+	return equiv == id;
-+}
-+
-+/* True if snapshot @id has a nonzero children[0] entry. */
-+static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
-+{
-+	bool has_child;
-+
-+	rcu_read_lock();
-+	has_child = snapshot_t(c, id)->children[0] != 0;
-+	rcu_read_unlock();
-+
-+	return has_child;
-+}
-+
-+/* Returns 1 if @id is not an internal node, 0 otherwise. */
-+static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
-+{
-+	return bch2_snapshot_is_internal_node(c, id) ? 0 : 1;
-+}
-+
-+/*
-+ * Return the other child of @id's parent, or 0 if @id has no parent or is
-+ * not listed among its parent's children.
-+ *
-+ * Takes no RCU lock itself. The parent ID is looked up once and reused, so
-+ * the "no parent" check and the table access always see the same value.
-+ */
-+static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
-+{
-+	const struct snapshot_t *s;
-+	u32 parent = __bch2_snapshot_parent(c, id);
-+
-+	if (!parent)
-+		return 0;
-+
-+	s = snapshot_t(c, parent);
-+	if (id == s->children[0])
-+		return s->children[1];
-+	if (id == s->children[1])
-+		return s->children[0];
-+	return 0;
-+}
-+
-+/* Depth a child of @parent would have: parent's depth + 1, or 0 if @parent is 0. */
-+static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
-+{
-+	u32 depth = 0;
-+
-+	rcu_read_lock();
-+	if (parent)
-+		depth = snapshot_t(c, parent)->depth + 1;
-+	rcu_read_unlock();
-+
-+	return depth;
-+}
-+
-+bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
-+
-+/* True if @id equals @ancestor, otherwise defers to __bch2_snapshot_is_ancestor(). */
-+static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
-+{
-+	if (id == ancestor)
-+		return true;
-+
-+	return __bch2_snapshot_is_ancestor(c, id, ancestor);
-+}
-+
-+/* True if either children[] entry of snapshot @id is nonzero. */
-+static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
-+{
-+	const struct snapshot_t *s;
-+	bool has_children;
-+
-+	rcu_read_lock();
-+	s = snapshot_t(c, id);
-+	has_children = s->children[0] || s->children[1];
-+	rcu_read_unlock();
-+
-+	return has_children;
-+}
-+
-+/* Linear search: true if @id is an element of list @s. */
-+static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
-+{
-+	u32 *entry;
-+
-+	darray_for_each(*s, entry) {
-+		if (*entry == id)
-+			return true;
-+	}
-+
-+	return false;
-+}
-+
-+/* True if bch2_snapshot_is_ancestor(c, @id, e) holds for some element e of @s. */
-+static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id)
-+{
-+	u32 *entry;
-+
-+	darray_for_each(*s, entry) {
-+		if (bch2_snapshot_is_ancestor(c, id, *entry))
-+			return true;
-+	}
-+
-+	return false;
-+}
-+
-+/*
-+ * Append @id to list @s. Adding an ID that is already present is a BUG.
-+ * Returns the result of darray_push(); a failure is logged.
-+ */
-+static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
-+{
-+	int err;
-+
-+	BUG_ON(snapshot_list_has_id(s, id));
-+
-+	err = darray_push(s, id);
-+	if (err)
-+		bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
-+
-+	return err;
-+}
-+
-+int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
-+ struct bch_snapshot *s);
-+int bch2_snapshot_get_subvol(struct btree_trans *, u32,
-+ struct bch_subvolume *);
-+
-+/* only exported for tests: */
-+int bch2_snapshot_node_create(struct btree_trans *, u32,
-+ u32 *, u32 *, unsigned);
-+
-+int bch2_check_snapshot_trees(struct bch_fs *);
-+int bch2_check_snapshots(struct bch_fs *);
-+
-+int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
-+void bch2_delete_dead_snapshots_work(struct work_struct *);
-+
-+int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos);
-+
-+/*
-+ * Fast-path wrapper: btrees without snapshots and leaf snapshots return 0
-+ * immediately; only interior snapshots need the btree walk.
-+ */
-+static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
-+						   enum btree_id id,
-+						   struct bpos pos)
-+{
-+	if (btree_type_has_snapshots(id) &&
-+	    !bch2_snapshot_is_leaf(trans->c, pos.snapshot))
-+		return __bch2_key_has_snapshot_overwrites(trans, id, pos);
-+
-+	return 0;
-+}
-+
-+int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *, enum btree_id,
-+ struct bkey_s_c, struct bpos *);
-+
-+int bch2_snapshots_read(struct bch_fs *);
-+void bch2_fs_snapshots_exit(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_SNAPSHOT_H */
-diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
-new file mode 100644
-index 000000000000..ae21a8cca1b4
---- /dev/null
-+++ b/fs/bcachefs/str_hash.h
-@@ -0,0 +1,370 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_STR_HASH_H
-+#define _BCACHEFS_STR_HASH_H
-+
-+#include "btree_iter.h"
-+#include "btree_update.h"
-+#include "checksum.h"
-+#include "error.h"
-+#include "inode.h"
-+#include "siphash.h"
-+#include "subvolume.h"
-+#include "super.h"
-+
-+#include <linux/crc32c.h>
-+#include <crypto/hash.h>
-+#include <crypto/sha2.h>
-+
-+/*
-+ * Translate a str_hash option value into the hash type to use. The siphash
-+ * option maps to the old or new siphash type depending on whether the
-+ * superblock has the new_siphash feature bit. Unknown options are a BUG.
-+ */
-+static inline enum bch_str_hash_type
-+bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
-+{
-+	switch (opt) {
-+	case BCH_STR_HASH_OPT_crc32c:
-+		return BCH_STR_HASH_crc32c;
-+	case BCH_STR_HASH_OPT_crc64:
-+		return BCH_STR_HASH_crc64;
-+	case BCH_STR_HASH_OPT_siphash:
-+		if (c->sb.features & (1ULL << BCH_FEATURE_new_siphash))
-+			return BCH_STR_HASH_siphash;
-+		return BCH_STR_HASH_siphash_old;
-+	default:
-+		BUG();
-+	}
-+}
-+
-+struct bch_hash_info {
-+ u8 type; /* BCH_STR_HASH_* value; selects the hash function in bch2_str_hash_init() */
-+ /*
-+ * For crc32 or crc64 string hashes the first key value of
-+ * the siphash_key (k0) is used as the key.
-+ */
-+ SIPHASH_KEY siphash_key; /* filled in by bch2_hash_info_init() from the inode's bi_hash_seed */
-+};
-+
-+/*
-+ * Build the hashing parameters for an inode: the hash type is extracted from
-+ * bi_flags and the key is seeded with bi_hash_seed. For the old siphash type
-+ * the key is instead taken from the SHA-256 digest of the seed.
-+ */
-+static inline struct bch_hash_info
-+bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
-+{
-+	const unsigned type_mask = ~(~0U << INODE_STR_HASH_BITS);
-+	/* XXX ick */
-+	struct bch_hash_info info = {
-+		.type		= (bi->bi_flags >> INODE_STR_HASH_OFFSET) & type_mask,
-+		.siphash_key	= { .k0 = bi->bi_hash_seed }
-+	};
-+
-+	if (unlikely(info.type == BCH_STR_HASH_siphash_old)) {
-+		SHASH_DESC_ON_STACK(desc, c->sha256);
-+		u8 digest[SHA256_DIGEST_SIZE];
-+
-+		desc->tfm = c->sha256;
-+
-+		crypto_shash_digest(desc, (void *) &bi->bi_hash_seed,
-+				    sizeof(bi->bi_hash_seed), digest);
-+		memcpy(&info.siphash_key, digest, sizeof(info.siphash_key));
-+	}
-+
-+	return info;
-+}
-+
-+struct bch_str_hash_ctx {
-+ union { /* which member is live depends on bch_hash_info.type */
-+ u32 crc32c; /* running value for BCH_STR_HASH_crc32c */
-+ u64 crc64; /* running value for BCH_STR_HASH_crc64 */
-+ SIPHASH_CTX siphash; /* state for both siphash types */
-+ };
-+};
-+
-+/*
-+ * Start a hash computation in @ctx according to @info->type. The CRC
-+ * variants are primed by hashing k0 of the siphash key; the siphash variants
-+ * are initialised with the whole key. Unknown types are a BUG.
-+ */
-+static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx,
-+				      const struct bch_hash_info *info)
-+{
-+	const void *seed = &info->siphash_key.k0;
-+	const size_t seed_len = sizeof(info->siphash_key.k0);
-+
-+	switch (info->type) {
-+	case BCH_STR_HASH_crc32c:
-+		ctx->crc32c = crc32c(~0, seed, seed_len);
-+		break;
-+	case BCH_STR_HASH_crc64:
-+		ctx->crc64 = crc64_be(~0, seed, seed_len);
-+		break;
-+	case BCH_STR_HASH_siphash_old:
-+	case BCH_STR_HASH_siphash:
-+		SipHash24_Init(&ctx->siphash, &info->siphash_key);
-+		break;
-+	default:
-+		BUG();
-+	}
-+}
-+
-+/* Feed @len bytes at @data into the hash in progress. Unknown types are a BUG. */
-+static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx,
-+					const struct bch_hash_info *info,
-+					const void *data, size_t len)
-+{
-+	const u8 type = info->type;
-+
-+	if (type == BCH_STR_HASH_crc32c)
-+		ctx->crc32c = crc32c(ctx->crc32c, data, len);
-+	else if (type == BCH_STR_HASH_crc64)
-+		ctx->crc64 = crc64_be(ctx->crc64, data, len);
-+	else if (type == BCH_STR_HASH_siphash_old ||
-+		 type == BCH_STR_HASH_siphash)
-+		SipHash24_Update(&ctx->siphash, data, len);
-+	else
-+		BUG();
-+}
-+
-+/*
-+ * Finish the hash and return its value. The crc64 and siphash results are
-+ * shifted right by one bit; crc32c is returned as is. Unknown types are a BUG.
-+ */
-+static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
-+				    const struct bch_hash_info *info)
-+{
-+	const u8 type = info->type;
-+
-+	if (type == BCH_STR_HASH_crc32c)
-+		return ctx->crc32c;
-+
-+	if (type == BCH_STR_HASH_crc64)
-+		return ctx->crc64 >> 1;
-+
-+	if (type == BCH_STR_HASH_siphash_old ||
-+	    type == BCH_STR_HASH_siphash)
-+		return SipHash24_End(&ctx->siphash) >> 1;
-+
-+	BUG();
-+}
-+
-+struct bch_hash_desc {
-+ enum btree_id btree_id; /* btree the hash table's keys live in */
-+ u8 key_type; /* bkey type that counts as a live entry */
-+
-+ u64 (*hash_key)(const struct bch_hash_info *, const void *); /* hash of a lookup key; used as the starting offset */
-+ u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c); /* hash of an existing/inserted bkey */
-+ bool (*cmp_key)(struct bkey_s_c, const void *); /* callers treat a false return as "matches" */
-+ bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c); /* callers treat a false return as "matches" */
-+ bool (*is_visible)(subvol_inum inum, struct bkey_s_c); /* optional; may be NULL (see is_visible_key()) */
-+};
-+
-+/*
-+ * True if @k is a live entry of this hash table that @inum may see: it must
-+ * have the table's key type, and - when the table has an is_visible hook and
-+ * inum.inum is nonzero - the hook must accept it.
-+ */
-+static inline bool is_visible_key(struct bch_hash_desc desc, subvol_inum inum, struct bkey_s_c k)
-+{
-+	if (k.k->type != desc.key_type)
-+		return false;
-+
-+	if (!desc.is_visible || !inum.inum)
-+		return true;
-+
-+	return desc.is_visible(inum, k);
-+}
-+
-+/*
-+ * Look up @key in the hash table of @inum, probing linearly from the key's
-+ * hash. On success returns 0 with @iter left pointing at the matching key
-+ * (the iterator is not exited on this path). Otherwise @iter is exited and
-+ * an error is returned, -BCH_ERR_ENOENT_str_hash_lookup if the key is absent.
-+ */
-+static __always_inline int
-+bch2_hash_lookup(struct btree_trans *trans,
-+		 struct btree_iter *iter,
-+		 const struct bch_hash_desc desc,
-+		 const struct bch_hash_info *info,
-+		 subvol_inum inum, const void *key,
-+		 unsigned flags)
-+{
-+	struct bkey_s_c k;
-+	u32 snapshot;
-+	int ret;
-+
-+	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+	if (ret)
-+		return ret;
-+
-+	for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
-+			   SPOS(inum.inum, desc.hash_key(info, key), snapshot),
-+			   POS(inum.inum, U64_MAX),
-+			   BTREE_ITER_SLOTS|flags, k, ret) {
-+		if (is_visible_key(desc, inum, k)) {
-+			if (!desc.cmp_key(k, key))
-+				return 0;
-+			/* hash collision: keep probing */
-+			continue;
-+		}
-+
-+		/* whiteouts are skipped; anything else is a hole, not found */
-+		if (k.k->type != KEY_TYPE_hash_whiteout)
-+			break;
-+	}
-+	bch2_trans_iter_exit(trans, iter);
-+
-+	return ret ?: -BCH_ERR_ENOENT_str_hash_lookup;
-+}
-+
-+/*
-+ * Find the first slot, probing from @key's hash, that does not hold a
-+ * visible key. On success returns 0 with @iter left at that slot (not
-+ * exited); otherwise @iter is exited and an error is returned,
-+ * -BCH_ERR_ENOSPC_str_hash_create if no such slot exists.
-+ */
-+static __always_inline int
-+bch2_hash_hole(struct btree_trans *trans,
-+	       struct btree_iter *iter,
-+	       const struct bch_hash_desc desc,
-+	       const struct bch_hash_info *info,
-+	       subvol_inum inum, const void *key)
-+{
-+	struct bkey_s_c k;
-+	u32 snapshot;
-+	int ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+
-+	if (ret)
-+		return ret;
-+
-+	for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
-+			   SPOS(inum.inum, desc.hash_key(info, key), snapshot),
-+			   POS(inum.inum, U64_MAX),
-+			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-+		if (!is_visible_key(desc, inum, k))
-+			return 0;
-+	}
-+	bch2_trans_iter_exit(trans, iter);
-+
-+	return ret ?: -BCH_ERR_ENOSPC_str_hash_create;
-+}
-+
-+/*
-+ * Decide whether deleting the key at @start must leave a whiteout behind.
-+ * Scans the slots after @start: returns 1 if a live key is found whose hash
-+ * is <= @start's offset, 0 if a slot that is neither a live key nor a
-+ * whiteout is reached first, or a negative error code.
-+ */
-+static __always_inline
-+int bch2_hash_needs_whiteout(struct btree_trans *trans,
-+			     const struct bch_hash_desc desc,
-+			     const struct bch_hash_info *info,
-+			     struct btree_iter *start)
-+{
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	bch2_trans_copy_iter(&iter, start);
-+
-+	bch2_btree_iter_advance(&iter);
-+
-+	for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, k, ret) {
-+		if (k.k->type == desc.key_type) {
-+			if (desc.hash_bkey(info, k) <= start->pos.offset) {
-+				ret = 1;
-+				break;
-+			}
-+		} else if (k.k->type != KEY_TYPE_hash_whiteout) {
-+			break;
-+		}
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+/*
-+ * Insert or replace @insert in the hash table, in snapshot @snapshot.
-+ *
-+ * Probes linearly from the key's hash. An existing matching key is replaced
-+ * in place; otherwise the key goes into the first free slot or whiteout seen
-+ * along the way.
-+ *
-+ * @flags:        BCH_HASH_SET_MUST_CREATE fails with -EEXIST if the key
-+ *                exists; BCH_HASH_SET_MUST_REPLACE fails with
-+ *                -BCH_ERR_ENOENT_str_hash_set_must_replace if it does not.
-+ * @update_flags: passed through to bch2_trans_update(). Previously this
-+ *                argument was accepted but silently dropped.
-+ *
-+ * Returns 0 on success, -BCH_ERR_ENOSPC_str_hash_create if the probe ran off
-+ * the end of the inode's range, or another negative error code.
-+ */
-+static __always_inline
-+int bch2_hash_set_snapshot(struct btree_trans *trans,
-+			   const struct bch_hash_desc desc,
-+			   const struct bch_hash_info *info,
-+			   subvol_inum inum, u32 snapshot,
-+			   struct bkey_i *insert,
-+			   int flags,
-+			   int update_flags)
-+{
-+	struct btree_iter iter, slot = { NULL };
-+	struct bkey_s_c k;
-+	bool found = false;
-+	int ret;
-+
-+	for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
-+			   SPOS(insert->k.p.inode,
-+				desc.hash_bkey(info, bkey_i_to_s_c(insert)),
-+				snapshot),
-+			   POS(insert->k.p.inode, U64_MAX),
-+			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-+		if (is_visible_key(desc, inum, k)) {
-+			if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
-+				goto found;
-+
-+			/* hash collision: */
-+			continue;
-+		}
-+
-+		/* Remember the first reusable slot (hole or whiteout) */
-+		if (!slot.path &&
-+		    !(flags & BCH_HASH_SET_MUST_REPLACE))
-+			bch2_trans_copy_iter(&slot, &iter);
-+
-+		if (k.k->type != KEY_TYPE_hash_whiteout)
-+			goto not_found;
-+	}
-+
-+	if (!ret)
-+		ret = -BCH_ERR_ENOSPC_str_hash_create;
-+out:
-+	bch2_trans_iter_exit(trans, &slot);
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	return ret;
-+found:
-+	found = true;
-+not_found:
-+
-+	if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) {
-+		ret = -BCH_ERR_ENOENT_str_hash_set_must_replace;
-+	} else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) {
-+		ret = -EEXIST;
-+	} else {
-+		if (!found && slot.path)
-+			swap(iter, slot);
-+
-+		insert->k.p = iter.pos;
-+		ret = bch2_trans_update(trans, &iter, insert, update_flags);
-+	}
-+
-+	goto out;
-+}
-+
-+/*
-+ * Insert or replace @insert in the hash table of @inum: resolves the
-+ * subvolume's current snapshot, sets the key's inode number, and hands off
-+ * to bch2_hash_set_snapshot() with no extra update flags.
-+ */
-+static __always_inline
-+int bch2_hash_set(struct btree_trans *trans,
-+		  const struct bch_hash_desc desc,
-+		  const struct bch_hash_info *info,
-+		  subvol_inum inum,
-+		  struct bkey_i *insert, int flags)
-+{
-+	u32 snapshot;
-+	int ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+
-+	if (ret)
-+		return ret;
-+
-+	insert->k.p.inode = inum.inum;
-+
-+	return bch2_hash_set_snapshot(trans, desc, info, inum,
-+				      snapshot, insert, flags, 0);
-+}
-+
-+/*
-+ * Delete the hash table entry at @iter's current position.
-+ *
-+ * The replacement key is allocated from the transaction first; then
-+ * bch2_hash_needs_whiteout() decides whether the slot must become a hash
-+ * whiteout (nonzero result) or can be a plain deletion (zero result).
-+ *
-+ * Returns 0 on success or a negative error from the allocation, the whiteout
-+ * check or bch2_trans_update().
-+ */
-+static __always_inline
-+int bch2_hash_delete_at(struct btree_trans *trans,
-+			const struct bch_hash_desc desc,
-+			const struct bch_hash_info *info,
-+			struct btree_iter *iter,
-+			unsigned update_flags)
-+{
-+	struct bkey_i *tombstone = bch2_trans_kmalloc(trans, sizeof(*tombstone));
-+	int ret = PTR_ERR_OR_ZERO(tombstone);
-+
-+	if (ret)
-+		return ret;
-+
-+	ret = bch2_hash_needs_whiteout(trans, desc, info, iter);
-+	if (ret < 0)
-+		return ret;
-+
-+	bkey_init(&tombstone->k);
-+	tombstone->k.p = iter->pos;
-+	if (ret)
-+		tombstone->k.type = KEY_TYPE_hash_whiteout;
-+	else
-+		tombstone->k.type = KEY_TYPE_deleted;
-+
-+	return bch2_trans_update(trans, iter, tombstone, update_flags);
-+}
-+
-+/*
-+ * Look up @key in the hash table of @inum (taking an intent iterator) and
-+ * delete the entry that was found.
-+ *
-+ * Returns the lookup error if the key could not be found, otherwise the
-+ * result of bch2_hash_delete_at(). The iterator is released only when the
-+ * lookup succeeded, mirroring the lookup's own error handling.
-+ */
-+static __always_inline
-+int bch2_hash_delete(struct btree_trans *trans,
-+		     const struct bch_hash_desc desc,
-+		     const struct bch_hash_info *info,
-+		     subvol_inum inum, const void *key)
-+{
-+	struct btree_iter iter;
-+	int ret = bch2_hash_lookup(trans, &iter, desc, info, inum, key,
-+				   BTREE_ITER_INTENT);
-+
-+	if (!ret) {
-+		ret = bch2_hash_delete_at(trans, desc, info, &iter, 0);
-+		bch2_trans_iter_exit(trans, &iter);
-+	}
-+
-+	return ret;
-+}
-+
-+#endif /* _BCACHEFS_STR_HASH_H */
-diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
-new file mode 100644
-index 000000000000..fccd25aa3242
---- /dev/null
-+++ b/fs/bcachefs/subvolume.c
-@@ -0,0 +1,437 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_key_cache.h"
-+#include "btree_update.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "fs.h"
-+#include "snapshot.h"
-+#include "subvolume.h"
-+
-+#include <linux/random.h>
-+
-+static int bch2_subvolume_delete(struct btree_trans *, u32);
-+
-+/*
-+ * fsck helper: check a single key from the subvolumes btree.
-+ *
-+ * Keys that are not KEY_TYPE_subvolume are ignored. For a subvolume key this:
-+ *  - verifies that the snapshot it references can be looked up;
-+ *  - finishes deletion of subvolumes flagged as unlinked;
-+ *  - for subvolumes not flagged as snapshots, checks that they are the master
-+ *    subvolume of their snapshot tree and sets the snapshot flag otherwise.
-+ *
-+ * Returns 0, a negative error, or -BCH_ERR_transaction_restart_nested after an
-+ * unlinked subvolume was deleted successfully.
-+ */
-+static int check_subvol(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_s_c k)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct bkey_s_c_subvolume subvol;
-+ struct bch_snapshot snapshot;
-+ unsigned snapid;
-+ int ret = 0;
-+
-+ if (k.k->type != KEY_TYPE_subvolume)
-+ return 0;
-+
-+ subvol = bkey_s_c_to_subvolume(k);
-+ snapid = le32_to_cpu(subvol.v->snapshot);
-+ ret = bch2_snapshot_lookup(trans, snapid, &snapshot);
-+
-+ /* a missing snapshot is logged, then reported like any other lookup error */
-+ if (bch2_err_matches(ret, ENOENT))
-+ bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
-+ k.k->p.offset, snapid);
-+ if (ret)
-+ return ret;
-+
-+ if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
-+ bch2_fs_lazy_rw(c);
-+
-+ ret = bch2_subvolume_delete(trans, iter->pos.offset);
-+ if (ret)
-+ bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
-+ /* on success, a nested-restart error is returned instead of 0 */
-+ return ret ?: -BCH_ERR_transaction_restart_nested;
-+ }
-+
-+ if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
-+ u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
-+ u32 snapshot_tree;
-+ struct bch_snapshot_tree st;
-+
-+ /* the in-memory snapshot entry is read under RCU */
-+ rcu_read_lock();
-+ snapshot_tree = snapshot_t(c, snapshot_root)->tree;
-+ rcu_read_unlock();
-+
-+ ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
-+
-+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
-+ "%s: snapshot tree %u not found", __func__, snapshot_tree);
-+
-+ if (ret)
-+ return ret;
-+
-+ if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset,
-+ c, subvol_not_master_and_not_snapshot,
-+ "subvolume %llu is not set as snapshot but is not master subvolume",
-+ k.k->p.offset)) {
-+ /* repair: mark the subvolume as a snapshot in a mutable copy of the key */
-+ struct bkey_i_subvolume *s =
-+ bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
-+ ret = PTR_ERR_OR_ZERO(s);
-+ if (ret)
-+ return ret;
-+
-+ SET_BCH_SUBVOLUME_SNAP(&s->v, true);
-+ }
-+ }
-+
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Run check_subvol() on every key of the subvolumes btree, starting at
-+ * POS_MIN, committing with BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL.
-+ *
-+ * Returns 0 or the first error, which is also logged via bch_err_fn().
-+ */
-+int bch2_check_subvols(struct bch_fs *c)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ ret = bch2_trans_run(c,
-+ for_each_btree_key_commit(trans, iter,
-+ BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-+ check_subvol(trans, &iter, k)));
-+ if (ret)
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+/* Subvolumes: */
-+
-+/*
-+ * Key validation hook for subvolume keys: flags the key (subvol_pos_bad) when
-+ * its position lies outside [SUBVOL_POS_MIN, SUBVOL_POS_MAX].
-+ *
-+ * Returns @ret, which starts at 0.
-+ * NOTE(review): bkey_fsck_err_on() presumably sets ret and jumps to the
-+ * fsck_err label on failure - confirm against its definition.
-+ */
-+int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
-+ bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err,
-+ subvol_pos_bad,
-+ "invalid pos");
-+fsck_err:
-+ return ret;
-+}
-+
-+/*
-+ * Pretty-print a subvolume key into @out: root inode and snapshot ID, plus
-+ * the parent subvolume when the value is large enough to extend past the
-+ * offset of the parent field.
-+ */
-+void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
-+			    struct bkey_s_c k)
-+{
-+	struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k);
-+	u64 root_inode = le64_to_cpu(subvol.v->inode);
-+	u32 snapshot_id = le32_to_cpu(subvol.v->snapshot);
-+
-+	prt_printf(out, "root %llu snapshot id %u", root_inode, snapshot_id);
-+
-+	/* older, shorter values do not carry a parent field */
-+	if (bkey_val_bytes(subvol.k) <= offsetof(struct bch_subvolume, parent))
-+		return;
-+
-+	prt_printf(out, " parent %u", le32_to_cpu(subvol.v->parent));
-+}
-+
-+/*
-+ * Copy the value of subvolume @subvol (key POS(0, subvol) in the subvolumes
-+ * btree) into @s.
-+ *
-+ * When @inconsistent_if_not_found is set, a lookup that fails with an ENOENT
-+ * class error additionally reports a filesystem inconsistency.
-+ *
-+ * Returns the result of the lookup unchanged.
-+ */
-+static __always_inline int
-+bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
-+			   bool inconsistent_if_not_found,
-+			   int iter_flags,
-+			   struct bch_subvolume *s)
-+{
-+	int err = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes,
-+					  POS(0, subvol),
-+					  iter_flags, subvolume, s);
-+
-+	bch2_fs_inconsistent_on(bch2_err_matches(err, ENOENT) &&
-+				inconsistent_if_not_found,
-+				trans->c, "missing subvolume %u", subvol);
-+
-+	return err;
-+}
-+
-+/*
-+ * Out-of-line entry point for bch2_subvolume_get_inlined(); all arguments are
-+ * forwarded unchanged and its result is returned.
-+ */
-+int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
-+		       bool inconsistent_if_not_found,
-+		       int iter_flags,
-+		       struct bch_subvolume *s)
-+{
-+	int ret = bch2_subvolume_get_inlined(trans, subvol,
-+					     inconsistent_if_not_found,
-+					     iter_flags, s);
-+
-+	return ret;
-+}
-+
-+/*
-+ * Fetch the subvolume recorded in snapshot node @snapshot into @subvol.
-+ *
-+ * Returns the snapshot lookup error if that fails; otherwise the result of
-+ * bch2_subvolume_get(), which is asked to flag a missing subvolume as a
-+ * filesystem inconsistency.
-+ */
-+int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
-+			     struct bch_subvolume *subvol)
-+{
-+	struct bch_snapshot snap;
-+	int ret = bch2_snapshot_lookup(trans, snapshot, &snap);
-+
-+	if (ret)
-+		return ret;
-+
-+	return bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
-+}
-+
-+/*
-+ * Look up subvolume @subvolid (cached iterator, with pending updates visible)
-+ * and store its snapshot ID in *@snapid.
-+ *
-+ * A lookup failing with an ENOENT class error is reported as a filesystem
-+ * inconsistency. *@snapid is only written on success.
-+ *
-+ * Returns 0 or the lookup error.
-+ */
-+int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
-+ u32 *snapid)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c_subvolume subvol;
-+ int ret;
-+
-+ subvol = bch2_bkey_get_iter_typed(trans, &iter,
-+ BTREE_ID_subvolumes, POS(0, subvolid),
-+ BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES,
-+ subvolume);
-+ ret = bkey_err(subvol);
-+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
-+ "missing subvolume %u", subvolid);
-+
-+ if (likely(!ret))
-+ *snapid = le32_to_cpu(subvol.v->snapshot);
-+ /* the iterator is released on both the success and the error path */
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/*
-+ * Per-key callback used when reparenting: point subvolume key @k at
-+ * @new_parent.
-+ *
-+ * Keys of other types are skipped, as are subvolume keys whose value is large
-+ * enough to hold a parent field and whose parent is not @old_parent. Note
-+ * that values too short to hold a parent field are therefore updated as well.
-+ *
-+ * Returns 0, or the error from obtaining a mutable copy of the key.
-+ */
-+static int bch2_subvolume_reparent(struct btree_trans *trans,
-+				   struct btree_iter *iter,
-+				   struct bkey_s_c k,
-+				   u32 old_parent, u32 new_parent)
-+{
-+	struct bkey_i_subvolume *mut;
-+	int err;
-+
-+	if (k.k->type != KEY_TYPE_subvolume)
-+		return 0;
-+
-+	if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
-+	    le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
-+		return 0;
-+
-+	mut = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
-+	err = PTR_ERR_OR_ZERO(mut);
-+	if (!err)
-+		mut->v.parent = cpu_to_le32(new_parent);
-+
-+	return err;
-+}
-+
-+/*
-+ * Separate from the snapshot tree in the snapshots btree, we record the tree
-+ * structure of how snapshot subvolumes were created - the parent subvolume of
-+ * each snapshot subvolume.
-+ *
-+ * When a subvolume is deleted, we scan for child subvolumes and reparent them,
-+ * to avoid dangling references:
-+ */
-+static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bch_subvolume s;
-+
-+ /*
-+  * First read the subvolume being deleted (its parent becomes the new
-+  * parent); only if that succeeds, walk the whole subvolumes btree and
-+  * hand every key to bch2_subvolume_reparent(). Returns the first nonzero
-+  * result of either step.
-+  */
-+ return lockrestart_do(trans,
-+ bch2_subvolume_get(trans, subvolid_to_delete, true,
-+ BTREE_ITER_CACHED, &s)) ?:
-+ for_each_btree_key_commit(trans, iter,
-+ BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
-+ NULL, NULL, BTREE_INSERT_NOFAIL,
-+ bch2_subvolume_reparent(trans, &iter, k,
-+ subvolid_to_delete, le32_to_cpu(s.parent)));
-+}
-+
-+/*
-+ * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
-+ * deletion/cleanup:
-+ *
-+ * Looks up subvolume @subvolid with an intent iterator, deletes its key and
-+ * then marks the snapshot node it referenced as deleted. A lookup failing
-+ * with an ENOENT class error is reported as a filesystem inconsistency.
-+ *
-+ * Returns 0 or the first error encountered.
-+ */
-+static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c_subvolume subvol;
-+ u32 snapid;
-+ int ret = 0;
-+
-+ subvol = bch2_bkey_get_iter_typed(trans, &iter,
-+ BTREE_ID_subvolumes, POS(0, subvolid),
-+ BTREE_ITER_CACHED|BTREE_ITER_INTENT,
-+ subvolume);
-+ ret = bkey_err(subvol);
-+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
-+ "missing subvolume %u", subvolid);
-+ /* NOTE(review): no iter_exit here - presumably the lookup helper releases the iterator on error; confirm */
-+ if (ret)
-+ return ret;
-+
-+ /* read the snapshot ID before the key is deleted */
-+ snapid = le32_to_cpu(subvol.v->snapshot);
-+
-+ ret = bch2_btree_delete_at(trans, &iter, 0) ?:
-+ bch2_snapshot_node_set_deleted(trans, snapid);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/*
-+ * Fully delete subvolume @subvolid: reparent its children first, then remove
-+ * the subvolume itself in a BTREE_INSERT_NOFAIL commit.
-+ *
-+ * Returns the reparenting error if that step fails, otherwise the result of
-+ * the commit.
-+ */
-+static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
-+{
-+	int ret = bch2_subvolumes_reparent(trans, subvolid);
-+
-+	if (ret)
-+		return ret;
-+
-+	return commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+			 __bch2_subvolume_delete(trans, subvolid));
-+}
-+
-+/*
-+ * Work item: delete all subvolumes queued on c->snapshots_unlinked.
-+ *
-+ * Each pass takes ownership of the current list under
-+ * snapshots_unlinked_lock (leaving an empty list behind), evicts the inodes
-+ * of those subvolumes and then deletes the subvolumes one by one. Passes
-+ * repeat until the list is found empty or a deletion fails. The write ref
-+ * BCH_WRITE_REF_snapshot_delete_pagecache is dropped when the work finishes.
-+ */
-+static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
-+{
-+ struct bch_fs *c = container_of(work, struct bch_fs,
-+ snapshot_wait_for_pagecache_and_delete_work);
-+ snapshot_id_list s;
-+ u32 *id;
-+ int ret = 0;
-+
-+ while (!ret) {
-+ /* steal the queued list so new entries can be added while we work */
-+ mutex_lock(&c->snapshots_unlinked_lock);
-+ s = c->snapshots_unlinked;
-+ darray_init(&c->snapshots_unlinked);
-+ mutex_unlock(&c->snapshots_unlinked_lock);
-+
-+ if (!s.nr)
-+ break;
-+
-+ bch2_evict_subvolume_inodes(c, &s);
-+
-+ for (id = s.data; id < s.data + s.nr; id++) {
-+ ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
-+ if (ret) {
-+ bch_err_msg(c, ret, "deleting subvolume %u", *id);
-+ break;
-+ }
-+ }
-+
-+ /* the stolen list is freed whether or not every deletion succeeded */
-+ darray_exit(&s);
-+ }
-+
-+ bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
-+}
-+
-+/*
-+ * Transaction commit hook carrying the ID of the subvolume being unlinked;
-+ * the embedded hook @h is recovered with container_of() in the callback.
-+ */
-+struct subvolume_unlink_hook {
-+ struct btree_trans_commit_hook h;
-+ u32 subvol;
-+};
-+
-+/*
-+ * Commit hook installed by bch2_subvolume_unlink(): queue the subvolume for
-+ * deletion and kick the deletion work item.
-+ *
-+ * The subvolume ID is added to c->snapshots_unlinked (unless already
-+ * present) under snapshots_unlinked_lock. A write ref is then taken for the
-+ * work item; if the work was already queued the extra ref is dropped again.
-+ *
-+ * Returns 0, the error from adding to the list, or -EROFS when the write ref
-+ * cannot be taken.
-+ */
-+static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
-+ struct btree_trans_commit_hook *_h)
-+{
-+ struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
-+ struct bch_fs *c = trans->c;
-+ int ret = 0;
-+
-+ mutex_lock(&c->snapshots_unlinked_lock);
-+ if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
-+ ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
-+ mutex_unlock(&c->snapshots_unlinked_lock);
-+
-+ if (ret)
-+ return ret;
-+
-+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
-+ return -EROFS;
-+
-+ /* queue_work() returning false means the work was already pending: give the ref back */
-+ if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
-+ bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
-+ return 0;
-+}
-+
-+/*
-+ * Mark subvolume @subvolid as unlinked within @trans.
-+ *
-+ * A commit hook is allocated from the transaction and registered first; it
-+ * queues the subvolume for deletion (see the hook function). Then the
-+ * subvolume key is fetched for modification and its UNLINKED flag is set.
-+ * A lookup failing with an ENOENT class error is reported as a filesystem
-+ * inconsistency.
-+ *
-+ * Returns 0 or a negative error.
-+ */
-+int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
-+{
-+ struct btree_iter iter;
-+ struct bkey_i_subvolume *n;
-+ struct subvolume_unlink_hook *h;
-+ int ret = 0;
-+
-+ h = bch2_trans_kmalloc(trans, sizeof(*h));
-+ ret = PTR_ERR_OR_ZERO(h);
-+ if (ret)
-+ return ret;
-+
-+ h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook;
-+ h->subvol = subvolid;
-+ bch2_trans_commit_hook(trans, &h->h);
-+
-+ n = bch2_bkey_get_mut_typed(trans, &iter,
-+ BTREE_ID_subvolumes, POS(0, subvolid),
-+ BTREE_ITER_CACHED, subvolume);
-+ ret = PTR_ERR_OR_ZERO(n);
-+ if (unlikely(ret)) {
-+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
-+ "missing subvolume %u", subvolid);
-+ /* NOTE(review): returns without iter_exit - presumably the helper cleans up on error; confirm */
-+ return ret;
-+ }
-+
-+ SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+/*
-+ * Create a new subvolume, optionally as a snapshot of @src_subvolid.
-+ *
-+ * @inode:	    root inode number stored in the new subvolume
-+ * @src_subvolid:   subvolume to snapshot, or 0 to create a plain subvolume
-+ * @new_subvolid:   out: ID (btree offset) of the new subvolume
-+ * @new_snapshotid: out: snapshot node assigned to the new subvolume
-+ * @ro:		    value for the new subvolume's RO flag
-+ *
-+ * An empty slot is reserved in the subvolumes btree; one snapshot node is
-+ * created for a plain subvolume, two when snapshotting (the second one is
-+ * given to the source subvolume). The out parameters are only written on
-+ * success.
-+ *
-+ * Returns 0 or a negative error; a btree-slot ENOSPC is translated to
-+ * -BCH_ERR_ENOSPC_subvolume_create.
-+ */
-+int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
-+ u32 src_subvolid,
-+ u32 *new_subvolid,
-+ u32 *new_snapshotid,
-+ bool ro)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
-+ struct bkey_i_subvolume *new_subvol = NULL;
-+ struct bkey_i_subvolume *src_subvol = NULL;
-+ u32 parent = 0, new_nodes[2], snapshot_subvols[2];
-+ int ret = 0;
-+
-+ ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
-+ BTREE_ID_subvolumes, POS(0, U32_MAX));
-+ if (ret == -BCH_ERR_ENOSPC_btree_slot)
-+ ret = -BCH_ERR_ENOSPC_subvolume_create;
-+ if (ret)
-+ return ret;
-+
-+ /* index 0 is the new subvolume, index 1 the source (used only for snapshots) */
-+ snapshot_subvols[0] = dst_iter.pos.offset;
-+ snapshot_subvols[1] = src_subvolid;
-+
-+ if (src_subvolid) {
-+ /* Creating a snapshot: */
-+
-+ src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
-+ BTREE_ID_subvolumes, POS(0, src_subvolid),
-+ BTREE_ITER_CACHED, subvolume);
-+ ret = PTR_ERR_OR_ZERO(src_subvol);
-+ if (unlikely(ret)) {
-+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
-+ "subvolume %u not found", src_subvolid);
-+ goto err;
-+ }
-+
-+ /* new snapshot nodes become children of the source's current snapshot */
-+ parent = le32_to_cpu(src_subvol->v.snapshot);
-+ }
-+
-+ ret = bch2_snapshot_node_create(trans, parent, new_nodes,
-+ snapshot_subvols,
-+ src_subvolid ? 2 : 1);
-+ if (ret)
-+ goto err;
-+
-+ if (src_subvolid) {
-+ /* the source subvolume moves to the second new snapshot node */
-+ src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
-+ ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
-+ ret = PTR_ERR_OR_ZERO(new_subvol);
-+ if (ret)
-+ goto err;
-+
-+ new_subvol->v.flags = 0;
-+ new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
-+ new_subvol->v.inode = cpu_to_le64(inode);
-+ new_subvol->v.parent = cpu_to_le32(src_subvolid);
-+ new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
-+ new_subvol->v.otime.hi = 0;
-+
-+ SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
-+ SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
-+
-+ *new_subvolid = new_subvol->k.p.offset;
-+ *new_snapshotid = new_nodes[0];
-+err:
-+ /* src_iter was initialised to { NULL } above, so it is exited even when unused */
-+ bch2_trans_iter_exit(trans, &src_iter);
-+ bch2_trans_iter_exit(trans, &dst_iter);
-+ return ret;
-+}
-+
-+/*
-+ * Initialise the per-filesystem subvolume/snapshot deletion state: the two
-+ * work items and the mutex protecting the unlinked list. Always returns 0.
-+ */
-+int bch2_fs_subvolumes_init(struct bch_fs *c)
-+{
-+ INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
-+ INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
-+ bch2_subvolume_wait_for_pagecache_and_delete);
-+ mutex_init(&c->snapshots_unlinked_lock);
-+ return 0;
-+}
-diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
-new file mode 100644
-index 000000000000..a1003d30ab0a
---- /dev/null
-+++ b/fs/bcachefs/subvolume.h
-@@ -0,0 +1,35 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SUBVOLUME_H
-+#define _BCACHEFS_SUBVOLUME_H
-+
-+#include "darray.h"
-+#include "subvolume_types.h"
-+
-+enum bkey_invalid_flags;
-+
-+int bch2_check_subvols(struct bch_fs *);
-+
-+int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+/*
-+ * bkey operations for subvolume keys: validation and text formatting
-+ * callbacks, plus a minimum value size of 16.
-+ */
-+#define bch2_bkey_ops_subvolume ((struct bkey_ops) { \
-+ .key_invalid = bch2_subvolume_invalid, \
-+ .val_to_text = bch2_subvolume_to_text, \
-+ .min_val_size = 16, \
-+})
-+
-+int bch2_subvolume_get(struct btree_trans *, unsigned,
-+ bool, int, struct bch_subvolume *);
-+int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
-+
-+int bch2_delete_dead_snapshots(struct bch_fs *);
-+void bch2_delete_dead_snapshots_async(struct bch_fs *);
-+
-+int bch2_subvolume_unlink(struct btree_trans *, u32);
-+int bch2_subvolume_create(struct btree_trans *, u64, u32,
-+ u32 *, u32 *, bool);
-+
-+int bch2_fs_subvolumes_init(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_SUBVOLUME_H */
-diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h
-new file mode 100644
-index 000000000000..86833445af20
---- /dev/null
-+++ b/fs/bcachefs/subvolume_types.h
-@@ -0,0 +1,31 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SUBVOLUME_TYPES_H
-+#define _BCACHEFS_SUBVOLUME_TYPES_H
-+
-+#include "darray.h"
-+
-+typedef DARRAY(u32) snapshot_id_list;
-+
-+#define IS_ANCESTOR_BITMAP 128
-+
-+/*
-+ * In-memory description of one snapshot node.
-+ * NOTE(review): field meanings other than @subvol are inferred from names
-+ * only - confirm against snapshot.c before relying on them.
-+ */
-+struct snapshot_t {
-+ u32 parent;
-+ u32 skip[3];
-+ u32 depth;
-+ u32 children[2];
-+ u32 subvol; /* Nonzero only if a subvolume points to this node: */
-+ u32 tree;
-+ u32 equiv;
-+ /* bitmap of IS_ANCESTOR_BITMAP (128) bits */
-+ unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
-+};
-+
-+/*
-+ * Variable-length table of snapshot_t entries; the zero-length array is the
-+ * struct's only member, so storage for the entries comes from the allocation.
-+ */
-+struct snapshot_table {
-+ struct snapshot_t s[0];
-+};
-+
-+/* An inode number qualified by the subvolume it is looked up in. */
-+typedef struct {
-+ u32 subvol;
-+ u64 inum;
-+} subvol_inum;
-+
-+#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */
-diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
-new file mode 100644
-index 000000000000..f4cad903f4d6
---- /dev/null
-+++ b/fs/bcachefs/super-io.c
-@@ -0,0 +1,1266 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "checksum.h"
-+#include "counters.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "journal.h"
-+#include "journal_sb.h"
-+#include "journal_seq_blacklist.h"
-+#include "recovery.h"
-+#include "replicas.h"
-+#include "quota.h"
-+#include "sb-clean.h"
-+#include "sb-errors.h"
-+#include "sb-members.h"
-+#include "super-io.h"
-+#include "super.h"
-+#include "trace.h"
-+#include "vstructs.h"
-+
-+#include <linux/backing-dev.h>
-+#include <linux/sort.h>
-+
-+/* Block device holder ops for superblock handles; no callbacks are set. */
-+static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
-+};
-+
-+/*
-+ * One known on-disk metadata version: its numeric value, its name, and the
-+ * recovery passes associated with it (consumed by
-+ * bch2_upgrade_recovery_passes()).
-+ */
-+struct bch2_metadata_version {
-+ u16 version;
-+ const char *name;
-+ u64 recovery_passes;
-+};
-+
-+/*
-+ * Table of all metadata versions, generated from the BCH_METADATA_VERSIONS()
-+ * x-macro: each x(n, v, passes) entry becomes { v, "n", passes }.
-+ */
-+static const struct bch2_metadata_version bch2_metadata_versions[] = {
-+#define x(n, v, _recovery_passes) { \
-+ .version = v, \
-+ .name = #n, \
-+ .recovery_passes = _recovery_passes, \
-+},
-+ BCH_METADATA_VERSIONS()
-+#undef x
-+};
-+
-+/*
-+ * Print metadata version @v to @out as "<major>.<minor>: <name>". The name is
-+ * taken from the first matching entry of bch2_metadata_versions, or
-+ * "(unknown version)" when there is none.
-+ */
-+void bch2_version_to_text(struct printbuf *out, unsigned v)
-+{
-+	const struct bch2_metadata_version *ver = bch2_metadata_versions;
-+	const struct bch2_metadata_version *end =
-+		bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions);
-+	const char *name = "(unknown version)";
-+
-+	for (; ver < end; ver++) {
-+		if (ver->version != v)
-+			continue;
-+
-+		name = ver->name;
-+		break;
-+	}
-+
-+	prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), name);
-+}
-+
-+/*
-+ * Return the highest known metadata version that shares @v's major number and
-+ * is not lower than @v. Versions with major number 0 are returned unchanged,
-+ * as is @v itself when no higher version with the same major number is known.
-+ */
-+unsigned bch2_latest_compatible_version(unsigned v)
-+{
-+	/* every candidate accepted below has this major, so it never changes */
-+	unsigned major = BCH_VERSION_MAJOR(v);
-+	unsigned latest = v;
-+
-+	if (!major)
-+		return v;
-+
-+	for (unsigned idx = 0; idx < ARRAY_SIZE(bch2_metadata_versions); idx++) {
-+		unsigned candidate = bch2_metadata_versions[idx].version;
-+
-+		if (candidate > latest && BCH_VERSION_MAJOR(candidate) == major)
-+			latest = candidate;
-+	}
-+
-+	return latest;
-+}
-+
-+/*
-+ * Compute the set of recovery passes required when upgrading from
-+ * @old_version to @new_version: the union of the passes of every known
-+ * version in (old_version, new_version]. An entry carrying
-+ * RECOVERY_PASS_ALL_FSCK additionally pulls in all fsck passes; the marker
-+ * bit itself is stripped from the result.
-+ */
-+u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
-+				 unsigned old_version,
-+				 unsigned new_version)
-+{
-+	u64 passes = 0;
-+
-+	for (unsigned idx = 0; idx < ARRAY_SIZE(bch2_metadata_versions); idx++) {
-+		const struct bch2_metadata_version *ver = &bch2_metadata_versions[idx];
-+
-+		if (ver->version <= old_version || ver->version > new_version)
-+			continue;
-+
-+		if (ver->recovery_passes & RECOVERY_PASS_ALL_FSCK)
-+			passes |= bch2_fsck_recovery_passes();
-+		passes |= ver->recovery_passes;
-+	}
-+
-+	return passes & ~RECOVERY_PASS_ALL_FSCK;
-+}
-+
-+/*
-+ * NULL-terminated array of superblock field names, generated from the
-+ * BCH_SB_FIELDS() x-macro (one stringified name per entry).
-+ */
-+const char * const bch2_sb_fields[] = {
-+#define x(name, nr) #name,
-+ BCH_SB_FIELDS()
-+#undef x
-+ NULL
-+};
-+
-+static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *,
-+ struct printbuf *);
-+
-+/*
-+ * Return the first optional field of @sb whose type equals @type, or NULL if
-+ * the superblock has no such field.
-+ */
-+struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb,
-+ enum bch_sb_field_type type)
-+{
-+ struct bch_sb_field *f;
-+
-+ /* XXX: need locking around superblock to access optional fields */
-+
-+ vstruct_for_each(sb, f)
-+ if (le32_to_cpu(f->type) == type)
-+ return f;
-+ return NULL;
-+}
-+
-+/*
-+ * Resize field @f of the superblock in @sb to @u64s u64s, in place.
-+ *
-+ * @f == NULL appends a new, zeroed field at the end (type set to 0; the
-+ * caller fills it in); @u64s == 0 removes @f. Fields following @f are moved
-+ * with memmove() and any newly exposed space is zeroed. The superblock's own
-+ * u64s count is updated to match.
-+ *
-+ * The buffer must already be large enough: this is enforced with BUG_ON().
-+ *
-+ * Returns the (possibly new) field, or NULL when @u64s is 0.
-+ */
-+static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
-+ struct bch_sb_field *f,
-+ unsigned u64s)
-+{
-+ unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
-+ unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s;
-+
-+ BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size);
-+
-+ if (!f && !u64s) {
-+ /* nothing to do: */
-+ } else if (!f) {
-+ f = vstruct_last(sb->sb);
-+ memset(f, 0, sizeof(u64) * u64s);
-+ f->u64s = cpu_to_le32(u64s);
-+ f->type = 0;
-+ } else {
-+ void *src, *dst;
-+
-+ /* src: end of the field at its old size (must be taken before u64s changes) */
-+ src = vstruct_end(f);
-+
-+ if (u64s) {
-+ f->u64s = cpu_to_le32(u64s);
-+ /* dst: end of the field at its new size */
-+ dst = vstruct_end(f);
-+ } else {
-+ /* deleting: following fields slide down over f itself */
-+ dst = f;
-+ }
-+
-+ /* sb->sb->u64s still holds the old total here, so this is the old tail */
-+ memmove(dst, src, vstruct_end(sb->sb) - src);
-+
-+ if (dst > src)
-+ memset(src, 0, dst - src);
-+ }
-+
-+ sb->sb->u64s = cpu_to_le32(sb_u64s);
-+
-+ return u64s ? f : NULL;
-+}
-+
-+/*
-+ * Remove the optional field of type @type from the superblock in @sb, if
-+ * present; does nothing otherwise.
-+ */
-+void bch2_sb_field_delete(struct bch_sb_handle *sb,
-+			  enum bch_sb_field_type type)
-+{
-+	struct bch_sb_field *field = bch2_sb_field_get_id(sb->sb, type);
-+
-+	if (!field)
-+		return;
-+
-+	/* resizing to zero u64s deletes the field */
-+	__bch2_sb_field_resize(sb, field, 0);
-+}
-+
-+/* Superblock realloc/free: */
-+
-+/*
-+ * Release everything owned by superblock handle @sb: the bio, the block
-+ * device reference (if @sb->bdev is a valid pointer), the holder and the
-+ * superblock buffer. The handle is zeroed afterwards, so it can be freed
-+ * again or reused safely.
-+ */
-+void bch2_free_super(struct bch_sb_handle *sb)
-+{
-+ kfree(sb->bio);
-+ if (!IS_ERR_OR_NULL(sb->bdev))
-+ blkdev_put(sb->bdev, sb->holder);
-+ /* the holder is freed only after blkdev_put() has used it */
-+ kfree(sb->holder);
-+
-+ kfree(sb->sb);
-+ memset(sb, 0, sizeof(*sb));
-+}
-+
-+/*
-+ * Make sure the buffer of @sb can hold a superblock of @u64s u64s.
-+ *
-+ * The required size is at least one logical block of the backing device (if
-+ * any) and is rounded up to a power of two. Nothing is done when the current
-+ * buffer is already large enough. When the handle has a layout, growing
-+ * beyond the layout's maximum superblock size is refused. If the handle owns
-+ * a bio, a new bio sized for the new buffer replaces it.
-+ *
-+ * Returns 0 on success, -BCH_ERR_ENOSPC_sb if the layout limit would be
-+ * exceeded, or one of the -BCH_ERR_ENOMEM_sb_* codes on allocation failure
-+ * (including the injected fault).
-+ */
-+int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
-+{
-+	size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
-+	size_t new_buffer_size;
-+	struct bch_sb *new_sb;
-+	struct bio *bio;
-+
-+	if (sb->bdev)
-+		new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev));
-+
-+	new_buffer_size = roundup_pow_of_two(new_bytes);
-+
-+	if (sb->sb && sb->buffer_size >= new_buffer_size)
-+		return 0;
-+
-+	if (sb->sb && sb->have_layout) {
-+		/*
-+		 * Shift in 64 bits: "512 << bits" would be evaluated as int
-+		 * and overflow for large sb_max_size_bits before being
-+		 * widened to u64.
-+		 */
-+		u64 max_bytes = 512ULL << sb->sb->layout.sb_max_size_bits;
-+
-+		if (new_bytes > max_bytes) {
-+			pr_err("%pg: superblock too big: want %zu but have %llu",
-+			       sb->bdev, new_bytes, max_bytes);
-+			return -BCH_ERR_ENOSPC_sb;
-+		}
-+	}
-+
-+	if (dynamic_fault("bcachefs:add:super_realloc"))
-+		return -BCH_ERR_ENOMEM_sb_realloc_injected;
-+
-+	/* krealloc() leaves the old buffer intact on failure */
-+	new_sb = krealloc(sb->sb, new_buffer_size, GFP_NOFS|__GFP_ZERO);
-+	if (!new_sb)
-+		return -BCH_ERR_ENOMEM_sb_buf_realloc;
-+
-+	sb->sb = new_sb;
-+
-+	if (sb->have_bio) {
-+		unsigned nr_bvecs = buf_pages(sb->sb, new_buffer_size);
-+
-+		bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
-+		if (!bio)
-+			return -BCH_ERR_ENOMEM_sb_bio_realloc;
-+
-+		bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
-+
-+		kfree(sb->bio);
-+		sb->bio = bio;
-+	}
-+
-+	/* only advertise the larger size once buffer and bio are both in place */
-+	sb->buffer_size = new_buffer_size;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Resize (or create) the optional field of type @type in @sb to @u64s u64s.
-+ *
-+ * The superblock buffer is grown first. When @sb is the filesystem's own
-+ * superblock (sb->fs_sb), the buffers of all online member devices are grown
-+ * by the same amount so the change can later be written out to them; the
-+ * caller must hold c->sb_lock in that case.
-+ *
-+ * Returns the resized field with its type set, or NULL if @u64s is 0 or a
-+ * reallocation failed.
-+ */
-+struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
-+					     enum bch_sb_field_type type,
-+					     unsigned u64s)
-+{
-+	struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
-+	ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
-+	ssize_t d = -old_u64s + u64s;
-+
-+	if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
-+		return NULL;
-+
-+	if (sb->fs_sb) {
-+		struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb);
-+		struct bch_dev *ca;
-+		unsigned i;
-+
-+		lockdep_assert_held(&c->sb_lock);
-+
-+		/* XXX: we're not checking that offline devices have enough space */
-+
-+		for_each_online_member(ca, c, i) {
-+			struct bch_sb_handle *dev_sb = &ca->disk_sb;
-+
-+			if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
-+				/*
-+				 * Leaving the loop early: drop the reference
-+				 * the iterator holds on @ca. Online-member
-+				 * iteration pins ca->io_ref (not ca->ref), so
-+				 * that is the one to put.
-+				 * NOTE(review): confirm against the
-+				 * for_each_online_member() definition.
-+				 */
-+				percpu_ref_put(&ca->io_ref);
-+				return NULL;
-+			}
-+		}
-+	}
-+
-+	/* the buffer may have moved in bch2_sb_realloc(): look the field up again */
-+	f = bch2_sb_field_get_id(sb->sb, type);
-+	f = __bch2_sb_field_resize(sb, f, u64s);
-+	if (f)
-+		f->type = cpu_to_le32(type);
-+	return f;
-+}
-+
-+/* Superblock validate: */
-+
-+/*
-+ * Validate a superblock layout, writing a description of the first problem
-+ * found to @out.
-+ *
-+ * Checks the magic, the layout type (must be 0), that the number of
-+ * superblocks is nonzero and fits the sb_offset array, and that consecutive
-+ * superblocks are at least the maximum superblock size (in sectors) apart.
-+ *
-+ * Returns 0 if the layout is valid, otherwise one of the
-+ * -BCH_ERR_invalid_sb_layout* error codes.
-+ */
-+static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
-+{
-+	u64 offset, prev_offset, max_sectors;
-+	unsigned i;
-+
-+	BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);
-+
-+	if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) &&
-+	    !uuid_equal(&layout->magic, &BCHFS_MAGIC)) {
-+		prt_printf(out, "Not a bcachefs superblock layout");
-+		return -BCH_ERR_invalid_sb_layout;
-+	}
-+
-+	if (layout->layout_type != 0) {
-+		prt_printf(out, "Invalid superblock layout type %u",
-+			   layout->layout_type);
-+		return -BCH_ERR_invalid_sb_layout_type;
-+	}
-+
-+	if (!layout->nr_superblocks) {
-+		prt_printf(out, "Invalid superblock layout: no superblocks");
-+		return -BCH_ERR_invalid_sb_layout_nr_superblocks;
-+	}
-+
-+	if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) {
-+		prt_printf(out, "Invalid superblock layout: too many superblocks");
-+		return -BCH_ERR_invalid_sb_layout_nr_superblocks;
-+	}
-+
-+	/*
-+	 * Shift in 64 bits to match max_sectors' type: "1 << bits" is an int
-+	 * expression and overflows once sb_max_size_bits reaches 31.
-+	 */
-+	max_sectors = 1ULL << layout->sb_max_size_bits;
-+
-+	prev_offset = le64_to_cpu(layout->sb_offset[0]);
-+
-+	for (i = 1; i < layout->nr_superblocks; i++) {
-+		offset = le64_to_cpu(layout->sb_offset[i]);
-+
-+		if (offset < prev_offset + max_sectors) {
-+			prt_printf(out, "Invalid superblock layout: superblocks overlap\n"
-+			       " (sb %u ends at %llu next starts at %llu)",
-+			       i - 1, prev_offset + max_sectors, offset);
-+			return -BCH_ERR_invalid_sb_layout_superblocks_overlap;
-+		}
-+		prev_offset = offset;
-+	}
-+
-+	return 0;
-+}
-+
-+/* Append " (min <oldest supported>, max <current>)" to @out. */
-+static void bch2_sb_prt_supported_versions(struct printbuf *out)
-+{
-+	prt_str(out, " (min ");
-+	bch2_version_to_text(out, bcachefs_metadata_version_min);
-+	prt_str(out, ", max ");
-+	bch2_version_to_text(out, bcachefs_metadata_version_current);
-+	prt_str(out, ")");
-+}
-+
-+/*
-+ * Check that the version fields of @sb are ones this implementation can
-+ * handle: both version and version_min must pass bch2_version_compatible(),
-+ * and version_min must not exceed version.
-+ *
-+ * Returns 0 if compatible; otherwise -BCH_ERR_invalid_sb_version with an
-+ * explanation written to @out.
-+ */
-+static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out)
-+{
-+	u16 ver = le16_to_cpu(sb->version);
-+	u16 ver_min = le16_to_cpu(sb->version_min);
-+
-+	if (!bch2_version_compatible(ver)) {
-+		prt_str(out, "Unsupported superblock version ");
-+		bch2_version_to_text(out, ver);
-+		bch2_sb_prt_supported_versions(out);
-+		return -BCH_ERR_invalid_sb_version;
-+	}
-+
-+	if (!bch2_version_compatible(ver_min)) {
-+		prt_str(out, "Unsupported superblock version_min ");
-+		bch2_version_to_text(out, ver_min);
-+		bch2_sb_prt_supported_versions(out);
-+		return -BCH_ERR_invalid_sb_version;
-+	}
-+
-+	if (ver_min > ver) {
-+		prt_str(out, "Bad minimum version ");
-+		bch2_version_to_text(out, ver_min);
-+		prt_str(out, ", greater than version field ");
-+		bch2_version_to_text(out, ver);
-+		return -BCH_ERR_invalid_sb_version;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Validate the superblock held by @disk_sb.
-+ *
-+ * Checks, in order: version compatibility, feature bits, block size, UUIDs,
-+ * device count and index, time precision, every option stored in the
-+ * superblock, the layout, the framing of all optional fields, and finally
-+ * the contents of each optional field (members_v1 first).
-+ *
-+ * When @rw is READ, a few fields found zeroed are given default values
-+ * instead of being rejected.
-+ *
-+ * Returns 0 if valid; otherwise a negative -BCH_ERR_invalid_sb_* code (or the
-+ * error of the failing sub-check) with a message in @out.
-+ */
-+static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
-+ int rw)
-+{
-+ struct bch_sb *sb = disk_sb->sb;
-+ struct bch_sb_field *f;
-+ struct bch_sb_field_members_v1 *mi;
-+ enum bch_opt_id opt_id;
-+ u16 block_size;
-+ int ret;
-+
-+ ret = bch2_sb_compatible(sb, out);
-+ if (ret)
-+ return ret;
-+
-+ /* reject any feature bit at or above BCH_FEATURE_NR, and the whole second feature word */
-+ if (sb->features[1] ||
-+ (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) {
-+ prt_printf(out, "Filesystem has incompatible features");
-+ return -BCH_ERR_invalid_sb_features;
-+ }
-+
-+ block_size = le16_to_cpu(sb->block_size);
-+
-+ if (block_size > PAGE_SECTORS) {
-+ prt_printf(out, "Block size too big (got %u, max %u)",
-+ block_size, PAGE_SECTORS);
-+ return -BCH_ERR_invalid_sb_block_size;
-+ }
-+
-+ if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) {
-+ prt_printf(out, "Bad user UUID (got zeroes)");
-+ return -BCH_ERR_invalid_sb_uuid;
-+ }
-+
-+ if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) {
-+ prt_printf(out, "Bad internal UUID (got zeroes)");
-+ return -BCH_ERR_invalid_sb_uuid;
-+ }
-+
-+ if (!sb->nr_devices ||
-+ sb->nr_devices > BCH_SB_MEMBERS_MAX) {
-+ prt_printf(out, "Bad number of member devices %u (max %u)",
-+ sb->nr_devices, BCH_SB_MEMBERS_MAX);
-+ return -BCH_ERR_invalid_sb_too_many_members;
-+ }
-+
-+ if (sb->dev_idx >= sb->nr_devices) {
-+ prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)",
-+ sb->dev_idx, sb->nr_devices);
-+ return -BCH_ERR_invalid_sb_dev_idx;
-+ }
-+
-+ if (!sb->time_precision ||
-+ le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) {
-+ prt_printf(out, "Invalid time precision: %u (min 1, max %lu)",
-+ le32_to_cpu(sb->time_precision), NSEC_PER_SEC);
-+ return -BCH_ERR_invalid_sb_time_precision;
-+ }
-+
-+ if (rw == READ) {
-+ /*
-+ * Been seeing a bug where these are getting inexplicably
-+ * zeroed, so we're now validating them, but we have to be
-+ * careful not to prevent people's filesystems from mounting:
-+ */
-+ if (!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
-+ SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000);
-+ if (!BCH_SB_JOURNAL_RECLAIM_DELAY(sb))
-+ SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 1000);
-+
-+ if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
-+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
-+ }
-+
-+ for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
-+ const struct bch_option *opt = bch2_opt_table + opt_id;
-+
-+ if (opt->get_sb != BCH2_NO_SB_OPT) {
-+ u64 v = bch2_opt_from_sb(sb, opt_id);
-+
-+ /* prefix written up front; it is discarded below if the option is valid */
-+ prt_printf(out, "Invalid option ");
-+ ret = bch2_opt_validate(opt, v, out);
-+ if (ret)
-+ return ret;
-+
-+ printbuf_reset(out);
-+ }
-+ }
-+
-+ /* validate layout */
-+ ret = validate_sb_layout(&sb->layout, out);
-+ if (ret)
-+ return ret;
-+
-+ /* first pass: check only the framing (sizes) of the optional fields */
-+ vstruct_for_each(sb, f) {
-+ if (!f->u64s) {
-+ prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)",
-+ le32_to_cpu(f->type));
-+ return -BCH_ERR_invalid_sb_field_size;
-+ }
-+
-+ if (vstruct_next(f) > vstruct_last(sb)) {
-+ prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)",
-+ le32_to_cpu(f->type));
-+ return -BCH_ERR_invalid_sb_field_size;
-+ }
-+ }
-+
-+ /* members must be validated first: */
-+ mi = bch2_sb_field_get(sb, members_v1);
-+ if (!mi) {
-+ prt_printf(out, "Invalid superblock: member info area missing");
-+ return -BCH_ERR_invalid_sb_members_missing;
-+ }
-+
-+ ret = bch2_sb_field_validate(sb, &mi->field, out);
-+ if (ret)
-+ return ret;
-+
-+ /* second pass: validate the contents of every field except members_v1 (done above) */
-+ vstruct_for_each(sb, f) {
-+ if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1)
-+ continue;
-+
-+ ret = bch2_sb_field_validate(sb, f, out);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+/* device open: */
-+
-+/*
-+ * Refresh the CPU-endian superblock summary in c->sb, and the cached member
-+ * info of every member device, from the filesystem's on-disk format
-+ * superblock c->disk_sb.sb. Caller must hold c->sb_lock.
-+ */
-+static void bch2_sb_update(struct bch_fs *c)
-+{
-+ struct bch_sb *src = c->disk_sb.sb;
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ lockdep_assert_held(&c->sb_lock);
-+
-+ c->sb.uuid = src->uuid;
-+ c->sb.user_uuid = src->user_uuid;
-+ c->sb.version = le16_to_cpu(src->version);
-+ c->sb.version_min = le16_to_cpu(src->version_min);
-+ c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src);
-+ c->sb.nr_devices = src->nr_devices;
-+ c->sb.clean = BCH_SB_CLEAN(src);
-+ c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);
-+
-+ /* time_precision is used as a divisor below (bch2_sb_validate() rejects 0) */
-+ c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision);
-+ c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;
-+
-+ /* XXX this is wrong, we need a 96 or 128 bit integer type */
-+ c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo),
-+ c->sb.nsec_per_time_unit);
-+ c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
-+
-+ c->sb.features = le64_to_cpu(src->features[0]);
-+ c->sb.compat = le64_to_cpu(src->compat[0]);
-+
-+ for_each_member_device(ca, c, i) {
-+ struct bch_member m = bch2_sb_member_get(src, i);
-+ ca->mi = bch2_mi_to_cpu(&m);
-+ }
-+}
-+
-+/*
-+ * Copy the shared parts of superblock @src into the superblock owned by
-+ * @dst_handle: the fixed header fields listed below plus every optional
-+ * field whose type is not in BCH_SINGLE_DEVICE_SB_FIELDS. Fields absent from
-+ * @src are removed from the destination.
-+ *
-+ * Returns 0, or the error from growing the destination buffer.
-+ */
-+static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
-+{
-+ struct bch_sb_field *src_f, *dst_f;
-+ struct bch_sb *dst = dst_handle->sb;
-+ unsigned i;
-+
-+ dst->version = src->version;
-+ dst->version_min = src->version_min;
-+ dst->seq = src->seq;
-+ dst->uuid = src->uuid;
-+ dst->user_uuid = src->user_uuid;
-+ memcpy(dst->label, src->label, sizeof(dst->label));
-+
-+ dst->block_size = src->block_size;
-+ dst->nr_devices = src->nr_devices;
-+
-+ dst->time_base_lo = src->time_base_lo;
-+ dst->time_base_hi = src->time_base_hi;
-+ dst->time_precision = src->time_precision;
-+
-+ memcpy(dst->flags, src->flags, sizeof(dst->flags));
-+ memcpy(dst->features, src->features, sizeof(dst->features));
-+ memcpy(dst->compat, src->compat, sizeof(dst->compat));
-+
-+ for (i = 0; i < BCH_SB_FIELD_NR; i++) {
-+ int d;
-+
-+ if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS)
-+ continue;
-+
-+ src_f = bch2_sb_field_get_id(src, i);
-+ dst_f = bch2_sb_field_get_id(dst, i);
-+
-+ /* d: how many u64s the destination field has to grow by */
-+ d = (src_f ? le32_to_cpu(src_f->u64s) : 0) -
-+ (dst_f ? le32_to_cpu(dst_f->u64s) : 0);
-+ if (d > 0) {
-+ int ret = bch2_sb_realloc(dst_handle,
-+ le32_to_cpu(dst_handle->sb->u64s) + d);
-+
-+ if (ret)
-+ return ret;
-+
-+ /* the buffer may have been reallocated: refresh both pointers */
-+ dst = dst_handle->sb;
-+ dst_f = bch2_sb_field_get_id(dst, i);
-+ }
-+
-+ dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
-+ src_f ? le32_to_cpu(src_f->u64s) : 0);
-+
-+ if (src_f)
-+ memcpy(dst_f, src_f, vstruct_bytes(src_f));
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Load superblock @src into the filesystem: make sure c->disk_sb has a
-+ * buffer, copy @src into it, rebuild the in-memory replicas and disk group
-+ * tables, and finally refresh c->sb. Caller must hold c->sb_lock.
-+ *
-+ * Returns 0, or the error of the first step that fails (later steps are then
-+ * skipped).
-+ */
-+int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
-+{
-+	int ret;
-+
-+	lockdep_assert_held(&c->sb_lock);
-+
-+	ret = bch2_sb_realloc(&c->disk_sb, 0);
-+	if (ret)
-+		return ret;
-+
-+	ret = __copy_super(&c->disk_sb, src);
-+	if (ret)
-+		return ret;
-+
-+	ret = bch2_sb_replicas_to_cpu_replicas(c);
-+	if (ret)
-+		return ret;
-+
-+	ret = bch2_sb_disk_groups_to_cpu(c);
-+	if (ret)
-+		return ret;
-+
-+	bch2_sb_update(c);
-+	return 0;
-+}
-+
-+/*
-+ * Copy the filesystem's in-memory superblock into device @ca's superblock
-+ * buffer. Returns whatever __copy_super() returns.
-+ */
-+int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ struct bch_sb_handle *dev_handle = &ca->disk_sb;
-+ struct bch_sb *fs_sb = c->disk_sb.sb;
-+
-+ return __copy_super(dev_handle, fs_sb);
-+}
-+
-+/* read superblock: */
-+
-+/*
-+ * Read and sanity check the superblock at sector @offset into @sb's buffer.
-+ *
-+ * Checks, in order: I/O status, magic, version compatibility, size against
-+ * the embedded layout's maximum, checksum type and checksum. If the
-+ * superblock is larger than the current buffer, the buffer is grown and the
-+ * read is repeated from the start.
-+ *
-+ * On failure a description is appended to @err and a negative error code is
-+ * returned; on success sb->seq is set from the superblock and 0 is returned.
-+ */
-+static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
-+{
-+ struct bch_csum csum;
-+ unsigned long max_bytes;
-+ size_t bytes;
-+ int ret;
-+reread:
-+ bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
-+ sb->bio->bi_iter.bi_sector = offset;
-+ bch2_bio_map(sb->bio, sb->sb, sb->buffer_size);
-+
-+ ret = submit_bio_wait(sb->bio);
-+ if (ret) {
-+ prt_printf(err, "IO error: %i", ret);
-+ return ret;
-+ }
-+
-+ if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
-+ !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) {
-+ prt_printf(err, "Not a bcachefs superblock");
-+ return -BCH_ERR_invalid_sb_magic;
-+ }
-+
-+ ret = bch2_sb_compatible(sb->sb, err);
-+ if (ret)
-+ return ret;
-+
-+ bytes = vstruct_bytes(sb->sb);
-+
-+ /*
-+ * Compute the limit once, as unsigned long, so the comparison and the
-+ * message agree and the shift is not done in (signed) int - the shift
-+ * count comes straight from disk at this point:
-+ */
-+ max_bytes = 512UL << sb->sb->layout.sb_max_size_bits;
-+
-+ if (bytes > max_bytes) {
-+ prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)",
-+ bytes, max_bytes);
-+ return -BCH_ERR_invalid_sb_too_big;
-+ }
-+
-+ /* Buffer too small for the whole superblock: grow it and read again */
-+ if (bytes > sb->buffer_size) {
-+ ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s));
-+ if (ret)
-+ return ret;
-+ goto reread;
-+ }
-+
-+ if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) {
-+ prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
-+ return -BCH_ERR_invalid_sb_csum_type;
-+ }
-+
-+ /* XXX: verify MACs */
-+ csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
-+ null_nonce(), sb->sb);
-+
-+ if (bch2_crc_cmp(csum, sb->sb->csum)) {
-+ prt_printf(err, "bad checksum");
-+ return -BCH_ERR_invalid_sb_csum;
-+ }
-+
-+ sb->seq = le64_to_cpu(sb->sb->seq);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Open the block device at @path and read a valid superblock into @sb.
-+ *
-+ * The superblock at the offset given by the "sb" option is tried first. If
-+ * that fails and the option was not explicitly set, the superblock layout is
-+ * read from BCH_SB_LAYOUT_SECTOR and each offset it lists is tried in turn.
-+ *
-+ * @opts may be modified: nochanges is set if the device could only be opened
-+ * without write access, and (non-kernel builds only) direct_io is cleared
-+ * before retrying when the block size is smaller than the device's.
-+ *
-+ * Returns 0 on success. On failure @sb is released with bch2_free_super()
-+ * (except for the early -ENOMEM and device-open failures, which return
-+ * without calling it) and a negative error code is returned.
-+ */
-+int bch2_read_super(const char *path, struct bch_opts *opts,
-+ struct bch_sb_handle *sb)
-+{
-+ u64 offset = opt_get(*opts, sb);
-+ struct bch_sb_layout layout;
-+ struct printbuf err = PRINTBUF;
-+ __le64 *i;
-+ int ret;
-+#ifndef __KERNEL__
-+retry:
-+#endif
-+ memset(sb, 0, sizeof(*sb));
-+ sb->mode = BLK_OPEN_READ;
-+ sb->have_bio = true;
-+ /* One-byte allocation passed as the holder argument when opening: */
-+ sb->holder = kmalloc(1, GFP_KERNEL);
-+ if (!sb->holder)
-+ return -ENOMEM;
-+
-+#ifndef __KERNEL__
-+ if (opt_get(*opts, direct_io) == false)
-+ sb->mode |= BLK_OPEN_BUFFERED;
-+#endif
-+
-+ if (!opt_get(*opts, noexcl))
-+ sb->mode |= BLK_OPEN_EXCL;
-+
-+ if (!opt_get(*opts, nochanges))
-+ sb->mode |= BLK_OPEN_WRITE;
-+
-+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
-+ /*
-+ * On -EACCES with the read_only option set, retry without write access
-+ * and record that by setting nochanges:
-+ */
-+ if (IS_ERR(sb->bdev) &&
-+ PTR_ERR(sb->bdev) == -EACCES &&
-+ opt_get(*opts, read_only)) {
-+ sb->mode &= ~BLK_OPEN_WRITE;
-+
-+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
-+ if (!IS_ERR(sb->bdev))
-+ opt_set(*opts, nochanges, true);
-+ }
-+
-+ if (IS_ERR(sb->bdev)) {
-+ ret = PTR_ERR(sb->bdev);
-+ goto out;
-+ }
-+
-+ ret = bch2_sb_realloc(sb, 0);
-+ if (ret) {
-+ prt_printf(&err, "error allocating memory for superblock");
-+ goto err;
-+ }
-+
-+ if (bch2_fs_init_fault("read_super")) {
-+ prt_printf(&err, "dynamic fault");
-+ ret = -EFAULT;
-+ goto err;
-+ }
-+
-+ ret = read_one_super(sb, offset, &err);
-+ if (!ret)
-+ goto got_super;
-+
-+ /* An explicitly requested superblock offset gets no fallback: */
-+ if (opt_defined(*opts, sb))
-+ goto err;
-+
-+ printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s\n",
-+ path, err.buf);
-+ printbuf_reset(&err);
-+
-+ /*
-+ * Error reading primary superblock - read location of backup
-+ * superblocks:
-+ */
-+ bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
-+ sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
-+ /*
-+ * use sb buffer to read layout, since sb buffer is page aligned but
-+ * layout won't be:
-+ */
-+ bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));
-+
-+ ret = submit_bio_wait(sb->bio);
-+ if (ret) {
-+ prt_printf(&err, "IO error: %i", ret);
-+ goto err;
-+ }
-+
-+ memcpy(&layout, sb->sb, sizeof(layout));
-+ ret = validate_sb_layout(&layout, &err);
-+ if (ret)
-+ goto err;
-+
-+ /* Try each offset in the layout, skipping the one already tried: */
-+ for (i = layout.sb_offset;
-+ i < layout.sb_offset + layout.nr_superblocks; i++) {
-+ offset = le64_to_cpu(*i);
-+
-+ if (offset == opt_get(*opts, sb))
-+ continue;
-+
-+ ret = read_one_super(sb, offset, &err);
-+ if (!ret)
-+ goto got_super;
-+ }
-+
-+ goto err;
-+
-+got_super:
-+ /* block_size is shifted by 9 to compare against the device's size: */
-+ if (le16_to_cpu(sb->sb->block_size) << 9 <
-+ bdev_logical_block_size(sb->bdev) &&
-+ opt_get(*opts, direct_io)) {
-+#ifndef __KERNEL__
-+ opt_set(*opts, direct_io, false);
-+ bch2_free_super(sb);
-+ goto retry;
-+#endif
-+ prt_printf(&err, "block size (%u) smaller than device block size (%u)",
-+ le16_to_cpu(sb->sb->block_size) << 9,
-+ bdev_logical_block_size(sb->bdev));
-+ ret = -BCH_ERR_block_size_too_small;
-+ goto err;
-+ }
-+
-+ ret = 0;
-+ sb->have_layout = true;
-+
-+ ret = bch2_sb_validate(sb, &err, READ);
-+ if (ret) {
-+ printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n",
-+ path, err.buf);
-+ goto err_no_print;
-+ }
-+out:
-+ printbuf_exit(&err);
-+ return ret;
-+err:
-+ printk(KERN_ERR "bcachefs (%s): error reading superblock: %s\n",
-+ path, err.buf);
-+err_no_print:
-+ bch2_free_super(sb);
-+ goto out;
-+}
-+
-+/* write superblock: */
-+
-+/*
-+ * Completion handler shared by the superblock read-back and superblock
-+ * write bios (both set bi_end_io to this function and bi_private to the
-+ * device). Flags the device via ca->sb_write_error when
-+ * bch2_dev_io_err_on() reports an error, then drops the references the
-+ * submitter took on the sb_write closure and on ca->io_ref.
-+ */
-+static void write_super_endio(struct bio *bio)
-+{
-+ struct bch_dev *ca = bio->bi_private;
-+
-+ /* XXX: return errors directly */
-+
-+ if (bch2_dev_io_err_on(bio->bi_status, ca,
-+ bio_data_dir(bio)
-+ ? BCH_MEMBER_ERROR_write
-+ : BCH_MEMBER_ERROR_read,
-+ "superblock %s error: %s",
-+ bio_data_dir(bio) ? "write" : "read",
-+ bch2_blk_status_to_str(bio->bi_status)))
-+ ca->sb_write_error = 1;
-+
-+ closure_put(&ca->fs->sb_write);
-+ percpu_ref_put(&ca->io_ref);
-+}
-+
-+/*
-+ * Submit an asynchronous read of PAGE_SIZE bytes from the device's first
-+ * superblock location (layout.sb_offset[0]) into ca->sb_read_scratch.
-+ * Completion is handled by write_super_endio(); the bio is submitted against
-+ * the c->sb_write closure.
-+ */
-+static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ struct bch_sb *sb = ca->disk_sb.sb;
-+ struct bio *bio = ca->disk_sb.bio;
-+
-+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
-+ bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]);
-+ bio->bi_end_io = write_super_endio;
-+ bio->bi_private = ca;
-+ bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE);
-+
-+ /* Account the read in the per-device I/O counters: */
-+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb],
-+ bio_sectors(bio));
-+
-+ /* Reference dropped in write_super_endio(): */
-+ percpu_ref_get(&ca->io_ref);
-+ closure_bio_submit(bio, &c->sb_write);
-+}
-+
-+/*
-+ * Submit an asynchronous write of the device's superblock buffer to the
-+ * @idx'th location in its layout. The offset, checksum type and checksum are
-+ * filled in first, since they are part of what is written. Completion is
-+ * handled by write_super_endio().
-+ */
-+static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
-+{
-+ struct bch_sb *sb = ca->disk_sb.sb;
-+ struct bio *bio = ca->disk_sb.bio;
-+
-+ sb->offset = sb->layout.sb_offset[idx];
-+
-+ SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false));
-+ sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
-+ null_nonce(), sb);
-+
-+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
-+ bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
-+ bio->bi_end_io = write_super_endio;
-+ bio->bi_private = ca;
-+ /* Write length is rounded up to the device's logical block size: */
-+ bch2_bio_map(bio, sb,
-+ roundup((size_t) vstruct_bytes(sb),
-+ bdev_logical_block_size(ca->disk_sb.bdev)));
-+
-+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
-+ bio_sectors(bio));
-+
-+ /* Reference dropped in write_super_endio(): */
-+ percpu_ref_get(&ca->io_ref);
-+ closure_bio_submit(bio, &c->sb_write);
-+}
-+
-+/*
-+ * Write the filesystem superblock out to every online member device.
-+ * Caller must hold c->sb_lock.
-+ *
-+ * Steps: bump the sequence number and update flags in the in-memory
-+ * superblock, copy it to each online device's buffer, validate each copy,
-+ * read back the first on-disk superblock of each device to compare sequence
-+ * numbers, then write every superblock slot of every device, one slot index
-+ * per pass. Finally check that enough devices were written successfully.
-+ *
-+ * Nothing is written when the nochanges option is set or the superblock is
-+ * not yet marked initialized. bch2_sb_update() runs on every exit path.
-+ *
-+ * Returns 0 on success or a negative value on failure.
-+ */
-+int bch2_write_super(struct bch_fs *c)
-+{
-+ struct closure *cl = &c->sb_write;
-+ struct bch_dev *ca;
-+ struct printbuf err = PRINTBUF;
-+ unsigned i, sb = 0, nr_wrote;
-+ struct bch_devs_mask sb_written;
-+ bool wrote, can_mount_without_written, can_mount_with_written;
-+ unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
-+ int ret = 0;
-+
-+ trace_and_count(c, write_super, c, _RET_IP_);
-+
-+ if (c->opts.very_degraded)
-+ degraded_flags |= BCH_FORCE_IF_LOST;
-+
-+ lockdep_assert_held(&c->sb_lock);
-+
-+ closure_init_stack(cl);
-+ memset(&sb_written, 0, sizeof(sb_written));
-+
-+ /* Make sure we're using the new magic numbers: */
-+ c->disk_sb.sb->magic = BCHFS_MAGIC;
-+ c->disk_sb.sb->layout.magic = BCHFS_MAGIC;
-+
-+ le64_add_cpu(&c->disk_sb.sb->seq, 1);
-+
-+ if (test_bit(BCH_FS_ERROR, &c->flags))
-+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
-+ if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags))
-+ SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1);
-+
-+ SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
-+
-+ bch2_sb_counters_from_cpu(c);
-+ bch2_sb_members_from_cpu(c);
-+ bch2_sb_members_cpy_v2_v1(&c->disk_sb);
-+ bch2_sb_errors_from_cpu(c);
-+
-+ /* Propagate the updated superblock to each online device's buffer: */
-+ for_each_online_member(ca, c, i)
-+ bch2_sb_from_fs(c, ca);
-+
-+ for_each_online_member(ca, c, i) {
-+ printbuf_reset(&err);
-+
-+ ret = bch2_sb_validate(&ca->disk_sb, &err, WRITE);
-+ if (ret) {
-+ bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf);
-+ /* Leaving the iterator early: drop the device ref here */
-+ percpu_ref_put(&ca->io_ref);
-+ goto out;
-+ }
-+ }
-+
-+ if (c->opts.nochanges)
-+ goto out;
-+
-+ /*
-+ * Defer writing the superblock until filesystem initialization is
-+ * complete - don't write out a partly initialized superblock:
-+ */
-+ if (!BCH_SB_INITIALIZED(c->disk_sb.sb))
-+ goto out;
-+
-+ /* Start by assuming every online device will be written successfully: */
-+ for_each_online_member(ca, c, i) {
-+ __set_bit(ca->dev_idx, sb_written.d);
-+ ca->sb_write_error = 0;
-+ }
-+
-+ for_each_online_member(ca, c, i)
-+ read_back_super(c, ca);
-+ closure_sync(cl);
-+
-+ /*
-+ * Compare the sequence number read back from disk with the one recorded
-+ * for the handle (ca->disk_sb.seq); any mismatch is fatal:
-+ */
-+ for_each_online_member(ca, c, i) {
-+ if (ca->sb_write_error)
-+ continue;
-+
-+ if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
-+ bch2_fs_fatal_error(c,
-+ "Superblock write was silently dropped! (seq %llu expected %llu)",
-+ le64_to_cpu(ca->sb_read_scratch->seq),
-+ ca->disk_sb.seq);
-+ percpu_ref_put(&ca->io_ref);
-+ ret = -BCH_ERR_erofs_sb_err;
-+ goto out;
-+ }
-+
-+ if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
-+ bch2_fs_fatal_error(c,
-+ "Superblock modified by another process (seq %llu expected %llu)",
-+ le64_to_cpu(ca->sb_read_scratch->seq),
-+ ca->disk_sb.seq);
-+ percpu_ref_put(&ca->io_ref);
-+ ret = -BCH_ERR_erofs_sb_err;
-+ goto out;
-+ }
-+ }
-+
-+ /*
-+ * Write slot number 'sb' on every device that has that many slots,
-+ * wait for the writes, then move to the next slot; stop once a pass
-+ * issues no writes:
-+ */
-+ do {
-+ wrote = false;
-+ for_each_online_member(ca, c, i)
-+ if (!ca->sb_write_error &&
-+ sb < ca->disk_sb.sb->layout.nr_superblocks) {
-+ write_one_super(c, ca, sb);
-+ wrote = true;
-+ }
-+ closure_sync(cl);
-+ sb++;
-+ } while (wrote);
-+
-+ for_each_online_member(ca, c, i) {
-+ if (ca->sb_write_error)
-+ __clear_bit(ca->dev_idx, sb_written.d);
-+ else
-+ ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
-+ }
-+
-+ nr_wrote = dev_mask_nr(&sb_written);
-+
-+ can_mount_with_written =
-+ bch2_have_enough_devs(c, sb_written, degraded_flags, false);
-+
-+ /* Invert the mask: now it names the devices that were NOT written */
-+ for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
-+ sb_written.d[i] = ~sb_written.d[i];
-+
-+ can_mount_without_written =
-+ bch2_have_enough_devs(c, sb_written, degraded_flags, false);
-+
-+ /*
-+ * If we would be able to mount _without_ the devices we successfully
-+ * wrote superblocks to, we weren't able to write to enough devices:
-+ *
-+ * Exception: if we can mount without the successes because we haven't
-+ * written anything (new filesystem), we continue if we'd be able to
-+ * mount with the devices we did successfully write to:
-+ */
-+ /*
-+ * NOTE(review): the third clause below is already implied by
-+ * !can_mount_with_written, so as written the test reduces to
-+ * (!nr_wrote || !can_mount_with_written) - confirm that is intended.
-+ * NOTE(review): this path returns a bare -1 rather than a
-+ * -BCH_ERR_* code like the other failure paths - confirm.
-+ */
-+ if (bch2_fs_fatal_err_on(!nr_wrote ||
-+ !can_mount_with_written ||
-+ (can_mount_without_written &&
-+ !can_mount_with_written), c,
-+ "Unable to write superblock to sufficient devices (from %ps)",
-+ (void *) _RET_IP_))
-+ ret = -1;
-+out:
-+ /* Make new options visible after they're persistent: */
-+ bch2_sb_update(c);
-+ printbuf_exit(&err);
-+ return ret;
-+}
-+
-+/*
-+ * Slow path of bch2_check_set_feature(): under sb_lock, re-check whether
-+ * feature bit @feat is already set in the cached superblock state; if not,
-+ * set it in the on-disk superblock and write the superblock out.
-+ */
-+void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
-+{
-+ const u64 feat_bit = 1ULL << feat;
-+
-+ mutex_lock(&c->sb_lock);
-+
-+ /* Already set - nothing to do */
-+ if (c->sb.features & feat_bit)
-+ goto unlock;
-+
-+ c->disk_sb.sb->features[0] |= cpu_to_le64(feat_bit);
-+
-+ bch2_write_super(c);
-+unlock:
-+ mutex_unlock(&c->sb_lock);
-+}
-+
-+/* Downgrade if superblock is at a higher version than currently supported: */
-+void bch2_sb_maybe_downgrade(struct bch_fs *c)
-+{
-+ struct bch_sb *disk = c->disk_sb.sb;
-+
-+ lockdep_assert_held(&c->sb_lock);
-+
-+ /*
-+ * Clamp each version member that is newer than what this code
-+ * supports down to the current metadata version:
-+ */
-+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(disk) > bcachefs_metadata_version_current)
-+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(disk, bcachefs_metadata_version_current);
-+
-+ if (c->sb.version > bcachefs_metadata_version_current)
-+ disk->version = cpu_to_le16(bcachefs_metadata_version_current);
-+
-+ if (c->sb.version_min > bcachefs_metadata_version_current)
-+ disk->version_min = cpu_to_le16(bcachefs_metadata_version_current);
-+
-+ /* Keep only the low BCH_COMPAT_NR compat bits: */
-+ disk->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
-+}
-+
-+/*
-+ * Set the on-disk superblock version to @new_version and turn on every
-+ * feature bit in BCH_SB_FEATURES_ALL. Caller must hold c->sb_lock.
-+ */
-+void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
-+{
-+ struct bch_sb *disk;
-+
-+ lockdep_assert_held(&c->sb_lock);
-+
-+ disk = c->disk_sb.sb;
-+ disk->version = cpu_to_le16(new_version);
-+ disk->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
-+}
-+
-+/*
-+ * Table of per-field-type operations, indexed by BCH_SB_FIELD_<name>.
-+ * Generated by expanding BCH_SB_FIELDS() with x() defined to emit one
-+ * designated initializer per field pointing at bch_sb_field_ops_<name>.
-+ */
-+static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
-+#define x(f, nr) \
-+ [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
-+ BCH_SB_FIELDS()
-+#undef x
-+};
-+
-+/* Zero-initialized ops (no validate, no to_text) for unknown field types: */
-+static const struct bch_sb_field_ops bch2_sb_field_null_ops;
-+
-+/*
-+ * Look up the ops for superblock field type @type. Types beyond the end of
-+ * the ops table get the null ops.
-+ */
-+static const struct bch_sb_field_ops *bch2_sb_field_type_ops(unsigned type)
-+{
-+ if (unlikely(type >= ARRAY_SIZE(bch2_sb_field_ops)))
-+ return &bch2_sb_field_null_ops;
-+
-+ return bch2_sb_field_ops[type];
-+}
-+
-+/*
-+ * Run the type-specific validate hook (if any) on superblock field @f.
-+ * On failure, append the section name, the hook's message and a text dump
-+ * of the field to @err. Returns the hook's result, or 0 if there is no hook.
-+ */
-+static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
-+ struct printbuf *err)
-+{
-+ unsigned type = le32_to_cpu(f->type);
-+ struct printbuf field_err = PRINTBUF;
-+ const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
-+ int ret = 0;
-+
-+ if (ops->validate)
-+ ret = ops->validate(sb, f, &field_err);
-+
-+ if (ret) {
-+ prt_printf(err, "Invalid superblock section %s: %s",
-+ bch2_sb_fields[type], field_err.buf);
-+ prt_newline(err);
-+ bch2_sb_field_to_text(err, sb, f);
-+ }
-+
-+ printbuf_exit(&field_err);
-+ return ret;
-+}
-+
-+/*
-+ * Print superblock field @f to @out: a header line with the field's name
-+ * (or its number, if unknown) and size, followed by the type-specific text
-+ * indented by two when the field type has a to_text hook.
-+ */
-+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
-+ struct bch_sb_field *f)
-+{
-+ unsigned type = le32_to_cpu(f->type);
-+ const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
-+
-+ if (!out->nr_tabstops)
-+ printbuf_tabstop_push(out, 32);
-+
-+ if (type >= BCH_SB_FIELD_NR)
-+ prt_printf(out, "(unknown field %u)", type);
-+ else
-+ prt_printf(out, "%s", bch2_sb_fields[type]);
-+
-+ prt_printf(out, " (size %zu):", vstruct_bytes(f));
-+ prt_newline(out);
-+
-+ if (!ops->to_text)
-+ return;
-+
-+ printbuf_indent_add(out, 2);
-+ ops->to_text(out, sb, f);
-+ printbuf_indent_sub(out, 2);
-+}
-+
-+/*
-+ * Print superblock layout @l to @out: layout type, maximum superblock size,
-+ * number of superblocks, and the comma separated list of their offsets.
-+ */
-+void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
-+{
-+ unsigned i;
-+
-+ prt_printf(out, "Type: %u", l->layout_type);
-+ prt_newline(out);
-+
-+ prt_str(out, "Superblock max size: ");
-+ /*
-+ * Shift in 64 bits: the value is printed as a u64, and a plain int
-+ * shift would overflow for large sb_max_size_bits:
-+ */
-+ prt_units_u64(out, 512ULL << l->sb_max_size_bits);
-+ prt_newline(out);
-+
-+ prt_printf(out, "Nr superblocks: %u", l->nr_superblocks);
-+ prt_newline(out);
-+
-+ prt_str(out, "Offsets: ");
-+ for (i = 0; i < l->nr_superblocks; i++) {
-+ if (i)
-+ prt_str(out, ", ");
-+ prt_printf(out, "%llu", le64_to_cpu(l->sb_offset[i]));
-+ }
-+ prt_newline(out);
-+}
-+
-+/*
-+ * Print a human readable description of superblock @sb to @out.
-+ *
-+ * @print_layout: also print the embedded superblock layout
-+ * @fields: bitmask of field types (bit N = type N) to print in full
-+ *
-+ * Output is: the fixed header members, the list of sections present,
-+ * feature/compat flags, every option that is stored in the superblock,
-+ * optionally the layout, then each requested field.
-+ */
-+void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
-+ bool print_layout, unsigned fields)
-+{
-+ struct bch_sb_field *f;
-+ u64 fields_have = 0;
-+ unsigned nr_devices = 0;
-+
-+ if (!out->nr_tabstops)
-+ printbuf_tabstop_push(out, 44);
-+
-+ for (int i = 0; i < sb->nr_devices; i++)
-+ nr_devices += bch2_dev_exists(sb, i);
-+
-+ prt_printf(out, "External UUID:");
-+ prt_tab(out);
-+ pr_uuid(out, sb->user_uuid.b);
-+ prt_newline(out);
-+
-+ prt_printf(out, "Internal UUID:");
-+ prt_tab(out);
-+ pr_uuid(out, sb->uuid.b);
-+ prt_newline(out);
-+
-+ prt_str(out, "Device index:");
-+ prt_tab(out);
-+ prt_printf(out, "%u", sb->dev_idx);
-+ prt_newline(out);
-+
-+ prt_str(out, "Label:");
-+ prt_tab(out);
-+ prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label);
-+ prt_newline(out);
-+
-+ prt_str(out, "Version:");
-+ prt_tab(out);
-+ bch2_version_to_text(out, le16_to_cpu(sb->version));
-+ prt_newline(out);
-+
-+ prt_str(out, "Version upgrade complete:");
-+ prt_tab(out);
-+ bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Oldest version on disk:");
-+ prt_tab(out);
-+ bch2_version_to_text(out, le16_to_cpu(sb->version_min));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Created:");
-+ prt_tab(out);
-+ if (sb->time_base_lo)
-+ bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
-+ else
-+ prt_printf(out, "(not set)");
-+ prt_newline(out);
-+
-+ prt_printf(out, "Sequence number:");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", le64_to_cpu(sb->seq));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Superblock size:");
-+ prt_tab(out);
-+ prt_printf(out, "%zu", vstruct_bytes(sb));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Clean:");
-+ prt_tab(out);
-+ prt_printf(out, "%llu", BCH_SB_CLEAN(sb));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Devices:");
-+ prt_tab(out);
-+ prt_printf(out, "%u", nr_devices);
-+ prt_newline(out);
-+
-+ prt_printf(out, "Sections:");
-+ vstruct_for_each(sb, f) {
-+ unsigned type = le32_to_cpu(f->type);
-+
-+ /*
-+ * The type comes from the superblock and may be one we don't
-+ * know about: only shift by counts that fit the u64 mask, and
-+ * do the shift in 64 bits rather than int:
-+ */
-+ if (type < 64)
-+ fields_have |= 1ULL << type;
-+ }
-+ prt_tab(out);
-+ prt_bitflags(out, bch2_sb_fields, fields_have);
-+ prt_newline(out);
-+
-+ prt_printf(out, "Features:");
-+ prt_tab(out);
-+ prt_bitflags(out, bch2_sb_features, le64_to_cpu(sb->features[0]));
-+ prt_newline(out);
-+
-+ prt_printf(out, "Compat features:");
-+ prt_tab(out);
-+ prt_bitflags(out, bch2_sb_compat, le64_to_cpu(sb->compat[0]));
-+ prt_newline(out);
-+
-+ prt_newline(out);
-+ prt_printf(out, "Options:");
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+ {
-+ enum bch_opt_id id;
-+
-+ for (id = 0; id < bch2_opts_nr; id++) {
-+ const struct bch_option *opt = bch2_opt_table + id;
-+
-+ /* Only options that have a superblock representation: */
-+ if (opt->get_sb != BCH2_NO_SB_OPT) {
-+ u64 v = bch2_opt_from_sb(sb, id);
-+
-+ prt_printf(out, "%s:", opt->attr.name);
-+ prt_tab(out);
-+ bch2_opt_to_text(out, NULL, sb, opt, v,
-+ OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST);
-+ prt_newline(out);
-+ }
-+ }
-+ }
-+
-+ printbuf_indent_sub(out, 2);
-+
-+ if (print_layout) {
-+ prt_newline(out);
-+ prt_printf(out, "layout:");
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+ bch2_sb_layout_to_text(out, &sb->layout);
-+ printbuf_indent_sub(out, 2);
-+ }
-+
-+ vstruct_for_each(sb, f) {
-+ unsigned type = le32_to_cpu(f->type);
-+
-+ /*
-+ * @fields is an unsigned mask, so types that don't fit in it
-+ * can never have been requested:
-+ */
-+ if (type < 32 && (fields & (1U << type))) {
-+ prt_newline(out);
-+ bch2_sb_field_to_text(out, sb, f);
-+ }
-+ }
-+}
-diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
-new file mode 100644
-index 000000000000..f5abd102bff7
---- /dev/null
-+++ b/fs/bcachefs/super-io.h
-@@ -0,0 +1,94 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SUPER_IO_H
-+#define _BCACHEFS_SUPER_IO_H
-+
-+#include "extents.h"
-+#include "eytzinger.h"
-+#include "super_types.h"
-+#include "super.h"
-+#include "sb-members.h"
-+
-+#include <asm/byteorder.h>
-+
-+/*
-+ * A version is compatible when it is no older than the minimum supported
-+ * version and its major number does not exceed the current major number.
-+ */
-+static inline bool bch2_version_compatible(u16 version)
-+{
-+ if (version < bcachefs_metadata_version_min)
-+ return false;
-+
-+ return BCH_VERSION_MAJOR(version) <=
-+ BCH_VERSION_MAJOR(bcachefs_metadata_version_current);
-+}
-+
-+void bch2_version_to_text(struct printbuf *, unsigned);
-+unsigned bch2_latest_compatible_version(unsigned);
-+
-+u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
-+ unsigned,
-+ unsigned);
-+
-+/* Size in bytes given by field @f's u64s count. */
-+static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f)
-+{
-+ size_t nr_u64s = le32_to_cpu(f->u64s);
-+
-+ return nr_u64s * sizeof(u64);
-+}
-+
-+/*
-+ * Convert a generic struct bch_sb_field pointer to the containing
-+ * struct bch_sb_field_<_name> (via its 'field' member), passing NULL through.
-+ */
-+#define field_to_type(_f, _name) \
-+ container_of_or_null(_f, struct bch_sb_field_##_name, field)
-+
-+/* Look up a field by type id; bch2_sb_field_get() is the typed wrapper: */
-+struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *, enum bch_sb_field_type);
-+#define bch2_sb_field_get(_sb, _name) \
-+ field_to_type(bch2_sb_field_get_id(_sb, BCH_SB_FIELD_##_name), _name)
-+
-+/* Resize a field by type id; bch2_sb_field_resize() is the typed wrapper: */
-+struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *,
-+ enum bch_sb_field_type, unsigned);
-+#define bch2_sb_field_resize(_sb, _name, _u64s) \
-+ field_to_type(bch2_sb_field_resize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
-+
-+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
-+
-+extern const char * const bch2_sb_fields[];
-+
-+/*
-+ * Per-field-type hooks for superblock fields. Either hook may be NULL;
-+ * callers in super-io.c check before calling.
-+ */
-+struct bch_sb_field_ops {
-+ /* Returns nonzero if the field is invalid, describing why in the printbuf */
-+ int (*validate)(struct bch_sb *, struct bch_sb_field *, struct printbuf *);
-+ /* Prints the field's contents to the printbuf */
-+ void (*to_text)(struct printbuf *, struct bch_sb *, struct bch_sb_field *);
-+};
-+
-+/*
-+ * Per-filesystem magic value: the leading sizeof(__le64) bytes of the
-+ * filesystem's UUID (c->sb.uuid), copied out as a __le64.
-+ */
-+static inline __le64 bch2_sb_magic(struct bch_fs *c)
-+{
-+ __le64 magic;
-+
-+ memcpy(&magic, &c->sb.uuid, sizeof(magic));
-+
-+ return magic;
-+}
-+
-+/* The filesystem's magic XORed with JSET_MAGIC, in CPU byte order. */
-+static inline __u64 jset_magic(struct bch_fs *c)
-+{
-+ __le64 mixed = bch2_sb_magic(c) ^ JSET_MAGIC;
-+
-+ return __le64_to_cpu(mixed);
-+}
-+
-+/* The filesystem's magic XORed with BSET_MAGIC, in CPU byte order. */
-+static inline __u64 bset_magic(struct bch_fs *c)
-+{
-+ __le64 mixed = bch2_sb_magic(c) ^ BSET_MAGIC;
-+
-+ return __le64_to_cpu(mixed);
-+}
-+
-+int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *);
-+int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *);
-+
-+void bch2_free_super(struct bch_sb_handle *);
-+int bch2_sb_realloc(struct bch_sb_handle *, unsigned);
-+
-+int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
-+int bch2_write_super(struct bch_fs *);
-+void __bch2_check_set_feature(struct bch_fs *, unsigned);
-+
-+/*
-+ * Ensure feature bit @feat is set in the superblock. The unlocked check of
-+ * the cached feature mask avoids calling the out-of-line slow path when the
-+ * bit is already set.
-+ */
-+static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
-+{
-+ if (c->sb.features & (1ULL << feat))
-+ return;
-+
-+ __bch2_check_set_feature(c, feat);
-+}
-+
-+void bch2_sb_maybe_downgrade(struct bch_fs *);
-+void bch2_sb_upgrade(struct bch_fs *, unsigned);
-+
-+void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
-+ struct bch_sb_field *);
-+void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *);
-+void bch2_sb_to_text(struct printbuf *, struct bch_sb *, bool, unsigned);
-+
-+#endif /* _BCACHEFS_SUPER_IO_H */
-diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
-new file mode 100644
-index 000000000000..24672bb31cbe
---- /dev/null
-+++ b/fs/bcachefs/super.c
-@@ -0,0 +1,2017 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * bcachefs setup/teardown code, and some metadata io - read a superblock and
-+ * figure out what to do with it.
-+ *
-+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "bkey_sort.h"
-+#include "btree_cache.h"
-+#include "btree_gc.h"
-+#include "btree_journal_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_update_interior.h"
-+#include "btree_io.h"
-+#include "btree_write_buffer.h"
-+#include "buckets_waiting_for_journal.h"
-+#include "chardev.h"
-+#include "checksum.h"
-+#include "clock.h"
-+#include "compress.h"
-+#include "counters.h"
-+#include "debug.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "fs.h"
-+#include "fs-io.h"
-+#include "fs-io-buffered.h"
-+#include "fs-io-direct.h"
-+#include "fsck.h"
-+#include "inode.h"
-+#include "io_read.h"
-+#include "io_write.h"
-+#include "journal.h"
-+#include "journal_reclaim.h"
-+#include "journal_seq_blacklist.h"
-+#include "move.h"
-+#include "migrate.h"
-+#include "movinggc.h"
-+#include "nocow_locking.h"
-+#include "quota.h"
-+#include "rebalance.h"
-+#include "recovery.h"
-+#include "replicas.h"
-+#include "sb-clean.h"
-+#include "sb-errors.h"
-+#include "sb-members.h"
-+#include "snapshot.h"
-+#include "subvolume.h"
-+#include "super.h"
-+#include "super-io.h"
-+#include "sysfs.h"
-+#include "trace.h"
-+
-+#include <linux/backing-dev.h>
-+#include <linux/blkdev.h>
-+#include <linux/debugfs.h>
-+#include <linux/device.h>
-+#include <linux/idr.h>
-+#include <linux/module.h>
-+#include <linux/percpu.h>
-+#include <linux/random.h>
-+#include <linux/sysfs.h>
-+#include <crypto/hash.h>
-+
-+MODULE_LICENSE("GPL");
-+MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
-+MODULE_DESCRIPTION("bcachefs filesystem");
-+
-+/*
-+ * Define the sysfs plumbing for kobject type @type: an attribute group
-+ * built from <type>_files, the NULL-terminated group list, and a
-+ * <type>_ktype using <type>_release and <type>_sysfs_ops. Those three
-+ * identifiers must be declared before the macro is used.
-+ */
-+#define KTYPE(type) \
-+static const struct attribute_group type ## _group = { \
-+ .attrs = type ## _files \
-+}; \
-+ \
-+static const struct attribute_group *type ## _groups[] = { \
-+ &type ## _group, \
-+ NULL \
-+}; \
-+ \
-+static const struct kobj_type type ## _ktype = { \
-+ .release = type ## _release, \
-+ .sysfs_ops = &type ## _sysfs_ops, \
-+ .default_groups = type ## _groups \
-+}
-+
-+static void bch2_fs_release(struct kobject *);
-+static void bch2_dev_release(struct kobject *);
-+/*
-+ * Empty release callbacks: each is referenced as <type>_release by the
-+ * matching KTYPE() instantiation, and does nothing.
-+ */
-+static void bch2_fs_counters_release(struct kobject *k)
-+{
-+}
-+
-+static void bch2_fs_internal_release(struct kobject *k)
-+{
-+}
-+
-+static void bch2_fs_opts_dir_release(struct kobject *k)
-+{
-+}
-+
-+static void bch2_fs_time_stats_release(struct kobject *k)
-+{
-+}
-+
-+KTYPE(bch2_fs);
-+KTYPE(bch2_fs_counters);
-+KTYPE(bch2_fs_internal);
-+KTYPE(bch2_fs_opts_dir);
-+KTYPE(bch2_fs_time_stats);
-+KTYPE(bch2_dev);
-+
-+static struct kset *bcachefs_kset;
-+static LIST_HEAD(bch_fs_list);
-+static DEFINE_MUTEX(bch_fs_list_lock);
-+
-+DECLARE_WAIT_QUEUE_HEAD(bch2_read_only_wait);
-+
-+static void bch2_dev_free(struct bch_dev *);
-+static int bch2_dev_alloc(struct bch_fs *, unsigned);
-+static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
-+static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
-+
-+/*
-+ * Find the registered filesystem that has a member device whose block
-+ * device number is @dev.
-+ *
-+ * Returns the filesystem with a reference taken on its closure (c->cl), or
-+ * NULL if no filesystem uses that device.
-+ */
-+struct bch_fs *bch2_dev_to_fs(dev_t dev)
-+{
-+ struct bch_fs *c;
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ mutex_lock(&bch_fs_list_lock);
-+ rcu_read_lock();
-+
-+ list_for_each_entry(c, &bch_fs_list, list)
-+ for_each_member_device_rcu(ca, c, i, NULL)
-+ if (ca->disk_sb.bdev && ca->disk_sb.bdev->bd_dev == dev) {
-+ closure_get(&c->cl);
-+ goto found;
-+ }
-+ /* Walked the whole list without a match: */
-+ c = NULL;
-+found:
-+ rcu_read_unlock();
-+ mutex_unlock(&bch_fs_list_lock);
-+
-+ return c;
-+}
-+
-+/*
-+ * Find the registered filesystem whose superblock UUID equals @uuid.
-+ * Caller must hold bch_fs_list_lock. No reference is taken.
-+ * Returns NULL if there is no match.
-+ */
-+static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid)
-+{
-+ struct bch_fs *fs;
-+
-+ lockdep_assert_held(&bch_fs_list_lock);
-+
-+ list_for_each_entry(fs, &bch_fs_list, list) {
-+ if (memcmp(&fs->disk_sb.sb->uuid, &uuid, sizeof(uuid)) == 0)
-+ return fs;
-+ }
-+
-+ return NULL;
-+}
-+
-+/*
-+ * Locked wrapper around __bch2_uuid_to_fs(): on a match, a reference is
-+ * taken on the filesystem's closure before the list lock is dropped.
-+ * Returns NULL if there is no match.
-+ */
-+struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
-+{
-+ struct bch_fs *fs;
-+
-+ mutex_lock(&bch_fs_list_lock);
-+
-+ fs = __bch2_uuid_to_fs(uuid);
-+ if (fs != NULL)
-+ closure_get(&fs->cl);
-+
-+ mutex_unlock(&bch_fs_list_lock);
-+
-+ return fs;
-+}
-+
-+/*
-+ * Resize the journal reservation for device usage entries so that it has
-+ * room for one entry (header plus BCH_DATA_NR per-type records) per member
-+ * device.
-+ */
-+static void bch2_dev_usage_journal_reserve(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned idx, nr_devs = 0;
-+ unsigned u64s_per_dev =
-+ ((sizeof(struct jset_entry_dev_usage) +
-+ sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR)) /
-+ sizeof(u64);
-+
-+ /* Count the member devices: */
-+ rcu_read_lock();
-+ for_each_member_device_rcu(ca, c, idx, NULL)
-+ nr_devs++;
-+ rcu_read_unlock();
-+
-+ bch2_journal_entry_res_resize(&c->journal,
-+ &c->dev_usage_journal_res, u64s_per_dev * nr_devs);
-+}
-+
-+/* Filesystem RO/RW: */
-+
-+/*
-+ * For startup/shutdown of RW stuff, the dependencies are:
-+ *
-+ * - foreground writes depend on copygc and rebalance (to free up space)
-+ *
-+ * - copygc and rebalance depend on mark and sweep gc (they actually probably
-+ * don't because they either reserve ahead of time or don't block if
-+ * allocations fail, but allocations can require mark and sweep gc to run
-+ * because of generation number wraparound)
-+ *
-+ * - all of the above depends on the allocator threads
-+ *
-+ * - allocator depends on the journal (when it rewrites prios and gens)
-+ */
-+
-+/*
-+ * Tear down the read-write machinery: stop the background users, flush
-+ * until the filesystem is quiescent, stop the journal, then remove every
-+ * member device from the allocator. Does not touch BCH_FS_RW itself.
-+ */
-+static void __bch2_fs_read_only(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i, clean_passes = 0;
-+ u64 seq = 0;
-+
-+ bch2_fs_ec_stop(c);
-+ bch2_open_buckets_stop(c, NULL, true);
-+ bch2_rebalance_stop(c);
-+ bch2_copygc_stop(c);
-+ bch2_gc_thread_stop(c);
-+ bch2_fs_ec_flush(c);
-+
-+ bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
-+ journal_cur_seq(&c->journal));
-+
-+ /*
-+ * Keep flushing until two consecutive passes find nothing to do: a
-+ * pass resets the count if any flush reports work or the journal
-+ * sequence number moved since the previous pass.
-+ */
-+ do {
-+ clean_passes++;
-+
-+ if (bch2_btree_interior_updates_flush(c) ||
-+ bch2_journal_flush_all_pins(&c->journal) ||
-+ bch2_btree_flush_all_writes(c) ||
-+ seq != atomic64_read(&c->journal.seq)) {
-+ seq = atomic64_read(&c->journal.seq);
-+ clean_passes = 0;
-+ }
-+ } while (clean_passes < 2);
-+
-+ bch_verbose(c, "flushing journal and stopping allocators complete, journal seq %llu",
-+ journal_cur_seq(&c->journal));
-+
-+ /* Only a replayed journal and a non-emergency shutdown count as clean: */
-+ if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
-+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
-+ set_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
-+ bch2_fs_journal_stop(&c->journal);
-+
-+ /*
-+ * After stopping journal:
-+ */
-+ for_each_member_device(ca, c, i)
-+ bch2_dev_allocator_remove(c, ca);
-+}
-+
-+#ifndef BCH_WRITE_REF_DEBUG
-+/*
-+ * Callback taking the c->writes percpu_ref: records that write disabling
-+ * has completed and wakes anyone sleeping on bch2_read_only_wait.
-+ * Only built when BCH_WRITE_REF_DEBUG is not defined.
-+ */
-+static void bch2_writes_disabled(struct percpu_ref *writes)
-+{
-+ struct bch_fs *c = container_of(writes, struct bch_fs, writes);
-+
-+ set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
-+ wake_up(&bch2_read_only_wait);
-+}
-+#endif
-+
-+/*
-+ * Transition the filesystem from read-write to read-only.
-+ *
-+ * If the filesystem is not currently RW, only journal reclaim is stopped.
-+ * Otherwise new writes are blocked, outstanding writes are waited for
-+ * (see the comment below for how emergency shutdown changes the order),
-+ * the RW machinery is torn down, and - if nothing went wrong - the
-+ * filesystem is marked clean. BCH_FS_RW is cleared last.
-+ */
-+void bch2_fs_read_only(struct bch_fs *c)
-+{
-+ if (!test_bit(BCH_FS_RW, &c->flags)) {
-+ bch2_journal_reclaim_stop(&c->journal);
-+ return;
-+ }
-+
-+ BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
-+
-+ /*
-+ * Block new foreground-end write operations from starting - any new
-+ * writes will return -EROFS:
-+ */
-+ set_bit(BCH_FS_GOING_RO, &c->flags);
-+#ifndef BCH_WRITE_REF_DEBUG
-+ percpu_ref_kill(&c->writes);
-+#else
-+ for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
-+ bch2_write_ref_put(c, i);
-+#endif
-+
-+ /*
-+ * If we're not doing an emergency shutdown, we want to wait on
-+ * outstanding writes to complete so they don't see spurious errors due
-+ * to shutting down the allocator:
-+ *
-+ * If we are doing an emergency shutdown outstanding writes may
-+ * hang until we shutdown the allocator so we don't want to wait
-+ * on outstanding writes before shutting everything down - but
-+ * we do need to wait on them before returning and signalling
-+ * that going RO is complete:
-+ */
-+ wait_event(bch2_read_only_wait,
-+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
-+ test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
-+
-+ __bch2_fs_read_only(c);
-+
-+ wait_event(bch2_read_only_wait,
-+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
-+
-+ clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
-+ clear_bit(BCH_FS_GOING_RO, &c->flags);
-+
-+ /*
-+ * Mark the filesystem clean only when there were no errors, it was
-+ * fully started and the teardown above recorded a clean shutdown.
-+ * The BUG_ON()s assert that nothing dirty is left at that point.
-+ */
-+ if (!bch2_journal_error(&c->journal) &&
-+ !test_bit(BCH_FS_ERROR, &c->flags) &&
-+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
-+ test_bit(BCH_FS_STARTED, &c->flags) &&
-+ test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags) &&
-+ !c->opts.norecovery) {
-+ BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
-+ BUG_ON(atomic_read(&c->btree_cache.dirty));
-+ BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
-+ BUG_ON(c->btree_write_buffer.state.nr);
-+
-+ bch_verbose(c, "marking filesystem clean");
-+ bch2_fs_mark_clean(c);
-+ }
-+
-+ clear_bit(BCH_FS_RW, &c->flags);
-+}
-+
-+/*
-+ * Work item body for c->read_only_work: go read-only with state_lock held
-+ * for write.
-+ */
-+static void bch2_fs_read_only_work(struct work_struct *work)
-+{
-+ struct bch_fs *fs = container_of(work, struct bch_fs, read_only_work);
-+
-+ down_write(&fs->state_lock);
-+ bch2_fs_read_only(fs);
-+ up_write(&fs->state_lock);
-+}
-+
-+/* Queue c->read_only_work on system_long_wq; does not wait for it to run. */
-+static void bch2_fs_read_only_async(struct bch_fs *c)
-+{
-+ queue_work(system_long_wq, &c->read_only_work);
-+}
-+
-+/*
-+ * Flag an emergency read-only transition: halt the journal, queue the
-+ * asynchronous switch to read-only, and wake waiters on bch2_read_only_wait.
-+ *
-+ * Returns true if this call was the one that set BCH_FS_EMERGENCY_RO,
-+ * false if it was already set.
-+ */
-+bool bch2_fs_emergency_read_only(struct bch_fs *c)
-+{
-+ bool newly_set = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags);
-+
-+ bch2_journal_halt(&c->journal);
-+ bch2_fs_read_only_async(c);
-+ wake_up(&bch2_read_only_wait);
-+
-+ return newly_set;
-+}
-+
-+/*
-+ * Start the copygc and rebalance threads. Returns 0 on success, or the
-+ * error from whichever thread failed to start (after logging it).
-+ */
-+static int bch2_fs_read_write_late(struct bch_fs *c)
-+{
-+ int ret;
-+
-+ /*
-+ * Data move operations can't run until after check_snapshots has
-+ * completed, and bch2_snapshot_is_ancestor() is available.
-+ *
-+ * Ideally we'd start copygc/rebalance earlier instead of waiting for
-+ * all of recovery/fsck to complete:
-+ */
-+ ret = bch2_copygc_start(c);
-+ if (ret) {
-+ bch_err(c, "error starting copygc thread");
-+ return ret;
-+ }
-+
-+ ret = bch2_rebalance_start(c);
-+ if (ret) {
-+ bch_err(c, "error starting rebalance thread");
-+ }
-+
-+ return ret;
-+}
-+
-+/*
-+ * Transition the filesystem to read-write.
-+ *
-+ * @early: true when called during recovery; the copygc/rebalance threads
-+ * are then not started here, and going RW is permitted with the
-+ * nochanges option (unless read_only is also set).
-+ *
-+ * Returns 0 on success (including when already RW) or a negative error
-+ * code. Once setup has begun, every failure goes through the err: label so
-+ * that whatever was started is torn down again by __bch2_fs_read_only().
-+ */
-+static int __bch2_fs_read_write(struct bch_fs *c, bool early)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+ int ret;
-+
-+ if (test_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags)) {
-+ bch_err(c, "cannot go rw, unfixed btree errors");
-+ return -BCH_ERR_erofs_unfixed_errors;
-+ }
-+
-+ if (test_bit(BCH_FS_RW, &c->flags))
-+ return 0;
-+
-+ if (c->opts.norecovery)
-+ return -BCH_ERR_erofs_norecovery;
-+
-+ /*
-+ * nochanges is used for fsck -n mode - we have to allow going rw
-+ * during recovery for that to work:
-+ */
-+ if (c->opts.nochanges && (!early || c->opts.read_only))
-+ return -BCH_ERR_erofs_nochanges;
-+
-+ bch_info(c, "going read-write");
-+
-+ ret = bch2_sb_members_v2_init(c);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_fs_mark_dirty(c);
-+ if (ret)
-+ goto err;
-+
-+ clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
-+
-+ /*
-+ * First journal write must be a flush write: after a clean shutdown we
-+ * don't read the journal, so the first journal write may end up
-+ * overwriting whatever was there previously, and there must always be
-+ * at least one non-flush write in the journal or recovery will fail:
-+ */
-+ set_bit(JOURNAL_NEED_FLUSH_WRITE, &c->journal.flags);
-+
-+ for_each_rw_member(ca, c, i)
-+ bch2_dev_allocator_add(c, ca);
-+ bch2_recalc_capacity(c);
-+
-+ ret = bch2_gc_thread_start(c);
-+ if (ret) {
-+ bch_err(c, "error starting gc thread");
-+ /*
-+ * Devices were already added to the allocator above: unwind
-+ * through the common error path like the later failures do,
-+ * rather than returning with that state left behind.
-+ */
-+ goto err;
-+ }
-+
-+ ret = bch2_journal_reclaim_start(&c->journal);
-+ if (ret)
-+ goto err;
-+
-+ if (!early) {
-+ ret = bch2_fs_read_write_late(c);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ /* Re-arm the write references that going read-only dropped: */
-+#ifndef BCH_WRITE_REF_DEBUG
-+ percpu_ref_reinit(&c->writes);
-+#else
-+ for (i = 0; i < BCH_WRITE_REF_NR; i++) {
-+ BUG_ON(atomic_long_read(&c->writes[i]));
-+ atomic_long_inc(&c->writes[i]);
-+ }
-+#endif
-+ set_bit(BCH_FS_RW, &c->flags);
-+ set_bit(BCH_FS_WAS_RW, &c->flags);
-+
-+ bch2_do_discards(c);
-+ bch2_do_invalidates(c);
-+ bch2_do_stripe_deletes(c);
-+ bch2_do_pending_node_rewrites(c);
-+ return 0;
-+err:
-+ __bch2_fs_read_only(c);
-+ return ret;
-+}
-+
-+int bch2_fs_read_write(struct bch_fs *c)
-+{
-+ return __bch2_fs_read_write(c, false);
-+}
-+
-+int bch2_fs_read_write_early(struct bch_fs *c)
-+{
-+ lockdep_assert_held(&c->state_lock);
-+
-+ return __bch2_fs_read_write(c, true);
-+}
-+
-+/* Filesystem startup/shutdown: */
-+
-+static void __bch2_fs_free(struct bch_fs *c)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < BCH_TIME_STAT_NR; i++)
-+ bch2_time_stats_exit(&c->times[i]);
-+
-+ bch2_free_pending_node_rewrites(c);
-+ bch2_fs_sb_errors_exit(c);
-+ bch2_fs_counters_exit(c);
-+ bch2_fs_snapshots_exit(c);
-+ bch2_fs_quota_exit(c);
-+ bch2_fs_fs_io_direct_exit(c);
-+ bch2_fs_fs_io_buffered_exit(c);
-+ bch2_fs_fsio_exit(c);
-+ bch2_fs_ec_exit(c);
-+ bch2_fs_encryption_exit(c);
-+ bch2_fs_nocow_locking_exit(c);
-+ bch2_fs_io_write_exit(c);
-+ bch2_fs_io_read_exit(c);
-+ bch2_fs_buckets_waiting_for_journal_exit(c);
-+ bch2_fs_btree_interior_update_exit(c);
-+ bch2_fs_btree_iter_exit(c);
-+ bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
-+ bch2_fs_btree_cache_exit(c);
-+ bch2_fs_replicas_exit(c);
-+ bch2_fs_journal_exit(&c->journal);
-+ bch2_io_clock_exit(&c->io_clock[WRITE]);
-+ bch2_io_clock_exit(&c->io_clock[READ]);
-+ bch2_fs_compress_exit(c);
-+ bch2_journal_keys_free(&c->journal_keys);
-+ bch2_journal_entries_free(c);
-+ bch2_fs_btree_write_buffer_exit(c);
-+ percpu_free_rwsem(&c->mark_lock);
-+ free_percpu(c->online_reserved);
-+
-+ darray_exit(&c->btree_roots_extra);
-+ free_percpu(c->pcpu);
-+ mempool_exit(&c->large_bkey_pool);
-+ mempool_exit(&c->btree_bounce_pool);
-+ bioset_exit(&c->btree_bio);
-+ mempool_exit(&c->fill_iter);
-+#ifndef BCH_WRITE_REF_DEBUG
-+ percpu_ref_exit(&c->writes);
-+#endif
-+ kfree(rcu_dereference_protected(c->disk_groups, 1));
-+ kfree(c->journal_seq_blacklist_table);
-+ kfree(c->unused_inode_hints);
-+
-+ if (c->write_ref_wq)
-+ destroy_workqueue(c->write_ref_wq);
-+ if (c->io_complete_wq)
-+ destroy_workqueue(c->io_complete_wq);
-+ if (c->copygc_wq)
-+ destroy_workqueue(c->copygc_wq);
-+ if (c->btree_io_complete_wq)
-+ destroy_workqueue(c->btree_io_complete_wq);
-+ if (c->btree_update_wq)
-+ destroy_workqueue(c->btree_update_wq);
-+
-+ bch2_free_super(&c->disk_sb);
-+ kvpfree(c, sizeof(*c));
-+ module_put(THIS_MODULE);
-+}
-+
-+static void bch2_fs_release(struct kobject *kobj)
-+{
-+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
-+
-+ __bch2_fs_free(c);
-+}
-+
-+void __bch2_fs_stop(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ bch_verbose(c, "shutting down");
-+
-+ set_bit(BCH_FS_STOPPING, &c->flags);
-+
-+ cancel_work_sync(&c->journal_seq_blacklist_gc_work);
-+
-+ down_write(&c->state_lock);
-+ bch2_fs_read_only(c);
-+ up_write(&c->state_lock);
-+
-+ for_each_member_device(ca, c, i)
-+ if (ca->kobj.state_in_sysfs &&
-+ ca->disk_sb.bdev)
-+ sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
-+
-+ if (c->kobj.state_in_sysfs)
-+ kobject_del(&c->kobj);
-+
-+ bch2_fs_debug_exit(c);
-+ bch2_fs_chardev_exit(c);
-+
-+ kobject_put(&c->counters_kobj);
-+ kobject_put(&c->time_stats);
-+ kobject_put(&c->opts_dir);
-+ kobject_put(&c->internal);
-+
-+ /* btree prefetch might have kicked off reads in the background: */
-+ bch2_btree_flush_all_reads(c);
-+
-+ for_each_member_device(ca, c, i)
-+ cancel_work_sync(&ca->io_error_work);
-+
-+ cancel_work_sync(&c->read_only_work);
-+}
-+
-+void bch2_fs_free(struct bch_fs *c)
-+{
-+ unsigned i;
-+
-+ mutex_lock(&bch_fs_list_lock);
-+ list_del(&c->list);
-+ mutex_unlock(&bch_fs_list_lock);
-+
-+ closure_sync(&c->cl);
-+ closure_debug_destroy(&c->cl);
-+
-+ for (i = 0; i < c->sb.nr_devices; i++) {
-+ struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
-+
-+ if (ca) {
-+ bch2_free_super(&ca->disk_sb);
-+ bch2_dev_free(ca);
-+ }
-+ }
-+
-+ bch_verbose(c, "shutdown complete");
-+
-+ kobject_put(&c->kobj);
-+}
-+
-+void bch2_fs_stop(struct bch_fs *c)
-+{
-+ __bch2_fs_stop(c);
-+ bch2_fs_free(c);
-+}
-+
-+static int bch2_fs_online(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+ int ret = 0;
-+
-+ lockdep_assert_held(&bch_fs_list_lock);
-+
-+ if (__bch2_uuid_to_fs(c->sb.uuid)) {
-+ bch_err(c, "filesystem UUID already open");
-+ return -EINVAL;
-+ }
-+
-+ ret = bch2_fs_chardev_init(c);
-+ if (ret) {
-+ bch_err(c, "error creating character device");
-+ return ret;
-+ }
-+
-+ bch2_fs_debug_init(c);
-+
-+ ret = kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ?:
-+ kobject_add(&c->internal, &c->kobj, "internal") ?:
-+ kobject_add(&c->opts_dir, &c->kobj, "options") ?:
-+ kobject_add(&c->time_stats, &c->kobj, "time_stats") ?:
-+ kobject_add(&c->counters_kobj, &c->kobj, "counters") ?:
-+ bch2_opts_create_sysfs_files(&c->opts_dir);
-+ if (ret) {
-+ bch_err(c, "error creating sysfs objects");
-+ return ret;
-+ }
-+
-+ down_write(&c->state_lock);
-+
-+ for_each_member_device(ca, c, i) {
-+ ret = bch2_dev_sysfs_online(c, ca);
-+ if (ret) {
-+ bch_err(c, "error creating sysfs objects");
-+ percpu_ref_put(&ca->ref);
-+ goto err;
-+ }
-+ }
-+
-+ BUG_ON(!list_empty(&c->list));
-+ list_add(&c->list, &bch_fs_list);
-+err:
-+ up_write(&c->state_lock);
-+ return ret;
-+}
-+
-+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
-+{
-+ struct bch_fs *c;
-+ struct printbuf name = PRINTBUF;
-+ unsigned i, iter_size;
-+ int ret = 0;
-+
-+ c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
-+ if (!c) {
-+ c = ERR_PTR(-BCH_ERR_ENOMEM_fs_alloc);
-+ goto out;
-+ }
-+
-+ __module_get(THIS_MODULE);
-+
-+ closure_init(&c->cl, NULL);
-+
-+ c->kobj.kset = bcachefs_kset;
-+ kobject_init(&c->kobj, &bch2_fs_ktype);
-+ kobject_init(&c->internal, &bch2_fs_internal_ktype);
-+ kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
-+ kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
-+ kobject_init(&c->counters_kobj, &bch2_fs_counters_ktype);
-+
-+ c->minor = -1;
-+ c->disk_sb.fs_sb = true;
-+
-+ init_rwsem(&c->state_lock);
-+ mutex_init(&c->sb_lock);
-+ mutex_init(&c->replicas_gc_lock);
-+ mutex_init(&c->btree_root_lock);
-+ INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
-+
-+ init_rwsem(&c->gc_lock);
-+ mutex_init(&c->gc_gens_lock);
-+
-+ for (i = 0; i < BCH_TIME_STAT_NR; i++)
-+ bch2_time_stats_init(&c->times[i]);
-+
-+ bch2_fs_copygc_init(c);
-+ bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
-+ bch2_fs_btree_interior_update_init_early(c);
-+ bch2_fs_allocator_background_init(c);
-+ bch2_fs_allocator_foreground_init(c);
-+ bch2_fs_rebalance_init(c);
-+ bch2_fs_quota_init(c);
-+ bch2_fs_ec_init_early(c);
-+ bch2_fs_move_init(c);
-+ bch2_fs_sb_errors_init_early(c);
-+
-+ INIT_LIST_HEAD(&c->list);
-+
-+ mutex_init(&c->usage_scratch_lock);
-+
-+ mutex_init(&c->bio_bounce_pages_lock);
-+ mutex_init(&c->snapshot_table_lock);
-+ init_rwsem(&c->snapshot_create_lock);
-+
-+ spin_lock_init(&c->btree_write_error_lock);
-+
-+ INIT_WORK(&c->journal_seq_blacklist_gc_work,
-+ bch2_blacklist_entries_gc);
-+
-+ INIT_LIST_HEAD(&c->journal_iters);
-+
-+ INIT_LIST_HEAD(&c->fsck_error_msgs);
-+ mutex_init(&c->fsck_error_msgs_lock);
-+
-+ seqcount_init(&c->gc_pos_lock);
-+
-+ seqcount_init(&c->usage_lock);
-+
-+ sema_init(&c->io_in_flight, 128);
-+
-+ INIT_LIST_HEAD(&c->vfs_inodes_list);
-+ mutex_init(&c->vfs_inodes_lock);
-+
-+ c->copy_gc_enabled = 1;
-+ c->rebalance.enabled = 1;
-+ c->promote_whole_extents = true;
-+
-+ c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write];
-+ c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write];
-+ c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal];
-+ c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
-+
-+ bch2_fs_btree_cache_init_early(&c->btree_cache);
-+
-+ mutex_init(&c->sectors_available_lock);
-+
-+ ret = percpu_init_rwsem(&c->mark_lock);
-+ if (ret)
-+ goto err;
-+
-+ mutex_lock(&c->sb_lock);
-+ ret = bch2_sb_to_fs(c, sb);
-+ mutex_unlock(&c->sb_lock);
-+
-+ if (ret)
-+ goto err;
-+
-+ pr_uuid(&name, c->sb.user_uuid.b);
-+ strscpy(c->name, name.buf, sizeof(c->name));
-+ printbuf_exit(&name);
-+
-+ ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
-+ if (ret)
-+ goto err;
-+
-+ /* Compat: */
-+ if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
-+ !BCH_SB_JOURNAL_FLUSH_DELAY(sb))
-+ SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000);
-+
-+ if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
-+ !BCH_SB_JOURNAL_RECLAIM_DELAY(sb))
-+ SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 100);
-+
-+ c->opts = bch2_opts_default;
-+ ret = bch2_opts_from_sb(&c->opts, sb);
-+ if (ret)
-+ goto err;
-+
-+ bch2_opts_apply(&c->opts, opts);
-+
-+ c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
-+ if (c->opts.inodes_use_key_cache)
-+ c->btree_key_cache_btrees |= 1U << BTREE_ID_inodes;
-+ c->btree_key_cache_btrees |= 1U << BTREE_ID_logged_ops;
-+
-+ c->block_bits = ilog2(block_sectors(c));
-+ c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c);
-+
-+ if (bch2_fs_init_fault("fs_alloc")) {
-+ bch_err(c, "fs_alloc fault injected");
-+ ret = -EFAULT;
-+ goto err;
-+ }
-+
-+ iter_size = sizeof(struct sort_iter) +
-+ (btree_blocks(c) + 1) * 2 *
-+ sizeof(struct sort_iter_set);
-+
-+ c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));
-+
-+ if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
-+ WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512)) ||
-+ !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
-+ WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
-+ !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
-+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
-+ !(c->io_complete_wq = alloc_workqueue("bcachefs_io",
-+ WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
-+ !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
-+ WQ_FREEZABLE, 0)) ||
-+#ifndef BCH_WRITE_REF_DEBUG
-+ percpu_ref_init(&c->writes, bch2_writes_disabled,
-+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
-+#endif
-+ mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
-+ bioset_init(&c->btree_bio, 1,
-+ max(offsetof(struct btree_read_bio, bio),
-+ offsetof(struct btree_write_bio, wbio.bio)),
-+ BIOSET_NEED_BVECS) ||
-+ !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
-+ !(c->online_reserved = alloc_percpu(u64)) ||
-+ mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
-+ btree_bytes(c)) ||
-+ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
-+ !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
-+ sizeof(u64), GFP_KERNEL))) {
-+ ret = -BCH_ERR_ENOMEM_fs_other_alloc;
-+ goto err;
-+ }
-+
-+ ret = bch2_fs_counters_init(c) ?:
-+ bch2_fs_sb_errors_init(c) ?:
-+ bch2_io_clock_init(&c->io_clock[READ]) ?:
-+ bch2_io_clock_init(&c->io_clock[WRITE]) ?:
-+ bch2_fs_journal_init(&c->journal) ?:
-+ bch2_fs_replicas_init(c) ?:
-+ bch2_fs_btree_cache_init(c) ?:
-+ bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
-+ bch2_fs_btree_iter_init(c) ?:
-+ bch2_fs_btree_interior_update_init(c) ?:
-+ bch2_fs_buckets_waiting_for_journal_init(c) ?:
-+ bch2_fs_btree_write_buffer_init(c) ?:
-+ bch2_fs_subvolumes_init(c) ?:
-+ bch2_fs_io_read_init(c) ?:
-+ bch2_fs_io_write_init(c) ?:
-+ bch2_fs_nocow_locking_init(c) ?:
-+ bch2_fs_encryption_init(c) ?:
-+ bch2_fs_compress_init(c) ?:
-+ bch2_fs_ec_init(c) ?:
-+ bch2_fs_fsio_init(c) ?:
-+ bch2_fs_fs_io_buffered_init(c) ?:
-+ bch2_fs_fs_io_direct_init(c);
-+ if (ret)
-+ goto err;
-+
-+ for (i = 0; i < c->sb.nr_devices; i++)
-+ if (bch2_dev_exists(c->disk_sb.sb, i) &&
-+ bch2_dev_alloc(c, i)) {
-+ ret = -EEXIST;
-+ goto err;
-+ }
-+
-+ bch2_journal_entry_res_resize(&c->journal,
-+ &c->btree_root_journal_res,
-+ BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX));
-+ bch2_dev_usage_journal_reserve(c);
-+ bch2_journal_entry_res_resize(&c->journal,
-+ &c->clock_journal_res,
-+ (sizeof(struct jset_entry_clock) / sizeof(u64)) * 2);
-+
-+ mutex_lock(&bch_fs_list_lock);
-+ ret = bch2_fs_online(c);
-+ mutex_unlock(&bch_fs_list_lock);
-+
-+ if (ret)
-+ goto err;
-+out:
-+ return c;
-+err:
-+ bch2_fs_free(c);
-+ c = ERR_PTR(ret);
-+ goto out;
-+}
-+
-+noinline_for_stack
-+static void print_mount_opts(struct bch_fs *c)
-+{
-+ enum bch_opt_id i;
-+ struct printbuf p = PRINTBUF;
-+ bool first = true;
-+
-+ prt_str(&p, "mounting version ");
-+ bch2_version_to_text(&p, c->sb.version);
-+
-+ if (c->opts.read_only) {
-+ prt_str(&p, " opts=");
-+ first = false;
-+ prt_printf(&p, "ro");
-+ }
-+
-+ for (i = 0; i < bch2_opts_nr; i++) {
-+ const struct bch_option *opt = &bch2_opt_table[i];
-+ u64 v = bch2_opt_get_by_id(&c->opts, i);
-+
-+ if (!(opt->flags & OPT_MOUNT))
-+ continue;
-+
-+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
-+ continue;
-+
-+ prt_str(&p, first ? " opts=" : ",");
-+ first = false;
-+ bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE);
-+ }
-+
-+ bch_info(c, "%s", p.buf);
-+ printbuf_exit(&p);
-+}
-+
-+int bch2_fs_start(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ time64_t now = ktime_get_real_seconds();
-+ unsigned i;
-+ int ret;
-+
-+ print_mount_opts(c);
-+
-+ down_write(&c->state_lock);
-+
-+ BUG_ON(test_bit(BCH_FS_STARTED, &c->flags));
-+
-+ mutex_lock(&c->sb_lock);
-+
-+ ret = bch2_sb_members_v2_init(c);
-+ if (ret) {
-+ mutex_unlock(&c->sb_lock);
-+ goto err;
-+ }
-+
-+ for_each_online_member(ca, c, i)
-+ bch2_members_v2_get_mut(c->disk_sb.sb, i)->last_mount = cpu_to_le64(now);
-+
-+ mutex_unlock(&c->sb_lock);
-+
-+ for_each_rw_member(ca, c, i)
-+ bch2_dev_allocator_add(c, ca);
-+ bch2_recalc_capacity(c);
-+
-+ ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
-+ ? bch2_fs_recovery(c)
-+ : bch2_fs_initialize(c);
-+ if (ret)
-+ goto err;
-+
-+ ret = bch2_opts_check_may_set(c);
-+ if (ret)
-+ goto err;
-+
-+ if (bch2_fs_init_fault("fs_start")) {
-+ bch_err(c, "fs_start fault injected");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ set_bit(BCH_FS_STARTED, &c->flags);
-+
-+ if (c->opts.read_only || c->opts.nochanges) {
-+ bch2_fs_read_only(c);
-+ } else {
-+ ret = !test_bit(BCH_FS_RW, &c->flags)
-+ ? bch2_fs_read_write(c)
-+ : bch2_fs_read_write_late(c);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = 0;
-+out:
-+ up_write(&c->state_lock);
-+ return ret;
-+err:
-+ bch_err_msg(c, ret, "starting filesystem");
-+ goto out;
-+}
-+
-+static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
-+{
-+ struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
-+
-+ if (le16_to_cpu(sb->block_size) != block_sectors(c))
-+ return -BCH_ERR_mismatched_block_size;
-+
-+ if (le16_to_cpu(m.bucket_size) <
-+ BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb))
-+ return -BCH_ERR_bucket_size_too_small;
-+
-+ return 0;
-+}
-+
-+static int bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb)
-+{
-+ struct bch_sb *newest =
-+ le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb;
-+
-+ if (!uuid_equal(&fs->uuid, &sb->uuid))
-+ return -BCH_ERR_device_not_a_member_of_filesystem;
-+
-+ if (!bch2_dev_exists(newest, sb->dev_idx))
-+ return -BCH_ERR_device_has_been_removed;
-+
-+ if (fs->block_size != sb->block_size)
-+ return -BCH_ERR_mismatched_block_size;
-+
-+ return 0;
-+}
-+
-+/* Device startup/shutdown: */
-+
-+static void bch2_dev_release(struct kobject *kobj)
-+{
-+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
-+
-+ kfree(ca);
-+}
-+
-+static void bch2_dev_free(struct bch_dev *ca)
-+{
-+ cancel_work_sync(&ca->io_error_work);
-+
-+ if (ca->kobj.state_in_sysfs &&
-+ ca->disk_sb.bdev)
-+ sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
-+
-+ if (ca->kobj.state_in_sysfs)
-+ kobject_del(&ca->kobj);
-+
-+ bch2_free_super(&ca->disk_sb);
-+ bch2_dev_journal_exit(ca);
-+
-+ free_percpu(ca->io_done);
-+ bioset_exit(&ca->replica_set);
-+ bch2_dev_buckets_free(ca);
-+ free_page((unsigned long) ca->sb_read_scratch);
-+
-+ bch2_time_stats_exit(&ca->io_latency[WRITE]);
-+ bch2_time_stats_exit(&ca->io_latency[READ]);
-+
-+ percpu_ref_exit(&ca->io_ref);
-+ percpu_ref_exit(&ca->ref);
-+ kobject_put(&ca->kobj);
-+}
-+
-+static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
-+{
-+
-+ lockdep_assert_held(&c->state_lock);
-+
-+ if (percpu_ref_is_zero(&ca->io_ref))
-+ return;
-+
-+ __bch2_dev_read_only(c, ca);
-+
-+ reinit_completion(&ca->io_ref_completion);
-+ percpu_ref_kill(&ca->io_ref);
-+ wait_for_completion(&ca->io_ref_completion);
-+
-+ if (ca->kobj.state_in_sysfs) {
-+ sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
-+ sysfs_remove_link(&ca->kobj, "block");
-+ }
-+
-+ bch2_free_super(&ca->disk_sb);
-+ bch2_dev_journal_exit(ca);
-+}
-+
-+static void bch2_dev_ref_complete(struct percpu_ref *ref)
-+{
-+ struct bch_dev *ca = container_of(ref, struct bch_dev, ref);
-+
-+ complete(&ca->ref_completion);
-+}
-+
-+static void bch2_dev_io_ref_complete(struct percpu_ref *ref)
-+{
-+ struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
-+
-+ complete(&ca->io_ref_completion);
-+}
-+
-+static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ int ret;
-+
-+ if (!c->kobj.state_in_sysfs)
-+ return 0;
-+
-+ if (!ca->kobj.state_in_sysfs) {
-+ ret = kobject_add(&ca->kobj, &c->kobj,
-+ "dev-%u", ca->dev_idx);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (ca->disk_sb.bdev) {
-+ struct kobject *block = bdev_kobj(ca->disk_sb.bdev);
-+
-+ ret = sysfs_create_link(block, &ca->kobj, "bcachefs");
-+ if (ret)
-+ return ret;
-+
-+ ret = sysfs_create_link(&ca->kobj, block, "block");
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
-+ struct bch_member *member)
-+{
-+ struct bch_dev *ca;
-+ unsigned i;
-+
-+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-+ if (!ca)
-+ return NULL;
-+
-+ kobject_init(&ca->kobj, &bch2_dev_ktype);
-+ init_completion(&ca->ref_completion);
-+ init_completion(&ca->io_ref_completion);
-+
-+ init_rwsem(&ca->bucket_lock);
-+
-+ INIT_WORK(&ca->io_error_work, bch2_io_error_work);
-+
-+ bch2_time_stats_init(&ca->io_latency[READ]);
-+ bch2_time_stats_init(&ca->io_latency[WRITE]);
-+
-+ ca->mi = bch2_mi_to_cpu(member);
-+
-+ for (i = 0; i < ARRAY_SIZE(member->errors); i++)
-+ atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i]));
-+
-+ ca->uuid = member->uuid;
-+
-+ ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
-+ ca->mi.bucket_size / btree_sectors(c));
-+
-+ if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
-+ 0, GFP_KERNEL) ||
-+ percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
-+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
-+ !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
-+ bch2_dev_buckets_alloc(c, ca) ||
-+ bioset_init(&ca->replica_set, 4,
-+ offsetof(struct bch_write_bio, bio), 0) ||
-+ !(ca->io_done = alloc_percpu(*ca->io_done)))
-+ goto err;
-+
-+ return ca;
-+err:
-+ bch2_dev_free(ca);
-+ return NULL;
-+}
-+
-+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
-+ unsigned dev_idx)
-+{
-+ ca->dev_idx = dev_idx;
-+ __set_bit(ca->dev_idx, ca->self.d);
-+ scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
-+
-+ ca->fs = c;
-+ rcu_assign_pointer(c->devs[ca->dev_idx], ca);
-+
-+ if (bch2_dev_sysfs_online(c, ca))
-+ pr_warn("error creating sysfs objects");
-+}
-+
-+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
-+{
-+ struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
-+ struct bch_dev *ca = NULL;
-+ int ret = 0;
-+
-+ if (bch2_fs_init_fault("dev_alloc"))
-+ goto err;
-+
-+ ca = __bch2_dev_alloc(c, &member);
-+ if (!ca)
-+ goto err;
-+
-+ ca->fs = c;
-+
-+ bch2_dev_attach(c, ca, dev_idx);
-+ return ret;
-+err:
-+ if (ca)
-+ bch2_dev_free(ca);
-+ return -BCH_ERR_ENOMEM_dev_alloc;
-+}
-+
-+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
-+{
-+ unsigned ret;
-+
-+ if (bch2_dev_is_online(ca)) {
-+ bch_err(ca, "already have device online in slot %u",
-+ sb->sb->dev_idx);
-+ return -BCH_ERR_device_already_online;
-+ }
-+
-+ if (get_capacity(sb->bdev->bd_disk) <
-+ ca->mi.bucket_size * ca->mi.nbuckets) {
-+ bch_err(ca, "cannot online: device too small");
-+ return -BCH_ERR_device_size_too_small;
-+ }
-+
-+ BUG_ON(!percpu_ref_is_zero(&ca->io_ref));
-+
-+ ret = bch2_dev_journal_init(ca, sb->sb);
-+ if (ret)
-+ return ret;
-+
-+ /* Commit: */
-+ ca->disk_sb = *sb;
-+ memset(sb, 0, sizeof(*sb));
-+
-+ ca->dev = ca->disk_sb.bdev->bd_dev;
-+
-+ percpu_ref_reinit(&ca->io_ref);
-+
-+ return 0;
-+}
-+
-+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
-+{
-+ struct bch_dev *ca;
-+ int ret;
-+
-+ lockdep_assert_held(&c->state_lock);
-+
-+ if (le64_to_cpu(sb->sb->seq) >
-+ le64_to_cpu(c->disk_sb.sb->seq))
-+ bch2_sb_to_fs(c, sb->sb);
-+
-+ BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
-+ !c->devs[sb->sb->dev_idx]);
-+
-+ ca = bch_dev_locked(c, sb->sb->dev_idx);
-+
-+ ret = __bch2_dev_attach_bdev(ca, sb);
-+ if (ret)
-+ return ret;
-+
-+ bch2_dev_sysfs_online(c, ca);
-+
-+ if (c->sb.nr_devices == 1)
-+ snprintf(c->name, sizeof(c->name), "%pg", ca->disk_sb.bdev);
-+ snprintf(ca->name, sizeof(ca->name), "%pg", ca->disk_sb.bdev);
-+
-+ rebalance_wakeup(c);
-+ return 0;
-+}
-+
-+/* Device management: */
-+
-+/*
-+ * Note: this function is also used by the error paths - when a particular
-+ * device sees an error, we call it to determine whether we can just set the
-+ * device RO, or - if this function returns false - we'll set the whole
-+ * filesystem RO:
-+ *
-+ * XXX: maybe we should be more explicit about whether we're changing state
-+ * because we got an error or what have you?
-+ */
-+bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
-+ enum bch_member_state new_state, int flags)
-+{
-+ struct bch_devs_mask new_online_devs;
-+ struct bch_dev *ca2;
-+ int i, nr_rw = 0, required;
-+
-+ lockdep_assert_held(&c->state_lock);
-+
-+ switch (new_state) {
-+ case BCH_MEMBER_STATE_rw:
-+ return true;
-+ case BCH_MEMBER_STATE_ro:
-+ if (ca->mi.state != BCH_MEMBER_STATE_rw)
-+ return true;
-+
-+ /* do we have enough devices to write to? */
-+ for_each_member_device(ca2, c, i)
-+ if (ca2 != ca)
-+ nr_rw += ca2->mi.state == BCH_MEMBER_STATE_rw;
-+
-+ required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
-+ ? c->opts.metadata_replicas
-+ : c->opts.metadata_replicas_required,
-+ !(flags & BCH_FORCE_IF_DATA_DEGRADED)
-+ ? c->opts.data_replicas
-+ : c->opts.data_replicas_required);
-+
-+ return nr_rw >= required;
-+ case BCH_MEMBER_STATE_failed:
-+ case BCH_MEMBER_STATE_spare:
-+ if (ca->mi.state != BCH_MEMBER_STATE_rw &&
-+ ca->mi.state != BCH_MEMBER_STATE_ro)
-+ return true;
-+
-+ /* do we have enough devices to read from? */
-+ new_online_devs = bch2_online_devs(c);
-+ __clear_bit(ca->dev_idx, new_online_devs.d);
-+
-+ return bch2_have_enough_devs(c, new_online_devs, flags, false);
-+ default:
-+ BUG();
-+ }
-+}
-+
-+static bool bch2_fs_may_start(struct bch_fs *c)
-+{
-+ struct bch_dev *ca;
-+ unsigned i, flags = 0;
-+
-+ if (c->opts.very_degraded)
-+ flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
-+
-+ if (c->opts.degraded)
-+ flags |= BCH_FORCE_IF_DEGRADED;
-+
-+ if (!c->opts.degraded &&
-+ !c->opts.very_degraded) {
-+ mutex_lock(&c->sb_lock);
-+
-+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
-+ if (!bch2_dev_exists(c->disk_sb.sb, i))
-+ continue;
-+
-+ ca = bch_dev_locked(c, i);
-+
-+ if (!bch2_dev_is_online(ca) &&
-+ (ca->mi.state == BCH_MEMBER_STATE_rw ||
-+ ca->mi.state == BCH_MEMBER_STATE_ro)) {
-+ mutex_unlock(&c->sb_lock);
-+ return false;
-+ }
-+ }
-+ mutex_unlock(&c->sb_lock);
-+ }
-+
-+ return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
-+}
-+
-+static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ /*
-+ * The allocator thread itself allocates btree nodes, so stop it first:
-+ */
-+ bch2_dev_allocator_remove(c, ca);
-+ bch2_dev_journal_stop(&c->journal, ca);
-+}
-+
-+static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ lockdep_assert_held(&c->state_lock);
-+
-+ BUG_ON(ca->mi.state != BCH_MEMBER_STATE_rw);
-+
-+ bch2_dev_allocator_add(c, ca);
-+ bch2_recalc_capacity(c);
-+}
-+
-+int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
-+ enum bch_member_state new_state, int flags)
-+{
-+ struct bch_member *m;
-+ int ret = 0;
-+
-+ if (ca->mi.state == new_state)
-+ return 0;
-+
-+ if (!bch2_dev_state_allowed(c, ca, new_state, flags))
-+ return -BCH_ERR_device_state_not_allowed;
-+
-+ if (new_state != BCH_MEMBER_STATE_rw)
-+ __bch2_dev_read_only(c, ca);
-+
-+ bch_notice(ca, "%s", bch2_member_states[new_state]);
-+
-+ mutex_lock(&c->sb_lock);
-+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-+ SET_BCH_MEMBER_STATE(m, new_state);
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ if (new_state == BCH_MEMBER_STATE_rw)
-+ __bch2_dev_read_write(c, ca);
-+
-+ rebalance_wakeup(c);
-+
-+ return ret;
-+}
-+
-+int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
-+ enum bch_member_state new_state, int flags)
-+{
-+ int ret;
-+
-+ down_write(&c->state_lock);
-+ ret = __bch2_dev_set_state(c, ca, new_state, flags);
-+ up_write(&c->state_lock);
-+
-+ return ret;
-+}
-+
-+/* Device add/removal: */
-+
-+static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
-+{
-+ struct bpos start = POS(ca->dev_idx, 0);
-+ struct bpos end = POS(ca->dev_idx, U64_MAX);
-+ int ret;
-+
-+ /*
-+ * We clear the LRU and need_discard btrees first so that we don't race
-+ * with bch2_do_invalidates() and bch2_do_discards()
-+ */
-+ ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
-+ BTREE_TRIGGER_NORUN, NULL) ?:
-+ bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
-+ BTREE_TRIGGER_NORUN, NULL) ?:
-+ bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
-+ BTREE_TRIGGER_NORUN, NULL) ?:
-+ bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
-+ BTREE_TRIGGER_NORUN, NULL) ?:
-+ bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
-+ BTREE_TRIGGER_NORUN, NULL) ?:
-+ bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
-+ BTREE_TRIGGER_NORUN, NULL);
-+ if (ret)
-+ bch_err_msg(c, ret, "removing dev alloc info");
-+
-+ return ret;
-+}
-+
-+int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
-+{
-+ struct bch_member *m;
-+ unsigned dev_idx = ca->dev_idx, data;
-+ int ret;
-+
-+ down_write(&c->state_lock);
-+
-+ /*
-+ * We consume a reference to ca->ref, regardless of whether we succeed
-+ * or fail:
-+ */
-+ percpu_ref_put(&ca->ref);
-+
-+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
-+ bch_err(ca, "Cannot remove without losing data");
-+ ret = -BCH_ERR_device_state_not_allowed;
-+ goto err;
-+ }
-+
-+ __bch2_dev_read_only(c, ca);
-+
-+ ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
-+ if (ret) {
-+ bch_err_msg(ca, ret, "dropping data");
-+ goto err;
-+ }
-+
-+ ret = bch2_dev_remove_alloc(c, ca);
-+ if (ret) {
-+ bch_err_msg(ca, ret, "deleting alloc info");
-+ goto err;
-+ }
-+
-+ ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
-+ if (ret) {
-+ bch_err_msg(ca, ret, "flushing journal");
-+ goto err;
-+ }
-+
-+ ret = bch2_journal_flush(&c->journal);
-+ if (ret) {
-+ bch_err(ca, "journal error");
-+ goto err;
-+ }
-+
-+ ret = bch2_replicas_gc2(c);
-+ if (ret) {
-+ bch_err_msg(ca, ret, "in replicas_gc2()");
-+ goto err;
-+ }
-+
-+ data = bch2_dev_has_data(c, ca);
-+ if (data) {
-+ struct printbuf data_has = PRINTBUF;
-+
-+ prt_bitflags(&data_has, bch2_data_types, data);
-+ bch_err(ca, "Remove failed, still has data (%s)", data_has.buf);
-+ printbuf_exit(&data_has);
-+ ret = -EBUSY;
-+ goto err;
-+ }
-+
-+ __bch2_dev_offline(c, ca);
-+
-+ mutex_lock(&c->sb_lock);
-+ rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
-+ mutex_unlock(&c->sb_lock);
-+
-+ percpu_ref_kill(&ca->ref);
-+ wait_for_completion(&ca->ref_completion);
-+
-+ bch2_dev_free(ca);
-+
-+ /*
-+ * At this point the device object has been removed in-core, but the
-+ * on-disk journal might still refer to the device index via sb device
-+ * usage entries. Recovery fails if it sees usage information for an
-+ * invalid device. Flush journal pins to push the back of the journal
-+ * past now invalid device index references before we update the
-+ * superblock, but after the device object has been removed so any
-+ * further journal writes elide usage info for the device.
-+ */
-+ bch2_journal_flush_all_pins(&c->journal);
-+
-+ /*
-+ * Free this device's slot in the bch_member array - all pointers to
-+ * this device must be gone:
-+ */
-+ mutex_lock(&c->sb_lock);
-+ m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
-+ memset(&m->uuid, 0, sizeof(m->uuid));
-+
-+ bch2_write_super(c);
-+
-+ mutex_unlock(&c->sb_lock);
-+ up_write(&c->state_lock);
-+
-+ bch2_dev_usage_journal_reserve(c);
-+ return 0;
-+err:
-+ if (ca->mi.state == BCH_MEMBER_STATE_rw &&
-+ !percpu_ref_is_zero(&ca->io_ref))
-+ __bch2_dev_read_write(c, ca);
-+ up_write(&c->state_lock);
-+ return ret;
-+}
-+
-+/*
-+ * Add new device to running filesystem.
-+ *
-+ * Reads the superblock found at @path, allocates a struct bch_dev for it,
-+ * claims the first member slot that bch2_dev_exists() reports as unused in
-+ * the filesystem superblock, writes the updated superblock and then marks
-+ * the new device's superblock and initializes free space.
-+ *
-+ * Returns 0 on success or a negative error code. Failures before the
-+ * superblock has been written free the new bch_dev; later failures
-+ * (err_late) leave it in place and only release the local resources.
-+ */
-+int bch2_dev_add(struct bch_fs *c, const char *path)
-+{
-+	struct bch_opts opts = bch2_opts_empty();
-+	struct bch_sb_handle sb;
-+	struct bch_dev *ca = NULL;
-+	struct bch_sb_field_members_v2 *mi;
-+	struct bch_member dev_mi;
-+	unsigned dev_idx, nr_devices, u64s;
-+	struct printbuf errbuf = PRINTBUF;
-+	struct printbuf label = PRINTBUF;
-+	int ret;
-+
-+	ret = bch2_read_super(path, &opts, &sb);
-+	if (ret) {
-+		bch_err_msg(c, ret, "reading super");
-+		goto err;
-+	}
-+
-+	dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx);
-+
-+	/*
-+	 * Render the device's group path from its own superblock now; it is
-+	 * re-applied to the filesystem once the device has a slot.
-+	 */
-+	if (BCH_MEMBER_GROUP(&dev_mi)) {
-+		bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1);
-+		if (label.allocation_failure) {
-+			ret = -ENOMEM;
-+			goto err;
-+		}
-+	}
-+
-+	ret = bch2_dev_may_add(sb.sb, c);
-+	if (ret) {
-+		bch_err_fn(c, ret);
-+		goto err;
-+	}
-+
-+	ca = __bch2_dev_alloc(c, &dev_mi);
-+	if (!ca) {
-+		ret = -ENOMEM;
-+		goto err;
-+	}
-+
-+	bch2_dev_usage_init(ca);
-+
-+	ret = __bch2_dev_attach_bdev(ca, &sb);
-+	if (ret)
-+		goto err;
-+
-+	ret = bch2_dev_journal_alloc(ca);
-+	if (ret) {
-+		bch_err_msg(c, ret, "allocating journal");
-+		goto err;
-+	}
-+
-+	down_write(&c->state_lock);
-+	mutex_lock(&c->sb_lock);
-+
-+	ret = bch2_sb_from_fs(c, ca);
-+	if (ret) {
-+		bch_err_msg(c, ret, "setting up new superblock");
-+		goto err_unlock;
-+	}
-+
-+	if (dynamic_fault("bcachefs:add:no_slot"))
-+		goto no_slot;
-+
-+	/* first member index not currently in use: */
-+	for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++)
-+		if (!bch2_dev_exists(c->disk_sb.sb, dev_idx))
-+			goto have_slot;
-+no_slot:
-+	ret = -BCH_ERR_ENOSPC_sb_members;
-+	bch_err_msg(c, ret, "setting up new superblock");
-+	goto err_unlock;
-+
-+have_slot:
-+	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
-+
-+	/* grow the members_v2 field so it can hold nr_devices entries: */
-+	mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
-+	u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
-+			    le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64));
-+
-+	mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
-+	if (!mi) {
-+		ret = -BCH_ERR_ENOSPC_sb_members;
-+		bch_err_msg(c, ret, "setting up new superblock");
-+		goto err_unlock;
-+	}
-+	struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
-+
-+	/* success: */
-+
-+	*m = dev_mi;
-+	m->last_mount = cpu_to_le64(ktime_get_real_seconds());
-+	c->disk_sb.sb->nr_devices = nr_devices;
-+
-+	ca->disk_sb.sb->dev_idx = dev_idx;
-+	bch2_dev_attach(c, ca, dev_idx);
-+
-+	if (BCH_MEMBER_GROUP(&dev_mi)) {
-+		ret = __bch2_dev_group_set(c, ca, label.buf);
-+		if (ret) {
-+			bch_err_msg(c, ret, "creating new label");
-+			goto err_unlock;
-+		}
-+	}
-+
-+	bch2_write_super(c);
-+	mutex_unlock(&c->sb_lock);
-+
-+	bch2_dev_usage_journal_reserve(c);
-+
-+	ret = bch2_trans_mark_dev_sb(c, ca);
-+	if (ret) {
-+		bch_err_msg(ca, ret, "marking new superblock");
-+		goto err_late;
-+	}
-+
-+	ret = bch2_fs_freespace_init(c);
-+	if (ret) {
-+		bch_err_msg(ca, ret, "initializing free space");
-+		goto err_late;
-+	}
-+
-+	ca->new_fs_bucket_idx = 0;
-+
-+	if (ca->mi.state == BCH_MEMBER_STATE_rw)
-+		__bch2_dev_read_write(c, ca);
-+
-+	up_write(&c->state_lock);
-+
-+	/*
-+	 * The printbufs are released on every error path; release them here
-+	 * as well so the label buffer is not leaked on success.
-+	 */
-+	printbuf_exit(&label);
-+	printbuf_exit(&errbuf);
-+	return 0;
-+
-+err_unlock:
-+	mutex_unlock(&c->sb_lock);
-+	up_write(&c->state_lock);
-+err:
-+	if (ca)
-+		bch2_dev_free(ca);
-+	bch2_free_super(&sb);
-+	printbuf_exit(&label);
-+	printbuf_exit(&errbuf);
-+	return ret;
-+err_late:
-+	/* superblock already written: keep the device, free only locals */
-+	up_write(&c->state_lock);
-+	ca = NULL;
-+	goto err;
-+}
-+
-+/*
-+ * Hot add existing device to running filesystem:
-+ *
-+ * Reads the superblock at @path, checks via bch2_dev_in_fs() that it belongs
-+ * to this filesystem, attaches the block device and brings the member back
-+ * into service. Runs with c->state_lock held for write throughout.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+int bch2_dev_online(struct bch_fs *c, const char *path)
-+{
-+ struct bch_opts opts = bch2_opts_empty();
-+ struct bch_sb_handle sb = { NULL };
-+ struct bch_dev *ca;
-+ unsigned dev_idx;
-+ int ret;
-+
-+ down_write(&c->state_lock);
-+
-+ ret = bch2_read_super(path, &opts, &sb);
-+ if (ret) {
-+ up_write(&c->state_lock);
-+ return ret;
-+ }
-+
-+ /* saved before attaching: sb is not dereferenced again after this point */
-+ dev_idx = sb.sb->dev_idx;
-+
-+ ret = bch2_dev_in_fs(c->disk_sb.sb, sb.sb);
-+ if (ret) {
-+ bch_err_msg(c, ret, "bringing %s online", path);
-+ goto err;
-+ }
-+
-+ ret = bch2_dev_attach_bdev(c, &sb);
-+ if (ret)
-+ goto err;
-+
-+ ca = bch_dev_locked(c, dev_idx);
-+
-+ ret = bch2_trans_mark_dev_sb(c, ca);
-+ if (ret) {
-+ bch_err_msg(c, ret, "bringing %s online: error from bch2_trans_mark_dev_sb", path);
-+ goto err;
-+ }
-+
-+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
-+ __bch2_dev_read_write(c, ca);
-+
-+ /*
-+ * NOTE(review): bch_err_msg() is invoked before ret is tested here and
-+ * below - presumably the macro is a no-op when ret == 0; confirm.
-+ */
-+ if (!ca->mi.freespace_initialized) {
-+ ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
-+ bch_err_msg(ca, ret, "initializing free space");
-+ if (ret)
-+ goto err;
-+ }
-+
-+ if (!ca->journal.nr) {
-+ ret = bch2_dev_journal_alloc(ca);
-+ bch_err_msg(ca, ret, "allocating journal");
-+ if (ret)
-+ goto err;
-+ }
-+
-+ /* record the time of this mount in the member's superblock entry: */
-+ mutex_lock(&c->sb_lock);
-+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
-+ cpu_to_le64(ktime_get_real_seconds());
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ /*
-+ * sb is not freed on success - presumably bch2_dev_attach_bdev() took
-+ * ownership of it; TODO confirm.
-+ */
-+ up_write(&c->state_lock);
-+ return 0;
-+err:
-+ up_write(&c->state_lock);
-+ bch2_free_super(&sb);
-+ return ret;
-+}
-+
-+/*
-+ * Take @ca offline. Offlining a device that is already offline only logs a
-+ * message and reports success; a device that may not enter the "failed"
-+ * state under @flags is refused with -BCH_ERR_device_state_not_allowed.
-+ * c->state_lock is held for write for the duration of the call.
-+ */
-+int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
-+{
-+	int ret = 0;
-+
-+	down_write(&c->state_lock);
-+
-+	if (!bch2_dev_is_online(ca)) {
-+		bch_err(ca, "Already offline");
-+	} else if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
-+		bch_err(ca, "Cannot offline required disk");
-+		ret = -BCH_ERR_device_state_not_allowed;
-+	} else {
-+		__bch2_dev_offline(c, ca);
-+	}
-+
-+	up_write(&c->state_lock);
-+	return ret;
-+}
-+
-+/*
-+ * Grow device @ca to @nbuckets buckets. Shrinking is rejected with -EINVAL,
-+ * and an online device whose capacity is smaller than
-+ * bucket_size * nbuckets is rejected with -BCH_ERR_device_size_too_small.
-+ *
-+ * On success the new bucket count is written to the member's superblock
-+ * entry and filesystem capacity is recalculated. Returns 0 or a negative
-+ * error code; c->state_lock is held for write throughout.
-+ */
-+int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
-+{
-+ struct bch_member *m;
-+ u64 old_nbuckets;
-+ int ret = 0;
-+
-+ down_write(&c->state_lock);
-+ old_nbuckets = ca->mi.nbuckets;
-+
-+ if (nbuckets < ca->mi.nbuckets) {
-+ bch_err(ca, "Cannot shrink yet");
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ /* the capacity check is only possible while the block device is open: */
-+ if (bch2_dev_is_online(ca) &&
-+ get_capacity(ca->disk_sb.bdev->bd_disk) <
-+ ca->mi.bucket_size * nbuckets) {
-+ bch_err(ca, "New size larger than device");
-+ ret = -BCH_ERR_device_size_too_small;
-+ goto err;
-+ }
-+
-+ ret = bch2_dev_buckets_resize(c, ca, nbuckets);
-+ if (ret) {
-+ bch_err_msg(ca, ret, "resizing buckets");
-+ goto err;
-+ }
-+
-+ ret = bch2_trans_mark_dev_sb(c, ca);
-+ if (ret)
-+ goto err;
-+
-+ /* persist the new size in the superblock: */
-+ mutex_lock(&c->sb_lock);
-+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-+ m->nbuckets = cpu_to_le64(nbuckets);
-+
-+ bch2_write_super(c);
-+ mutex_unlock(&c->sb_lock);
-+
-+ /* only the newly added range [old_nbuckets, nbuckets) is initialized: */
-+ if (ca->mi.freespace_initialized) {
-+ ret = bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets);
-+ if (ret)
-+ goto err;
-+
-+ /*
-+ * XXX: this is all wrong transactionally - we'll be able to do
-+ * this correctly after the disk space accounting rewrite
-+ */
-+ ca->usage_base->d[BCH_DATA_free].buckets += nbuckets - old_nbuckets;
-+ }
-+
-+ bch2_recalc_capacity(c);
-+err:
-+ up_write(&c->state_lock);
-+ return ret;
-+}
-+
-+/*
-+ * Look up a member device by name.
-+ *
-+ * Returns the matching device with a ref held on ca->ref (the caller is
-+ * responsible for dropping it), or ERR_PTR(-BCH_ERR_ENOENT_dev_not_found)
-+ * if no member device has that name.
-+ */
-+struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
-+{
-+	struct bch_dev *ca;
-+	unsigned i;
-+
-+	rcu_read_lock();
-+	for_each_member_device_rcu(ca, c, i, NULL)
-+		if (!strcmp(name, ca->name)) {
-+			/*
-+			 * Take the ref promised to the caller while still
-+			 * inside the RCU read section, so the device cannot
-+			 * go away between lookup and return.
-+			 */
-+			percpu_ref_get(&ca->ref);
-+			goto found;
-+		}
-+	ca = ERR_PTR(-BCH_ERR_ENOENT_dev_not_found);
-+found:
-+	rcu_read_unlock();
-+
-+	return ca;
-+}
-+
-+/* Filesystem open: */
-+
-+/*
-+ * Open a filesystem from the @nr_devices paths in @devices.
-+ *
-+ * Reads every device's superblock, picks the one with the highest sequence
-+ * number as authoritative, allocates the filesystem from it, attaches all
-+ * block devices and, unless the nostart option is set, starts it.
-+ *
-+ * Returns the filesystem on success or an ERR_PTR() on failure. A module
-+ * reference is held only for the duration of the call.
-+ */
-+struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
-+ struct bch_opts opts)
-+{
-+ DARRAY(struct bch_sb_handle) sbs = { 0 };
-+ struct bch_fs *c = NULL;
-+ struct bch_sb_handle *sb, *best = NULL;
-+ struct printbuf errbuf = PRINTBUF;
-+ int ret = 0;
-+
-+ if (!try_module_get(THIS_MODULE))
-+ return ERR_PTR(-ENODEV);
-+
-+ if (!nr_devices) {
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ /* reserve room up front so the pushes below are not expected to fail: */
-+ ret = darray_make_room(&sbs, nr_devices);
-+ if (ret)
-+ goto err;
-+
-+ for (unsigned i = 0; i < nr_devices; i++) {
-+ struct bch_sb_handle sb = { NULL };
-+
-+ ret = bch2_read_super(devices[i], &opts, &sb);
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(darray_push(&sbs, sb));
-+ }
-+
-+ /* the superblock with the highest seq is treated as authoritative: */
-+ darray_for_each(sbs, sb)
-+ if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq))
-+ best = sb;
-+
-+ darray_for_each_reverse(sbs, sb) {
-+ if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) {
-+ pr_info("%pg has been removed, skipping", sb->bdev);
-+ bch2_free_super(sb);
-+ darray_remove_item(&sbs, sb);
-+ /*
-+ * best is moved back one slot when it sits after the
-+ * removed entry - presumably darray_remove_item()
-+ * shifts later elements down; confirm.
-+ */
-+ best -= best > sb;
-+ continue;
-+ }
-+
-+ ret = bch2_dev_in_fs(best->sb, sb->sb);
-+ if (ret)
-+ goto err_print;
-+ }
-+
-+ c = bch2_fs_alloc(best->sb, opts);
-+ ret = PTR_ERR_OR_ZERO(c);
-+ if (ret)
-+ goto err;
-+
-+ down_write(&c->state_lock);
-+ darray_for_each(sbs, sb) {
-+ ret = bch2_dev_attach_bdev(c, sb);
-+ if (ret) {
-+ up_write(&c->state_lock);
-+ goto err;
-+ }
-+ }
-+ up_write(&c->state_lock);
-+
-+ if (!bch2_fs_may_start(c)) {
-+ ret = -BCH_ERR_insufficient_devices_to_start;
-+ goto err_print;
-+ }
-+
-+ if (!c->opts.nostart) {
-+ ret = bch2_fs_start(c);
-+ if (ret)
-+ goto err;
-+ }
-+out:
-+ /* common exit for success and failure: release all local resources */
-+ darray_for_each(sbs, sb)
-+ bch2_free_super(sb);
-+ darray_exit(&sbs);
-+ printbuf_exit(&errbuf);
-+ module_put(THIS_MODULE);
-+ return c;
-+err_print:
-+ pr_err("bch_fs_open err opening %s: %s",
-+ devices[0], bch2_err_str(ret));
-+err:
-+ /* c may be NULL (never allocated) or an ERR_PTR (allocation failed): */
-+ if (!IS_ERR_OR_NULL(c))
-+ bch2_fs_stop(c);
-+ c = ERR_PTR(ret);
-+ goto out;
-+}
-+
-+/* Global interfaces/init */
-+
-+/*
-+ * Module teardown: shut down the subsystems brought up by bcachefs_init()
-+ * and unregister the kset if it was created. Also used by bcachefs_init()
-+ * to unwind a partially completed initialization.
-+ */
-+static void bcachefs_exit(void)
-+{
-+	bch2_debug_exit();
-+	bch2_vfs_exit();
-+	bch2_chardev_exit();
-+	bch2_btree_key_cache_exit();
-+
-+	if (!bcachefs_kset)
-+		return;
-+
-+	kset_unregister(bcachefs_kset);
-+}
-+
-+/*
-+ * Module init: run the bkey packing self test, create the "bcachefs" kset
-+ * and bring up the subsystems in order, stopping at the first one that
-+ * fails. Any failure unwinds through bcachefs_exit() and is reported as
-+ * -ENOMEM.
-+ */
-+static int __init bcachefs_init(void)
-+{
-+	bool failed;
-+
-+	bch2_bkey_pack_test();
-+
-+	bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj);
-+
-+	/* short-circuit evaluation: later steps run only if earlier ones succeed */
-+	failed = !bcachefs_kset ||
-+		 bch2_btree_key_cache_init() ||
-+		 bch2_chardev_init() ||
-+		 bch2_vfs_init() ||
-+		 bch2_debug_init();
-+	if (!failed)
-+		return 0;
-+
-+	bcachefs_exit();
-+	return -ENOMEM;
-+}
-+
-+/*
-+ * For every entry listed by BCH_DEBUG_PARAMS(), define a global bool
-+ * bch2_<name> and expose it as module parameter <name> with mode 0644.
-+ */
-+#define BCH_DEBUG_PARAM(name, description) \
-+ bool bch2_##name; \
-+ module_param_named(name, bch2_##name, bool, 0644); \
-+ MODULE_PARM_DESC(name, description);
-+BCH_DEBUG_PARAMS()
-+#undef BCH_DEBUG_PARAM
-+
-+/* exposed as module parameter "version" with mode 0400 */
-+__maybe_unused
-+static unsigned bch2_metadata_version = bcachefs_metadata_version_current;
-+module_param_named(version, bch2_metadata_version, uint, 0400);
-+
-+module_exit(bcachefs_exit);
-+module_init(bcachefs_init);
-diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
-new file mode 100644
-index 000000000000..bf762df18012
---- /dev/null
-+++ b/fs/bcachefs/super.h
-@@ -0,0 +1,52 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SUPER_H
-+#define _BCACHEFS_SUPER_H
-+
-+#include "extents.h"
-+
-+#include "bcachefs_ioctl.h"
-+
-+#include <linux/math64.h>
-+
-+/* Filesystem lookup by device number or by UUID: */
-+struct bch_fs *bch2_dev_to_fs(dev_t);
-+struct bch_fs *bch2_uuid_to_fs(__uuid_t);
-+
-+/* Member device state changes: */
-+bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
-+ enum bch_member_state, int);
-+int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
-+ enum bch_member_state, int);
-+int bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
-+ enum bch_member_state, int);
-+
-+/* Member device management on a running filesystem: */
-+int bch2_dev_fail(struct bch_dev *, int);
-+int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int);
-+int bch2_dev_add(struct bch_fs *, const char *);
-+int bch2_dev_online(struct bch_fs *, const char *);
-+int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
-+int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
-+struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
-+
-+/* Filesystem read-only / read-write transitions: */
-+bool bch2_fs_emergency_read_only(struct bch_fs *);
-+void bch2_fs_read_only(struct bch_fs *);
-+
-+int bch2_fs_read_write(struct bch_fs *);
-+int bch2_fs_read_write_early(struct bch_fs *);
-+
-+/*
-+ * Only for use in the recovery/fsck path:
-+ *
-+ * Switch to read-write via bch2_fs_read_write_early() unless either the
-+ * BCH_FS_RW or the BCH_FS_WAS_RW flag is already set.
-+ */
-+static inline void bch2_fs_lazy_rw(struct bch_fs *c)
-+{
-+	if (test_bit(BCH_FS_RW, &c->flags) ||
-+	    test_bit(BCH_FS_WAS_RW, &c->flags))
-+		return;
-+
-+	bch2_fs_read_write_early(c);
-+}
-+
-+void __bch2_fs_stop(struct bch_fs *);
-+void bch2_fs_free(struct bch_fs *);
-+void bch2_fs_stop(struct bch_fs *);
-+
-+int bch2_fs_start(struct bch_fs *);
-+struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts);
-+
-+#endif /* _BCACHEFS_SUPER_H */
-diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
-new file mode 100644
-index 000000000000..7dda4985b99f
---- /dev/null
-+++ b/fs/bcachefs/super_types.h
-@@ -0,0 +1,40 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SUPER_TYPES_H
-+#define _BCACHEFS_SUPER_TYPES_H
-+
-+/*
-+ * Handle tying a superblock buffer (sb, buffer_size) to the block device
-+ * it is associated with (bdev, holder, mode) and a bio.
-+ * NOTE(review): the exact meaning of the flag bits and of seq is not
-+ * visible from this header - confirm against super-io.c.
-+ */
-+struct bch_sb_handle {
-+ struct bch_sb *sb;
-+ struct block_device *bdev;
-+ struct bio *bio;
-+ void *holder;
-+ size_t buffer_size;
-+ blk_mode_t mode;
-+ unsigned have_layout:1;
-+ unsigned have_bio:1;
-+ unsigned fs_sb:1;
-+ u64 seq;
-+};
-+
-+/* Bitmap with one bit per possible member device (BCH_SB_MEMBERS_MAX bits). */
-+struct bch_devs_mask {
-+ unsigned long d[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
-+};
-+
-+/* Counted list of up to BCH_BKEY_PTRS_MAX device indices, one byte each. */
-+struct bch_devs_list {
-+ u8 nr;
-+ u8 devs[BCH_BKEY_PTRS_MAX];
-+};
-+
-+/*
-+ * Per-device member information using native integer types.
-+ * NOTE(review): presumably the CPU-endian, unpacked counterpart of the
-+ * on-disk struct bch_member - confirm.
-+ */
-+struct bch_member_cpu {
-+ u64 nbuckets; /* device size */
-+ u16 first_bucket; /* index of first bucket used */
-+ u16 bucket_size; /* sectors */
-+ u16 group;
-+ u8 state;
-+ u8 discard;
-+ u8 data_allowed;
-+ u8 durability;
-+ u8 freespace_initialized;
-+ u8 valid;
-+};
-+
-+#endif /* _BCACHEFS_SUPER_TYPES_H */
-diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
-new file mode 100644
-index 000000000000..662366ce9e00
---- /dev/null
-+++ b/fs/bcachefs/sysfs.c
-@@ -0,0 +1,1034 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * bcache sysfs interfaces
-+ *
-+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#ifndef NO_BCACHEFS_SYSFS
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "sysfs.h"
-+#include "btree_cache.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "btree_gc.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "inode.h"
-+#include "journal.h"
-+#include "keylist.h"
-+#include "move.h"
-+#include "movinggc.h"
-+#include "nocow_locking.h"
-+#include "opts.h"
-+#include "rebalance.h"
-+#include "replicas.h"
-+#include "super-io.h"
-+#include "tests.h"
-+
-+#include <linux/blkdev.h>
-+#include <linux/sort.h>
-+#include <linux/sched/clock.h>
-+
-+#include "util.h"
-+
-+/*
-+ * Define <type>_sysfs_ops, wiring .show/.store to the <type>_show and
-+ * <type>_store functions generated by SHOW(type) and STORE(type).
-+ */
-+#define SYSFS_OPS(type) \
-+const struct sysfs_ops type ## _sysfs_ops = { \
-+ .show = type ## _show, \
-+ .store = type ## _store \
-+}
-+
-+/*
-+ * SHOW(fn) { body } defines fn_to_text(out, kobj, attr), which writes into a
-+ * printbuf, plus the sysfs-facing fn_show() wrapper around it.
-+ *
-+ * The wrapper appends a trailing newline if the output lacks one, turns a
-+ * printbuf allocation failure into -ENOMEM, copies at most PAGE_SIZE - 1
-+ * bytes into the sysfs buffer and returns the byte count, or the error
-+ * passed through bch2_err_class().
-+ */
-+#define SHOW(fn) \
-+static ssize_t fn ## _to_text(struct printbuf *, \
-+ struct kobject *, struct attribute *); \
-+ \
-+static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
-+ char *buf) \
-+{ \
-+ struct printbuf out = PRINTBUF; \
-+ ssize_t ret = fn ## _to_text(&out, kobj, attr); \
-+ \
-+ if (out.pos && out.buf[out.pos - 1] != '\n') \
-+ prt_newline(&out); \
-+ \
-+ if (!ret && out.allocation_failure) \
-+ ret = -ENOMEM; \
-+ \
-+ if (!ret) { \
-+ ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \
-+ memcpy(buf, out.buf, ret); \
-+ } \
-+ printbuf_exit(&out); \
-+ return bch2_err_class(ret); \
-+} \
-+ \
-+static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
-+ struct attribute *attr)
-+
-+/*
-+ * STORE(fn) { body } defines fn_store_inner(kobj, attr, buf, size) plus the
-+ * sysfs-facing fn_store() wrapper, which passes the inner function's return
-+ * value through bch2_err_class().
-+ */
-+#define STORE(fn) \
-+static ssize_t fn ## _store_inner(struct kobject *, struct attribute *,\
-+ const char *, size_t); \
-+ \
-+static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
-+ const char *buf, size_t size) \
-+{ \
-+ return bch2_err_class(fn##_store_inner(kobj, attr, buf, size)); \
-+} \
-+ \
-+static ssize_t fn ## _store_inner(struct kobject *kobj, struct attribute *attr,\
-+ const char *buf, size_t size)
-+
-+/* Define a static struct attribute named sysfs_<_name> with the given mode. */
-+#define __sysfs_attribute(_name, _mode) \
-+ static struct attribute sysfs_##_name = \
-+ { .name = #_name, .mode = _mode }
-+
-+/* Shorthands for write-only (0200), read-only (0444) and read-write (0644). */
-+#define write_attribute(n) __sysfs_attribute(n, 0200)
-+#define read_attribute(n) __sysfs_attribute(n, 0444)
-+#define rw_attribute(n) __sysfs_attribute(n, 0644)
-+
-+/*
-+ * Output helpers for use inside a SHOW() body: each one acts only when the
-+ * attribute being shown ("attr") is sysfs_<file>, and writes to the "out"
-+ * printbuf. Both names are taken from the enclosing function's scope.
-+ */
-+
-+/* printf-style output, with a newline appended to the format */
-+#define sysfs_printf(file, fmt, ...) \
-+do { \
-+ if (attr == &sysfs_ ## file) \
-+ prt_printf(out, fmt "\n", __VA_ARGS__); \
-+} while (0)
-+
-+/* print a single value via snprint() */
-+#define sysfs_print(file, var) \
-+do { \
-+ if (attr == &sysfs_ ## file) \
-+ snprint(out, var); \
-+} while (0)
-+
-+/* print a value via prt_human_readable_s64() */
-+#define sysfs_hprint(file, val) \
-+do { \
-+ if (attr == &sysfs_ ## file) \
-+ prt_human_readable_s64(out, val); \
-+} while (0)
-+
-+/*
-+ * Input helpers for use inside a STORE() body ("attr", "buf" and "size" come
-+ * from the enclosing function). When the attribute matches, these RETURN
-+ * from the enclosing function: the parse error if non-zero, otherwise size.
-+ */
-+#define sysfs_strtoul(file, var) \
-+do { \
-+ if (attr == &sysfs_ ## file) \
-+ return strtoul_safe(buf, var) ?: (ssize_t) size; \
-+} while (0)
-+
-+#define sysfs_strtoul_clamp(file, var, min, max) \
-+do { \
-+ if (attr == &sysfs_ ## file) \
-+ return strtoul_safe_clamp(buf, var, min, max) \
-+ ?: (ssize_t) size; \
-+} while (0)
-+
-+/*
-+ * Parse cp as a base-10 unsigned long and evaluate to the result; on a parse
-+ * error, return the kstrtoul() error code from the enclosing function.
-+ */
-+#define strtoul_or_return(cp) \
-+({ \
-+ unsigned long _v; \
-+ int _r = kstrtoul(cp, 10, &_v); \
-+ if (_r) \
-+ return _r; \
-+ _v; \
-+})
-+
-+/*
-+ * Attribute definitions. Each line creates a static struct attribute
-+ * sysfs_<name>; the macro used selects the file mode (write-only,
-+ * read-only or read-write).
-+ */
-+write_attribute(trigger_gc);
-+write_attribute(trigger_discards);
-+write_attribute(trigger_invalidates);
-+write_attribute(prune_cache);
-+write_attribute(btree_wakeup);
-+rw_attribute(btree_gc_periodic);
-+rw_attribute(gc_gens_pos);
-+
-+read_attribute(uuid);
-+read_attribute(minor);
-+read_attribute(bucket_size);
-+read_attribute(first_bucket);
-+read_attribute(nbuckets);
-+rw_attribute(durability);
-+read_attribute(io_done);
-+read_attribute(io_errors);
-+write_attribute(io_errors_reset);
-+
-+read_attribute(io_latency_read);
-+read_attribute(io_latency_write);
-+read_attribute(io_latency_stats_read);
-+read_attribute(io_latency_stats_write);
-+read_attribute(congested);
-+
-+read_attribute(btree_write_stats);
-+
-+read_attribute(btree_cache_size);
-+read_attribute(compression_stats);
-+read_attribute(journal_debug);
-+read_attribute(btree_updates);
-+read_attribute(btree_cache);
-+read_attribute(btree_key_cache);
-+read_attribute(stripes_heap);
-+read_attribute(open_buckets);
-+read_attribute(open_buckets_partial);
-+read_attribute(write_points);
-+read_attribute(nocow_lock_table);
-+
-+#ifdef BCH_WRITE_REF_DEBUG
-+read_attribute(write_refs);
-+
-+static const char * const bch2_write_refs[] = {
-+#define x(n) #n,
-+ BCH_WRITE_REFS()
-+#undef x
-+ NULL
-+};
-+
-+/*
-+ * Print one line per entry of c->writes: the reference's name from
-+ * bch2_write_refs[], a tab (tabstop at column 24), then its current count.
-+ */
-+static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	unsigned idx;
-+
-+	bch2_printbuf_tabstop_push(out, 24);
-+
-+	for (idx = 0; idx < ARRAY_SIZE(c->writes); idx++) {
-+		prt_str(out, bch2_write_refs[idx]);
-+		prt_tab(out);
-+		prt_printf(out, "%li", atomic_long_read(&c->writes[idx]));
-+		prt_newline(out);
-+	}
-+}
-+#endif
-+
-+read_attribute(internal_uuid);
-+read_attribute(disk_groups);
-+
-+read_attribute(has_data);
-+read_attribute(alloc_debug);
-+
-+#define x(t, n, ...) read_attribute(t);
-+BCH_PERSISTENT_COUNTERS()
-+#undef x
-+
-+rw_attribute(discard);
-+rw_attribute(label);
-+
-+rw_attribute(copy_gc_enabled);
-+read_attribute(copy_gc_wait);
-+
-+rw_attribute(rebalance_enabled);
-+sysfs_pd_controller_attribute(rebalance);
-+read_attribute(rebalance_status);
-+rw_attribute(promote_whole_extents);
-+
-+read_attribute(new_stripes);
-+
-+read_attribute(io_timers_read);
-+read_attribute(io_timers_write);
-+
-+read_attribute(moving_ctxts);
-+
-+#ifdef CONFIG_BCACHEFS_TESTS
-+write_attribute(perf_test);
-+#endif /* CONFIG_BCACHEFS_TESTS */
-+
-+#define x(_name) \
-+ static struct attribute sysfs_time_stat_##_name = \
-+ { .name = #_name, .mode = 0444 };
-+ BCH_TIME_STATS()
-+#undef x
-+
-+static struct attribute sysfs_state_rw = {
-+ .name = "state",
-+ .mode = 0444,
-+};
-+
-+/*
-+ * Size in bytes of the live btree node cache: btree_bytes(c) is added once
-+ * for every node on c->btree_cache.live, under the btree cache lock.
-+ */
-+static size_t bch2_btree_cache_size(struct bch_fs *c)
-+{
-+	struct btree *node;
-+	size_t total = 0;
-+
-+	mutex_lock(&c->btree_cache.lock);
-+
-+	list_for_each_entry(node, &c->btree_cache.live, list)
-+		total += btree_bytes(c);
-+
-+	mutex_unlock(&c->btree_cache.lock);
-+
-+	return total;
-+}
-+
-+/*
-+ * Walk every btree whose keys carry pointers and print per-category totals
-+ * (uncompressed / compressed / incompressible): extent counts and sizes.
-+ *
-+ * Returns -EPERM if the filesystem has not been started, an error from the
-+ * btree iteration, or 0 on success. Nothing is printed on error.
-+ */
-+static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ enum btree_id id;
-+ u64 nr_uncompressed_extents = 0,
-+ nr_compressed_extents = 0,
-+ nr_incompressible_extents = 0,
-+ uncompressed_sectors = 0,
-+ incompressible_sectors = 0,
-+ compressed_sectors_compressed = 0,
-+ compressed_sectors_uncompressed = 0;
-+ int ret = 0;
-+
-+ if (!test_bit(BCH_FS_STARTED, &c->flags))
-+ return -EPERM;
-+
-+ trans = bch2_trans_get(c);
-+
-+ for (id = 0; id < BTREE_ID_NR; id++) {
-+ if (!btree_type_has_ptrs(id))
-+ continue;
-+
-+ for_each_btree_key(trans, iter, id, POS_MIN,
-+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+ const union bch_extent_entry *entry;
-+ struct extent_ptr_decoded p;
-+ bool compressed = false, uncompressed = false, incompressible = false;
-+
-+ /* sector totals are accumulated once per pointer, not per extent: */
-+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+ switch (p.crc.compression_type) {
-+ case BCH_COMPRESSION_TYPE_none:
-+ uncompressed = true;
-+ uncompressed_sectors += k.k->size;
-+ break;
-+ case BCH_COMPRESSION_TYPE_incompressible:
-+ incompressible = true;
-+ incompressible_sectors += k.k->size;
-+ break;
-+ default:
-+ compressed_sectors_compressed +=
-+ p.crc.compressed_size;
-+ compressed_sectors_uncompressed +=
-+ p.crc.uncompressed_size;
-+ compressed = true;
-+ break;
-+ }
-+ }
-+
-+ /*
-+ * Each extent is counted in exactly one category, with
-+ * priority incompressible > uncompressed > compressed:
-+ */
-+ if (incompressible)
-+ nr_incompressible_extents++;
-+ else if (uncompressed)
-+ nr_uncompressed_extents++;
-+ else if (compressed)
-+ nr_compressed_extents++;
-+ }
-+ bch2_trans_iter_exit(trans, &iter);
-+ }
-+
-+ bch2_trans_put(trans);
-+
-+ if (ret)
-+ return ret;
-+
-+ /* sector counts are shifted left by 9 (multiplied by 512) before printing: */
-+ prt_printf(out, "uncompressed:\n");
-+ prt_printf(out, " nr extents: %llu\n", nr_uncompressed_extents);
-+ prt_printf(out, " size: ");
-+ prt_human_readable_u64(out, uncompressed_sectors << 9);
-+ prt_printf(out, "\n");
-+
-+ prt_printf(out, "compressed:\n");
-+ prt_printf(out, " nr extents: %llu\n", nr_compressed_extents);
-+ prt_printf(out, " compressed size: ");
-+ prt_human_readable_u64(out, compressed_sectors_compressed << 9);
-+ prt_printf(out, "\n");
-+ prt_printf(out, " uncompressed size: ");
-+ prt_human_readable_u64(out, compressed_sectors_uncompressed << 9);
-+ prt_printf(out, "\n");
-+
-+ prt_printf(out, "incompressible:\n");
-+ prt_printf(out, " nr extents: %llu\n", nr_incompressible_extents);
-+ prt_printf(out, " size: ");
-+ prt_human_readable_u64(out, incompressible_sectors << 9);
-+ prt_printf(out, "\n");
-+ return 0;
-+}
-+
-+/*
-+ * Print "<btree name>: <position>" followed by a newline, using
-+ * c->gc_gens_btree and c->gc_gens_pos.
-+ */
-+static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	prt_printf(out, "%s: ",
-+		   bch2_btree_id_str(c->gc_gens_btree));
-+
-+	bch2_bpos_to_text(out, c->gc_gens_pos);
-+
-+	prt_printf(out, "\n");
-+}
-+
-+/*
-+ * For every transaction on c->btree_trans_list that currently has a
-+ * non-NULL ->locking pointer, wake all waiters on that lock. The list is
-+ * walked under c->btree_trans_lock.
-+ */
-+static void bch2_btree_wakeup_all(struct bch_fs *c)
-+{
-+	struct btree_trans *trans;
-+
-+	seqmutex_lock(&c->btree_trans_lock);
-+
-+	list_for_each_entry(trans, &c->btree_trans_list, list) {
-+		/* sampled once: the field may change under us */
-+		struct btree_bkey_cached_common *locking = READ_ONCE(trans->locking);
-+
-+		if (!locking)
-+			continue;
-+
-+		six_lock_wakeup_all(&locking->lock);
-+	}
-+
-+	seqmutex_unlock(&c->btree_trans_lock);
-+}
-+
-+/*
-+ * Show handler for the attributes of the filesystem kobject; it is also
-+ * reached from the "internal" directory, which forwards here.
-+ *
-+ * Each attribute is matched by pointer identity against "attr", so at most
-+ * one branch produces output. Returns 0 on success or a negative error
-+ * code; the SHOW() wrapper turns that into the sysfs read result.
-+ */
-+SHOW(bch2_fs)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
-+
-+	sysfs_print(minor,			c->minor);
-+	sysfs_printf(internal_uuid, "%pU",	c->sb.uuid.b);
-+
-+	sysfs_hprint(btree_cache_size,		bch2_btree_cache_size(c));
-+
-+	if (attr == &sysfs_btree_write_stats)
-+		bch2_btree_write_stats_to_text(out, c);
-+
-+	sysfs_printf(btree_gc_periodic, "%u",	(int) c->btree_gc_periodic);
-+
-+	if (attr == &sysfs_gc_gens_pos)
-+		bch2_gc_gens_pos_to_text(out, c);
-+
-+	sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+
-+	sysfs_printf(rebalance_enabled,		"%i", c->rebalance.enabled);
-+	sysfs_pd_controller_show(rebalance,	&c->rebalance.pd); /* XXX */
-+
-+	if (attr == &sysfs_copy_gc_wait)
-+		bch2_copygc_wait_to_text(out, c);
-+
-+	if (attr == &sysfs_rebalance_status)
-+		bch2_rebalance_status_to_text(out, c);
-+
-+	sysfs_print(promote_whole_extents,	c->promote_whole_extents);
-+
-+	/* Debugging: */
-+
-+	if (attr == &sysfs_journal_debug)
-+		bch2_journal_debug_to_text(out, &c->journal);
-+
-+	if (attr == &sysfs_btree_updates)
-+		bch2_btree_updates_to_text(out, c);
-+
-+	if (attr == &sysfs_btree_cache)
-+		bch2_btree_cache_to_text(out, c);
-+
-+	if (attr == &sysfs_btree_key_cache)
-+		bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
-+
-+	if (attr == &sysfs_stripes_heap)
-+		bch2_stripes_heap_to_text(out, c);
-+
-+	if (attr == &sysfs_open_buckets)
-+		bch2_open_buckets_to_text(out, c);
-+
-+	if (attr == &sysfs_open_buckets_partial)
-+		bch2_open_buckets_partial_to_text(out, c);
-+
-+	if (attr == &sysfs_write_points)
-+		bch2_write_points_to_text(out, c);
-+
-+	/*
-+	 * The only helper here that can fail (fs not started, or a btree
-+	 * iteration error): report the failure to the reader instead of
-+	 * returning an empty, successful read.
-+	 */
-+	if (attr == &sysfs_compression_stats)
-+		return bch2_compression_stats_to_text(out, c);
-+
-+	if (attr == &sysfs_new_stripes)
-+		bch2_new_stripes_to_text(out, c);
-+
-+	if (attr == &sysfs_io_timers_read)
-+		bch2_io_timers_to_text(out, &c->io_clock[READ]);
-+
-+	if (attr == &sysfs_io_timers_write)
-+		bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
-+
-+	if (attr == &sysfs_moving_ctxts)
-+		bch2_fs_moving_ctxts_to_text(out, c);
-+
-+#ifdef BCH_WRITE_REF_DEBUG
-+	if (attr == &sysfs_write_refs)
-+		bch2_write_refs_to_text(out, c);
-+#endif
-+
-+	if (attr == &sysfs_nocow_lock_table)
-+		bch2_nocow_locks_to_text(out, &c->nocow_locks);
-+
-+	if (attr == &sysfs_disk_groups)
-+		bch2_disk_groups_to_text(out, c);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Store handler for the attributes of the filesystem kobject; it is also
-+ * reached from the "internal" directory, which forwards here.
-+ *
-+ * Tunables are handled first and return immediately. The trigger-style
-+ * attributes that follow need a started, read-write filesystem and are
-+ * refused with -EPERM / -EROFS otherwise.
-+ *
-+ * Returns @size on success or a negative error code.
-+ */
-+STORE(bch2_fs)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
-+
-+	if (attr == &sysfs_btree_gc_periodic) {
-+		ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
-+			?: (ssize_t) size;
-+
-+		/* guard against a missing thread, as the copygc branch does */
-+		if (c->gc_thread)
-+			wake_up_process(c->gc_thread);
-+		return ret;
-+	}
-+
-+	if (attr == &sysfs_copy_gc_enabled) {
-+		ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
-+			?: (ssize_t) size;
-+
-+		if (c->copygc_thread)
-+			wake_up_process(c->copygc_thread);
-+		return ret;
-+	}
-+
-+	if (attr == &sysfs_rebalance_enabled) {
-+		ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
-+			?: (ssize_t) size;
-+
-+		rebalance_wakeup(c);
-+		return ret;
-+	}
-+
-+	sysfs_pd_controller_store(rebalance,	&c->rebalance.pd);
-+
-+	sysfs_strtoul(promote_whole_extents,	c->promote_whole_extents);
-+
-+	/* Debugging: */
-+
-+	if (!test_bit(BCH_FS_STARTED, &c->flags))
-+		return -EPERM;
-+
-+	if (!test_bit(BCH_FS_RW, &c->flags))
-+		return -EROFS;
-+
-+	if (attr == &sysfs_prune_cache) {
-+		/* zero the fields we do not set rather than passing stack garbage */
-+		struct shrink_control sc = { .gfp_mask = GFP_KERNEL };
-+
-+		sc.nr_to_scan = strtoul_or_return(buf);
-+		c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
-+	}
-+
-+	if (attr == &sysfs_btree_wakeup)
-+		bch2_btree_wakeup_all(c);
-+
-+	if (attr == &sysfs_trigger_gc) {
-+		/*
-+		 * Full gc is currently incompatible with btree key cache:
-+		 */
-+#if 0
-+		down_read(&c->state_lock);
-+		bch2_gc(c, false, false);
-+		up_read(&c->state_lock);
-+#else
-+		bch2_gc_gens(c);
-+#endif
-+	}
-+
-+	if (attr == &sysfs_trigger_discards)
-+		bch2_do_discards(c);
-+
-+	if (attr == &sysfs_trigger_invalidates)
-+		bch2_do_invalidates(c);
-+
-+#ifdef CONFIG_BCACHEFS_TESTS
-+	if (attr == &sysfs_perf_test) {
-+		/* input format: "<test> <nr> <threads>", whitespace separated */
-+		char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
-+		char *test, *nr_str, *threads_str;
-+		unsigned threads;
-+		u64 nr;
-+		int ret = -EINVAL;
-+
-+		/* report allocation failure as such, not as a parse error */
-+		if (!tmp)
-+			return -ENOMEM;
-+
-+		test		= strsep(&p, " \t\n");
-+		nr_str		= strsep(&p, " \t\n");
-+		threads_str	= strsep(&p, " \t\n");
-+
-+		if (threads_str &&
-+		    !(ret = kstrtouint(threads_str, 10, &threads)) &&
-+		    !(ret = bch2_strtoull_h(nr_str, &nr)))
-+			ret = bch2_btree_perf_test(c, test, nr, threads);
-+		kfree(tmp);
-+
-+		if (ret)
-+			size = ret;
-+	}
-+#endif
-+	return size;
-+}
-+SYSFS_OPS(bch2_fs);
-+
-+/* NULL-terminated list of attributes for the filesystem kobject. */
-+struct attribute *bch2_fs_files[] = {
-+ &sysfs_minor,
-+ &sysfs_btree_cache_size,
-+ &sysfs_btree_write_stats,
-+
-+ &sysfs_promote_whole_extents,
-+
-+ &sysfs_compression_stats,
-+
-+#ifdef CONFIG_BCACHEFS_TESTS
-+ &sysfs_perf_test,
-+#endif
-+ NULL
-+};
-+
-+/* counters dir */
-+
-+/*
-+ * Show handler for the counters directory: for the persistent counter that
-+ * matches "attr", print its value since mount (current value minus the
-+ * value recorded in c->counters_on_mount) and its total value, each on its
-+ * own line in human readable form.
-+ */
-+SHOW(bch2_fs_counters)
-+{
-+ struct bch_fs *c = container_of(kobj, struct bch_fs, counters_kobj);
-+ u64 counter = 0;
-+ u64 counter_since_mount = 0;
-+
-+ printbuf_tabstop_push(out, 32);
-+
-+ /* expands to one "if (attr == ...)" block per persistent counter: */
-+ #define x(t, ...) \
-+ if (attr == &sysfs_##t) { \
-+ counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\
-+ counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\
-+ prt_printf(out, "since mount:"); \
-+ prt_tab(out); \
-+ prt_human_readable_u64(out, counter_since_mount); \
-+ prt_newline(out); \
-+ \
-+ prt_printf(out, "since filesystem creation:"); \
-+ prt_tab(out); \
-+ prt_human_readable_u64(out, counter); \
-+ prt_newline(out); \
-+ }
-+ BCH_PERSISTENT_COUNTERS()
-+ #undef x
-+ return 0;
-+}
-+
-+/*
-+ * Counters are read-only, so writes are accepted and ignored. Report the
-+ * whole buffer as consumed (as the time_stats store handler does): a return
-+ * of 0 would tell the writer that nothing was consumed.
-+ */
-+STORE(bch2_fs_counters) {
-+	return size;
-+}
-+
-+SYSFS_OPS(bch2_fs_counters);
-+
-+struct attribute *bch2_fs_counters_files[] = {
-+#define x(t, ...) \
-+ &sysfs_##t,
-+ BCH_PERSISTENT_COUNTERS()
-+#undef x
-+ NULL
-+};
-+/* internal dir - just a wrapper */
-+
-+/*
-+ * The "internal" directory has no show logic of its own: map its kobject
-+ * back to the owning filesystem and forward to the main filesystem handler.
-+ */
-+SHOW(bch2_fs_internal)
-+{
-+	struct bch_fs *fs = container_of(kobj, struct bch_fs, internal);
-+	struct kobject *fs_kobject = &fs->kobj;
-+
-+	return bch2_fs_to_text(out, fs_kobject, attr);
-+}
-+
-+/*
-+ * Writes to the "internal" directory are forwarded to the main filesystem
-+ * store handler, using the owning filesystem's kobject.
-+ */
-+STORE(bch2_fs_internal)
-+{
-+	struct bch_fs *fs = container_of(kobj, struct bch_fs, internal);
-+	struct kobject *fs_kobject = &fs->kobj;
-+
-+	return bch2_fs_store(fs_kobject, attr, buf, size);
-+}
-+SYSFS_OPS(bch2_fs_internal);
-+
-+/* NULL-terminated list of attributes for the "internal" directory. */
-+struct attribute *bch2_fs_internal_files[] = {
-+ &sysfs_journal_debug,
-+ &sysfs_btree_updates,
-+ &sysfs_btree_cache,
-+ &sysfs_btree_key_cache,
-+ &sysfs_new_stripes,
-+ &sysfs_stripes_heap,
-+ &sysfs_open_buckets,
-+ &sysfs_open_buckets_partial,
-+ &sysfs_write_points,
-+#ifdef BCH_WRITE_REF_DEBUG
-+ &sysfs_write_refs,
-+#endif
-+ &sysfs_nocow_lock_table,
-+ &sysfs_io_timers_read,
-+ &sysfs_io_timers_write,
-+
-+ &sysfs_trigger_gc,
-+ &sysfs_trigger_discards,
-+ &sysfs_trigger_invalidates,
-+ &sysfs_prune_cache,
-+ &sysfs_btree_wakeup,
-+
-+ &sysfs_gc_gens_pos,
-+
-+ &sysfs_copy_gc_enabled,
-+ &sysfs_copy_gc_wait,
-+
-+ &sysfs_rebalance_enabled,
-+ &sysfs_rebalance_status,
-+ sysfs_pd_controller_files(rebalance),
-+
-+ &sysfs_moving_ctxts,
-+
-+ &sysfs_internal_uuid,
-+
-+ &sysfs_disk_groups,
-+ NULL
-+};
-+
-+/* options */
-+
-+/*
-+ * Show one filesystem option. The attribute is embedded in its struct
-+ * bch_option, whose offset within bch2_opt_table gives the option id used
-+ * to fetch the current value.
-+ */
-+SHOW(bch2_fs_opts_dir)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
-+	const struct bch_option *opt = container_of(attr, struct bch_option, attr);
-+	int id;
-+	u64 v;
-+
-+	id = opt - bch2_opt_table;
-+	v  = bch2_opt_get_by_id(&c->opts, id);
-+
-+	bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST);
-+	prt_char(out, '\n');
-+
-+	return 0;
-+}
-+
-+/*
-+ * Set one filesystem option from sysfs: parse the written string, check
-+ * that the option may be set, then store the value both in the superblock
-+ * and in the in-memory options.
-+ *
-+ * Returns @size on success, -EROFS if a write ref cannot be taken, -ENOMEM
-+ * on allocation failure, or the parse / validation error.
-+ */
-+STORE(bch2_fs_opts_dir)
-+{
-+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
-+ const struct bch_option *opt = container_of(attr, struct bch_option, attr);
-+ int ret, id = opt - bch2_opt_table;
-+ char *tmp;
-+ u64 v;
-+
-+ /*
-+ * We don't need to take c->writes for correctness, but it eliminates an
-+ * unsightly error message in the dmesg log when we're RO:
-+ */
-+ if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
-+ return -EROFS;
-+
-+ /* work on a private copy: strim() is handed a non-const buffer */
-+ tmp = kstrdup(buf, GFP_KERNEL);
-+ if (!tmp) {
-+ ret = -ENOMEM;
-+ goto err;
-+ }
-+
-+ ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
-+ kfree(tmp);
-+
-+ if (ret < 0)
-+ goto err;
-+
-+ ret = bch2_opt_check_may_set(c, id, v);
-+ if (ret < 0)
-+ goto err;
-+
-+ bch2_opt_set_sb(c, opt, v);
-+ bch2_opt_set_by_id(&c->opts, id, v);
-+
-+ /* a non-zero background target/compression requests a rebalance scan: */
-+ if ((id == Opt_background_target ||
-+ id == Opt_background_compression) && v)
-+ bch2_set_rebalance_needs_scan(c, 0);
-+
-+ ret = size;
-+err:
-+ /* reached on success too: drops the ref taken at the top */
-+ bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
-+ return ret;
-+}
-+SYSFS_OPS(bch2_fs_opts_dir);
-+
-+struct attribute *bch2_fs_opts_dir_files[] = { NULL };
-+
-+/*
-+ * Create a sysfs file under @kobj for every entry of bch2_opt_table that
-+ * has the OPT_FS flag set. Stops at, and returns, the first error from
-+ * sysfs_create_file(); returns 0 if every file was created.
-+ */
-+int bch2_opts_create_sysfs_files(struct kobject *kobj)
-+{
-+	const struct bch_option *opt;
-+
-+	for (opt = bch2_opt_table;
-+	     opt < bch2_opt_table + bch2_opts_nr;
-+	     opt++) {
-+		int ret;
-+
-+		if (opt->flags & OPT_FS) {
-+			ret = sysfs_create_file(kobj, &opt->attr);
-+			if (ret)
-+				return ret;
-+		}
-+	}
-+
-+	return 0;
-+}
-+
-+/* time stats */
-+
-+/*
-+ * Show handler for the time_stats directory: print the time statistics
-+ * entry of c->times[] that corresponds to the attribute being read.
-+ */
-+SHOW(bch2_fs_time_stats)
-+{
-+ struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
-+
-+ /* expands to one "if (attr == ...)" test per BCH_TIME_STATS() entry: */
-+#define x(name) \
-+ if (attr == &sysfs_time_stat_##name) \
-+ bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]);
-+ BCH_TIME_STATS()
-+#undef x
-+
-+ return 0;
-+}
-+
-+/* Time stats cannot be modified: writes are accepted and ignored. */
-+STORE(bch2_fs_time_stats)
-+{
-+ return size;
-+}
-+SYSFS_OPS(bch2_fs_time_stats);
-+
-+struct attribute *bch2_fs_time_stats_files[] = {
-+#define x(name) \
-+ &sysfs_time_stat_##name,
-+ BCH_TIME_STATS()
-+#undef x
-+ NULL
-+};
-+
-+/*
-+ * Print allocator debugging information for device @ca: a table of
-+ * buckets / sectors / fragmented per data type, the erasure-coding bucket
-+ * count, the bucket reserve for each watermark, and a list of open-bucket
-+ * and allocator state values taken from the owning filesystem.
-+ */
-+static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
-+{
-+ struct bch_fs *c = ca->fs;
-+ struct bch_dev_usage stats = bch2_dev_usage_read(ca);
-+ unsigned i, nr[BCH_DATA_NR];
-+
-+ memset(nr, 0, sizeof(nr));
-+
-+ /*
-+ * Histogram of open buckets by data type; this walks the whole
-+ * filesystem-wide c->open_buckets array, not only this device's.
-+ */
-+ for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
-+ nr[c->open_buckets[i].data_type]++;
-+
-+ printbuf_tabstop_push(out, 8);
-+ printbuf_tabstop_push(out, 16);
-+ printbuf_tabstop_push(out, 16);
-+ printbuf_tabstop_push(out, 16);
-+ printbuf_tabstop_push(out, 16);
-+
-+ /* table header: */
-+ prt_tab(out);
-+ prt_str(out, "buckets");
-+ prt_tab_rjust(out);
-+ prt_str(out, "sectors");
-+ prt_tab_rjust(out);
-+ prt_str(out, "fragmented");
-+ prt_tab_rjust(out);
-+ prt_newline(out);
-+
-+ /* one row per data type: */
-+ for (i = 0; i < BCH_DATA_NR; i++) {
-+ prt_str(out, bch2_data_types[i]);
-+ prt_tab(out);
-+ prt_u64(out, stats.d[i].buckets);
-+ prt_tab_rjust(out);
-+ prt_u64(out, stats.d[i].sectors);
-+ prt_tab_rjust(out);
-+ prt_u64(out, stats.d[i].fragmented);
-+ prt_tab_rjust(out);
-+ prt_newline(out);
-+ }
-+
-+ prt_str(out, "ec");
-+ prt_tab(out);
-+ prt_u64(out, stats.buckets_ec);
-+ prt_tab_rjust(out);
-+ prt_newline(out);
-+
-+ prt_newline(out);
-+
-+ prt_printf(out, "reserves:");
-+ prt_newline(out);
-+ for (i = 0; i < BCH_WATERMARK_NR; i++) {
-+ prt_str(out, bch2_watermarks[i]);
-+ prt_tab(out);
-+ prt_u64(out, bch2_dev_buckets_reserved(ca, i));
-+ prt_tab_rjust(out);
-+ prt_newline(out);
-+ }
-+
-+ prt_newline(out);
-+
-+ /* the remaining "name <tab> value" lines use a single tabstop at 24: */
-+ printbuf_tabstops_reset(out);
-+ printbuf_tabstop_push(out, 24);
-+
-+ prt_str(out, "freelist_wait");
-+ prt_tab(out);
-+ prt_str(out, c->freelist_wait.list.first ? "waiting" : "empty");
-+ prt_newline(out);
-+
-+ prt_str(out, "open buckets allocated");
-+ prt_tab(out);
-+ prt_u64(out, OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
-+ prt_newline(out);
-+
-+ prt_str(out, "open buckets this dev");
-+ prt_tab(out);
-+ prt_u64(out, ca->nr_open_buckets);
-+ prt_newline(out);
-+
-+ prt_str(out, "open buckets total");
-+ prt_tab(out);
-+ prt_u64(out, OPEN_BUCKETS_COUNT);
-+ prt_newline(out);
-+
-+ prt_str(out, "open_buckets_wait");
-+ prt_tab(out);
-+ prt_str(out, c->open_buckets_wait.list.first ? "waiting" : "empty");
-+ prt_newline(out);
-+
-+ prt_str(out, "open_buckets_btree");
-+ prt_tab(out);
-+ prt_u64(out, nr[BCH_DATA_btree]);
-+ prt_newline(out);
-+
-+ prt_str(out, "open_buckets_user");
-+ prt_tab(out);
-+ prt_u64(out, nr[BCH_DATA_user]);
-+ prt_newline(out);
-+
-+ prt_str(out, "buckets_to_invalidate");
-+ prt_tab(out);
-+ prt_u64(out, should_invalidate_buckets(ca, stats));
-+ prt_newline(out);
-+
-+ prt_str(out, "btree reserve cache");
-+ prt_tab(out);
-+ prt_u64(out, c->btree_reserve_cache_nr);
-+ prt_newline(out);
-+}
-+
-+static const char * const bch2_rw[] = {
-+ "read",
-+ "write",
-+ NULL
-+};
-+
-+static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca)
-+{
-+ int rw, i;
-+
-+ for (rw = 0; rw < 2; rw++) {
-+ prt_printf(out, "%s:\n", bch2_rw[rw]);
-+
-+ for (i = 1; i < BCH_DATA_NR; i++)
-+ prt_printf(out, "%-12s:%12llu\n",
-+ bch2_data_types[i],
-+ percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
-+ }
-+}
-+
-+SHOW(bch2_dev)
-+{
-+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
-+ struct bch_fs *c = ca->fs;
-+
-+ sysfs_printf(uuid, "%pU\n", ca->uuid.b);
-+
-+ sysfs_print(bucket_size, bucket_bytes(ca));
-+ sysfs_print(first_bucket, ca->mi.first_bucket);
-+ sysfs_print(nbuckets, ca->mi.nbuckets);
-+ sysfs_print(durability, ca->mi.durability);
-+ sysfs_print(discard, ca->mi.discard);
-+
-+ if (attr == &sysfs_label) {
-+ if (ca->mi.group)
-+ bch2_disk_path_to_text(out, c, ca->mi.group - 1);
-+ prt_char(out, '\n');
-+ }
-+
-+ if (attr == &sysfs_has_data) {
-+ prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca));
-+ prt_char(out, '\n');
-+ }
-+
-+ if (attr == &sysfs_state_rw) {
-+ prt_string_option(out, bch2_member_states, ca->mi.state);
-+ prt_char(out, '\n');
-+ }
-+
-+ if (attr == &sysfs_io_done)
-+ dev_io_done_to_text(out, ca);
-+
-+ if (attr == &sysfs_io_errors)
-+ bch2_dev_io_errors_to_text(out, ca);
-+
-+ sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
-+ sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
-+
-+ if (attr == &sysfs_io_latency_stats_read)
-+ bch2_time_stats_to_text(out, &ca->io_latency[READ]);
-+
-+ if (attr == &sysfs_io_latency_stats_write)
-+ bch2_time_stats_to_text(out, &ca->io_latency[WRITE]);
-+
-+ sysfs_printf(congested, "%u%%",
-+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
-+ * 100 / CONGESTED_MAX);
-+
-+ if (attr == &sysfs_alloc_debug)
-+ dev_alloc_debug_to_text(out, ca);
-+
-+ return 0;
-+}
-+
-+STORE(bch2_dev)
-+{
-+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
-+ struct bch_fs *c = ca->fs;
-+ struct bch_member *mi;
-+
-+ if (attr == &sysfs_discard) {
-+ bool v = strtoul_or_return(buf);
-+
-+ mutex_lock(&c->sb_lock);
-+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-+
-+ if (v != BCH_MEMBER_DISCARD(mi)) {
-+ SET_BCH_MEMBER_DISCARD(mi, v);
-+ bch2_write_super(c);
-+ }
-+ mutex_unlock(&c->sb_lock);
-+ }
-+
-+ if (attr == &sysfs_durability) {
-+ u64 v = strtoul_or_return(buf);
-+
-+ mutex_lock(&c->sb_lock);
-+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-+
-+ if (v + 1 != BCH_MEMBER_DURABILITY(mi)) {
-+ SET_BCH_MEMBER_DURABILITY(mi, v + 1);
-+ bch2_write_super(c);
-+ }
-+ mutex_unlock(&c->sb_lock);
-+ }
-+
-+ if (attr == &sysfs_label) {
-+ char *tmp;
-+ int ret;
-+
-+ tmp = kstrdup(buf, GFP_KERNEL);
-+ if (!tmp)
-+ return -ENOMEM;
-+
-+ ret = bch2_dev_group_set(c, ca, strim(tmp));
-+ kfree(tmp);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (attr == &sysfs_io_errors_reset)
-+ bch2_dev_errors_reset(ca);
-+
-+ return size;
-+}
-+SYSFS_OPS(bch2_dev);
-+
-+struct attribute *bch2_dev_files[] = {
-+ &sysfs_uuid,
-+ &sysfs_bucket_size,
-+ &sysfs_first_bucket,
-+ &sysfs_nbuckets,
-+ &sysfs_durability,
-+
-+ /* settings: */
-+ &sysfs_discard,
-+ &sysfs_state_rw,
-+ &sysfs_label,
-+
-+ &sysfs_has_data,
-+ &sysfs_io_done,
-+ &sysfs_io_errors,
-+ &sysfs_io_errors_reset,
-+
-+ &sysfs_io_latency_read,
-+ &sysfs_io_latency_write,
-+ &sysfs_io_latency_stats_read,
-+ &sysfs_io_latency_stats_write,
-+ &sysfs_congested,
-+
-+ /* debug: */
-+ &sysfs_alloc_debug,
-+ NULL
-+};
-+
-+#endif /* _BCACHEFS_SYSFS_H_ */
-diff --git a/fs/bcachefs/sysfs.h b/fs/bcachefs/sysfs.h
-new file mode 100644
-index 000000000000..222cd5062702
---- /dev/null
-+++ b/fs/bcachefs/sysfs.h
-@@ -0,0 +1,48 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SYSFS_H_
-+#define _BCACHEFS_SYSFS_H_
-+
-+#include <linux/sysfs.h>
-+
-+#ifndef NO_BCACHEFS_SYSFS
-+
-+struct attribute;
-+struct sysfs_ops;
-+
-+extern struct attribute *bch2_fs_files[];
-+extern struct attribute *bch2_fs_counters_files[];
-+extern struct attribute *bch2_fs_internal_files[];
-+extern struct attribute *bch2_fs_opts_dir_files[];
-+extern struct attribute *bch2_fs_time_stats_files[];
-+extern struct attribute *bch2_dev_files[];
-+
-+extern const struct sysfs_ops bch2_fs_sysfs_ops;
-+extern const struct sysfs_ops bch2_fs_counters_sysfs_ops;
-+extern const struct sysfs_ops bch2_fs_internal_sysfs_ops;
-+extern const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
-+extern const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
-+extern const struct sysfs_ops bch2_dev_sysfs_ops;
-+
-+int bch2_opts_create_sysfs_files(struct kobject *);
-+
-+#else
-+
-+static struct attribute *bch2_fs_files[] = {};
-+static struct attribute *bch2_fs_counters_files[] = {};
-+static struct attribute *bch2_fs_internal_files[] = {};
-+static struct attribute *bch2_fs_opts_dir_files[] = {};
-+static struct attribute *bch2_fs_time_stats_files[] = {};
-+static struct attribute *bch2_dev_files[] = {};
-+
-+static const struct sysfs_ops bch2_fs_sysfs_ops;
-+static const struct sysfs_ops bch2_fs_counters_sysfs_ops;
-+static const struct sysfs_ops bch2_fs_internal_sysfs_ops;
-+static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
-+static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
-+static const struct sysfs_ops bch2_dev_sysfs_ops;
-+
-+static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; }
-+
-+#endif /* NO_BCACHEFS_SYSFS */
-+
-+#endif /* _BCACHEFS_SYSFS_H_ */
-diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
-new file mode 100644
-index 000000000000..2fc9e60c754b
---- /dev/null
-+++ b/fs/bcachefs/tests.c
-@@ -0,0 +1,919 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifdef CONFIG_BCACHEFS_TESTS
-+
-+#include "bcachefs.h"
-+#include "btree_update.h"
-+#include "journal_reclaim.h"
-+#include "snapshot.h"
-+#include "tests.h"
-+
-+#include "linux/kthread.h"
-+#include "linux/random.h"
-+
-+static void delete_test_keys(struct bch_fs *c)
-+{
-+ int ret;
-+
-+ ret = bch2_btree_delete_range(c, BTREE_ID_extents,
-+ SPOS(0, 0, U32_MAX),
-+ POS(0, U64_MAX),
-+ 0, NULL);
-+ BUG_ON(ret);
-+
-+ ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX),
-+ POS(0, U64_MAX),
-+ 0, NULL);
-+ BUG_ON(ret);
-+}
-+
-+/* unit tests */
-+
-+static int test_delete(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_i_cookie k;
-+ int ret;
-+
-+ bkey_cookie_init(&k.k_i);
-+ k.k.p.snapshot = U32_MAX;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p,
-+ BTREE_ITER_INTENT);
-+
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ bch2_btree_iter_traverse(&iter) ?:
-+ bch2_trans_update(trans, &iter, &k.k_i, 0));
-+ bch_err_msg(c, ret, "update error");
-+ if (ret)
-+ goto err;
-+
-+ pr_info("deleting once");
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ bch2_btree_iter_traverse(&iter) ?:
-+ bch2_btree_delete_at(trans, &iter, 0));
-+ bch_err_msg(c, ret, "delete error (first)");
-+ if (ret)
-+ goto err;
-+
-+ pr_info("deleting twice");
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ bch2_btree_iter_traverse(&iter) ?:
-+ bch2_btree_delete_at(trans, &iter, 0));
-+ bch_err_msg(c, ret, "delete error (second)");
-+ if (ret)
-+ goto err;
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int test_delete_written(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_i_cookie k;
-+ int ret;
-+
-+ bkey_cookie_init(&k.k_i);
-+ k.k.p.snapshot = U32_MAX;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p,
-+ BTREE_ITER_INTENT);
-+
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ bch2_btree_iter_traverse(&iter) ?:
-+ bch2_trans_update(trans, &iter, &k.k_i, 0));
-+ bch_err_msg(c, ret, "update error");
-+ if (ret)
-+ goto err;
-+
-+ bch2_trans_unlock(trans);
-+ bch2_journal_flush_all_pins(&c->journal);
-+
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ bch2_btree_iter_traverse(&iter) ?:
-+ bch2_btree_delete_at(trans, &iter, 0));
-+ bch_err_msg(c, ret, "delete error");
-+ if (ret)
-+ goto err;
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int test_iterate(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter = { NULL };
-+ struct bkey_s_c k;
-+ u64 i;
-+ int ret = 0;
-+
-+ delete_test_keys(c);
-+
-+ pr_info("inserting test keys");
-+
-+ for (i = 0; i < nr; i++) {
-+ struct bkey_i_cookie ck;
-+
-+ bkey_cookie_init(&ck.k_i);
-+ ck.k.p.offset = i;
-+ ck.k.p.snapshot = U32_MAX;
-+
-+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0);
-+ bch_err_msg(c, ret, "insert error");
-+ if (ret)
-+ goto err;
-+ }
-+
-+ pr_info("iterating forwards");
-+
-+ i = 0;
-+
-+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
-+ 0, k, ({
-+ BUG_ON(k.k->p.offset != i++);
-+ 0;
-+ }));
-+ bch_err_msg(c, ret, "error iterating forwards");
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(i != nr);
-+
-+ pr_info("iterating backwards");
-+
-+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs,
-+ SPOS(0, U64_MAX, U32_MAX), 0, k,
-+ ({
-+ BUG_ON(k.k->p.offset != --i);
-+ 0;
-+ }));
-+ bch_err_msg(c, ret, "error iterating backwards");
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(i);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int test_iterate_extents(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter = { NULL };
-+ struct bkey_s_c k;
-+ u64 i;
-+ int ret = 0;
-+
-+ delete_test_keys(c);
-+
-+ pr_info("inserting test extents");
-+
-+ for (i = 0; i < nr; i += 8) {
-+ struct bkey_i_cookie ck;
-+
-+ bkey_cookie_init(&ck.k_i);
-+ ck.k.p.offset = i + 8;
-+ ck.k.p.snapshot = U32_MAX;
-+ ck.k.size = 8;
-+
-+ ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0);
-+ bch_err_msg(c, ret, "insert error");
-+ if (ret)
-+ goto err;
-+ }
-+
-+ pr_info("iterating forwards");
-+
-+ i = 0;
-+
-+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents,
-+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
-+ 0, k, ({
-+ BUG_ON(bkey_start_offset(k.k) != i);
-+ i = k.k->p.offset;
-+ 0;
-+ }));
-+ bch_err_msg(c, ret, "error iterating forwards");
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(i != nr);
-+
-+ pr_info("iterating backwards");
-+
-+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents,
-+ SPOS(0, U64_MAX, U32_MAX), 0, k,
-+ ({
-+ BUG_ON(k.k->p.offset != i);
-+ i = bkey_start_offset(k.k);
-+ 0;
-+ }));
-+ bch_err_msg(c, ret, "error iterating backwards");
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(i);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int test_iterate_slots(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter = { NULL };
-+ struct bkey_s_c k;
-+ u64 i;
-+ int ret = 0;
-+
-+ delete_test_keys(c);
-+
-+ pr_info("inserting test keys");
-+
-+ for (i = 0; i < nr; i++) {
-+ struct bkey_i_cookie ck;
-+
-+ bkey_cookie_init(&ck.k_i);
-+ ck.k.p.offset = i * 2;
-+ ck.k.p.snapshot = U32_MAX;
-+
-+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0);
-+ bch_err_msg(c, ret, "insert error");
-+ if (ret)
-+ goto err;
-+ }
-+
-+ pr_info("iterating forwards");
-+
-+ i = 0;
-+
-+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
-+ 0, k, ({
-+ BUG_ON(k.k->p.offset != i);
-+ i += 2;
-+ 0;
-+ }));
-+ bch_err_msg(c, ret, "error iterating forwards");
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(i != nr * 2);
-+
-+ pr_info("iterating forwards by slots");
-+
-+ i = 0;
-+
-+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
-+ BTREE_ITER_SLOTS, k, ({
-+ if (i >= nr * 2)
-+ break;
-+
-+ BUG_ON(k.k->p.offset != i);
-+ BUG_ON(bkey_deleted(k.k) != (i & 1));
-+
-+ i++;
-+ 0;
-+ }));
-+ if (ret < 0) {
-+ bch_err_msg(c, ret, "error iterating forwards by slots");
-+ goto err;
-+ }
-+ ret = 0;
-+err:
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter = { NULL };
-+ struct bkey_s_c k;
-+ u64 i;
-+ int ret = 0;
-+
-+ delete_test_keys(c);
-+
-+ pr_info("inserting test keys");
-+
-+ for (i = 0; i < nr; i += 16) {
-+ struct bkey_i_cookie ck;
-+
-+ bkey_cookie_init(&ck.k_i);
-+ ck.k.p.offset = i + 16;
-+ ck.k.p.snapshot = U32_MAX;
-+ ck.k.size = 8;
-+
-+ ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0);
-+ bch_err_msg(c, ret, "insert error");
-+ if (ret)
-+ goto err;
-+ }
-+
-+ pr_info("iterating forwards");
-+
-+ i = 0;
-+
-+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents,
-+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
-+ 0, k, ({
-+ BUG_ON(bkey_start_offset(k.k) != i + 8);
-+ BUG_ON(k.k->size != 8);
-+ i += 16;
-+ 0;
-+ }));
-+ bch_err_msg(c, ret, "error iterating forwards");
-+ if (ret)
-+ goto err;
-+
-+ BUG_ON(i != nr);
-+
-+ pr_info("iterating forwards by slots");
-+
-+ i = 0;
-+
-+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents,
-+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
-+ BTREE_ITER_SLOTS, k, ({
-+ if (i == nr)
-+ break;
-+ BUG_ON(bkey_deleted(k.k) != !(i % 16));
-+
-+ BUG_ON(bkey_start_offset(k.k) != i);
-+ BUG_ON(k.k->size != 8);
-+ i = k.k->p.offset;
-+ 0;
-+ }));
-+ bch_err_msg(c, ret, "error iterating forwards by slots");
-+ if (ret)
-+ goto err;
-+ ret = 0;
-+err:
-+ bch2_trans_put(trans);
-+ return 0;
-+}
-+
-+/*
-+ * XXX: we really want to make sure we've got a btree with depth > 0 for these
-+ * tests
-+ */
-+static int test_peek_end(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX), 0);
-+
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ BUG_ON(k.k);
-+
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ BUG_ON(k.k);
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return 0;
-+}
-+
-+static int test_peek_end_extents(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+ SPOS(0, 0, U32_MAX), 0);
-+
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ BUG_ON(k.k);
-+
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+ BUG_ON(k.k);
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return 0;
-+}
-+
-+/* extent unit tests */
-+
-+static u64 test_version;
-+
-+static int insert_test_extent(struct bch_fs *c,
-+ u64 start, u64 end)
-+{
-+ struct bkey_i_cookie k;
-+ int ret;
-+
-+ bkey_cookie_init(&k.k_i);
-+ k.k_i.k.p.offset = end;
-+ k.k_i.k.p.snapshot = U32_MAX;
-+ k.k_i.k.size = end - start;
-+ k.k_i.k.version.lo = test_version++;
-+
-+ ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0);
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+static int __test_extent_overwrite(struct bch_fs *c,
-+ u64 e1_start, u64 e1_end,
-+ u64 e2_start, u64 e2_end)
-+{
-+ int ret;
-+
-+ ret = insert_test_extent(c, e1_start, e1_end) ?:
-+ insert_test_extent(c, e2_start, e2_end);
-+
-+ delete_test_keys(c);
-+ return ret;
-+}
-+
-+static int test_extent_overwrite_front(struct bch_fs *c, u64 nr)
-+{
-+ return __test_extent_overwrite(c, 0, 64, 0, 32) ?:
-+ __test_extent_overwrite(c, 8, 64, 0, 32);
-+}
-+
-+static int test_extent_overwrite_back(struct bch_fs *c, u64 nr)
-+{
-+ return __test_extent_overwrite(c, 0, 64, 32, 64) ?:
-+ __test_extent_overwrite(c, 0, 64, 32, 72);
-+}
-+
-+static int test_extent_overwrite_middle(struct bch_fs *c, u64 nr)
-+{
-+ return __test_extent_overwrite(c, 0, 64, 32, 40);
-+}
-+
-+static int test_extent_overwrite_all(struct bch_fs *c, u64 nr)
-+{
-+ return __test_extent_overwrite(c, 32, 64, 0, 64) ?:
-+ __test_extent_overwrite(c, 32, 64, 0, 128) ?:
-+ __test_extent_overwrite(c, 32, 64, 32, 64) ?:
-+ __test_extent_overwrite(c, 32, 64, 32, 128);
-+}
-+
-+static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid)
-+{
-+ struct bkey_i_cookie k;
-+ int ret;
-+
-+ bkey_cookie_init(&k.k_i);
-+ k.k_i.k.p.inode = inum;
-+ k.k_i.k.p.offset = start + len;
-+ k.k_i.k.p.snapshot = snapid;
-+ k.k_i.k.size = len;
-+
-+ ret = bch2_trans_do(c, NULL, NULL, 0,
-+ bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i,
-+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
-+ bch_err_fn(c, ret);
-+ return ret;
-+}
-+
-+static int test_extent_create_overlapping(struct bch_fs *c, u64 inum)
-+{
-+ return insert_test_overlapping_extent(c, inum, 0, 16, U32_MAX - 2) ?: /* overwrite entire */
-+ insert_test_overlapping_extent(c, inum, 2, 8, U32_MAX - 2) ?:
-+ insert_test_overlapping_extent(c, inum, 4, 4, U32_MAX) ?:
-+ insert_test_overlapping_extent(c, inum, 32, 8, U32_MAX - 2) ?: /* overwrite front/back */
-+ insert_test_overlapping_extent(c, inum, 36, 8, U32_MAX) ?:
-+ insert_test_overlapping_extent(c, inum, 60, 8, U32_MAX - 2) ?:
-+ insert_test_overlapping_extent(c, inum, 64, 8, U32_MAX);
-+}
-+
-+/* snapshot unit tests */
-+
-+/* Test skipping over keys in unrelated snapshots: */
-+static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
-+{
-+ struct btree_trans *trans;
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_i_cookie cookie;
-+ int ret;
-+
-+ bkey_cookie_init(&cookie.k_i);
-+ cookie.k.p.snapshot = snapid_hi;
-+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0);
-+ if (ret)
-+ return ret;
-+
-+ trans = bch2_trans_get(c);
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, snapid_lo), 0);
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+
-+ BUG_ON(k.k->p.snapshot != U32_MAX);
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int test_snapshots(struct bch_fs *c, u64 nr)
-+{
-+ struct bkey_i_cookie cookie;
-+ u32 snapids[2];
-+ u32 snapid_subvols[2] = { 1, 1 };
-+ int ret;
-+
-+ bkey_cookie_init(&cookie.k_i);
-+ cookie.k.p.snapshot = U32_MAX;
-+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0);
-+ if (ret)
-+ return ret;
-+
-+ ret = bch2_trans_do(c, NULL, NULL, 0,
-+ bch2_snapshot_node_create(trans, U32_MAX,
-+ snapids,
-+ snapid_subvols,
-+ 2));
-+ if (ret)
-+ return ret;
-+
-+ if (snapids[0] > snapids[1])
-+ swap(snapids[0], snapids[1]);
-+
-+ ret = test_snapshot_filter(c, snapids[0], snapids[1]);
-+ bch_err_msg(c, ret, "from test_snapshot_filter");
-+ return ret;
-+}
-+
-+/* perf tests */
-+
-+static u64 test_rand(void)
-+{
-+ u64 v;
-+
-+ get_random_bytes(&v, sizeof(v));
-+ return v;
-+}
-+
-+static int rand_insert(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct bkey_i_cookie k;
-+ int ret = 0;
-+ u64 i;
-+
-+ for (i = 0; i < nr; i++) {
-+ bkey_cookie_init(&k.k_i);
-+ k.k.p.offset = test_rand();
-+ k.k.p.snapshot = U32_MAX;
-+
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0));
-+ if (ret)
-+ break;
-+ }
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int rand_insert_multi(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct bkey_i_cookie k[8];
-+ int ret = 0;
-+ unsigned j;
-+ u64 i;
-+
-+ for (i = 0; i < nr; i += ARRAY_SIZE(k)) {
-+ for (j = 0; j < ARRAY_SIZE(k); j++) {
-+ bkey_cookie_init(&k[j].k_i);
-+ k[j].k.p.offset = test_rand();
-+ k[j].k.p.snapshot = U32_MAX;
-+ }
-+
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[3].k_i, 0) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?:
-+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0));
-+ if (ret)
-+ break;
-+ }
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int rand_lookup(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+ u64 i;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX), 0);
-+
-+ for (i = 0; i < nr; i++) {
-+ bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX));
-+
-+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
-+ ret = bkey_err(k);
-+ if (ret)
-+ break;
-+ }
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int rand_mixed_trans(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct bkey_i_cookie *cookie,
-+ u64 i, u64 pos)
-+{
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX));
-+
-+ k = bch2_btree_iter_peek(iter);
-+ ret = bkey_err(k);
-+ bch_err_msg(trans->c, ret, "lookup error");
-+ if (ret)
-+ return ret;
-+
-+ if (!(i & 3) && k.k) {
-+ bkey_cookie_init(&cookie->k_i);
-+ cookie->k.p = iter->pos;
-+ ret = bch2_trans_update(trans, iter, &cookie->k_i, 0);
-+ }
-+
-+ return ret;
-+}
-+
-+static int rand_mixed(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_i_cookie cookie;
-+ int ret = 0;
-+ u64 i, rand;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX), 0);
-+
-+ for (i = 0; i < nr; i++) {
-+ rand = test_rand();
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ rand_mixed_trans(trans, &iter, &cookie, i, rand));
-+ if (ret)
-+ break;
-+ }
-+
-+ bch2_trans_iter_exit(trans, &iter);
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int __do_delete(struct btree_trans *trans, struct bpos pos)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ int ret = 0;
-+
-+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
-+ BTREE_ITER_INTENT);
-+ k = bch2_btree_iter_peek(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err;
-+
-+ if (!k.k)
-+ goto err;
-+
-+ ret = bch2_btree_delete_at(trans, &iter, 0);
-+err:
-+ bch2_trans_iter_exit(trans, &iter);
-+ return ret;
-+}
-+
-+static int rand_delete(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ int ret = 0;
-+ u64 i;
-+
-+ for (i = 0; i < nr; i++) {
-+ struct bpos pos = SPOS(0, test_rand(), U32_MAX);
-+
-+ ret = commit_do(trans, NULL, NULL, 0,
-+ __do_delete(trans, pos));
-+ if (ret)
-+ break;
-+ }
-+
-+ bch2_trans_put(trans);
-+ return ret;
-+}
-+
-+static int seq_insert(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct bkey_i_cookie insert;
-+
-+ bkey_cookie_init(&insert.k_i);
-+
-+ return bch2_trans_run(c,
-+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX),
-+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k,
-+ NULL, NULL, 0, ({
-+ if (iter.pos.offset >= nr)
-+ break;
-+ insert.k.p = iter.pos;
-+ bch2_trans_update(trans, &iter, &insert.k_i, 0);
-+ })));
-+}
-+
-+static int seq_lookup(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+
-+ return bch2_trans_run(c,
-+ for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
-+ 0, k,
-+ 0));
-+}
-+
-+static int seq_overwrite(struct bch_fs *c, u64 nr)
-+{
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+
-+ return bch2_trans_run(c,
-+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX),
-+ BTREE_ITER_INTENT, k,
-+ NULL, NULL, 0, ({
-+ struct bkey_i_cookie u;
-+
-+ bkey_reassemble(&u.k_i, k);
-+ bch2_trans_update(trans, &iter, &u.k_i, 0);
-+ })));
-+}
-+
-+static int seq_delete(struct bch_fs *c, u64 nr)
-+{
-+ return bch2_btree_delete_range(c, BTREE_ID_xattrs,
-+ SPOS(0, 0, U32_MAX),
-+ POS(0, U64_MAX),
-+ 0, NULL);
-+}
-+
-+typedef int (*perf_test_fn)(struct bch_fs *, u64);
-+
-+struct test_job {
-+ struct bch_fs *c;
-+ u64 nr;
-+ unsigned nr_threads;
-+ perf_test_fn fn;
-+
-+ atomic_t ready;
-+ wait_queue_head_t ready_wait;
-+
-+ atomic_t done;
-+ struct completion done_completion;
-+
-+ u64 start;
-+ u64 finish;
-+ int ret;
-+};
-+
-+static int btree_perf_test_thread(void *data)
-+{
-+ struct test_job *j = data;
-+ int ret;
-+
-+ if (atomic_dec_and_test(&j->ready)) {
-+ wake_up(&j->ready_wait);
-+ j->start = sched_clock();
-+ } else {
-+ wait_event(j->ready_wait, !atomic_read(&j->ready));
-+ }
-+
-+ ret = j->fn(j->c, div64_u64(j->nr, j->nr_threads));
-+ if (ret) {
-+ bch_err(j->c, "%ps: error %s", j->fn, bch2_err_str(ret));
-+ j->ret = ret;
-+ }
-+
-+ if (atomic_dec_and_test(&j->done)) {
-+ j->finish = sched_clock();
-+ complete(&j->done_completion);
-+ }
-+
-+ return 0;
-+}
-+
-+int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
-+ u64 nr, unsigned nr_threads)
-+{
-+ struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads };
-+ char name_buf[20];
-+ struct printbuf nr_buf = PRINTBUF;
-+ struct printbuf per_sec_buf = PRINTBUF;
-+ unsigned i;
-+ u64 time;
-+
-+ atomic_set(&j.ready, nr_threads);
-+ init_waitqueue_head(&j.ready_wait);
-+
-+ atomic_set(&j.done, nr_threads);
-+ init_completion(&j.done_completion);
-+
-+#define perf_test(_test) \
-+ if (!strcmp(testname, #_test)) j.fn = _test
-+
-+ perf_test(rand_insert);
-+ perf_test(rand_insert_multi);
-+ perf_test(rand_lookup);
-+ perf_test(rand_mixed);
-+ perf_test(rand_delete);
-+
-+ perf_test(seq_insert);
-+ perf_test(seq_lookup);
-+ perf_test(seq_overwrite);
-+ perf_test(seq_delete);
-+
-+ /* a unit test, not a perf test: */
-+ perf_test(test_delete);
-+ perf_test(test_delete_written);
-+ perf_test(test_iterate);
-+ perf_test(test_iterate_extents);
-+ perf_test(test_iterate_slots);
-+ perf_test(test_iterate_slots_extents);
-+ perf_test(test_peek_end);
-+ perf_test(test_peek_end_extents);
-+
-+ perf_test(test_extent_overwrite_front);
-+ perf_test(test_extent_overwrite_back);
-+ perf_test(test_extent_overwrite_middle);
-+ perf_test(test_extent_overwrite_all);
-+ perf_test(test_extent_create_overlapping);
-+
-+ perf_test(test_snapshots);
-+
-+ if (!j.fn) {
-+ pr_err("unknown test %s", testname);
-+ return -EINVAL;
-+ }
-+
-+ //pr_info("running test %s:", testname);
-+
-+ if (nr_threads == 1)
-+ btree_perf_test_thread(&j);
-+ else
-+ for (i = 0; i < nr_threads; i++)
-+ kthread_run(btree_perf_test_thread, &j,
-+ "bcachefs perf test[%u]", i);
-+
-+ while (wait_for_completion_interruptible(&j.done_completion))
-+ ;
-+
-+ time = j.finish - j.start;
-+
-+ scnprintf(name_buf, sizeof(name_buf), "%s:", testname);
-+ prt_human_readable_u64(&nr_buf, nr);
-+ prt_human_readable_u64(&per_sec_buf, div64_u64(nr * NSEC_PER_SEC, time));
-+ printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n",
-+ name_buf, nr_buf.buf, nr_threads,
-+ div_u64(time, NSEC_PER_SEC),
-+ div_u64(time * nr_threads, nr),
-+ per_sec_buf.buf);
-+ printbuf_exit(&per_sec_buf);
-+ printbuf_exit(&nr_buf);
-+ return j.ret;
-+}
-+
-+#endif /* CONFIG_BCACHEFS_TESTS */
-diff --git a/fs/bcachefs/tests.h b/fs/bcachefs/tests.h
-new file mode 100644
-index 000000000000..c73b18aea7e0
---- /dev/null
-+++ b/fs/bcachefs/tests.h
-@@ -0,0 +1,15 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_TEST_H
-+#define _BCACHEFS_TEST_H
-+
-+struct bch_fs;
-+
-+#ifdef CONFIG_BCACHEFS_TESTS
-+
-+int bch2_btree_perf_test(struct bch_fs *, const char *, u64, unsigned);
-+
-+#else
-+
-+#endif /* CONFIG_BCACHEFS_TESTS */
-+
-+#endif /* _BCACHEFS_TEST_H */
-diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c
-new file mode 100644
-index 000000000000..dc48b52b01b4
---- /dev/null
-+++ b/fs/bcachefs/trace.c
-@@ -0,0 +1,17 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "alloc_types.h"
-+#include "buckets.h"
-+#include "btree_cache.h"
-+#include "btree_iter.h"
-+#include "btree_locking.h"
-+#include "btree_update_interior.h"
-+#include "keylist.h"
-+#include "move_types.h"
-+#include "opts.h"
-+#include "six.h"
-+
-+#include <linux/blktrace_api.h>
-+
-+#define CREATE_TRACE_POINTS
-+#include "trace.h"
-diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
-new file mode 100644
-index 000000000000..893304a1f06e
---- /dev/null
-+++ b/fs/bcachefs/trace.h
-@@ -0,0 +1,1334 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#undef TRACE_SYSTEM
-+#define TRACE_SYSTEM bcachefs
-+
-+#if !defined(_TRACE_BCACHEFS_H) || defined(TRACE_HEADER_MULTI_READ)
-+#define _TRACE_BCACHEFS_H
-+
-+#include <linux/tracepoint.h>
-+
-+#define TRACE_BPOS_entries(name) \
-+ __field(u64, name##_inode ) \
-+ __field(u64, name##_offset ) \
-+ __field(u32, name##_snapshot )
-+
-+#define TRACE_BPOS_assign(dst, src) \
-+ __entry->dst##_inode = (src).inode; \
-+ __entry->dst##_offset = (src).offset; \
-+ __entry->dst##_snapshot = (src).snapshot
-+
-+DECLARE_EVENT_CLASS(bpos,
-+ TP_PROTO(const struct bpos *p),
-+ TP_ARGS(p),
-+
-+ TP_STRUCT__entry(
-+ TRACE_BPOS_entries(p)
-+ ),
-+
-+ TP_fast_assign(
-+ TRACE_BPOS_assign(p, *p);
-+ ),
-+
-+ TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot)
-+);
-+
-+DECLARE_EVENT_CLASS(bkey,
-+ TP_PROTO(struct bch_fs *c, const char *k),
-+ TP_ARGS(c, k),
-+
-+ TP_STRUCT__entry(
-+ __string(k, k )
-+ ),
-+
-+ TP_fast_assign(
-+ __assign_str(k, k);
-+ ),
-+
-+ TP_printk("%s", __get_str(k))
-+);
-+
-+DECLARE_EVENT_CLASS(btree_node,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u8, level )
-+ __field(u8, btree_id )
-+ TRACE_BPOS_entries(pos)
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->level = b->c.level;
-+ __entry->btree_id = b->c.btree_id;
-+ TRACE_BPOS_assign(pos, b->key.k.p);
-+ ),
-+
-+ TP_printk("%d,%d %u %s %llu:%llu:%u",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->level,
-+ bch2_btree_id_str(__entry->btree_id),
-+ __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
-+);
-+
-+DECLARE_EVENT_CLASS(bch_fs,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ ),
-+
-+ TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
-+);
-+
-+DECLARE_EVENT_CLASS(bio,
-+ TP_PROTO(struct bio *bio),
-+ TP_ARGS(bio),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(sector_t, sector )
-+ __field(unsigned int, nr_sector )
-+ __array(char, rwbs, 6 )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = bio->bi_bdev ? bio_dev(bio) : 0;
-+ __entry->sector = bio->bi_iter.bi_sector;
-+ __entry->nr_sector = bio->bi_iter.bi_size >> 9;
-+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
-+ ),
-+
-+ TP_printk("%d,%d %s %llu + %u",
-+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-+ (unsigned long long)__entry->sector, __entry->nr_sector)
-+);
-+
-+/* super-io.c: */
-+TRACE_EVENT(write_super,
-+ TP_PROTO(struct bch_fs *c, unsigned long ip),
-+ TP_ARGS(c, ip),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(unsigned long, ip )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->ip = ip;
-+ ),
-+
-+ TP_printk("%d,%d for %pS",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ (void *) __entry->ip)
-+);
-+
-+/* io.c: */
-+
-+DEFINE_EVENT(bio, read_promote,
-+ TP_PROTO(struct bio *bio),
-+ TP_ARGS(bio)
-+);
-+
-+TRACE_EVENT(read_nopromote,
-+ TP_PROTO(struct bch_fs *c, int ret),
-+ TP_ARGS(c, ret),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __array(char, ret, 32 )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret));
-+ ),
-+
-+ TP_printk("%d,%d ret %s",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->ret)
-+);
-+
-+DEFINE_EVENT(bio, read_bounce,
-+ TP_PROTO(struct bio *bio),
-+ TP_ARGS(bio)
-+);
-+
-+DEFINE_EVENT(bio, read_split,
-+ TP_PROTO(struct bio *bio),
-+ TP_ARGS(bio)
-+);
-+
-+DEFINE_EVENT(bio, read_retry,
-+ TP_PROTO(struct bio *bio),
-+ TP_ARGS(bio)
-+);
-+
-+DEFINE_EVENT(bio, read_reuse_race,
-+ TP_PROTO(struct bio *bio),
-+ TP_ARGS(bio)
-+);
-+
-+/* Journal */
-+
-+DEFINE_EVENT(bch_fs, journal_full,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c)
-+);
-+
-+DEFINE_EVENT(bch_fs, journal_entry_full,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c)
-+);
-+
-+DEFINE_EVENT(bio, journal_write,
-+ TP_PROTO(struct bio *bio),
-+ TP_ARGS(bio)
-+);
-+
-+TRACE_EVENT(journal_reclaim_start,
-+ TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
-+ u64 min_nr, u64 min_key_cache,
-+ u64 prereserved, u64 prereserved_total,
-+ u64 btree_cache_dirty, u64 btree_cache_total,
-+ u64 btree_key_cache_dirty, u64 btree_key_cache_total),
-+ TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
-+ btree_cache_dirty, btree_cache_total,
-+ btree_key_cache_dirty, btree_key_cache_total),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(bool, direct )
-+ __field(bool, kicked )
-+ __field(u64, min_nr )
-+ __field(u64, min_key_cache )
-+ __field(u64, prereserved )
-+ __field(u64, prereserved_total )
-+ __field(u64, btree_cache_dirty )
-+ __field(u64, btree_cache_total )
-+ __field(u64, btree_key_cache_dirty )
-+ __field(u64, btree_key_cache_total )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->direct = direct;
-+ __entry->kicked = kicked;
-+ __entry->min_nr = min_nr;
-+ __entry->min_key_cache = min_key_cache;
-+ __entry->prereserved = prereserved;
-+ __entry->prereserved_total = prereserved_total;
-+ __entry->btree_cache_dirty = btree_cache_dirty;
-+ __entry->btree_cache_total = btree_cache_total;
-+ __entry->btree_key_cache_dirty = btree_key_cache_dirty;
-+ __entry->btree_key_cache_total = btree_key_cache_total;
-+ ),
-+
-+ TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->direct,
-+ __entry->kicked,
-+ __entry->min_nr,
-+ __entry->min_key_cache,
-+ __entry->prereserved,
-+ __entry->prereserved_total,
-+ __entry->btree_cache_dirty,
-+ __entry->btree_cache_total,
-+ __entry->btree_key_cache_dirty,
-+ __entry->btree_key_cache_total)
-+);
-+
-+TRACE_EVENT(journal_reclaim_finish,
-+ TP_PROTO(struct bch_fs *c, u64 nr_flushed),
-+ TP_ARGS(c, nr_flushed),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u64, nr_flushed )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->nr_flushed = nr_flushed;
-+ ),
-+
-+ TP_printk("%d,%d flushed %llu",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->nr_flushed)
-+);
-+
-+/* bset.c: */
-+
-+DEFINE_EVENT(bpos, bkey_pack_pos_fail,
-+ TP_PROTO(const struct bpos *p),
-+ TP_ARGS(p)
-+);
-+
-+/* Btree cache: */
-+
-+TRACE_EVENT(btree_cache_scan,
-+ TP_PROTO(long nr_to_scan, long can_free, long ret),
-+ TP_ARGS(nr_to_scan, can_free, ret),
-+
-+ TP_STRUCT__entry(
-+ __field(long, nr_to_scan )
-+ __field(long, can_free )
-+ __field(long, ret )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->nr_to_scan = nr_to_scan;
-+ __entry->can_free = can_free;
-+ __entry->ret = ret;
-+ ),
-+
-+ TP_printk("scanned for %li nodes, can free %li, ret %li",
-+ __entry->nr_to_scan, __entry->can_free, __entry->ret)
-+);
-+
-+DEFINE_EVENT(btree_node, btree_cache_reap,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock_fail,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c)
-+);
-+
-+DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c)
-+);
-+
-+DEFINE_EVENT(bch_fs, btree_cache_cannibalize,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c)
-+);
-+
-+DEFINE_EVENT(bch_fs, btree_cache_cannibalize_unlock,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c)
-+);
-+
-+/* Btree */
-+
-+DEFINE_EVENT(btree_node, btree_node_read,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+TRACE_EVENT(btree_node_write,
-+ TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors),
-+ TP_ARGS(b, bytes, sectors),
-+
-+ TP_STRUCT__entry(
-+ __field(enum btree_node_type, type)
-+ __field(unsigned, bytes )
-+ __field(unsigned, sectors )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->type = btree_node_type(b);
-+ __entry->bytes = bytes;
-+ __entry->sectors = sectors;
-+ ),
-+
-+ TP_printk("bkey type %u bytes %u sectors %u",
-+ __entry->type , __entry->bytes, __entry->sectors)
-+);
-+
-+DEFINE_EVENT(btree_node, btree_node_alloc,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+DEFINE_EVENT(btree_node, btree_node_free,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+TRACE_EVENT(btree_reserve_get_fail,
-+ TP_PROTO(const char *trans_fn,
-+ unsigned long caller_ip,
-+ size_t required,
-+ int ret),
-+ TP_ARGS(trans_fn, caller_ip, required, ret),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(size_t, required )
-+ __array(char, ret, 32 )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ __entry->required = required;
-+ strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret));
-+ ),
-+
-+ TP_printk("%s %pS required %zu ret %s",
-+ __entry->trans_fn,
-+ (void *) __entry->caller_ip,
-+ __entry->required,
-+ __entry->ret)
-+);
-+
-+DEFINE_EVENT(btree_node, btree_node_compact,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+DEFINE_EVENT(btree_node, btree_node_merge,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+DEFINE_EVENT(btree_node, btree_node_split,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+DEFINE_EVENT(btree_node, btree_node_rewrite,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+DEFINE_EVENT(btree_node, btree_node_set_root,
-+ TP_PROTO(struct bch_fs *c, struct btree *b),
-+ TP_ARGS(c, b)
-+);
-+
-+/*
-+ * btree_path_relock_fail - relocking the node of @path at @level failed.
-+ *
-+ * @trans: transaction; its ->fn string is copied (truncated to 32 bytes)
-+ * @caller_ip: instruction pointer of the caller, printed with %pS
-+ * @path: the btree path being relocked
-+ * @level: the level within @path whose node could not be relocked
-+ *
-+ * Records the lock counts held by @trans ("held"), the lock counts on the
-+ * node itself ("lock count"), and the lock sequence numbers stored in the
-+ * path and in the node.
-+ */
-+TRACE_EVENT(btree_path_relock_fail,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path,
-+ unsigned level),
-+ TP_ARGS(trans, caller_ip, path, level),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(u8, btree_id )
-+ __field(u8, level )
-+ TRACE_BPOS_entries(pos)
-+ __array(char, node, 24 )
-+ __field(u8, self_read_count )
-+ __field(u8, self_intent_count)
-+ __field(u8, read_count )
-+ __field(u8, intent_count )
-+ __field(u32, iter_lock_seq )
-+ __field(u32, node_lock_seq )
-+ ),
-+
-+ TP_fast_assign(
-+ struct btree *b = btree_path_node(path, level);
-+ struct six_lock_count c;
-+
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ __entry->btree_id = path->btree_id;
-+ /* report the level that failed, matching every other field below */
-+ __entry->level = level;
-+ TRACE_BPOS_assign(pos, path->pos);
-+
-+ c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level);
-+ __entry->self_read_count = c.n[SIX_LOCK_read];
-+ __entry->self_intent_count = c.n[SIX_LOCK_intent];
-+
-+ if (IS_ERR(b)) {
-+ /*
-+ * No real node to inspect: the counts are still printed by
-+ * TP_printk(), so give them a defined value instead of
-+ * leaving them unassigned.
-+ */
-+ __entry->read_count = 0;
-+ __entry->intent_count = 0;
-+ strscpy(__entry->node, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node));
-+ } else {
-+ c = six_lock_counts(&path->l[level].b->c.lock);
-+ __entry->read_count = c.n[SIX_LOCK_read];
-+ __entry->intent_count = c.n[SIX_LOCK_intent];
-+ scnprintf(__entry->node, sizeof(__entry->node), "%px", b);
-+ }
-+ __entry->iter_lock_seq = path->l[level].lock_seq;
-+ __entry->node_lock_seq = is_btree_node(path, level)
-+ ? six_lock_seq(&path->l[level].b->c.lock)
-+ : 0;
-+ ),
-+
-+ TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s held %u:%u lock count %u:%u iter seq %u lock seq %u",
-+ __entry->trans_fn,
-+ (void *) __entry->caller_ip,
-+ bch2_btree_id_str(__entry->btree_id),
-+ __entry->pos_inode,
-+ __entry->pos_offset,
-+ __entry->pos_snapshot,
-+ __entry->level,
-+ __entry->node,
-+ __entry->self_read_count,
-+ __entry->self_intent_count,
-+ __entry->read_count,
-+ __entry->intent_count,
-+ __entry->iter_lock_seq,
-+ __entry->node_lock_seq)
-+);
-+
-+/*
-+ * btree_path_upgrade_fail - upgrading the lock on @path at @level failed.
-+ *
-+ * @trans: transaction; its ->fn string is copied (truncated to 32 bytes)
-+ * @caller_ip: instruction pointer of the caller, printed with %pS
-+ * @path: the btree path whose lock was being upgraded
-+ * @level: the level within @path that could not be upgraded
-+ *
-+ * Records whether the node was locked, the lock counts held by @trans
-+ * ("held"), the lock counts on the node itself ("lock count"), and the lock
-+ * sequence numbers stored in the path and in the node.
-+ */
-+TRACE_EVENT(btree_path_upgrade_fail,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path,
-+ unsigned level),
-+ TP_ARGS(trans, caller_ip, path, level),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(u8, btree_id )
-+ __field(u8, level )
-+ TRACE_BPOS_entries(pos)
-+ __field(u8, locked )
-+ __field(u8, self_read_count )
-+ __field(u8, self_intent_count)
-+ __field(u8, read_count )
-+ __field(u8, intent_count )
-+ __field(u32, iter_lock_seq )
-+ __field(u32, node_lock_seq )
-+ ),
-+
-+ TP_fast_assign(
-+ struct six_lock_count c;
-+
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ __entry->btree_id = path->btree_id;
-+ __entry->level = level;
-+ TRACE_BPOS_assign(pos, path->pos);
-+ __entry->locked = btree_node_locked(path, level);
-+
-+ /* counts held by this transaction on the node */
-+ c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level);
-+ __entry->self_read_count = c.n[SIX_LOCK_read];
-+ __entry->self_intent_count = c.n[SIX_LOCK_intent];
-+ /* counts on the node's lock itself */
-+ c = six_lock_counts(&path->l[level].b->c.lock);
-+ __entry->read_count = c.n[SIX_LOCK_read];
-+ __entry->intent_count = c.n[SIX_LOCK_intent];
-+ __entry->iter_lock_seq = path->l[level].lock_seq;
-+ __entry->node_lock_seq = is_btree_node(path, level)
-+ ? six_lock_seq(&path->l[level].b->c.lock)
-+ : 0;
-+ ),
-+
-+ TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u",
-+ __entry->trans_fn,
-+ (void *) __entry->caller_ip,
-+ bch2_btree_id_str(__entry->btree_id),
-+ __entry->pos_inode,
-+ __entry->pos_offset,
-+ __entry->pos_snapshot,
-+ __entry->level,
-+ __entry->locked,
-+ __entry->self_read_count,
-+ __entry->self_intent_count,
-+ __entry->read_count,
-+ __entry->intent_count,
-+ __entry->iter_lock_seq,
-+ __entry->node_lock_seq)
-+);
-+
-+/* Garbage collection */
-+
-+DEFINE_EVENT(bch_fs, gc_gens_start,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c)
-+);
-+
-+DEFINE_EVENT(bch_fs, gc_gens_end,
-+ TP_PROTO(struct bch_fs *c),
-+ TP_ARGS(c)
-+);
-+
-+/* Allocator */
-+
-+DECLARE_EVENT_CLASS(bucket_alloc,
-+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
-+ u64 bucket,
-+ u64 free,
-+ u64 avail,
-+ u64 copygc_wait_amount,
-+ s64 copygc_waiting_for,
-+ struct bucket_alloc_state *s,
-+ bool nonblocking,
-+ const char *err),
-+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
-+ copygc_wait_amount, copygc_waiting_for,
-+ s, nonblocking, err),
-+
-+ TP_STRUCT__entry(
-+ __field(u8, dev )
-+ __array(char, reserve, 16 )
-+ __field(u64, bucket )
-+ __field(u64, free )
-+ __field(u64, avail )
-+ __field(u64, copygc_wait_amount )
-+ __field(s64, copygc_waiting_for )
-+ __field(u64, seen )
-+ __field(u64, open )
-+ __field(u64, need_journal_commit )
-+ __field(u64, nouse )
-+ __field(bool, nonblocking )
-+ __field(u64, nocow )
-+ __array(char, err, 32 )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = ca->dev_idx;
-+ strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
-+ __entry->bucket = bucket;
-+ __entry->free = free;
-+ __entry->avail = avail;
-+ __entry->copygc_wait_amount = copygc_wait_amount;
-+ __entry->copygc_waiting_for = copygc_waiting_for;
-+ __entry->seen = s->buckets_seen;
-+ __entry->open = s->skipped_open;
-+ __entry->need_journal_commit = s->skipped_need_journal_commit;
-+ __entry->nouse = s->skipped_nouse;
-+ __entry->nonblocking = nonblocking;
-+ __entry->nocow = s->skipped_nocow;
-+ strscpy(__entry->err, err, sizeof(__entry->err));
-+ ),
-+
-+ TP_printk("reserve %s bucket %u:%llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
-+ __entry->reserve,
-+ __entry->dev,
-+ __entry->bucket,
-+ __entry->free,
-+ __entry->avail,
-+ __entry->copygc_wait_amount,
-+ __entry->copygc_waiting_for,
-+ __entry->seen,
-+ __entry->open,
-+ __entry->need_journal_commit,
-+ __entry->nouse,
-+ __entry->nocow,
-+ __entry->nonblocking,
-+ __entry->err)
-+);
-+
-+DEFINE_EVENT(bucket_alloc, bucket_alloc,
-+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
-+ u64 bucket,
-+ u64 free,
-+ u64 avail,
-+ u64 copygc_wait_amount,
-+ s64 copygc_waiting_for,
-+ struct bucket_alloc_state *s,
-+ bool nonblocking,
-+ const char *err),
-+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
-+ copygc_wait_amount, copygc_waiting_for,
-+ s, nonblocking, err)
-+);
-+
-+DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
-+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
-+ u64 bucket,
-+ u64 free,
-+ u64 avail,
-+ u64 copygc_wait_amount,
-+ s64 copygc_waiting_for,
-+ struct bucket_alloc_state *s,
-+ bool nonblocking,
-+ const char *err),
-+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
-+ copygc_wait_amount, copygc_waiting_for,
-+ s, nonblocking, err)
-+);
-+
-+/*
-+ * discard_buckets - summary of one bucket-discard pass.
-+ *
-+ * @c: filesystem; c->dev is printed as "major,minor"
-+ * @seen, @open, @need_journal_commit, @discarded: counters supplied by the
-+ * caller and recorded verbatim
-+ * @err: error string, copied (truncated to 16 bytes)
-+ */
-+TRACE_EVENT(discard_buckets,
-+ TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
-+ u64 need_journal_commit, u64 discarded, const char *err),
-+ TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u64, seen )
-+ __field(u64, open )
-+ __field(u64, need_journal_commit )
-+ __field(u64, discarded )
-+ __array(char, err, 16 )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->seen = seen;
-+ __entry->open = open;
-+ __entry->need_journal_commit = need_journal_commit;
-+ __entry->discarded = discarded;
-+ strscpy(__entry->err, err, sizeof(__entry->err));
-+ ),
-+
-+ /* "%d,%d": major and minor need a separator or they run together */
-+ TP_printk("%d,%d seen %llu open %llu need_journal_commit %llu discarded %llu err %s",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->seen,
-+ __entry->open,
-+ __entry->need_journal_commit,
-+ __entry->discarded,
-+ __entry->err)
-+);
-+
-+TRACE_EVENT(bucket_invalidate,
-+ TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors),
-+ TP_ARGS(c, dev, bucket, sectors),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u32, dev_idx )
-+ __field(u32, sectors )
-+ __field(u64, bucket )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->dev_idx = dev;
-+ __entry->sectors = sectors;
-+ __entry->bucket = bucket;
-+ ),
-+
-+ TP_printk("%d:%d invalidated %u:%llu cached sectors %u",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->dev_idx, __entry->bucket,
-+ __entry->sectors)
-+);
-+
-+/* Moving IO */
-+
-+TRACE_EVENT(bucket_evacuate,
-+ TP_PROTO(struct bch_fs *c, struct bpos *bucket),
-+ TP_ARGS(c, bucket),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u32, dev_idx )
-+ __field(u64, bucket )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->dev_idx = bucket->inode;
-+ __entry->bucket = bucket->offset;
-+ ),
-+
-+ TP_printk("%d:%d %u:%llu",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->dev_idx, __entry->bucket)
-+);
-+
-+DEFINE_EVENT(bkey, move_extent,
-+ TP_PROTO(struct bch_fs *c, const char *k),
-+ TP_ARGS(c, k)
-+);
-+
-+DEFINE_EVENT(bkey, move_extent_read,
-+ TP_PROTO(struct bch_fs *c, const char *k),
-+ TP_ARGS(c, k)
-+);
-+
-+DEFINE_EVENT(bkey, move_extent_write,
-+ TP_PROTO(struct bch_fs *c, const char *k),
-+ TP_ARGS(c, k)
-+);
-+
-+DEFINE_EVENT(bkey, move_extent_finish,
-+ TP_PROTO(struct bch_fs *c, const char *k),
-+ TP_ARGS(c, k)
-+);
-+
-+TRACE_EVENT(move_extent_fail,
-+ TP_PROTO(struct bch_fs *c, const char *msg),
-+ TP_ARGS(c, msg),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __string(msg, msg )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __assign_str(msg, msg);
-+ ),
-+
-+ TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
-+);
-+
-+DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
-+ TP_PROTO(struct bch_fs *c, const char *k),
-+ TP_ARGS(c, k)
-+);
-+
-+/*
-+ * move_data - snapshot of the data-move counters in @stats.
-+ *
-+ * @c: filesystem; c->dev is printed as "major,minor"
-+ * @stats: move statistics; each atomic64 counter is read once at trace time
-+ */
-+TRACE_EVENT(move_data,
-+ TP_PROTO(struct bch_fs *c,
-+ struct bch_move_stats *stats),
-+ TP_ARGS(c, stats),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u64, keys_moved )
-+ __field(u64, keys_raced )
-+ __field(u64, sectors_seen )
-+ __field(u64, sectors_moved )
-+ __field(u64, sectors_raced )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->keys_moved = atomic64_read(&stats->keys_moved);
-+ __entry->keys_raced = atomic64_read(&stats->keys_raced);
-+ __entry->sectors_seen = atomic64_read(&stats->sectors_seen);
-+ __entry->sectors_moved = atomic64_read(&stats->sectors_moved);
-+ __entry->sectors_raced = atomic64_read(&stats->sectors_raced);
-+ ),
-+
-+ /*
-+ * The two literals are concatenated by the compiler; the trailing
-+ * space on the first keeps "raced N" and "sectors" apart.
-+ */
-+ TP_printk("%d,%d keys moved %llu raced %llu "
-+ "sectors seen %llu moved %llu raced %llu",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->keys_moved,
-+ __entry->keys_raced,
-+ __entry->sectors_seen,
-+ __entry->sectors_moved,
-+ __entry->sectors_raced)
-+);
-+
-+TRACE_EVENT(evacuate_bucket,
-+ TP_PROTO(struct bch_fs *c, struct bpos *bucket,
-+ unsigned sectors, unsigned bucket_size,
-+ u64 fragmentation, int ret),
-+ TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u64, member )
-+ __field(u64, bucket )
-+ __field(u32, sectors )
-+ __field(u32, bucket_size )
-+ __field(u64, fragmentation )
-+ __field(int, ret )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->member = bucket->inode;
-+ __entry->bucket = bucket->offset;
-+ __entry->sectors = sectors;
-+ __entry->bucket_size = bucket_size;
-+ __entry->fragmentation = fragmentation;
-+ __entry->ret = ret;
-+ ),
-+
-+ TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->member, __entry->bucket,
-+ __entry->sectors, __entry->bucket_size,
-+ __entry->fragmentation, __entry->ret)
-+);
-+
-+/*
-+ * copygc - result of one copygc pass.
-+ *
-+ * @c: filesystem; c->dev is printed as "major,minor"
-+ * @sectors_moved / @sectors_not_moved: printed as "sectors moved N remain M"
-+ * @buckets_moved / @buckets_not_moved: printed as "buckets moved N remain M"
-+ */
-+TRACE_EVENT(copygc,
-+ TP_PROTO(struct bch_fs *c,
-+ u64 sectors_moved, u64 sectors_not_moved,
-+ u64 buckets_moved, u64 buckets_not_moved),
-+ TP_ARGS(c,
-+ sectors_moved, sectors_not_moved,
-+ buckets_moved, buckets_not_moved),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u64, sectors_moved )
-+ __field(u64, sectors_not_moved )
-+ __field(u64, buckets_moved )
-+ __field(u64, buckets_not_moved )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->sectors_moved = sectors_moved;
-+ __entry->sectors_not_moved = sectors_not_moved;
-+ __entry->buckets_moved = buckets_moved;
-+ /* was assigned from buckets_moved, so "remain" repeated "moved" */
-+ __entry->buckets_not_moved = buckets_not_moved;
-+ ),
-+
-+ TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->sectors_moved, __entry->sectors_not_moved,
-+ __entry->buckets_moved, __entry->buckets_not_moved)
-+);
-+
-+/*
-+ * copygc_wait - copygc is going to wait before its next pass.
-+ *
-+ * @c: filesystem; c->dev is printed as "major,minor"
-+ * @wait_amount: printed as the number of sectors being waited for
-+ * @until: printed as the point the wait lasts until
-+ */
-+TRACE_EVENT(copygc_wait,
-+ TP_PROTO(struct bch_fs *c,
-+ u64 wait_amount, u64 until),
-+ TP_ARGS(c, wait_amount, until),
-+
-+ TP_STRUCT__entry(
-+ __field(dev_t, dev )
-+ __field(u64, wait_amount )
-+ __field(u64, until )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->dev = c->dev;
-+ __entry->wait_amount = wait_amount;
-+ __entry->until = until;
-+ ),
-+
-+ /* "%d,%d" for major,minor, as in the other events of this file */
-+ TP_printk("%d,%d waiting for %llu sectors until %llu",
-+ MAJOR(__entry->dev), MINOR(__entry->dev),
-+ __entry->wait_amount, __entry->until)
-+);
-+
-+/* btree transactions: */
-+
-+DECLARE_EVENT_CLASS(transaction_event,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ ),
-+
-+ TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip)
-+);
-+
-+DEFINE_EVENT(transaction_event, transaction_commit,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_restart_injected,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+TRACE_EVENT(trans_restart_split_race,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree *b),
-+ TP_ARGS(trans, caller_ip, b),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(u8, level )
-+ __field(u16, written )
-+ __field(u16, blocks )
-+ __field(u16, u64s_remaining )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ __entry->level = b->c.level;
-+ __entry->written = b->written;
-+ __entry->blocks = btree_blocks(trans->c);
-+ __entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b);
-+ ),
-+
-+ TP_printk("%s %pS l=%u written %u/%u u64s remaining %u",
-+ __entry->trans_fn, (void *) __entry->caller_ip,
-+ __entry->level,
-+ __entry->written, __entry->blocks,
-+ __entry->u64s_remaining)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+TRACE_EVENT(trans_restart_journal_preres_get,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ unsigned flags),
-+ TP_ARGS(trans, caller_ip, flags),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(unsigned, flags )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ __entry->flags = flags;
-+ ),
-+
-+ TP_printk("%s %pS %x", __entry->trans_fn,
-+ (void *) __entry->caller_ip,
-+ __entry->flags)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_restart_fault_inject,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_traverse_all,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_restart_too_many_iters,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+/*
-+ * transaction_restart_iter - event class for transaction restarts that are
-+ * attributed to a specific btree path.
-+ *
-+ * @trans: transaction; its ->fn string is copied (truncated to 32 bytes)
-+ * @caller_ip: instruction pointer of the caller, printed with %pS
-+ * @path: path whose btree id and position are recorded
-+ */
-+DECLARE_EVENT_CLASS(transaction_restart_iter,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(u8, btree_id )
-+ TRACE_BPOS_entries(pos)
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ __entry->btree_id = path->btree_id;
-+ /* terminate explicitly: the macro's last statement has no ';' */
-+ TRACE_BPOS_assign(pos, path->pos);
-+ ),
-+
-+ TP_printk("%s %pS btree %s pos %llu:%llu:%u",
-+ __entry->trans_fn,
-+ (void *) __entry->caller_ip,
-+ bch2_btree_id_str(__entry->btree_id),
-+ __entry->pos_inode,
-+ __entry->pos_offset,
-+ __entry->pos_snapshot)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+struct get_locks_fail;
-+
-+/*
-+ * trans_restart_upgrade - transaction restarted because a path's locks could
-+ * not be upgraded.
-+ *
-+ * @trans: transaction; its ->fn string is copied (truncated to 32 bytes)
-+ * @caller_ip: instruction pointer of the caller, printed with %pS
-+ * @path: path being upgraded
-+ * @old_locks_want: locks_want before the attempt
-+ * @new_locks_want: locks_want that was requested
-+ * @f: failure description; f->l is the level recorded, f->b the node (its
-+ * lock sequence is recorded as 0 when f->b is NULL or an error pointer)
-+ */
-+TRACE_EVENT(trans_restart_upgrade,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path,
-+ unsigned old_locks_want,
-+ unsigned new_locks_want,
-+ struct get_locks_fail *f),
-+ TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(u8, btree_id )
-+ __field(u8, old_locks_want )
-+ __field(u8, new_locks_want )
-+ __field(u8, level )
-+ __field(u32, path_seq )
-+ __field(u32, node_seq )
-+ __field(u32, path_alloc_seq )
-+ __field(u32, downgrade_seq)
-+ TRACE_BPOS_entries(pos)
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ __entry->btree_id = path->btree_id;
-+ __entry->old_locks_want = old_locks_want;
-+ __entry->new_locks_want = new_locks_want;
-+ __entry->level = f->l;
-+ __entry->path_seq = path->l[f->l].lock_seq;
-+ __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq;
-+ __entry->path_alloc_seq = path->alloc_seq;
-+ __entry->downgrade_seq = path->downgrade_seq;
-+ /* terminate explicitly: the macro's last statement has no ';' */
-+ TRACE_BPOS_assign(pos, path->pos);
-+ ),
-+
-+ TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u",
-+ __entry->trans_fn,
-+ (void *) __entry->caller_ip,
-+ bch2_btree_id_str(__entry->btree_id),
-+ __entry->pos_inode,
-+ __entry->pos_offset,
-+ __entry->pos_snapshot,
-+ __entry->old_locks_want,
-+ __entry->new_locks_want,
-+ __entry->level,
-+ __entry->path_seq,
-+ __entry->node_seq,
-+ __entry->path_alloc_seq,
-+ __entry->downgrade_seq)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_restart_would_deadlock,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+TRACE_EVENT(trans_restart_would_deadlock_write,
-+ TP_PROTO(struct btree_trans *trans),
-+ TP_ARGS(trans),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ ),
-+
-+ TP_printk("%s", __entry->trans_fn)
-+);
-+
-+TRACE_EVENT(trans_restart_mem_realloced,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ unsigned long bytes),
-+ TP_ARGS(trans, caller_ip, bytes),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(unsigned long, bytes )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ __entry->bytes = bytes;
-+ ),
-+
-+ TP_printk("%s %pS bytes %lu",
-+ __entry->trans_fn,
-+ (void *) __entry->caller_ip,
-+ __entry->bytes)
-+);
-+
-+TRACE_EVENT(trans_restart_key_cache_key_realloced,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path,
-+ unsigned old_u64s,
-+ unsigned new_u64s),
-+ TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ __field(enum btree_id, btree_id )
-+ TRACE_BPOS_entries(pos)
-+ __field(u32, old_u64s )
-+ __field(u32, new_u64s )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+
-+ __entry->btree_id = path->btree_id;
-+ TRACE_BPOS_assign(pos, path->pos);
-+ __entry->old_u64s = old_u64s;
-+ __entry->new_u64s = new_u64s;
-+ ),
-+
-+ TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u",
-+ __entry->trans_fn,
-+ (void *) __entry->caller_ip,
-+ bch2_btree_id_str(__entry->btree_id),
-+ __entry->pos_inode,
-+ __entry->pos_offset,
-+ __entry->pos_snapshot,
-+ __entry->old_u64s,
-+ __entry->new_u64s)
-+);
-+
-+TRACE_EVENT(path_downgrade,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip,
-+ struct btree_path *path),
-+ TP_ARGS(trans, caller_ip, path),
-+
-+ TP_STRUCT__entry(
-+ __array(char, trans_fn, 32 )
-+ __field(unsigned long, caller_ip )
-+ ),
-+
-+ TP_fast_assign(
-+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-+ __entry->caller_ip = caller_ip;
-+ ),
-+
-+ TP_printk("%s %pS",
-+ __entry->trans_fn,
-+ (void *) __entry->caller_ip)
-+);
-+
-+DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
-+ TP_PROTO(struct btree_trans *trans,
-+ unsigned long caller_ip),
-+ TP_ARGS(trans, caller_ip)
-+);
-+
-+TRACE_EVENT(write_buffer_flush,
-+ TP_PROTO(struct btree_trans *trans, size_t nr, size_t skipped, size_t fast, size_t size),
-+ TP_ARGS(trans, nr, skipped, fast, size),
-+
-+ TP_STRUCT__entry(
-+ __field(size_t, nr )
-+ __field(size_t, skipped )
-+ __field(size_t, fast )
-+ __field(size_t, size )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->nr = nr;
-+ __entry->skipped = skipped;
-+ __entry->fast = fast;
-+ __entry->size = size;
-+ ),
-+
-+ TP_printk("%zu/%zu skipped %zu fast %zu",
-+ __entry->nr, __entry->size, __entry->skipped, __entry->fast)
-+);
-+
-+TRACE_EVENT(write_buffer_flush_slowpath,
-+ TP_PROTO(struct btree_trans *trans, size_t nr, size_t size),
-+ TP_ARGS(trans, nr, size),
-+
-+ TP_STRUCT__entry(
-+ __field(size_t, nr )
-+ __field(size_t, size )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->nr = nr;
-+ __entry->size = size;
-+ ),
-+
-+ TP_printk("%zu/%zu", __entry->nr, __entry->size)
-+);
-+
-+#endif /* _TRACE_BCACHEFS_H */
-+
-+/* This part must be outside protection */
-+#undef TRACE_INCLUDE_PATH
-+#define TRACE_INCLUDE_PATH ../../fs/bcachefs
-+
-+#undef TRACE_INCLUDE_FILE
-+#define TRACE_INCLUDE_FILE trace
-+
-+#include <trace/define_trace.h>
-diff --git a/fs/bcachefs/two_state_shared_lock.c b/fs/bcachefs/two_state_shared_lock.c
-new file mode 100644
-index 000000000000..9764c2e6a910
---- /dev/null
-+++ b/fs/bcachefs/two_state_shared_lock.c
-@@ -0,0 +1,8 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "two_state_shared_lock.h"
-+
-+void __bch2_two_state_lock(two_state_lock_t *lock, int s)
-+{
-+ __wait_event(lock->wait, bch2_two_state_trylock(lock, s));
-+}
-diff --git a/fs/bcachefs/two_state_shared_lock.h b/fs/bcachefs/two_state_shared_lock.h
-new file mode 100644
-index 000000000000..905801772002
---- /dev/null
-+++ b/fs/bcachefs/two_state_shared_lock.h
-@@ -0,0 +1,59 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_TWO_STATE_LOCK_H
-+#define _BCACHEFS_TWO_STATE_LOCK_H
-+
-+#include <linux/atomic.h>
-+#include <linux/sched.h>
-+#include <linux/wait.h>
-+
-+#include "util.h"
-+
-+/*
-+ * Two-state lock - can be taken for add or block - both states are shared,
-+ * like read side of rwsem, but conflict with other state:
-+ */
-+typedef struct {
-+ atomic_long_t v;
-+ wait_queue_head_t wait;
-+} two_state_lock_t;
-+
-+static inline void two_state_lock_init(two_state_lock_t *lock)
-+{
-+ atomic_long_set(&lock->v, 0);
-+ init_waitqueue_head(&lock->wait);
-+}
-+
-+static inline void bch2_two_state_unlock(two_state_lock_t *lock, int s)
-+{
-+ long i = s ? 1 : -1;
-+
-+ EBUG_ON(atomic_long_read(&lock->v) == 0);
-+
-+ if (atomic_long_sub_return_release(i, &lock->v) == 0)
-+ wake_up_all(&lock->wait);
-+}
-+
-+static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s)
-+{
-+ long i = s ? 1 : -1;
-+ long v = atomic_long_read(&lock->v), old;
-+
-+ do {
-+ old = v;
-+
-+ if (i > 0 ? v < 0 : v > 0)
-+ return false;
-+ } while ((v = atomic_long_cmpxchg_acquire(&lock->v,
-+ old, old + i)) != old);
-+ return true;
-+}
-+
-+void __bch2_two_state_lock(two_state_lock_t *, int);
-+
-+static inline void bch2_two_state_lock(two_state_lock_t *lock, int s)
-+{
-+ if (!bch2_two_state_trylock(lock, s))
-+ __bch2_two_state_lock(lock, s);
-+}
-+
-+#endif /* _BCACHEFS_TWO_STATE_LOCK_H */
-diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
-new file mode 100644
-index 000000000000..84b142fcc3df
---- /dev/null
-+++ b/fs/bcachefs/util.c
-@@ -0,0 +1,1159 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * random utiility code, for bcache but in theory not specific to bcache
-+ *
-+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include <linux/bio.h>
-+#include <linux/blkdev.h>
-+#include <linux/console.h>
-+#include <linux/ctype.h>
-+#include <linux/debugfs.h>
-+#include <linux/freezer.h>
-+#include <linux/kthread.h>
-+#include <linux/log2.h>
-+#include <linux/math64.h>
-+#include <linux/percpu.h>
-+#include <linux/preempt.h>
-+#include <linux/random.h>
-+#include <linux/seq_file.h>
-+#include <linux/string.h>
-+#include <linux/types.h>
-+#include <linux/sched/clock.h>
-+
-+#include "eytzinger.h"
-+#include "mean_and_variance.h"
-+#include "util.h"
-+
-+static const char si_units[] = "?kMGTPEZY";
-+
-+/* string_get_size units: */
-+static const char *const units_2[] = {
-+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
-+};
-+static const char *const units_10[] = {
-+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
-+};
-+
-+static int parse_u64(const char *cp, u64 *res)
-+{
-+ const char *start = cp;
-+ u64 v = 0;
-+
-+ if (!isdigit(*cp))
-+ return -EINVAL;
-+
-+ do {
-+ if (v > U64_MAX / 10)
-+ return -ERANGE;
-+ v *= 10;
-+ if (v > U64_MAX - (*cp - '0'))
-+ return -ERANGE;
-+ v += *cp - '0';
-+ cp++;
-+ } while (isdigit(*cp));
-+
-+ *res = v;
-+ return cp - start;
-+}
-+
-+static int bch2_pow(u64 n, u64 p, u64 *res)
-+{
-+ *res = 1;
-+
-+ while (p--) {
-+ if (*res > div_u64(U64_MAX, n))
-+ return -ERANGE;
-+ *res *= n;
-+ }
-+ return 0;
-+}
-+
-+static int parse_unit_suffix(const char *cp, u64 *res)
-+{
-+ const char *start = cp;
-+ u64 base = 1024;
-+ unsigned u;
-+ int ret;
-+
-+ if (*cp == ' ')
-+ cp++;
-+
-+ for (u = 1; u < strlen(si_units); u++)
-+ if (*cp == si_units[u]) {
-+ cp++;
-+ goto got_unit;
-+ }
-+
-+ for (u = 0; u < ARRAY_SIZE(units_2); u++)
-+ if (!strncmp(cp, units_2[u], strlen(units_2[u]))) {
-+ cp += strlen(units_2[u]);
-+ goto got_unit;
-+ }
-+
-+ for (u = 0; u < ARRAY_SIZE(units_10); u++)
-+ if (!strncmp(cp, units_10[u], strlen(units_10[u]))) {
-+ cp += strlen(units_10[u]);
-+ base = 1000;
-+ goto got_unit;
-+ }
-+
-+ *res = 1;
-+ return 0;
-+got_unit:
-+ ret = bch2_pow(base, u, res);
-+ if (ret)
-+ return ret;
-+
-+ return cp - start;
-+}
-+
-+#define parse_or_ret(cp, _f) \
-+do { \
-+ int _ret = _f; \
-+ if (_ret < 0) \
-+ return _ret; \
-+ cp += _ret; \
-+} while (0)
-+
-+static int __bch2_strtou64_h(const char *cp, u64 *res)
-+{
-+ const char *start = cp;
-+ u64 v = 0, b, f_n = 0, f_d = 1;
-+ int ret;
-+
-+ parse_or_ret(cp, parse_u64(cp, &v));
-+
-+ if (*cp == '.') {
-+ cp++;
-+ ret = parse_u64(cp, &f_n);
-+ if (ret < 0)
-+ return ret;
-+ cp += ret;
-+
-+ ret = bch2_pow(10, ret, &f_d);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ parse_or_ret(cp, parse_unit_suffix(cp, &b));
-+
-+ if (v > div_u64(U64_MAX, b))
-+ return -ERANGE;
-+ v *= b;
-+
-+ if (f_n > div_u64(U64_MAX, b))
-+ return -ERANGE;
-+
-+ f_n = div_u64(f_n * b, f_d);
-+ if (v + f_n < v)
-+ return -ERANGE;
-+ v += f_n;
-+
-+ *res = v;
-+ return cp - start;
-+}
-+
-+static int __bch2_strtoh(const char *cp, u64 *res,
-+ u64 t_max, bool t_signed)
-+{
-+ bool positive = *cp != '-';
-+ u64 v = 0;
-+
-+ if (*cp == '+' || *cp == '-')
-+ cp++;
-+
-+ parse_or_ret(cp, __bch2_strtou64_h(cp, &v));
-+
-+ if (*cp == '\n')
-+ cp++;
-+ if (*cp)
-+ return -EINVAL;
-+
-+ if (positive) {
-+ if (v > t_max)
-+ return -ERANGE;
-+ } else {
-+ if (v && !t_signed)
-+ return -ERANGE;
-+
-+ if (v > t_max + 1)
-+ return -ERANGE;
-+ v = -v;
-+ }
-+
-+ *res = v;
-+ return 0;
-+}
-+
-+#define STRTO_H(name, type) \
-+int bch2_ ## name ## _h(const char *cp, type *res) \
-+{ \
-+ u64 v = 0; \
-+ int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \
-+ ANYSINT_MAX(type) != ((type) ~0ULL)); \
-+ *res = v; \
-+ return ret; \
-+}
-+
-+STRTO_H(strtoint, int)
-+STRTO_H(strtouint, unsigned int)
-+STRTO_H(strtoll, long long)
-+STRTO_H(strtoull, unsigned long long)
-+STRTO_H(strtou64, u64)
-+
-+u64 bch2_read_flag_list(char *opt, const char * const list[])
-+{
-+ u64 ret = 0;
-+ char *p, *s, *d = kstrdup(opt, GFP_KERNEL);
-+
-+ if (!d)
-+ return -ENOMEM;
-+
-+ s = strim(d);
-+
-+ while ((p = strsep(&s, ","))) {
-+ int flag = match_string(list, -1, p);
-+
-+ if (flag < 0) {
-+ ret = -1;
-+ break;
-+ }
-+
-+ ret |= 1 << flag;
-+ }
-+
-+ kfree(d);
-+
-+ return ret;
-+}
-+
-+bool bch2_is_zero(const void *_p, size_t n)
-+{
-+ const char *p = _p;
-+ size_t i;
-+
-+ for (i = 0; i < n; i++)
-+ if (p[i])
-+ return false;
-+ return true;
-+}
-+
-+void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
-+{
-+ while (nr_bits)
-+ prt_char(out, '0' + ((v >> --nr_bits) & 1));
-+}
-+
-+void bch2_print_string_as_lines(const char *prefix, const char *lines)
-+{
-+ const char *p;
-+
-+ if (!lines) {
-+ printk("%s (null)\n", prefix);
-+ return;
-+ }
-+
-+ console_lock();
-+ while (1) {
-+ p = strchrnul(lines, '\n');
-+ printk("%s%.*s\n", prefix, (int) (p - lines), lines);
-+ if (!*p)
-+ break;
-+ lines = p + 1;
-+ }
-+ console_unlock();
-+}
-+
-+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task)
-+{
-+#ifdef CONFIG_STACKTRACE
-+ unsigned nr_entries = 0;
-+ int ret = 0;
-+
-+ stack->nr = 0;
-+ ret = darray_make_room(stack, 32);
-+ if (ret)
-+ return ret;
-+
-+ if (!down_read_trylock(&task->signal->exec_update_lock))
-+ return -1;
-+
-+ do {
-+ nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, 0);
-+ } while (nr_entries == stack->size &&
-+ !(ret = darray_make_room(stack, stack->size * 2)));
-+
-+ stack->nr = nr_entries;
-+ up_read(&task->signal->exec_update_lock);
-+
-+ return ret;
-+#else
-+ return 0;
-+#endif
-+}
-+
-+void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
-+{
-+ unsigned long *i;
-+
-+ darray_for_each(*stack, i) {
-+ prt_printf(out, "[<0>] %pB", (void *) *i);
-+ prt_newline(out);
-+ }
-+}
-+
-+int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task)
-+{
-+ bch_stacktrace stack = { 0 };
-+ int ret = bch2_save_backtrace(&stack, task);
-+
-+ bch2_prt_backtrace(out, &stack);
-+ darray_exit(&stack);
-+ return ret;
-+}
-+
-+/* time stats: */
-+
-+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-+static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
-+{
-+ unsigned i = 0;
-+
-+ while (i < ARRAY_SIZE(q->entries)) {
-+ struct bch2_quantile_entry *e = q->entries + i;
-+
-+ if (unlikely(!e->step)) {
-+ e->m = v;
-+ e->step = max_t(unsigned, v / 2, 1024);
-+ } else if (e->m > v) {
-+ e->m = e->m >= e->step
-+ ? e->m - e->step
-+ : 0;
-+ } else if (e->m < v) {
-+ e->m = e->m + e->step > e->m
-+ ? e->m + e->step
-+ : U32_MAX;
-+ }
-+
-+ if ((e->m > v ? e->m - v : v - e->m) < e->step)
-+ e->step = max_t(unsigned, e->step / 2, 1);
-+
-+ if (v >= e->m)
-+ break;
-+
-+ i = eytzinger0_child(i, v > e->m);
-+ }
-+}
-+
-+static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
-+ u64 start, u64 end)
-+{
-+ u64 duration, freq;
-+
-+ if (time_after64(end, start)) {
-+ duration = end - start;
-+ mean_and_variance_update(&stats->duration_stats, duration);
-+ mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
-+ stats->max_duration = max(stats->max_duration, duration);
-+ stats->min_duration = min(stats->min_duration, duration);
-+ bch2_quantiles_update(&stats->quantiles, duration);
-+ }
-+
-+ if (time_after64(end, stats->last_event)) {
-+ freq = end - stats->last_event;
-+ mean_and_variance_update(&stats->freq_stats, freq);
-+ mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
-+ stats->max_freq = max(stats->max_freq, freq);
-+ stats->min_freq = min(stats->min_freq, freq);
-+ stats->last_event = end;
-+ }
-+}
-+
-+static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
-+ struct bch2_time_stat_buffer *b)
-+{
-+ struct bch2_time_stat_buffer_entry *i;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&stats->lock, flags);
-+ for (i = b->entries;
-+ i < b->entries + ARRAY_SIZE(b->entries);
-+ i++)
-+ bch2_time_stats_update_one(stats, i->start, i->end);
-+ spin_unlock_irqrestore(&stats->lock, flags);
-+
-+ b->nr = 0;
-+}
-+
-+void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
-+{
-+ unsigned long flags;
-+
-+ WARN_RATELIMIT(!stats->min_duration || !stats->min_freq,
-+ "time_stats: min_duration = %llu, min_freq = %llu",
-+ stats->min_duration, stats->min_freq);
-+
-+ if (!stats->buffer) {
-+ spin_lock_irqsave(&stats->lock, flags);
-+ bch2_time_stats_update_one(stats, start, end);
-+
-+ if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
-+ stats->duration_stats.n > 1024)
-+ stats->buffer =
-+ alloc_percpu_gfp(struct bch2_time_stat_buffer,
-+ GFP_ATOMIC);
-+ spin_unlock_irqrestore(&stats->lock, flags);
-+ } else {
-+ struct bch2_time_stat_buffer *b;
-+
-+ preempt_disable();
-+ b = this_cpu_ptr(stats->buffer);
-+
-+ BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
-+ b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) {
-+ .start = start,
-+ .end = end
-+ };
-+
-+ if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
-+ bch2_time_stats_clear_buffer(stats, b);
-+ preempt_enable();
-+ }
-+}
-+#endif
-+
-+static const struct time_unit {
-+ const char *name;
-+ u64 nsecs;
-+} time_units[] = {
-+ { "ns", 1 },
-+ { "us", NSEC_PER_USEC },
-+ { "ms", NSEC_PER_MSEC },
-+ { "s", NSEC_PER_SEC },
-+ { "m", (u64) NSEC_PER_SEC * 60},
-+ { "h", (u64) NSEC_PER_SEC * 3600},
-+ { "eon", U64_MAX },
-+};
-+
-+static const struct time_unit *pick_time_units(u64 ns)
-+{
-+ const struct time_unit *u;
-+
-+ for (u = time_units;
-+ u + 1 < time_units + ARRAY_SIZE(time_units) &&
-+ ns >= u[1].nsecs << 1;
-+ u++)
-+ ;
-+
-+ return u;
-+}
-+
-+void bch2_pr_time_units(struct printbuf *out, u64 ns)
-+{
-+ const struct time_unit *u = pick_time_units(ns);
-+
-+ prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
-+}
-+
-+static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
-+{
-+ const struct time_unit *u = pick_time_units(ns);
-+
-+ prt_printf(out, "%llu ", div64_u64(ns, u->nsecs));
-+ prt_tab_rjust(out);
-+ prt_printf(out, "%s", u->name);
-+}
-+
-+#ifndef __KERNEL__
-+#include <time.h>
-+void bch2_prt_datetime(struct printbuf *out, time64_t sec)
-+{
-+ time_t t = sec;
-+ char buf[64];
-+ ctime_r(&t, buf);
-+ prt_str(out, buf);
-+}
-+#else
-+void bch2_prt_datetime(struct printbuf *out, time64_t sec)
-+{
-+ char buf[64];
-+ snprintf(buf, sizeof(buf), "%ptT", &sec);
-+ prt_u64(out, sec);
-+}
-+#endif
-+
-+#define TABSTOP_SIZE 12
-+
-+static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
-+{
-+ prt_str(out, name);
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, ns);
-+ prt_newline(out);
-+}
-+
-+void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
-+{
-+ const struct time_unit *u;
-+ s64 f_mean = 0, d_mean = 0;
-+ u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
-+ int i;
-+ /*
-+ * avoid divide by zero
-+ */
-+ if (stats->freq_stats.n) {
-+ f_mean = mean_and_variance_get_mean(stats->freq_stats);
-+ f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
-+ d_mean = mean_and_variance_get_mean(stats->duration_stats);
-+ d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
-+ }
-+
-+ printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE);
-+ prt_printf(out, "count:");
-+ prt_tab(out);
-+ prt_printf(out, "%llu ",
-+ stats->duration_stats.n);
-+ printbuf_tabstop_pop(out);
-+ prt_newline(out);
-+
-+ printbuf_tabstops_reset(out);
-+
-+ printbuf_tabstop_push(out, out->indent + 20);
-+ printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
-+ printbuf_tabstop_push(out, 0);
-+ printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
-+
-+ prt_tab(out);
-+ prt_printf(out, "since mount");
-+ prt_tab_rjust(out);
-+ prt_tab(out);
-+ prt_printf(out, "recent");
-+ prt_tab_rjust(out);
-+ prt_newline(out);
-+
-+ printbuf_tabstops_reset(out);
-+ printbuf_tabstop_push(out, out->indent + 20);
-+ printbuf_tabstop_push(out, TABSTOP_SIZE);
-+ printbuf_tabstop_push(out, 2);
-+ printbuf_tabstop_push(out, TABSTOP_SIZE);
-+
-+ prt_printf(out, "duration of events");
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+
-+ pr_name_and_units(out, "min:", stats->min_duration);
-+ pr_name_and_units(out, "max:", stats->max_duration);
-+
-+ prt_printf(out, "mean:");
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, d_mean);
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
-+ prt_newline(out);
-+
-+ prt_printf(out, "stddev:");
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, d_stddev);
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));
-+
-+ printbuf_indent_sub(out, 2);
-+ prt_newline(out);
-+
-+ prt_printf(out, "time between events");
-+ prt_newline(out);
-+ printbuf_indent_add(out, 2);
-+
-+ pr_name_and_units(out, "min:", stats->min_freq);
-+ pr_name_and_units(out, "max:", stats->max_freq);
-+
-+ prt_printf(out, "mean:");
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, f_mean);
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
-+ prt_newline(out);
-+
-+ prt_printf(out, "stddev:");
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, f_stddev);
-+ prt_tab(out);
-+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));
-+
-+ printbuf_indent_sub(out, 2);
-+ prt_newline(out);
-+
-+ printbuf_tabstops_reset(out);
-+
-+ i = eytzinger0_first(NR_QUANTILES);
-+ u = pick_time_units(stats->quantiles.entries[i].m);
-+
-+ prt_printf(out, "quantiles (%s):\t", u->name);
-+ eytzinger0_for_each(i, NR_QUANTILES) {
-+ bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
-+
-+ q = max(stats->quantiles.entries[i].m, last_q);
-+ prt_printf(out, "%llu ",
-+ div_u64(q, u->nsecs));
-+ if (is_last)
-+ prt_newline(out);
-+ last_q = q;
-+ }
-+}
-+
-+void bch2_time_stats_exit(struct bch2_time_stats *stats)
-+{
-+ free_percpu(stats->buffer);
-+}
-+
-+void bch2_time_stats_init(struct bch2_time_stats *stats)
-+{
-+ memset(stats, 0, sizeof(*stats));
-+ stats->duration_stats_weighted.weight = 8;
-+ stats->freq_stats_weighted.weight = 8;
-+ stats->min_duration = U64_MAX;
-+ stats->min_freq = U64_MAX;
-+ spin_lock_init(&stats->lock);
-+}
-+
-+/* ratelimit: */
-+
-+/**
-+ * bch2_ratelimit_delay() - return how long to delay until the next time to do
-+ * some work
-+ * @d: the struct bch_ratelimit to update
-+ * Returns: the amount of time to delay by, in jiffies
-+ */
-+u64 bch2_ratelimit_delay(struct bch_ratelimit *d)
-+{
-+ u64 now = local_clock();
-+
-+ return time_after64(d->next, now)
-+ ? nsecs_to_jiffies(d->next - now)
-+ : 0;
-+}
-+
-+/**
-+ * bch2_ratelimit_increment() - increment @d by the amount of work done
-+ * @d: the struct bch_ratelimit to update
-+ * @done: the amount of work done, in arbitrary units
-+ */
-+void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done)
-+{
-+ u64 now = local_clock();
-+
-+ d->next += div_u64(done * NSEC_PER_SEC, d->rate);
-+
-+ if (time_before64(now + NSEC_PER_SEC, d->next))
-+ d->next = now + NSEC_PER_SEC;
-+
-+ if (time_after64(now - NSEC_PER_SEC * 2, d->next))
-+ d->next = now - NSEC_PER_SEC * 2;
-+}
-+
-+/* pd controller: */
-+
-+/*
-+ * Updates pd_controller. Attempts to scale inputed values to units per second.
-+ * @target: desired value
-+ * @actual: current value
-+ *
-+ * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing
-+ * it makes actual go down.
-+ */
-+void bch2_pd_controller_update(struct bch_pd_controller *pd,
-+ s64 target, s64 actual, int sign)
-+{
-+ s64 proportional, derivative, change;
-+
-+ unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ;
-+
-+ if (seconds_since_update == 0)
-+ return;
-+
-+ pd->last_update = jiffies;
-+
-+ proportional = actual - target;
-+ proportional *= seconds_since_update;
-+ proportional = div_s64(proportional, pd->p_term_inverse);
-+
-+ derivative = actual - pd->last_actual;
-+ derivative = div_s64(derivative, seconds_since_update);
-+ derivative = ewma_add(pd->smoothed_derivative, derivative,
-+ (pd->d_term / seconds_since_update) ?: 1);
-+ derivative = derivative * pd->d_term;
-+ derivative = div_s64(derivative, pd->p_term_inverse);
-+
-+ change = proportional + derivative;
-+
-+ /* Don't increase rate if not keeping up */
-+ if (change > 0 &&
-+ pd->backpressure &&
-+ time_after64(local_clock(),
-+ pd->rate.next + NSEC_PER_MSEC))
-+ change = 0;
-+
-+ change *= (sign * -1);
-+
-+ pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change,
-+ 1, UINT_MAX);
-+
-+ pd->last_actual = actual;
-+ pd->last_derivative = derivative;
-+ pd->last_proportional = proportional;
-+ pd->last_change = change;
-+ pd->last_target = target;
-+}
-+
-+void bch2_pd_controller_init(struct bch_pd_controller *pd)
-+{
-+ pd->rate.rate = 1024;
-+ pd->last_update = jiffies;
-+ pd->p_term_inverse = 6000;
-+ pd->d_term = 30;
-+ pd->d_smooth = pd->d_term;
-+ pd->backpressure = 1;
-+}
-+
-+void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd)
-+{
-+ if (!out->nr_tabstops)
-+ printbuf_tabstop_push(out, 20);
-+
-+ prt_printf(out, "rate:");
-+ prt_tab(out);
-+ prt_human_readable_s64(out, pd->rate.rate);
-+ prt_newline(out);
-+
-+ prt_printf(out, "target:");
-+ prt_tab(out);
-+ prt_human_readable_u64(out, pd->last_target);
-+ prt_newline(out);
-+
-+ prt_printf(out, "actual:");
-+ prt_tab(out);
-+ prt_human_readable_u64(out, pd->last_actual);
-+ prt_newline(out);
-+
-+ prt_printf(out, "proportional:");
-+ prt_tab(out);
-+ prt_human_readable_s64(out, pd->last_proportional);
-+ prt_newline(out);
-+
-+ prt_printf(out, "derivative:");
-+ prt_tab(out);
-+ prt_human_readable_s64(out, pd->last_derivative);
-+ prt_newline(out);
-+
-+ prt_printf(out, "change:");
-+ prt_tab(out);
-+ prt_human_readable_s64(out, pd->last_change);
-+ prt_newline(out);
-+
-+ prt_printf(out, "next io:");
-+ prt_tab(out);
-+ prt_printf(out, "%llims", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC));
-+ prt_newline(out);
-+}
-+
-+/* misc: */
-+
-+void bch2_bio_map(struct bio *bio, void *base, size_t size)
-+{
-+ while (size) {
-+ struct page *page = is_vmalloc_addr(base)
-+ ? vmalloc_to_page(base)
-+ : virt_to_page(base);
-+ unsigned offset = offset_in_page(base);
-+ unsigned len = min_t(size_t, PAGE_SIZE - offset, size);
-+
-+ BUG_ON(!bio_add_page(bio, page, len, offset));
-+ size -= len;
-+ base += len;
-+ }
-+}
-+
-+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
-+{
-+ while (size) {
-+ struct page *page = alloc_pages(gfp_mask, 0);
-+ unsigned len = min_t(size_t, PAGE_SIZE, size);
-+
-+ if (!page)
-+ return -ENOMEM;
-+
-+ if (unlikely(!bio_add_page(bio, page, len, 0))) {
-+ __free_page(page);
-+ break;
-+ }
-+
-+ size -= len;
-+ }
-+
-+ return 0;
-+}
-+
-+size_t bch2_rand_range(size_t max)
-+{
-+ size_t rand;
-+
-+ if (!max)
-+ return 0;
-+
-+ do {
-+ rand = get_random_long();
-+ rand &= roundup_pow_of_two(max) - 1;
-+ } while (rand >= max);
-+
-+ return rand;
-+}
-+
-+void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
-+{
-+ struct bio_vec bv;
-+ struct bvec_iter iter;
-+
-+ __bio_for_each_segment(bv, dst, iter, dst_iter) {
-+ void *dstp = kmap_local_page(bv.bv_page);
-+
-+ memcpy(dstp + bv.bv_offset, src, bv.bv_len);
-+ kunmap_local(dstp);
-+
-+ src += bv.bv_len;
-+ }
-+}
-+
-+void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
-+{
-+ struct bio_vec bv;
-+ struct bvec_iter iter;
-+
-+ __bio_for_each_segment(bv, src, iter, src_iter) {
-+ void *srcp = kmap_local_page(bv.bv_page);
-+
-+ memcpy(dst, srcp + bv.bv_offset, bv.bv_len);
-+ kunmap_local(srcp);
-+
-+ dst += bv.bv_len;
-+ }
-+}
-+
-+static int alignment_ok(const void *base, size_t align)
-+{
-+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
-+ ((unsigned long)base & (align - 1)) == 0;
-+}
-+
-+static void u32_swap(void *a, void *b, size_t size)
-+{
-+ u32 t = *(u32 *)a;
-+ *(u32 *)a = *(u32 *)b;
-+ *(u32 *)b = t;
-+}
-+
-+static void u64_swap(void *a, void *b, size_t size)
-+{
-+ u64 t = *(u64 *)a;
-+ *(u64 *)a = *(u64 *)b;
-+ *(u64 *)b = t;
-+}
-+
-+static void generic_swap(void *a, void *b, size_t size)
-+{
-+ char t;
-+
-+ do {
-+ t = *(char *)a;
-+ *(char *)a++ = *(char *)b;
-+ *(char *)b++ = t;
-+ } while (--size > 0);
-+}
-+
-+static inline int do_cmp(void *base, size_t n, size_t size,
-+ int (*cmp_func)(const void *, const void *, size_t),
-+ size_t l, size_t r)
-+{
-+ return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
-+ base + inorder_to_eytzinger0(r, n) * size,
-+ size);
-+}
-+
-+static inline void do_swap(void *base, size_t n, size_t size,
-+ void (*swap_func)(void *, void *, size_t),
-+ size_t l, size_t r)
-+{
-+ swap_func(base + inorder_to_eytzinger0(l, n) * size,
-+ base + inorder_to_eytzinger0(r, n) * size,
-+ size);
-+}
-+
-+void eytzinger0_sort(void *base, size_t n, size_t size,
-+ int (*cmp_func)(const void *, const void *, size_t),
-+ void (*swap_func)(void *, void *, size_t))
-+{
-+ int i, c, r;
-+
-+ if (!swap_func) {
-+ if (size == 4 && alignment_ok(base, 4))
-+ swap_func = u32_swap;
-+ else if (size == 8 && alignment_ok(base, 8))
-+ swap_func = u64_swap;
-+ else
-+ swap_func = generic_swap;
-+ }
-+
-+ /* heapify */
-+ for (i = n / 2 - 1; i >= 0; --i) {
-+ for (r = i; r * 2 + 1 < n; r = c) {
-+ c = r * 2 + 1;
-+
-+ if (c + 1 < n &&
-+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-+ c++;
-+
-+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-+ break;
-+
-+ do_swap(base, n, size, swap_func, r, c);
-+ }
-+ }
-+
-+ /* sort */
-+ for (i = n - 1; i > 0; --i) {
-+ do_swap(base, n, size, swap_func, 0, i);
-+
-+ for (r = 0; r * 2 + 1 < i; r = c) {
-+ c = r * 2 + 1;
-+
-+ if (c + 1 < i &&
-+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-+ c++;
-+
-+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-+ break;
-+
-+ do_swap(base, n, size, swap_func, r, c);
-+ }
-+ }
-+}
-+
-+void sort_cmp_size(void *base, size_t num, size_t size,
-+ int (*cmp_func)(const void *, const void *, size_t),
-+ void (*swap_func)(void *, void *, size_t size))
-+{
-+ /* pre-scale counters for performance */
-+ int i = (num/2 - 1) * size, n = num * size, c, r;
-+
-+ if (!swap_func) {
-+ if (size == 4 && alignment_ok(base, 4))
-+ swap_func = u32_swap;
-+ else if (size == 8 && alignment_ok(base, 8))
-+ swap_func = u64_swap;
-+ else
-+ swap_func = generic_swap;
-+ }
-+
-+ /* heapify */
-+ for ( ; i >= 0; i -= size) {
-+ for (r = i; r * 2 + size < n; r = c) {
-+ c = r * 2 + size;
-+ if (c < n - size &&
-+ cmp_func(base + c, base + c + size, size) < 0)
-+ c += size;
-+ if (cmp_func(base + r, base + c, size) >= 0)
-+ break;
-+ swap_func(base + r, base + c, size);
-+ }
-+ }
-+
-+ /* sort */
-+ for (i = n - size; i > 0; i -= size) {
-+ swap_func(base, base + i, size);
-+ for (r = 0; r * 2 + size < i; r = c) {
-+ c = r * 2 + size;
-+ if (c < i - size &&
-+ cmp_func(base + c, base + c + size, size) < 0)
-+ c += size;
-+ if (cmp_func(base + r, base + c, size) >= 0)
-+ break;
-+ swap_func(base + r, base + c, size);
-+ }
-+ }
-+}
-+
-+static void mempool_free_vp(void *element, void *pool_data)
-+{
-+ size_t size = (size_t) pool_data;
-+
-+ vpfree(element, size);
-+}
-+
-+static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
-+{
-+ size_t size = (size_t) pool_data;
-+
-+ return vpmalloc(size, gfp_mask);
-+}
-+
-+int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
-+{
-+ return size < PAGE_SIZE
-+ ? mempool_init_kmalloc_pool(pool, min_nr, size)
-+ : mempool_init(pool, min_nr, mempool_alloc_vp,
-+ mempool_free_vp, (void *) size);
-+}
-+
-+#if 0
-+void eytzinger1_test(void)
-+{
-+ unsigned inorder, eytz, size;
-+
-+ pr_info("1 based eytzinger test:");
-+
-+ for (size = 2;
-+ size < 65536;
-+ size++) {
-+ unsigned extra = eytzinger1_extra(size);
-+
-+ if (!(size % 4096))
-+ pr_info("tree size %u", size);
-+
-+ BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size));
-+ BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size));
-+
-+ BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0);
-+ BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0);
-+
-+ inorder = 1;
-+ eytzinger1_for_each(eytz, size) {
-+ BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz);
-+ BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder);
-+ BUG_ON(eytz != eytzinger1_last(size) &&
-+ eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz);
-+
-+ inorder++;
-+ }
-+ }
-+}
-+
-+void eytzinger0_test(void)
-+{
-+
-+ unsigned inorder, eytz, size;
-+
-+ pr_info("0 based eytzinger test:");
-+
-+ for (size = 1;
-+ size < 65536;
-+ size++) {
-+ unsigned extra = eytzinger0_extra(size);
-+
-+ if (!(size % 4096))
-+ pr_info("tree size %u", size);
-+
-+ BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size));
-+ BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size));
-+
-+ BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1);
-+ BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1);
-+
-+ inorder = 0;
-+ eytzinger0_for_each(eytz, size) {
-+ BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz);
-+ BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder);
-+ BUG_ON(eytz != eytzinger0_last(size) &&
-+ eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz);
-+
-+ inorder++;
-+ }
-+ }
-+}
-+
-+static inline int cmp_u16(const void *_l, const void *_r, size_t size)
-+{
-+ const u16 *l = _l, *r = _r;
-+
-+ return (*l > *r) - (*r - *l);
-+}
-+
-+static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
-+{
-+ int i, c1 = -1, c2 = -1;
-+ ssize_t r;
-+
-+ r = eytzinger0_find_le(test_array, nr,
-+ sizeof(test_array[0]),
-+ cmp_u16, &search);
-+ if (r >= 0)
-+ c1 = test_array[r];
-+
-+ for (i = 0; i < nr; i++)
-+ if (test_array[i] <= search && test_array[i] > c2)
-+ c2 = test_array[i];
-+
-+ if (c1 != c2) {
-+ eytzinger0_for_each(i, nr)
-+ pr_info("[%3u] = %12u", i, test_array[i]);
-+ pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
-+ i, r, c1, c2);
-+ }
-+}
-+
-+void eytzinger0_find_test(void)
-+{
-+ unsigned i, nr, allocated = 1 << 12;
-+ u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL);
-+
-+ for (nr = 1; nr < allocated; nr++) {
-+ pr_info("testing %u elems", nr);
-+
-+ get_random_bytes(test_array, nr * sizeof(test_array[0]));
-+ eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL);
-+
-+ /* verify array is sorted correctly: */
-+ eytzinger0_for_each(i, nr)
-+ BUG_ON(i != eytzinger0_last(nr) &&
-+ test_array[i] > test_array[eytzinger0_next(i, nr)]);
-+
-+ for (i = 0; i < U16_MAX; i += 1 << 12)
-+ eytzinger0_find_test_val(test_array, nr, i);
-+
-+ for (i = 0; i < nr; i++) {
-+ eytzinger0_find_test_val(test_array, nr, test_array[i] - 1);
-+ eytzinger0_find_test_val(test_array, nr, test_array[i]);
-+ eytzinger0_find_test_val(test_array, nr, test_array[i] + 1);
-+ }
-+ }
-+
-+ kfree(test_array);
-+}
-+#endif
-+
-+/*
-+ * Accumulate percpu counters onto one cpu's copy - only valid when access
-+ * against any percpu counter is guarded against
-+ */
-+u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
-+{
-+ u64 *ret;
-+ int cpu;
-+
-+ /* access to pcpu vars has to be blocked by other locking */
-+ preempt_disable();
-+ ret = this_cpu_ptr(p);
-+ preempt_enable();
-+
-+ for_each_possible_cpu(cpu) {
-+ u64 *i = per_cpu_ptr(p, cpu);
-+
-+ if (i != ret) {
-+ acc_u64s(ret, i, nr);
-+ memset(i, 0, nr * sizeof(u64));
-+ }
-+ }
-+
-+ return ret;
-+}
-diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
-new file mode 100644
-index 000000000000..2984b57b2958
---- /dev/null
-+++ b/fs/bcachefs/util.h
-@@ -0,0 +1,833 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_UTIL_H
-+#define _BCACHEFS_UTIL_H
-+
-+#include <linux/bio.h>
-+#include <linux/blkdev.h>
-+#include <linux/closure.h>
-+#include <linux/errno.h>
-+#include <linux/freezer.h>
-+#include <linux/kernel.h>
-+#include <linux/sched/clock.h>
-+#include <linux/llist.h>
-+#include <linux/log2.h>
-+#include <linux/percpu.h>
-+#include <linux/preempt.h>
-+#include <linux/ratelimit.h>
-+#include <linux/slab.h>
-+#include <linux/vmalloc.h>
-+#include <linux/workqueue.h>
-+
-+#include "mean_and_variance.h"
-+
-+#include "darray.h"
-+
-+struct closure;
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+#define EBUG_ON(cond) BUG_ON(cond)
-+#else
-+#define EBUG_ON(cond)
-+#endif
-+
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+#define CPU_BIG_ENDIAN 0
-+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-+#define CPU_BIG_ENDIAN 1
-+#endif
-+
-+/* type hackery */
-+
-+#define type_is_exact(_val, _type) \
-+ __builtin_types_compatible_p(typeof(_val), _type)
-+
-+#define type_is(_val, _type) \
-+ (__builtin_types_compatible_p(typeof(_val), _type) || \
-+ __builtin_types_compatible_p(typeof(_val), const _type))
-+
-+/* Userspace doesn't align allocations as nicely as the kernel allocators: */
-+static inline size_t buf_pages(void *p, size_t len)
-+{
-+ return DIV_ROUND_UP(len +
-+ ((unsigned long) p & (PAGE_SIZE - 1)),
-+ PAGE_SIZE);
-+}
-+
-+static inline void vpfree(void *p, size_t size)
-+{
-+ if (is_vmalloc_addr(p))
-+ vfree(p);
-+ else
-+ free_pages((unsigned long) p, get_order(size));
-+}
-+
-+static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
-+{
-+ return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
-+ get_order(size)) ?:
-+ __vmalloc(size, gfp_mask);
-+}
-+
-+static inline void kvpfree(void *p, size_t size)
-+{
-+ if (size < PAGE_SIZE)
-+ kfree(p);
-+ else
-+ vpfree(p, size);
-+}
-+
-+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
-+{
-+ return size < PAGE_SIZE
-+ ? kmalloc(size, gfp_mask)
-+ : vpmalloc(size, gfp_mask);
-+}
-+
-+int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t);
-+
-+#define HEAP(type) \
-+struct { \
-+ size_t size, used; \
-+ type *data; \
-+}
-+
-+#define DECLARE_HEAP(type, name) HEAP(type) name
-+
-+#define init_heap(heap, _size, gfp) \
-+({ \
-+ (heap)->used = 0; \
-+ (heap)->size = (_size); \
-+ (heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\
-+ (gfp)); \
-+})
-+
-+#define free_heap(heap) \
-+do { \
-+ kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0])); \
-+ (heap)->data = NULL; \
-+} while (0)
-+
-+#define heap_set_backpointer(h, i, _fn) \
-+do { \
-+ void (*fn)(typeof(h), size_t) = _fn; \
-+ if (fn) \
-+ fn(h, i); \
-+} while (0)
-+
-+#define heap_swap(h, i, j, set_backpointer) \
-+do { \
-+ swap((h)->data[i], (h)->data[j]); \
-+ heap_set_backpointer(h, i, set_backpointer); \
-+ heap_set_backpointer(h, j, set_backpointer); \
-+} while (0)
-+
-+#define heap_peek(h) \
-+({ \
-+ EBUG_ON(!(h)->used); \
-+ (h)->data[0]; \
-+})
-+
-+#define heap_full(h) ((h)->used == (h)->size)
-+
-+#define heap_sift_down(h, i, cmp, set_backpointer) \
-+do { \
-+ size_t _c, _j = i; \
-+ \
-+ for (; _j * 2 + 1 < (h)->used; _j = _c) { \
-+ _c = _j * 2 + 1; \
-+ if (_c + 1 < (h)->used && \
-+ cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \
-+ _c++; \
-+ \
-+ if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \
-+ break; \
-+ heap_swap(h, _c, _j, set_backpointer); \
-+ } \
-+} while (0)
-+
-+#define heap_sift_up(h, i, cmp, set_backpointer) \
-+do { \
-+ while (i) { \
-+ size_t p = (i - 1) / 2; \
-+ if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \
-+ break; \
-+ heap_swap(h, i, p, set_backpointer); \
-+ i = p; \
-+ } \
-+} while (0)
-+
-+#define __heap_add(h, d, cmp, set_backpointer) \
-+({ \
-+ size_t _i = (h)->used++; \
-+ (h)->data[_i] = d; \
-+ heap_set_backpointer(h, _i, set_backpointer); \
-+ \
-+ heap_sift_up(h, _i, cmp, set_backpointer); \
-+ _i; \
-+})
-+
-+#define heap_add(h, d, cmp, set_backpointer) \
-+({ \
-+ bool _r = !heap_full(h); \
-+ if (_r) \
-+ __heap_add(h, d, cmp, set_backpointer); \
-+ _r; \
-+})
-+
-+#define heap_add_or_replace(h, new, cmp, set_backpointer) \
-+do { \
-+ if (!heap_add(h, new, cmp, set_backpointer) && \
-+ cmp(h, new, heap_peek(h)) >= 0) { \
-+ (h)->data[0] = new; \
-+ heap_set_backpointer(h, 0, set_backpointer); \
-+ heap_sift_down(h, 0, cmp, set_backpointer); \
-+ } \
-+} while (0)
-+
-+#define heap_del(h, i, cmp, set_backpointer) \
-+do { \
-+ size_t _i = (i); \
-+ \
-+ BUG_ON(_i >= (h)->used); \
-+ (h)->used--; \
-+ if ((_i) < (h)->used) { \
-+ heap_swap(h, _i, (h)->used, set_backpointer); \
-+ heap_sift_up(h, _i, cmp, set_backpointer); \
-+ heap_sift_down(h, _i, cmp, set_backpointer); \
-+ } \
-+} while (0)
-+
-+#define heap_pop(h, d, cmp, set_backpointer) \
-+({ \
-+ bool _r = (h)->used; \
-+ if (_r) { \
-+ (d) = (h)->data[0]; \
-+ heap_del(h, 0, cmp, set_backpointer); \
-+ } \
-+ _r; \
-+})
-+
-+#define heap_resort(heap, cmp, set_backpointer) \
-+do { \
-+ ssize_t _i; \
-+ for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
-+ heap_sift_down(heap, _i, cmp, set_backpointer); \
-+} while (0)
-+
-+#define ANYSINT_MAX(t) \
-+ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
-+
-+#include "printbuf.h"
-+
-+#define prt_vprintf(_out, ...) bch2_prt_vprintf(_out, __VA_ARGS__)
-+#define prt_printf(_out, ...) bch2_prt_printf(_out, __VA_ARGS__)
-+#define printbuf_str(_buf) bch2_printbuf_str(_buf)
-+#define printbuf_exit(_buf) bch2_printbuf_exit(_buf)
-+
-+#define printbuf_tabstops_reset(_buf) bch2_printbuf_tabstops_reset(_buf)
-+#define printbuf_tabstop_pop(_buf) bch2_printbuf_tabstop_pop(_buf)
-+#define printbuf_tabstop_push(_buf, _n) bch2_printbuf_tabstop_push(_buf, _n)
-+
-+#define printbuf_indent_add(_out, _n) bch2_printbuf_indent_add(_out, _n)
-+#define printbuf_indent_sub(_out, _n) bch2_printbuf_indent_sub(_out, _n)
-+
-+#define prt_newline(_out) bch2_prt_newline(_out)
-+#define prt_tab(_out) bch2_prt_tab(_out)
-+#define prt_tab_rjust(_out) bch2_prt_tab_rjust(_out)
-+
-+#define prt_bytes_indented(...) bch2_prt_bytes_indented(__VA_ARGS__)
-+#define prt_u64(_out, _v) prt_printf(_out, "%llu", (u64) (_v))
-+#define prt_human_readable_u64(...) bch2_prt_human_readable_u64(__VA_ARGS__)
-+#define prt_human_readable_s64(...) bch2_prt_human_readable_s64(__VA_ARGS__)
-+#define prt_units_u64(...) bch2_prt_units_u64(__VA_ARGS__)
-+#define prt_units_s64(...) bch2_prt_units_s64(__VA_ARGS__)
-+#define prt_string_option(...) bch2_prt_string_option(__VA_ARGS__)
-+#define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__)
-+
-+void bch2_pr_time_units(struct printbuf *, u64);
-+void bch2_prt_datetime(struct printbuf *, time64_t);
-+
-+#ifdef __KERNEL__
-+static inline void uuid_unparse_lower(u8 *uuid, char *out)
-+{
-+ sprintf(out, "%pUb", uuid);
-+}
-+#else
-+#include <uuid/uuid.h>
-+#endif
-+
-+static inline void pr_uuid(struct printbuf *out, u8 *uuid)
-+{
-+ char uuid_str[40];
-+
-+ uuid_unparse_lower(uuid, uuid_str);
-+ prt_printf(out, "%s", uuid_str);
-+}
-+
-+int bch2_strtoint_h(const char *, int *);
-+int bch2_strtouint_h(const char *, unsigned int *);
-+int bch2_strtoll_h(const char *, long long *);
-+int bch2_strtoull_h(const char *, unsigned long long *);
-+int bch2_strtou64_h(const char *, u64 *);
-+
-+static inline int bch2_strtol_h(const char *cp, long *res)
-+{
-+#if BITS_PER_LONG == 32
-+ return bch2_strtoint_h(cp, (int *) res);
-+#else
-+ return bch2_strtoll_h(cp, (long long *) res);
-+#endif
-+}
-+
-+static inline int bch2_strtoul_h(const char *cp, long *res)
-+{
-+#if BITS_PER_LONG == 32
-+ return bch2_strtouint_h(cp, (unsigned int *) res);
-+#else
-+ return bch2_strtoull_h(cp, (unsigned long long *) res);
-+#endif
-+}
-+
-+#define strtoi_h(cp, res) \
-+ ( type_is(*res, int) ? bch2_strtoint_h(cp, (void *) res)\
-+ : type_is(*res, long) ? bch2_strtol_h(cp, (void *) res)\
-+ : type_is(*res, long long) ? bch2_strtoll_h(cp, (void *) res)\
-+ : type_is(*res, unsigned) ? bch2_strtouint_h(cp, (void *) res)\
-+ : type_is(*res, unsigned long) ? bch2_strtoul_h(cp, (void *) res)\
-+ : type_is(*res, unsigned long long) ? bch2_strtoull_h(cp, (void *) res)\
-+ : -EINVAL)
-+
-+#define strtoul_safe(cp, var) \
-+({ \
-+ unsigned long _v; \
-+ int _r = kstrtoul(cp, 10, &_v); \
-+ if (!_r) \
-+ var = _v; \
-+ _r; \
-+})
-+
-+#define strtoul_safe_clamp(cp, var, min, max) \
-+({ \
-+ unsigned long _v; \
-+ int _r = kstrtoul(cp, 10, &_v); \
-+ if (!_r) \
-+ var = clamp_t(typeof(var), _v, min, max); \
-+ _r; \
-+})
-+
-+#define strtoul_safe_restrict(cp, var, min, max) \
-+({ \
-+ unsigned long _v; \
-+ int _r = kstrtoul(cp, 10, &_v); \
-+ if (!_r && _v >= min && _v <= max) \
-+ var = _v; \
-+ else \
-+ _r = -EINVAL; \
-+ _r; \
-+})
-+
-+#define snprint(out, var) \
-+ prt_printf(out, \
-+ type_is(var, int) ? "%i\n" \
-+ : type_is(var, unsigned) ? "%u\n" \
-+ : type_is(var, long) ? "%li\n" \
-+ : type_is(var, unsigned long) ? "%lu\n" \
-+ : type_is(var, s64) ? "%lli\n" \
-+ : type_is(var, u64) ? "%llu\n" \
-+ : type_is(var, char *) ? "%s\n" \
-+ : "%i\n", var)
-+
-+bool bch2_is_zero(const void *, size_t);
-+
-+u64 bch2_read_flag_list(char *, const char * const[]);
-+
-+void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
-+
-+void bch2_print_string_as_lines(const char *prefix, const char *lines);
-+
-+typedef DARRAY(unsigned long) bch_stacktrace;
-+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *);
-+void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *);
-+int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *);
-+
-+#define NR_QUANTILES 15
-+#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
-+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
-+#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES)
-+
-+struct bch2_quantiles {
-+ struct bch2_quantile_entry {
-+ u64 m;
-+ u64 step;
-+ } entries[NR_QUANTILES];
-+};
-+
-+struct bch2_time_stat_buffer {
-+ unsigned nr;
-+ struct bch2_time_stat_buffer_entry {
-+ u64 start;
-+ u64 end;
-+ } entries[32];
-+};
-+
-+struct bch2_time_stats {
-+ spinlock_t lock;
-+ /* all fields are in nanoseconds */
-+ u64 max_duration;
-+ u64 min_duration;
-+ u64 max_freq;
-+ u64 min_freq;
-+ u64 last_event;
-+ struct bch2_quantiles quantiles;
-+
-+ struct mean_and_variance duration_stats;
-+ struct mean_and_variance_weighted duration_stats_weighted;
-+ struct mean_and_variance freq_stats;
-+ struct mean_and_variance_weighted freq_stats_weighted;
-+ struct bch2_time_stat_buffer __percpu *buffer;
-+};
-+
-+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-+void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);
-+#else
-+static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {}
-+#endif
-+
-+static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
-+{
-+ __bch2_time_stats_update(stats, start, local_clock());
-+}
-+
-+void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *);
-+
-+void bch2_time_stats_exit(struct bch2_time_stats *);
-+void bch2_time_stats_init(struct bch2_time_stats *);
-+
-+#define ewma_add(ewma, val, weight) \
-+({ \
-+ typeof(ewma) _ewma = (ewma); \
-+ typeof(weight) _weight = (weight); \
-+ \
-+ (((_ewma << _weight) - _ewma) + (val)) >> _weight; \
-+})
-+
-+struct bch_ratelimit {
-+ /* Next time we want to do some work, in nanoseconds */
-+ u64 next;
-+
-+ /*
-+ * Rate at which we want to do work, in units per nanosecond
-+ * The units here correspond to the units passed to
-+ * bch2_ratelimit_increment()
-+ */
-+ unsigned rate;
-+};
-+
-+static inline void bch2_ratelimit_reset(struct bch_ratelimit *d)
-+{
-+ d->next = local_clock();
-+}
-+
-+u64 bch2_ratelimit_delay(struct bch_ratelimit *);
-+void bch2_ratelimit_increment(struct bch_ratelimit *, u64);
-+
-+struct bch_pd_controller {
-+ struct bch_ratelimit rate;
-+ unsigned long last_update;
-+
-+ s64 last_actual;
-+ s64 smoothed_derivative;
-+
-+ unsigned p_term_inverse;
-+ unsigned d_smooth;
-+ unsigned d_term;
-+
-+ /* for exporting to sysfs (no effect on behavior) */
-+ s64 last_derivative;
-+ s64 last_proportional;
-+ s64 last_change;
-+ s64 last_target;
-+
-+ /*
-+ * If true, the rate will not increase if bch2_ratelimit_delay()
-+ * is not being called often enough.
-+ */
-+ bool backpressure;
-+};
-+
-+void bch2_pd_controller_update(struct bch_pd_controller *, s64, s64, int);
-+void bch2_pd_controller_init(struct bch_pd_controller *);
-+void bch2_pd_controller_debug_to_text(struct printbuf *, struct bch_pd_controller *);
-+
-+#define sysfs_pd_controller_attribute(name) \
-+ rw_attribute(name##_rate); \
-+ rw_attribute(name##_rate_bytes); \
-+ rw_attribute(name##_rate_d_term); \
-+ rw_attribute(name##_rate_p_term_inverse); \
-+ read_attribute(name##_rate_debug)
-+
-+#define sysfs_pd_controller_files(name) \
-+ &sysfs_##name##_rate, \
-+ &sysfs_##name##_rate_bytes, \
-+ &sysfs_##name##_rate_d_term, \
-+ &sysfs_##name##_rate_p_term_inverse, \
-+ &sysfs_##name##_rate_debug
-+
-+#define sysfs_pd_controller_show(name, var) \
-+do { \
-+ sysfs_hprint(name##_rate, (var)->rate.rate); \
-+ sysfs_print(name##_rate_bytes, (var)->rate.rate); \
-+ sysfs_print(name##_rate_d_term, (var)->d_term); \
-+ sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \
-+ \
-+ if (attr == &sysfs_##name##_rate_debug) \
-+ bch2_pd_controller_debug_to_text(out, var); \
-+} while (0)
-+
-+#define sysfs_pd_controller_store(name, var) \
-+do { \
-+ sysfs_strtoul_clamp(name##_rate, \
-+ (var)->rate.rate, 1, UINT_MAX); \
-+ sysfs_strtoul_clamp(name##_rate_bytes, \
-+ (var)->rate.rate, 1, UINT_MAX); \
-+ sysfs_strtoul(name##_rate_d_term, (var)->d_term); \
-+ sysfs_strtoul_clamp(name##_rate_p_term_inverse, \
-+ (var)->p_term_inverse, 1, INT_MAX); \
-+} while (0)
-+
-+#define container_of_or_null(ptr, type, member) \
-+({ \
-+ typeof(ptr) _ptr = ptr; \
-+ _ptr ? container_of(_ptr, type, member) : NULL; \
-+})
-+
-+/* Does linear interpolation between powers of two */
-+static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
-+{
-+ unsigned fract = x & ~(~0 << fract_bits);
-+
-+ x >>= fract_bits;
-+ x = 1 << x;
-+ x += (x * fract) >> fract_bits;
-+
-+ return x;
-+}
-+
-+void bch2_bio_map(struct bio *bio, void *base, size_t);
-+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
-+
-+static inline sector_t bdev_sectors(struct block_device *bdev)
-+{
-+ return bdev->bd_inode->i_size >> 9;
-+}
-+
-+#define closure_bio_submit(bio, cl) \
-+do { \
-+ closure_get(cl); \
-+ submit_bio(bio); \
-+} while (0)
-+
-+#define kthread_wait(cond) \
-+({ \
-+ int _ret = 0; \
-+ \
-+ while (1) { \
-+ set_current_state(TASK_INTERRUPTIBLE); \
-+ if (kthread_should_stop()) { \
-+ _ret = -1; \
-+ break; \
-+ } \
-+ \
-+ if (cond) \
-+ break; \
-+ \
-+ schedule(); \
-+ } \
-+ set_current_state(TASK_RUNNING); \
-+ _ret; \
-+})
-+
-+#define kthread_wait_freezable(cond) \
-+({ \
-+ int _ret = 0; \
-+ while (1) { \
-+ set_current_state(TASK_INTERRUPTIBLE); \
-+ if (kthread_should_stop()) { \
-+ _ret = -1; \
-+ break; \
-+ } \
-+ \
-+ if (cond) \
-+ break; \
-+ \
-+ schedule(); \
-+ try_to_freeze(); \
-+ } \
-+ set_current_state(TASK_RUNNING); \
-+ _ret; \
-+})
-+
-+size_t bch2_rand_range(size_t);
-+
-+void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
-+void memcpy_from_bio(void *, struct bio *, struct bvec_iter);
-+
-+static inline void memcpy_u64s_small(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ u64 *d = dst;
-+ const u64 *s = src;
-+
-+ while (u64s--)
-+ *d++ = *s++;
-+}
-+
-+static inline void __memcpy_u64s(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+#ifdef CONFIG_X86_64
-+ long d0, d1, d2;
-+
-+ asm volatile("rep ; movsq"
-+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-+ : "0" (u64s), "1" (dst), "2" (src)
-+ : "memory");
-+#else
-+ u64 *d = dst;
-+ const u64 *s = src;
-+
-+ while (u64s--)
-+ *d++ = *s++;
-+#endif
-+}
-+
-+static inline void memcpy_u64s(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ EBUG_ON(!(dst >= src + u64s * sizeof(u64) ||
-+ dst + u64s * sizeof(u64) <= src));
-+
-+ __memcpy_u64s(dst, src, u64s);
-+}
-+
-+static inline void __memmove_u64s_down(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ __memcpy_u64s(dst, src, u64s);
-+}
-+
-+static inline void memmove_u64s_down(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ EBUG_ON(dst > src);
-+
-+ __memmove_u64s_down(dst, src, u64s);
-+}
-+
-+static inline void __memmove_u64s_down_small(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ memcpy_u64s_small(dst, src, u64s);
-+}
-+
-+static inline void memmove_u64s_down_small(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ EBUG_ON(dst > src);
-+
-+ __memmove_u64s_down_small(dst, src, u64s);
-+}
-+
-+static inline void __memmove_u64s_up_small(void *_dst, const void *_src,
-+ unsigned u64s)
-+{
-+ u64 *dst = (u64 *) _dst + u64s;
-+ u64 *src = (u64 *) _src + u64s;
-+
-+ while (u64s--)
-+ *--dst = *--src;
-+}
-+
-+static inline void memmove_u64s_up_small(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ EBUG_ON(dst < src);
-+
-+ __memmove_u64s_up_small(dst, src, u64s);
-+}
-+
-+static inline void __memmove_u64s_up(void *_dst, const void *_src,
-+ unsigned u64s)
-+{
-+ u64 *dst = (u64 *) _dst + u64s - 1;
-+ u64 *src = (u64 *) _src + u64s - 1;
-+
-+#ifdef CONFIG_X86_64
-+ long d0, d1, d2;
-+
-+ asm volatile("std ;\n"
-+ "rep ; movsq\n"
-+ "cld ;\n"
-+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-+ : "0" (u64s), "1" (dst), "2" (src)
-+ : "memory");
-+#else
-+ while (u64s--)
-+ *dst-- = *src--;
-+#endif
-+}
-+
-+static inline void memmove_u64s_up(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ EBUG_ON(dst < src);
-+
-+ __memmove_u64s_up(dst, src, u64s);
-+}
-+
-+static inline void memmove_u64s(void *dst, const void *src,
-+ unsigned u64s)
-+{
-+ if (dst < src)
-+ __memmove_u64s_down(dst, src, u64s);
-+ else
-+ __memmove_u64s_up(dst, src, u64s);
-+}
-+
-+/* Set the last few bytes up to a u64 boundary given an offset into a buffer. */
-+static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
-+{
-+ unsigned rem = round_up(bytes, sizeof(u64)) - bytes;
-+
-+ memset(s + bytes, c, rem);
-+}
-+
-+void sort_cmp_size(void *base, size_t num, size_t size,
-+ int (*cmp_func)(const void *, const void *, size_t),
-+ void (*swap_func)(void *, void *, size_t));
-+
-+/* just the memmove, doesn't update @_nr */
-+#define __array_insert_item(_array, _nr, _pos) \
-+ memmove(&(_array)[(_pos) + 1], \
-+ &(_array)[(_pos)], \
-+ sizeof((_array)[0]) * ((_nr) - (_pos)))
-+
-+#define array_insert_item(_array, _nr, _pos, _new_item) \
-+do { \
-+ __array_insert_item(_array, _nr, _pos); \
-+ (_nr)++; \
-+ (_array)[(_pos)] = (_new_item); \
-+} while (0)
-+
-+#define array_remove_items(_array, _nr, _pos, _nr_to_remove) \
-+do { \
-+ (_nr) -= (_nr_to_remove); \
-+ memmove(&(_array)[(_pos)], \
-+ &(_array)[(_pos) + (_nr_to_remove)], \
-+ sizeof((_array)[0]) * ((_nr) - (_pos))); \
-+} while (0)
-+
-+#define array_remove_item(_array, _nr, _pos) \
-+ array_remove_items(_array, _nr, _pos, 1)
-+
-+static inline void __move_gap(void *array, size_t element_size,
-+ size_t nr, size_t size,
-+ size_t old_gap, size_t new_gap)
-+{
-+ size_t gap_end = old_gap + size - nr;
-+
-+ if (new_gap < old_gap) {
-+ size_t move = old_gap - new_gap;
-+
-+ memmove(array + element_size * (gap_end - move),
-+ array + element_size * (old_gap - move),
-+ element_size * move);
-+ } else if (new_gap > old_gap) {
-+ size_t move = new_gap - old_gap;
-+
-+ memmove(array + element_size * old_gap,
-+ array + element_size * gap_end,
-+ element_size * move);
-+ }
-+}
-+
-+/* Move the gap in a gap buffer: */
-+#define move_gap(_array, _nr, _size, _old_gap, _new_gap) \
-+ __move_gap(_array, sizeof(_array[0]), _nr, _size, _old_gap, _new_gap)
-+
-+#define bubble_sort(_base, _nr, _cmp) \
-+do { \
-+ ssize_t _i, _last; \
-+ bool _swapped = true; \
-+ \
-+ for (_last= (ssize_t) (_nr) - 1; _last > 0 && _swapped; --_last) {\
-+ _swapped = false; \
-+ for (_i = 0; _i < _last; _i++) \
-+ if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \
-+ swap((_base)[_i], (_base)[_i + 1]); \
-+ _swapped = true; \
-+ } \
-+ } \
-+} while (0)
-+
-+static inline u64 percpu_u64_get(u64 __percpu *src)
-+{
-+ u64 ret = 0;
-+ int cpu;
-+
-+ for_each_possible_cpu(cpu)
-+ ret += *per_cpu_ptr(src, cpu);
-+ return ret;
-+}
-+
-+static inline void percpu_u64_set(u64 __percpu *dst, u64 src)
-+{
-+ int cpu;
-+
-+ for_each_possible_cpu(cpu)
-+ *per_cpu_ptr(dst, cpu) = 0;
-+ this_cpu_write(*dst, src);
-+}
-+
-+static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < nr; i++)
-+ acc[i] += src[i];
-+}
-+
-+static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src,
-+ unsigned nr)
-+{
-+ int cpu;
-+
-+ for_each_possible_cpu(cpu)
-+ acc_u64s(acc, per_cpu_ptr(src, cpu), nr);
-+}
-+
-+static inline void percpu_memset(void __percpu *p, int c, size_t bytes)
-+{
-+ int cpu;
-+
-+ for_each_possible_cpu(cpu)
-+ memset(per_cpu_ptr(p, cpu), c, bytes);
-+}
-+
-+u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
-+
-+#define cmp_int(l, r) ((l > r) - (l < r))
-+
-+static inline int u8_cmp(u8 l, u8 r)
-+{
-+ return cmp_int(l, r);
-+}
-+
-+static inline int cmp_le32(__le32 l, __le32 r)
-+{
-+ return cmp_int(le32_to_cpu(l), le32_to_cpu(r));
-+}
-+
-+#include <linux/uuid.h>
-+
-+#endif /* _BCACHEFS_UTIL_H */
-diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c
-new file mode 100644
-index 000000000000..cb4f33ed9ab3
---- /dev/null
-+++ b/fs/bcachefs/varint.c
-@@ -0,0 +1,129 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include <linux/bitops.h>
-+#include <linux/math.h>
-+#include <linux/string.h>
-+#include <asm/unaligned.h>
-+
-+#ifdef CONFIG_VALGRIND
-+#include <valgrind/memcheck.h>
-+#endif
-+
-+#include "varint.h"
-+
-+/**
-+ * bch2_varint_encode - encode a variable length integer
-+ * @out: destination to encode to
-+ * @v: unsigned integer to encode
-+ * Returns: size in bytes of the encoded integer - at most 9 bytes
-+ */
-+int bch2_varint_encode(u8 *out, u64 v)
-+{
-+ unsigned bits = fls64(v|1);
-+ unsigned bytes = DIV_ROUND_UP(bits, 7);
-+ __le64 v_le;
-+
-+ if (likely(bytes < 9)) {
-+ v <<= bytes;
-+ v |= ~(~0 << (bytes - 1));
-+ v_le = cpu_to_le64(v);
-+ memcpy(out, &v_le, bytes);
-+ } else {
-+ *out++ = 255;
-+ bytes = 9;
-+ put_unaligned_le64(v, out);
-+ }
-+
-+ return bytes;
-+}
-+
-+/**
-+ * bch2_varint_decode - encode a variable length integer
-+ * @in: varint to decode
-+ * @end: end of buffer to decode from
-+ * @out: on success, decoded integer
-+ * Returns: size in bytes of the decoded integer - or -1 on failure (would
-+ * have read past the end of the buffer)
-+ */
-+int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
-+{
-+ unsigned bytes = likely(in < end)
-+ ? ffz(*in & 255) + 1
-+ : 1;
-+ u64 v;
-+
-+ if (unlikely(in + bytes > end))
-+ return -1;
-+
-+ if (likely(bytes < 9)) {
-+ __le64 v_le = 0;
-+
-+ memcpy(&v_le, in, bytes);
-+ v = le64_to_cpu(v_le);
-+ v >>= bytes;
-+ } else {
-+ v = get_unaligned_le64(++in);
-+ }
-+
-+ *out = v;
-+ return bytes;
-+}
-+
-+/**
-+ * bch2_varint_encode_fast - fast version of bch2_varint_encode
-+ * @out: destination to encode to
-+ * @v: unsigned integer to encode
-+ * Returns: size in bytes of the encoded integer - at most 9 bytes
-+ *
-+ * This version assumes it's always safe to write 8 bytes to @out, even if the
-+ * encoded integer would be smaller.
-+ */
-+int bch2_varint_encode_fast(u8 *out, u64 v)
-+{
-+ unsigned bits = fls64(v|1);
-+ unsigned bytes = DIV_ROUND_UP(bits, 7);
-+
-+ if (likely(bytes < 9)) {
-+ v <<= bytes;
-+ v |= ~(~0 << (bytes - 1));
-+ } else {
-+ *out++ = 255;
-+ bytes = 9;
-+ }
-+
-+ put_unaligned_le64(v, out);
-+ return bytes;
-+}
-+
-+/**
-+ * bch2_varint_decode_fast - fast version of bch2_varint_decode
-+ * @in: varint to decode
-+ * @end: end of buffer to decode from
-+ * @out: on success, decoded integer
-+ * Returns: size in bytes of the decoded integer - or -1 on failure (would
-+ * have read past the end of the buffer)
-+ *
-+ * This version assumes that it is safe to read at most 8 bytes past the end of
-+ * @end (we still return an error if the varint extends past @end).
-+ */
-+int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out)
-+{
-+#ifdef CONFIG_VALGRIND
-+ VALGRIND_MAKE_MEM_DEFINED(in, 8);
-+#endif
-+ u64 v = get_unaligned_le64(in);
-+ unsigned bytes = ffz(*in) + 1;
-+
-+ if (unlikely(in + bytes > end))
-+ return -1;
-+
-+ if (likely(bytes < 9)) {
-+ v >>= bytes;
-+ v &= ~(~0ULL << (7 * bytes));
-+ } else {
-+ v = get_unaligned_le64(++in);
-+ }
-+
-+ *out = v;
-+ return bytes;
-+}
-diff --git a/fs/bcachefs/varint.h b/fs/bcachefs/varint.h
-new file mode 100644
-index 000000000000..92a182fb3d7a
---- /dev/null
-+++ b/fs/bcachefs/varint.h
-@@ -0,0 +1,11 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_VARINT_H
-+#define _BCACHEFS_VARINT_H
-+
-+int bch2_varint_encode(u8 *, u64);
-+int bch2_varint_decode(const u8 *, const u8 *, u64 *);
-+
-+int bch2_varint_encode_fast(u8 *, u64);
-+int bch2_varint_decode_fast(const u8 *, const u8 *, u64 *);
-+
-+#endif /* _BCACHEFS_VARINT_H */
-diff --git a/fs/bcachefs/vstructs.h b/fs/bcachefs/vstructs.h
-new file mode 100644
-index 000000000000..a6561b4b36a6
---- /dev/null
-+++ b/fs/bcachefs/vstructs.h
-@@ -0,0 +1,63 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _VSTRUCTS_H
-+#define _VSTRUCTS_H
-+
-+#include "util.h"
-+
-+/*
-+ * NOTE: we can't differentiate between __le64 and u64 with type_is - this
-+ * assumes u64 is little endian:
-+ */
-+#define __vstruct_u64s(_s) \
-+({ \
-+ ( type_is((_s)->u64s, u64) ? le64_to_cpu((__force __le64) (_s)->u64s) \
-+ : type_is((_s)->u64s, u32) ? le32_to_cpu((__force __le32) (_s)->u64s) \
-+ : type_is((_s)->u64s, u16) ? le16_to_cpu((__force __le16) (_s)->u64s) \
-+ : ((__force u8) ((_s)->u64s))); \
-+})
-+
-+#define __vstruct_bytes(_type, _u64s) \
-+({ \
-+ BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64)); \
-+ \
-+ (size_t) (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \
-+})
-+
-+#define vstruct_bytes(_s) \
-+ __vstruct_bytes(typeof(*(_s)), __vstruct_u64s(_s))
-+
-+#define __vstruct_blocks(_type, _sector_block_bits, _u64s) \
-+ (round_up(__vstruct_bytes(_type, _u64s), \
-+ 512 << (_sector_block_bits)) >> (9 + (_sector_block_bits)))
-+
-+#define vstruct_blocks(_s, _sector_block_bits) \
-+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, __vstruct_u64s(_s))
-+
-+#define vstruct_blocks_plus(_s, _sector_block_bits, _u64s) \
-+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, \
-+ __vstruct_u64s(_s) + (_u64s))
-+
-+#define vstruct_sectors(_s, _sector_block_bits) \
-+ (round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9)
-+
-+#define vstruct_next(_s) \
-+ ((typeof(_s)) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
-+#define vstruct_last(_s) \
-+ ((typeof(&(_s)->start[0])) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
-+#define vstruct_end(_s) \
-+ ((void *) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
-+
-+#define vstruct_for_each(_s, _i) \
-+ for (_i = (_s)->start; \
-+ _i < vstruct_last(_s); \
-+ _i = vstruct_next(_i))
-+
-+#define vstruct_for_each_safe(_s, _i, _t) \
-+ for (_i = (_s)->start; \
-+ _i < vstruct_last(_s) && (_t = vstruct_next(_i), true); \
-+ _i = _t)
-+
-+#define vstruct_idx(_s, _idx) \
-+ ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx)))
-+
-+#endif /* _VSTRUCTS_H */
-diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
-new file mode 100644
-index 000000000000..a39ff0c296ec
---- /dev/null
-+++ b/fs/bcachefs/xattr.c
-@@ -0,0 +1,643 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "acl.h"
-+#include "bkey_methods.h"
-+#include "btree_update.h"
-+#include "extents.h"
-+#include "fs.h"
-+#include "rebalance.h"
-+#include "str_hash.h"
-+#include "xattr.h"
-+
-+#include <linux/dcache.h>
-+#include <linux/posix_acl_xattr.h>
-+#include <linux/xattr.h>
-+
-+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
-+
-+static u64 bch2_xattr_hash(const struct bch_hash_info *info,
-+ const struct xattr_search_key *key)
-+{
-+ struct bch_str_hash_ctx ctx;
-+
-+ bch2_str_hash_init(&ctx, info);
-+ bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type));
-+ bch2_str_hash_update(&ctx, info, key->name.name, key->name.len);
-+
-+ return bch2_str_hash_end(&ctx, info);
-+}
-+
-+static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
-+{
-+ return bch2_xattr_hash(info, key);
-+}
-+
-+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
-+{
-+ struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k);
-+
-+ return bch2_xattr_hash(info,
-+ &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len));
-+}
-+
-+static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
-+{
-+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
-+ const struct xattr_search_key *r = _r;
-+
-+ return l.v->x_type != r->type ||
-+ l.v->x_name_len != r->name.len ||
-+ memcmp(l.v->x_name, r->name.name, r->name.len);
-+}
-+
-+static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
-+{
-+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
-+ struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r);
-+
-+ return l.v->x_type != r.v->x_type ||
-+ l.v->x_name_len != r.v->x_name_len ||
-+ memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
-+}
-+
-+const struct bch_hash_desc bch2_xattr_hash_desc = {
-+ .btree_id = BTREE_ID_xattrs,
-+ .key_type = KEY_TYPE_xattr,
-+ .hash_key = xattr_hash_key,
-+ .hash_bkey = xattr_hash_bkey,
-+ .cmp_key = xattr_cmp_key,
-+ .cmp_bkey = xattr_cmp_bkey,
-+};
-+
-+int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k,
-+ enum bkey_invalid_flags flags,
-+ struct printbuf *err)
-+{
-+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
-+ unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len,
-+ le16_to_cpu(xattr.v->x_val_len));
-+ int ret = 0;
-+
-+ bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err,
-+ xattr_val_size_too_small,
-+ "value too small (%zu < %u)",
-+ bkey_val_u64s(k.k), val_u64s);
-+
-+ /* XXX why +4 ? */
-+ val_u64s = xattr_val_u64s(xattr.v->x_name_len,
-+ le16_to_cpu(xattr.v->x_val_len) + 4);
-+
-+ bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err,
-+ xattr_val_size_too_big,
-+ "value too big (%zu > %u)",
-+ bkey_val_u64s(k.k), val_u64s);
-+
-+ bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err,
-+ xattr_invalid_type,
-+ "invalid type (%u)", xattr.v->x_type);
-+
-+ bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err,
-+ xattr_name_invalid_chars,
-+ "xattr name has invalid characters");
-+fsck_err:
-+ return ret;
-+}
-+
-+void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
-+ struct bkey_s_c k)
-+{
-+ const struct xattr_handler *handler;
-+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
-+
-+ handler = bch2_xattr_type_to_handler(xattr.v->x_type);
-+ if (handler && handler->prefix)
-+ prt_printf(out, "%s", handler->prefix);
-+ else if (handler)
-+ prt_printf(out, "(type %u)", xattr.v->x_type);
-+ else
-+ prt_printf(out, "(unknown type %u)", xattr.v->x_type);
-+
-+ prt_printf(out, "%.*s:%.*s",
-+ xattr.v->x_name_len,
-+ xattr.v->x_name,
-+ le16_to_cpu(xattr.v->x_val_len),
-+ (char *) xattr_val(xattr.v));
-+
-+ if (xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS ||
-+ xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT) {
-+ prt_char(out, ' ');
-+ bch2_acl_to_text(out, xattr_val(xattr.v),
-+ le16_to_cpu(xattr.v->x_val_len));
-+ }
-+}
-+
-+static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info *inode,
-+ const char *name, void *buffer, size_t size, int type)
-+{
-+ struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode);
-+ struct xattr_search_key search = X_SEARCH(type, name, strlen(name));
-+ struct btree_iter iter;
-+ struct bkey_s_c_xattr xattr;
-+ struct bkey_s_c k;
-+ int ret;
-+
-+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash,
-+ inode_inum(inode), &search, 0);
-+ if (ret)
-+ goto err1;
-+
-+ k = bch2_btree_iter_peek_slot(&iter);
-+ ret = bkey_err(k);
-+ if (ret)
-+ goto err2;
-+
-+ xattr = bkey_s_c_to_xattr(k);
-+ ret = le16_to_cpu(xattr.v->x_val_len);
-+ if (buffer) {
-+ if (ret > size)
-+ ret = -ERANGE;
-+ else
-+ memcpy(buffer, xattr_val(xattr.v), ret);
-+ }
-+err2:
-+ bch2_trans_iter_exit(trans, &iter);
-+err1:
-+ return ret < 0 && bch2_err_matches(ret, ENOENT) ? -ENODATA : ret;
-+}
-+
-+int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
-+ struct bch_inode_unpacked *inode_u,
-+ const struct bch_hash_info *hash_info,
-+ const char *name, const void *value, size_t size,
-+ int type, int flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter inode_iter = { NULL };
-+ int ret;
-+
-+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
-+ if (ret)
-+ return ret;
-+
-+ inode_u->bi_ctime = bch2_current_time(c);
-+
-+ ret = bch2_inode_write(trans, &inode_iter, inode_u);
-+ bch2_trans_iter_exit(trans, &inode_iter);
-+
-+ if (ret)
-+ return ret;
-+
-+ if (value) {
-+ struct bkey_i_xattr *xattr;
-+ unsigned namelen = strlen(name);
-+ unsigned u64s = BKEY_U64s +
-+ xattr_val_u64s(namelen, size);
-+
-+ if (u64s > U8_MAX)
-+ return -ERANGE;
-+
-+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
-+ if (IS_ERR(xattr))
-+ return PTR_ERR(xattr);
-+
-+ bkey_xattr_init(&xattr->k_i);
-+ xattr->k.u64s = u64s;
-+ xattr->v.x_type = type;
-+ xattr->v.x_name_len = namelen;
-+ xattr->v.x_val_len = cpu_to_le16(size);
-+ memcpy(xattr->v.x_name, name, namelen);
-+ memcpy(xattr_val(&xattr->v), value, size);
-+
-+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
-+ inum, &xattr->k_i,
-+ (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
-+ (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
-+ } else {
-+ struct xattr_search_key search =
-+ X_SEARCH(type, name, strlen(name));
-+
-+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc,
-+ hash_info, inum, &search);
-+ }
-+
-+ if (bch2_err_matches(ret, ENOENT))
-+ ret = flags & XATTR_REPLACE ? -ENODATA : 0;
-+
-+ return ret;
-+}
-+
-+struct xattr_buf {
-+ char *buf;
-+ size_t len;
-+ size_t used;
-+};
-+
-+static int __bch2_xattr_emit(const char *prefix,
-+ const char *name, size_t name_len,
-+ struct xattr_buf *buf)
-+{
-+ const size_t prefix_len = strlen(prefix);
-+ const size_t total_len = prefix_len + name_len + 1;
-+
-+ if (buf->buf) {
-+ if (buf->used + total_len > buf->len)
-+ return -ERANGE;
-+
-+ memcpy(buf->buf + buf->used, prefix, prefix_len);
-+ memcpy(buf->buf + buf->used + prefix_len,
-+ name, name_len);
-+ buf->buf[buf->used + prefix_len + name_len] = '\0';
-+ }
-+
-+ buf->used += total_len;
-+ return 0;
-+}
-+
-+static int bch2_xattr_emit(struct dentry *dentry,
-+ const struct bch_xattr *xattr,
-+ struct xattr_buf *buf)
-+{
-+ const struct xattr_handler *handler =
-+ bch2_xattr_type_to_handler(xattr->x_type);
-+
-+ return handler && (!handler->list || handler->list(dentry))
-+ ? __bch2_xattr_emit(handler->prefix ?: handler->name,
-+ xattr->x_name, xattr->x_name_len, buf)
-+ : 0;
-+}
-+
-+static int bch2_xattr_list_bcachefs(struct bch_fs *c,
-+ struct bch_inode_unpacked *inode,
-+ struct xattr_buf *buf,
-+ bool all)
-+{
-+ const char *prefix = all ? "bcachefs_effective." : "bcachefs.";
-+ unsigned id;
-+ int ret = 0;
-+ u64 v;
-+
-+ for (id = 0; id < Inode_opt_nr; id++) {
-+ v = bch2_inode_opt_get(inode, id);
-+ if (!v)
-+ continue;
-+
-+ if (!all &&
-+ !(inode->bi_fields_set & (1 << id)))
-+ continue;
-+
-+ ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id],
-+ strlen(bch2_inode_opts[id]), buf);
-+ if (ret)
-+ break;
-+ }
-+
-+ return ret;
-+}
-+
-+ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
-+{
-+ struct bch_fs *c = dentry->d_sb->s_fs_info;
-+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
-+ struct btree_trans *trans = bch2_trans_get(c);
-+ struct btree_iter iter;
-+ struct bkey_s_c k;
-+ struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
-+ u64 offset = 0, inum = inode->ei_inode.bi_inum;
-+ u32 snapshot;
-+ int ret;
-+retry:
-+ bch2_trans_begin(trans);
-+ iter = (struct btree_iter) { NULL };
-+
-+ ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
-+ if (ret)
-+ goto err;
-+
-+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_xattrs,
-+ SPOS(inum, offset, snapshot),
-+ POS(inum, U64_MAX), 0, k, ret) {
-+ if (k.k->type != KEY_TYPE_xattr)
-+ continue;
-+
-+ ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf);
-+ if (ret)
-+ break;
-+ }
-+
-+ offset = iter.pos.offset;
-+ bch2_trans_iter_exit(trans, &iter);
-+err:
-+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+ goto retry;
-+
-+ bch2_trans_put(trans);
-+
-+ if (ret)
-+ goto out;
-+
-+ ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false);
-+ if (ret)
-+ goto out;
-+
-+ ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true);
-+ if (ret)
-+ goto out;
-+
-+ return buf.used;
-+out:
-+ return bch2_err_class(ret);
-+}
-+
-+static int bch2_xattr_get_handler(const struct xattr_handler *handler,
-+ struct dentry *dentry, struct inode *vinode,
-+ const char *name, void *buffer, size_t size)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(vinode);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ int ret = bch2_trans_do(c, NULL, NULL, 0,
-+ bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags));
-+
-+ return bch2_err_class(ret);
-+}
-+
-+static int bch2_xattr_set_handler(const struct xattr_handler *handler,
-+ struct mnt_idmap *idmap,
-+ struct dentry *dentry, struct inode *vinode,
-+ const char *name, const void *value,
-+ size_t size, int flags)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(vinode);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
-+ struct bch_inode_unpacked inode_u;
-+ int ret;
-+
-+ ret = bch2_trans_run(c,
-+ commit_do(trans, NULL, NULL, 0,
-+ bch2_xattr_set(trans, inode_inum(inode), &inode_u,
-+ &hash, name, value, size,
-+ handler->flags, flags)) ?:
-+ (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0));
-+
-+ return bch2_err_class(ret);
-+}
-+
-+static const struct xattr_handler bch_xattr_user_handler = {
-+ .prefix = XATTR_USER_PREFIX,
-+ .get = bch2_xattr_get_handler,
-+ .set = bch2_xattr_set_handler,
-+ .flags = KEY_TYPE_XATTR_INDEX_USER,
-+};
-+
-+static bool bch2_xattr_trusted_list(struct dentry *dentry)
-+{
-+ return capable(CAP_SYS_ADMIN);
-+}
-+
-+static const struct xattr_handler bch_xattr_trusted_handler = {
-+ .prefix = XATTR_TRUSTED_PREFIX,
-+ .list = bch2_xattr_trusted_list,
-+ .get = bch2_xattr_get_handler,
-+ .set = bch2_xattr_set_handler,
-+ .flags = KEY_TYPE_XATTR_INDEX_TRUSTED,
-+};
-+
-+static const struct xattr_handler bch_xattr_security_handler = {
-+ .prefix = XATTR_SECURITY_PREFIX,
-+ .get = bch2_xattr_get_handler,
-+ .set = bch2_xattr_set_handler,
-+ .flags = KEY_TYPE_XATTR_INDEX_SECURITY,
-+};
-+
-+#ifndef NO_BCACHEFS_FS
-+
-+static int opt_to_inode_opt(int id)
-+{
-+ switch (id) {
-+#define x(name, ...) \
-+ case Opt_##name: return Inode_opt_##name;
-+ BCH_INODE_OPTS()
-+#undef x
-+ default:
-+ return -1;
-+ }
-+}
-+
-+static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
-+ struct dentry *dentry, struct inode *vinode,
-+ const char *name, void *buffer, size_t size,
-+ bool all)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(vinode);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ struct bch_opts opts =
-+ bch2_inode_opts_to_opts(&inode->ei_inode);
-+ const struct bch_option *opt;
-+ int id, inode_opt_id;
-+ struct printbuf out = PRINTBUF;
-+ int ret;
-+ u64 v;
-+
-+ id = bch2_opt_lookup(name);
-+ if (id < 0 || !bch2_opt_is_inode_opt(id))
-+ return -EINVAL;
-+
-+ inode_opt_id = opt_to_inode_opt(id);
-+ if (inode_opt_id < 0)
-+ return -EINVAL;
-+
-+ opt = bch2_opt_table + id;
-+
-+ if (!bch2_opt_defined_by_id(&opts, id))
-+ return -ENODATA;
-+
-+ if (!all &&
-+ !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id)))
-+ return -ENODATA;
-+
-+ v = bch2_opt_get_by_id(&opts, id);
-+ bch2_opt_to_text(&out, c, c->disk_sb.sb, opt, v, 0);
-+
-+ ret = out.pos;
-+
-+ if (out.allocation_failure) {
-+ ret = -ENOMEM;
-+ } else if (buffer) {
-+ if (out.pos > size)
-+ ret = -ERANGE;
-+ else
-+ memcpy(buffer, out.buf, out.pos);
-+ }
-+
-+ printbuf_exit(&out);
-+ return ret;
-+}
-+
-+static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
-+ struct dentry *dentry, struct inode *vinode,
-+ const char *name, void *buffer, size_t size)
-+{
-+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode,
-+ name, buffer, size, false);
-+}
-+
-+struct inode_opt_set {
-+ int id;
-+ u64 v;
-+ bool defined;
-+};
-+
-+static int inode_opt_set_fn(struct btree_trans *trans,
-+ struct bch_inode_info *inode,
-+ struct bch_inode_unpacked *bi,
-+ void *p)
-+{
-+ struct inode_opt_set *s = p;
-+
-+ if (s->defined)
-+ bi->bi_fields_set |= 1U << s->id;
-+ else
-+ bi->bi_fields_set &= ~(1U << s->id);
-+
-+ bch2_inode_opt_set(bi, s->id, s->v);
-+
-+ return 0;
-+}
-+
-+static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
-+ struct mnt_idmap *idmap,
-+ struct dentry *dentry, struct inode *vinode,
-+ const char *name, const void *value,
-+ size_t size, int flags)
-+{
-+ struct bch_inode_info *inode = to_bch_ei(vinode);
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+ const struct bch_option *opt;
-+ char *buf;
-+ struct inode_opt_set s;
-+ int opt_id, inode_opt_id, ret;
-+
-+ opt_id = bch2_opt_lookup(name);
-+ if (opt_id < 0)
-+ return -EINVAL;
-+
-+ opt = bch2_opt_table + opt_id;
-+
-+ inode_opt_id = opt_to_inode_opt(opt_id);
-+ if (inode_opt_id < 0)
-+ return -EINVAL;
-+
-+ s.id = inode_opt_id;
-+
-+ if (value) {
-+ u64 v = 0;
-+
-+ buf = kmalloc(size + 1, GFP_KERNEL);
-+ if (!buf)
-+ return -ENOMEM;
-+ memcpy(buf, value, size);
-+ buf[size] = '\0';
-+
-+ ret = bch2_opt_parse(c, opt, buf, &v, NULL);
-+ kfree(buf);
-+
-+ if (ret < 0)
-+ return ret;
-+
-+ ret = bch2_opt_check_may_set(c, opt_id, v);
-+ if (ret < 0)
-+ return ret;
-+
-+ s.v = v + 1;
-+ s.defined = true;
-+ } else {
-+ if (!IS_ROOT(dentry)) {
-+ struct bch_inode_info *dir =
-+ to_bch_ei(d_inode(dentry->d_parent));
-+
-+ s.v = bch2_inode_opt_get(&dir->ei_inode, inode_opt_id);
-+ } else {
-+ s.v = 0;
-+ }
-+
-+ s.defined = false;
-+ }
-+
-+ mutex_lock(&inode->ei_update_lock);
-+ if (inode_opt_id == Inode_opt_project) {
-+ /*
-+ * inode fields accessible via the xattr interface are stored
-+ * with a +1 bias, so that 0 means unset:
-+ */
-+ ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0);
-+ if (ret)
-+ goto err;
-+ }
-+
-+ ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
-+err:
-+ mutex_unlock(&inode->ei_update_lock);
-+
-+ if (value &&
-+ (opt_id == Opt_background_compression ||
-+ opt_id == Opt_background_target))
-+ bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum);
-+
-+ return bch2_err_class(ret);
-+}
-+
-+static const struct xattr_handler bch_xattr_bcachefs_handler = {
-+ .prefix = "bcachefs.",
-+ .get = bch2_xattr_bcachefs_get,
-+ .set = bch2_xattr_bcachefs_set,
-+};
-+
-+static int bch2_xattr_bcachefs_get_effective(
-+ const struct xattr_handler *handler,
-+ struct dentry *dentry, struct inode *vinode,
-+ const char *name, void *buffer, size_t size)
-+{
-+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode,
-+ name, buffer, size, true);
-+}
-+
-+static const struct xattr_handler bch_xattr_bcachefs_effective_handler = {
-+ .prefix = "bcachefs_effective.",
-+ .get = bch2_xattr_bcachefs_get_effective,
-+ .set = bch2_xattr_bcachefs_set,
-+};
-+
-+#endif /* NO_BCACHEFS_FS */
-+
-+const struct xattr_handler *bch2_xattr_handlers[] = {
-+ &bch_xattr_user_handler,
-+#ifdef CONFIG_BCACHEFS_POSIX_ACL
-+ &nop_posix_acl_access,
-+ &nop_posix_acl_default,
-+#endif
-+ &bch_xattr_trusted_handler,
-+ &bch_xattr_security_handler,
-+#ifndef NO_BCACHEFS_FS
-+ &bch_xattr_bcachefs_handler,
-+ &bch_xattr_bcachefs_effective_handler,
-+#endif
-+ NULL
-+};
-+
-+static const struct xattr_handler *bch_xattr_handler_map[] = {
-+ [KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler,
-+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] =
-+ &nop_posix_acl_access,
-+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] =
-+ &nop_posix_acl_default,
-+ [KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler,
-+ [KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler,
-+};
-+
-+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type)
-+{
-+ return type < ARRAY_SIZE(bch_xattr_handler_map)
-+ ? bch_xattr_handler_map[type]
-+ : NULL;
-+}
-diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h
-new file mode 100644
-index 000000000000..1337f31a5c49
---- /dev/null
-+++ b/fs/bcachefs/xattr.h
-@@ -0,0 +1,50 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_XATTR_H
-+#define _BCACHEFS_XATTR_H
-+
-+#include "str_hash.h"
-+
-+extern const struct bch_hash_desc bch2_xattr_hash_desc;
-+
-+int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c,
-+ enum bkey_invalid_flags, struct printbuf *);
-+void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_xattr ((struct bkey_ops) { \
-+ .key_invalid = bch2_xattr_invalid, \
-+ .val_to_text = bch2_xattr_to_text, \
-+ .min_val_size = 8, \
-+})
-+
-+static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
-+{
-+ return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
-+ name_len + val_len, sizeof(u64));
-+}
-+
-+#define xattr_val(_xattr) \
-+ ((void *) (_xattr)->x_name + (_xattr)->x_name_len)
-+
-+struct xattr_search_key {
-+ u8 type;
-+ struct qstr name;
-+};
-+
-+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \
-+ { .type = _type, .name = QSTR_INIT(_name, _len) })
-+
-+struct dentry;
-+struct xattr_handler;
-+struct bch_hash_info;
-+struct bch_inode_info;
-+
-+/* Exported for cmd_migrate.c in tools: */
-+int bch2_xattr_set(struct btree_trans *, subvol_inum,
-+ struct bch_inode_unpacked *, const struct bch_hash_info *,
-+ const char *, const void *, size_t, int, int);
-+
-+ssize_t bch2_xattr_list(struct dentry *, char *, size_t);
-+
-+extern const struct xattr_handler *bch2_xattr_handlers[];
-+
-+#endif /* _BCACHEFS_XATTR_H */
-diff --git a/fs/dcache.c b/fs/dcache.c
-index 25ac74d30bff..796e23761ba0 100644
---- a/fs/dcache.c
-+++ b/fs/dcache.c
-@@ -3246,11 +3246,10 @@ void d_genocide(struct dentry *parent)
- d_walk(parent, parent, d_genocide_kill);
- }
-
--void d_tmpfile(struct file *file, struct inode *inode)
-+void d_mark_tmpfile(struct file *file, struct inode *inode)
- {
- struct dentry *dentry = file->f_path.dentry;
-
-- inode_dec_link_count(inode);
- BUG_ON(dentry->d_name.name != dentry->d_iname ||
- !hlist_unhashed(&dentry->d_u.d_alias) ||
- !d_unlinked(dentry));
-@@ -3260,6 +3259,15 @@ void d_tmpfile(struct file *file, struct inode *inode)
- (unsigned long long)inode->i_ino);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&dentry->d_parent->d_lock);
-+}
-+EXPORT_SYMBOL(d_mark_tmpfile);
-+
-+void d_tmpfile(struct file *file, struct inode *inode)
-+{
-+ struct dentry *dentry = file->f_path.dentry;
-+
-+ inode_dec_link_count(inode);
-+ d_mark_tmpfile(file, inode);
- d_instantiate(dentry, inode);
- }
- EXPORT_SYMBOL(d_tmpfile);
-diff --git a/drivers/md/bcache/closure.h b/include/linux/closure.h
-similarity index 91%
-rename from drivers/md/bcache/closure.h
-rename to include/linux/closure.h
-index c88cdc4ae4ec..de7bb47d8a46 100644
---- a/drivers/md/bcache/closure.h
-+++ b/include/linux/closure.h
-@@ -154,8 +154,9 @@ struct closure {
- struct closure *parent;
-
- atomic_t remaining;
-+ bool closure_get_happened;
-
--#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
- #define CLOSURE_MAGIC_DEAD 0xc054dead
- #define CLOSURE_MAGIC_ALIVE 0xc054a11e
-
-@@ -172,6 +173,11 @@ void __closure_wake_up(struct closure_waitlist *list);
- bool closure_wait(struct closure_waitlist *list, struct closure *cl);
- void __closure_sync(struct closure *cl);
-
-+static inline unsigned closure_nr_remaining(struct closure *cl)
-+{
-+ return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK;
-+}
-+
- /**
- * closure_sync - sleep until a closure a closure has nothing left to wait on
- *
-@@ -180,19 +186,21 @@ void __closure_sync(struct closure *cl);
- */
- static inline void closure_sync(struct closure *cl)
- {
-- if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1)
-+#ifdef CONFIG_DEBUG_CLOSURES
-+ BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
-+#endif
-+
-+ if (cl->closure_get_happened)
- __closure_sync(cl);
- }
-
--#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
-
--void closure_debug_init(void);
- void closure_debug_create(struct closure *cl);
- void closure_debug_destroy(struct closure *cl);
-
- #else
-
--static inline void closure_debug_init(void) {}
- static inline void closure_debug_create(struct closure *cl) {}
- static inline void closure_debug_destroy(struct closure *cl) {}
-
-@@ -200,21 +208,21 @@ static inline void closure_debug_destroy(struct closure *cl) {}
-
- static inline void closure_set_ip(struct closure *cl)
- {
--#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
- cl->ip = _THIS_IP_;
- #endif
- }
-
- static inline void closure_set_ret_ip(struct closure *cl)
- {
--#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
- cl->ip = _RET_IP_;
- #endif
- }
-
- static inline void closure_set_waiting(struct closure *cl, unsigned long f)
- {
--#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
- cl->waiting_on = f;
- #endif
- }
-@@ -230,8 +238,6 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
- closure_set_ip(cl);
- cl->fn = fn;
- cl->wq = wq;
-- /* between atomic_dec() in closure_put() */
-- smp_mb__before_atomic();
- }
-
- static inline void closure_queue(struct closure *cl)
-@@ -243,6 +249,7 @@ static inline void closure_queue(struct closure *cl)
- */
- BUILD_BUG_ON(offsetof(struct closure, fn)
- != offsetof(struct work_struct, func));
-+
- if (wq) {
- INIT_WORK(&cl->work, cl->work.func);
- BUG_ON(!queue_work(wq, &cl->work));
-@@ -255,7 +262,9 @@ static inline void closure_queue(struct closure *cl)
- */
- static inline void closure_get(struct closure *cl)
- {
--#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+ cl->closure_get_happened = true;
-+
-+#ifdef CONFIG_DEBUG_CLOSURES
- BUG_ON((atomic_inc_return(&cl->remaining) &
- CLOSURE_REMAINING_MASK) <= 1);
- #else
-@@ -271,12 +280,13 @@ static inline void closure_get(struct closure *cl)
- */
- static inline void closure_init(struct closure *cl, struct closure *parent)
- {
-- memset(cl, 0, sizeof(struct closure));
-+ cl->fn = NULL;
- cl->parent = parent;
- if (parent)
- closure_get(parent);
-
- atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
-+ cl->closure_get_happened = false;
-
- closure_debug_create(cl);
- closure_set_ip(cl);
-@@ -375,4 +385,26 @@ static inline void closure_call(struct closure *cl, closure_fn fn,
- continue_at_nobarrier(cl, fn, wq);
- }
-
-+#define __closure_wait_event(waitlist, _cond) \
-+do { \
-+ struct closure cl; \
-+ \
-+ closure_init_stack(&cl); \
-+ \
-+ while (1) { \
-+ closure_wait(waitlist, &cl); \
-+ if (_cond) \
-+ break; \
-+ closure_sync(&cl); \
-+ } \
-+ closure_wake_up(waitlist); \
-+ closure_sync(&cl); \
-+} while (0)
-+
-+#define closure_wait_event(waitlist, _cond) \
-+do { \
-+ if (!(_cond)) \
-+ __closure_wait_event(waitlist, _cond); \
-+} while (0)
-+
- #endif /* _LINUX_CLOSURE_H */
-diff --git a/include/linux/dcache.h b/include/linux/dcache.h
-index 6b351e009f59..3da2f0545d5d 100644
---- a/include/linux/dcache.h
-+++ b/include/linux/dcache.h
-@@ -251,6 +251,7 @@ extern struct dentry * d_make_root(struct inode *);
- /* <clickety>-<click> the ramfs-type tree */
- extern void d_genocide(struct dentry *);
-
-+extern void d_mark_tmpfile(struct file *, struct inode *);
- extern void d_tmpfile(struct file *, struct inode *);
-
- extern struct dentry *d_find_alias(struct inode *);
-diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
-index 11fbd0ee1370..f75e0914d40d 100644
---- a/include/linux/exportfs.h
-+++ b/include/linux/exportfs.h
-@@ -104,6 +104,12 @@ enum fid_type {
- */
- FILEID_LUSTRE = 0x97,
-
-+ /*
-+ * 64 bit inode number, 32 bit subvolume, 32 bit generation number:
-+ */
-+ FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1,
-+ FILEID_BCACHEFS_WITH_PARENT = 0xb2,
-+
- /*
- * 64 bit unique kernfs id
- */
-diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
-index 107613f7d792..847413164738 100644
---- a/include/linux/generic-radix-tree.h
-+++ b/include/linux/generic-radix-tree.h
-@@ -116,6 +117,11 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
-
- #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *)
- #define __genradix_obj_size(_radix) sizeof((_radix)->type[0])
-+#define __genradix_objs_per_page(_radix) \
-+ (PAGE_SIZE / sizeof((_radix)->type[0]))
-+#define __genradix_page_remainder(_radix) \
-+ (PAGE_SIZE % sizeof((_radix)->type[0]))
-+
- #define __genradix_idx_to_offset(_radix, _idx) \
- __idx_to_offset(_idx, __genradix_obj_size(_radix))
-
-@@ -185,7 +185,25 @@
- #define genradix_iter_peek(_iter, _radix) \
- (__genradix_cast(_radix) \
- __genradix_iter_peek(_iter, &(_radix)->tree, \
-- PAGE_SIZE / __genradix_obj_size(_radix)))
-+ __genradix_objs_per_page(_radix)))
-+
-+void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *,
-+ size_t, size_t);
-+
-+/**
-+ * genradix_iter_peek_prev - get first entry at or below iterator's current
-+ * position
-+ * @_iter: a genradix_iter
-+ * @_radix: genradix being iterated over
-+ *
-+ * If no more entries exist at or below @_iter's current position, returns NULL
-+ */
-+#define genradix_iter_peek_prev(_iter, _radix) \
-+ (__genradix_cast(_radix) \
-+ __genradix_iter_peek_prev(_iter, &(_radix)->tree, \
-+ __genradix_objs_per_page(_radix), \
-+ __genradix_obj_size(_radix) + \
-+ __genradix_page_remainder(_radix)))
-
- static inline void __genradix_iter_advance(struct genradix_iter *iter,
- size_t obj_size)
-@@ -196,6 +226,25 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
- #define genradix_iter_advance(_iter, _radix) \
- __genradix_iter_advance(_iter, __genradix_obj_size(_radix))
-
-+static inline void __genradix_iter_rewind(struct genradix_iter *iter,
-+ size_t obj_size)
-+{
-+ if (iter->offset == 0 ||
-+ iter->offset == SIZE_MAX) {
-+ iter->offset = SIZE_MAX;
-+ return;
-+ }
-+
-+ if ((iter->offset & (PAGE_SIZE - 1)) == 0)
-+ iter->offset -= PAGE_SIZE % obj_size;
-+
-+ iter->offset -= obj_size;
-+ iter->pos--;
-+}
-+
-+#define genradix_iter_rewind(_iter, _radix) \
-+ __genradix_iter_rewind(_iter, __genradix_obj_size(_radix))
-+
- #define genradix_for_each_from(_radix, _iter, _p, _start) \
- for (_iter = genradix_iter_init(_radix, _start); \
- (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \
-@@ -213,6 +262,23 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
- #define genradix_for_each(_radix, _iter, _p) \
- genradix_for_each_from(_radix, _iter, _p, 0)
-
-+#define genradix_last_pos(_radix) \
-+ (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1)
-+
-+/**
-+ * genradix_for_each_reverse - iterate over entry in a genradix, reverse order
-+ * @_radix: genradix to iterate over
-+ * @_iter: a genradix_iter to track current position
-+ * @_p: pointer to genradix entry type
-+ *
-+ * On every iteration, @_p will point to the current entry, and @_iter.pos
-+ * will be the current entry's index.
-+ */
-+#define genradix_for_each_reverse(_radix, _iter, _p) \
-+ for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\
-+ (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\
-+ genradix_iter_rewind(&_iter, _radix))
-+
- int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
-
- /**
-diff --git a/include/linux/sched.h b/include/linux/sched.h
-index 77f01ac385f7..d5951e99706a 100644
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -875,6 +875,7 @@ struct task_struct {
-
- struct mm_struct *mm;
- struct mm_struct *active_mm;
-+ struct address_space *faults_disabled_mapping;
-
- int exit_state;
- int exit_code;
-diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
-index 9d1f5bb74dd5..58fb1f90eda5 100644
---- a/include/linux/string_helpers.h
-+++ b/include/linux/string_helpers.h
-@@ -24,8 +24,8 @@ enum string_size_units {
- STRING_UNITS_2, /* use binary powers of 2^10 */
- };
-
--void string_get_size(u64 size, u64 blk_size, enum string_size_units units,
-- char *buf, int len);
-+int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
-+ char *buf, int len);
-
- int parse_int_array_user(const char __user *from, size_t count, int **array);
-
-diff --git a/init/init_task.c b/init/init_task.c
-index ff6c4b9bfe6b..f703116e0523 100644
---- a/init/init_task.c
-+++ b/init/init_task.c
-@@ -85,6 +85,7 @@ struct task_struct init_task
- .nr_cpus_allowed= NR_CPUS,
- .mm = NULL,
- .active_mm = &init_mm,
-+ .faults_disabled_mapping = NULL,
- .restart_block = {
- .fn = do_no_restart_syscall,
- },
-diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
-index d973fe6041bf..2deeeca3e71b 100644
---- a/kernel/locking/mutex.c
-+++ b/kernel/locking/mutex.c
-@@ -1126,6 +1126,9 @@ EXPORT_SYMBOL(ww_mutex_lock_interruptible);
- #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
- #endif /* !CONFIG_PREEMPT_RT */
-
-+EXPORT_TRACEPOINT_SYMBOL_GPL(contention_begin);
-+EXPORT_TRACEPOINT_SYMBOL_GPL(contention_end);
-+
- /**
- * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
- * @cnt: the atomic which we are to dec
-diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
-index 9ed5ce989415..4f65824879ab 100644
---- a/kernel/stacktrace.c
-+++ b/kernel/stacktrace.c
-@@ -151,6 +151,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
- put_task_stack(tsk);
- return c.len;
- }
-+EXPORT_SYMBOL_GPL(stack_trace_save_tsk);
-
- /**
- * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array
-@@ -301,6 +302,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *task,
- save_stack_trace_tsk(task, &trace);
- return trace.nr_entries;
- }
-+EXPORT_SYMBOL_GPL(stack_trace_save_tsk);
-
- /**
- * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array
-diff --git a/lib/Kconfig b/lib/Kconfig
-index c686f4adc124..263aa6ae8d7c 100644
---- a/lib/Kconfig
-+++ b/lib/Kconfig
-@@ -506,6 +506,9 @@ config ASSOCIATIVE_ARRAY
-
- for more information.
-
-+config CLOSURES
-+ bool
-+
- config HAS_IOMEM
- bool
- depends on !NO_IOMEM
-diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
-index fa307f93fa2e..ce3a4abf40f8 100644
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -1720,6 +1720,15 @@ config DEBUG_NOTIFIERS
- This is a relatively cheap check but if you care about maximum
- performance, say N.
-
-+config DEBUG_CLOSURES
-+ bool "Debug closures (bcache async widgits)"
-+ depends on CLOSURES
-+ select DEBUG_FS
-+ help
-+ Keeps all active closures in a linked list and provides a debugfs
-+ interface to list them, which makes it possible to see asynchronous
-+ operations that get stuck.
-+
- config DEBUG_MAPLE_TREE
- bool "Debug maple trees"
- depends on DEBUG_KERNEL
-diff --git a/lib/Makefile b/lib/Makefile
-index 740109b6e2c8..57d394575919 100644
---- a/lib/Makefile
-+++ b/lib/Makefile
-@@ -255,6 +255,8 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
-
- obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
-
-+obj-$(CONFIG_CLOSURES) += closure.o
-+
- obj-$(CONFIG_DQL) += dynamic_queue_limits.o
-
- obj-$(CONFIG_GLOB) += glob.o
-diff --git a/drivers/md/bcache/closure.c b/lib/closure.c
-similarity index 83%
-rename from drivers/md/bcache/closure.c
-rename to lib/closure.c
-index d8d9394a6beb..f86c9eeafb35 100644
---- a/drivers/md/bcache/closure.c
-+++ b/lib/closure.c
-@@ -6,13 +6,13 @@
- * Copyright 2012 Google, Inc.
- */
-
-+#include <linux/closure.h>
- #include <linux/debugfs.h>
--#include <linux/module.h>
-+#include <linux/export.h>
-+#include <linux/rcupdate.h>
- #include <linux/seq_file.h>
- #include <linux/sched/debug.h>
-
--#include "closure.h"
--
- static inline void closure_put_after_sub(struct closure *cl, int flags)
- {
- int r = flags & CLOSURE_REMAINING_MASK;
-@@ -21,6 +21,10 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
- BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
-
- if (!r) {
-+ smp_acquire__after_ctrl_dep();
-+
-+ cl->closure_get_happened = false;
-+
- if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
- atomic_set(&cl->remaining,
- CLOSURE_REMAINING_INITIALIZER);
-@@ -43,16 +47,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
- /* For clearing flags with the same atomic op as a put */
- void closure_sub(struct closure *cl, int v)
- {
-- closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
-+ closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining));
- }
-+EXPORT_SYMBOL(closure_sub);
-
- /*
- * closure_put - decrement a closure's refcount
- */
- void closure_put(struct closure *cl)
- {
-- closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
-+ closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining));
- }
-+EXPORT_SYMBOL(closure_put);
-
- /*
- * closure_wake_up - wake up all closures on a wait list, without memory barrier
-@@ -74,6 +80,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
- closure_sub(cl, CLOSURE_WAITING + 1);
- }
- }
-+EXPORT_SYMBOL(__closure_wake_up);
-
- /**
- * closure_wait - add a closure to a waitlist
-@@ -87,12 +94,14 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
- if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
- return false;
-
-+ cl->closure_get_happened = true;
- closure_set_waiting(cl, _RET_IP_);
- atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
- llist_add(&cl->list, &waitlist->list);
-
- return true;
- }
-+EXPORT_SYMBOL(closure_wait);
-
- struct closure_syncer {
- struct task_struct *task;
-@@ -127,8 +136,9 @@ void __sched __closure_sync(struct closure *cl)
-
- __set_current_state(TASK_RUNNING);
- }
-+EXPORT_SYMBOL(__closure_sync);
-
--#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
-
- static LIST_HEAD(closure_list);
- static DEFINE_SPINLOCK(closure_list_lock);
-@@ -144,6 +154,7 @@ void closure_debug_create(struct closure *cl)
- list_add(&cl->all, &closure_list);
- spin_unlock_irqrestore(&closure_list_lock, flags);
- }
-+EXPORT_SYMBOL(closure_debug_create);
-
- void closure_debug_destroy(struct closure *cl)
- {
-@@ -156,8 +167,7 @@ void closure_debug_destroy(struct closure *cl)
- list_del(&cl->all);
- spin_unlock_irqrestore(&closure_list_lock, flags);
- }
--
--static struct dentry *closure_debug;
-+EXPORT_SYMBOL(closure_debug_destroy);
-
- static int debug_show(struct seq_file *f, void *data)
- {
-@@ -181,7 +191,7 @@ static int debug_show(struct seq_file *f, void *data)
- seq_printf(f, " W %pS\n",
- (void *) cl->waiting_on);
-
-- seq_printf(f, "\n");
-+ seq_puts(f, "\n");
- }
-
- spin_unlock_irq(&closure_list_lock);
-@@ -190,18 +200,11 @@ static int debug_show(struct seq_file *f, void *data)
-
- DEFINE_SHOW_ATTRIBUTE(debug);
-
--void __init closure_debug_init(void)
-+static int __init closure_debug_init(void)
- {
-- if (!IS_ERR_OR_NULL(bcache_debug))
-- /*
-- * it is unnecessary to check return value of
-- * debugfs_create_file(), we should not care
-- * about this.
-- */
-- closure_debug = debugfs_create_file(
-- "closures", 0400, bcache_debug, NULL, &debug_fops);
-+ debugfs_create_file("closures", 0400, NULL, NULL, &debug_fops);
-+ return 0;
- }
--#endif
-+late_initcall(closure_debug_init)
-
--MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>");
--MODULE_LICENSE("GPL");
-+#endif
-diff --git a/lib/errname.c b/lib/errname.c
-index 67739b174a8c..dd1b998552cd 100644
---- a/lib/errname.c
-+++ b/lib/errname.c
-@@ -228,3 +228,4 @@ const char *errname(int err)
-
- return err > 0 ? name + 1 : name;
- }
-+EXPORT_SYMBOL(errname);
-diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c
-index f25eb111c051..41f1bcdc4488 100644
---- a/lib/generic-radix-tree.c
-+++ b/lib/generic-radix-tree.c
-@@ -1,4 +1,5 @@
-
-+#include <linux/atomic.h>
- #include <linux/export.h>
- #include <linux/generic-radix-tree.h>
- #include <linux/gfp.h>
-@@ -201,6 +213,64 @@ void *__genradix_iter_peek(struct genradix_iter *iter,
- }
- EXPORT_SYMBOL(__genradix_iter_peek);
-
-+void *__genradix_iter_peek_prev(struct genradix_iter *iter,
-+ struct __genradix *radix,
-+ size_t objs_per_page,
-+ size_t obj_size_plus_page_remainder)
-+{
-+ struct genradix_root *r;
-+ struct genradix_node *n;
-+ unsigned level, i;
-+
-+ if (iter->offset == SIZE_MAX)
-+ return NULL;
-+
-+restart:
-+ r = READ_ONCE(radix->root);
-+ if (!r)
-+ return NULL;
-+
-+ n = genradix_root_to_node(r);
-+ level = genradix_root_to_depth(r);
-+
-+ if (ilog2(iter->offset) >= genradix_depth_shift(level)) {
-+ iter->offset = genradix_depth_size(level);
-+ iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
-+
-+ iter->offset -= obj_size_plus_page_remainder;
-+ iter->pos--;
-+ }
-+
-+ while (level) {
-+ level--;
-+
-+ i = (iter->offset >> genradix_depth_shift(level)) &
-+ (GENRADIX_ARY - 1);
-+
-+ while (!n->children[i]) {
-+ size_t objs_per_ptr = genradix_depth_size(level);
-+
-+ iter->offset = round_down(iter->offset, objs_per_ptr);
-+ iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
-+
-+ if (!iter->offset)
-+ return NULL;
-+
-+ iter->offset -= obj_size_plus_page_remainder;
-+ iter->pos--;
-+
-+ if (!i)
-+ goto restart;
-+ --i;
-+ }
-+
-+ n = n->children[i];
-+ }
-+
-+ return &n->data[iter->offset & (PAGE_SIZE - 1)];
-+}
-+EXPORT_SYMBOL(__genradix_iter_peek_prev);
-+
- static void genradix_free_recurse(struct genradix_node *n, unsigned level)
- {
- if (level) {
-diff --git a/lib/string_helpers.c b/lib/string_helpers.c
-index 9982344cca34..7713f73e66b0 100644
---- a/lib/string_helpers.c
-+++ b/lib/string_helpers.c
-@@ -31,9 +31,11 @@
- * giving the size in the required units. @buf should have room for
- * at least 9 bytes and will always be zero terminated.
- *
-+ * Return value: number of characters of output that would have been written
-+ * (which may be greater than len, if output was truncated).
- */
--void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
-- char *buf, int len)
-+int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
-+ char *buf, int len)
- {
- static const char *const units_10[] = {
- "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
-@@ -126,8 +128,8 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
- else
- unit = units_str[units][i];
-
-- snprintf(buf, len, "%u%s %s", (u32)size,
-- tmp, unit);
-+ return snprintf(buf, len, "%u%s %s", (u32)size,
-+ tmp, unit);
- }
- EXPORT_SYMBOL(string_get_size);
-
-diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
-index e45c7cb1d5bc..e92f67383dde 100644
---- a/tools/objtool/noreturns.h
-+++ b/tools/objtool/noreturns.h
-@@ -14,6 +14,8 @@ NORETURN(__stack_chk_fail)
- NORETURN(__ubsan_handle_builtin_unreachable)
- NORETURN(arch_call_rest_init)
- NORETURN(arch_cpu_idle_dead)
-+NORETURN(bch2_trans_in_restart_error)
-+NORETURN(bch2_trans_restart_error)
- NORETURN(cpu_bringup_and_idle)
- NORETURN(cpu_startup_entry)
- NORETURN(do_exit)
---
-2.42.0
-
diff --git a/SOURCES/tkg-misc-additions.patch b/SOURCES/tkg-misc-additions.patch
index 4969dc3..618b53c 100644
--- a/SOURCES/tkg-misc-additions.patch
+++ b/SOURCES/tkg-misc-additions.patch
@@ -64,760 +64,194 @@ index 2c7171e0b0010..85de313ddec29 100644
select CPU_FREQ_GOV_PERFORMANCE
help
-From 7695eb71d0872ed9633daf0ca779da3344b87dec Mon Sep 17 00:00:00 2001
-From: Evan Quan <evan.quan@amd.com>
-Date: Mon, 21 Aug 2023 14:15:13 +0800
-Subject: [PATCH] drm/amd/pm: correct SMU13 gfx voltage related OD settings
+From 3a88b77d3cb9f9cd8a8aee052ab479b73aeb2e80 Mon Sep 17 00:00:00 2001
+From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
+Date: Sat, 13 Jan 2024 15:29:25 +0100
+Subject: [PATCH] arch/Kconfig: Default to maximum amount of ASLR bits
-The voltage offset setting will be applied to the whole v/f curve line
-instead of per anchor point base.
-
-Signed-off-by: Evan Quan <evan.quan@amd.com>
-Acked-by: Alex Deucher <alexander.deucher@amd.com>
+To mitigate https://zolutal.github.io/aslrnt/; do this with a patch to
+avoid having to enable `CONFIG_EXPERT`.
---
- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 45 +++++++------------
- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 31 ++++++-------
- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 31 ++++++-------
- 3 files changed, 43 insertions(+), 64 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
-index 1da7ece4c627..06aa5c18b40f 100644
---- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
-+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
-@@ -643,18 +643,14 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
- * They can be used to calibrate the sclk voltage curve. This is
- * available for Vega20 and NV1X.
- *
-- * - voltage offset for the six anchor points of the v/f curve labeled
-- * OD_VDDC_CURVE. They can be used to calibrate the v/f curve. This
-- * is only availabe for some SMU13 ASICs.
-- *
- * - voltage offset(in mV) applied on target voltage calculation.
-- * This is available for Sienna Cichlid, Navy Flounder and Dimgrey
-- * Cavefish. For these ASICs, the target voltage calculation can be
-- * illustrated by "voltage = voltage calculated from v/f curve +
-- * overdrive vddgfx offset"
-+ * This is available for Sienna Cichlid, Navy Flounder, Dimgrey
-+ * Cavefish and some later SMU13 ASICs. For these ASICs, the target
-+ * voltage calculation can be illustrated by "voltage = voltage
-+ * calculated from v/f curve + overdrive vddgfx offset"
- *
-- * - a list of valid ranges for sclk, mclk, and voltage curve points
-- * labeled OD_RANGE
-+ * - a list of valid ranges for sclk, mclk, voltage curve points
-+ * or voltage offset labeled OD_RANGE
- *
- * < For APUs >
- *
-@@ -686,24 +682,17 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
- * E.g., "p 2 0 800" would set the minimum core clock on core
- * 2 to 800Mhz.
- *
-- * For sclk voltage curve,
-- * - For NV1X, enter the new values by writing a string that
-- * contains "vc point clock voltage" to the file. The points
-- * are indexed by 0, 1 and 2. E.g., "vc 0 300 600" will update
-- * point1 with clock set as 300Mhz and voltage as 600mV. "vc 2
-- * 1000 1000" will update point3 with clock set as 1000Mhz and
-- * voltage 1000mV.
-- * - For SMU13 ASICs, enter the new values by writing a string that
-- * contains "vc anchor_point_index voltage_offset" to the file.
-- * There are total six anchor points defined on the v/f curve with
-- * index as 0 - 5.
-- * - "vc 0 10" will update the voltage offset for point1 as 10mv.
-- * - "vc 5 -10" will update the voltage offset for point6 as -10mv.
-- *
-- * To update the voltage offset applied for gfxclk/voltage calculation,
-- * enter the new value by writing a string that contains "vo offset".
-- * This is supported by Sienna Cichlid, Navy Flounder and Dimgrey Cavefish.
-- * And the offset can be a positive or negative value.
-+ * For sclk voltage curve supported by Vega20 and NV1X, enter the new
-+ * values by writing a string that contains "vc point clock voltage"
-+ * to the file. The points are indexed by 0, 1 and 2. E.g., "vc 0 300
-+ * 600" will update point1 with clock set as 300Mhz and voltage as 600mV.
-+ * "vc 2 1000 1000" will update point3 with clock set as 1000Mhz and
-+ * voltage 1000mV.
-+ *
-+ * For voltage offset supported by Sienna Cichlid, Navy Flounder, Dimgrey
-+ * Cavefish and some later SMU13 ASICs, enter the new value by writing a
-+ * string that contains "vo offset". E.g., "vo -10" will update the extra
-+ * voltage offset applied to the whole v/f curve line as -10mv.
- *
- * - When you have edited all of the states as needed, write "c" (commit)
- * to the file to commit your changes
-diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
-index 3903a47669e4..bd0d5f027cac 100644
---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
-+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
-@@ -1304,16 +1304,14 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
- od_table->OverDriveTable.UclkFmax);
- break;
-
-- case SMU_OD_VDDC_CURVE:
-+ case SMU_OD_VDDGFX_OFFSET:
- if (!smu_v13_0_0_is_od_feature_supported(smu,
- PP_OD_FEATURE_GFX_VF_CURVE_BIT))
- break;
-
-- size += sysfs_emit_at(buf, size, "OD_VDDC_CURVE:\n");
-- for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++)
-- size += sysfs_emit_at(buf, size, "%d: %dmv\n",
-- i,
-- od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i]);
-+ size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n");
-+ size += sysfs_emit_at(buf, size, "%dmV\n",
-+ od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[0]);
- break;
-
- case SMU_OD_RANGE:
-@@ -1355,7 +1353,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
- PP_OD_FEATURE_GFX_VF_CURVE,
- &min_value,
- &max_value);
-- size += sysfs_emit_at(buf, size, "VDDC_CURVE: %7dmv %10dmv\n",
-+ size += sysfs_emit_at(buf, size, "VDDGFX_OFFSET: %7dmv %10dmv\n",
- min_value, max_value);
- }
- break;
-@@ -1504,29 +1502,26 @@ static int smu_v13_0_0_od_edit_dpm_table(struct smu_context *smu,
- }
- break;
-
-- case PP_OD_EDIT_VDDC_CURVE:
-+ case PP_OD_EDIT_VDDGFX_OFFSET:
- if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) {
-- dev_warn(adev->dev, "VF curve setting not supported!\n");
-+ dev_warn(adev->dev, "Gfx offset setting not supported!\n");
- return -ENOTSUPP;
- }
-
-- if (input[0] >= PP_NUM_OD_VF_CURVE_POINTS ||
-- input[0] < 0)
-- return -EINVAL;
+ arch/Kconfig | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/Kconfig b/arch/Kconfig
+index f4b210ab061291..837d0dbb28ea08 100644
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -1032,7 +1032,7 @@ config ARCH_MMAP_RND_BITS
+ int "Number of bits to use for ASLR of mmap base address" if EXPERT
+ range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
+ default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
+- default ARCH_MMAP_RND_BITS_MIN
++ default ARCH_MMAP_RND_BITS_MAX
+ depends on HAVE_ARCH_MMAP_RND_BITS
+ help
+ This value can be used to select the number of bits to use to
+@@ -1066,7 +1066,7 @@ config ARCH_MMAP_RND_COMPAT_BITS
+ int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
+ range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
+ default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
+- default ARCH_MMAP_RND_COMPAT_BITS_MIN
++ default ARCH_MMAP_RND_COMPAT_BITS_MAX
+ depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ help
+ This value can be used to select the number of bits to use to
+
+From 3cfb591e23181791195a74efe2e9065e0d4bd201 Mon Sep 17 00:00:00 2001
+From: Etienne JUVIGNY <ti3nou@gmail.com>
+Date: Mon, 15 Jan 2024 19:09:39 +0100
+Subject: Revert: drm/amd/pm: fix the high voltage and temperature issue
+
+This was supposed to fix the high voltage and temperature issue after the driver is unloaded on smu 13.0.0,
+smu 13.0.7 and smu 13.0.10, but introduced an arguably more annoying issue. Let's revert it until a proper fix is offered.
+
+Fixes rdna3 shutdown/reboot hang.
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 93cf73d6f..960966f4b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -4050,23 +4050,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ }
+ }
+ } else {
+- switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+- case IP_VERSION(13, 0, 0):
+- case IP_VERSION(13, 0, 7):
+- case IP_VERSION(13, 0, 10):
+- r = psp_gpu_reset(adev);
+- break;
+- default:
+- tmp = amdgpu_reset_method;
+- /* It should do a default reset when loading or reloading the driver,
+- * regardless of the module parameter reset_method.
+- */
+- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+- r = amdgpu_asic_reset(adev);
+- amdgpu_reset_method = tmp;
+- break;
+- }
-
- smu_v13_0_0_get_od_setting_limits(smu,
- PP_OD_FEATURE_GFX_VF_CURVE,
- &minimum,
- &maximum);
-- if (input[1] < minimum ||
-- input[1] > maximum) {
-+ if (input[0] < minimum ||
-+ input[0] > maximum) {
- dev_info(adev->dev, "Voltage offset (%ld) must be within [%d, %d]!\n",
-- input[1], minimum, maximum);
-+ input[0], minimum, maximum);
- return -EINVAL;
- }
-
-- od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[input[0]] = input[1];
-- od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFX_VF_CURVE_BIT;
-+ for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++)
-+ od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = input[0];
-+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT);
- break;
-
- case PP_OD_RESTORE_DEFAULT_TABLE:
-diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
-index 94ef5b4d116d..b9b3bf41eed3 100644
---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
-+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
-@@ -1284,16 +1284,14 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu,
- od_table->OverDriveTable.UclkFmax);
- break;
-
-- case SMU_OD_VDDC_CURVE:
-+ case SMU_OD_VDDGFX_OFFSET:
- if (!smu_v13_0_7_is_od_feature_supported(smu,
- PP_OD_FEATURE_GFX_VF_CURVE_BIT))
- break;
-
-- size += sysfs_emit_at(buf, size, "OD_VDDC_CURVE:\n");
-- for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++)
-- size += sysfs_emit_at(buf, size, "%d: %dmv\n",
-- i,
-- od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i]);
-+ size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n");
-+ size += sysfs_emit_at(buf, size, "%dmV\n",
-+ od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[0]);
- break;
-
- case SMU_OD_RANGE:
-@@ -1335,7 +1333,7 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu,
- PP_OD_FEATURE_GFX_VF_CURVE,
- &min_value,
- &max_value);
-- size += sysfs_emit_at(buf, size, "VDDC_CURVE: %7dmv %10dmv\n",
-+ size += sysfs_emit_at(buf, size, "VDDGFX_OFFSET: %7dmv %10dmv\n",
- min_value, max_value);
- }
- break;
-@@ -1484,29 +1482,26 @@ static int smu_v13_0_7_od_edit_dpm_table(struct smu_context *smu,
- }
- break;
-
-- case PP_OD_EDIT_VDDC_CURVE:
-+ case PP_OD_EDIT_VDDGFX_OFFSET:
- if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) {
-- dev_warn(adev->dev, "VF curve setting not supported!\n");
-+ dev_warn(adev->dev, "Gfx offset setting not supported!\n");
- return -ENOTSUPP;
- }
++ tmp = amdgpu_reset_method;
++ /* It should do a default reset when loading or reloading the driver,
++ * regardless of the module parameter reset_method.
++ */
++ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
++ r = amdgpu_asic_reset(adev);
++ amdgpu_reset_method = tmp;
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+index e1a5ee911..308ebeb43 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -733,7 +733,7 @@ static int smu_early_init(void *handle)
+ smu->adev = adev;
+ smu->pm_enabled = !!amdgpu_dpm;
+ smu->is_apu = false;
+- smu->smu_baco.state = SMU_BACO_STATE_NONE;
++ smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+ smu->smu_baco.platform_support = false;
+ smu->user_dpm_profile.fan_mode = -1;
+
+@@ -1753,31 +1753,10 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
+ return 0;
+ }
-- if (input[0] >= PP_NUM_OD_VF_CURVE_POINTS ||
-- input[0] < 0)
-- return -EINVAL;
+-static int smu_reset_mp1_state(struct smu_context *smu)
+-{
+- struct amdgpu_device *adev = smu->adev;
+- int ret = 0;
-
- smu_v13_0_7_get_od_setting_limits(smu,
- PP_OD_FEATURE_GFX_VF_CURVE,
- &minimum,
- &maximum);
-- if (input[1] < minimum ||
-- input[1] > maximum) {
-+ if (input[0] < minimum ||
-+ input[0] > maximum) {
- dev_info(adev->dev, "Voltage offset (%ld) must be within [%d, %d]!\n",
-- input[1], minimum, maximum);
-+ input[0], minimum, maximum);
- return -EINVAL;
- }
-
-- od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[input[0]] = input[1];
-- od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFX_VF_CURVE_BIT;
-+ for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++)
-+ od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = input[0];
-+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT);
- break;
-
- case PP_OD_RESTORE_DEFAULT_TABLE:
---
-GitLab
-
-
-From 8bad128720ebc69e37f1c66767fb276088ef4fa7 Mon Sep 17 00:00:00 2001
-From: Evan Quan <evan.quan@amd.com>
-Date: Wed, 16 Aug 2023 14:51:19 +0800
-Subject: [PATCH] drm/amd/pm: fulfill the support for SMU13 `pp_dpm_dcefclk`
- interface
-
-Fulfill the incomplete SMU13 `pp_dpm_dcefclk` implementation.
-
-Reported-by: Guan Yu <guan.yu@amd.com>
-Signed-off-by: Evan Quan <evan.quan@amd.com>
-Acked-by: Alex Deucher <alexander.deucher@amd.com>
----
- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 27 +++++++++++++++++++
- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 27 +++++++++++++++++++
- 2 files changed, 54 insertions(+)
+- if ((!adev->in_runpm) && (!adev->in_suspend) &&
+- (!amdgpu_in_reset(adev)))
+- switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+- case IP_VERSION(13, 0, 0):
+- case IP_VERSION(13, 0, 7):
+- case IP_VERSION(13, 0, 10):
+- ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+- break;
+- default:
+- break;
+- }
+-
+- return ret;
+-}
+-
+ static int smu_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct smu_context *smu = adev->powerplay.pp_handle;
+- int ret;
-diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
-index bd0d5f027cac..5fdb2b3c042a 100644
---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
-+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
-@@ -176,6 +176,7 @@ static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
- CLK_MAP(VCLK1, PPCLK_VCLK_1),
- CLK_MAP(DCLK, PPCLK_DCLK_0),
- CLK_MAP(DCLK1, PPCLK_DCLK_1),
-+ CLK_MAP(DCEFCLK, PPCLK_DCFCLK),
- };
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+@@ -1795,15 +1774,7 @@ static int smu_hw_fini(void *handle)
- static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] = {
-@@ -707,6 +708,22 @@ static int smu_v13_0_0_set_default_dpm_table(struct smu_context *smu)
- pcie_table->num_of_link_levels++;
- }
+ adev->pm.dpm_enabled = false;
-+ /* dcefclk dpm table setup */
-+ dpm_table = &dpm_context->dpm_tables.dcef_table;
-+ if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCN_BIT)) {
-+ ret = smu_v13_0_set_single_dpm_table(smu,
-+ SMU_DCEFCLK,
-+ dpm_table);
-+ if (ret)
-+ return ret;
-+ } else {
-+ dpm_table->count = 1;
-+ dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100;
-+ dpm_table->dpm_levels[0].enabled = true;
-+ dpm_table->min = dpm_table->dpm_levels[0].value;
-+ dpm_table->max = dpm_table->dpm_levels[0].value;
-+ }
-+
- return 0;
+- ret = smu_smc_hw_cleanup(smu);
+- if (ret)
+- return ret;
+-
+- ret = smu_reset_mp1_state(smu);
+- if (ret)
+- return ret;
+-
+- return 0;
++ return smu_smc_hw_cleanup(smu);
}
-@@ -794,6 +811,9 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu,
- case METRICS_CURR_FCLK:
- *value = metrics->CurrClock[PPCLK_FCLK];
- break;
-+ case METRICS_CURR_DCEFCLK:
-+ *value = metrics->CurrClock[PPCLK_DCFCLK];
-+ break;
- case METRICS_AVERAGE_GFXCLK:
- if (metrics->AverageGfxActivity <= SMU_13_0_0_BUSY_THRESHOLD)
- *value = metrics->AverageGfxclkFrequencyPostDs;
-@@ -1047,6 +1067,9 @@ static int smu_v13_0_0_get_current_clk_freq_by_table(struct smu_context *smu,
- case PPCLK_DCLK_1:
- member_type = METRICS_AVERAGE_DCLK1;
- break;
-+ case PPCLK_DCFCLK:
-+ member_type = METRICS_CURR_DCEFCLK;
-+ break;
- default:
- return -EINVAL;
- }
-@@ -1196,6 +1219,9 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
- case SMU_DCLK1:
- single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
- break;
-+ case SMU_DCEFCLK:
-+ single_dpm_table = &(dpm_context->dpm_tables.dcef_table);
-+ break;
- default:
- break;
- }
-@@ -1209,6 +1235,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
- case SMU_VCLK1:
- case SMU_DCLK:
- case SMU_DCLK1:
-+ case SMU_DCEFCLK:
- ret = smu_v13_0_0_get_current_clk_freq_by_table(smu, clk_type, &curr_freq);
- if (ret) {
- dev_err(smu->adev->dev, "Failed to get current clock freq!");
-diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
-index b9b3bf41eed3..12949928e285 100644
---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
-+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
-@@ -147,6 +147,7 @@ static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
- CLK_MAP(VCLK1, PPCLK_VCLK_1),
- CLK_MAP(DCLK, PPCLK_DCLK_0),
- CLK_MAP(DCLK1, PPCLK_DCLK_1),
-+ CLK_MAP(DCEFCLK, PPCLK_DCFCLK),
+ static void smu_late_fini(void *handle)
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+index f8b2e6cc2..e8329d157 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+@@ -419,7 +419,6 @@ enum smu_reset_mode {
+ enum smu_baco_state {
+ SMU_BACO_STATE_ENTER = 0,
+ SMU_BACO_STATE_EXIT,
+- SMU_BACO_STATE_NONE,
};
- static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] = {
-@@ -696,6 +697,22 @@ static int smu_v13_0_7_set_default_dpm_table(struct smu_context *smu)
- pcie_table->num_of_link_levels++;
- }
-
-+ /* dcefclk dpm table setup */
-+ dpm_table = &dpm_context->dpm_tables.dcef_table;
-+ if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCN_BIT)) {
-+ ret = smu_v13_0_set_single_dpm_table(smu,
-+ SMU_DCEFCLK,
-+ dpm_table);
-+ if (ret)
-+ return ret;
-+ } else {
-+ dpm_table->count = 1;
-+ dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100;
-+ dpm_table->dpm_levels[0].enabled = true;
-+ dpm_table->min = dpm_table->dpm_levels[0].value;
-+ dpm_table->max = dpm_table->dpm_levels[0].value;
-+ }
-+
- return 0;
- }
-
-@@ -777,6 +794,9 @@ static int smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu,
- case METRICS_CURR_FCLK:
- *value = metrics->CurrClock[PPCLK_FCLK];
- break;
-+ case METRICS_CURR_DCEFCLK:
-+ *value = metrics->CurrClock[PPCLK_DCFCLK];
-+ break;
- case METRICS_AVERAGE_GFXCLK:
- *value = metrics->AverageGfxclkFrequencyPreDs;
- break;
-@@ -1027,6 +1047,9 @@ static int smu_v13_0_7_get_current_clk_freq_by_table(struct smu_context *smu,
- case PPCLK_DCLK_1:
- member_type = METRICS_CURR_DCLK1;
- break;
-+ case PPCLK_DCFCLK:
-+ member_type = METRICS_CURR_DCEFCLK;
-+ break;
- default:
- return -EINVAL;
- }
-@@ -1176,6 +1199,9 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu,
- case SMU_DCLK1:
- single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
- break;
-+ case SMU_DCEFCLK:
-+ single_dpm_table = &(dpm_context->dpm_tables.dcef_table);
-+ break;
- default:
- break;
- }
-@@ -1189,6 +1215,7 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu,
- case SMU_VCLK1:
- case SMU_DCLK:
- case SMU_DCLK1:
-+ case SMU_DCEFCLK:
- ret = smu_v13_0_7_get_current_clk_freq_by_table(smu, clk_type, &curr_freq);
- if (ret) {
- dev_err(smu->adev->dev, "Failed to get current clock freq!");
---
-GitLab
-
-From 3a2fb905145e76e4bbb32e90e0c6cd532dafb1b0 Mon Sep 17 00:00:00 2001
-From: Evan Quan <evan.quan@amd.com>
-Date: Mon, 14 Aug 2023 10:16:27 +0800
-Subject: [PATCH] Revert "drm/amd/pm: disable the SMU13 OD feature support
- temporarily"
-
-This reverts commit 3592cc20beeece83db4c50a0f400e2dd15139de9.
-
-The enablement for the new OD mechanism completed. Also, the support for
-fan control related OD feature has been added via this new mechanism.
-Thus, it is time to bring back the SMU13 OD support.
-
-Signed-off-by: Evan Quan <evan.quan@amd.com>
-Acked-by: Alex Deucher <alexander.deucher@amd.com>
----
- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 18 +++---------------
- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 12 +++---------
- 2 files changed, 6 insertions(+), 24 deletions(-)
-
+ struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
-index c48f81450d24..093962a37688 100644
+index 82c4e1f1c..2ba77b1d1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
-@@ -348,13 +348,10 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
- table_context->power_play_table;
- struct smu_baco_context *smu_baco = &smu->smu_baco;
- PPTable_t *pptable = smu->smu_table.driver_pptable;
--#if 0
-- PPTable_t *pptable = smu->smu_table.driver_pptable;
- const OverDriveLimits_t * const overdrive_upperlimits =
- &pptable->SkuTable.OverDriveLimitsBasicMax;
- const OverDriveLimits_t * const overdrive_lowerlimits =
- &pptable->SkuTable.OverDriveLimitsMin;
--#endif
+@@ -2772,13 +2766,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
- if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC)
- smu->dc_controlled_by_gpio = true;
-@@ -357,27 +357,18 @@
- smu_baco->maco_support = true;
- }
-
-- /*
-- * We are in the transition to a new OD mechanism.
-- * Disable the OD feature support for SMU13 temporarily.
-- * TODO: get this reverted when new OD mechanism online
-- */
--#if 0
- if (!overdrive_lowerlimits->FeatureCtrlMask ||
- !overdrive_upperlimits->FeatureCtrlMask)
- smu->od_enabled = false;
-
-+ table_context->thermal_controller_type =
-+ powerplay_table->thermal_controller_type;
-+
- /*
- * Instead of having its own buffer space and get overdrive_table copied,
- * smu->od_settings just points to the actual overdrive_table
- */
- smu->od_settings = &powerplay_table->overdrive_table;
--#else
-- smu->od_enabled = false;
--#endif
+ switch (mp1_state) {
+ case PP_MP1_STATE_UNLOAD:
+- ret = smu_cmn_send_smc_msg_with_param(smu,
+- SMU_MSG_PrepareMp1ForUnload,
+- 0x55, NULL);
-
-- table_context->thermal_controller_type =
-- powerplay_table->thermal_controller_type;
-
- smu->adev->pm.no_fan =
- !(pptable->SkuTable.FeaturesToRun[0] & (1 << FEATURE_FAN_CONTROL_BIT));
+- if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+- ret = smu_v13_0_disable_pmfw_state(smu);
+-
++ ret = smu_cmn_set_mp1_state(smu, mp1_state);
+ break;
+ default:
+ /* Ignore others */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
-index 99bc449799a6..430ad1b05ba3 100644
+index 81eafed76..19c1289d0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
-@@ -338,12 +338,10 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
- struct smu_baco_context *smu_baco = &smu->smu_baco;
- PPTable_t *smc_pptable = table_context->driver_pptable;
- BoardTable_t *BoardTable = &smc_pptable->BoardTable;
--#if 0
- const OverDriveLimits_t * const overdrive_upperlimits =
- &smc_pptable->SkuTable.OverDriveLimitsBasicMax;
- const OverDriveLimits_t * const overdrive_lowerlimits =
- &smc_pptable->SkuTable.OverDriveLimitsMin;
--#endif
+@@ -2499,13 +2499,7 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
- if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC)
- smu->dc_controlled_by_gpio = true;
-@@ -348,22 +348,18 @@
- smu_baco->maco_support = true;
- }
-
--#if 0
- if (!overdrive_lowerlimits->FeatureCtrlMask ||
- !overdrive_upperlimits->FeatureCtrlMask)
- smu->od_enabled = false;
-
-+ table_context->thermal_controller_type =
-+ powerplay_table->thermal_controller_type;
-+
- /*
- * Instead of having its own buffer space and get overdrive_table copied,
- * smu->od_settings just points to the actual overdrive_table
- */
- smu->od_settings = &powerplay_table->overdrive_table;
--#else
-- smu->od_enabled = false;
--#endif
+ switch (mp1_state) {
+ case PP_MP1_STATE_UNLOAD:
+- ret = smu_cmn_send_smc_msg_with_param(smu,
+- SMU_MSG_PrepareMp1ForUnload,
+- 0x55, NULL);
-
-- table_context->thermal_controller_type =
-- powerplay_table->thermal_controller_type;
-
- return 0;
- }
---
-GitLab
-
-From 072a8dc3b5260ba08ba2e66036c2c63abd77df52 Mon Sep 17 00:00:00 2001
-From: Lijo Lazar <lijo.lazar@amd.com>
-Date: Thu, 24 Aug 2023 17:25:51 +0530
-Subject: [PATCH] drm/amd/pm: Fix clock reporting for SMUv13.0.6
-
-On SMU v13.0.6, effective clocks are reported by FW which won't exactly
-match with DPM level. Report the current clock based on the values
-matching closest to the effective clock. Also, when deep sleep is
-applied to a clock, report it with a special level "S:" as in sample
-clock levels below
-
-S: 19Mhz *
-0: 615Mhz
-1: 800Mhz
-2: 888Mhz
-3: 1000Mhz
-
-Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
-Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
-Reviewed-by: Evan Quan <evan.quan@amd.com>
----
- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 159 +++++++-----------
- 1 file changed, 62 insertions(+), 97 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
-index c2308783053c..29e1cada7667 100644
---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
-+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
-@@ -91,6 +91,8 @@
- #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5
- #define LINK_SPEED_MAX 4
-
-+#define SMU_13_0_6_DSCLK_THRESHOLD 100
-+
- static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
- MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0),
- MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1),
-@@ -783,13 +785,61 @@ static int smu_v13_0_6_get_current_clk_freq_by_table(struct smu_context *smu,
- return smu_v13_0_6_get_smu_metrics_data(smu, member_type, value);
- }
-
-+static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf,
-+ struct smu_13_0_dpm_table *single_dpm_table,
-+ uint32_t curr_clk, const char *clk_name)
-+{
-+ struct pp_clock_levels_with_latency clocks;
-+ int i, ret, size = 0, level = -1;
-+ uint32_t clk1, clk2;
-+
-+ ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
-+ if (ret) {
-+ dev_err(smu->adev->dev, "Attempt to get %s clk levels failed!",
-+ clk_name);
-+ return ret;
-+ }
-+
-+ if (!clocks.num_levels)
-+ return -EINVAL;
-+
-+ if (curr_clk < SMU_13_0_6_DSCLK_THRESHOLD) {
-+ size = sysfs_emit_at(buf, size, "S: %uMhz *\n", curr_clk);
-+ for (i = 0; i < clocks.num_levels; i++)
-+ size += sysfs_emit_at(buf, size, "%d: %uMhz\n", i,
-+ clocks.data[i].clocks_in_khz /
-+ 1000);
-+
-+ } else {
-+ if ((clocks.num_levels == 1) ||
-+ (curr_clk < (clocks.data[0].clocks_in_khz / 1000)))
-+ level = 0;
-+ for (i = 0; i < clocks.num_levels; i++) {
-+ clk1 = clocks.data[i].clocks_in_khz / 1000;
-+
-+ if (i < (clocks.num_levels - 1))
-+ clk2 = clocks.data[i + 1].clocks_in_khz / 1000;
-+
-+ if (curr_clk >= clk1 && curr_clk < clk2) {
-+ level = (curr_clk - clk1) <= (clk2 - curr_clk) ?
-+ i :
-+ i + 1;
-+ }
-+
-+ size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i,
-+ clk1, (level == i) ? "*" : "");
-+ }
-+ }
-+
-+ return size;
-+}
-+
- static int smu_v13_0_6_print_clk_levels(struct smu_context *smu,
- enum smu_clk_type type, char *buf)
- {
-- int i, now, size = 0;
-+ int now, size = 0;
- int ret = 0;
- struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
-- struct pp_clock_levels_with_latency clocks;
- struct smu_13_0_dpm_table *single_dpm_table;
- struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
- struct smu_13_0_dpm_context *dpm_context = NULL;
-@@ -852,26 +902,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu,
- }
-
- single_dpm_table = &(dpm_context->dpm_tables.uclk_table);
-- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
-- if (ret) {
-- dev_err(smu->adev->dev,
-- "Attempt to get memory clk levels Failed!");
-- return ret;
-- }
-
-- for (i = 0; i < clocks.num_levels; i++)
-- size += sysfs_emit_at(
-- buf, size, "%d: %uMhz %s\n", i,
-- clocks.data[i].clocks_in_khz / 1000,
-- (clocks.num_levels == 1) ?
-- "*" :
-- (smu_v13_0_6_freqs_in_same_level(
-- clocks.data[i].clocks_in_khz /
-- 1000,
-- now) ?
-- "*" :
-- ""));
-- break;
-+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
-+ "mclk");
-
- case SMU_SOCCLK:
- ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_SOCCLK,
-@@ -883,26 +916,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu,
- }
-
- single_dpm_table = &(dpm_context->dpm_tables.soc_table);
-- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
-- if (ret) {
-- dev_err(smu->adev->dev,
-- "Attempt to get socclk levels Failed!");
-- return ret;
-- }
-
-- for (i = 0; i < clocks.num_levels; i++)
-- size += sysfs_emit_at(
-- buf, size, "%d: %uMhz %s\n", i,
-- clocks.data[i].clocks_in_khz / 1000,
-- (clocks.num_levels == 1) ?
-- "*" :
-- (smu_v13_0_6_freqs_in_same_level(
-- clocks.data[i].clocks_in_khz /
-- 1000,
-- now) ?
-- "*" :
-- ""));
-- break;
-+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
-+ "socclk");
-
- case SMU_FCLK:
- ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_FCLK,
-@@ -914,26 +930,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu,
- }
-
- single_dpm_table = &(dpm_context->dpm_tables.fclk_table);
-- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
-- if (ret) {
-- dev_err(smu->adev->dev,
-- "Attempt to get fclk levels Failed!");
-- return ret;
-- }
-
-- for (i = 0; i < single_dpm_table->count; i++)
-- size += sysfs_emit_at(
-- buf, size, "%d: %uMhz %s\n", i,
-- single_dpm_table->dpm_levels[i].value,
-- (clocks.num_levels == 1) ?
-- "*" :
-- (smu_v13_0_6_freqs_in_same_level(
-- clocks.data[i].clocks_in_khz /
-- 1000,
-- now) ?
-- "*" :
-- ""));
-- break;
-+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
-+ "fclk");
-
- case SMU_VCLK:
- ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_VCLK,
-@@ -945,26 +944,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu,
- }
-
- single_dpm_table = &(dpm_context->dpm_tables.vclk_table);
-- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
-- if (ret) {
-- dev_err(smu->adev->dev,
-- "Attempt to get vclk levels Failed!");
-- return ret;
-- }
-
-- for (i = 0; i < single_dpm_table->count; i++)
-- size += sysfs_emit_at(
-- buf, size, "%d: %uMhz %s\n", i,
-- single_dpm_table->dpm_levels[i].value,
-- (clocks.num_levels == 1) ?
-- "*" :
-- (smu_v13_0_6_freqs_in_same_level(
-- clocks.data[i].clocks_in_khz /
-- 1000,
-- now) ?
-- "*" :
-- ""));
-- break;
-+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
-+ "vclk");
-
- case SMU_DCLK:
- ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_DCLK,
-@@ -976,26 +958,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu,
- }
-
- single_dpm_table = &(dpm_context->dpm_tables.dclk_table);
-- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
-- if (ret) {
-- dev_err(smu->adev->dev,
-- "Attempt to get dclk levels Failed!");
-- return ret;
-- }
-
-- for (i = 0; i < single_dpm_table->count; i++)
-- size += sysfs_emit_at(
-- buf, size, "%d: %uMhz %s\n", i,
-- single_dpm_table->dpm_levels[i].value,
-- (clocks.num_levels == 1) ?
-- "*" :
-- (smu_v13_0_6_freqs_in_same_level(
-- clocks.data[i].clocks_in_khz /
-- 1000,
-- now) ?
-- "*" :
-- ""));
-- break;
-+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now,
-+ "dclk");
-
- default:
+- if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+- ret = smu_v13_0_disable_pmfw_state(smu);
+-
++ ret = smu_cmn_set_mp1_state(smu, mp1_state);
break;
---
-GitLab
-
+ default:
+ /* Ignore others */
diff --git a/SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch b/SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch
index 6fe4c39..c12229d 100644
--- a/SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch
+++ b/SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch
@@ -69,7 +69,7 @@ index 08969f5aa38d59..ff601cb7a1fae0 100644
@@ -100,6 +100,10 @@
#include <linux/user_events.h>
#include <linux/iommu.h>
-
+
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
@@ -146,6 +146,6 @@ index 54211dbd516c57..16ca0c1516298d 100644
+int unprivileged_userns_clone;
+#endif
+
- static struct kmem_cache *user_ns_cachep __read_mostly;
+ static struct kmem_cache *user_ns_cachep __ro_after_init;
static DEFINE_MUTEX(userns_state_mutex);
diff --git a/SOURCES/valve-gamescope-framerate-control-fixups.patch b/SOURCES/valve-gamescope-framerate-control-fixups.patch
new file mode 100644
index 0000000..425ee09
--- /dev/null
+++ b/SOURCES/valve-gamescope-framerate-control-fixups.patch
@@ -0,0 +1,647 @@
+From 79f7b70729663c5986c84e1a0888f50a55a81093 Mon Sep 17 00:00:00 2001
+From: Thomas Crider <gloriouseggroll@gmail.com>
+Date: Mon, 18 Dec 2023 03:36:09 -0500
+Subject: [PATCH 1/6] revert 1101185bc50f5e45b8b89300914d9aa35a0c8cbe
+
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index 7dab01803..81672738a 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -6106,6 +6106,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+
+ if (recalculate_timing)
+ drm_mode_set_crtcinfo(&saved_mode, 0);
++ else if (!old_stream)
++ drm_mode_set_crtcinfo(&mode, 0);
+
+ /*
+ * If scaling is enabled and refresh rate didn't change
+@@ -6669,8 +6671,6 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec
+ goto fail;
+ }
+
+- drm_mode_set_crtcinfo(mode, 0);
+-
+ stream = create_validate_stream_for_sink(aconnector, mode,
+ to_dm_connector_state(connector->state),
+ NULL);
+--
+2.43.0
+
+From 38f2149c7e97f379210c658c21124d547e7b503a Mon Sep 17 00:00:00 2001
+From: Simon Ser <contact@emersion.fr>
+Date: Tue, 30 Aug 2022 17:29:43 +0000
+Subject: [PATCH] drm: introduce DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This new kernel capability indicates whether async page-flips are
+supported via the atomic uAPI. DRM clients can use it to check
+for support before feeding DRM_MODE_PAGE_FLIP_ASYNC to the kernel.
+
+Make it clear that DRM_CAP_ASYNC_PAGE_FLIP is for legacy uAPI only.
+
+Signed-off-by: Simon Ser <contact@emersion.fr>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Joshua Ashton <joshua@froggi.es>
+Cc: Melissa Wen <mwen@igalia.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Harry Wentland <hwentlan@amd.com>
+Cc: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Cc: André Almeida <andrealmeid@igalia.com>
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
+Link: https://lore.kernel.org/r/20220830172851.269402-6-contact@emersion.fr
+---
+ drivers/gpu/drm/drm_ioctl.c | 5 +++++
+ include/uapi/drm/drm.h | 10 +++++++++-
+ 2 files changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
+index ca2a6e6101dc8..5b1591e2b46c9 100644
+--- a/drivers/gpu/drm/drm_ioctl.c
++++ b/drivers/gpu/drm/drm_ioctl.c
+@@ -302,6 +302,11 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_
+ case DRM_CAP_CRTC_IN_VBLANK_EVENT:
+ req->value = 1;
+ break;
++ case DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP:
++ req->value = drm_core_check_feature(dev, DRIVER_ATOMIC) &&
++ dev->mode_config.async_page_flip &&
++ !dev->mode_config.atomic_async_page_flip_not_supported;
++ break;
+ default:
+ return -EINVAL;
+ }
+diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
+index 642808520d922..b1962628ecda9 100644
+--- a/include/uapi/drm/drm.h
++++ b/include/uapi/drm/drm.h
+@@ -706,7 +706,8 @@ struct drm_gem_open {
+ /**
+ * DRM_CAP_ASYNC_PAGE_FLIP
+ *
+- * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
++ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
++ * page-flips.
+ */
+ #define DRM_CAP_ASYNC_PAGE_FLIP 0x7
+ /**
+@@ -773,6 +773,13 @@
+ * :ref:`drm_sync_objects`.
+ */
+ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14
++/**
++ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
++ *
++ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
++ * commits.
++ */
++#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15
+
+ /* DRM_IOCTL_GET_CAP ioctl argument type */
+ struct drm_get_cap {
+--
+GitLab
+
+From f6de551227de6244119f9f2bea3ae81543ee7c4f Mon Sep 17 00:00:00 2001
+From: Simon Ser <contact@emersion.fr>
+Date: Tue, 30 Aug 2022 17:29:35 +0000
+Subject: [PATCH] drm: allow DRM_MODE_PAGE_FLIP_ASYNC for atomic commits
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+If the driver supports it, allow user-space to supply the
+DRM_MODE_PAGE_FLIP_ASYNC flag to request an async page-flip.
+Set drm_crtc_state.async_flip accordingly.
+
+Document that drivers will reject atomic commits if an async
+flip isn't possible. This allows user-space to fall back to
+something else. For instance, Xorg falls back to a blit.
+Another option is to wait as close to the next vblank as
+possible before performing the page-flip to reduce latency.
+
+v2: document new uAPI
+
+Signed-off-by: Simon Ser <contact@emersion.fr>
+Co-developed-by: André Almeida <andrealmeid@igalia.com>
+Signed-off-by: André Almeida <andrealmeid@igalia.com>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Joshua Ashton <joshua@froggi.es>
+Cc: Melissa Wen <mwen@igalia.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Harry Wentland <hwentlan@amd.com>
+Cc: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
+Link: https://lore.kernel.org/r/20220830172851.269402-5-contact@emersion.fr
+---
+ drivers/gpu/drm/drm_atomic_uapi.c | 28 +++++++++++++++++++++++++---
+ include/uapi/drm/drm_mode.h | 4 ++++
+ 2 files changed, 29 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
+index c06d0639d552d..945761968428e 100644
+--- a/drivers/gpu/drm/drm_atomic_uapi.c
++++ b/drivers/gpu/drm/drm_atomic_uapi.c
+@@ -1282,6 +1282,18 @@ static void complete_signaling(struct drm_device *dev,
+ kfree(fence_state);
+ }
+
++static void
++set_async_flip(struct drm_atomic_state *state)
++{
++ struct drm_crtc *crtc;
++ struct drm_crtc_state *crtc_state;
++ int i;
++
++ for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
++ crtc_state->async_flip = true;
++ }
++}
++
+ int drm_mode_atomic_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+ {
+@@ -1322,9 +1334,16 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
+ }
+
+ if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) {
+- drm_dbg_atomic(dev,
+- "commit failed: invalid flag DRM_MODE_PAGE_FLIP_ASYNC\n");
+- return -EINVAL;
++ if (!dev->mode_config.async_page_flip) {
++ drm_dbg_atomic(dev,
++ "commit failed: DRM_MODE_PAGE_FLIP_ASYNC not supported\n");
++ return -EINVAL;
++ }
++ if (dev->mode_config.atomic_async_page_flip_not_supported) {
++ drm_dbg_atomic(dev,
++ "commit failed: DRM_MODE_PAGE_FLIP_ASYNC not supported with atomic\n");
++ return -EINVAL;
++ }
+ }
+
+ /* can't test and expect an event at the same time. */
+@@ -1422,6 +1441,9 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
+ if (ret)
+ goto out;
+
++ if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC)
++ set_async_flip(state);
++
+ if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) {
+ ret = drm_atomic_check_only(state);
+ } else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) {
+diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
+index 46becedf5b2fc..f1422c8387224 100644
+--- a/include/uapi/drm/drm_mode.h
++++ b/include/uapi/drm/drm_mode.h
+@@ -949,6 +949,10 @@ struct hdr_output_metadata {
+ * Request that the page-flip is performed as soon as possible, ie. with no
+ * delay due to waiting for vblank. This may cause tearing to be visible on
+ * the screen.
++ *
++ * When used with atomic uAPI, the driver will return an error if the hardware
++ * doesn't support performing an asynchronous page-flip for this update.
++ * User-space should handle this, e.g. by falling back to a regular page-flip.
+ */
+ #define DRM_MODE_PAGE_FLIP_ASYNC 0x02
+ #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
+--
+GitLab
+
+From 9d923e79d060d8c7218c8229c65c964b7f04e864 Mon Sep 17 00:00:00 2001
+From: Simon Ser <contact@emersion.fr>
+Date: Tue, 30 Aug 2022 17:29:26 +0000
+Subject: [PATCH] drm: introduce
+ drm_mode_config.atomic_async_page_flip_not_supported
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This new field indicates whether the driver has the necessary logic
+to support async page-flips via the atomic uAPI. This is leveraged by
+the next commit to allow user-space to use this functionality.
+
+All atomic drivers setting drm_mode_config.async_page_flip are updated
+to also set drm_mode_config.atomic_async_page_flip_not_supported. We
+will gradually check and update these drivers to properly handle
+drm_crtc_state.async_flip in their atomic logic.
+
+The goal of this negative flag is the same as
+fb_modifiers_not_supported: we want to eventually get rid of all
+drivers missing atomic support for async flips. New drivers should not
+set this flag, instead they should support atomic async flips (if
+they support async flips at all). IOW, we don't want more drivers
+with async flip support for legacy but not atomic.
+
+v2: only set the flag on atomic drivers (remove it on amdgpu DCE and
+on radeon)
+
+Signed-off-by: Simon Ser <contact@emersion.fr>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Joshua Ashton <joshua@froggi.es>
+Cc: Melissa Wen <mwen@igalia.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Harry Wentland <hwentlan@amd.com>
+Cc: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Cc: André Almeida <andrealmeid@igalia.com>
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
+Link: https://lore.kernel.org/r/20220830172851.269402-4-contact@emersion.fr
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 +
+ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 1 +
+ drivers/gpu/drm/i915/display/intel_display_driver.c | 1 +
+ drivers/gpu/drm/nouveau/nouveau_display.c | 1 +
+ drivers/gpu/drm/vc4/vc4_kms.c | 1 +
+ include/drm/drm_mode_config.h | 11 +++++++++++
+ 6 files changed, 16 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index 81672738a..05c404fcc 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -3998,6 +3998,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* indicates support for immediate flip */
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
++ adev_to_drm(adev)->mode_config.atomic_async_page_flip_not_supported = true;
+
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+index fa0f9a93d..301b222c4 100644
+--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
++++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+@@ -639,6 +639,7 @@ static int atmel_hlcdc_dc_modeset_init(struct drm_device *dev)
+ dev->mode_config.max_height = dc->desc->max_height;
+ dev->mode_config.funcs = &mode_config_funcs;
+ dev->mode_config.async_page_flip = true;
++ dev->mode_config.atomic_async_page_flip_not_supported = true;
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
+index 8f144d4d3..f290c5c2e 100644
+--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
++++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
+@@ -126,6 +126,7 @@
+ mode_config->helper_private = &intel_mode_config_funcs;
+
+ mode_config->async_page_flip = HAS_ASYNC_FLIPS(i915) && !i915->params.disable_async_page_flip;
++ mode_config->atomic_async_page_flip_not_supported = true;
+
+ /*
+ * Maximum framebuffer dimensions, chosen to match
+diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
+index 99977e5fe..540895dab 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_display.c
++++ b/drivers/gpu/drm/nouveau/nouveau_display.c
+@@ -720,6 +720,7 @@ nouveau_display_create(struct drm_device *dev)
+ dev->mode_config.async_page_flip = false;
+ else
+ dev->mode_config.async_page_flip = true;
++ dev->mode_config.atomic_async_page_flip_not_supported = true;
+
+ drm_kms_helper_poll_init(dev);
+ drm_kms_helper_poll_disable(dev);
+diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
+index 5495f2a94..5b6b311e7 100644
+--- a/drivers/gpu/drm/vc4/vc4_kms.c
++++ b/drivers/gpu/drm/vc4/vc4_kms.c
+@@ -1068,6 +1068,7 @@ int vc4_kms_load(struct drm_device *dev)
+ dev->mode_config.helper_private = &vc4_mode_config_helpers;
+ dev->mode_config.preferred_depth = 24;
+ dev->mode_config.async_page_flip = true;
++ dev->mode_config.atomic_async_page_flip_not_supported = true;
+ dev->mode_config.normalize_zpos = true;
+
+ ret = vc4_ctm_obj_init(vc4);
+diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
+index 973119a91..47b005671 100644
+--- a/include/drm/drm_mode_config.h
++++ b/include/drm/drm_mode_config.h
+@@ -918,6 +918,17 @@ struct drm_mode_config {
+ */
+ bool async_page_flip;
+
++ /**
++ * @atomic_async_page_flip_not_supported:
++ *
++ * If true, the driver does not support async page-flips with the
++ * atomic uAPI. This is only used by old drivers which haven't yet
++ * accommodated for &drm_crtc_state.async_flip in their atomic logic,
++ * even if they have &drm_mode_config.async_page_flip set to true.
++ * New drivers shall not set this flag.
++ */
++ bool atomic_async_page_flip_not_supported;
++
+ /**
+ * @fb_modifiers_not_supported:
+ *
+--
+2.43.0
+
+From 24ac301d6208f1135644fe32514994799e79a6a0 Mon Sep 17 00:00:00 2001
+From: Simon Ser <contact@emersion.fr>
+Date: Tue, 30 Aug 2022 17:29:52 +0000
+Subject: [PATCH] amd/display: indicate support for atomic async page-flips on
+ DC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+amdgpu_dm_commit_planes() already sets the flip_immediate flag for
+async page-flips. This flag is used to set the UNP_FLIP_CONTROL
+register. Thus, no additional change is required to handle async
+page-flips with the atomic uAPI.
+
+v2: make it clear this commit is about DC and not only DCN
+
+Signed-off-by: Simon Ser <contact@emersion.fr>
+Cc: Joshua Ashton <joshua@froggi.es>
+Cc: Melissa Wen <mwen@igalia.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Harry Wentland <hwentlan@amd.com>
+Cc: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Cc: André Almeida <andrealmeid@igalia.com>
+Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
+Link: https://lore.kernel.org/r/20220830172851.269402-7-contact@emersion.fr
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index 27a1e3a0046c9..a003e796aa183 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -3980,7 +3980,6 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* indicates support for immediate flip */
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
+- adev_to_drm(adev)->mode_config.atomic_async_page_flip_not_supported = true;
+
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+--
+GitLab
+
+From 32993fef83542e3bea66ed3ceec4944b3ae9d4f1 Mon Sep 17 00:00:00 2001
+From: Joshua Ashton <joshua@froggi.es>
+Date: Mon, 14 Nov 2022 19:52:30 +0000
+Subject: [PATCH] drm/amd/display: Always set crtcinfo from
+ create_stream_for_sink
+
+Given that we always pass dm_state into here now, this won't ever
+trigger anymore.
+
+This is needed, as otherwise we will always fail mode validation with invalid
+clocks or link bandwidth errors.
+
+Signed-off-by: Joshua Ashton <joshua@froggi.es>
+Signed-off-by: Harry Wentland <harry.wentland@amd.com>
+Reviewed-by: Harry Wentland <harry.wentland@amd.com>
+
+Cc: Pekka Paalanen <ppaalanen@gmail.com>
+Cc: Sebastian Wick <sebastian.wick@redhat.com>
+Cc: Vitaly.Prosyak@amd.com
+Cc: Joshua Ashton <joshua@froggi.es>
+Cc: Simon Ser <contact@emersion.fr>
+Cc: Melissa Wen <mwen@igalia.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: amd-gfx@lists.freedesktop.org
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index 81672738a..8eb14c74a 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -6106,7 +6106,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+
+ if (recalculate_timing)
+ drm_mode_set_crtcinfo(&saved_mode, 0);
+- else if (!old_stream)
++ else
+ drm_mode_set_crtcinfo(&mode, 0);
+
+ /*
+--
+2.43.0
+
+From 0af59135c2a9e05af87bc82f492fab13fff52fbd Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@igalia.com>
+Date: Wed, 22 Nov 2023 13:19:38 -0300
+Subject: [PATCH] drm: Refuse to async flip with atomic prop changes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Given that prop changes may lead to modesetting, which would defeat the
+fast path of the async flip, refuse any atomic prop change for async
+flips in atomic API. The only exception is the framebuffer ID to flip
+to. Currently the only plane type supported is the primary one.
+
+Signed-off-by: André Almeida <andrealmeid@igalia.com>
+Reviewed-by: Simon Ser <contact@emersion.fr>
+---
+ drivers/gpu/drm/drm_atomic_uapi.c | 52 +++++++++++++++++++++++++++--
+ drivers/gpu/drm/drm_crtc_internal.h | 2 +-
+ drivers/gpu/drm/drm_mode_object.c | 2 +-
+ 3 files changed, 51 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
+index 37caa6c33e22b3..86083184ac6bb2 100644
+--- a/drivers/gpu/drm/drm_atomic_uapi.c
++++ b/drivers/gpu/drm/drm_atomic_uapi.c
+@@ -964,13 +964,28 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state,
+ return ret;
+ }
+
++static int drm_atomic_check_prop_changes(int ret, uint64_t old_val, uint64_t prop_value,
++ struct drm_property *prop)
++{
++ if (ret != 0 || old_val != prop_value) {
++ drm_dbg_atomic(prop->dev,
++ "[PROP:%d:%s] No prop can be changed during async flip\n",
++ prop->base.id, prop->name);
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
+ int drm_atomic_set_property(struct drm_atomic_state *state,
+ struct drm_file *file_priv,
+ struct drm_mode_object *obj,
+ struct drm_property *prop,
+- uint64_t prop_value)
++ uint64_t prop_value,
++ bool async_flip)
+ {
+ struct drm_mode_object *ref;
++ uint64_t old_val;
+ int ret;
+
+ if (!drm_property_change_valid_get(prop, prop_value, &ref))
+@@ -987,6 +1002,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
+ break;
+ }
+
++ if (async_flip) {
++ ret = drm_atomic_connector_get_property(connector, connector_state,
++ prop, &old_val);
++ ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
++ break;
++ }
++
+ ret = drm_atomic_connector_set_property(connector,
+ connector_state, file_priv,
+ prop, prop_value);
+@@ -1002,6 +1024,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
+ break;
+ }
+
++ if (async_flip) {
++ ret = drm_atomic_crtc_get_property(crtc, crtc_state,
++ prop, &old_val);
++ ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
++ break;
++ }
++
+ ret = drm_atomic_crtc_set_property(crtc,
+ crtc_state, prop, prop_value);
+ break;
+@@ -1009,6 +1038,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
+ case DRM_MODE_OBJECT_PLANE: {
+ struct drm_plane *plane = obj_to_plane(obj);
+ struct drm_plane_state *plane_state;
++ struct drm_mode_config *config = &plane->dev->mode_config;
+
+ plane_state = drm_atomic_get_plane_state(state, plane);
+ if (IS_ERR(plane_state)) {
+@@ -1016,6 +1046,21 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
+ break;
+ }
+
++ if (async_flip && prop != config->prop_fb_id) {
++ ret = drm_atomic_plane_get_property(plane, plane_state,
++ prop, &old_val);
++ ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
++ break;
++ }
++
++ if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) {
++ drm_dbg_atomic(prop->dev,
++ "[OBJECT:%d] Only primary planes can be changed during async flip\n",
++ obj->id);
++ ret = -EINVAL;
++ break;
++ }
++
+ ret = drm_atomic_plane_set_property(plane,
+ plane_state, file_priv,
+ prop, prop_value);
+@@ -1295,6 +1340,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
+ struct drm_out_fence_state *fence_state;
+ int ret = 0;
+ unsigned int i, j, num_fences;
++ bool async_flip = false;
+
+ /* disallow for drivers not supporting atomic: */
+ if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
+@@ -1408,8 +1454,8 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
+ goto out;
+ }
+
+- ret = drm_atomic_set_property(state, file_priv,
+- obj, prop, prop_value);
++ ret = drm_atomic_set_property(state, file_priv, obj,
++ prop, prop_value, async_flip);
+ if (ret) {
+ drm_mode_object_put(obj);
+ goto out;
+diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
+index 501a10edd0e1dc..381130cebe811c 100644
+--- a/drivers/gpu/drm/drm_crtc_internal.h
++++ b/drivers/gpu/drm/drm_crtc_internal.h
+@@ -251,7 +251,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
+ struct drm_file *file_priv,
+ struct drm_mode_object *obj,
+ struct drm_property *prop,
+- uint64_t prop_value);
++ uint64_t prop_value, bool async_flip);
+ int drm_atomic_get_property(struct drm_mode_object *obj,
+ struct drm_property *property, uint64_t *val);
+
+diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c
+index ac0d2ce3f87041..0e8355063eee36 100644
+--- a/drivers/gpu/drm/drm_mode_object.c
++++ b/drivers/gpu/drm/drm_mode_object.c
+@@ -538,7 +538,7 @@ static int set_property_atomic(struct drm_mode_object *obj,
+ obj_to_connector(obj),
+ prop_value);
+ } else {
+- ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value);
++ ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value, false);
+ if (ret)
+ goto out;
+ ret = drm_atomic_commit(state);
+From 1edf3fbbeb36440e1222c2fe0e8127fb804c5278 Mon Sep 17 00:00:00 2001
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Date: Fri, 4 Aug 2023 11:13:04 -0400
+Subject: [PATCH] drm/amd/display: ensure async flips are only accepted for
+ fast updates
+
+We should be checking to see if async flips are supported in
+amdgpu_dm_atomic_check() (i.e. not dm_crtc_helper_atomic_check()). Also,
+async flipping isn't supported if a plane's framebuffer changes memory
+domains during an atomic commit. So, move the check from
+dm_crtc_helper_atomic_check() to amdgpu_dm_atomic_check() and check if
+the memory domain has changed in amdgpu_dm_atomic_check().
+
+Cc: stable@vger.kernel.org
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2733
+Fixes: c1e18c44dc7f ("drm/amd/display: only accept async flips for fast updates")
+Reviewed-by: Harry Wentland <harry.wentland@amd.com>
+Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit a7c0cad0dc060bb77e9c9d235d68441b0fc69507)
+Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
+---
+ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 12 ------------
+ 1 file changed, 12 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+index be1ebe826442a4..4b223db0cf2fe8 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+@@ -473,18 +473,6 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
+ return -EINVAL;
+ }
+
+- /*
+- * Only allow async flips for fast updates that don't change the FB
+- * pitch, the DCC state, rotation, etc.
+- */
+- if (crtc_state->async_flip &&
+- dm_crtc_state->update_type != UPDATE_TYPE_FAST) {
+- drm_dbg_atomic(crtc->dev,
+- "[CRTC:%d:%s] async flips are only supported for fast updates\n",
+- crtc->base.id, crtc->name);
+- return -EINVAL;
+- }
+-
+ /* In some use cases, like reset, no stream is attached */
+ if (!dm_crtc_state->stream)
+ return 0;
diff --git a/SOURCES/winesync.patch b/SOURCES/winesync.patch
index 5b2eaf3..459bf54 100644
--- a/SOURCES/winesync.patch
+++ b/SOURCES/winesync.patch
@@ -15,9 +15,9 @@ diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 94e9fb4cdd76..4f9e3d80a6e8 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
-@@ -561,6 +561,17 @@
- This driver can also be built as a module. If so, the module
- will be called tps6594-pfsm.
+@@ -519,6 +519,17 @@
+
+ If you do not intend to run this kernel as a guest, say N.
+config WINESYNC
+ tristate "Synchronization primitives for Wine"
@@ -30,9 +30,9 @@ index 94e9fb4cdd76..4f9e3d80a6e8 100644
+
+ If unsure, say N.
+
- source "drivers/misc/c2port/Kconfig"
- source "drivers/misc/eeprom/Kconfig"
- source "drivers/misc/cb710/Kconfig"
+ config TMR_MANAGER
+ tristate "Select TMR Manager"
+ depends on MICROBLAZE && MB_MANAGER
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 2be8542616dd..d061fe45407b 100644
--- a/drivers/misc/Makefile
@@ -2175,17 +2175,17 @@ Subject: [PATCH 13/34] selftests: winesync: Add some tests for semaphore
create mode 100644 tools/testing/selftests/drivers/winesync/winesync.c
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
-index c852eb40c4f7..a366016d6254 100644
+index 8247a7c69..553c949dc 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
-@@ -14,6 +14,7 @@ TARGETS += drivers/dma-buf
+@@ -18,6 +18,7 @@ TARGETS += drivers/dma-buf
TARGETS += drivers/s390x/uvdevice
TARGETS += drivers/net/bonding
TARGETS += drivers/net/team
+TARGETS += drivers/winesync
+ TARGETS += dt
TARGETS += efivarfs
TARGETS += exec
- TARGETS += fchmodat2
diff --git a/tools/testing/selftests/drivers/winesync/Makefile b/tools/testing/selftests/drivers/winesync/Makefile
new file mode 100644
index 000000000000..43b39fdeea10
@@ -3286,7 +3286,7 @@ diff --git a/MAINTAINERS b/MAINTAINERS
index 72b9654f764c..ff31beb17835 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
-@@ -22976,6 +22976,15 @@
+@@ -23391,6 +23391,15 @@
S: Maintained
F: drivers/media/rc/winbond-cir.c
diff --git a/SPECS/kernel.spec b/SPECS/kernel.spec
index cd80e42..0256d09 100644
--- a/SPECS/kernel.spec
+++ b/SPECS/kernel.spec
@@ -160,18 +160,18 @@ Summary: The Linux kernel
# the --with-release option overrides this setting.)
%define debugbuildsenabled 1
%define buildid .fsync
-%define specrpmversion 6.6.14
-%define specversion 6.6.14
-%define patchversion 6.6
+%define specrpmversion 6.7.3
+%define specversion 6.7.3
+%define patchversion 6.7
%define pkgrelease 200
%define kversion 6
-%define tarfile_release 6.6.14
+%define tarfile_release 6.7.3
# This is needed to do merge window version magic
-%define patchlevel 6
+%define patchlevel 7
# This allows pkg_release to have configurable %%{?dist} tag
-%define specrelease 202%{?buildid}%{?dist}
+%define specrelease 201%{?buildid}%{?dist}
# This defines the kabi tarball version
-%define kabiversion 6.6.14
+%define kabiversion 6.7.3
# If this variable is set to 1, a bpf selftests build failure will cause a
# fatal kernel package build error
@@ -224,6 +224,8 @@ Summary: The Linux kernel
%define with_cross_headers %{?_without_cross_headers: 0} %{?!_without_cross_headers: 1}
# perf
%define with_perf %{?_without_perf: 0} %{?!_without_perf: 1}
+# libperf
+%define with_libperf %{?_without_libperf: 0} %{?!_without_libperf: 1}
# tools
%define with_tools %{?_without_tools: 0} %{?!_without_tools: 1}
# bpf tool
@@ -285,7 +287,7 @@ Summary: The Linux kernel
# Want to build a vanilla kernel build without any non-upstream patches?
%define with_vanilla %{?_with_vanilla: 1} %{?!_with_vanilla: 0}
-%ifarch x86_64
+%ifarch x86_64 aarch64
%define with_efiuki %{?_without_efiuki: 0} %{?!_without_efiuki: 1}
%else
%define with_efiuki 0
@@ -300,9 +302,10 @@ Summary: The Linux kernel
# no stablelist
%define with_kernel_abi_stablelists 0
# Fedora builds these separately
-%define with_perf 0
-%define with_tools 0
-%define with_bpftool 0
+%define with_perf 1
+%define with_libperf 1
+%define with_tools 1
+%define with_bpftool 1
# No realtime fedora variants
%define with_realtime 0
%define with_arm64_64k 0
@@ -388,6 +391,7 @@ Summary: The Linux kernel
%define with_realtime 0
%define with_vdso_install 0
%define with_perf 0
+%define with_libperf 0
%define with_tools 0
%define with_bpftool 0
%define with_kernel_abi_stablelists 0
@@ -402,6 +406,7 @@ Summary: The Linux kernel
%define with_base 0
%define with_vdso_install 0
%define with_perf 0
+%define with_libperf 0
%define with_tools 0
%define with_bpftool 0
%define with_kernel_abi_stablelists 0
@@ -417,6 +422,7 @@ Summary: The Linux kernel
%define with_debuginfo 0
%define with_vdso_install 0
%define with_perf 0
+%define with_libperf 0
%define with_tools 0
%define with_bpftool 0
%define with_kernel_abi_stablelists 0
@@ -480,6 +486,7 @@ Summary: The Linux kernel
%define with_cross_headers 0
%define with_tools 0
%define with_perf 0
+%define with_libperf 0
%define with_bpftool 0
%define with_selftests 0
%define with_debug 0
@@ -572,6 +579,7 @@ Summary: The Linux kernel
%define with_debuginfo 0
%define with_perf 0
+%define with_libperf 0
%define with_tools 0
%define with_bpftool 0
%define with_selftests 0
@@ -696,7 +704,11 @@ BuildRequires: opencsd-devel >= 1.0.0
BuildRequires: python3-docutils
BuildRequires: gettext ncurses-devel
BuildRequires: libcap-devel libcap-ng-devel
+# The following are rtla requirements
+BuildRequires: python3-docutils
+BuildRequires: libtraceevent-devel
BuildRequires: libtracefs-devel
+
%ifnarch s390x
BuildRequires: pciutils-devel
%endif
@@ -713,6 +725,9 @@ BuildRequires: zlib-devel binutils-devel
%endif
%if %{with_selftests}
BuildRequires: clang llvm-devel fuse-devel
+%ifarch x86_64
+BuildRequires: lld
+%endif
BuildRequires: libcap-devel libcap-ng-devel rsync libmnl-devel
BuildRequires: numactl-devel
%endif
@@ -785,7 +800,7 @@ BuildRequires: binutils
BuildRequires: lvm2
BuildRequires: systemd-boot-unsigned
# For systemd-stub and systemd-pcrphase
-BuildRequires: systemd-udev
+BuildRequires: systemd-udev >= 252-1
# For TPM operations in UKI initramfs
BuildRequires: tpm2-tools
%endif
@@ -799,7 +814,7 @@ BuildRequires: tpm2-tools
Source0: linux-%{tarfile_release}.tar.xz
Source1: Makefile.rhelver
-
+Source2: kernel.changelog
# Name of the packaged file containing signing key
%ifarch ppc64le
@@ -919,8 +934,6 @@ Source77: partial-clang_lto-aarch64-debug-snip.config
Source80: generate_all_configs.sh
Source81: process_configs.sh
-Source82: update_scripts.sh
-
Source84: mod-internal.list
Source85: mod-partner.list
@@ -984,9 +997,8 @@ Patch200: tkg.patch
Patch202: fsync.patch
Patch203: OpenRGB.patch
Patch206: amdgpu-si-cik-default.patch
+Patch207: nouveau-gsp-default.patch
Patch208: winesync.patch
-Patch209: tkg-BBRv2.patch
-Patch210: tkg-bcachefs.patch
Patch211: tkg-misc-additions.patch
Patch212: tkg-unprivileged-CLONE_NEWUSER.patch
@@ -1007,17 +1019,27 @@ Patch319: v10-0002-HID-asus-make-asus_kbd_init-generic-remove-rog_n.patch
Patch320: v10-0003-HID-asus-add-ROG-Ally-N-Key-ID-and-keycodes.patch
Patch321: v10-0004-HID-asus-add-ROG-Ally-xpad-settings.patch
Patch323: rog-ally-bmc150.patch
+Patch404: rog-ally-gyro-fix.patch
# hdr: https://github.com/CachyOS/kernel-patches
Patch326: 0001-amd-hdr.patch
Patch327: 0001-add-acpi_call.patch
Patch328: uinput.patch
+# fixes framerate control in gamescope
+# also fixes https://gitlab.freedesktop.org/drm/amd/-/issues/2733
+Patch330: valve-gamescope-framerate-control-fixups.patch
+
+# fixes HAINAN amdgpu card not being bootable
+# https://gitlab.freedesktop.org/drm/amd/-/issues/1839
+Patch331: amdgpu-HAINAN-variant-fixup.patch
+
# steamdeck oled patches
Patch310: steamdeck-oled-wifi.patch
-Patch311: steamdeck-oled-bt.patch
Patch312: steamdeck-oled-audio.patch
-Patch314: steamdeck-oled-hw-quirks.patch
+
+# t2 macbook patches
+Patch332: t2linux.patch
# temporary patches
Patch401: 0001-Remove-REBAR-size-quirk-for-Sapphire-RX-5600-XT-Puls.patch
@@ -1179,6 +1201,23 @@ This package provides debug information for the perf python bindings.
# with_perf
%endif
+%if %{with_libperf}
+%package -n libperf
+Summary: The perf library from kernel source
+License: GPL-2.0-only AND (LGPL-2.1-only OR BSD-2-Clause)
+%description -n libperf
+This package contains the kernel source perf library.
+
+%package -n libperf-devel
+Summary: Development files for the perf library from kernel source
+License: GPL-2.0-only AND (LGPL-2.1-only OR BSD-2-Clause)
+%description -n libperf-devel
+This package includes libraries and header files needed for development
+of applications which use the perf library from kernel source.
+
+# with_libperf
+%endif
+
%if %{with_tools}
%package -n %{package_name}-tools
Summary: Assortment of tools for the Linux kernel
@@ -1233,13 +1272,14 @@ This package provides debug information for package %{package_name}-tools.
%if 0%{gemini}
Epoch: %{gemini}
%endif
-Summary: RTLA: Real-Time Linux Analysis tools
+Summary: Real-Time Linux Analysis tools
+Requires: libtraceevent
+Requires: libtracefs
%description -n rtla
-The rtla tool is a meta-tool that includes a set of commands that
-aims to analyze the real-time properties of Linux. But, instead of
-testing Linux as a black box, rtla leverages kernel tracing
-capabilities to provide precise information about the properties
-and root causes of unexpected results.
+The rtla meta-tool includes a set of commands that aims to analyze
+the real-time properties of Linux. Instead of testing Linux as a black box,
+rtla leverages kernel tracing capabilities to provide precise information
+about the properties and root causes of unexpected results.
%package -n rv
Summary: RV: Runtime Verification
@@ -1256,18 +1296,14 @@ analysing the logical and timing behavior of Linux.
%if %{with_bpftool}
-%define bpftoolversion 7.3.0
-
%package -n bpftool
Summary: Inspection and simple manipulation of eBPF programs and maps
-Version: %{bpftoolversion}
%description -n bpftool
This package contains the bpftool, which allows inspection and simple
manipulation of eBPF programs and maps.
%package -n bpftool-debuginfo
Summary: Debug information for package bpftool
-Version: %{bpftoolversion}
Group: Development/Debug
Requires: %{name}-debuginfo-common-%{_target_cpu} = %{specrpmversion}-%{release}
AutoReqProv: no
@@ -1287,7 +1323,7 @@ This package provides debug information for the bpftool package.
%package selftests-internal
Summary: Kernel samples and selftests
-Requires: binutils, bpftool, iproute-tc, nmap-ncat, python3, fuse-libs
+Requires: binutils, bpftool, iproute-tc, nmap-ncat, python3, fuse-libs, keyutils
%description selftests-internal
Kernel sample programs and selftests.
@@ -1297,6 +1333,8 @@ Kernel sample programs and selftests.
# of matching the pattern against the symlinks file.
%{expand:%%global _find_debuginfo_opts %{?_find_debuginfo_opts} -p '.*%%{_libexecdir}/(ksamples|kselftests)/.*|XXX' -o selftests-debuginfo.list}
+%define __requires_exclude ^liburandom_read.so.*$
+
# with_selftests
%endif
@@ -1716,6 +1754,26 @@ Prebuilt debug unified kernel image for virtual machines.
Prebuilt default unified kernel image for virtual machines.
%endif
+%if %{with_arm64_16k} && %{with_debug} && %{with_efiuki}
+%description 16k-debug-uki-virt
+Prebuilt 16k debug unified kernel image for virtual machines.
+%endif
+
+%if %{with_arm64_16k_base} && %{with_efiuki}
+%description 16k-uki-virt
+Prebuilt 16k unified kernel image for virtual machines.
+%endif
+
+%if %{with_arm64_64k} && %{with_debug} && %{with_efiuki}
+%description 64k-debug-uki-virt
+Prebuilt 64k debug unified kernel image for virtual machines.
+%endif
+
+%if %{with_arm64_64k_base} && %{with_efiuki}
+%description 64k-uki-virt
+Prebuilt 64k unified kernel image for virtual machines.
+%endif
+
%if %{with_ipaclones}
%kernel_ipaclones_package
%endif
@@ -1790,9 +1848,8 @@ ApplyOptionalPatch tkg.patch
ApplyOptionalPatch fsync.patch
ApplyOptionalPatch OpenRGB.patch
ApplyOptionalPatch amdgpu-si-cik-default.patch
+ApplyOptionalPatch nouveau-gsp-default.patch
ApplyOptionalPatch winesync.patch
-ApplyOptionalPatch tkg-BBRv2.patch
-ApplyOptionalPatch tkg-bcachefs.patch
ApplyOptionalPatch tkg-misc-additions.patch
ApplyOptionalPatch tkg-unprivileged-CLONE_NEWUSER.patch
@@ -1813,17 +1870,27 @@ ApplyOptionalPatch v10-0002-HID-asus-make-asus_kbd_init-generic-remove-rog_n.pat
ApplyOptionalPatch v10-0003-HID-asus-add-ROG-Ally-N-Key-ID-and-keycodes.patch
ApplyOptionalPatch v10-0004-HID-asus-add-ROG-Ally-xpad-settings.patch
ApplyOptionalPatch rog-ally-bmc150.patch
+ApplyOptionalPatch rog-ally-gyro-fix.patch
# hdr: https://github.com/CachyOS/kernel-patches
ApplyOptionalPatch 0001-amd-hdr.patch
ApplyOptionalPatch 0001-add-acpi_call.patch
ApplyOptionalPatch uinput.patch
+# fixes framerate control in gamescope
+# also fixes https://gitlab.freedesktop.org/drm/amd/-/issues/2733
+ApplyOptionalPatch valve-gamescope-framerate-control-fixups.patch
+
+# fixes HAINAN amdgpu card not being bootable
+# https://gitlab.freedesktop.org/drm/amd/-/issues/1839
+ApplyOptionalPatch amdgpu-HAINAN-variant-fixup.patch
+
# steamdeck oled patches
ApplyOptionalPatch steamdeck-oled-wifi.patch
-ApplyOptionalPatch steamdeck-oled-bt.patch
ApplyOptionalPatch steamdeck-oled-audio.patch
-ApplyOptionalPatch steamdeck-oled-hw-quirks.patch
+
+# t2 macbook patches
+ApplyOptionalPatch t2linux.patch
# temporary patches
ApplyOptionalPatch 0001-Remove-REBAR-size-quirk-for-Sapphire-RX-5600-XT-Puls.patch
@@ -1980,20 +2047,28 @@ done
%endif
RHJOBS=$RPM_BUILD_NCPUS SPECPACKAGE_NAME=%{name} ./process_configs.sh $OPTS %{specrpmversion}
-cp %{SOURCE82} .
-RPM_SOURCE_DIR=$RPM_SOURCE_DIR ./update_scripts.sh %{primary_target}
-
# We may want to override files from the primary target in case of building
# against a flavour of it (eg. centos not rhel), thus override it here if
# necessary
+update_scripts() {
+ TARGET="$1"
+
+ for i in "$RPM_SOURCE_DIR"/*."$TARGET"; do
+ NEW=${i%."$TARGET"}
+ cp "$i" "$(basename "$NEW")"
+ done
+}
+
+update_target=%{primary_target}
if [ "%{primary_target}" == "rhel" ]; then
+: # no-op to avoid empty if-fi error
%if 0%{?centos}
+ update_scripts $update_target
echo "Updating scripts/sources to centos version"
- RPM_SOURCE_DIR=$RPM_SOURCE_DIR ./update_scripts.sh centos
-%else
- echo "Not updating scripts/sources to centos version"
+ update_target=centos
%endif
fi
+update_scripts $update_target
# end of kernel config
%endif
@@ -2804,8 +2879,9 @@ InitBuildVars
%ifarch aarch64
%global perf_build_extra_opts CORESIGHT=1
%endif
+# LIBBPF_DYNAMIC=1 is temporarily removed from the next command because it breaks the build on f39 and f38
%global perf_make \
- %{__make} %{?make_opts} EXTRA_CFLAGS="${RPM_OPT_FLAGS}" LDFLAGS="%{__global_ldflags} -Wl,-E" %{?cross_opts} -C tools/perf V=1 NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 WERROR=0 NO_LIBUNWIND=1 HAVE_CPLUS_DEMANGLE=1 NO_GTK2=1 NO_STRLCPY=1 NO_BIONIC=1 LIBBPF_DYNAMIC=1 LIBTRACEEVENT_DYNAMIC=1 %{?perf_build_extra_opts} prefix=%{_prefix} PYTHON=%{__python3}
+ %{__make} %{?make_opts} EXTRA_CFLAGS="${RPM_OPT_FLAGS}" LDFLAGS="%{__global_ldflags} -Wl,-E" %{?cross_opts} -C tools/perf V=1 NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 WERROR=0 NO_LIBUNWIND=1 HAVE_CPLUS_DEMANGLE=1 NO_GTK2=1 NO_STRLCPY=1 NO_BIONIC=1 LIBTRACEEVENT_DYNAMIC=1 %{?perf_build_extra_opts} prefix=%{_prefix} PYTHON=%{__python3}
%if %{with_perf}
# perf
# make sure check-headers.sh is executable
@@ -3014,6 +3090,8 @@ docdir=$RPM_BUILD_ROOT%{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}
# copy the source over
mkdir -p $docdir
tar -h -f - --exclude=man --exclude='.*' -c Documentation | tar xf - -C $docdir
+cat %{SOURCE2} | xz > $docdir/kernel.changelog.xz
+chmod 0644 $docdir/kernel.changelog.xz
# with_doc
%endif
@@ -3092,6 +3170,13 @@ mkdir -p %{buildroot}/%{_mandir}/man1
rm -rf %{buildroot}%{_libdir}/traceevent
%endif
+%if %{with_libperf}
+pushd tools/lib/perf
+%{tools_make} DESTDIR=%{buildroot} prefix=%{_prefix} libdir=%{_libdir} install install_headers
+rm -rf %{buildroot}%{_libdir}/libperf.a
+popd
+%endif
+
%if %{with_tools}
%ifarch %{cpupowerarchs}
%{make} -C tools/power/cpupower DESTDIR=$RPM_BUILD_ROOT libdir=%{_libdir} mandir=%{_mandir} CPUFREQ_BENCH=false install
@@ -3446,7 +3531,7 @@ fi\
%if %{with_up_base}
%kernel_variant_preun
-%kernel_variant_post -r kernel-smp
+%kernel_variant_post
%endif
%if %{with_zfcpdump}
@@ -3474,6 +3559,16 @@ fi\
%kernel_variant_post -v 16k-debug
%endif
+%if %{with_arm64_16k} && %{with_debug} && %{with_efiuki}
+%kernel_variant_posttrans -v 16k-debug -u virt
+%kernel_variant_preun -v 16k-debug -u virt
+%endif
+
+%if %{with_arm64_16k_base} && %{with_efiuki}
+%kernel_variant_posttrans -v 16k -u virt
+%kernel_variant_preun -v 16k -u virt
+%endif
+
%if %{with_arm64_64k_base}
%kernel_variant_preun -v 64k
%kernel_variant_post -v 64k
@@ -3484,9 +3579,19 @@ fi\
%kernel_variant_post -v 64k-debug
%endif
+%if %{with_arm64_64k} && %{with_debug} && %{with_efiuki}
+%kernel_variant_posttrans -v 64k-debug -u virt
+%kernel_variant_preun -v 64k-debug -u virt
+%endif
+
+%if %{with_arm64_64k_base} && %{with_efiuki}
+%kernel_variant_posttrans -v 64k -u virt
+%kernel_variant_preun -v 64k -u virt
+%endif
+
%if %{with_realtime_base}
%kernel_variant_preun -v rt
-%kernel_variant_post -v rt -r (kernel|kernel-smp)
+%kernel_variant_post -v rt -r kernel
%kernel_kvm_post rt
%endif
@@ -3531,6 +3636,7 @@ fi\
%{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}/Documentation/*
%dir %{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}/Documentation
%dir %{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}
+%{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}/kernel.changelog.xz
%endif
%if %{with_perf}
@@ -3556,6 +3662,37 @@ fi\
# with_perf
%endif
+%if %{with_libperf}
+%files -n libperf
+%{_libdir}/libperf.so.0
+%{_libdir}/libperf.so.0.0.1
+
+%files -n libperf-devel
+%{_libdir}/libperf.so
+%{_libdir}/pkgconfig/libperf.pc
+%{_includedir}/internal/*.h
+%{_includedir}/perf/bpf_perf.h
+%{_includedir}/perf/core.h
+%{_includedir}/perf/cpumap.h
+%{_includedir}/perf/perf_dlfilter.h
+%{_includedir}/perf/event.h
+%{_includedir}/perf/evlist.h
+%{_includedir}/perf/evsel.h
+%{_includedir}/perf/mmap.h
+%{_includedir}/perf/threadmap.h
+%{_mandir}/man3/libperf.3.gz
+%{_mandir}/man7/libperf-counting.7.gz
+%{_mandir}/man7/libperf-sampling.7.gz
+%{_docdir}/libperf/examples/sampling.c
+%{_docdir}/libperf/examples/counting.c
+%{_docdir}/libperf/html/libperf.html
+%{_docdir}/libperf/html/libperf-counting.html
+%{_docdir}/libperf/html/libperf-sampling.html
+
+# with_libperf
+%endif
+
+
%if %{with_tools}
%ifnarch %{cpupowerarchs}
%files -n %{package_name}-tools
@@ -3599,12 +3736,14 @@ fi\
%ifarch %{cpupowerarchs}
%files -n %{package_name}-tools-libs
-%{_libdir}/libcpupower.so.1
+%{_libdir}/libcpupower.so.0
%{_libdir}/libcpupower.so.0.0.1
%files -n %{package_name}-tools-libs-devel
%{_libdir}/libcpupower.so
%{_includedir}/cpufreq.h
+%{_includedir}/cpuidle.h
+%{_includedir}/powercap.h
%endif
%files -n rtla
@@ -3690,7 +3829,7 @@ fi\
/lib/modules/%{KVERREL}%{?3:+%{3}}/dtb \
%ghost /%{image_install_path}/dtb-%{KVERREL}%{?3:+%{3}} \
%endif\
-%attr(0600, root, root) /lib/modules/%{KVERREL}%{?3:+%{3}}/System.map\
+/lib/modules/%{KVERREL}%{?3:+%{3}}/System.map\
%ghost %attr(0600, root, root) /boot/System.map-%{KVERREL}%{?3:+%{3}}\
%dir /lib/modules\
%dir /lib/modules/%{KVERREL}%{?3:+%{3}}\
@@ -3750,11 +3889,11 @@ fi\
%else\
%if %{with_efiuki}\
%{expand:%%files %{?3:%{3}-}uki-virt}\
-%attr(0600, root, root) /lib/modules/%{KVERREL}%{?3:+%{3}}/System.map\
+/lib/modules/%{KVERREL}%{?3:+%{3}}/System.map\
/lib/modules/%{KVERREL}%{?3:+%{3}}/symvers.%compext\
/lib/modules/%{KVERREL}%{?3:+%{3}}/config\
/lib/modules/%{KVERREL}%{?3:+%{3}}/modules.builtin*\
-/lib/modules/%{KVERREL}%{?3:+%{3}}/%{?-k:%{-k*}}%{!?-k:vmlinuz}-virt.efi\
+%attr(0644, root, root) /lib/modules/%{KVERREL}%{?3:+%{3}}/%{?-k:%{-k*}}%{!?-k:vmlinuz}-virt.efi\
%ghost /%{image_install_path}/efi/EFI/Linux/%{?-k:%{-k*}}%{!?-k:*}-%{KVERREL}%{?3:+%{3}}.efi\
%endif\
%endif\
@@ -3830,85 +3969,318 @@ fi\
#
#
%changelog
-* Thu Feb 01 2024 Jan Drögehoff <sentrycraft123@gmail.com> - 6.6.14-202.fsync.1
-- disable valve15 patch: introduced amdgpu regressions
+* Wed Feb 07 2024 Jan Drögehoff <sentrycraft123@gmail.com> - 6.7.3-201.fsync
+- kernel-fsync v6.7.3
+
+* Wed Jan 31 2024 Justin M. Forbes <jforbes@fedoraproject.org> [6.7.3-0]
+- Config update for stable backport (Justin M. Forbes)
+- Add some more bugs to BugsFixed (Justin M. Forbes)
+- Linux v6.7.3
+
+* Fri Jan 26 2024 Justin M. Forbes <jforbes@fedoraproject.org> [6.7.2-0]
+- redhat: spec: Fix update_scripts run for CentOS builds (Neal Gompa)
+- BPF Tool versioning seems incompatible with stable Fedroa (Justin M. Forbes)
+- Linux v6.7.2
+
+* Sat Jan 20 2024 Justin M. Forbes <jforbes@fedoraproject.org> [6.7.1-0]
+- Fix up requires for UKI (Justin M. Forbes)
+- Fix up libperf install (Justin M. Forbes)
+- Drop soname for libcpupower.so since we reverted the bump (Justin M. Forbes)
+- Turn on CONFIG_TCP_AO for Fedora (Justin M. Forbes)
+- temporarily remove LIBBPF_DYNAMIC=1 from perf build (Thorsten Leemhuis)
+- add libperf packages and enable perf, libperf, tools and bpftool packages (Thorsten Leemhuis)
+- Revert "cpupower: Bump soname version" (Justin M. Forbes)
+- Turn on Renesas RZ for Fedora IOT rhbz2257913 (Justin M. Forbes)
+- Add bugs to BugsFixed (Justin M. Forbes)
+- wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() (Xingyuan Mo)
+- drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set (Javier Martinez Canillas)
+- Basic scaffolding to create a kernel-headers package (Justin M. Forbes)
+- Initial config for fedora-6.7 branch (Justin M. Forbes)
+- Reset RHEL_RELEASE for 6.8 series (Justin M. Forbes)
+- common: cleanup MX3_IPU (Peter Robinson)
+- all: The Octeon MDIO driver is aarch64/mips (Peter Robinson)
+- common: rtc: remove bq4802 config (Peter Robinson)
+- common: de-dupe MARVELL_GTI_WDT (Peter Robinson)
+- all: Remove CAN_BXCAN (Peter Robinson)
+- common: cleanup SND_SOC_ROCKCHIP (Peter Robinson)
+- common: move RHEL DP83867_PHY to common (Peter Robinson)
+- common: Make ASYMMETRIC_KEY_TYPE enable explicit (Peter Robinson)
+- common: Disable aarch64 ARCH_MA35 universally (Peter Robinson)
+- common: arm64: enable Tegra234 pinctrl driver (Peter Robinson)
+- rhel: arm64: Enable qoriq thermal driver (Peter Robinson)
+- common: aarch64: Cleanup some i.MX8 config options (Peter Robinson)
+- all: EEPROM_LEGACY has been removed (Peter Robinson)
+- all: rmeove AppleTalk hardware configs (Peter Robinson)
+- all: cleanup: remove references to SLOB (Peter Robinson)
+- all: cleanup: Drop unnessary BRCMSTB configs (Peter Robinson)
+- all: net: remove retired network schedulers (Peter Robinson)
+- all: cleanup removed CONFIG_IMA_TRUSTED_KEYRING (Peter Robinson)
+- BuildRequires: lld for build with selftests for x86 (Jan Stancek)
+- spec: add keyutils to selftest-internal subpackage requirements (Artem Savkov) [2166911]
+- redhat/spec: exclude liburandom_read.so from requires (Artem Savkov) [2120968]
+- rtla: sync summary text with upstream and update Requires (Jan Stancek)
+- uki-virt: add systemd-sysext dracut module (Gerd Hoffmann)
+- uki-virt: add virtiofs dracut module (Gerd Hoffmann)
+- common: disable the FB device creation (Peter Robinson)
+- s390x: There's no FB on Z-series (Peter Robinson)
+- Linux v6.7.1
+
+* Mon Jan 08 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-68]
+- fedora: aarch64: enable SM_VIDEOCC_8350 (Peter Robinson)
+- Linux v6.7.0
+
+* Sun Jan 07 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.52b1853b080a.67]
+- Linux v6.7.0-0.rc8.52b1853b080a
+
+* Sat Jan 06 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.95c8a35f1c01.66]
+- fedora: arm64: enable ethernet on newer TI industrial (Peter Robinson)
+- fedora: arm64: Disable VIDEO_IMX_MEDIA (Peter Robinson)
+- fedora: use common config for Siemens Simatic IPC (Peter Robinson)
+- fedora: arm: enable Rockchip SPI flash (Peter Robinson)
+- fedora: arm64: enable DRM_TI_SN65DSI83 (Peter Robinson)
+- Linux v6.7.0-0.rc8.95c8a35f1c01
+
+* Fri Jan 05 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.1f874787ed9a.65]
+- Linux v6.7.0-0.rc8.1f874787ed9a
+
+* Thu Jan 04 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.ac865f00af29.64]
+- Linux v6.7.0-0.rc8.ac865f00af29
+
+* Wed Jan 03 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.63]
+- kernel.spec: remove kernel-smp reference from scripts (Jan Stancek)
+
+* Tue Jan 02 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.62]
+- redhat: do not compress the full kernel changelog in the src.rpm (Herton R. Krzesinski)
+
+* Mon Jan 01 2024 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc8.61]
+- Linux v6.7.0-0.rc8
+
+* Sun Dec 31 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.453f5db0619e.60]
+- Linux v6.7.0-0.rc7.453f5db0619e
+
+* Sat Dec 30 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.f016f7547aee.59]
+- Auto consolidate configs for the 6.7 cycle (Justin M. Forbes)
+- Linux v6.7.0-0.rc7.f016f7547aee
+
+* Fri Dec 29 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.8735c7c84d1b.58]
+- Linux v6.7.0-0.rc7.8735c7c84d1b
+
+* Thu Dec 28 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.f5837722ffec.57]
+- Linux v6.7.0-0.rc7.f5837722ffec
+
+* Tue Dec 26 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.fbafc3e621c3.56]
+- Linux v6.7.0-0.rc7.fbafc3e621c3
+
+* Mon Dec 25 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.55]
+- Enable sound for a line of Huawei laptops (TomZanna)
+
+* Sun Dec 24 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc7.54]
+- Linux v6.7.0-0.rc7
+
+* Sat Dec 23 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.5254c0cbc92d.53]
+- Linux v6.7.0-0.rc6.5254c0cbc92d
+
+* Fri Dec 22 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.24e0d2e527a3.52]
+- fedora: a few cleanups and driver enablements (Peter Robinson)
+- fedora: arm64: cleanup Allwinner Pinctrl drivers (Peter Robinson)
+- fedora: aarch64: Enable some DW drivers (Peter Robinson)
+- Linux v6.7.0-0.rc6.24e0d2e527a3
-* Mon Jan 29 2024 Jan Drögehoff <sentrycraft123@gmail.com> - 6.6.14-201.fsync
-- kernel-fsync v6.6.14
+* Thu Dec 21 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.a4aebe936554.51]
+- redhat: ship all the changelog from source git into kernel-doc (Herton R. Krzesinski)
+- redhat: create an empty changelog file when changing its name (Herton R. Krzesinski)
+- Linux v6.7.0-0.rc6.a4aebe936554
-* Fri Jan 26 2024 Augusto Caringi <acaringi@redhat.com> [6.6.14-0]
-- Add some CVE fixes staged for 6.6.14 (Justin M. Forbes)
-- Linux v6.6.14
+* Wed Dec 20 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.55cb5f43689d.50]
+- redhat/self-test: Remove --all from git query (Prarit Bhargava)
+- Linux v6.7.0-0.rc6.55cb5f43689d
-* Sat Jan 20 2024 Justin M. Forbes <jforbes@fedoraproject.org> [6.6.13-0]
-- Linux v6.6.13
+* Tue Dec 19 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.2cf4f94d8e86.49]
+- Linux v6.7.0-0.rc6.2cf4f94d8e86
-* Mon Jan 15 2024 Augusto Caringi <acaringi@redhat.com> [6.6.12-0]
-- Add CVE Fixes to BugsFixed for 6.6.12 (Justin M. Forbes)
-- ida: Fix crash in ida_free when the bitmap is empty (Matthew Wilcox (Oracle))
-- wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() (Xingyuan Mo)
-- Linux v6.6.12
+* Mon Dec 18 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc6.48]
+- Disable accel drivers for Fedora x86 (Kate Hsuan)
+- redhat: scripts: An automation script for disabling unused driver for x86 (Kate Hsuan)
+- Fix up Fedora LJCA configs and filters (Justin M. Forbes)
+- Linux v6.7.0-0.rc6
-* Wed Jan 10 2024 Augusto Caringi <acaringi@redhat.com> [6.6.11-0]
-- Linux v6.6.11
+* Sun Dec 17 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.3b8a9b2e6809.47]
+- Linux v6.7.0-0.rc5.3b8a9b2e6809
-* Fri Jan 05 2024 Augusto Caringi <acaringi@redhat.com> [6.6.10-0]
-- Linux v6.6.10
+* Sat Dec 16 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.c8e97fc6b4c0.46]
+- Fedora configs for 6.7 (Justin M. Forbes)
+- Linux v6.7.0-0.rc5.c8e97fc6b4c0
-* Mon Jan 01 2024 Justin M. Forbes <jforbes@fedoraproject.org> [6.6.9-0]
-- ALSA: hda: cs35l41: Add notification support into component binding (Stefan Binding)
-- ALSA: hda: cs35l41: Support mute notifications for CS35L41 HDA (Stefan Binding)
-- Linux v6.6.9
+* Fri Dec 15 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.3f7168591ebf.45]
+- Linux v6.7.0-0.rc5.3f7168591ebf
-* Wed Dec 20 2023 Augusto Caringi <acaringi@redhat.com> [6.6.8-0]
-- Add BugsFixed entries for rhbz#2254797 and #rhbz2254704 (Justin M. Forbes)
-- Add support for various laptops using CS35L41 HDA without _DSD (Stefan Binding)
-- fedora: arm64: Enable required AllWinner pinctrl drivers (Peter Robinson)
-- fedora: arm64: cleanup Allwinner Pinctrl drivers (Peter Robinson)
-- fedora: aarch64: Enable some DW drivers (Peter Robinson)
-- Basic scaffolding to create a kernel-headers package (Justin M. Forbes)
-- Linux v6.6.8
+* Thu Dec 14 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.5bd7ef53ffe5.44]
+- Linux v6.7.0-0.rc5.5bd7ef53ffe5
-* Wed Dec 13 2023 Augusto Caringi <acaringi@redhat.com> [6.6.7-0]
-- Add rhbz#2253632 rhbz#2253633 to BugsFixed (Justin M. Forbes)
+* Wed Dec 13 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.88035e5694a8.43]
+- Some Fedora config updates for MLX5 (Justin M. Forbes)
- Turn on DRM_ACCEL drivers for Fedora (Justin M. Forbes)
-- Linux v6.6.7
+- Linux v6.7.0-0.rc5.88035e5694a8
-* Mon Dec 11 2023 Augusto Caringi <acaringi@redhat.com> [6.6.6-0]
+* Tue Dec 12 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.26aff849438c.42]
+- redhat: enable the kfence test (Nico Pache)
+- Linux v6.7.0-0.rc5.26aff849438c
+
+* Mon Dec 11 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc5.41]
- redhat/configs: Enable UCLAMP_TASK for PipeWire and WirePlumber (Neal Gompa)
-- Linux v6.6.6
+- Linux v6.7.0-0.rc5
+
+* Sun Dec 10 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.c527f5606aa5.40]
+- Linux v6.7.0-0.rc4.c527f5606aa5
+
+* Sat Dec 09 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.f2e8a57ee903.39]
+- Linux v6.7.0-0.rc4.f2e8a57ee903
+
+* Fri Dec 08 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.5e3f5b81de80.38]
+- Turn on CONFIG_SECURITY_DMESG_RESTRICT for Fedora (Justin M. Forbes)
+- Linux v6.7.0-0.rc4.5e3f5b81de80
+
+* Wed Dec 06 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.bee0e7762ad2.37]
+- Turn off shellcheck for the fedora-stable-release script (Justin M. Forbes)
+
+* Tue Dec 05 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.bee0e7762ad2.36]
+- Add some initial Fedora stable branch script to redhat/scripts/fedora/ (Justin M. Forbes)
+- Linux v6.7.0-0.rc4.bee0e7762ad2
+
+* Mon Dec 04 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc4.35]
+- Linux v6.7.0-0.rc4
+
+* Sun Dec 03 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.968f35f4ab1c.34]
+- Linux v6.7.0-0.rc3.968f35f4ab1c
+
+* Sat Dec 02 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.815fb87b7530.33]
+- redhat: disable iptables-legacy compatibility layer (Florian Westphal)
+- redhat: disable dccp conntrack support (Florian Westphal)
+- configs: enable netfilter_netlink_hook in fedora too (Florian Westphal)
+- Linux v6.7.0-0.rc3.815fb87b7530
+
+* Fri Dec 01 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.994d5c58e50e.32]
+- ext4: Mark mounting fs-verity filesystems as tech-preview (Alexander Larsson)
+- erofs: Add tech preview markers at mount (Alexander Larsson)
+- Enable fs-verity (Alexander Larsson)
+- Enable erofs (Alexander Larsson)
+- aarch64: enable uki (Gerd Hoffmann)
+- redhat: enable CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH as a module for x86 (Patrick Talbert)
+- Turn CONFIG_MFD_CS42L43_SDW on for RHEL (Justin M. Forbes)
+- Linux v6.7.0-0.rc3.994d5c58e50e
-* Fri Dec 08 2023 Augusto Caringi <acaringi@redhat.com> [6.6.5-0]
-- Add io_uring CVE for 6.6.5 (Justin M. Forbes)
-- Linux v6.6.5
+* Thu Nov 30 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.3b47bc037bd4.31]
+- Linux v6.7.0-0.rc3.3b47bc037bd4
-* Sun Dec 03 2023 Justin M. Forbes <jforbes@fedoraproject.org> [6.6.4-0]
-- redhat: Fix macro for kernel-uki-virt flavor (Neal Gompa)
-- Change the uki reqs for Fedora (Justin M. Forbes)
-- Linux v6.6.4
+* Wed Nov 29 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.18d46e76d7c2.30]
+- Enable cryptographic acceleration config flags for PowerPC (Mamatha Inamdar)
+- Also make vmlinuz-virt.efi world readable (Zbigniew Jędrzejewski-Szmek)
+- Drop custom mode for System.map file (Zbigniew Jędrzejewski-Szmek)
+- Linux v6.7.0-0.rc3.18d46e76d7c2
-* Tue Nov 28 2023 Justin M. Forbes <jforbes@fedoraproject.org> [6.6.3-0]
-- Add BugsFixed for 6.6.3 (Justin M. Forbes)
-- Update BugsFixed (Justin M. Forbes)
+* Tue Nov 28 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.df60cee26a2e.29]
+- Add drm_exec_test to mod-internal.list for depmod to succeed (Mika Penttilä)
+- RHEL 9.4 DRM backport (upto v6.6 kernel), sync Kconfigs (Mika Penttilä)
+- Linux v6.7.0-0.rc3.df60cee26a2e
+
+* Mon Nov 27 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc3.28]
+- Linux v6.7.0-0.rc3
+
+* Sun Nov 26 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.090472ed9c92.27]
+- Linux v6.7.0-0.rc2.090472ed9c92
+
+* Sat Nov 25 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.0f5cc96c367f.26]
+- Linux v6.7.0-0.rc2.0f5cc96c367f
+
+* Fri Nov 24 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.f1a09972a45a.25]
+- Linux v6.7.0-0.rc2.f1a09972a45a
+
+* Thu Nov 23 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.9b6de136b5f0.24]
- Turn on USB_DWC3 for Fedora (rhbz 2250955) (Justin M. Forbes)
-- Revert "netfilter: nf_tables: remove catchall element in GC sync path" (Justin M. Forbes)
-- More BugsFixed (Justin M. Forbes)
-- netfilter: nf_tables: remove catchall element in GC sync path (Pablo Neira Ayuso)
-- frop the build number back to 200 for fedora-srpm.sh (Justin M. Forbes)
-- ACPI: video: Use acpi_device_fix_up_power_children() (Hans de Goede)
-- ACPI: PM: Add acpi_device_fix_up_power_children() function (Hans de Goede)
-- Linux v6.6.3
-
-* Mon Nov 20 2023 Justin M. Forbes <jforbes@fedoraproject.org> [6.6.2-0]
-- Add bug for AMD ACPI alarm (Justin M. Forbes)
-- rtc: cmos: Use ACPI alarm for non-Intel x86 systems too (Mario Limonciello)
-- Add bluetooth fixes to BugsFixed (Justin M. Forbes)
-- Drop F37 from release targets as it will not rebase to 6.6 (Justin M. Forbes)
-- Linux v6.6.2
-
-* Wed Nov 08 2023 Justin M. Forbes <jforbes@fedoraproject.org> [6.6.1-0]
-- drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set (Javier Martinez Canillas)
-- Added required files for rebase (Augusto Caringi)
+- Linux v6.7.0-0.rc2.9b6de136b5f0
+
+* Wed Nov 22 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.c2d5304e6c64.23]
+- redhat/configs: Move IOMMUFD to common (Alex Williamson)
+- redhat: Really remove cpupower files (Prarit Bhargava)
+- redhat: remove update_scripts.sh (Prarit Bhargava)
+- Linux v6.7.0-0.rc2.c2d5304e6c64
+
+* Mon Nov 20 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc2.22]
+- Fix s390 zfcpfdump bpf build failures for cgroups (Don Zickus)
+- Linux v6.7.0-0.rc2
+
+* Sun Nov 19 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.037266a5f723.21]
+- Linux v6.7.0-0.rc1.037266a5f723
+
+* Sat Nov 18 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.791c8ab095f7.20]
+- Linux v6.7.0-0.rc1.791c8ab095f7
+
+* Fri Nov 17 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.7475e51b8796.19]
+- Linux v6.7.0-0.rc1.7475e51b8796
+
+* Wed Nov 15 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.c42d9eeef8e5.18]
+- Linux v6.7.0-0.rc1.c42d9eeef8e5
+
+* Tue Nov 14 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.9bacdd8996c7.17]
+- Linux v6.7.0-0.rc1.9bacdd8996c7
+
+* Mon Nov 13 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc1.16]
+- Linux v6.7.0-0.rc1
+
+* Sun Nov 12 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.1b907d050735.15]
+- Linux v6.7.0-0.rc0.1b907d050735
+
+* Sat Nov 11 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.3ca112b71f35.14]
+- Flip CONFIG_NVME_AUTH to m in pending (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.3ca112b71f35
+
+* Fri Nov 10 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.89cdf9d55601.13]
+- Linux v6.7.0-0.rc0.89cdf9d55601
+
+* Thu Nov 09 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.6bc986ab839c.12]
+- Linux v6.7.0-0.rc0.6bc986ab839c
+
+* Wed Nov 08 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.305230142ae0.11]
+- Turn CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 on for Fedora x86 (Jason Montleon)
+- kernel/rh_messages.c: Mark functions as possibly unused (Prarit Bhargava)
+- Add snd-hda-cirrus-scodec-test to mod-internal.list (Scott Weaver)
+- Linux v6.7.0-0.rc0.305230142ae0
+
+* Tue Nov 07 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.be3ca57cfb77.10]
+- Turn off BPF_SYSCALL in pending for zfcpdump (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.be3ca57cfb77
+
+* Mon Nov 06 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.d2f51b3516da.9]
+- Linux v6.7.0-0.rc0.d2f51b3516da
+
+* Sun Nov 05 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.1c41041124bd.8]
+- Linux v6.7.0-0.rc0.1c41041124bd
+
+* Sat Nov 04 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.90b0c2b2edd1.7]
+- Add mean_and_variance_test to mod-internal.list (Justin M. Forbes)
+- Add cfg80211-tests and mac80211-tests to mod-internal.list (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.90b0c2b2edd1
+
+* Fri Nov 03 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.8f6f76a6a29f.6]
+- Turn on CONFIG_MFD_CS42L43_SDW for RHEL in pending (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.8f6f76a6a29f
+
+* Fri Nov 03 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.21e80f3841c0.5]
+- Turn on bcachefs for Fedora (Justin M. Forbes)
+- redhat: configs: fedora: Enable QSEECOM and friends (Andrew Halaney)
+
+* Thu Nov 02 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.21e80f3841c0.4]
+- Add clk-fractional-divider_test to mod-internal.list (Thorsten Leemhuis)
+- Add gso_test to mod-internal.list (Thorsten Leemhuis)
+- Add property-entry-test to mod-internal.list (Thorsten Leemhuis)
+- Linux v6.7.0-0.rc0.21e80f3841c0
+
+* Wed Nov 01 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.8bc9e6515183.3]
+- Fedora 6.7 configs part 1 (Justin M. Forbes)
+- Trim changelog after version bump (Justin M. Forbes)
+- Linux v6.7.0-0.rc0.8bc9e6515183
+
+* Tue Oct 31 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.7.0-0.rc0.5a6a09e97199.2]
- Reset RHEL_RELEASE for rebase (Justin M. Forbes)
- [Scheduled job] Catch config mismatches early during upstream merge (Don Zickus)
- redhat/self-test: Update data for KABI xz change (Prarit Bhargava)
@@ -3916,61 +4288,19 @@ fi\
- redhat/kernel.spec.template: Switch KABI compression to xz (Prarit Bhargava)
- redhat: self-test: Use a more complete SRPM file suffix (Andrew Halaney)
- redhat: makefile: remove stray rpmbuild --without (Eric Chanudet)
-- Linux v6.6.1
-
-* Mon Oct 30 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-61]
-- Linux v6.6.0
-
-* Sun Oct 29 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc7.2af9b20dbb39.60]
-- Linux v6.6.0-0.rc7.2af9b20dbb39
-
-* Sat Oct 28 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc7.56567a20b22b.59]
- Consolidate configs into common for 6.6 (Justin M. Forbes)
-- Linux v6.6.0-0.rc7.56567a20b22b
-
-* Fri Oct 27 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc7.750b95887e56.58]
-- Linux v6.6.0-0.rc7.750b95887e56
-
-* Thu Oct 26 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc7.611da07b89fd.57]
- Updated Fedora configs (Justin M. Forbes)
- Turn on UFSHCD for Fedora x86 (Justin M. Forbes)
-- Linux v6.6.0-0.rc7.611da07b89fd
-
-* Wed Oct 25 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc7.4f82870119a4.56]
- redhat: configs: generic: x86: Disable CONFIG_VIDEO_OV01A10 for x86 platform (Hans de Goede)
-- Linux v6.6.0-0.rc7.4f82870119a4
-
-* Tue Oct 24 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc7.d88520ad73b7.55]
- redhat: remove pending-rhel CONFIG_XFS_ASSERT_FATAL file (Patrick Talbert)
- New configs in fs/xfs (Fedora Kernel Team)
- crypto: rng - Override drivers/char/random in FIPS mode (Herbert Xu)
- random: Add hook to override device reads and getrandom(2) (Herbert Xu)
-- Linux v6.6.0-0.rc7.d88520ad73b7
-
-* Mon Oct 23 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc7.54]
-- Linux v6.6.0-0.rc7
-
-* Sun Oct 22 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc6.1acfd2bd3f0d.53]
-- Linux v6.6.0-0.rc6.1acfd2bd3f0d
-
-* Sat Oct 21 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc6.9c5d00cb7b6b.52]
-- Linux v6.6.0-0.rc6.9c5d00cb7b6b
-
-* Fri Oct 20 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc6.ce55c22ec8b2.51]
- redhat/configs: share CONFIG_ARM64_ERRATUM_2966298 between rhel and fedora (Mark Salter)
- configs: Remove S390 IOMMU config options that no longer exist (Jerry Snitselaar)
- redhat: docs: clarify where bugs and issues are created (Scott Weaver)
- redhat/scripts/rh-dist-git.sh does not take any arguments: fix error message (Denys Vlasenko)
- Add target_branch for gen_config_patches.sh (Don Zickus)
-- Linux v6.6.0-0.rc6.ce55c22ec8b2
-
-* Thu Oct 19 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc6.dd72f9c7e512.50]
-- Linux v6.6.0-0.rc6.dd72f9c7e512
-
-* Wed Oct 18 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc6.06dc10eae55b.49]
-- Linux v6.6.0-0.rc6.06dc10eae55b
-
-* Tue Oct 17 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc6.213f891525c2.48]
- redhat: disable kunit by default (Nico Pache)
- redhat/configs: enable the AMD_PMF driver for RHEL (David Arcari)
- Make CONFIG_ADDRESS_MASKING consistent between fedora and rhel (Chris von Recklinghausen)
@@ -3980,77 +4310,18 @@ fi\
- CI: provide descriptive pipeline name for scheduled pipelines (Michael Hofmann)
- CI: use job templates for variant variables (Michael Hofmann)
- redhat/kernel.spec.template: simplify __modsign_install_post (Jan Stancek)
-- Linux v6.6.0-0.rc6.213f891525c2
-
-* Mon Oct 16 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc6.47]
-- Linux v6.6.0-0.rc6
-
-* Sun Oct 15 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc5.9a3dad63edbe.46]
- Fedora filter updates after configs (Justin M. Forbes)
- Fedora configs for 6.6 (Justin M. Forbes)
-- Linux v6.6.0-0.rc5.9a3dad63edbe
-
-* Sat Oct 14 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc5.727fb8376504.45]
-- Linux v6.6.0-0.rc5.727fb8376504
-
-* Fri Oct 13 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc5.10a6e5feccb8.44]
-- Linux v6.6.0-0.rc5.10a6e5feccb8
-
-* Thu Oct 12 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc5.401644852d0b.43]
-- Linux v6.6.0-0.rc5.401644852d0b
-
-* Wed Oct 11 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc5.1c8b86a3799f.42]
-- Linux v6.6.0-0.rc5.1c8b86a3799f
-
-* Tue Oct 10 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc5.41]
- redhat/configs: Freescale Layerscape SoC family (Steve Best)
- Add clang MR/baseline pipelines (Michael Hofmann)
-
-* Mon Oct 09 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc5.40]
- CI: Remove unused kpet_tree_family (Nikolai Kondrashov)
-- Linux v6.6.0-0.rc5
-
-* Sun Oct 08 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc4.b9ddbb0cde2a.39]
-- Linux v6.6.0-0.rc4.b9ddbb0cde2a
-
-* Sat Oct 07 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc4.82714078aee4.38]
-- Linux v6.6.0-0.rc4.82714078aee4
-
-* Fri Oct 06 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc4.b78b18fb8ee1.37]
- Add clang config framework (Don Zickus)
- Apply partial snippet configs to all configs (Don Zickus)
- Remove unpackaged kgcov config files (Don Zickus)
- redhat/configs: enable missing Kconfig options for Qualcomm RideSX4 (Brian Masney)
- enable CONFIG_ADDRESS_MASKING for x86_64 (Chris von Recklinghausen)
-- Linux v6.6.0-0.rc4.b78b18fb8ee1
-
-* Thu Oct 05 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc4.3006adf3be79.36]
-- Linux v6.6.0-0.rc4.3006adf3be79
-
-* Wed Oct 04 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc4.cbf3a2cb156a.35]
-- Linux v6.6.0-0.rc4.cbf3a2cb156a
-
-* Tue Oct 03 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc4.ce36c8b14987.34]
- common: aarch64: enable NXP Flex SPI (Peter Robinson)
-- Linux v6.6.0-0.rc4.ce36c8b14987
-
-* Mon Oct 02 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc4.33]
-- Linux v6.6.0-0.rc4
-
-* Sun Oct 01 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc3.e402b08634b3.32]
-- Linux v6.6.0-0.rc3.e402b08634b3
-
-* Sat Sep 30 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc3.9f3ebbef746f.31]
- fedora: Switch TI_SCI_CLK and TI_SCI_PM_DOMAINS symbols to built-in (Javier Martinez Canillas)
-- Linux v6.6.0-0.rc3.9f3ebbef746f
-
-* Fri Sep 29 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc3.9ed22ae6be81.30]
-- Linux v6.6.0-0.rc3.9ed22ae6be81
-
-* Thu Sep 28 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc3.633b47cb009d.29]
-- Linux v6.6.0-0.rc3.633b47cb009d
-
-* Wed Sep 27 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc3.0e945134b680.28]
- kernel.spec: adjust build option comment (Michael Hofmann)
- kernel.spec: allow to enable arm64_16k variant (Michael Hofmann)
- gitlab-ci: enable build-only pipelines for Rawhide/16k/aarch64 (Michael Hofmann)
@@ -4059,122 +4330,40 @@ fi\
- redhat/self-test: Update data for cross compile fields (Prarit Bhargava)
- redhat/Makefile.cross: Add message for disabled subpackages (Prarit Bhargava)
- redhat/Makefile.cross: Update cross targets with disabled subpackages (Prarit Bhargava)
-- Linux v6.6.0-0.rc3.0e945134b680
-
-* Tue Sep 26 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc3.27]
- Remove XFS_ASSERT_FATAL from pending-fedora (Justin M. Forbes)
-
-* Mon Sep 25 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc3.26]
- Change default pending for XFS_ONLINE_SCRUB_STATS as it now selects XFS_DEBUG (Justin M. Forbes)
-- Linux v6.6.0-0.rc3
-
-* Sun Sep 24 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc2.3aba70aed91f.25]
-- Linux v6.6.0-0.rc2.3aba70aed91f
-
-* Sat Sep 23 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc2.d90b0276af8f.24]
-- Linux v6.6.0-0.rc2.d90b0276af8f
-
-* Fri Sep 22 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc2.27bbf45eae9c.23]
- gitlab-ci: use --with debug/base to select kernel variants (Michael Hofmann)
- kernel.spec: add rpmbuild --without base option (Michael Hofmann)
-- Linux v6.6.0-0.rc2.27bbf45eae9c
-
-* Thu Sep 21 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc2.42dc814987c1.22]
- redhat: spec: Fix typo for kernel_variant_preun for 16k-debug flavor (Neal Gompa)
-- Linux v6.6.0-0.rc2.42dc814987c1
-
-* Tue Sep 19 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc2.2cf0f7156238.21]
-- Linux v6.6.0-0.rc2.2cf0f7156238
-
-* Mon Sep 18 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc2.20]
-- Linux v6.6.0-0.rc2
-
-* Sun Sep 17 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc1.f0b0d403eabb.19]
-- Linux v6.6.0-0.rc1.f0b0d403eabb
-
-* Sat Sep 16 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc1.57d88e8a5974.18]
-- Linux v6.6.0-0.rc1.57d88e8a5974
-
-* Fri Sep 15 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc1.9fdfb15a3dbf.17]
- Turn off appletalk for fedora (Justin M. Forbes)
-- Linux v6.6.0-0.rc1.9fdfb15a3dbf
-
-* Thu Sep 14 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc1.aed8aee11130.16]
-- Linux v6.6.0-0.rc1.aed8aee11130
-
-* Wed Sep 13 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc1.3669558bdf35.15]
-- Linux v6.6.0-0.rc1.3669558bdf35
-
-* Tue Sep 12 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc1.14]
- New configs in drivers/media (Fedora Kernel Team)
- redhat/docs: Add a mention of bugzilla for bugs (Prarit Bhargava)
- Fix the fixup of Fedora release (Don Zickus)
-
-* Mon Sep 11 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc1.13]
-- Linux v6.6.0-0.rc1
-
-* Sun Sep 10 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.535a265d7f0d.12]
-- Linux v6.6.0-0.rc0.535a265d7f0d
-
-* Sat Sep 09 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.6099776f9f26.11]
-- Linux v6.6.0-0.rc0.6099776f9f26
-
-* Fri Sep 08 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.a48fa7efaf11.10]
-- Linux v6.6.0-0.rc0.a48fa7efaf11
-
-* Thu Sep 07 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.7ba2090ca64e.9]
- Fix Fedora release scheduled job (Don Zickus)
- Move squashfs to kernel-modules-core (Justin M. Forbes)
- redhat: Explicitly disable CONFIG_COPS (Vitaly Kuznetsov)
- redhat: Add dist-check-licenses target (Vitaly Kuznetsov)
- redhat: Introduce "Verify SPDX-License-Identifier tags" selftest (Vitaly Kuznetsov)
- redhat: Use kspdx-tool output for the License: field (Vitaly Kuznetsov)
-- Linux v6.6.0-0.rc0.7ba2090ca64e
-
-* Wed Sep 06 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.65d6e954e378.8]
- Rename pipeline repo branch and DW tree names (Michael Hofmann)
- Adjust comments that refer to ARK in a Rawhide context (Michael Hofmann)
- Rename variable names starting with ark- to rawhide- (Michael Hofmann)
- Rename trigger-ark to trigger-rawhide (Michael Hofmann)
- Fix up config mismatches for Fedora (Justin M. Forbes)
-- Linux v6.6.0-0.rc0.65d6e954e378
-
-* Tue Sep 05 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.3f86ed6ec0b3.7]
- redhat/configs: Texas Instruments Inc. K3 multicore SoC architecture (Steve Best)
-- Linux v6.6.0-0.rc0.3f86ed6ec0b3
-
-* Mon Sep 04 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.708283abf896.6]
-- Linux v6.6.0-0.rc0.708283abf896
-
-* Sun Sep 03 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.92901222f83d.5]
- Flip CONFIG_VIDEO_V4L2_SUBDEV_API in pending RHEL due to mismatch (Justin M. Forbes)
-- Linux v6.6.0-0.rc0.92901222f83d
-
-* Sat Sep 02 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.0468be89b3fa.4]
- CONFIG_HW_RANDOM_HISI: move to common and set to m (Scott Weaver)
- Turn off CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE for Fedora s390x (Justin M. Forbes)
-- Linux v6.6.0-0.rc0.0468be89b3fa
-
-* Fri Sep 01 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.99d99825fc07.3.eln130]
- Disable tests for ELN realtime pipelines (Michael Hofmann)
- New configs in mm/Kconfig (Fedora Kernel Team)
- Flip CONFIG_SND_SOC_CS35L56_SDW to m and clean up (Justin M. Forbes)
- Add drm_exec_test to mod-internal.list (Thorsten Leemhuis)
- Add new pending entry for CONFIG_SND_SOC_CS35L56_SDW to fix mismatch (Justin M. Forbes)
-- Linux v6.6.0-0.rc0.99d99825fc07
-
-* Thu Aug 31 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.b97d64c72259.2.eln130]
- Fix tarball creation logic (Don Zickus)
- redhat: bump libcpupower soname to match upstream (Patrick Talbert)
- Turn on MEMFD_CREATE in pending as it is selected by CONFIG_TMPFS (Justin M. Forbes)
-- Linux v6.6.0-0.rc0.b97d64c72259
-
-* Wed Aug 30 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.6c1b980a7e79.1.eln130]
- redhat: drop unneeded build-time dependency gcc-plugin-devel (Coiby Xu)
- Reset RHEL release and trim changelog after rebase (Justin M. Forbes)
-- Linux v6.6.0-0.rc0.6c1b980a7e79
-
-* Tue Aug 29 2023 Fedora Kernel Team <kernel-team@fedoraproject.org> [6.6.0-0.rc0.1c59d383390f.59.eln130]
- all: x86: move wayward x86 specific config home (Peter Robinson)
- all: de-dupe non standard config options (Peter Robinson)
- all: x86: clean up microcode loading options (Peter Robinson)
@@ -4942,8 +5131,6 @@ fi\
- redhat: generate distgit changelog in genspec.sh as well (Herton R. Krzesinski)
- redhat: make genspec prefer metadata from git notes (Herton R. Krzesinski)
- redhat: use tags from git notes for zstream to generate changelog (Herton R. Krzesinski)
-- ARK: Remove code marking drivers as tech preview (Peter Georg)
-- ARK: Remove code marking devices deprecated (Peter Georg)
- ARK: Remove code marking devices unmaintained (Peter Georg)
- rh_message: Fix function name (Peter Georg) [2019377]
- Turn on CONFIG_RANDOM_TRUST_BOOTLOADER (Justin M. Forbes)
@@ -5287,7 +5474,6 @@ fi\
- Attempt to fix Intel PMT code (David Arcari)
- CI: Enable realtime branch testing (Veronika Kabatova)
- CI: Enable realtime checks for c9s and RHEL9 (Veronika Kabatova)
-- [fs] dax: mark tech preview (Bill O'Donnell) [1995338]
- ark: wireless: enable all rtw88 pcie wireless variants (Peter Robinson)
- wireless: rtw88: move debug options to common/debug (Peter Robinson)
- fedora: minor PTP clock driver cleanups (Peter Robinson)
@@ -5462,7 +5648,6 @@ fi\
- Enable CONFIG_BPF_UNPRIV_DEFAULT_OFF (Jiri Olsa)
- configs/common/s390: disable CONFIG_QETH_{OSN,OSX} (Philipp Rudo) [1903201]
- nvme: nvme_mpath_init remove multipath check (Mike Snitzer)
-- team: mark team driver as deprecated (Hangbin Liu) [1945477]
- Make CRYPTO_EC also builtin (Simo Sorce) [1947240]
- Do not hard-code a default value for DIST (David Ward)
- Override %%{debugbuildsenabled} if the --with-release option is used (David Ward)
@@ -5471,7 +5656,6 @@ fi\
- Revert s390x/zfcpdump part of a9d179c40281 and ecbfddd98621 (Vladis Dronov)
- Embed crypto algos, modes and templates needed in the FIPS mode (Vladis Dronov) [1947240]
- configs: Add and enable CONFIG_HYPERV_TESTING for debug kernels (Mohammed Gamal)
-- mm/cma: mark CMA on x86_64 tech preview and print RHEL-specific infos (David Hildenbrand) [1945002]
- configs: enable CONFIG_CMA on x86_64 in ARK (David Hildenbrand) [1945002]
- rpmspec: build debug-* meta-packages if debug builds are disabled (Herton R. Krzesinski)
- UIO: disable unused config options (Aristeu Rozanski) [1957819]
@@ -5556,7 +5740,6 @@ fi\
- Limit CONFIG_USB_CDNS_SUPPORT to x86_64 and arm in Fedora (David Ward)
- Fedora: Enable CHARGER_GPIO on aarch64 too (Peter Robinson)
- Fedora config updates (Justin M. Forbes)
-- wireguard: mark as Tech Preview (Hangbin Liu) [1613522]
- configs: enable CONFIG_WIREGUARD in ARK (Hangbin Liu) [1613522]
- Remove duplicate configs across fedora, ark and common (Don Zickus)
- Combine duplicate configs across ark and fedora into common (Don Zickus)
@@ -6195,7 +6378,7 @@ fi\
- [initial commit] Add scripts (Laura Abbott)
- [initial commit] Add configs (Laura Abbott)
- [initial commit] Add Makefiles (Laura Abbott)
-- Linux v6.6.0-0.rc0.1c59d383390f
+- Linux v6.7.0-0.rc0.5a6a09e97199
###
# The following Emacs magic makes C-c C-e use UTC dates.
diff --git a/TOOLS/patch_configs.py b/TOOLS/patch_configs.py
index 54b039f..b69dc57 100755
--- a/TOOLS/patch_configs.py
+++ b/TOOLS/patch_configs.py
@@ -20,17 +20,6 @@ GENERIC_PATCHES = [
["WINESYNC", None, ENABLE],
["USER_NS_UNPRIVILEGED", None, ENABLE],
["TCP_CONG_BBR2", None, MODULE],
- # bcachefs
- ["BCACHEFS_FS", None, MODULE],
- ["BCACHEFS_QUOTA", None, ENABLE],
- ["BCACHEFS_POSIX_ACL", None, ENABLE],
- ["BCACHEFS_DEBUG_TRANSACTIONS", None, UNSET],
- ["BCACHEFS_DEBUG", None, UNSET],
- ["BCACHEFS_TESTS", None, UNSET],
- ["BCACHEFS_LOCK_TIME_STATS", None, UNSET],
- ["BCACHEFS_NO_LATENCY_ACCT", None, UNSET],
- ["MEAN_AND_VARIANCE_UNIT_TEST", None, UNSET],
- ["DEBUG_CLOSURES", None, UNSET],
# device specific config
# Microsoft Surface
@@ -40,9 +29,22 @@ GENERIC_PATCHES = [
["VIDEO_DW9719", None, MODULE],
["IPC_CLASSES", None, ENABLE],
["LEDS_TPS68470", None, MODULE],
-
- # Steam Deck HDR Color management
- ["DRM_AMD_COLOR_STEAMDECK", None, UNSET],
+ ["SENSORS_SURFACE_FAN", None, MODULE],
+ ["SENSORS_SURFACE_TEMP", None, MODULE],
+
+ # Steam Deck / amdgpu HDR Color management
+ ["DRM_AMD_COLOR_STEAMDECK", None, ENABLE],
+
+ # Rog Ally Gyro Fix
+ ["BMI323_I2C", None, MODULE],
+ ["BMI323_SPI", None, MODULE],
+
+ # Mac T2 support
+ ["DRM_APPLETBDRM", None, MODULE],
+ ["HID_APPLETB_BL", None, MODULE],
+ ["HID_APPLETB_KBD", None, MODULE],
+ ["HID_APPLE_MAGIC_BACKLIGHT", None, MODULE],
+ ["CONFIG_APPLE_BCE", None, MODULE],
]
ARCH_PATCHES = {
@@ -139,10 +141,11 @@ def apply_patches(data: str, patches, flags = None) -> str:
flags = []
for name, *val in patches:
- c = f"CONFIG_{name}"
+ if not name.startswith("CONFIG_"):
+ name = f"CONFIG_{name}"
- s = f"{c}="
- u = f"# {c} "
+ s = f"{name}="
+ u = f"# {name} "
if len(val) == 3 and val[2] not in flags:
continue
@@ -166,15 +169,15 @@ def apply_patches(data: str, patches, flags = None) -> str:
if val[0] is not None:
# verify we found what we expect
- l = generate_line(c, val[0])
+ l = generate_line(name, val[0])
if l != line:
#print(f" Could not apply {name}: could not find expected config")
continue
- data = data[:line_start] + generate_line(c, val[1]) + data[line_end:]
+ data = data[:line_start] + generate_line(name, val[1]) + data[line_end:]
elif val[0] is None:
# relevant entry does not exist yet and we don't want to replace anything specific
- data += generate_line(c, val[1])
+ data += generate_line(name, val[1])
data += "\n"
else:
print(f" Couldn't find {name}")